MALI: rockchip: upgrade bifrost DDK to g21p0-01eac0, from g18p0-01eac0

"/sys/kernel/tracing/events/power/gpu_work_period/*" required by Android 14.0 is implemented.

NOTE:
For RK3588, the mali_csffw.bin used with this driver MUST come from the matching DDK release, g21p0-01eac0.

Change-Id: Ifab61806a6a350ba53c5dc0296d20628c28d633a
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
Author: Zhen Chen
Date: 2023-06-05 10:40:56 +08:00
Committed by: Tao Huang
parent 6a55166690
commit b72fff5ed8
384 changed files with 36480 additions and 28455 deletions


@@ -1,6 +1,6 @@
/*
*
* (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -68,6 +68,16 @@ Description:
is supported or is powered down after suspending command
stream groups.
What: /sys/class/misc/mali%u/device/idle_hysteresis_time_ns
Description:
This attribute is available only with mali platform
device-driver that supports a CSF GPU. This attribute is
used to configure the timeout value in nanoseconds for the
GPU idle handling. If GPU has been idle for this timeout
period, then it is put to sleep for GPUs where sleep feature
is supported or is powered down after suspending command
stream groups.
What: /sys/class/misc/mali%u/device/js_ctx_scheduling_mode
Description:
This attribute is available only with platform device that
@@ -232,6 +242,23 @@ Description:
If we set the value to zero then MCU-controlled shader/tiler
power management will be disabled.
What: /sys/class/misc/mali%u/device/mcu_shader_pwroff_timeout_ns
Description:
This attribute is available only with mali platform
device-driver that supports a CSF GPU. The duration value unit
is in nanoseconds and is used for configuring MCU shader Core power-off
timer. The configured MCU shader Core power-off timer will only have
effect when the host driver has delegated the shader cores
power management to MCU. The supplied value will be
recorded internally without any change. But the actual field
value will be subject to core power-off timer source frequency
scaling and maximum value limiting. The default source will be
SYSTEM_TIMESTAMP counter. But in case the platform is not able
to supply it, the GPU CYCLE_COUNTER source will be used as an
alternative.
If we set the value to zero then MCU-controlled shader/tiler
power management will be disabled.
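As a usage illustration for the idle_hysteresis_time_ns attribute above and the mcu_shader_pwroff_timeout_ns attribute just described, here is a minimal user-space sketch; the mali0 instance name and the 10 ms values are assumptions chosen for the example, not values mandated by the driver:

#include <stdio.h>

/* Write a decimal value to a sysfs attribute; returns 0 on success. */
static int write_sysfs_u64(const char *path, unsigned long long val)
{
        FILE *f = fopen(path, "w");

        if (!f)
                return -1;
        fprintf(f, "%llu\n", val);
        return fclose(f);
}

int main(void)
{
        /* 10 ms GPU idle hysteresis, expressed in nanoseconds */
        write_sysfs_u64("/sys/class/misc/mali0/device/idle_hysteresis_time_ns", 10000000ULL);
        /* 10 ms MCU-managed shader core power-off timeout, also in nanoseconds */
        write_sysfs_u64("/sys/class/misc/mali0/device/mcu_shader_pwroff_timeout_ns", 10000000ULL);
        return 0;
}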
What: /sys/class/misc/mali%u/device/csg_scheduling_period
Description:


@@ -1,6 +1,6 @@
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -71,6 +71,10 @@ Description:
[11:10] = 1, Generate TS request approx every 128 cycles
[22:16] = 1, Trace bus ID
What: /sys/bus/coresight/devices/mali-source-ela/reset_regs
Description:
Attribute used to reset registers to zero.
What: /sys/bus/coresight/devices/mali-source-ela/enable_source
Description:
Attribute used to enable Coresight Source ELA.
@@ -79,35 +83,121 @@ What: /sys/bus/coresight/devices/mali-source-ela/is_enabled
Description:
Attribute used to check if Coresight Source ELA is enabled.
What: /sys/bus/coresight/devices/mali-source-ela/select
What: /sys/bus/coresight/devices/mali-source-ela/regs/TIMECTRL
Description:
Coresight Source ELA select trace mode:
[0], NONE
[1], JCN
[2], CEU_EXEC
[3], CEU_CMDS
[4], MCU_AHBP
[5], HOST_AXI
[6], NR_TRACEMODE
Coresight Source ELA TIMECTRL register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/sigmask0
What: /sys/bus/coresight/devices/mali-source-ela/regs/TSSR
Description:
Coresight Source ELA SIGMASK0 register set/get.
Coresight Source ELA TSR register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/sigmask4
What: /sys/bus/coresight/devices/mali-source-ela/regs/ATBCTRL
Description:
Coresight Source ELA SIGMASK4 register set/get.
Coresight Source ELA ATBCTRL register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/sigcomp0
What: /sys/bus/coresight/devices/mali-source-ela/regs/PTACTION
Description:
Coresight Source ELA SIGCOMP0 register set/get.
Coresight Source ELA PTACTION register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/sigcomp4
What: /sys/bus/coresight/devices/mali-source-ela/regs/AUXCTRL
Description:
Coresight Source ELA SIGCOMP4 register set/get.
Coresight Source ELA AUXCTRL register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/CNTSEL
Description:
Coresight Source ELA CNTSEL register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGSELn
Description:
Coresight Source ELA SIGSELn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/TRIGCTRLn
Description:
Coresight Source ELA TRIGCTRLn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/NEXTSTATEn
Description:
Coresight Source ELA NEXTSTATEn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/ACTIONn
Description:
Coresight Source ELA ACTIONn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/ALTNEXTSTATEn
Description:
Coresight Source ELA ALTNEXTSTATEn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/ALTACTIONn
Description:
Coresight Source ELA ALTACTIONn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/COMPCTRLn
Description:
Coresight Source ELA COMPCTRLn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/ALTCOMPCTRLn
Description:
Coresight Source ELA ALTCOMPCTRLn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/COUNTCOMPn
Description:
Coresight Source ELA COUNTCOMPn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/TWBSELn
Description:
Coresight Source ELA TWBSELn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/EXTMASKn
Description:
Coresight Source ELA EXTMASKn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/EXTCOMPn
Description:
Coresight Source ELA EXTCOMPn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/QUALMASKn
Description:
Coresight Source ELA QUALMASKn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/QUALCOMPn
Description:
Coresight Source ELA QUALCOMPn register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGMASKn_0-7
Description:
Coresight Source ELA SIGMASKn_0-7 register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGCOMPn_0-7
Description:
Coresight Source ELA SIGCOMPn_0-7 register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGSELn_0-7
Description:
Coresight Source ELA SIGSELn_0-7 register set/get.
Refer to specification for more details.
What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGMASKn_0-7
Description:
Coresight Source ELA SIGMASKn_0-7 register set/get.
Refer to specification for more details.
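As a usage illustration for the regs/ attributes documented above, here is a minimal user-space sketch; the register values are placeholders, SIGSEL0 stands in for one of the SIGSELn instances, and real settings come from the ELA specification:

#include <stdio.h>

#define ELA_DIR "/sys/bus/coresight/devices/mali-source-ela/"

/* Write a string to one attribute of the mali-source-ela device. */
static int ela_write(const char *attr, const char *val)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), ELA_DIR "%s", attr);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fputs(val, f);
        return fclose(f);
}

int main(void)
{
        ela_write("regs/TIMECTRL", "0x0");   /* placeholder value */
        ela_write("regs/SIGSEL0", "0x1");    /* placeholder value */
        ela_write("enable_source", "1");     /* start the ELA source */
        return 0;
}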


@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -109,3 +109,65 @@ surpass the arg value.
===============================================================================================================================
queue:KCPU-0-1 exec:S cmd:CQS_WAIT_OPERATION obj:0x0000007fbf6f2ff8 live_value:0x0000000000000000 | op:gt arg_value: 0x00000000
===============================================================================================================================
CSF Sync State Dump For Fence Signal Timeouts
---------------------------------------------
Summary
-------
A timer has been added to the KCPU queues which is checked to ensure
the queues have not "timed out" between the enqueuing of a fence signal command
and its eventual execution. If this timeout happens then the CSF sync state
of all KCPU queues of the offending context is dumped. This feature is enabled
by default, but can be disabled/enabled later.
Explanation
------------
This new timer is created and destroyed alongside the creation and destruction
of each KCPU queue. It is started when a fence signal is enqueued, and cancelled
when the fence signal command has been processed. The timer times out after
10 seconds (at 100 MHz) if the execution of that fence signal event was never
processed. If this timeout occurs then the timer callback function identifies
the KCPU queue which the timer belongs to and invokes the CSF synchronisation
state dump mechanism, so that the sync state for the context of the queue
causing the timeout is dumped to dmesg.
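A minimal kernel-side sketch of the timer pattern just described (not the actual kbase implementation) follows; the my_queue structure and the my_dump_sync_state() helper are invented for illustration:

#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/printk.h>

struct my_queue {
        struct timer_list fence_signal_timer;
};

/* Stand-in for the CSF sync state dump performed by the real driver. */
static void my_dump_sync_state(struct my_queue *q)
{
        pr_warn("fence signal timed out, dumping sync state for queue %p\n", q);
}

static void fence_signal_timeout_cb(struct timer_list *t)
{
        struct my_queue *q = from_timer(q, t, fence_signal_timer);

        my_dump_sync_state(q);
}

static void my_queue_init(struct my_queue *q)
{
        /* Created alongside the queue, as described above */
        timer_setup(&q->fence_signal_timer, fence_signal_timeout_cb, 0);
}

static void on_fence_signal_enqueued(struct my_queue *q, unsigned int timeout_ms)
{
        /* Arm the timer when the fence signal command is enqueued */
        mod_timer(&q->fence_signal_timer, jiffies + msecs_to_jiffies(timeout_ms));
}

static void on_fence_signal_processed(struct my_queue *q)
{
        /* Cancel it once the command has actually been executed */
        del_timer_sync(&q->fence_signal_timer);
}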
Fence Timeouts Controls
-----------------------
Disable/Enable Feature
----------------------
This feature is enabled by default, but can be disabled and re-enabled via debugfs
controls. The 'fence_signal_timeout_enable' debugfs entry is a global flag
that is written to in order to turn this feature on and off.
Example:
--------
when writing to the fence_signal_timeout_enable entry:
echo 1 > /sys/kernel/debug/mali0/fence_signal_timeout_enable -> feature is enabled.
echo 0 > /sys/kernel/debug/mali0/fence_signal_timeout_enable -> feature is disabled.
It is also possible to read from this file to check whether the feature is
currently enabled by checking the value returned from fence_signal_timeout_enable.
Example:
--------
when reading from the fence_signal_timeout_enable entry, if:
cat /sys/kernel/debug/mali0/fence_signal_timeout_enable returns 1 -> feature is enabled.
cat /sys/kernel/debug/mali0/fence_signal_timeout_enable returns 0 -> feature is disabled.
Update Timer Duration
---------------------
The timeout duration can be accessed through the 'fence_signal_timeout_ms'
debugfs entry. This can be read from to retrieve the current time in
milliseconds.
Example:
--------
cat /sys/kernel/debug/mali0/fence_signal_timeout_ms
The 'fence_signal_timeout_ms' debugfs entry can also be written to, to update
the time in milliseconds.
Example:
--------
echo 10000 > /sys/kernel/debug/mali0/fence_signal_timeout_ms


@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -44,22 +44,22 @@ Documentation/devicetree/bindings/regulator/regulator.txt for details.
- mem-supply : Phandle to memory regulator for the Mali device. This is optional.
- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt
for details.
- quirks_gpu : Used to write to the JM_CONFIG or CSF_CONFIG register.
- quirks-gpu : Used to write to the JM_CONFIG or CSF_CONFIG register.
Should be used with care. Options passed here are used to override
certain default behavior. Note: This will override 'idvs-group-size'
field in devicetree and module param 'corestack_driver_control',
therefore if 'quirks_gpu' is used then 'idvs-group-size' and
'corestack_driver_control' value should be incorporated into 'quirks_gpu'.
- quirks_sc : Used to write to the SHADER_CONFIG register.
therefore if 'quirks-gpu' is used then 'idvs-group-size' and
'corestack_driver_control' value should be incorporated into 'quirks-gpu'.
- quirks-sc : Used to write to the SHADER_CONFIG register.
Should be used with care. Options passed here are used to override
certain default behavior.
- quirks_tiler : Used to write to the TILER_CONFIG register.
- quirks-tiler : Used to write to the TILER_CONFIG register.
Should be used with care. Options passed here are used to
disable or override certain default behavior.
- quirks_mmu : Used to write to the L2_CONFIG register.
- quirks-mmu : Used to write to the L2_CONFIG register.
Should be used with care. Options passed here are used to
disable or override certain default behavior.
- power_model : Sets the power model parameters. Defined power models include:
- power-model : Sets the power model parameters. Defined power models include:
"mali-simple-power-model", "mali-g51-power-model", "mali-g52-power-model",
"mali-g52_r1-power-model", "mali-g71-power-model", "mali-g72-power-model",
"mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model",
@@ -96,7 +96,7 @@ for details.
are used at different points so care should be taken to configure
both power models in the device tree (specifically dynamic-coefficient,
static-coefficient and scale) to best match the platform.
- power_policy : Sets the GPU power policy at probe time. Available options are
- power-policy : Sets the GPU power policy at probe time. Available options are
"coarse_demand" and "always_on". If not set, then "coarse_demand" is used.
- system-coherency : Sets the coherency protocol to be used for coherent
accesses made from the GPU.
@@ -116,17 +116,19 @@ for details.
- l2-hash-values : Override L2 hash function using provided hash values, on GPUs that supports it.
It is mutually exclusive with 'l2-hash'. Only one or the other must be
used in a supported GPU.
- arbiter_if : Phandle to the arbif platform device, used to provide KBASE with an interface
- arbiter-if : Phandle to the arbif platform device, used to provide KBASE with an interface
to the Arbiter. This is required when using arbitration; setting to a non-NULL
value will enable arbitration.
If arbitration is in use, then there should be no external GPU control.
When arbiter_if is in use then the following must not be:
- power_model (no IPA allowed with arbitration)
When arbiter-if is in use then the following must not be:
- power-model (no IPA allowed with arbitration)
- #cooling-cells
- operating-points-v2 (no dvfs in kbase with arbitration)
- system-coherency with a value of 1 (no full coherency with arbitration)
- int_id_override: list of <ID Setting[7:0]> tuples defining the IDs needed to be
- int-id-override: list of <ID Setting[7:0]> tuples defining the IDs needed to be
set and the setting corresponding to the SYSC_ALLOC register.
- propagate-bits: Used to write to L2_CONFIG.PBHA_HWU. This bitset establishes which
PBHA bits are propagated on the AXI bus.
Example for a Mali GPU with 1 clock and 1 regulator:
@@ -234,8 +236,8 @@ Example for a Mali GPU supporting PBHA configuration via DTB (default):
gpu@0xfc010000 {
...
pbha {
int_id_override = <2 0x32>, <9 0x05>, <16 0x32>;
propagate_bits = <0x03>;
int-id-override = <2 0x32>, <9 0x05>, <16 0x32>;
propagate-bits = /bits/ 4 <0x03>;
};
...
};
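Since the bindings above rename several properties from underscores to hyphens, a hypothetical driver-side sketch (not code taken from kbase) of preferring the new spelling while still accepting the legacy name could look like this:

#include <linux/of.h>

/* Read "quirks-gpu", falling back to the pre-rename "quirks_gpu" name. */
static u32 read_quirks_gpu(const struct device_node *np)
{
        u32 quirks = 0;

        if (of_property_read_u32(np, "quirks-gpu", &quirks))
                of_property_read_u32(np, "quirks_gpu", &quirks);

        return quirks;
}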


@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -87,27 +87,6 @@ Required properties
- compatible: Has to be "arm,coresight-mali-source-ela"
- gpu : phandle to a Mali GPU definition
- signal-groups: Signal groups indexed from 0 to 5.
Used to configure the signal channels.
- sgN: Types of signals attached to one channel.
It can be more than one type in the case of
JCN request/response.
Types:
- "jcn-request": Can share the channel with "jcn-response"
- "jcn-response": Can share the channel with "jcn-request"
- "ceu-execution": Cannot share the channel with other types
- "ceu-commands": Cannot share the channel with other types
- "mcu-ahbp": Cannot share the channel with other types
- "host-axi": Cannot share the channel with other types
If the HW implementation shares a common channel
for JCN response and request (total of 4 channels),
Refer to:
- "Example: Shared JCN request/response channel"
Otherwise (total of 5 channels), refer to:
- "Example: Split JCN request/response channel"
- port:
- endpoint:
- remote-endpoint: phandle to a Coresight sink port
@@ -116,19 +95,12 @@ Example: Split JCN request/response channel
--------------------------------------------
This example applies to implementations with a total of 5 signal groups,
where JCN request and response are assigned to independent channels.
where JCN request and response are assigned to independent or shared
channels depending on the GPU model.
mali-source-ela {
compatible = "arm,coresight-mali-source-ela";
gpu = <&gpu>;
signal-groups {
sg0 = "jcn-request";
sg1 = "jcn-response";
sg2 = "ceu-execution";
sg3 = "ceu-commands";
sg4 = "mcu-ahbp";
sg5 = "host-axi";
};
port {
mali_source_ela_out_port0: endpoint {
remote-endpoint = <&mali_sink_in_port2>;
@@ -136,25 +108,9 @@ mali-source-ela {
};
};
Example: Shared JCN request/response channel
SysFS Configuration
--------------------------------------------
This example applies to implementations with a total of 4 signal groups,
where JCN request and response are assigned to the same channel.
mali-source-ela {
compatible = "arm,coresight-mali-source-ela";
gpu = <&gpu>;
signal-groups {
sg0 = "jcn-request", "jcn-response";
sg1 = "ceu-execution";
sg2 = "ceu-commands";
sg3 = "mcu-ahbp";
sg4 = "host-axi";
};
port {
mali_source_ela_out_port0: endpoint {
remote-endpoint = <&mali_sink_in_port1>;
};
};
};
The register values used by CoreSight for ELA can be configured using SysFS
interfaces. This implicitly includes configuring the ELA for independent or
shared JCN request and response channels.


@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -21,6 +21,7 @@
#
# ccflags
#
src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
ccflags-y += -I$(src)/../../../include
subdir-ccflags-y += $(ccflags-y)


@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software


@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -23,49 +23,53 @@
#
KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build
KDIR ?= $(KERNEL_SRC)
M ?= $(shell pwd)
ifeq ($(KDIR),)
$(error Must specify KDIR to point to the kernel to target))
endif
vars :=
#
# Default configuration values
#
CONFIG_MALI_BASE_MODULES ?= n
CONFIGS :=
ifeq ($(CONFIG_MALI_BASE_MODULES),y)
CONFIG_MALI_CSF_SUPPORT ?= n
ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
#
# Default configuration values
#
CONFIG_MALI_BASE_MODULES ?= n
ifeq ($(CONFIG_MALI_BASE_MODULES),y)
CONFIG_MALI_CSF_SUPPORT ?= n
ifneq ($(CONFIG_DMA_SHARED_BUFFER),n)
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y
else
# Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n
endif
CONFIG_MALI_MEMORY_GROUP_MANAGER ?= y
ifneq ($(CONFIG_MALI_CSF_SUPPORT), n)
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= y
endif
ifneq ($(CONFIG_DMA_SHARED_BUFFER),n)
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y
else
# Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n
# Prevent misuse when CONFIG_MALI_BASE_MODULES=n
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n
CONFIG_MALI_MEMORY_GROUP_MANAGER = n
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n
endif
CONFIG_MALI_MEMORY_GROUP_MANAGER ?= y
ifneq ($(CONFIG_MALI_CSF_SUPPORT), n)
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= y
endif
else
# Prevent misuse when CONFIG_MALI_BASE_MODULES=n
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n
CONFIG_MALI_MEMORY_GROUP_MANAGER = n
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n
CONFIGS += \
CONFIG_MALI_BASE_MODULES \
CONFIG_MALI_CSF_SUPPORT \
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \
CONFIG_MALI_MEMORY_GROUP_MANAGER \
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR \
endif
CONFIGS := \
CONFIG_MALI_BASE_MODULES \
CONFIG_MALI_CSF_SUPPORT \
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \
CONFIG_MALI_MEMORY_GROUP_MANAGER \
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR \
#
# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build
#
@@ -88,65 +92,65 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \
$(if $(filter y m,$(value $(value config))), \
-D$(value config)=1))
KBUILD_CFLAGS += -Wall -Werror
CFLAGS_MODULE += -Wall -Werror
ifeq ($(CONFIG_GCOV_KERNEL), y)
KBUILD_CFLAGS += $(call cc-option, -ftest-coverage)
KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs)
CFLAGS_MODULE += $(call cc-option, -ftest-coverage)
CFLAGS_MODULE += $(call cc-option, -fprofile-arcs)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
ifeq ($(CONFIG_MALI_KCOV),y)
KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
CFLAGS_MODULE += $(call cc-option, -fsanitize-coverage=trace-cmp)
EXTRA_CFLAGS += -DKCOV=1
EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
endif
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
KBUILD_CFLAGS += -Wmissing-declarations
KBUILD_CFLAGS += -Wmissing-format-attribute
KBUILD_CFLAGS += -Wmissing-prototypes
KBUILD_CFLAGS += -Wold-style-definition
CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter
CFLAGS_MODULE += -Wmissing-declarations
CFLAGS_MODULE += -Wmissing-format-attribute
CFLAGS_MODULE += -Wmissing-prototypes
CFLAGS_MODULE += -Wold-style-definition
# The -Wmissing-include-dirs cannot be enabled as the path to some of the
# included directories change depending on whether it is an in-tree or
# out-of-tree build.
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable)
CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable)
CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned)
CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-shift-negative-value
CFLAGS_MODULE += -Wno-sign-compare
CFLAGS_MODULE += -Wno-shift-negative-value
# This flag is needed to avoid build errors on older kernels
KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
# The following were added to align with W=2 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wdisabled-optimization
CFLAGS_MODULE += -Wdisabled-optimization
# The -Wshadow flag cannot be enabled unless upstream kernels are
# patched to fix redefinitions of certain built-in functions and
# global variables.
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
KBUILD_CFLAGS += -Wmissing-field-initializers
CFLAGS_MODULE += $(call cc-option, -Wlogical-op)
CFLAGS_MODULE += -Wmissing-field-initializers
# -Wtype-limits must be disabled due to build failures on kernel 5.x
KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
CFLAGS_MODULE += -Wno-type-limits
CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized)
CFLAGS_MODULE += $(call cc-option, -Wunused-macros)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
# This warning is disabled to avoid build failures in some kernel versions
KBUILD_CFLAGS += -Wno-ignored-qualifiers
CFLAGS_MODULE += -Wno-ignored-qualifiers
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
$(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
modules_install:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install
$(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) modules_install
clean:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean
$(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) clean


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,6 +31,10 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/dma-mapping.h>
#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
#include <linux/dma-resv.h>
#endif
#include <linux/version_compat_defs.h>
#define DMA_BUF_TE_VER_MAJOR 1
#define DMA_BUF_TE_VER_MINOR 0
@@ -47,6 +51,10 @@
#define NO_SG_CHAIN
#endif
#ifndef CSTD_UNUSED
#define CSTD_UNUSED(x) ((void)(x))
#endif
struct dma_buf_te_alloc {
/* the real alloc */
size_t nr_pages;
@@ -65,6 +73,9 @@ struct dma_buf_te_alloc {
bool contiguous;
dma_addr_t contig_dma_addr;
void *contig_cpu_addr;
/* @lock: Used internally to serialize list manipulation, attach/detach etc. */
struct mutex lock;
};
struct dma_buf_te_attachment {
@@ -75,12 +86,13 @@ struct dma_buf_te_attachment {
static struct miscdevice te_device;
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev,
struct dma_buf_attachment *attachment)
#else
static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
#endif
{
struct dma_buf_te_alloc *alloc;
struct dma_buf_te_alloc *alloc;
alloc = buf->priv;
@@ -91,8 +103,9 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att
if (!attachment->priv)
return -ENOMEM;
/* dma_buf is externally locked during call */
mutex_lock(&alloc->lock);
alloc->nr_attached_devices++;
mutex_unlock(&alloc->lock);
return 0;
}
@@ -107,20 +120,23 @@ static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *at
struct dma_buf_te_alloc *alloc = buf->priv;
struct dma_buf_te_attachment *pa = attachment->priv;
/* dma_buf is externally locked during call */
mutex_lock(&alloc->lock);
WARN(pa->attachment_mapped, "WARNING: dma-buf-test-exporter detected detach with open device mappings");
WARN(pa->attachment_mapped,
"WARNING: dma-buf-test-exporter detected detach with open device mappings");
alloc->nr_attached_devices--;
mutex_unlock(&alloc->lock);
kfree(pa);
}
static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment,
enum dma_data_direction direction)
{
struct sg_table *sg;
struct scatterlist *iter;
struct dma_buf_te_alloc *alloc;
struct dma_buf_te_alloc *alloc;
struct dma_buf_te_attachment *pa = attachment->priv;
size_t i;
int ret;
@@ -130,8 +146,7 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en
if (alloc->fail_map)
return ERR_PTR(-ENOMEM);
if (WARN(pa->attachment_mapped,
"WARNING: Attempted to map already mapped attachment."))
if (WARN(pa->attachment_mapped, "WARNING: Attempted to map already mapped attachment."))
return ERR_PTR(-EBUSY);
#ifdef NO_SG_CHAIN
@@ -145,21 +160,22 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en
return ERR_PTR(-ENOMEM);
/* from here we access the allocation object, so lock the dmabuf pointing to it */
mutex_lock(&attachment->dmabuf->lock);
mutex_lock(&alloc->lock);
if (alloc->contiguous)
ret = sg_alloc_table(sg, 1, GFP_KERNEL);
else
ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
if (ret) {
mutex_unlock(&attachment->dmabuf->lock);
mutex_unlock(&alloc->lock);
kfree(sg);
return ERR_PTR(ret);
}
if (alloc->contiguous) {
sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)),
alloc->nr_pages * PAGE_SIZE, 0);
sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
} else {
for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
@@ -167,7 +183,7 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en
}
if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
mutex_unlock(&attachment->dmabuf->lock);
mutex_unlock(&alloc->lock);
sg_free_table(sg);
kfree(sg);
return ERR_PTR(-ENOMEM);
@@ -176,26 +192,26 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en
alloc->nr_device_mappings++;
pa->attachment_mapped = true;
pa->sg = sg;
mutex_unlock(&attachment->dmabuf->lock);
mutex_unlock(&alloc->lock);
return sg;
}
static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
struct sg_table *sg, enum dma_data_direction direction)
static void dma_buf_te_unmap(struct dma_buf_attachment *attachment, struct sg_table *sg,
enum dma_data_direction direction)
{
struct dma_buf_te_alloc *alloc;
struct dma_buf_te_attachment *pa = attachment->priv;
alloc = attachment->dmabuf->priv;
mutex_lock(&attachment->dmabuf->lock);
mutex_lock(&alloc->lock);
WARN(!pa->attachment_mapped, "WARNING: Unmatched unmap of attachment.");
alloc->nr_device_mappings--;
pa->attachment_mapped = false;
pa->sg = NULL;
mutex_unlock(&attachment->dmabuf->lock);
mutex_unlock(&alloc->lock);
dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
sg_free_table(sg);
@@ -209,13 +225,12 @@ static void dma_buf_te_release(struct dma_buf *buf)
alloc = buf->priv;
/* no need for locking */
mutex_destroy(&alloc->lock);
if (alloc->contiguous) {
dma_free_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr,
alloc->contig_dma_addr,
DMA_ATTR_WRITE_COMBINE);
dma_free_attrs(te_device.this_device, alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr, alloc->contig_dma_addr,
DMA_ATTR_WRITE_COMBINE);
} else {
for (i = 0; i < alloc->nr_pages; i++)
__free_page(alloc->pages[i]);
@@ -228,46 +243,62 @@ static void dma_buf_te_release(struct dma_buf *buf)
kfree(alloc);
}
static int dma_buf_te_sync(struct dma_buf *dmabuf,
enum dma_data_direction direction,
bool start_cpu_access)
static int dma_buf_te_sync(struct dma_buf *dmabuf, enum dma_data_direction direction,
bool start_cpu_access)
{
struct dma_buf_attachment *attachment;
struct dma_buf_te_alloc *alloc = dmabuf->priv;
/* Use the kernel lock to prevent the concurrent update of dmabuf->attachments */
#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
dma_resv_lock(dmabuf->resv, NULL);
#else
mutex_lock(&dmabuf->lock);
#endif
/* Use the internal lock to block the concurrent attach/detach calls */
mutex_lock(&alloc->lock);
list_for_each_entry(attachment, &dmabuf->attachments, node) {
struct dma_buf_te_attachment *pa = attachment->priv;
struct sg_table *sg = pa->sg;
if (!sg) {
dev_dbg(te_device.this_device, "no mapping for device %s\n", dev_name(attachment->dev));
dev_dbg(te_device.this_device, "no mapping for device %s\n",
dev_name(attachment->dev));
continue;
}
if (start_cpu_access) {
dev_dbg(te_device.this_device, "sync cpu with device %s\n", dev_name(attachment->dev));
dev_dbg(te_device.this_device, "sync cpu with device %s\n",
dev_name(attachment->dev));
dma_sync_sg_for_cpu(attachment->dev, sg->sgl, sg->nents, direction);
} else {
dev_dbg(te_device.this_device, "sync device %s with cpu\n", dev_name(attachment->dev));
dev_dbg(te_device.this_device, "sync device %s with cpu\n",
dev_name(attachment->dev));
dma_sync_sg_for_device(attachment->dev, sg->sgl, sg->nents, direction);
}
}
mutex_unlock(&alloc->lock);
#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
dma_resv_unlock(dmabuf->resv);
#else
mutex_unlock(&dmabuf->lock);
#endif
return 0;
}
static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf,
enum dma_data_direction direction)
static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction)
{
return dma_buf_te_sync(dmabuf, direction, true);
}
static int dma_buf_te_end_cpu_access(struct dma_buf *dmabuf,
enum dma_data_direction direction)
static int dma_buf_te_end_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction)
{
return dma_buf_te_sync(dmabuf, direction, false);
}
@@ -280,9 +311,9 @@ static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
dma_buf = vma->vm_private_data;
alloc = dma_buf->priv;
mutex_lock(&dma_buf->lock);
mutex_lock(&alloc->lock);
alloc->nr_cpu_mappings++;
mutex_unlock(&dma_buf->lock);
mutex_unlock(&alloc->lock);
}
static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
@@ -293,10 +324,10 @@ static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
dma_buf = vma->vm_private_data;
alloc = dma_buf->priv;
mutex_lock(&alloc->lock);
BUG_ON(alloc->nr_cpu_mappings <= 0);
mutex_lock(&dma_buf->lock);
alloc->nr_cpu_mappings--;
mutex_unlock(&dma_buf->lock);
mutex_unlock(&alloc->lock);
}
#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
@@ -331,11 +362,9 @@ static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf)
return 0;
}
static const struct vm_operations_struct dma_buf_te_vm_ops = {
.open = dma_buf_te_mmap_open,
.close = dma_buf_te_mmap_close,
.fault = dma_buf_te_mmap_fault
};
static const struct vm_operations_struct dma_buf_te_vm_ops = { .open = dma_buf_te_mmap_open,
.close = dma_buf_te_mmap_close,
.fault = dma_buf_te_mmap_fault };
static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
@@ -346,7 +375,7 @@ static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
if (alloc->fail_mmap)
return -ENOMEM;
vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &dma_buf_te_vm_ops;
vma->vm_private_data = dmabuf;
@@ -374,10 +403,9 @@ static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
if (page_num >= alloc->nr_pages)
return NULL;
return kmap(alloc->pages[page_num]);
return kbase_kmap(alloc->pages[page_num]);
}
static void dma_buf_te_kunmap(struct dma_buf *buf,
unsigned long page_num, void *addr)
static void dma_buf_te_kunmap(struct dma_buf *buf, unsigned long page_num, void *addr)
{
struct dma_buf_te_alloc *alloc;
@@ -385,7 +413,7 @@ static void dma_buf_te_kunmap(struct dma_buf *buf,
if (page_num >= alloc->nr_pages)
return;
kunmap(alloc->pages[page_num]);
kbase_kunmap(alloc->pages[page_num], addr);
}
static struct dma_buf_ops dma_buf_te_ops = {
@@ -467,8 +495,9 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf,
#endif /* NO_SG_CHAIN */
if (alloc_req.size > max_nr_pages) {
dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %zu pages",
__func__, alloc_req.size, max_nr_pages);
dev_err(te_device.this_device,
"%s: buffer size of %llu pages exceeded the mapping limit of %zu pages",
__func__, alloc_req.size, max_nr_pages);
goto invalid_size;
}
@@ -488,23 +517,21 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf,
#endif
if (!alloc->pages) {
dev_err(te_device.this_device,
"%s: couldn't alloc %zu page structures",
__func__, alloc->nr_pages);
dev_err(te_device.this_device, "%s: couldn't alloc %zu page structures", __func__,
alloc->nr_pages);
goto free_alloc_object;
}
if (contiguous) {
dma_addr_t dma_aux;
alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
&alloc->contig_dma_addr,
GFP_KERNEL | __GFP_ZERO,
DMA_ATTR_WRITE_COMBINE);
alloc->contig_cpu_addr = dma_alloc_attrs(
te_device.this_device, alloc->nr_pages * PAGE_SIZE, &alloc->contig_dma_addr,
GFP_KERNEL | __GFP_ZERO, DMA_ATTR_WRITE_COMBINE);
if (!alloc->contig_cpu_addr) {
dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %zu pages",
__func__, alloc->nr_pages);
dev_err(te_device.this_device,
"%s: couldn't alloc contiguous buffer %zu pages", __func__,
alloc->nr_pages);
goto free_page_struct;
}
dma_aux = alloc->contig_dma_addr;
@@ -522,6 +549,8 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf,
}
}
mutex_init(&alloc->lock);
/* alloc ready, let's export it */
{
struct dma_buf_export_info export_info = {
@@ -555,13 +584,12 @@ no_fd:
dma_buf_put(dma_buf);
no_export:
/* i still valid */
mutex_destroy(&alloc->lock);
no_page:
if (contiguous) {
dma_free_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr,
alloc->contig_dma_addr,
DMA_ATTR_WRITE_COMBINE);
dma_free_attrs(te_device.this_device, alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr, alloc->contig_dma_addr,
DMA_ATTR_WRITE_COMBINE);
} else {
while (i-- > 0)
__free_page(alloc->pages[i]);
@@ -602,11 +630,11 @@ static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg
alloc = dmabuf->priv;
/* lock while reading status to take a snapshot */
mutex_lock(&dmabuf->lock);
mutex_lock(&alloc->lock);
status.attached_devices = alloc->nr_attached_devices;
status.device_mappings = alloc->nr_device_mappings;
status.cpu_mappings = alloc->nr_cpu_mappings;
mutex_unlock(&dmabuf->lock);
mutex_unlock(&alloc->lock);
if (copy_to_user(arg, &status, sizeof(status)))
goto err_have_dmabuf;
@@ -640,11 +668,11 @@ static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing _
/* ours, set the fail modes */
alloc = dmabuf->priv;
/* lock to set the fail modes atomically */
mutex_lock(&dmabuf->lock);
mutex_lock(&alloc->lock);
alloc->fail_attach = f.fail_attach;
alloc->fail_map = f.fail_map;
alloc->fail_mmap = f.fail_mmap;
mutex_unlock(&dmabuf->lock);
alloc->fail_map = f.fail_map;
alloc->fail_mmap = f.fail_mmap;
mutex_unlock(&alloc->lock);
/* success */
res = 0;
@@ -709,7 +737,6 @@ no_import:
static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
{
struct dma_buf *dmabuf;
struct dma_buf_te_ioctl_fill f;
int ret;
@@ -729,17 +756,21 @@ static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
CSTD_UNUSED(file);
switch (cmd) {
case DMA_BUF_TE_VERSION:
return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
case DMA_BUF_TE_ALLOC:
return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg,
false);
case DMA_BUF_TE_ALLOC_CONT:
return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
case DMA_BUF_TE_QUERY:
return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
case DMA_BUF_TE_SET_FAILING:
return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
return do_dma_buf_te_ioctl_set_failing(
(struct dma_buf_te_ioctl_set_failing __user *)arg);
case DMA_BUF_TE_FILL:
return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
default:
@@ -770,7 +801,6 @@ static int __init dma_buf_te_init(void)
dev_info(te_device.this_device, "dma_buf_te ready\n");
return 0;
}
static void __exit dma_buf_te_exit(void)


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,15 +32,13 @@
#include <linux/mm.h>
#include <linux/memory_group_manager.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0))
#undef DEFINE_SIMPLE_ATTRIBUTE
#define DEFINE_SIMPLE_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE
#define debugfs_create_file debugfs_create_file_unsafe
#ifndef CSTD_UNUSED
#define CSTD_UNUSED(x) ((void)(x))
#endif
#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE)
static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, pgprot_t pgprot)
static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, pgprot_t pgprot)
{
int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
@@ -72,10 +70,10 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
* debugfs. Display is organized per group with small and large sized pages.
*/
struct mgm_group {
size_t size;
size_t lp_size;
size_t insert_pfn;
size_t update_gpu_pte;
atomic_t size;
atomic_t lp_size;
atomic_t insert_pfn;
atomic_t update_gpu_pte;
};
/**
@@ -102,7 +100,7 @@ static int mgm_size_get(void *data, u64 *val)
{
struct mgm_group *group = data;
*val = group->size;
*val = atomic_read(&group->size);
return 0;
}
@@ -110,27 +108,21 @@ static int mgm_size_get(void *data, u64 *val)
static int mgm_lp_size_get(void *data, u64 *val)
{
struct mgm_group *group = data;
*val = group->lp_size;
*val = atomic_read(&group->lp_size);
return 0;
}
static int mgm_insert_pfn_get(void *data, u64 *val)
{
struct mgm_group *group = data;
*val = group->insert_pfn;
*val = atomic_read(&group->insert_pfn);
return 0;
}
static int mgm_update_gpu_pte_get(void *data, u64 *val)
{
struct mgm_group *group = data;
*val = group->update_gpu_pte;
*val = atomic_read(&group->update_gpu_pte);
return 0;
}
@@ -154,8 +146,7 @@ static int mgm_initialize_debugfs(struct mgm_groups *mgm_data)
/*
* Create root directory of memory-group-manager
*/
mgm_data->mgm_debugfs_root =
debugfs_create_dir("physical-memory-group-manager", NULL);
mgm_data->mgm_debugfs_root = debugfs_create_dir("physical-memory-group-manager", NULL);
if (IS_ERR_OR_NULL(mgm_data->mgm_debugfs_root)) {
dev_err(mgm_data->dev, "fail to create debugfs root directory\n");
return -ENODEV;
@@ -165,43 +156,37 @@ static int mgm_initialize_debugfs(struct mgm_groups *mgm_data)
* Create debugfs files per group
*/
for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) {
scnprintf(debugfs_group_name, MGM_DEBUGFS_GROUP_NAME_MAX,
"group_%d", i);
g = debugfs_create_dir(debugfs_group_name,
mgm_data->mgm_debugfs_root);
scnprintf(debugfs_group_name, MGM_DEBUGFS_GROUP_NAME_MAX, "group_%d", i);
g = debugfs_create_dir(debugfs_group_name, mgm_data->mgm_debugfs_root);
if (IS_ERR_OR_NULL(g)) {
dev_err(mgm_data->dev, "fail to create group[%d]\n", i);
goto remove_debugfs;
}
e = debugfs_create_file("size", 0444, g, &mgm_data->groups[i],
&fops_mgm_size);
e = debugfs_create_file("size", 0444, g, &mgm_data->groups[i], &fops_mgm_size);
if (IS_ERR_OR_NULL(e)) {
dev_err(mgm_data->dev, "fail to create size[%d]\n", i);
goto remove_debugfs;
}
e = debugfs_create_file("lp_size", 0444, g,
&mgm_data->groups[i], &fops_mgm_lp_size);
e = debugfs_create_file("lp_size", 0444, g, &mgm_data->groups[i],
&fops_mgm_lp_size);
if (IS_ERR_OR_NULL(e)) {
dev_err(mgm_data->dev,
"fail to create lp_size[%d]\n", i);
dev_err(mgm_data->dev, "fail to create lp_size[%d]\n", i);
goto remove_debugfs;
}
e = debugfs_create_file("insert_pfn", 0444, g,
&mgm_data->groups[i], &fops_mgm_insert_pfn);
e = debugfs_create_file("insert_pfn", 0444, g, &mgm_data->groups[i],
&fops_mgm_insert_pfn);
if (IS_ERR_OR_NULL(e)) {
dev_err(mgm_data->dev,
"fail to create insert_pfn[%d]\n", i);
dev_err(mgm_data->dev, "fail to create insert_pfn[%d]\n", i);
goto remove_debugfs;
}
e = debugfs_create_file("update_gpu_pte", 0444, g,
&mgm_data->groups[i], &fops_mgm_update_gpu_pte);
e = debugfs_create_file("update_gpu_pte", 0444, g, &mgm_data->groups[i],
&fops_mgm_update_gpu_pte);
if (IS_ERR_OR_NULL(e)) {
dev_err(mgm_data->dev,
"fail to create update_gpu_pte[%d]\n", i);
dev_err(mgm_data->dev, "fail to create update_gpu_pte[%d]\n", i);
goto remove_debugfs;
}
}
@@ -236,31 +221,30 @@ static void update_size(struct memory_group_manager_device *mgm_dev, unsigned in
switch (order) {
case ORDER_SMALL_PAGE:
if (alloc)
data->groups[group_id].size++;
atomic_inc(&data->groups[group_id].size);
else {
WARN_ON(data->groups[group_id].size == 0);
data->groups[group_id].size--;
WARN_ON(atomic_read(&data->groups[group_id].size) == 0);
atomic_dec(&data->groups[group_id].size);
}
break;
break;
case ORDER_LARGE_PAGE:
if (alloc)
data->groups[group_id].lp_size++;
atomic_inc(&data->groups[group_id].lp_size);
else {
WARN_ON(data->groups[group_id].lp_size == 0);
data->groups[group_id].lp_size--;
WARN_ON(atomic_read(&data->groups[group_id].lp_size) == 0);
atomic_dec(&data->groups[group_id].lp_size);
}
break;
break;
default:
dev_err(data->dev, "Unknown order(%d)\n", order);
break;
break;
}
}
static struct page *example_mgm_alloc_page(
struct memory_group_manager_device *mgm_dev, int group_id,
gfp_t gfp_mask, unsigned int order)
static struct page *example_mgm_alloc_page(struct memory_group_manager_device *mgm_dev,
int group_id, gfp_t gfp_mask, unsigned int order)
{
struct mgm_groups *const data = mgm_dev->data;
struct page *p;
@@ -268,8 +252,7 @@ static struct page *example_mgm_alloc_page(
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d gfp_mask=0x%x order=%u\n", __func__,
(void *)mgm_dev, group_id, gfp_mask, order);
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
return NULL;
p = alloc_pages(gfp_mask, order);
@@ -285,17 +268,15 @@ static struct page *example_mgm_alloc_page(
return p;
}
static void example_mgm_free_page(
struct memory_group_manager_device *mgm_dev, int group_id,
struct page *page, unsigned int order)
static void example_mgm_free_page(struct memory_group_manager_device *mgm_dev, int group_id,
struct page *page, unsigned int order)
{
struct mgm_groups *const data = mgm_dev->data;
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d page=%pK order=%u\n", __func__,
(void *)mgm_dev, group_id, (void *)page, order);
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
return;
__free_pages(page, order);
@@ -303,9 +284,8 @@ static void example_mgm_free_page(
update_size(mgm_dev, group_id, order, false);
}
static int example_mgm_get_import_memory_id(
struct memory_group_manager_device *mgm_dev,
struct memory_group_manager_import_data *import_data)
static int example_mgm_get_import_memory_id(struct memory_group_manager_device *mgm_dev,
struct memory_group_manager_import_data *import_data)
{
struct mgm_groups *const data = mgm_dev->data;
@@ -315,24 +295,21 @@ static int example_mgm_get_import_memory_id(
if (!WARN_ON(!import_data)) {
WARN_ON(!import_data->u.dma_buf);
WARN_ON(import_data->type !=
MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF);
WARN_ON(import_data->type != MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF);
}
return IMPORTED_MEMORY_ID;
}
static u64 example_mgm_update_gpu_pte(
struct memory_group_manager_device *const mgm_dev, int const group_id,
int const mmu_level, u64 pte)
static u64 example_mgm_update_gpu_pte(struct memory_group_manager_device *const mgm_dev,
int const group_id, int const mmu_level, u64 pte)
{
struct mgm_groups *const data = mgm_dev->data;
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d, mmu_level=%d, pte=0x%llx)\n", __func__,
(void *)mgm_dev, group_id, mmu_level, pte);
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
return pte;
pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
@@ -340,7 +317,7 @@ static u64 example_mgm_update_gpu_pte(
/* Address could be translated into a different bus address here */
pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
data->groups[group_id].update_gpu_pte++;
atomic_inc(&data->groups[group_id].update_gpu_pte);
return pte;
}
@@ -348,6 +325,10 @@ static u64 example_mgm_update_gpu_pte(
static u64 example_mgm_pte_to_original_pte(struct memory_group_manager_device *const mgm_dev,
int const group_id, int const mmu_level, u64 pte)
{
CSTD_UNUSED(mgm_dev);
CSTD_UNUSED(group_id);
CSTD_UNUSED(mmu_level);
/* Undo the group ID modification */
pte &= ~PTE_PBHA_MASK;
/* Undo the bit set */
@@ -356,10 +337,11 @@ static u64 example_mgm_pte_to_original_pte(struct memory_group_manager_device *c
return pte;
}
static vm_fault_t example_mgm_vmf_insert_pfn_prot(
struct memory_group_manager_device *const mgm_dev, int const group_id,
struct vm_area_struct *const vma, unsigned long const addr,
unsigned long const pfn, pgprot_t const prot)
static vm_fault_t example_mgm_vmf_insert_pfn_prot(struct memory_group_manager_device *const mgm_dev,
int const group_id,
struct vm_area_struct *const vma,
unsigned long const addr, unsigned long const pfn,
pgprot_t const prot)
{
struct mgm_groups *const data = mgm_dev->data;
vm_fault_t fault;
@@ -369,14 +351,13 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(
__func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn,
(unsigned long long)pgprot_val(prot));
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
return VM_FAULT_SIGBUS;
fault = vmf_insert_pfn_prot(vma, addr, pfn, prot);
if (fault == VM_FAULT_NOPAGE)
data->groups[group_id].insert_pfn++;
atomic_inc(&data->groups[group_id].insert_pfn);
else
dev_err(data->dev, "vmf_insert_pfn_prot failed\n");
@@ -388,10 +369,10 @@ static int mgm_initialize_data(struct mgm_groups *mgm_data)
int i;
for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) {
mgm_data->groups[i].size = 0;
mgm_data->groups[i].lp_size = 0;
mgm_data->groups[i].insert_pfn = 0;
mgm_data->groups[i].update_gpu_pte = 0;
atomic_set(&mgm_data->groups[i].size, 0);
atomic_set(&mgm_data->groups[i].lp_size, 0);
atomic_set(&mgm_data->groups[i].insert_pfn, 0);
atomic_set(&mgm_data->groups[i].update_gpu_pte, 0);
}
return mgm_initialize_debugfs(mgm_data);
@@ -402,14 +383,12 @@ static void mgm_term_data(struct mgm_groups *data)
int i;
for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) {
if (data->groups[i].size != 0)
dev_warn(data->dev,
"%zu 0-order pages in group(%d) leaked\n",
data->groups[i].size, i);
if (data->groups[i].lp_size != 0)
dev_warn(data->dev,
"%zu 9 order pages in group(%d) leaked\n",
data->groups[i].lp_size, i);
if (atomic_read(&data->groups[i].size) != 0)
dev_warn(data->dev, "%d 0-order pages in group(%d) leaked\n",
atomic_read(&data->groups[i].size), i);
if (atomic_read(&data->groups[i].lp_size) != 0)
dev_warn(data->dev, "%d 9 order pages in group(%d) leaked\n",
atomic_read(&data->groups[i].lp_size), i);
}
mgm_term_debugfs(data);
@@ -427,8 +406,7 @@ static int memory_group_manager_probe(struct platform_device *pdev)
mgm_dev->owner = THIS_MODULE;
mgm_dev->ops.mgm_alloc_page = example_mgm_alloc_page;
mgm_dev->ops.mgm_free_page = example_mgm_free_page;
mgm_dev->ops.mgm_get_import_memory_id =
example_mgm_get_import_memory_id;
mgm_dev->ops.mgm_get_import_memory_id = example_mgm_get_import_memory_id;
mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot;
mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte;
mgm_dev->ops.mgm_pte_to_original_pte = example_mgm_pte_to_original_pte;
@@ -456,8 +434,7 @@ static int memory_group_manager_probe(struct platform_device *pdev)
static int memory_group_manager_remove(struct platform_device *pdev)
{
struct memory_group_manager_device *mgm_dev =
platform_get_drvdata(pdev);
struct memory_group_manager_device *mgm_dev = platform_get_drvdata(pdev);
struct mgm_groups *mgm_data = mgm_dev->data;
mgm_term_data(mgm_data);
@@ -476,20 +453,20 @@ static const struct of_device_id memory_group_manager_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, memory_group_manager_dt_ids);
static struct platform_driver memory_group_manager_driver = {
.probe = memory_group_manager_probe,
.remove = memory_group_manager_remove,
.driver = {
.name = "physical-memory-group-manager",
.of_match_table = of_match_ptr(memory_group_manager_dt_ids),
/*
static struct platform_driver
memory_group_manager_driver = { .probe = memory_group_manager_probe,
.remove = memory_group_manager_remove,
.driver = {
.name = "physical-memory-group-manager",
.of_match_table =
of_match_ptr(memory_group_manager_dt_ids),
/*
* Prevent the mgm_dev from being unbound and freed, as other's
* may have pointers to it and would get confused, or crash, if
* it suddenly disappear.
*/
.suppress_bind_attrs = true,
}
};
.suppress_bind_attrs = true,
} };
module_platform_driver(memory_group_manager_driver);


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,7 +45,7 @@
* @dev: Device pointer
* @allocated_pages_bitfield_arr: Status of all the physical memory pages within the
* protected memory region, one bit per page
* @rmem_base: Base address of the reserved memory region
* @rmem_base: Base physical address of the reserved memory region
* @rmem_size: Size of the reserved memory region, in pages
* @num_free_pages: Number of free pages in the memory region
* @rmem_lock: Lock to serialize the allocation and freeing of
@@ -68,9 +68,9 @@ struct simple_pma_device {
* PAGES_PER_BITFIELD_ELEM, adds an extra page for the remainder.
* @num_pages: number of pages
*/
#define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \
((PAGES_PER_BITFIELD_ELEM * (0 != (num_pages % PAGES_PER_BITFIELD_ELEM)) + \
num_pages) / PAGES_PER_BITFIELD_ELEM)
#define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \
((PAGES_PER_BITFIELD_ELEM * (0 != (num_pages % PAGES_PER_BITFIELD_ELEM)) + num_pages) / \
PAGES_PER_BITFIELD_ELEM)
/**
* small_granularity_alloc() - Allocate 1-32 power-of-two pages.
@@ -90,8 +90,7 @@ struct simple_pma_device {
* It can be thought of as the 'small-granularity' allocator.
*/
static void small_granularity_alloc(struct simple_pma_device *const epma_dev,
size_t alloc_bitfield_idx, size_t start_bit,
size_t order,
size_t alloc_bitfield_idx, size_t start_bit, size_t order,
struct protected_memory_allocation *pma)
{
size_t i;
@@ -99,28 +98,26 @@ static void small_granularity_alloc(struct simple_pma_device *const epma_dev,
u64 *bitfield;
size_t alloc_pages_bitfield_size;
if (WARN_ON(!epma_dev) ||
WARN_ON(!pma))
if (WARN_ON(!epma_dev) || WARN_ON(!pma))
return;
WARN(epma_dev->rmem_size == 0, "%s: rmem_size is 0", __func__);
alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size,
"%s: idx>bf_size: %zu %zu", __func__,
WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size, "%s: idx>bf_size: %zu %zu", __func__,
alloc_bitfield_idx, alloc_pages_bitfield_size);
WARN((start_bit + (1 << order)) > PAGES_PER_BITFIELD_ELEM,
"%s: start=%zu order=%zu ppbe=%zu",
__func__, start_bit, order, PAGES_PER_BITFIELD_ELEM);
WARN((start_bit + (1ULL << order)) > PAGES_PER_BITFIELD_ELEM,
"%s: start=%zu order=%zu ppbe=%zu", __func__, start_bit, order,
PAGES_PER_BITFIELD_ELEM);
bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx];
for (i = 0; i < (1 << order); i++) {
for (i = 0; i < (1ULL << order); i++) {
/* Check the pages represented by this bit are actually free */
WARN(*bitfield & (1ULL << (start_bit + i)),
"in %s: page not free: %zu %zu %.16llx %zu\n",
__func__, i, order, *bitfield, alloc_pages_bitfield_size);
"in %s: page not free: %zu %zu %.16llx %zu\n", __func__, i, order, *bitfield,
alloc_pages_bitfield_size);
/* Mark the pages as now allocated */
*bitfield |= (1ULL << (start_bit + i));
@@ -152,8 +149,7 @@ static void small_granularity_alloc(struct simple_pma_device *const epma_dev,
* as the 'large-granularity' allocator.
*/
static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
size_t start_alloc_bitfield_idx,
size_t order,
size_t start_alloc_bitfield_idx, size_t order,
struct protected_memory_allocation *pma)
{
size_t i;
@@ -161,8 +157,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM;
size_t start_page_idx = start_alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM;
if (WARN_ON(!epma_dev) ||
WARN_ON(!pma))
if (WARN_ON(!epma_dev) || WARN_ON(!pma))
return;
/*
@@ -170,29 +165,30 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
* between the start element and the end of the bitfield array
* to fulfill the request?
*/
WARN((start_alloc_bitfield_idx + order) >= ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size),
"%s: start=%zu order=%zu ms=%zu",
__func__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);
WARN((start_alloc_bitfield_idx + order) >=
ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size),
"%s: start=%zu order=%zu ms=%zu", __func__, start_alloc_bitfield_idx, order,
epma_dev->rmem_size);
for (i = 0; i < num_bitfield_elements_needed; i++) {
u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i];
u64 *bitfield =
&epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i];
/* We expect all pages that relate to this bitfield element to be free */
WARN((*bitfield != 0),
"in %s: pages not free: i=%zu o=%zu bf=%.16llx\n",
__func__, i, order, *bitfield);
WARN((*bitfield != 0), "in %s: pages not free: i=%zu o=%zu bf=%.16llx\n", __func__,
i, order, *bitfield);
/* Mark all the pages for this element as not free */
*bitfield = ~0ULL;
}
/* Fill-in the allocation struct for the caller */
pma->pa = epma_dev->rmem_base + (start_page_idx << PAGE_SHIFT);
pma->pa = epma_dev->rmem_base + (start_page_idx << PAGE_SHIFT);
pma->order = order;
}
static struct protected_memory_allocation *simple_pma_alloc_page(
struct protected_memory_allocator_device *pma_dev, unsigned int order)
static struct protected_memory_allocation *
simple_pma_alloc_page(struct protected_memory_allocator_device *pma_dev, unsigned int order)
{
struct simple_pma_device *const epma_dev =
container_of(pma_dev, struct simple_pma_device, pma_dev);
@@ -204,8 +200,7 @@ static struct protected_memory_allocation *simple_pma_alloc_page(
size_t bit;
size_t count;
dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u\n",
__func__, (void *)pma_dev, order);
dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u\n", __func__, (void *)pma_dev, order);
/* This is an example function that follows an extremely simple logic
* and is very likely to fail to allocate memory if put under stress.
@@ -260,22 +255,18 @@ static struct protected_memory_allocation *simple_pma_alloc_page(
count = 0;
for (bit = 0; bit < PAGES_PER_BITFIELD_ELEM; bit++) {
if (0 == (bitfields[i] & (1ULL << bit))) {
if (0 == (bitfields[i] & (1ULL << bit))) {
if ((count + 1) >= num_pages_to_alloc) {
/*
* We've found enough free, consecutive pages with which to
* make an allocation
*/
small_granularity_alloc(
epma_dev, i,
bit - count, order,
pma);
small_granularity_alloc(epma_dev, i, bit - count,
order, pma);
epma_dev->num_free_pages -=
num_pages_to_alloc;
epma_dev->num_free_pages -= num_pages_to_alloc;
spin_unlock(
&epma_dev->rmem_lock);
spin_unlock(&epma_dev->rmem_lock);
return pma;
}
@@ -307,12 +298,10 @@ static struct protected_memory_allocation *simple_pma_alloc_page(
if (bitfields[i] == 0) {
count += PAGES_PER_BITFIELD_ELEM;
if (count >= (1 << order)) {
if (count >= (1ULL << order)) {
size_t start_idx = (i + 1) - num_bitfield_elements_needed;
large_granularity_alloc(epma_dev,
start_idx,
order, pma);
large_granularity_alloc(epma_dev, start_idx, order, pma);
epma_dev->num_free_pages -= 1 << order;
spin_unlock(&epma_dev->rmem_lock);
@@ -327,28 +316,26 @@ static struct protected_memory_allocation *simple_pma_alloc_page(
spin_unlock(&epma_dev->rmem_lock);
devm_kfree(epma_dev->dev, pma);
dev_err(epma_dev->dev, "not enough contiguous pages (need %zu), total free pages left %zu\n",
dev_err(epma_dev->dev,
"not enough contiguous pages (need %zu), total free pages left %zu\n",
num_pages_to_alloc, epma_dev->num_free_pages);
return NULL;
}
static phys_addr_t simple_pma_get_phys_addr(
struct protected_memory_allocator_device *pma_dev,
struct protected_memory_allocation *pma)
static phys_addr_t simple_pma_get_phys_addr(struct protected_memory_allocator_device *pma_dev,
struct protected_memory_allocation *pma)
{
struct simple_pma_device *const epma_dev =
container_of(pma_dev, struct simple_pma_device, pma_dev);
dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n",
__func__, (void *)pma_dev, (void *)pma,
(unsigned long long)pma->pa);
dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%pK\n", __func__, (void *)pma_dev,
(void *)pma, (void *)pma->pa);
return pma->pa;
}
static void simple_pma_free_page(
struct protected_memory_allocator_device *pma_dev,
struct protected_memory_allocation *pma)
static void simple_pma_free_page(struct protected_memory_allocator_device *pma_dev,
struct protected_memory_allocation *pma)
{
struct simple_pma_device *const epma_dev =
container_of(pma_dev, struct simple_pma_device, pma_dev);
@@ -364,9 +351,8 @@ static void simple_pma_free_page(
WARN_ON(pma == NULL);
dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n",
__func__, (void *)pma_dev, (void *)pma,
(unsigned long long)pma->pa);
dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%pK\n", __func__, (void *)pma_dev,
(void *)pma, (void *)pma->pa);
WARN_ON(pma->pa < epma_dev->rmem_base);
@@ -402,14 +388,14 @@ static void simple_pma_free_page(
*bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit);
} else {
WARN(page_num % PAGES_PER_BITFIELD_ELEM,
"%s: Expecting allocs of order >= %d to be %zu-page aligned\n",
__func__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
"%s: Expecting allocs of order >= %d to be %zu-page aligned\n", __func__,
ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) {
bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i];
/* We expect all bits to be set (all pages allocated) */
WARN((*bitfield != ~0),
WARN((*bitfield != ~0ULL),
"%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n",
__func__, offset, num_pages_in_allocation, *bitfield);
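The free path above clears a sub-element allocation in one step: build num_pages_in_allocation consecutive bits, shift them to the allocation's start bit, and AND the complement into the bitfield element. A standalone sketch with hypothetical values (not driver code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bitfield = ~0ULL;   /* pretend all 64 pages of this element are allocated */
	unsigned int start_bit = 4;  /* hypothetical page offset inside the element */
	unsigned int num_pages = 8;  /* hypothetical order-3 allocation being freed */

	/* Same mask construction as the driver, then clear those bits. */
	uint64_t mask = ((1ULL << num_pages) - 1) << start_bit;

	bitfield &= ~mask;
	printf("mask=%016llx bitfield=%016llx\n",
	       (unsigned long long)mask, (unsigned long long)bitfield);
	return 0;
}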
@@ -480,8 +466,8 @@ static int protected_memory_allocator_probe(struct platform_device *pdev)
alloc_bitmap_pages_arr_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
epma_dev->allocated_pages_bitfield_arr = devm_kzalloc(&pdev->dev,
alloc_bitmap_pages_arr_size * BITFIELD_ELEM_SIZE, GFP_KERNEL);
epma_dev->allocated_pages_bitfield_arr = devm_kzalloc(
&pdev->dev, alloc_bitmap_pages_arr_size * BITFIELD_ELEM_SIZE, GFP_KERNEL);
if (!epma_dev->allocated_pages_bitfield_arr) {
dev_err(&pdev->dev, "failed to allocate resources\n");
@@ -491,31 +477,27 @@ static int protected_memory_allocator_probe(struct platform_device *pdev)
if (epma_dev->rmem_size % PAGES_PER_BITFIELD_ELEM) {
size_t extra_pages =
alloc_bitmap_pages_arr_size * PAGES_PER_BITFIELD_ELEM -
epma_dev->rmem_size;
alloc_bitmap_pages_arr_size * PAGES_PER_BITFIELD_ELEM - epma_dev->rmem_size;
size_t last_bitfield_index = alloc_bitmap_pages_arr_size - 1;
/* Mark the extra pages (that lie outside the reserved range) as
* always in use.
*/
epma_dev->allocated_pages_bitfield_arr[last_bitfield_index] =
((1ULL << extra_pages) - 1) <<
(PAGES_PER_BITFIELD_ELEM - extra_pages);
((1ULL << extra_pages) - 1) << (PAGES_PER_BITFIELD_ELEM - extra_pages);
}
platform_set_drvdata(pdev, &epma_dev->pma_dev);
dev_info(&pdev->dev,
"Protected memory allocator probed successfully\n");
dev_info(&pdev->dev, "Protected memory region: base=%llx num pages=%zu\n",
(unsigned long long)rmem_base, rmem_size);
dev_info(&pdev->dev, "Protected memory allocator probed successfully\n");
dev_info(&pdev->dev, "Protected memory region: base=%pK num pages=%zu\n", (void *)rmem_base,
rmem_size);
return 0;
}
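One detail of the probe path above deserves a worked example: when rmem_size is not a multiple of PAGES_PER_BITFIELD_ELEM, the bits of the last element that lie beyond the reserved region are pre-set so those non-existent pages can never be handed out. A small sketch of that masking, assuming 64 pages per element and a hypothetical 100-page region:

#include <stdint.h>
#include <stdio.h>

#define PAGES_PER_BITFIELD_ELEM 64

int main(void)
{
	size_t rmem_size = 100;   /* hypothetical region size, in pages */
	size_t arr_size = (rmem_size + PAGES_PER_BITFIELD_ELEM - 1) / PAGES_PER_BITFIELD_ELEM;
	size_t extra_pages = arr_size * PAGES_PER_BITFIELD_ELEM - rmem_size;   /* 28 here */

	/* Same expression as the driver: the top 'extra_pages' bits of the
	 * last bitfield element are marked permanently in use.
	 */
	uint64_t last_elem = ((1ULL << extra_pages) - 1)
			     << (PAGES_PER_BITFIELD_ELEM - extra_pages);

	printf("extra_pages=%zu last_elem=%016llx\n",
	       extra_pages, (unsigned long long)last_elem);
	return 0;
}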
static int protected_memory_allocator_remove(struct platform_device *pdev)
{
struct protected_memory_allocator_device *pma_dev =
platform_get_drvdata(pdev);
struct protected_memory_allocator_device *pma_dev = platform_get_drvdata(pdev);
struct simple_pma_device *epma_dev;
struct device *dev;
@@ -527,15 +509,14 @@ static int protected_memory_allocator_remove(struct platform_device *pdev)
if (epma_dev->num_free_pages < epma_dev->rmem_size) {
dev_warn(&pdev->dev, "Leaking %zu pages of protected memory\n",
epma_dev->rmem_size - epma_dev->num_free_pages);
epma_dev->rmem_size - epma_dev->num_free_pages);
}
platform_set_drvdata(pdev, NULL);
devm_kfree(dev, epma_dev->allocated_pages_bitfield_arr);
devm_kfree(dev, epma_dev);
dev_info(&pdev->dev,
"Protected memory allocator removed successfully\n");
dev_info(&pdev->dev, "Protected memory allocator removed successfully\n");
return 0;
}
@@ -546,14 +527,14 @@ static const struct of_device_id protected_memory_allocator_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, protected_memory_allocator_dt_ids);
static struct platform_driver protected_memory_allocator_driver = {
.probe = protected_memory_allocator_probe,
.remove = protected_memory_allocator_remove,
.driver = {
.name = "simple_protected_memory_allocator",
.of_match_table = of_match_ptr(protected_memory_allocator_dt_ids),
}
};
static struct platform_driver
protected_memory_allocator_driver = { .probe = protected_memory_allocator_probe,
.remove = protected_memory_allocator_remove,
.driver = {
.name = "simple_protected_memory_allocator",
.of_match_table = of_match_ptr(
protected_memory_allocator_dt_ids),
} };
module_platform_driver(protected_memory_allocator_driver);

View File

@@ -69,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= '"g18p0-01eac0"'
MALI_RELEASE_NAME ?= '"g21p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
MALI_UNIT_TEST = 1
@@ -149,6 +149,7 @@ bifrost_kbase-y := \
mali_kbase_cache_policy.o \
mali_kbase_ccswe.o \
mali_kbase_mem.o \
mali_kbase_reg_track.o \
mali_kbase_mem_migrate.o \
mali_kbase_mem_pool_group.o \
mali_kbase_native_mgm.o \
@@ -157,7 +158,6 @@ bifrost_kbase-y := \
mali_kbase_pm.o \
mali_kbase_config.o \
mali_kbase_kinstr_prfcnt.o \
mali_kbase_vinstr.o \
mali_kbase_softjobs.o \
mali_kbase_hw.o \
mali_kbase_debug.o \
@@ -173,7 +173,6 @@ bifrost_kbase-y := \
mali_kbase_mem_pool.o \
mali_kbase_mem_pool_debugfs.o \
mali_kbase_debugfs_helper.o \
mali_kbase_strings.o \
mali_kbase_as_fault_debugfs.o \
mali_kbase_regs_history_debugfs.o \
mali_kbase_dvfs_debugfs.o \
@@ -190,6 +189,10 @@ bifrost_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_sync_file.o \
mali_kbase_sync_common.o
bifrost_kbase-$(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) += \
mali_power_gpu_work_period_trace.o \
mali_kbase_gpu_metrics.o
ifneq ($(CONFIG_MALI_CSF_SUPPORT),y)
bifrost_kbase-y += \
mali_kbase_jm.o \
@@ -217,6 +220,7 @@ INCLUDE_SUBDIR = \
$(src)/tl/Kbuild \
$(src)/hwcnt/Kbuild \
$(src)/gpu/Kbuild \
$(src)/hw_access/Kbuild \
$(src)/thirdparty/Kbuild \
$(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild

View File

@@ -63,11 +63,18 @@ config MALI_BIFROST_NO_MALI
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
config MALI_NO_MALI_DEFAULT_GPU
string "Default GPU for No Mali"
depends on MALI_BIFROST_NO_MALI
default "tMIx"
help
This option sets the default GPU to identify as for No Mali builds.
endchoice
menu "Platform specific options"
source "drivers/gpu/arm/bifrost/platform/Kconfig"
source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/bifrost/platform/Kconfig"
endmenu
config MALI_CSF_SUPPORT
@@ -163,32 +170,36 @@ menuconfig MALI_BIFROST_EXPERT
if MALI_BIFROST_EXPERT
config LARGE_PAGE_ALLOC_OVERRIDE
bool "Override default setting of 2MB pages"
config LARGE_PAGE_SUPPORT
bool "Support for 2MB page allocations"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
default n
default y
help
An override config for LARGE_PAGE_ALLOC config.
When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be
enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be
enabled when GPU HW satisfies requirements.
Rather than allocating all GPU memory page-by-page, allow the system
to decide whether to attempt to allocate 2MB pages from the kernel.
This reduces TLB pressure.
If in doubt, say N
Note that this option only enables the support for the module parameter
and does not necessarily mean that 2MB pages will be used automatically.
This depends on GPU support.
config LARGE_PAGE_ALLOC
bool "Attempt to allocate 2MB pages"
If in doubt, say Y.
config PAGE_MIGRATION_SUPPORT
bool "Enable support for page migration"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
default n
default y
default n if ANDROID
help
Rather than allocating all GPU memory page-by-page, attempt to
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
Compile in support for page migration.
If set to disabled ('n') then page migration cannot
be enabled at all, and related symbols are not compiled in.
If not set, page migration is compiled in by default, and
if not explicitly enabled or disabled with the insmod parameter,
page migration becomes automatically enabled with large pages.
Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config
is enabled and enabling this on a GPU HW that does not satisfy
requirements can cause serious problem.
If in doubt, say N
If in doubt, say Y. To strip out page migration symbols and support,
say N.
config MALI_MEMORY_FULLY_BACKED
bool "Enable memory fully physically-backed"
@@ -383,7 +394,16 @@ config MALI_ARBITRATION
virtualization setup for Mali
If unsure, say N.
config MALI_TRACE_POWER_GPU_WORK_PERIOD
bool "Enable per-application GPU metrics tracepoints"
depends on MALI_BIFROST
default y
help
This option enables per-application GPU metrics tracepoints.
# source "drivers/gpu/arm/bifrost/tests/Kconfig"
If unsure, say N.
# source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/bifrost/tests/Kconfig"
endif

View File

@@ -20,6 +20,7 @@
KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build
KDIR ?= $(KERNEL_SRC)
M ?= $(shell pwd)
ifeq ($(KDIR),)
$(error Must specify KDIR to point to the kernel to target)
@@ -31,158 +32,169 @@ endif
# Dependency resolution is done through statements as Kconfig
# is not supported for out-of-tree builds.
#
CONFIGS :=
ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
CONFIG_MALI_BIFROST ?= m
ifeq ($(CONFIG_MALI_BIFROST),m)
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD ?= y
CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y
CONFIG_MALI_ARBITRATION ?= n
CONFIG_MALI_PARTITION_MANAGER ?= n
CONFIG_MALI_64BIT_HW_ACCESS ?= n
CONFIG_MALI_BIFROST ?= m
ifeq ($(CONFIG_MALI_BIFROST),m)
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y
CONFIG_MALI_ARBITRATION ?= n
CONFIG_MALI_PARTITION_MANAGER ?= n
ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y)
# Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI
CONFIG_MALI_REAL_HW ?= y
CONFIG_MALI_CORESIGHT = n
endif
ifeq ($(CONFIG_MALI_BIFROST_DVFS),y)
# Prevent misuse when CONFIG_MALI_BIFROST_DVFS=y
CONFIG_MALI_BIFROST_DEVFREQ ?= n
else
CONFIG_MALI_BIFROST_DEVFREQ ?= y
endif
ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y)
# Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
endif
ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
CONFIG_MALI_CORESIGHT ?= n
endif
#
# Expert/Debug/Test released configurations
#
ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y)
ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y)
CONFIG_MALI_REAL_HW = n
ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y)
# Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=y
CONFIG_MALI_REAL_HW ?= y
CONFIG_MALI_CORESIGHT = n
endif
ifeq ($(CONFIG_MALI_BIFROST_DVFS),y)
# Prevent misuse when CONFIG_MALI_BIFROST_DVFS=y
CONFIG_MALI_BIFROST_DEVFREQ ?= n
else
# Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n
CONFIG_MALI_REAL_HW = y
CONFIG_MALI_BIFROST_ERROR_INJECT = n
CONFIG_MALI_BIFROST_DEVFREQ ?= y
endif
ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y)
# Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y)
# Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
endif
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y
CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y
ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
CONFIG_MALI_CORESIGHT ?= n
endif
#
# Expert/Debug/Test released configurations
#
ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y)
ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y)
CONFIG_MALI_REAL_HW = n
CONFIG_MALI_NO_MALI_DEFAULT_GPU ?= "tMIx"
ifeq ($(CONFIG_SYNC_FILE), y)
CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y
else
# Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n
CONFIG_MALI_REAL_HW = y
CONFIG_MALI_BIFROST_ERROR_INJECT = n
endif
ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y)
# Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
endif
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y
CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y
ifeq ($(CONFIG_SYNC_FILE), y)
CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y
else
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n
CONFIG_MALI_BIFROST_ENABLE_TRACE = n
CONFIG_MALI_BIFROST_SYSTEM_TRACE = n
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n
# Prevent misuse when CONFIG_MALI_BIFROST_EXPERT=n
CONFIG_MALI_CORESTACK = n
CONFIG_LARGE_PAGE_SUPPORT = y
CONFIG_MALI_PWRSOFT_765 = n
CONFIG_MALI_MEMORY_FULLY_BACKED = n
CONFIG_MALI_JOB_DUMP = n
CONFIG_MALI_BIFROST_NO_MALI = n
CONFIG_MALI_REAL_HW = y
CONFIG_MALI_BIFROST_ERROR_INJECT = n
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n
CONFIG_MALI_BIFROST_DEBUG = n
CONFIG_MALI_BIFROST_ENABLE_TRACE = n
CONFIG_MALI_BIFROST_SYSTEM_TRACE = n
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST_EXPERT=n
CONFIG_MALI_CORESTACK = n
CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
CONFIG_LARGE_PAGE_ALLOC = n
CONFIG_MALI_PWRSOFT_765 = n
CONFIG_MALI_MEMORY_FULLY_BACKED = n
CONFIG_MALI_JOB_DUMP = n
CONFIG_MALI_BIFROST_NO_MALI = n
CONFIG_MALI_REAL_HW = y
CONFIG_MALI_BIFROST_ERROR_INJECT = n
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n
CONFIG_MALI_BIFROST_DEBUG = n
CONFIG_MALI_BIFROST_ENABLE_TRACE = n
CONFIG_MALI_BIFROST_SYSTEM_TRACE = n
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
endif
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
CONFIG_MALI_KUTF ?= y
ifeq ($(CONFIG_MALI_KUTF), y)
CONFIG_MALI_KUTF_IRQ_TEST ?= y
CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
CONFIG_MALI_KUTF ?= y
ifeq ($(CONFIG_MALI_KUTF), y)
CONFIG_MALI_KUTF_IRQ_TEST ?= y
CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y
ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ), y)
ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y)
CONFIG_MALI_KUTF_IPA_UNIT_TEST ?= y
endif
endif
else
# Prevent misuse when CONFIG_MALI_KUTF=n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
# Prevent misuse when CONFIG_MALI_KUTF=n
# Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n
# Prevent misuse when CONFIG_MALI_BIFROST=n
CONFIG_MALI_ARBITRATION = n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST=n
CONFIG_MALI_ARBITRATION = n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
# All Mali CONFIG should be listed here
CONFIGS += \
CONFIG_MALI_BIFROST \
CONFIG_MALI_CSF_SUPPORT \
CONFIG_MALI_BIFROST_GATOR_SUPPORT \
CONFIG_MALI_ARBITER_SUPPORT \
CONFIG_MALI_ARBITRATION \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
CONFIG_MALI_BIFROST_DEVFREQ \
CONFIG_MALI_BIFROST_DVFS \
CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
CONFIG_MALI_BIFROST_EXPERT \
CONFIG_MALI_CORESTACK \
CONFIG_LARGE_PAGE_SUPPORT \
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_MEMORY_FULLY_BACKED \
CONFIG_MALI_JOB_DUMP \
CONFIG_MALI_BIFROST_NO_MALI \
CONFIG_MALI_BIFROST_ERROR_INJECT \
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \
CONFIG_MALI_PRFCNT_SET_PRIMARY \
CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY \
CONFIG_MALI_PRFCNT_SET_TERTIARY \
CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \
CONFIG_MALI_BIFROST_DEBUG \
CONFIG_MALI_BIFROST_ENABLE_TRACE \
CONFIG_MALI_BIFROST_SYSTEM_TRACE \
CONFIG_MALI_BIFROST_FENCE_DEBUG \
CONFIG_MALI_KUTF \
CONFIG_MALI_KUTF_IRQ_TEST \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN \
CONFIG_MALI_CORESIGHT \
CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD
endif
# All Mali CONFIG should be listed here
CONFIGS := \
CONFIG_MALI_BIFROST \
CONFIG_MALI_CSF_SUPPORT \
CONFIG_MALI_BIFROST_GATOR_SUPPORT \
CONFIG_MALI_ARBITER_SUPPORT \
CONFIG_MALI_ARBITRATION \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
CONFIG_MALI_BIFROST_DEVFREQ \
CONFIG_MALI_BIFROST_DVFS \
CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
CONFIG_MALI_BIFROST_EXPERT \
CONFIG_MALI_CORESTACK \
CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
CONFIG_LARGE_PAGE_ALLOC \
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_MEMORY_FULLY_BACKED \
CONFIG_MALI_JOB_DUMP \
CONFIG_MALI_BIFROST_NO_MALI \
CONFIG_MALI_BIFROST_ERROR_INJECT \
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \
CONFIG_MALI_PRFCNT_SET_PRIMARY \
CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY \
CONFIG_MALI_PRFCNT_SET_TERTIARY \
CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \
CONFIG_MALI_BIFROST_DEBUG \
CONFIG_MALI_BIFROST_ENABLE_TRACE \
CONFIG_MALI_BIFROST_SYSTEM_TRACE \
CONFIG_MALI_BIFROST_FENCE_DEBUG \
CONFIG_MALI_KUTF \
CONFIG_MALI_KUTF_IRQ_TEST \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN \
CONFIG_MALI_CORESIGHT
THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
-include $(THIS_DIR)/../arbitration/Makefile
@@ -197,7 +209,9 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \
$(value config)=$(value $(value config)), \
$(value config)=n))
MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
endif
#
# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build
@@ -209,71 +223,78 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \
$(if $(filter y m,$(value $(value config))), \
-D$(value config)=1))
EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME='\"$(CONFIG_MALI_PLATFORM_NAME)\"'
EXTRA_CFLAGS += -DCONFIG_MALI_NO_MALI_DEFAULT_GPU='\"$(CONFIG_MALI_NO_MALI_DEFAULT_GPU)\"'
endif
#
# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
#
BASE_SYMBOLS = $(M)/../../base/arm/Module.symvers
KBUILD_CFLAGS += -Wall -Werror
EXTRA_SYMBOLS += \
$(BASE_SYMBOLS)
CFLAGS_MODULE += -Wall -Werror
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
KBUILD_CFLAGS += -Wmissing-declarations
KBUILD_CFLAGS += -Wmissing-format-attribute
KBUILD_CFLAGS += -Wmissing-prototypes
KBUILD_CFLAGS += -Wold-style-definition
CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter
CFLAGS_MODULE += -Wmissing-declarations
CFLAGS_MODULE += -Wmissing-format-attribute
CFLAGS_MODULE += -Wmissing-prototypes
CFLAGS_MODULE += -Wold-style-definition
# The -Wmissing-include-dirs cannot be enabled as the path to some of the
# included directories change depending on whether it is an in-tree or
# out-of-tree build.
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable)
CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable)
CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned)
CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-shift-negative-value
CFLAGS_MODULE += -Wno-sign-compare
CFLAGS_MODULE += -Wno-shift-negative-value
# This flag is needed to avoid build errors on older kernels
KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
# The following were added to align with W=2 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wdisabled-optimization
CFLAGS_MODULE += -Wdisabled-optimization
# The -Wshadow flag cannot be enabled unless upstream kernels are
# patched to fix redefinitions of certain built-in functions and
# global variables.
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
KBUILD_CFLAGS += -Wmissing-field-initializers
CFLAGS_MODULE += $(call cc-option, -Wlogical-op)
CFLAGS_MODULE += -Wmissing-field-initializers
# -Wtype-limits must be disabled due to build failures on kernel 5.x
KBUILD_CFLAGS += -Wno-type-limit
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
CFLAGS_MODULE += -Wno-type-limits
CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized)
CFLAGS_MODULE += $(call cc-option, -Wunused-macros)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
# This warning is disabled to avoid build failures in some kernel versions
KBUILD_CFLAGS += -Wno-ignored-qualifiers
CFLAGS_MODULE += -Wno-ignored-qualifiers
ifeq ($(CONFIG_GCOV_KERNEL),y)
KBUILD_CFLAGS += $(call cc-option, -ftest-coverage)
KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs)
CFLAGS_MODULE += $(call cc-option, -ftest-coverage)
CFLAGS_MODULE += $(call cc-option, -fprofile-arcs)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
ifeq ($(CONFIG_MALI_KCOV),y)
KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
CFLAGS_MODULE += $(call cc-option, -fsanitize-coverage=trace-cmp)
EXTRA_CFLAGS += -DKCOV=1
EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
endif
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
$(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
modules_install:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install
$(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) modules_install
clean:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean
$(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) clean

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,13 +32,11 @@
/* Arbiter interface version against which was implemented this module */
#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5
#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \
MALI_ARBITER_INTERFACE_VERSION
#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != MALI_ARBITER_INTERFACE_VERSION
#error "Unsupported Mali Arbiter interface version."
#endif
static void on_max_config(struct device *dev, uint32_t max_l2_slices,
uint32_t max_core_mask)
static void on_max_config(struct device *dev, uint32_t max_l2_slices, uint32_t max_core_mask)
{
struct kbase_device *kbdev;
@@ -54,9 +52,7 @@ static void on_max_config(struct device *dev, uint32_t max_l2_slices,
}
if (!max_l2_slices || !max_core_mask) {
dev_dbg(dev,
"%s(): max_config ignored as one of the fields is zero",
__func__);
dev_dbg(dev, "%s(): max_config ignored as one of the fields is zero", __func__);
return;
}
@@ -187,8 +183,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "%s\n", __func__);
arbiter_if_node = of_parse_phandle(kbdev->dev->of_node,
"arbiter_if", 0);
arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter_if", 0);
if (!arbiter_if_node) {
dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n");
/* no arbiter interface defined in device tree */
@@ -230,10 +225,9 @@ int kbase_arbif_init(struct kbase_device *kbdev)
/* register kbase arbiter_if callbacks */
if (arb_if->vm_ops.vm_arb_register_dev) {
err = arb_if->vm_ops.vm_arb_register_dev(arb_if,
kbdev->dev, &ops);
err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops);
if (err) {
dev_err(&pdev->dev, "Failed to register with arbiter\n");
dev_err(&pdev->dev, "Failed to register with arbiter. (err = %d)\n", err);
module_put(pdev->dev.driver->owner);
put_device(&pdev->dev);
if (err != -EPROBE_DEFER)

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,19 +36,19 @@
#define GPU_REQUEST_TIMEOUT 1000
#define KHZ_TO_HZ 1000
#define MAX_L2_SLICES_MASK 0xFF
#define MAX_L2_SLICES_MASK 0xFF
/* Maximum time in ms before deferring probe in case
* GPU_GRANTED message is not received
*/
static int gpu_req_timeout = 1;
module_param(gpu_req_timeout, int, 0644);
MODULE_PARM_DESC(gpu_req_timeout,
MODULE_PARM_DESC(
gpu_req_timeout,
"On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe");
static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev);
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
struct kbase_device *kbdev);
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev);
/**
* kbase_arbiter_pm_vm_state_str() - Helper function to get string
@@ -57,8 +57,7 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
*
* Return: string representation of Kbase_vm_state
*/
static inline const char *kbase_arbiter_pm_vm_state_str(
enum kbase_vm_state state)
static inline const char *kbase_arbiter_pm_vm_state_str(enum kbase_vm_state state)
{
switch (state) {
case KBASE_VM_STATE_INITIALIZING:
@@ -98,8 +97,7 @@ static inline const char *kbase_arbiter_pm_vm_state_str(
*
* Return: String representation of Kbase_arbif_event
*/
static inline const char *kbase_arbiter_pm_vm_event_str(
enum kbase_arbif_evt evt)
static inline const char *kbase_arbiter_pm_vm_event_str(enum kbase_arbif_evt evt)
{
switch (evt) {
case KBASE_VM_GPU_INITIALIZED_EVT:
@@ -131,19 +129,18 @@ static inline const char *kbase_arbiter_pm_vm_event_str(
*
* This function sets the new state for the VM
*/
static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev,
enum kbase_vm_state new_state)
static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, enum kbase_vm_state new_state)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
dev_dbg(kbdev->dev, "VM set_state %s -> %s",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state),
kbase_arbiter_pm_vm_state_str(new_state));
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state),
kbase_arbiter_pm_vm_state_str(new_state));
lockdep_assert_held(&arb_vm_state->vm_state_lock);
arb_vm_state->vm_state = new_state;
if (new_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU &&
new_state != KBASE_VM_STATE_INITIALIZING)
new_state != KBASE_VM_STATE_INITIALIZING)
KBASE_KTRACE_ADD(kbdev, ARB_VM_STATE, NULL, new_state);
wake_up(&arb_vm_state->vm_state_wait);
}
@@ -157,21 +154,18 @@ static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev,
*/
static void kbase_arbiter_pm_suspend_wq(struct work_struct *data)
{
struct kbase_arbiter_vm_state *arb_vm_state = container_of(data,
struct kbase_arbiter_vm_state,
vm_suspend_work);
struct kbase_arbiter_vm_state *arb_vm_state =
container_of(data, struct kbase_arbiter_vm_state, vm_suspend_work);
struct kbase_device *kbdev = arb_vm_state->kbdev;
mutex_lock(&arb_vm_state->vm_state_lock);
dev_dbg(kbdev->dev, ">%s\n", __func__);
if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE ||
arb_vm_state->vm_state ==
KBASE_VM_STATE_STOPPING_ACTIVE ||
arb_vm_state->vm_state ==
KBASE_VM_STATE_SUSPEND_PENDING) {
arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_ACTIVE ||
arb_vm_state->vm_state == KBASE_VM_STATE_SUSPEND_PENDING) {
mutex_unlock(&arb_vm_state->vm_state_lock);
dev_dbg(kbdev->dev, ">kbase_pm_driver_suspend\n");
kbase_pm_driver_suspend(kbdev);
WARN_ON_ONCE(kbase_pm_driver_suspend(kbdev));
dev_dbg(kbdev->dev, "<kbase_pm_driver_suspend\n");
mutex_lock(&arb_vm_state->vm_state_lock);
}
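The suspend worker above recovers its per-device state with container_of() on the embedded vm_suspend_work member before taking vm_state_lock. A minimal sketch of that embed-and-recover pattern, using hypothetical types rather than the kbase structures:

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_state {
	int value;
	struct work_struct suspend_work;   /* embedded, like vm_suspend_work */
};

static void example_suspend_wq(struct work_struct *work)
{
	/* Recover the containing object from the embedded work item. */
	struct example_state *st =
		container_of(work, struct example_state, suspend_work);

	pr_info("suspend work ran for state with value %d\n", st->value);
}

static struct example_state *example_state_init(void)
{
	struct example_state *st = kzalloc(sizeof(*st), GFP_KERNEL);

	if (st)
		INIT_WORK(&st->suspend_work, example_suspend_wq);
	return st;
}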
@@ -188,9 +182,8 @@ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data)
*/
static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
{
struct kbase_arbiter_vm_state *arb_vm_state = container_of(data,
struct kbase_arbiter_vm_state,
vm_resume_work);
struct kbase_arbiter_vm_state *arb_vm_state =
container_of(data, struct kbase_arbiter_vm_state, vm_resume_work);
struct kbase_device *kbdev = arb_vm_state->kbdev;
mutex_lock(&arb_vm_state->vm_state_lock);
@@ -222,15 +215,15 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
*/
static enum hrtimer_restart request_timer_callback(struct hrtimer *timer)
{
struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer,
struct kbase_arbiter_vm_state, vm_request_timer);
struct kbase_arbiter_vm_state *arb_vm_state =
container_of(timer, struct kbase_arbiter_vm_state, vm_request_timer);
KBASE_DEBUG_ASSERT(arb_vm_state);
KBASE_DEBUG_ASSERT(arb_vm_state->kbdev);
dev_warn(arb_vm_state->kbdev->dev,
"Still waiting for GPU to be granted from Arbiter after %d ms\n",
GPU_REQUEST_TIMEOUT);
"Still waiting for GPU to be granted from Arbiter after %d ms\n",
GPU_REQUEST_TIMEOUT);
return HRTIMER_NORESTART;
}
@@ -246,9 +239,8 @@ static void start_request_timer(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
hrtimer_start(&arb_vm_state->vm_request_timer,
HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT),
HRTIMER_MODE_REL);
hrtimer_start(&arb_vm_state->vm_request_timer, HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT),
HRTIMER_MODE_REL);
}
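start_request_timer() above arms a one-shot relative hrtimer whose callback merely warns that the arbiter has not granted the GPU within GPU_REQUEST_TIMEOUT ms. A self-contained sketch of the same pattern using only the standard hrtimer API (hypothetical names, and plain ms_to_ktime() in place of the driver's HR_TIMER_DELAY_MSEC helper):

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/printk.h>

#define EXAMPLE_TIMEOUT_MS 1000

static struct hrtimer example_timer;

/* One-shot: warn and do not restart, like the GPU request timer. */
static enum hrtimer_restart example_timer_cb(struct hrtimer *timer)
{
	pr_warn("still waiting after %d ms\n", EXAMPLE_TIMEOUT_MS);
	return HRTIMER_NORESTART;
}

static void example_start_timer(void)
{
	hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	example_timer.function = example_timer_cb;
	hrtimer_start(&example_timer, ms_to_ktime(EXAMPLE_TIMEOUT_MS),
		      HRTIMER_MODE_REL);
}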
/**
@@ -280,8 +272,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
int err;
struct kbase_arbiter_vm_state *arb_vm_state = NULL;
arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state),
GFP_KERNEL);
arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL);
if (arb_vm_state == NULL)
return -ENOMEM;
@@ -290,8 +281,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
mutex_init(&arb_vm_state->vm_state_lock);
init_waitqueue_head(&arb_vm_state->vm_state_wait);
arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq",
WQ_HIGHPRI);
arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", WQ_HIGHPRI);
if (!arb_vm_state->vm_arb_wq) {
dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n");
kfree(arb_vm_state);
@@ -301,15 +291,13 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq);
arb_vm_state->vm_arb_starting = false;
atomic_set(&kbdev->pm.gpu_users_waiting, 0);
hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
arb_vm_state->vm_request_timer.function =
request_timer_callback;
hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
arb_vm_state->vm_request_timer.function = request_timer_callback;
kbdev->pm.arb_vm_state = arb_vm_state;
err = kbase_arbif_init(kbdev);
if (err) {
dev_err(kbdev->dev, "Failed to initialise arbif module\n");
dev_err(kbdev->dev, "Failed to initialise arbif module. (err = %d)\n", err);
goto arbif_init_fail;
}
@@ -318,21 +306,20 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n");
err = wait_event_timeout(arb_vm_state->vm_state_wait,
arb_vm_state->vm_state ==
KBASE_VM_STATE_INITIALIZING_WITH_GPU,
msecs_to_jiffies(gpu_req_timeout));
arb_vm_state->vm_state ==
KBASE_VM_STATE_INITIALIZING_WITH_GPU,
msecs_to_jiffies(gpu_req_timeout));
if (!err) {
dev_dbg(kbdev->dev,
"Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n",
gpu_req_timeout);
"Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n",
gpu_req_timeout);
err = -ENODEV;
goto arbif_timeout;
}
dev_dbg(kbdev->dev,
"Waiting for initial GPU assignment - done\n");
dev_dbg(kbdev->dev, "Waiting for initial GPU assignment - done\n");
}
return 0;
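The probe path above sleeps on wait_event_timeout() and treats a zero return as "no GPU_GRANTED within gpu_req_timeout ms", mapping it to -ENODEV so the probe can be deferred. A minimal sketch of that wait/wake pairing with hypothetical names, unrelated to the kbase state machine:

#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static bool example_granted;

/* Waiter: 0 on success, -ENODEV if the grant never arrived in time. */
static int example_wait_for_grant(unsigned int timeout_ms)
{
	long remaining = wait_event_timeout(example_wq, example_granted,
					    msecs_to_jiffies(timeout_ms));

	return remaining ? 0 : -ENODEV;
}

/* Waker: set the condition first, then wake the sleeper. */
static void example_grant(void)
{
	example_granted = true;
	wake_up(&example_wq);
}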
@@ -423,9 +410,8 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
lockdep_assert_held(&arb_vm_state->vm_state_lock);
if (atomic_read(&kbdev->pm.gpu_users_waiting) > 0 &&
arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE)
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPING_ACTIVE);
arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE)
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE);
dev_dbg(kbdev->dev, "%s %s\n", __func__,
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
@@ -438,8 +424,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_STOPPING_ACTIVE:
request_gpu = true;
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
break;
case KBASE_VM_STATE_STOPPING_IDLE:
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED);
@@ -448,8 +433,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED);
break;
default:
dev_warn(kbdev->dev, "unexpected pm_stop VM state %u",
arb_vm_state->vm_state);
dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", arb_vm_state->vm_state);
break;
}
@@ -459,8 +443,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
start_request_timer(kbdev);
}
void kbase_arbiter_set_max_config(struct kbase_device *kbdev,
uint32_t max_l2_slices,
void kbase_arbiter_set_max_config(struct kbase_device *kbdev, uint32_t max_l2_slices,
uint32_t max_core_mask)
{
struct kbase_arbiter_vm_state *arb_vm_state;
@@ -544,8 +527,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
cancel_request_timer(kbdev);
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_INITIALIZING:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_INITIALIZING_WITH_GPU);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_INITIALIZING_WITH_GPU);
break;
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING);
@@ -557,8 +539,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
*/
kbase_gpuprops_req_curr_config_update(kbdev);
/* curr_config will be updated while resuming the PM. */
queue_work(arb_vm_state->vm_arb_wq,
&arb_vm_state->vm_resume_work);
queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_resume_work);
break;
case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
kbase_pm_set_gpu_lost(kbdev, false);
@@ -572,10 +553,8 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
* without a frequency update
*/
if (!freq_updated)
dev_warn(kbdev->dev,
"GPU_GRANTED when not expected - state %s\n",
kbase_arbiter_pm_vm_state_str(
arb_vm_state->vm_state));
dev_warn(kbdev->dev, "GPU_GRANTED when not expected - state %s\n",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
break;
}
}
@@ -599,31 +578,25 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_IDLE:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPING_IDLE);
queue_work(arb_vm_state->vm_arb_wq,
&arb_vm_state->vm_suspend_work);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_IDLE);
queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_suspend_work);
break;
case KBASE_VM_STATE_ACTIVE:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPING_ACTIVE);
queue_work(arb_vm_state->vm_arb_wq,
&arb_vm_state->vm_suspend_work);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE);
queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_suspend_work);
break;
case KBASE_VM_STATE_STARTING:
dev_dbg(kbdev->dev, "Got GPU_STOP event while STARTING.");
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPING_ACTIVE);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE);
if (arb_vm_state->vm_arb_starting)
queue_work(arb_vm_state->vm_arb_wq,
&arb_vm_state->vm_suspend_work);
queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_suspend_work);
break;
case KBASE_VM_STATE_SUSPEND_PENDING:
/* Suspend finishes with a stop so nothing else to do */
break;
default:
dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
break;
}
}
@@ -646,7 +619,7 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
case KBASE_VM_STATE_ACTIVE:
case KBASE_VM_STATE_IDLE:
dev_warn(kbdev->dev, "GPU lost in state %s",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
kbase_arbiter_pm_vm_gpu_stop(kbdev);
handle_gpu_lost = true;
break;
@@ -689,8 +662,7 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
*
* Return: True if it is ready to be suspended, else False.
*/
static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(
struct kbase_device *kbdev)
static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(struct kbase_device *kbdev)
{
switch (kbdev->pm.arb_vm_state->vm_state) {
case KBASE_VM_STATE_SUSPENDED:
@@ -718,8 +690,7 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
lockdep_assert_held(&arb_vm_state->vm_state_lock);
if (kbdev->arb.arb_if) {
if (kbdev->pm.arb_vm_state->vm_state ==
KBASE_VM_STATE_SUSPENDED)
if (kbdev->pm.arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED)
return;
}
/* Block suspend OS function until we are in a stable state
@@ -730,17 +701,15 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_STOPPING_ACTIVE:
case KBASE_VM_STATE_STOPPING_IDLE:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_SUSPEND_PENDING);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPEND_PENDING);
break;
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT);
break;
case KBASE_VM_STATE_STARTING:
if (!arb_vm_state->vm_arb_starting) {
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_SUSPEND_PENDING);
KBASE_VM_STATE_SUSPEND_PENDING);
kbase_arbiter_pm_vm_stopped(kbdev);
}
break;
@@ -748,24 +717,21 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
break;
}
mutex_unlock(&arb_vm_state->vm_state_lock);
wait_event(arb_vm_state->vm_state_wait,
arb_vm_state->vm_state != prev_state);
wait_event(arb_vm_state->vm_state_wait, arb_vm_state->vm_state != prev_state);
mutex_lock(&arb_vm_state->vm_state_lock);
}
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_STOPPED:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_SUSPENDED);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED);
break;
case KBASE_VM_STATE_IDLE:
case KBASE_VM_STATE_ACTIVE:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_SUSPEND_PENDING);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPEND_PENDING);
mutex_unlock(&arb_vm_state->vm_state_lock);
/* Ensure resume has completed fully before starting suspend */
flush_work(&arb_vm_state->vm_resume_work);
kbase_pm_driver_suspend(kbdev);
WARN_ON_ONCE(kbase_pm_driver_suspend(kbdev));
mutex_lock(&arb_vm_state->vm_state_lock);
break;
case KBASE_VM_STATE_SUSPENDED:
@@ -789,12 +755,10 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
lockdep_assert_held(&arb_vm_state->vm_state_lock);
KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state ==
KBASE_VM_STATE_SUSPENDED,
"Unexpected state to resume");
KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED,
"Unexpected state to resume");
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
kbase_arbif_gpu_request(kbdev);
start_request_timer(kbdev);
@@ -816,8 +780,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
* The state machine function. Receives events and transitions states
* according to the event received and the current state
*/
void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
enum kbase_arbif_evt evt)
void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt evt)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -825,10 +788,9 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
return;
mutex_lock(&arb_vm_state->vm_state_lock);
dev_dbg(kbdev->dev, "%s %s\n", __func__,
kbase_arbiter_pm_vm_event_str(evt));
dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_event_str(evt));
if (arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU &&
arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING)
arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING)
KBASE_KTRACE_ADD(kbdev, ARB_VM_EVT, NULL, evt);
switch (evt) {
case KBASE_VM_GPU_GRANTED_EVT:
@@ -850,8 +812,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
case KBASE_VM_GPU_IDLE_EVENT:
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_ACTIVE:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_IDLE);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_IDLE);
kbase_arbif_gpu_idle(kbdev);
break;
default:
@@ -863,13 +824,11 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_STARTING:
case KBASE_VM_STATE_IDLE:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_ACTIVE);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_ACTIVE);
kbase_arbif_gpu_active(kbdev);
break;
case KBASE_VM_STATE_STOPPING_IDLE:
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPING_ACTIVE);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE);
break;
default:
break;
@@ -881,12 +840,10 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
case KBASE_VM_STATE_INITIALIZING_WITH_GPU:
lockdep_assert_held(&kbdev->pm.lock);
if (kbdev->pm.active_count > 0) {
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_ACTIVE);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_ACTIVE);
kbase_arbif_gpu_active(kbdev);
} else {
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_IDLE);
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_IDLE);
kbase_arbif_gpu_idle(kbdev);
}
break;
@@ -916,8 +873,8 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "Waiting for GPU assignment...\n");
wait_event(arb_vm_state->vm_state_wait,
arb_vm_state->vm_state == KBASE_VM_STATE_IDLE ||
arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE);
arb_vm_state->vm_state == KBASE_VM_STATE_IDLE ||
arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE);
dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n");
}
@@ -929,8 +886,7 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
*
* Return: true if GPU is assigned, else false.
*/
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
struct kbase_device *kbdev)
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -953,7 +909,7 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
* Return: 0 on success, else 1 if the suspend handler is not possible.
*/
int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler)
enum kbase_pm_suspend_handler suspend_handler)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
int res = 0;
@@ -962,23 +918,18 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
mutex_lock(&arb_vm_state->vm_state_lock);
while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) {
/* Update VM state since we have GPU work to do */
if (arb_vm_state->vm_state ==
KBASE_VM_STATE_STOPPING_IDLE)
if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE)
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPING_ACTIVE);
else if (arb_vm_state->vm_state ==
KBASE_VM_STATE_STOPPED) {
KBASE_VM_STATE_STOPPING_ACTIVE);
else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPED) {
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
kbase_arbif_gpu_request(kbdev);
start_request_timer(kbdev);
} else if (arb_vm_state->vm_state ==
KBASE_VM_STATE_INITIALIZING_WITH_GPU)
} else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU)
break;
if (suspend_handler !=
KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) {
if (suspend_handler != KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) {
/* In case of GPU lost, even if
* active_count > 0, we no longer have GPU
* access
@@ -1024,8 +975,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
* @arb_freq: Pointer to structure holding GPU clock frequency data
* @freq: New frequency value in KHz
*/
void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
uint32_t freq)
void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, uint32_t freq)
{
struct kbase_gpu_clk_notifier_data ndata;
@@ -1037,8 +987,7 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
arb_freq->arb_freq = freq;
arb_freq->freq_updated = true;
if (arb_freq->nb)
arb_freq->nb->notifier_call(arb_freq->nb,
POST_RATE_CHANGE, &ndata);
arb_freq->nb->notifier_call(arb_freq->nb, POST_RATE_CHANGE, &ndata);
}
mutex_unlock(&arb_freq->arb_freq_lock);
@@ -1052,8 +1001,7 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
* Return: Pointer to structure holding GPU clock frequency data reported from
* arbiter, only index 0 is valid.
*/
static void *get_arb_gpu_clk(struct kbase_device *kbdev,
unsigned int index)
static void *get_arb_gpu_clk(struct kbase_device *kbdev, unsigned int index)
{
if (index == 0)
return &kbdev->arb.arb_freq;
@@ -1067,12 +1015,10 @@ static void *get_arb_gpu_clk(struct kbase_device *kbdev,
*
* Return: The GPU clock frequency value saved when gpu is granted from arbiter
*/
static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
void *gpu_clk_handle)
static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, void *gpu_clk_handle)
{
uint32_t freq;
struct kbase_arbiter_freq *arb_dev_freq =
(struct kbase_arbiter_freq *) gpu_clk_handle;
struct kbase_arbiter_freq *arb_dev_freq = (struct kbase_arbiter_freq *)gpu_clk_handle;
mutex_lock(&arb_dev_freq->arb_freq_lock);
/* Convert from KHz to Hz */
@@ -1092,12 +1038,11 @@ static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
*
* Return: 0 on success, negative error code otherwise.
*/
static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
void *gpu_clk_handle, struct notifier_block *nb)
static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, void *gpu_clk_handle,
struct notifier_block *nb)
{
int ret = 0;
struct kbase_arbiter_freq *arb_dev_freq =
(struct kbase_arbiter_freq *)gpu_clk_handle;
struct kbase_arbiter_freq *arb_dev_freq = (struct kbase_arbiter_freq *)gpu_clk_handle;
if (!arb_dev_freq->nb)
arb_dev_freq->nb = nb;
@@ -1117,16 +1062,14 @@ static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
* was previously registered to get notified of a frequency change of the
* clock corresponding to @gpu_clk_handle.
*/
static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev,
void *gpu_clk_handle, struct notifier_block *nb)
static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev, void *gpu_clk_handle,
struct notifier_block *nb)
{
struct kbase_arbiter_freq *arb_dev_freq =
(struct kbase_arbiter_freq *)gpu_clk_handle;
struct kbase_arbiter_freq *arb_dev_freq = (struct kbase_arbiter_freq *)gpu_clk_handle;
if (arb_dev_freq->nb == nb) {
arb_dev_freq->nb = NULL;
} else {
dev_err(kbdev->dev, "%s - notifier did not match\n",
__func__);
dev_err(kbdev->dev, "%s - notifier did not match\n", __func__);
}
}
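arb_gpu_clk_notifier_register()/unregister() above only stash and clear a single notifier_block pointer; kbase_arbiter_pm_update_gpu_freq() then calls it with POST_RATE_CHANGE when the arbiter reports a new frequency. For context, a caller-side sketch of the notifier_block shape (hypothetical callback; in the driver this is wired up through kbase's clock rate trace callbacks rather than invoked directly):

#include <linux/notifier.h>
#include <linux/printk.h>

/* Hypothetical callback run when the GPU clock frequency changes. */
static int example_rate_change_cb(struct notifier_block *nb,
				  unsigned long action, void *data)
{
	pr_info("GPU clock notifier fired, action=%lu\n", action);
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_rate_change_cb,
};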

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -114,8 +114,7 @@ int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);
* The state machine function. Receives events and transitions states
* according to the event received and the current state
*/
void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
enum kbase_arbif_evt event);
void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt event);
/**
* kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for
@@ -131,8 +130,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
* Return: 0 if success, 1 if failure due to system suspending/suspended
*/
int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler);
enum kbase_pm_suspend_handler suspend_handler);
/**
* kbase_arbiter_pm_vm_stopped() - Handle stop event for the VM
@@ -152,8 +150,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev);
* This function handles a stop event for the VM.
* It will update the VM state and forward the stop event to the driver.
*/
void kbase_arbiter_set_max_config(struct kbase_device *kbdev,
uint32_t max_l2_slices,
void kbase_arbiter_set_max_config(struct kbase_device *kbdev, uint32_t max_l2_slices,
uint32_t max_core_mask);
/**
@@ -190,7 +187,6 @@ struct kbase_arbiter_freq {
*
* Updates the GPU frequency and triggers any notifications
*/
void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
uint32_t freq);
void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, uint32_t freq);
#endif /*_MALI_KBASE_ARBITER_PM_H_ */

View File

@@ -22,7 +22,6 @@ bifrost_kbase-y += \
backend/gpu/mali_kbase_cache_policy_backend.o \
backend/gpu/mali_kbase_gpuprops_backend.o \
backend/gpu/mali_kbase_irq_linux.o \
backend/gpu/mali_kbase_js_backend.o \
backend/gpu/mali_kbase_pm_backend.o \
backend/gpu/mali_kbase_pm_driver.o \
backend/gpu/mali_kbase_pm_metrics.o \
@@ -40,7 +39,8 @@ ifeq ($(MALI_USE_CSF),0)
backend/gpu/mali_kbase_jm_as.o \
backend/gpu/mali_kbase_debug_job_fault_backend.o \
backend/gpu/mali_kbase_jm_hw.o \
backend/gpu/mali_kbase_jm_rb.o
backend/gpu/mali_kbase_jm_rb.o \
backend/gpu/mali_kbase_js_backend.o
endif

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,4 +27,3 @@
#define _KBASE_BACKEND_CONFIG_H_
#endif /* _KBASE_BACKEND_CONFIG_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,71 +22,43 @@
#include "backend/gpu/mali_kbase_cache_policy_backend.h"
#include <device/mali_kbase_device.h>
/**
* kbasep_amba_register_present() - Check AMBA_<> register is present
* in the GPU.
* @kbdev: Device pointer
*
* Note: Only for arch version 12.x.1 onwards.
*
* Return: true if AMBA_FEATURES/ENABLE registers are present.
*/
static bool kbasep_amba_register_present(struct kbase_device *kbdev)
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode)
{
return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >=
GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1));
}
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode)
{
kbdev->current_gpu_coherency_mode = mode;
if (kbasep_amba_register_present(kbdev)) {
u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
#if MALI_USE_CSF
if (kbdev->gpu_props.gpu_id.arch_id >= GPU_ID_ARCH_MAKE(12, 0, 1)) {
/* AMBA_ENABLE present from 12.0.1 */
u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE));
val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode);
kbase_reg_write(kbdev, AMBA_ENABLE, val);
} else
kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
}
u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev)
{
u32 coherency_features;
if (kbasep_amba_register_present(kbdev))
coherency_features =
kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES));
else
coherency_features = kbase_reg_read(
kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES));
return coherency_features;
}
void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
bool enable)
{
if (kbasep_amba_register_present(kbdev)) {
u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable);
kbase_reg_write(kbdev, AMBA_ENABLE, val);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE), val);
} else {
WARN(1, "memory_cache_support not supported");
/* Fallback to COHERENCY_ENABLE for older versions */
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode);
}
#else /* MALI_USE_CSF */
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode);
#endif /* MALI_USE_CSF */
}
void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable)
void kbase_amba_set_shareable_cache_support(struct kbase_device *kbdev)
{
if (kbasep_amba_register_present(kbdev)) {
u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
#if MALI_USE_CSF
val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable);
kbase_reg_write(kbdev, AMBA_ENABLE, val);
} else {
WARN(1, "invalidate_hint not supported");
/* AMBA registers only present from 12.0.1 */
if (kbdev->gpu_props.gpu_id.arch_id < GPU_ID_ARCH_MAKE(12, 0, 1))
return;
if (kbdev->system_coherency != COHERENCY_NONE) {
u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_FEATURES));
if (AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_GET(val)) {
val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE));
val = AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SET(val, 1);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE), val);
}
}
#endif /* MALI_USE_CSF */
}
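Because the rendered diff interleaves removed and added lines, here is a condensed sketch (not part of the commit) of the register-selection logic the new kbase_cache_set_coherency_mode() ends up with; all helper and register names are taken from the hunk above.

static void example_set_coherency_mode(struct kbase_device *kbdev, u32 mode)
{
	kbdev->current_gpu_coherency_mode = mode;

#if MALI_USE_CSF
	if (kbdev->gpu_props.gpu_id.arch_id >= GPU_ID_ARCH_MAKE(12, 0, 1)) {
		/* AMBA_ENABLE exists from arch 12.0.1: update only the
		 * coherency protocol field and preserve the other bits.
		 */
		u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE));

		val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode);
		kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE), val);
	} else {
		/* Older GPUs keep using COHERENCY_ENABLE. */
		kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode);
	}
#else
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode);
#endif
}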


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,35 +31,14 @@
* @kbdev: Device pointer
* @mode: Coherency mode. COHERENCY_ACE/ACE_LITE
*/
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode);
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode);
/**
* kbase_cache_get_coherency_features() - Get the coherency features
* in the GPU.
* kbase_amba_set_shareable_cache_support() - Sets AMBA shareable cache support
* in the GPU.
* @kbdev: Device pointer
*
* Return: Register value to be returned
*/
u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev);
/**
* kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support
* in the GPU.
* @kbdev: Device pointer
* @enable: true for enable.
*
* Note: Only for arch version 12.x.1 onwards.
*/
void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
bool enable);
/**
* kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint
* in the GPU.
* @kbdev: Device pointer
* @enable: true for enable.
*
* Note: Only for arch version 12.x.1 onwards.
*/
void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable);
void kbase_amba_set_shareable_cache_support(struct kbase_device *kbdev);
#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -58,8 +58,10 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev))
return callbacks;
arbiter_if_node =
of_get_property(kbdev->dev->of_node, "arbiter_if", NULL);
arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL);
if (!arbiter_if_node)
arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL);
/* Arbitration enabled, override the callback pointer.*/
if (arbiter_if_node)
callbacks = &arb_clk_rate_trace_ops;
@@ -72,8 +74,7 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
return callbacks;
}
static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
unsigned long event, void *data)
static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data)
{
struct kbase_gpu_clk_notifier_data *ndata = data;
struct kbase_clk_data *clk_data =
@@ -86,10 +87,9 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
spin_lock_irqsave(&clk_rtm->lock, flags);
if (event == POST_RATE_CHANGE) {
if (!clk_rtm->gpu_idle &&
(clk_data->clock_val != ndata->new_rate)) {
kbase_clk_rate_trace_manager_notify_all(
clk_rtm, clk_data->index, ndata->new_rate);
if (!clk_rtm->gpu_idle && (clk_data->clock_val != ndata->new_rate)) {
kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index,
ndata->new_rate);
}
clk_data->clock_val = ndata->new_rate;
@@ -99,8 +99,7 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
return NOTIFY_DONE;
}
static int gpu_clk_data_init(struct kbase_device *kbdev,
void *gpu_clk_handle, unsigned int index)
static int gpu_clk_data_init(struct kbase_device *kbdev, void *gpu_clk_handle, unsigned int index)
{
struct kbase_clk_rate_trace_op_conf *callbacks;
struct kbase_clk_data *clk_data;
@@ -109,44 +108,42 @@ static int gpu_clk_data_init(struct kbase_device *kbdev,
callbacks = get_clk_rate_trace_callbacks(kbdev);
if (WARN_ON(!callbacks) ||
WARN_ON(!gpu_clk_handle) ||
if (WARN_ON(!callbacks) || WARN_ON(!gpu_clk_handle) ||
WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS))
return -EINVAL;
clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
if (!clk_data) {
dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index);
dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u",
index);
return -ENOMEM;
}
clk_data->index = (u8)index;
clk_data->gpu_clk_handle = gpu_clk_handle;
/* Store the initial value of clock */
clk_data->clock_val =
callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle);
clk_data->clock_val = callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle);
{
/* At the initialization time, GPU is powered off. */
unsigned long flags;
spin_lock_irqsave(&clk_rtm->lock, flags);
kbase_clk_rate_trace_manager_notify_all(
clk_rtm, clk_data->index, 0);
kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, 0);
spin_unlock_irqrestore(&clk_rtm->lock, flags);
}
clk_data->clk_rtm = clk_rtm;
clk_rtm->clks[index] = clk_data;
clk_data->clk_rate_change_nb.notifier_call =
gpu_clk_rate_change_notifier;
clk_data->clk_rate_change_nb.notifier_call = gpu_clk_rate_change_notifier;
if (callbacks->gpu_clk_notifier_register)
ret = callbacks->gpu_clk_notifier_register(kbdev,
gpu_clk_handle, &clk_data->clk_rate_change_nb);
ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle,
&clk_data->clk_rate_change_nb);
if (ret) {
dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index);
dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u",
index);
kfree(clk_data);
}
@@ -174,8 +171,7 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
clk_rtm->gpu_idle = true;
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
void *gpu_clk_handle =
callbacks->enumerate_gpu_clk(kbdev, i);
void *gpu_clk_handle = callbacks->enumerate_gpu_clk(kbdev, i);
if (!gpu_clk_handle)
break;
@@ -200,8 +196,8 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
error:
while (i--) {
clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
kbdev, clk_rtm->clks[i]->gpu_clk_handle,
&clk_rtm->clks[i]->clk_rate_change_nb);
kbdev, clk_rtm->clks[i]->gpu_clk_handle,
&clk_rtm->clks[i]->clk_rate_change_nb);
kfree(clk_rtm->clks[i]);
}
@@ -223,9 +219,9 @@ void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev)
break;
if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister)
clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister
(kbdev, clk_rtm->clks[i]->gpu_clk_handle,
&clk_rtm->clks[i]->clk_rate_change_nb);
clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
kbdev, clk_rtm->clks[i]->gpu_clk_handle,
&clk_rtm->clks[i]->clk_rate_change_nb);
kfree(clk_rtm->clks[i]);
}
@@ -252,8 +248,8 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev)
if (unlikely(!clk_data->clock_val))
continue;
kbase_clk_rate_trace_manager_notify_all(
clk_rtm, clk_data->index, clk_data->clock_val);
kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index,
clk_data->clock_val);
}
clk_rtm->gpu_idle = false;
@@ -280,18 +276,15 @@ void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev)
if (unlikely(!clk_data->clock_val))
continue;
kbase_clk_rate_trace_manager_notify_all(
clk_rtm, clk_data->index, 0);
kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, 0);
}
clk_rtm->gpu_idle = true;
spin_unlock_irqrestore(&clk_rtm->lock, flags);
}
void kbase_clk_rate_trace_manager_notify_all(
struct kbase_clk_rate_trace_manager *clk_rtm,
u32 clk_index,
unsigned long new_rate)
void kbase_clk_rate_trace_manager_notify_all(struct kbase_clk_rate_trace_manager *clk_rtm,
u32 clk_index, unsigned long new_rate)
{
struct kbase_clk_rate_listener *pos;
struct kbase_device *kbdev;
@@ -300,8 +293,8 @@ void kbase_clk_rate_trace_manager_notify_all(
kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm);
dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d",
__func__, clk_index, new_rate, current->pid);
dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", __func__, clk_index,
new_rate, current->pid);
/* Raise standard `power/gpu_frequency` ftrace event */
{


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,9 +90,9 @@ void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev);
*
* kbase_clk_rate_trace_manager:lock must be held by the caller.
*/
static inline void kbase_clk_rate_trace_manager_subscribe_no_lock(
struct kbase_clk_rate_trace_manager *clk_rtm,
struct kbase_clk_rate_listener *listener)
static inline void
kbase_clk_rate_trace_manager_subscribe_no_lock(struct kbase_clk_rate_trace_manager *clk_rtm,
struct kbase_clk_rate_listener *listener)
{
lockdep_assert_held(&clk_rtm->lock);
list_add(&listener->node, &clk_rtm->listeners);
@@ -104,15 +104,14 @@ static inline void kbase_clk_rate_trace_manager_subscribe_no_lock(
* @clk_rtm: Clock rate manager instance.
* @listener: Listener handle
*/
static inline void kbase_clk_rate_trace_manager_subscribe(
struct kbase_clk_rate_trace_manager *clk_rtm,
struct kbase_clk_rate_listener *listener)
static inline void
kbase_clk_rate_trace_manager_subscribe(struct kbase_clk_rate_trace_manager *clk_rtm,
struct kbase_clk_rate_listener *listener)
{
unsigned long flags;
spin_lock_irqsave(&clk_rtm->lock, flags);
kbase_clk_rate_trace_manager_subscribe_no_lock(
clk_rtm, listener);
kbase_clk_rate_trace_manager_subscribe_no_lock(clk_rtm, listener);
spin_unlock_irqrestore(&clk_rtm->lock, flags);
}
@@ -122,9 +121,9 @@ static inline void kbase_clk_rate_trace_manager_subscribe(
* @clk_rtm: Clock rate manager instance.
* @listener: Listener handle
*/
static inline void kbase_clk_rate_trace_manager_unsubscribe(
struct kbase_clk_rate_trace_manager *clk_rtm,
struct kbase_clk_rate_listener *listener)
static inline void
kbase_clk_rate_trace_manager_unsubscribe(struct kbase_clk_rate_trace_manager *clk_rtm,
struct kbase_clk_rate_listener *listener)
{
unsigned long flags;
@@ -145,10 +144,7 @@ static inline void kbase_clk_rate_trace_manager_unsubscribe(
* This function is exported to be used by clock rate trace test
* portal.
*/
void kbase_clk_rate_trace_manager_notify_all(
struct kbase_clk_rate_trace_manager *clk_rtm,
u32 clock_index,
unsigned long new_rate);
void kbase_clk_rate_trace_manager_notify_all(struct kbase_clk_rate_trace_manager *clk_rtm,
u32 clock_index, unsigned long new_rate);
#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */
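A hypothetical usage sketch (not part of the commit) for the subscribe/unsubscribe helpers above. Only their signatures and the pm.clk_rtm location are taken from this diff; the listener's notify callback member is assumed and not shown here.

static struct kbase_clk_rate_listener example_listener; /* .notify assumed to be set elsewhere */

static void example_track_gpu_clock(struct kbase_device *kbdev, bool enable)
{
	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;

	if (enable)
		/* Takes clk_rtm->lock internally before adding to the listener list. */
		kbase_clk_rate_trace_manager_subscribe(clk_rtm, &example_listener);
	else
		kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &example_listener);
}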


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,71 +21,47 @@
#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <hw_access/mali_kbase_hw_access.h>
#include "mali_kbase_debug_job_fault.h"
#if IS_ENABLED(CONFIG_DEBUG_FS)
/*GPU_CONTROL_REG(r)*/
static int gpu_control_reg_snapshot[] = {
GPU_ID,
SHADER_READY_LO,
SHADER_READY_HI,
TILER_READY_LO,
TILER_READY_HI,
L2_READY_LO,
L2_READY_HI
};
static int gpu_control_reg_snapshot[] = { GPU_CONTROL_ENUM(GPU_ID), GPU_CONTROL_ENUM(SHADER_READY),
GPU_CONTROL_ENUM(TILER_READY),
GPU_CONTROL_ENUM(L2_READY) };
/* JOB_CONTROL_REG(r) */
static int job_control_reg_snapshot[] = {
JOB_IRQ_MASK,
JOB_IRQ_STATUS
};
static int job_control_reg_snapshot[] = { JOB_CONTROL_ENUM(JOB_IRQ_MASK),
JOB_CONTROL_ENUM(JOB_IRQ_STATUS) };
/* JOB_SLOT_REG(n,r) */
static int job_slot_reg_snapshot[] = {
JS_HEAD_LO,
JS_HEAD_HI,
JS_TAIL_LO,
JS_TAIL_HI,
JS_AFFINITY_LO,
JS_AFFINITY_HI,
JS_CONFIG,
JS_STATUS,
JS_HEAD_NEXT_LO,
JS_HEAD_NEXT_HI,
JS_AFFINITY_NEXT_LO,
JS_AFFINITY_NEXT_HI,
JS_CONFIG_NEXT
};
static int job_slot_reg_snapshot[] = { JOB_SLOT_ENUM(0, HEAD) - JOB_SLOT_BASE_ENUM(0),
JOB_SLOT_ENUM(0, TAIL) - JOB_SLOT_BASE_ENUM(0),
JOB_SLOT_ENUM(0, AFFINITY) - JOB_SLOT_BASE_ENUM(0),
JOB_SLOT_ENUM(0, CONFIG) - JOB_SLOT_BASE_ENUM(0),
JOB_SLOT_ENUM(0, STATUS) - JOB_SLOT_BASE_ENUM(0),
JOB_SLOT_ENUM(0, HEAD_NEXT) - JOB_SLOT_BASE_ENUM(0),
JOB_SLOT_ENUM(0, AFFINITY_NEXT) - JOB_SLOT_BASE_ENUM(0),
JOB_SLOT_ENUM(0, CONFIG_NEXT) - JOB_SLOT_BASE_ENUM(0) };
/*MMU_REG(r)*/
static int mmu_reg_snapshot[] = {
MMU_IRQ_MASK,
MMU_IRQ_STATUS
};
/*MMU_CONTROL_REG(r)*/
static int mmu_reg_snapshot[] = { MMU_CONTROL_ENUM(IRQ_MASK), MMU_CONTROL_ENUM(IRQ_STATUS) };
/* MMU_AS_REG(n,r) */
static int as_reg_snapshot[] = {
AS_TRANSTAB_LO,
AS_TRANSTAB_HI,
AS_TRANSCFG_LO,
AS_TRANSCFG_HI,
AS_MEMATTR_LO,
AS_MEMATTR_HI,
AS_FAULTSTATUS,
AS_FAULTADDRESS_LO,
AS_FAULTADDRESS_HI,
AS_STATUS
};
static int as_reg_snapshot[] = { MMU_AS_ENUM(0, TRANSTAB) - MMU_AS_BASE_ENUM(0),
MMU_AS_ENUM(0, TRANSCFG) - MMU_AS_BASE_ENUM(0),
MMU_AS_ENUM(0, MEMATTR) - MMU_AS_BASE_ENUM(0),
MMU_AS_ENUM(0, FAULTSTATUS) - MMU_AS_BASE_ENUM(0),
MMU_AS_ENUM(0, FAULTADDRESS) - MMU_AS_BASE_ENUM(0),
MMU_AS_ENUM(0, STATUS) - MMU_AS_BASE_ENUM(0) };
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
int reg_range)
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, int reg_range)
{
int i, j;
uint i, j;
int offset = 0;
int slot_number;
int as_number;
uint slot_number;
uint as_number;
if (kctx->reg_dump == NULL)
return false;
@@ -94,51 +70,61 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
as_number = kctx->kbdev->gpu_props.num_address_spaces;
/* get the GPU control registers*/
for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
offset += 2;
for (i = 0; i < ARRAY_SIZE(gpu_control_reg_snapshot); i++) {
kctx->reg_dump[offset] = gpu_control_reg_snapshot[i];
if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset]))
offset += 4;
else
offset += 2;
}
/* get the Job control registers*/
for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
JOB_CONTROL_REG(job_control_reg_snapshot[i]);
offset += 2;
for (i = 0; i < ARRAY_SIZE(job_control_reg_snapshot); i++) {
kctx->reg_dump[offset] = job_control_reg_snapshot[i];
if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset]))
offset += 4;
else
offset += 2;
}
/* get the Job Slot registers*/
for (j = 0; j < slot_number; j++) {
for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
offset += 2;
for (j = 0; j < slot_number; j++) {
for (i = 0; i < ARRAY_SIZE(job_slot_reg_snapshot); i++) {
kctx->reg_dump[offset] = JOB_SLOT_BASE_OFFSET(j) + job_slot_reg_snapshot[i];
if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset]))
offset += 4;
else
offset += 2;
}
}
/* get the MMU registers*/
for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
offset += 2;
for (i = 0; i < ARRAY_SIZE(mmu_reg_snapshot); i++) {
kctx->reg_dump[offset] = mmu_reg_snapshot[i];
if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset]))
offset += 4;
else
offset += 2;
}
/* get the Address space registers*/
for (j = 0; j < as_number; j++) {
for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
MMU_AS_REG(j, as_reg_snapshot[i]);
offset += 2;
for (i = 0; i < ARRAY_SIZE(as_reg_snapshot); i++) {
kctx->reg_dump[offset] = MMU_AS_BASE_OFFSET(j) + as_reg_snapshot[i];
if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset]))
offset += 4;
else
offset += 2;
}
}
WARN_ON(offset >= (reg_range*2/4));
WARN_ON(offset >= (reg_range * 2 / 4));
/* set the termination flag*/
kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
offset);
dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", offset);
return true;
}
@@ -146,18 +132,32 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
{
int offset = 0;
int reg_enum;
u64 val64;
if (kctx->reg_dump == NULL)
return false;
while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
kctx->reg_dump[offset+1] =
kbase_reg_read(kctx->kbdev,
kctx->reg_dump[offset]);
offset += 2;
reg_enum = kctx->reg_dump[offset];
/* Get register offset from enum */
kbase_reg_get_offset(kctx->kbdev, reg_enum, &kctx->reg_dump[offset]);
if (kbase_reg_is_size64(kctx->kbdev, reg_enum)) {
val64 = kbase_reg_read64(kctx->kbdev, reg_enum);
/* compute the _HI word's offset: 4 bytes above the stored base offset */
kctx->reg_dump[offset + 2] = kctx->reg_dump[offset] + 4;
kctx->reg_dump[offset + 1] = (u32)(val64 & 0xFFFFFFFF);
kctx->reg_dump[offset + 3] = (u32)(val64 >> 32);
offset += 4;
} else {
kctx->reg_dump[offset + 1] = kbase_reg_read32(kctx->kbdev, reg_enum);
offset += 2;
}
}
return true;
}
#endif
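For clarity, a small sketch (not part of the commit) that walks the reg_dump layout produced above: each entry is an (offset, value) pair of u32 words, a 64-bit register is stored as two consecutive pairs (the second offset being the first plus 4), and the list is terminated by REGISTER_DUMP_TERMINATION_FLAG.

static void example_print_reg_dump(struct kbase_context *kctx)
{
	int offset = 0;

	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
		/* 64-bit registers simply appear as two of these pairs. */
		dev_dbg(kctx->kbdev->dev, "reg 0x%x = 0x%x\n",
			kctx->reg_dump[offset], kctx->reg_dump[offset + 1]);
		offset += 2;
	}
}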


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -91,8 +91,8 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
return voltage;
}
void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
u64 *core_mask, unsigned long *freqs, unsigned long *volts)
void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask,
unsigned long *freqs, unsigned long *volts)
{
unsigned int i;
@@ -102,10 +102,8 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
*core_mask = kbdev->devfreq_table[i].core_mask;
for (j = 0; j < kbdev->nr_clocks; j++) {
freqs[j] =
kbdev->devfreq_table[i].real_freqs[j];
volts[j] =
kbdev->devfreq_table[i].opp_volts[j];
freqs[j] = kbdev->devfreq_table[i].real_freqs[j];
volts[j] = kbdev->devfreq_table[i].opp_volts[j];
}
break;
@@ -118,7 +116,7 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
if (i == kbdev->num_opps) {
unsigned long voltage = get_voltage(kbdev, freq);
*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
*core_mask = kbdev->gpu_props.shader_present;
for (i = 0; i < kbdev->nr_clocks; i++) {
freqs[i] = freq;
@@ -127,8 +125,7 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
}
}
static int
kbase_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
static int kbase_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
struct rockchip_opp_info *opp_info = &kbdev->opp_info;
@@ -168,8 +165,7 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq)
kbase_devfreq_target(kbdev->dev, &target_freq, 0);
}
static int
kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
static int kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
@@ -178,8 +174,7 @@ kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
return 0;
}
static int
kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
static int kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
struct kbasep_pm_metrics diff;
@@ -199,8 +194,7 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
return 0;
}
static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
struct devfreq_dev_profile *dp)
static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, struct devfreq_dev_profile *dp)
{
int count;
int i = 0;
@@ -217,8 +211,7 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
if (count < 0)
return count;
dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
GFP_KERNEL);
dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), GFP_KERNEL);
if (!dp->freq_table)
return -ENOMEM;
@@ -240,8 +233,7 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
#endif
if (count != i)
dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
count, i);
dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", count, i);
dp->max_state = i;
@@ -286,8 +278,7 @@ static void kbase_devfreq_exit(struct device *dev)
kbase_devfreq_term_freq_table(kbdev);
}
static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev,
struct device_node *node)
static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, struct device_node *node)
{
u64 freq = 0;
int err = 0;
@@ -316,8 +307,7 @@ static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev,
return;
kbdev->pm.backend.gpu_clock_suspend_freq = freq;
dev_info(kbdev->dev,
"suspend clock %llu by opp-mali-errata-1485982", freq);
dev_info(kbdev->dev, "suspend clock %llu by opp-mali-errata-1485982", freq);
}
static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
@@ -332,12 +322,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
*/
return 0;
#else
struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
"operating-points-v2", 0);
struct device_node *opp_node =
of_parse_phandle(kbdev->dev->of_node, "operating-points-v2", 0);
struct device_node *node;
int i = 0;
unsigned int i = 0;
int count;
u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present;
u64 shader_present = kbdev->gpu_props.shader_present;
if (!opp_node)
return 0;
@@ -345,15 +335,13 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
return 0;
count = dev_pm_opp_get_opp_count(kbdev->dev);
kbdev->devfreq_table = kmalloc_array(count,
sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
kbdev->devfreq_table = kmalloc_array(count, sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
if (!kbdev->devfreq_table)
return -ENOMEM;
for_each_available_child_of_node(opp_node, node) {
const void *core_count_p;
u64 core_mask, opp_freq,
real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
u64 core_mask, opp_freq, real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
int err;
#if IS_ENABLED(CONFIG_REGULATOR)
u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
@@ -365,30 +353,28 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
err = of_property_read_u64(node, "opp-hz", &opp_freq);
if (err) {
dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n",
err);
dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", err);
continue;
}
#if BASE_MAX_NR_CLOCKS_REGULATORS > 1
err = of_property_read_u64_array(node, "opp-hz-real",
real_freqs, kbdev->nr_clocks);
err = of_property_read_u64_array(node, "opp-hz-real", real_freqs, kbdev->nr_clocks);
#else
WARN_ON(kbdev->nr_clocks != 1);
err = of_property_read_u64(node, "opp-hz-real", real_freqs);
#endif
if (err < 0) {
dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n",
err);
err);
continue;
}
#if IS_ENABLED(CONFIG_REGULATOR)
err = of_property_read_u32_array(node,
"opp-microvolt", opp_volts, kbdev->nr_regulators);
err = of_property_read_u32_array(node, "opp-microvolt", opp_volts,
kbdev->nr_regulators);
if (err < 0) {
dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n",
err);
dev_warn(kbdev->dev,
"Failed to read opp-microvolt property with error %d\n", err);
continue;
}
#endif
@@ -397,15 +383,16 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
core_mask = shader_present;
if (core_mask != shader_present && corestack_driver_control) {
dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
opp_freq);
dev_warn(
kbdev->dev,
"Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
opp_freq);
continue;
}
core_count_p = of_get_property(node, "opp-core-count", NULL);
if (core_count_p) {
u64 remaining_core_mask =
kbdev->gpu_props.props.raw_props.shader_present;
u64 remaining_core_mask = kbdev->gpu_props.shader_present;
int core_count = be32_to_cpup(core_count_p);
core_mask = 0;
@@ -418,8 +405,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
return -ENODEV;
}
core_mask |= (1ull << (core-1));
remaining_core_mask &= ~(1ull << (core-1));
core_mask |= (1ull << (core - 1));
remaining_core_mask &= ~(1ull << (core - 1));
}
}
@@ -431,24 +418,22 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
kbdev->devfreq_table[i].opp_freq = opp_freq;
kbdev->devfreq_table[i].core_mask = core_mask;
if (kbdev->nr_clocks > 0) {
int j;
unsigned int j;
for (j = 0; j < kbdev->nr_clocks; j++)
kbdev->devfreq_table[i].real_freqs[j] =
real_freqs[j];
kbdev->devfreq_table[i].real_freqs[j] = real_freqs[j];
}
#if IS_ENABLED(CONFIG_REGULATOR)
if (kbdev->nr_regulators > 0) {
int j;
unsigned int j;
for (j = 0; j < kbdev->nr_regulators; j++)
kbdev->devfreq_table[i].opp_volts[j] =
opp_volts[j];
kbdev->devfreq_table[i].opp_volts[j] = opp_volts[j];
}
#endif
dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n",
i, opp_freq, core_mask);
dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", i, opp_freq,
core_mask);
i++;
}
@@ -481,10 +466,9 @@ static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type
static void kbase_devfreq_suspend_resume_worker(struct work_struct *work)
{
struct kbase_devfreq_queue_info *info = container_of(work,
struct kbase_devfreq_queue_info, work);
struct kbase_device *kbdev = container_of(info, struct kbase_device,
devfreq_queue);
struct kbase_devfreq_queue_info *info =
container_of(work, struct kbase_devfreq_queue_info, work);
struct kbase_device *kbdev = container_of(info, struct kbase_device, devfreq_queue);
unsigned long flags;
enum kbase_devfreq_work_type type, acted_type;
@@ -494,8 +478,7 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work)
acted_type = kbdev->devfreq_queue.acted_type;
dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n",
kbase_devfreq_req_type_name(type),
kbase_devfreq_req_type_name(acted_type));
kbase_devfreq_req_type_name(type), kbase_devfreq_req_type_name(acted_type));
switch (type) {
case DEVFREQ_WORK_SUSPEND:
case DEVFREQ_WORK_RESUME:
@@ -515,8 +498,7 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work)
}
}
void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
enum kbase_devfreq_work_type work_type)
void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type)
{
unsigned long flags;
@@ -525,12 +507,10 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
/* Skip enqueuing a work if workqueue has already been terminated. */
if (likely(kbdev->devfreq_queue.workq)) {
kbdev->devfreq_queue.req_type = work_type;
queue_work(kbdev->devfreq_queue.workq,
&kbdev->devfreq_queue.work);
queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n",
kbase_devfreq_req_type_name(work_type));
dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", kbase_devfreq_req_type_name(work_type));
}
static int kbase_devfreq_work_init(struct kbase_device *kbdev)
@@ -542,8 +522,7 @@ static int kbase_devfreq_work_init(struct kbase_device *kbdev)
if (!kbdev->devfreq_queue.workq)
return -ENOMEM;
INIT_WORK(&kbdev->devfreq_queue.work,
kbase_devfreq_suspend_resume_worker);
INIT_WORK(&kbdev->devfreq_queue.work, kbase_devfreq_suspend_resume_worker);
return 0;
}
@@ -577,10 +556,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
for (i = 0; i < kbdev->nr_clocks; i++) {
if (kbdev->clocks[i])
kbdev->current_freqs[i] =
clk_get_rate(kbdev->clocks[i]);
else
kbdev->current_freqs[i] = 0;
kbdev->current_freqs[i] = clk_get_rate(kbdev->clocks[i]);
}
kbdev->current_nominal_freq = kbdev->current_freqs[0];
@@ -603,15 +579,16 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
if (dp->max_state > 0) {
/* Record the maximum frequency possible */
kbdev->gpu_props.props.core_props.gpu_freq_khz_max =
dp->freq_table[0] / 1000;
kbdev->gpu_props.gpu_freq_khz_max = dp->freq_table[0] / 1000;
};
#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
of_property_read_u32(kbdev->dev->of_node, "dynamic-power-coefficient",
&dyn_power_coeff);
if (dyn_power_coeff)
dp->is_cooling_device = true;
#endif
err = kbase_devfreq_init_core_mask_table(kbdev);
if (err)
goto init_core_mask_table_failed;
@@ -620,8 +597,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
&ondemand_data.upthreshold);
of_property_read_u32(np, "downdifferential",
&ondemand_data.downdifferential);
kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
"simple_ondemand", &ondemand_data);
kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", NULL);
if (IS_ERR(kbdev->devfreq)) {
err = PTR_ERR(kbdev->devfreq);
kbdev->devfreq = NULL;
@@ -646,8 +622,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
if (err) {
dev_err(kbdev->dev,
"Failed to register OPP notifier (%d)", err);
dev_err(kbdev->dev, "Failed to register OPP notifier (%d)", err);
goto opp_notifier_failed;
}
@@ -672,9 +647,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
&kbase_ipa_power_model_ops);
if (IS_ERR(kbdev->devfreq_cooling)) {
err = PTR_ERR(kbdev->devfreq_cooling);
dev_err(kbdev->dev,
"Failed to register cooling device (%d)\n",
err);
dev_err(kbdev->dev, "Failed to register cooling device (%d)", err);
goto cooling_reg_failed;
}
}


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014, 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,14 @@
#ifndef _BASE_DEVFREQ_H_
#define _BASE_DEVFREQ_H_
/**
* kbase_devfreq_init - Initialize kbase device for DevFreq.
* @kbdev: Device pointer
*
* This function must be called only when a kbase device is initialized.
*
* Return: 0 on success.
*/
int kbase_devfreq_init(struct kbase_device *kbdev);
void kbase_devfreq_term(struct kbase_device *kbdev);
@@ -39,8 +47,7 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq);
* @kbdev: Device pointer
* @work_type: The type of the devfreq work item, i.e. suspend or resume
*/
void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
enum kbase_devfreq_work_type work_type);
void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type);
/**
* kbase_devfreq_opp_translate - Translate nominal OPP frequency from devicetree
@@ -57,6 +64,6 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
* untranslated frequency (and corresponding voltage) and all cores enabled.
* The voltages returned are in micro Volts (uV).
*/
void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
u64 *core_mask, unsigned long *freqs, unsigned long *volts);
void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask,
unsigned long *freqs, unsigned long *volts);
#endif /* _BASE_DEVFREQ_H_ */
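A hypothetical caller sketch (not part of the commit) for kbase_devfreq_opp_translate() as declared above; example_show_opp() and its dev_dbg() output are invented for illustration, and the array sizes follow BASE_MAX_NR_CLOCKS_REGULATORS as used elsewhere in this diff.

static void example_show_opp(struct kbase_device *kbdev, unsigned long nominal_freq)
{
	u64 core_mask;
	unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
	unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS];

	/* Translate the nominal devicetree OPP frequency into real clock
	 * frequencies (Hz), voltages (uV) and the shader core mask.
	 */
	kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts);

	dev_dbg(kbdev->dev, "opp %lu: core_mask=0x%llx freq[0]=%lu volt[0]=%lu\n",
		nominal_freq, core_mask, freqs[0], volts[0]);
}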


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,175 +25,112 @@
#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <mali_kbase_hwaccess_gpuprops.h>
#include <mali_kbase_gpuprops_private_types.h>
int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprops_regdump *regdump)
{
int i;
struct kbase_gpuprops_regdump registers = { 0 };
/* Fill regdump with the content of the relevant registers */
registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
/* regdump is zero-initialized, individual entries do not need to be explicitly set */
regdump->gpu_id = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID));
registers.l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES));
regdump->shader_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PRESENT));
regdump->tiler_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(TILER_PRESENT));
regdump->l2_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PRESENT));
if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(AS_PRESENT)))
regdump->as_present = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AS_PRESENT));
if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(STACK_PRESENT)))
regdump->stack_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(STACK_PRESENT));
registers.tiler_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_FEATURES));
registers.mem_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(MEM_FEATURES));
registers.mmu_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(MMU_FEATURES));
registers.as_present = kbase_reg_read(kbdev,
GPU_CONTROL_REG(AS_PRESENT));
#if !MALI_USE_CSF
registers.js_present = kbase_reg_read(kbdev,
GPU_CONTROL_REG(JS_PRESENT));
#else /* !MALI_USE_CSF */
registers.js_present = 0;
regdump->js_present = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(JS_PRESENT));
/* Not a valid register on TMIX */
/* TGOx specific register */
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_TLS_ALLOC))
regdump->thread_tls_alloc =
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_TLS_ALLOC));
#endif /* !MALI_USE_CSF */
regdump->thread_max_threads = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_THREADS));
regdump->thread_max_workgroup_size =
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_WORKGROUP_SIZE));
regdump->thread_max_barrier_size =
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_BARRIER_SIZE));
regdump->thread_features = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_FEATURES));
/* Feature Registers */
/* AMBA_FEATURES enum is mapped to COHERENCY_FEATURES enum */
regdump->coherency_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(COHERENCY_FEATURES));
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES))
regdump->core_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(CORE_FEATURES));
#if MALI_USE_CSF
if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(GPU_FEATURES)))
regdump->gpu_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_FEATURES));
#endif /* MALI_USE_CSF */
regdump->tiler_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(TILER_FEATURES));
regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES));
regdump->mem_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MEM_FEATURES));
regdump->mmu_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MMU_FEATURES));
#if !MALI_USE_CSF
for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
#if !MALI_USE_CSF
registers.js_features[i] = kbase_reg_read(kbdev,
GPU_CONTROL_REG(JS_FEATURES_REG(i)));
#else /* !MALI_USE_CSF */
registers.js_features[i] = 0;
regdump->js_features[i] = kbase_reg_read32(kbdev, GPU_JS_FEATURES_OFFSET(i));
#endif /* !MALI_USE_CSF */
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
registers.texture_features[i] = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
registers.thread_max_threads = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_THREADS));
registers.thread_max_workgroup_size = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
registers.thread_max_barrier_size = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
registers.thread_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_FEATURES));
registers.thread_tls_alloc = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_TLS_ALLOC));
registers.shader_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_LO));
registers.shader_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_HI));
registers.tiler_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_PRESENT_LO));
registers.tiler_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_PRESENT_HI));
registers.l2_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_LO));
registers.l2_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_HI));
registers.stack_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(STACK_PRESENT_LO));
registers.stack_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(STACK_PRESENT_HI));
if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) {
registers.gpu_features_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_FEATURES_LO));
registers.gpu_features_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_FEATURES_HI));
} else {
registers.gpu_features_lo = 0;
registers.gpu_features_hi = 0;
#if MALI_USE_CSF
#endif /* MALI_USE_CSF */
{
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
regdump->texture_features[i] =
kbase_reg_read32(kbdev, GPU_TEXTURE_FEATURES_OFFSET(i));
}
if (!kbase_is_gpu_removed(kbdev)) {
*regdump = registers;
return 0;
} else
if (kbase_is_gpu_removed(kbdev))
return -EIO;
return 0;
}
int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
struct kbase_current_config_regdump *curr_config_regdump)
struct kbase_current_config_regdump *curr_config_regdump)
{
if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump))
return -EINVAL;
curr_config_regdump->mem_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(MEM_FEATURES));
curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_LO));
curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_HI));
curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_LO));
curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_HI));
curr_config_regdump->mem_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MEM_FEATURES));
curr_config_regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES));
curr_config_regdump->shader_present =
kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PRESENT));
curr_config_regdump->l2_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PRESENT));
if (kbase_is_gpu_removed(kbdev))
return -EIO;
return 0;
}
int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
u32 coherency_features;
int error = 0;
/* Ensure we can access the GPU registers */
kbase_pm_register_access_enable(kbdev);
coherency_features = kbase_cache_get_coherency_features(kbdev);
if (kbase_is_gpu_removed(kbdev))
error = -EIO;
regdump->coherency_features = coherency_features;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES))
regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
else
regdump->core_features = 0;
kbase_pm_register_access_disable(kbdev);
return error;
}
int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
struct kbasep_gpuprops_regdump *regdump)
{
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
u32 l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES));
u32 l2_config =
kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
u32 asn_hash[ASN_HASH_COUNT] = {
0,
};
int i;
regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES));
regdump->l2_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG));
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) {
for (i = 0; i < ASN_HASH_COUNT; i++)
asn_hash[i] = kbase_reg_read(
kbdev, GPU_CONTROL_REG(ASN_HASH(i)));
#if MALI_USE_CSF
if (kbase_hw_has_l2_slice_hash_feature(kbdev)) {
int i;
for (i = 0; i < GPU_L2_SLICE_HASH_COUNT; i++)
regdump->l2_slice_hash[i] =
kbase_reg_read32(kbdev, GPU_L2_SLICE_HASH_OFFSET(i));
}
#endif /* MALI_USE_CSF */
if (kbase_is_gpu_removed(kbdev))
return -EIO;
regdump->l2_features = l2_features;
regdump->l2_config = l2_config;
for (i = 0; i < ASN_HASH_COUNT; i++)
regdump->l2_asn_hash[i] = asn_hash[i];
}
return 0;
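A minimal sketch (not part of the commit) of the access-pattern change applied throughout this file: 64-bit properties such as SHADER_PRESENT were previously read as _LO/_HI pairs through kbase_reg_read()/GPU_CONTROL_REG(), and are now read with a single kbase_reg_read64() against the regmap enum.

static u64 example_read_shader_present(struct kbase_device *kbdev)
{
	/* g18p0 style (removed above):
	 *   lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO));
	 *   hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_HI));
	 * g21p0 style (added above):
	 */
	return kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PRESENT));
}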


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
@@ -34,8 +34,8 @@ static int wait_prfcnt_ready(struct kbase_device *kbdev)
u32 loops;
for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) {
const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
GPU_STATUS_PRFCNT_ACTIVE;
const u32 prfcnt_active = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) &
GPU_STATUS_PRFCNT_ACTIVE;
if (!prfcnt_active)
return 0;
}
@@ -44,9 +44,8 @@ static int wait_prfcnt_ready(struct kbase_device *kbdev)
return -EBUSY;
}
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
struct kbase_instr_hwcnt_enable *enable)
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx,
struct kbase_instr_hwcnt_enable *enable)
{
unsigned long flags;
int err = -EINVAL;
@@ -74,9 +73,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
}
/* Enable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
PRFCNT_SAMPLE_COMPLETED);
irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK));
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK),
irq_mask | PRFCNT_SAMPLE_COMPLETED);
/* In use, this context is the owner */
kbdev->hwcnt.kctx = kctx;
@@ -89,8 +88,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
/* Configure */
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
prfcnt_config |= kbdev->hwcnt.backend.override_counter_set
<< PRFCNT_CONFIG_SETSELECT_SHIFT;
prfcnt_config |= kbdev->hwcnt.backend.override_counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
#else
prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
#endif
@@ -100,32 +98,25 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
if (err)
return err;
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
/* Wait until prfcnt is disabled before writing configuration registers */
err = wait_prfcnt_ready(kbdev);
if (err)
return err;
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
enable->dump_buffer & 0xFFFFFFFF);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
enable->dump_buffer >> 32);
kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(PRFCNT_BASE), enable->dump_buffer);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
enable->fe_bm);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_JM_EN), enable->fe_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
enable->shader_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
enable->mmu_l2_bm);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_SHADER_EN), enable->shader_bm);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_MMU_L2_EN), enable->mmu_l2_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
enable->tiler_bm);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_TILER_EN), enable->tiler_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
@@ -151,15 +142,16 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
return;
/* Disable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK),
irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
/* Wait until prfcnt config register can be written, then disable the counters.
* Return value is ignored as we are disabling anyway.
*/
wait_prfcnt_ready(kbdev);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG), 0);
kbdev->hwcnt.kctx = NULL;
kbdev->hwcnt.addr = 0ULL;
@@ -206,8 +198,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* Ongoing dump/setup - wait for its completion */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0);
}
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
@@ -218,8 +209,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK",
kctx);
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", kctx);
return 0;
}
@@ -261,28 +251,22 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
goto unlock;
/* Reconfigure the dump address */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
kbdev->hwcnt.addr & 0xFFFFFFFF);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
kbdev->hwcnt.addr >> 32);
kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(PRFCNT_BASE), kbdev->hwcnt.addr);
/* Start dumping */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL,
kbdev->hwcnt.addr);
KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, kbdev->hwcnt.addr);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_SAMPLE);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE);
dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx);
unlock:
unlock:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
bool * const success)
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, bool *const success)
{
unsigned long flags;
bool complete = false;
@@ -335,8 +319,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
int err;
/* Wait for dump & cache clean to complete */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
@@ -347,8 +330,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
err = -EIO;
} else {
/* Dump done */
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_IDLE);
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE);
err = 0;
}
@@ -368,8 +350,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
/* Check it's the context previously set up and we're not in IDLE
* state.
*/
if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_IDLE)
if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE)
goto unlock;
if (kbase_is_gpu_removed(kbdev)) {
@@ -384,8 +365,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
/* Clear the counters */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_CLEAR);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR);
unlock:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -443,8 +423,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
init_waitqueue_head(&kbdev->hwcnt.backend.wait);
kbdev->hwcnt.backend.triggered = 0;
#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
/* Use the build time option for the override default. */
#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
@@ -474,8 +452,7 @@ void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
* Valid inputs are the values accepted by the SET_SELECT bits of the
* PRFCNT_CONFIG register as defined in the architecture specification.
*/
debugfs_create_u8("hwcnt_set_select", 0644,
kbdev->mali_debugfs_directory,
debugfs_create_u8("hwcnt_set_select", 0644, kbdev->mali_debugfs_directory,
(u8 *)&kbdev->hwcnt.backend.override_counter_set);
}
#endif
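A condensed sketch (not part of the commit) of the counter-programming sequence used by kbase_instr_hwcnt_enable_internal() above; the wait_prfcnt_ready() calls made between steps in the real code are omitted here, and all register and field names come from the hunks.

static void example_program_hwcnt(struct kbase_device *kbdev, u64 dump_gpu_va,
				  u32 prfcnt_config,
				  struct kbase_instr_hwcnt_enable *enable)
{
	/* 1. Turn the counter block off before reprogramming it. */
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG),
			  prfcnt_config | PRFCNT_CONFIG_MODE_OFF);

	/* 2. Program the dump buffer address with one 64-bit write
	 * (previously two PRFCNT_BASE_LO/HI writes).
	 */
	kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(PRFCNT_BASE), dump_gpu_va);

	/* 3. Select which counter blocks to capture. */
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_JM_EN), enable->fe_bm);
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_SHADER_EN), enable->shader_bm);
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_MMU_L2_EN), enable->mmu_l2_bm);
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_TILER_EN), enable->tiler_bm);

	/* 4. Arm manual mode; a dump is later requested by writing
	 * GPU_COMMAND_PRFCNT_SAMPLE to GPU_COMMAND.
	 */
	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG),
			  prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
}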


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,11 +37,10 @@ void kbase_release_interrupts(struct kbase_device *kbdev);
*/
void kbase_synchronize_irqs(struct kbase_device *kbdev);
int kbasep_common_test_interrupt_handlers(
struct kbase_device * const kbdev);
int kbasep_common_test_interrupt_handlers(struct kbase_device *const kbdev);
irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val);
int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
irq_handler_t custom_handler, int irq_type);
int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler,
int irq_type);
#endif /* _KBASE_IRQ_INTERNAL_H_ */
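A short hypothetical sketch (not part of the commit) of kbase_set_custom_irq_handler() as declared above; GPU_IRQ_HANDLER is only defined in the debug build, as the irq_linux.c hunks below show, and passing NULL reinstalls the default handler from kbase_handler_table[].

#ifdef CONFIG_MALI_BIFROST_DEBUG
static int example_restore_default_gpu_irq(struct kbase_device *kbdev)
{
	/* NULL means "reinstall the driver's default GPU IRQ handler". */
	return kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER);
}
#endif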


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,12 +34,12 @@
static void *kbase_tag(void *ptr, u32 tag)
{
return (void *)(((uintptr_t) ptr) | tag);
return (void *)(((uintptr_t)ptr) | tag);
}
static void *kbase_untag(void *ptr)
{
return (void *)(((uintptr_t) ptr) & ~3);
return (void *)(((uintptr_t)ptr) & ~3);
}
static irqreturn_t kbase_job_irq_handler(int irq, void *data)
@@ -56,12 +56,12 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data)
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS));
#ifdef CONFIG_MALI_BIFROST_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
__func__, irq, val);
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", __func__,
irq, val);
#endif /* CONFIG_MALI_BIFROST_DEBUG */
if (!val) {
@@ -99,12 +99,12 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
atomic_inc(&kbdev->faults_pending);
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS));
#ifdef CONFIG_MALI_BIFROST_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
__func__, irq, val);
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", __func__,
irq, val);
#endif /* CONFIG_MALI_BIFROST_DEBUG */
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -126,7 +126,8 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
{
unsigned long flags;
struct kbase_device *kbdev = kbase_untag(data);
u32 val;
u32 gpu_irq_status;
irqreturn_t irq_state = IRQ_NONE;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -136,23 +137,25 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
gpu_irq_status = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_STATUS));
#ifdef CONFIG_MALI_BIFROST_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
__func__, irq, val);
if (!kbdev->pm.backend.driver_ready_for_irqs) {
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", __func__,
irq, gpu_irq_status);
}
#endif /* CONFIG_MALI_BIFROST_DEBUG */
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (!val)
return IRQ_NONE;
if (gpu_irq_status) {
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, gpu_irq_status);
kbase_gpu_interrupt(kbdev, gpu_irq_status);
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
irq_state = IRQ_HANDLED;
}
kbase_gpu_interrupt(kbdev, val);
return IRQ_HANDLED;
return irq_state;
}
static irq_handler_t kbase_handler_table[] = {
@@ -162,14 +165,14 @@ static irq_handler_t kbase_handler_table[] = {
};
#ifdef CONFIG_MALI_BIFROST_DEBUG
#define JOB_IRQ_HANDLER JOB_IRQ_TAG
#define GPU_IRQ_HANDLER GPU_IRQ_TAG
#define JOB_IRQ_HANDLER JOB_IRQ_TAG
#define GPU_IRQ_HANDLER GPU_IRQ_TAG
/**
* kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler()
* @irq: IRQ number
* @data: Data associated with this IRQ (i.e. kbdev)
* @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS)
* @val: Value of the GPU_CONTROL_ENUM(GPU_IRQ_STATUS)
*
* Handle the GPU device interrupt source requests reflected in the
* given source bit-pattern. The test code caller is responsible for
@@ -206,33 +209,30 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler);
*
* Return: 0 case success, error code otherwise
*/
int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
irq_handler_t custom_handler,
int irq_type)
int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler,
int irq_type)
{
int result = 0;
irq_handler_t requested_irq_handler = NULL;
KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) &&
(irq_type <= GPU_IRQ_HANDLER));
KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && (irq_type <= GPU_IRQ_HANDLER));
/* Release previous handler */
if (kbdev->irqs[irq_type].irq)
free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
requested_irq_handler = (custom_handler != NULL) ?
custom_handler :
kbase_handler_table[irq_type];
requested_irq_handler = (custom_handler != NULL) ? custom_handler :
kbase_handler_table[irq_type];
if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler,
kbdev->irqs[irq_type].flags | IRQF_SHARED,
dev_name(kbdev->dev),
kbdev->irqs[irq_type].flags | IRQF_SHARED, dev_name(kbdev->dev),
kbase_tag(kbdev, irq_type)) != 0) {
result = -EINVAL;
dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
kbdev->irqs[irq_type].irq, irq_type);
kbdev->irqs[irq_type].irq, irq_type);
#if IS_ENABLED(CONFIG_SPARSE_IRQ)
dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
dev_err(kbdev->dev,
"You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
#endif /* CONFIG_SPARSE_IRQ */
}
@@ -251,7 +251,7 @@ struct kbasep_irq_test {
static struct kbasep_irq_test kbasep_irq_test_data;
#define IRQ_TEST_TIMEOUT 500
#define IRQ_TEST_TIMEOUT 500
static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
{
@@ -267,7 +267,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -279,7 +279,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
kbasep_irq_test_data.triggered = 1;
wake_up(&kbasep_irq_test_data.wait);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), val);
return IRQ_HANDLED;
}
@@ -298,7 +298,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -310,15 +310,14 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
kbasep_irq_test_data.triggered = 1;
wake_up(&kbasep_irq_test_data.wait);
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val);
kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), val);
return IRQ_HANDLED;
}
static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
{
struct kbasep_irq_test *test_data = container_of(timer,
struct kbasep_irq_test, timer);
struct kbasep_irq_test *test_data = container_of(timer, struct kbasep_irq_test, timer);
test_data->timeout = 1;
test_data->triggered = 1;
@@ -326,8 +325,7 @@ static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
static int kbasep_common_test_interrupt(
struct kbase_device * const kbdev, u32 tag)
static int kbasep_common_test_interrupt(struct kbase_device *const kbdev, u32 tag)
{
int err = 0;
irq_handler_t test_handler;
@@ -339,13 +337,13 @@ static int kbasep_common_test_interrupt(
switch (tag) {
case JOB_IRQ_TAG:
test_handler = kbase_job_irq_test_handler;
rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
rawstat_offset = JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT);
mask_offset = JOB_CONTROL_ENUM(JOB_IRQ_MASK);
break;
case MMU_IRQ_TAG:
test_handler = kbase_mmu_irq_test_handler;
rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
mask_offset = MMU_REG(MMU_IRQ_MASK);
rawstat_offset = MMU_CONTROL_ENUM(IRQ_RAWSTAT);
mask_offset = MMU_CONTROL_ENUM(IRQ_MASK);
break;
case GPU_IRQ_TAG:
/* already tested by pm_driver - bail out */
@@ -354,9 +352,9 @@ static int kbasep_common_test_interrupt(
}
/* store old mask */
old_mask_val = kbase_reg_read(kbdev, mask_offset);
old_mask_val = kbase_reg_read32(kbdev, mask_offset);
/* mask interrupts */
kbase_reg_write(kbdev, mask_offset, 0x0);
kbase_reg_write32(kbdev, mask_offset, 0x0);
if (kbdev->irqs[tag].irq) {
/* release original handler and install test handler */
@@ -364,36 +362,33 @@ static int kbasep_common_test_interrupt(
err = -EINVAL;
} else {
kbasep_irq_test_data.timeout = 0;
hrtimer_init(&kbasep_irq_test_data.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
kbasep_irq_test_data.timer.function =
kbasep_test_interrupt_timeout;
hrtimer_init(&kbasep_irq_test_data.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
kbasep_irq_test_data.timer.function = kbasep_test_interrupt_timeout;
/* trigger interrupt */
kbase_reg_write(kbdev, mask_offset, 0x1);
kbase_reg_write(kbdev, rawstat_offset, 0x1);
kbase_reg_write32(kbdev, mask_offset, 0x1);
kbase_reg_write32(kbdev, rawstat_offset, 0x1);
hrtimer_start(&kbasep_irq_test_data.timer,
HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
HRTIMER_MODE_REL);
HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), HRTIMER_MODE_REL);
wait_event(kbasep_irq_test_data.wait,
kbasep_irq_test_data.triggered != 0);
wait_event(kbasep_irq_test_data.wait, kbasep_irq_test_data.triggered != 0);
if (kbasep_irq_test_data.timeout != 0) {
dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
kbdev->irqs[tag].irq, tag);
kbdev->irqs[tag].irq, tag);
err = -EINVAL;
} else {
dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
kbdev->irqs[tag].irq, tag);
kbdev->irqs[tag].irq, tag);
}
hrtimer_cancel(&kbasep_irq_test_data.timer);
kbasep_irq_test_data.triggered = 0;
/* mask interrupts */
kbase_reg_write(kbdev, mask_offset, 0x0);
kbase_reg_write32(kbdev, mask_offset, 0x0);
/* release test handler */
free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
@@ -401,21 +396,20 @@ static int kbasep_common_test_interrupt(
/* restore original interrupt */
if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
kbdev->irqs[tag].flags | IRQF_SHARED,
dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
kbdev->irqs[tag].flags | IRQF_SHARED, dev_name(kbdev->dev),
kbase_tag(kbdev, tag))) {
dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
kbdev->irqs[tag].irq, tag);
kbdev->irqs[tag].irq, tag);
err = -EINVAL;
}
}
/* restore old mask */
kbase_reg_write(kbdev, mask_offset, old_mask_val);
kbase_reg_write32(kbdev, mask_offset, old_mask_val);
return err;
}
int kbasep_common_test_interrupt_handlers(
struct kbase_device * const kbdev)
int kbasep_common_test_interrupt_handlers(struct kbase_device *const kbdev)
{
int err;
@@ -427,19 +421,21 @@ int kbasep_common_test_interrupt_handlers(
err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
if (err) {
dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
dev_err(kbdev->dev,
"Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
goto out;
}
err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
if (err) {
dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
dev_err(kbdev->dev,
"Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
goto out;
}
dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
out:
out:
kbase_pm_context_idle(kbdev);
return err;
@@ -454,14 +450,14 @@ int kbase_install_interrupts(struct kbase_device *kbdev)
for (i = 0; i < nr; i++) {
err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
kbdev->irqs[i].flags | IRQF_SHARED,
dev_name(kbdev->dev),
kbase_tag(kbdev, i));
kbdev->irqs[i].flags | IRQF_SHARED, dev_name(kbdev->dev),
kbase_tag(kbdev, i));
if (err) {
dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
kbdev->irqs[i].irq, i);
kbdev->irqs[i].irq, i);
#if IS_ENABLED(CONFIG_SPARSE_IRQ)
dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
dev_err(kbdev->dev,
"You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
#endif /* CONFIG_SPARSE_IRQ */
goto release;
}
@@ -469,7 +465,7 @@ int kbase_install_interrupts(struct kbase_device *kbdev)
return 0;
release:
release:
while (i-- > 0)
free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
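
A minimal sketch (not taken from the DDK) of the request-all-or-roll-back pattern that kbase_install_interrupts() above follows for its shared IRQ lines; struct my_dev, my_install_interrupts and MY_NR_IRQS are hypothetical names standing in for the driver's own:

#include <linux/interrupt.h>
#include <linux/device.h>

#define MY_NR_IRQS 3 /* e.g. job, MMU and GPU lines in the kbase case */

struct my_dev {
        struct device *dev;
        struct { unsigned int irq; unsigned long flags; } irqs[MY_NR_IRQS];
};

static int my_install_interrupts(struct my_dev *mdev, irq_handler_t handlers[MY_NR_IRQS])
{
        int i, err;

        for (i = 0; i < MY_NR_IRQS; i++) {
                err = request_irq(mdev->irqs[i].irq, handlers[i],
                                  mdev->irqs[i].flags | IRQF_SHARED,
                                  dev_name(mdev->dev), mdev);
                if (err) {
                        dev_err(mdev->dev, "Can't request interrupt %d (index %d)\n",
                                mdev->irqs[i].irq, i);
                        goto release;
                }
        }
        return 0;

release:
        /* Undo only the requests that succeeded before the failure. */
        while (i-- > 0)
                free_irq(mdev->irqs[i].irq, mdev);
        return err;
}

With IRQF_SHARED each handler must return IRQ_NONE when its status register reads zero, which is exactly what the reworked kbase handlers above do.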


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
CSTD_UNUSED(current_as);
lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
lockdep_assert_held(&js_devdata->runpool_mutex);
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -88,8 +90,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_contex
return false;
}
void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
struct kbase_context *kctx)
void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, struct kbase_context *kctx)
{
int as_nr = kctx->as_nr;
@@ -111,13 +112,14 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
kbase_js_runpool_dec_context_count(kbdev, kctx);
}
void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
struct kbase_context *kctx)
void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, struct kbase_context *kctx)
{
CSTD_UNUSED(kbdev);
CSTD_UNUSED(kctx);
}
int kbase_backend_find_and_release_free_address_space(
struct kbase_device *kbdev, struct kbase_context *kctx)
int kbase_backend_find_and_release_free_address_space(struct kbase_device *kbdev,
struct kbase_context *kctx)
{
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
@@ -146,12 +148,11 @@ int kbase_backend_find_and_release_free_address_space(
* descheduled.
*/
if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
atomic_read(&as_kctx->refcount) == 1) {
atomic_read(&as_kctx->refcount) == 1) {
if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) {
WARN(1, "Failed to retain active context\n");
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
@@ -168,7 +169,6 @@ int kbase_backend_find_and_release_free_address_space(
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
/* Release context from address space */
mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_devdata->runpool_mutex);
@@ -176,9 +176,7 @@ int kbase_backend_find_and_release_free_address_space(
kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
as_kctx,
true);
kbasep_js_runpool_requeue_or_kill_ctx(kbdev, as_kctx, true);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
@@ -206,9 +204,7 @@ int kbase_backend_find_and_release_free_address_space(
return KBASEP_AS_NR_INVALID;
}
bool kbase_backend_use_ctx(struct kbase_device *kbdev,
struct kbase_context *kctx,
int as_nr)
bool kbase_backend_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr)
{
struct kbasep_js_device_data *js_devdata;
struct kbase_as *new_address_space = NULL;
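
The hunk above keeps the rule that an address space may only be reclaimed from a context that is not privileged and whose reference count has dropped to the scheduler's single reference. A small hedged sketch of that check; my_ctx_is_stealable is a hypothetical helper, not a kbase function:

#include <linux/atomic.h>
#include <linux/types.h>

/* An address space is only worth stealing when its context is not
 * privileged and the scheduler holds the sole remaining reference. */
static bool my_ctx_is_stealable(const atomic_t *refcount, bool privileged)
{
        return !privileged && atomic_read(refcount) == 1;
}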


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -109,21 +109,21 @@ struct kbase_backend_data {
atomic_t reset_gpu;
/* The GPU reset isn't pending */
#define KBASE_RESET_GPU_NOT_PENDING 0
#define KBASE_RESET_GPU_NOT_PENDING 0
/* kbase_prepare_to_reset_gpu has been called */
#define KBASE_RESET_GPU_PREPARED 1
#define KBASE_RESET_GPU_PREPARED 1
/* kbase_reset_gpu has been called - the reset will now definitely happen
* within the timeout period
*/
#define KBASE_RESET_GPU_COMMITTED 2
#define KBASE_RESET_GPU_COMMITTED 2
/* The GPU reset process is currently occurring (timeout has expired or
* kbasep_try_reset_gpu_early was called)
*/
#define KBASE_RESET_GPU_HAPPENING 3
#define KBASE_RESET_GPU_HAPPENING 3
/* Reset the GPU silently, used when resetting the GPU as part of normal
* behavior (e.g. when exiting protected mode).
*/
#define KBASE_RESET_GPU_SILENT 4
#define KBASE_RESET_GPU_SILENT 4
struct workqueue_struct *reset_workq;
struct work_struct reset_work;
wait_queue_head_t reset_wait;
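
The KBASE_RESET_GPU_* values above live in an atomic_t (reset_gpu), so state advances without extra locking. A self-contained sketch of how the first transition is typically won with atomic_cmpxchg(); the MY_RESET_* copies exist only so the example compiles on its own, and the exact transition sequence in kbase may differ:

#include <linux/atomic.h>
#include <linux/types.h>

#define MY_RESET_NOT_PENDING 0
#define MY_RESET_PREPARED 1

/* Only one caller wins the NOT_PENDING -> PREPARED transition; every other
 * concurrent caller sees a different old value and backs off. */
static bool my_prepare_reset(atomic_t *reset_state)
{
        return atomic_cmpxchg(reset_state, MY_RESET_NOT_PENDING,
                              MY_RESET_PREPARED) == MY_RESET_NOT_PENDING;
}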

File diff suppressed because it is too large.


@@ -41,13 +41,13 @@
* @job_tail: Job tail address reported by GPU
* @end_timestamp: Timestamp of job completion
*/
void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp);
void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail,
ktime_t *end_timestamp);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size)
{
snprintf(js_string, js_size, "job_slot_%u", js);
(void)scnprintf(js_string, js_size, "job_slot_%u", js);
return js_string;
}
#endif
@@ -74,8 +74,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
* on the specified atom
* @kbdev: Device pointer
* @js: Job slot to stop on
* @action: The action to perform, either JSn_COMMAND_HARD_STOP or
* JSn_COMMAND_SOFT_STOP
* @action: The action to perform, either JS_COMMAND_HARD_STOP or
* JS_COMMAND_SOFT_STOP
* @core_reqs: Core requirements of atom to stop
* @target_katom: Atom to stop
*
@@ -94,8 +94,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns
* @kctx: Context pointer. May be NULL
* @katom: Specific atom to stop. May be NULL
* @js: Job slot to hard stop
* @action: The action to perform, either JSn_COMMAND_HARD_STOP or
* JSn_COMMAND_SOFT_STOP
* @action: The action to perform, either JS_COMMAND_HARD_STOP or
* JS_COMMAND_SOFT_STOP
*
* If no context is provided then all jobs on the slot will be soft or hard
* stopped.
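
One behavioural tweak in the hunk above is the switch from snprintf() to scnprintf() when building the job-slot trace string. The distinction matters whenever the return value is reused as a write cursor; a short kernel-style sketch with a hypothetical helper name:

#include <linux/kernel.h>

/* scnprintf() returns the number of characters actually stored (excluding
 * the trailing NUL) and never more than size - 1, so the result is safe to
 * use when appending further text. snprintf() instead returns the length
 * the full string *would* have needed, which can exceed the buffer. */
static int my_append_slot_name(char *buf, size_t size, unsigned int js)
{
        return scnprintf(buf, size, "job_slot_%u", js);
}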

File diff suppressed because it is too large.


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,34 +28,23 @@
#include <mali_kbase_reset_gpu.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
#include <mali_kbase_gpu_metrics.h>
#endif
#if !MALI_USE_CSF
/*
* Hold the runpool_mutex for this
*/
static inline bool timer_callback_should_run(struct kbase_device *kbdev)
static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_running_ctxs)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
int nr_running_ctxs;
lockdep_assert_held(&kbdev->js_data.runpool_mutex);
/* Timer must stop if we are suspending */
if (backend->suspend_timer)
return false;
/* nr_contexts_pullable is updated with the runpool_mutex. However, the
* locking in the caller gives us a barrier that ensures
* nr_contexts_pullable is up-to-date for reading
*/
nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
#ifdef CONFIG_MALI_BIFROST_DEBUG
if (kbdev->js_data.softstop_always) {
/* Debug support for allowing soft-stop on a single context */
return true;
}
#endif /* CONFIG_MALI_BIFROST_DEBUG */
#endif /* CONFIG_MALI_BIFROST_DEBUG */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
/* Timeouts would have to be 4x longer (due to micro-
@@ -69,19 +58,16 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev)
* don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
*/
{
int nr_compute_ctxs =
kbasep_js_ctx_attr_count_on_runpool(kbdev,
KBASEP_JS_CTX_ATTR_COMPUTE);
int nr_noncompute_ctxs = nr_running_ctxs -
nr_compute_ctxs;
int nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool(
kbdev, KBASEP_JS_CTX_ATTR_COMPUTE);
int nr_noncompute_ctxs = nr_running_ctxs - nr_compute_ctxs;
return (bool) (nr_compute_ctxs >= 2 ||
nr_noncompute_ctxs > 0);
return (bool)(nr_compute_ctxs >= 2 || nr_noncompute_ctxs > 0);
}
} else {
/* Run the timer callback whenever you have at least 1 context
*/
return (bool) (nr_running_ctxs > 0);
return (bool)(nr_running_ctxs > 0);
}
}
@@ -96,8 +82,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
KBASE_DEBUG_ASSERT(timer != NULL);
backend = container_of(timer, struct kbase_backend_data,
scheduling_timer);
backend = container_of(timer, struct kbase_backend_data, scheduling_timer);
kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
js_devdata = &kbdev->js_data;
@@ -119,26 +104,19 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
u32 ticks = atom->ticks++;
#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP)
u32 soft_stop_ticks, hard_stop_ticks,
gpu_reset_ticks;
u32 soft_stop_ticks, hard_stop_ticks, gpu_reset_ticks;
if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
soft_stop_ticks =
js_devdata->soft_stop_ticks_cl;
hard_stop_ticks =
js_devdata->hard_stop_ticks_cl;
gpu_reset_ticks =
js_devdata->gpu_reset_ticks_cl;
soft_stop_ticks = js_devdata->soft_stop_ticks_cl;
hard_stop_ticks = js_devdata->hard_stop_ticks_cl;
gpu_reset_ticks = js_devdata->gpu_reset_ticks_cl;
} else {
soft_stop_ticks =
js_devdata->soft_stop_ticks;
soft_stop_ticks = js_devdata->soft_stop_ticks;
if (kbase_is_quick_reset_enabled(kbdev)) {
hard_stop_ticks = 2;
gpu_reset_ticks = 3;
} else {
hard_stop_ticks =
js_devdata->hard_stop_ticks_ss;
gpu_reset_ticks =
js_devdata->gpu_reset_ticks_ss;
hard_stop_ticks = js_devdata->hard_stop_ticks_ss;
gpu_reset_ticks = js_devdata->gpu_reset_ticks_ss;
}
}
@@ -149,8 +127,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
* races between this worker and the thread
* changing the timeouts.
*/
if (backend->timeouts_updated &&
ticks > soft_stop_ticks)
if (backend->timeouts_updated && ticks > soft_stop_ticks)
ticks = atom->ticks = soft_stop_ticks;
/* Job is Soft-Stoppable */
@@ -162,7 +139,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
*/
#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
int disjoint_threshold =
KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
u32 softstop_flags = 0u;
dev_dbg(kbdev->dev, "Soft-stop");
@@ -183,13 +160,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
* older value and register a disjoint
* event when we try soft-stopping
*/
if (js_devdata->nr_user_contexts_running
>= disjoint_threshold)
softstop_flags |=
JS_COMMAND_SW_CAUSES_DISJOINT;
if (js_devdata->nr_user_contexts_running >=
disjoint_threshold)
softstop_flags |= JS_COMMAND_SW_CAUSES_DISJOINT;
kbase_job_slot_softstop_swflags(kbdev,
s, atom, softstop_flags);
kbase_job_slot_softstop_swflags(kbdev, s, atom,
softstop_flags);
#endif
} else if (ticks == hard_stop_ticks) {
/* Job has been scheduled for at least
@@ -198,15 +174,13 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
* now. Hard stop the slot.
*/
#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
int ms =
js_devdata->scheduling_period_ns
/ 1000000u;
int ms = js_devdata->scheduling_period_ns / 1000000u;
if (!kbase_is_quick_reset_enabled(kbdev))
dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks,
(unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s,
atom);
dev_warn(
kbdev->dev,
"JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks, (unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s, atom);
#endif
} else if (ticks == gpu_reset_ticks) {
/* Job has been scheduled for at least
@@ -217,7 +191,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
*/
reset_needed = true;
}
#else /* !CONFIG_MALI_JOB_DUMP */
#else /* !CONFIG_MALI_JOB_DUMP */
/* NOTE: During CONFIG_MALI_JOB_DUMP, we use
* the alternate timeouts, which makes the hard-
* stop and GPU reset timeout much longer. We
@@ -230,24 +204,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
* CONFIG_MALI_JOB_DUMP, however.
*/
dev_dbg(kbdev->dev, "Soft-stop");
} else if (ticks ==
js_devdata->hard_stop_ticks_dumping) {
} else if (ticks == js_devdata->hard_stop_ticks_dumping) {
/* Job has been scheduled for at least
* js_devdata->hard_stop_ticks_dumping
* ticks. Hard stop the slot.
*/
#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
int ms =
js_devdata->scheduling_period_ns
/ 1000000u;
dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks,
(unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s,
atom);
int ms = js_devdata->scheduling_period_ns / 1000000u;
dev_warn(
kbdev->dev,
"JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks, (unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s, atom);
#endif
} else if (ticks ==
js_devdata->gpu_reset_ticks_dumping) {
} else if (ticks == js_devdata->gpu_reset_ticks_dumping) {
/* Job has been scheduled for at least
* js_devdata->gpu_reset_ticks_dumping
* ticks. It should have left the GPU by
@@ -256,16 +226,16 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
*/
reset_needed = true;
}
#endif /* !CONFIG_MALI_JOB_DUMP */
#endif /* !CONFIG_MALI_JOB_DUMP */
}
}
}
if (reset_needed) {
if (kbase_is_quick_reset_enabled(kbdev))
dev_err(kbdev->dev, "quick reset");
else {
dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve.");
}
else
dev_err(kbdev->dev,
"JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve.");
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
@@ -274,8 +244,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
if (backend->timer_running)
hrtimer_start(&backend->scheduling_timer,
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
backend->timeouts_updated = false;
@@ -283,18 +253,19 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
#endif /* !MALI_USE_CSF */
void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
{
#if !MALI_USE_CSF
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
unsigned long flags;
/* Timer must stop if we are suspending */
const bool suspend_timer = backend->suspend_timer;
const int nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
lockdep_assert_held(&js_devdata->runpool_mutex);
if (!timer_callback_should_run(kbdev)) {
if (suspend_timer || !timer_callback_should_run(kbdev, nr_running_ctxs)) {
/* Take spinlock to force synchronisation with timer */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->timer_running = false;
@@ -308,47 +279,70 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
hrtimer_cancel(&backend->scheduling_timer);
}
if (timer_callback_should_run(kbdev) && !backend->timer_running) {
if (!suspend_timer && timer_callback_should_run(kbdev, nr_running_ctxs) &&
!backend->timer_running) {
/* Take spinlock to force synchronisation with timer */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->timer_running = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
hrtimer_start(&backend->scheduling_timer,
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u);
}
#else /* !MALI_USE_CSF */
CSTD_UNUSED(kbdev);
#endif /* !MALI_USE_CSF */
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
if (unlikely(suspend_timer)) {
js_devdata->gpu_metrics_timer_needed = false;
/* Cancel the timer as System suspend is happening */
hrtimer_cancel(&js_devdata->gpu_metrics_timer);
js_devdata->gpu_metrics_timer_running = false;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Explicitly emit the tracepoint on System suspend */
kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns());
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return;
}
if (!nr_running_ctxs) {
/* Just set the flag to not restart the timer on expiry */
js_devdata->gpu_metrics_timer_needed = false;
return;
}
/* There are runnable contexts so the timer is needed */
if (!js_devdata->gpu_metrics_timer_needed) {
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
js_devdata->gpu_metrics_timer_needed = true;
/* No need to restart the timer if it is already running. */
if (!js_devdata->gpu_metrics_timer_running) {
hrtimer_start(&js_devdata->gpu_metrics_timer,
HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()),
HRTIMER_MODE_REL);
js_devdata->gpu_metrics_timer_running = true;
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
#endif
}
int kbase_backend_timer_init(struct kbase_device *kbdev)
{
#if !MALI_USE_CSF
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
backend->scheduling_timer.function = timer_callback;
backend->timer_running = false;
#else /* !MALI_USE_CSF */
CSTD_UNUSED(kbdev);
#endif /* !MALI_USE_CSF */
return 0;
}
void kbase_backend_timer_term(struct kbase_device *kbdev)
{
#if !MALI_USE_CSF
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
hrtimer_cancel(&backend->scheduling_timer);
#else /* !MALI_USE_CSF */
CSTD_UNUSED(kbdev);
#endif /* !MALI_USE_CSF */
}
void kbase_backend_timer_suspend(struct kbase_device *kbdev)
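
Both the job-scheduling timer and, when CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD is set, the gpu_metrics timer in the file above are hrtimers that re-arm themselves explicitly from their callback and return HRTIMER_NORESTART. A minimal sketch of that shape; everything prefixed my_ is hypothetical and the periodic work itself is elided:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

struct my_timer_ctx {
        struct hrtimer timer;
        bool running;
        u64 period_ns;
};

static enum hrtimer_restart my_timer_cb(struct hrtimer *t)
{
        struct my_timer_ctx *ctx = container_of(t, struct my_timer_ctx, timer);

        /* ... inspect running jobs or emit the tracepoint here ... */

        if (!ctx->running)
                return HRTIMER_NORESTART;

        /* Re-arm explicitly, as timer_callback() above does, instead of
         * returning HRTIMER_RESTART with hrtimer_forward(). */
        hrtimer_start(&ctx->timer, ns_to_ktime(ctx->period_ns), HRTIMER_MODE_REL);
        return HRTIMER_NORESTART;
}

static void my_timer_ctx_init(struct my_timer_ctx *ctx, u64 period_ns)
{
        hrtimer_init(&ctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        ctx->timer.function = my_timer_cb;
        ctx->period_ns = period_ns;
        ctx->running = true;
        hrtimer_start(&ctx->timer, ns_to_ktime(period_ns), HRTIMER_MODE_REL);
}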


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,8 +19,9 @@
*
*/
#include <linux/version_compat_defs.h>
#include <mali_kbase.h>
#include <mali_kbase_bits.h>
#include <mali_kbase_config_defaults.h>
#include <device/mali_kbase_device.h>
#include "mali_kbase_l2_mmu_config.h"
@@ -61,43 +62,30 @@ struct l2_mmu_config_limit {
*/
static const struct l2_mmu_config_limit limits[] = {
/* GPU, read, write */
{GPU_ID2_PRODUCT_LBEX,
{0, GENMASK(10, 5), 5},
{0, GENMASK(16, 12), 12} },
{GPU_ID2_PRODUCT_TBEX,
{0, GENMASK(10, 5), 5},
{0, GENMASK(16, 12), 12} },
{GPU_ID2_PRODUCT_TBAX,
{0, GENMASK(10, 5), 5},
{0, GENMASK(16, 12), 12} },
{GPU_ID2_PRODUCT_TTRX,
{0, GENMASK(12, 7), 7},
{0, GENMASK(17, 13), 13} },
{GPU_ID2_PRODUCT_TNAX,
{0, GENMASK(12, 7), 7},
{0, GENMASK(17, 13), 13} },
{GPU_ID2_PRODUCT_TGOX,
{KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
{KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
{GPU_ID2_PRODUCT_TNOX,
{KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
{KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
{ GPU_ID_PRODUCT_LBEX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } },
{ GPU_ID_PRODUCT_TBEX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } },
{ GPU_ID_PRODUCT_TBAX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } },
{ GPU_ID_PRODUCT_TTRX, { 0, GENMASK(12, 7), 7 }, { 0, GENMASK(17, 13), 13 } },
{ GPU_ID_PRODUCT_TNAX, { 0, GENMASK(12, 7), 7 }, { 0, GENMASK(17, 13), 13 } },
{ GPU_ID_PRODUCT_TGOX,
{ KBASE_3BIT_AID_32, GENMASK(14, 12), 12 },
{ KBASE_3BIT_AID_32, GENMASK(17, 15), 15 } },
{ GPU_ID_PRODUCT_TNOX,
{ KBASE_3BIT_AID_32, GENMASK(14, 12), 12 },
{ KBASE_3BIT_AID_32, GENMASK(17, 15), 15 } },
};
int kbase_set_mmu_quirks(struct kbase_device *kbdev)
{
/* All older GPUs had 2 bits for both fields, this is a default */
struct l2_mmu_config_limit limit = {
0, /* Any GPU not in the limits array defined above */
{KBASE_AID_32, GENMASK(25, 24), 24},
{KBASE_AID_32, GENMASK(27, 26), 26}
};
u32 product_model, gpu_id;
u32 mmu_config;
int i;
struct l2_mmu_config_limit limit = { 0, /* Any GPU not in the limits array defined above */
{ KBASE_AID_32, GENMASK(25, 24), 24 },
{ KBASE_AID_32, GENMASK(27, 26), 26 } };
u32 product_model;
u32 mmu_config = 0;
unsigned int i;
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
product_model = kbdev->gpu_props.gpu_id.product_model;
/* Limit the GPU bus bandwidth if the platform needs this. */
for (i = 0; i < ARRAY_SIZE(limits); i++) {
@@ -107,7 +95,8 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev)
}
}
mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG));
if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG)))
mmu_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG));
if (kbase_is_gpu_removed(kbdev))
return -EIO;
@@ -115,7 +104,7 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev)
mmu_config &= ~(limit.read.mask | limit.write.mask);
/* Can't use FIELD_PREP() macro here as the mask isn't constant */
mmu_config |= (limit.read.value << limit.read.shift) |
(limit.write.value << limit.write.shift);
(limit.write.value << limit.write.shift);
kbdev->hw_quirks_mmu = mmu_config;
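
Each entry of the limits[] table above describes the read and write halves of L2_MMU_CONFIG as {value, mask, shift} triples, and the driver notes that FIELD_PREP() cannot be used because the masks are not compile-time constants. A standalone userspace-style sketch of applying such a triple to a register word; the values in main() are illustrative only:

#include <stdint.h>
#include <stdio.h>

struct limit_field {
        uint32_t value; /* field value to program */
        uint32_t mask;  /* bit mask of the field within the register */
        uint32_t shift; /* position of the field's least significant bit */
};

static uint32_t apply_limits(uint32_t reg, struct limit_field rd, struct limit_field wr)
{
        reg &= ~(rd.mask | wr.mask);                            /* clear both fields */
        reg |= (rd.value << rd.shift) | (wr.value << wr.shift); /* program new limits */
        return reg;
}

int main(void)
{
        struct limit_field rd = { 0x3, 0x7u << 12, 12 };
        struct limit_field wr = { 0x3, 0x7u << 15, 15 };

        printf("0x%08x\n", apply_limits(0xffffffffu, rd, wr));
        return 0;
}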

File diff suppressed because it is too large.


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,54 +43,55 @@
#define model_error_log(module, ...) pr_err(__VA_ARGS__)
#define NUM_SLOTS 4 /*number of job slots */
#define NUM_SLOTS 4 /*number of job slots */
/*Errors Mask Codes*/
/* each bit of errors_mask is associated to a specific error:
* NON FAULT STATUS CODES: only the following are implemented since the others
* represent normal working statuses
*/
#define KBASE_JOB_INTERRUPTED (1<<0)
#define KBASE_JOB_STOPPED (1<<1)
#define KBASE_JOB_TERMINATED (1<<2)
#define KBASE_JOB_INTERRUPTED (1 << 0)
#define KBASE_JOB_STOPPED (1 << 1)
#define KBASE_JOB_TERMINATED (1 << 2)
/* JOB EXCEPTIONS: */
#define KBASE_JOB_CONFIG_FAULT (1<<3)
#define KBASE_JOB_POWER_FAULT (1<<4)
#define KBASE_JOB_READ_FAULT (1<<5)
#define KBASE_JOB_WRITE_FAULT (1<<6)
#define KBASE_JOB_AFFINITY_FAULT (1<<7)
#define KBASE_JOB_BUS_FAULT (1<<8)
#define KBASE_INSTR_INVALID_PC (1<<9)
#define KBASE_INSTR_INVALID_ENC (1<<10)
#define KBASE_INSTR_TYPE_MISMATCH (1<<11)
#define KBASE_INSTR_OPERAND_FAULT (1<<12)
#define KBASE_INSTR_TLS_FAULT (1<<13)
#define KBASE_INSTR_BARRIER_FAULT (1<<14)
#define KBASE_INSTR_ALIGN_FAULT (1<<15)
#define KBASE_DATA_INVALID_FAULT (1<<16)
#define KBASE_TILE_RANGE_FAULT (1<<17)
#define KBASE_ADDR_RANGE_FAULT (1<<18)
#define KBASE_OUT_OF_MEMORY (1<<19)
#define KBASE_UNKNOWN (1<<20)
#define KBASE_JOB_CONFIG_FAULT (1 << 3)
#define KBASE_JOB_POWER_FAULT (1 << 4)
#define KBASE_JOB_READ_FAULT (1 << 5)
#define KBASE_JOB_WRITE_FAULT (1 << 6)
#define KBASE_JOB_AFFINITY_FAULT (1 << 7)
#define KBASE_JOB_BUS_FAULT (1 << 8)
#define KBASE_INSTR_INVALID_PC (1 << 9)
#define KBASE_INSTR_INVALID_ENC (1 << 10)
#define KBASE_INSTR_TYPE_MISMATCH (1 << 11)
#define KBASE_INSTR_OPERAND_FAULT (1 << 12)
#define KBASE_INSTR_TLS_FAULT (1 << 13)
#define KBASE_INSTR_BARRIER_FAULT (1 << 14)
#define KBASE_INSTR_ALIGN_FAULT (1 << 15)
#define KBASE_DATA_INVALID_FAULT (1 << 16)
#define KBASE_TILE_RANGE_FAULT (1 << 17)
#define KBASE_ADDR_RANGE_FAULT (1 << 18)
#define KBASE_OUT_OF_MEMORY (1 << 19)
#define KBASE_UNKNOWN (1 << 20)
/* GPU EXCEPTIONS:*/
#define KBASE_DELAYED_BUS_FAULT (1<<21)
#define KBASE_SHAREABILITY_FAULT (1<<22)
#define KBASE_DELAYED_BUS_FAULT (1 << 21)
#define KBASE_SHAREABILITY_FAULT (1 << 22)
/* MMU EXCEPTIONS:*/
#define KBASE_TRANSLATION_FAULT (1<<23)
#define KBASE_PERMISSION_FAULT (1<<24)
#define KBASE_TRANSTAB_BUS_FAULT (1<<25)
#define KBASE_ACCESS_FLAG (1<<26)
#define KBASE_TRANSLATION_FAULT (1 << 23)
#define KBASE_PERMISSION_FAULT (1 << 24)
#define KBASE_TRANSTAB_BUS_FAULT (1 << 25)
#define KBASE_ACCESS_FLAG (1 << 26)
/* generic useful bitmasks */
#define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED)
#define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT)
#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT)
#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT | KBASE_SHAREABILITY_FAULT)
/* number of possible MMU address spaces */
#define NUM_MMU_AS 16 /* total number of MMU address spaces as in
#define NUM_MMU_AS \
16 /* total number of MMU address spaces as in
* MMU_IRQ_RAWSTAT register
*/
@@ -169,8 +170,7 @@ struct gpu_model_prfcnt_en {
void midgard_set_error(int job_slot);
int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,
struct kbase_model_control_params *params);
int gpu_model_control(void *h, struct kbase_model_control_params *params);
/**
* gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values
@@ -194,10 +194,10 @@ int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size);
*/
void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size);
void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
u64 *l2_present, u64 *shader_present);
void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
u64 l2_present, u64 shader_present);
void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present,
u64 *shader_present);
void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present,
u64 shader_present);
/* Clear the counter values array maintained by the dummy model */
void gpu_model_clear_prfcnt_values(void);
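
The IS_A_JOB_ERROR and IS_A_MMU_ERROR macros above build a class-wide mask from the class's lowest and highest single-bit codes: shifting the highest bit left once and subtracting the lowest bit sets every bit in between. A tiny standalone check of that identity, with local stand-ins for the kbase constants:

#include <stdio.h>

#define LOW_BIT  (1u << 0)  /* lowest code in the class, e.g. KBASE_JOB_INTERRUPTED */
#define HIGH_BIT (1u << 20) /* highest code in the class, e.g. KBASE_UNKNOWN */

/* (HIGH_BIT << 1) is the first bit above the class; subtracting LOW_BIT
 * leaves every bit from LOW_BIT up to HIGH_BIT set. */
#define CLASS_MASK ((HIGH_BIT << 1) - LOW_BIT)

int main(void)
{
        printf("0x%08x\n", CLASS_MASK); /* prints 0x001fffff: bits 0..20 */
        return 0;
}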


@@ -56,44 +56,37 @@ static void gpu_generate_error(void)
/* pick up a faulty mmu address space */
hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS;
/* pick up an mmu table level */
hw_error_status.mmu_table_level =
1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL);
hw_error_status.errors_mask =
(u32)(1 << (prandom_u32() % TOTAL_FAULTS));
hw_error_status.mmu_table_level = 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL);
hw_error_status.errors_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS));
/*is there also one or more errors? */
if ((prandom_u32() % 100) < multiple_error_probability) {
errors_num = 1 + (prandom_u32() %
(MAX_CONCURRENT_FAULTS - 1));
errors_num = 1 + (prandom_u32() % (MAX_CONCURRENT_FAULTS - 1));
while (errors_num-- > 0) {
u32 temp_mask;
temp_mask = (u32)(
1 << (prandom_u32() % TOTAL_FAULTS));
temp_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS));
/* below we check that no bit of the same error
* type is set again in the error mask
*/
if ((temp_mask & IS_A_JOB_ERROR) &&
(hw_error_status.errors_mask &
IS_A_JOB_ERROR)) {
(hw_error_status.errors_mask & IS_A_JOB_ERROR)) {
errors_num++;
continue;
}
if ((temp_mask & IS_A_MMU_ERROR) &&
(hw_error_status.errors_mask &
IS_A_MMU_ERROR)) {
(hw_error_status.errors_mask & IS_A_MMU_ERROR)) {
errors_num++;
continue;
}
if ((temp_mask & IS_A_GPU_ERROR) &&
(hw_error_status.errors_mask &
IS_A_GPU_ERROR)) {
(hw_error_status.errors_mask & IS_A_GPU_ERROR)) {
errors_num++;
continue;
}
/* this error mask is already set */
if ((hw_error_status.errors_mask | temp_mask) ==
hw_error_status.errors_mask) {
hw_error_status.errors_mask) {
errors_num++;
continue;
}
@@ -114,8 +107,7 @@ int job_atom_inject_error(struct kbase_error_params *params)
if (!new_elem) {
model_error_log(KBASE_CORE,
"\njob_atom_inject_error: kzalloc failed for new_elem\n"
);
"\njob_atom_inject_error: kzalloc failed for new_elem\n");
return -ENOMEM;
}
new_elem->params.jc = params->jc;
@@ -124,7 +116,7 @@ int job_atom_inject_error(struct kbase_error_params *params)
new_elem->params.faulty_mmu_as = params->faulty_mmu_as;
/*circular list below */
if (error_track_list == NULL) { /*no elements */
if (error_track_list == NULL) { /*no elements */
error_track_list = new_elem;
new_elem->next = error_track_list;
} else {
@@ -154,12 +146,9 @@ void midgard_set_error(int job_slot)
/* found a faulty atom matching with the
* current one
*/
hw_error_status.errors_mask =
walker->params.errors_mask;
hw_error_status.mmu_table_level =
walker->params.mmu_table_level;
hw_error_status.faulty_mmu_as =
walker->params.faulty_mmu_as;
hw_error_status.errors_mask = walker->params.errors_mask;
hw_error_status.mmu_table_level = walker->params.mmu_table_level;
hw_error_status.faulty_mmu_as = walker->params.faulty_mmu_as;
hw_error_status.current_job_slot = job_slot;
if (walker->next == walker) {
@@ -179,5 +168,5 @@ void midgard_set_error(int job_slot)
walker = walker->next;
} while (auxiliar->next != error_track_list);
}
#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */
#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */
}
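
gpu_generate_error() in the file above keeps drawing random fault bits but retries whenever the candidate's class (job, MMU or GPU) already has a fault set, or the exact bit is already present. A compact sketch of that acceptance test; fault_bit_acceptable and its parameters are illustrative names only:

#include <stdint.h>
#include <stdbool.h>

/* Return true if 'bit' may be added to 'mask' without duplicating a fault
 * class; class_masks lists the JOB / MMU / GPU class-wide masks. */
static bool fault_bit_acceptable(uint32_t mask, uint32_t bit,
                                 const uint32_t *class_masks, unsigned int nr_classes)
{
        unsigned int i;

        if ((mask | bit) == mask)
                return false; /* exact bit already set, caller should retry */

        for (i = 0; i < nr_classes; i++) {
                if ((bit & class_masks[i]) && (mask & class_masks[i]))
                        return false; /* this class already has a fault */
        }
        return true;
}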


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include "backend/gpu/mali_kbase_model_linux.h"
#include "device/mali_kbase_device.h"
@@ -39,16 +39,14 @@ struct model_irq_data {
static void serve_job_irq(struct work_struct *work)
{
struct model_irq_data *data = container_of(work, struct model_irq_data,
work);
struct model_irq_data *data = container_of(work, struct model_irq_data, work);
struct kbase_device *kbdev = data->kbdev;
/* Make sure no worker is already serving this IRQ */
while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) {
u32 val;
while ((val = kbase_reg_read(kbdev,
JOB_CONTROL_REG(JOB_IRQ_STATUS)))) {
while ((val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS)))) {
unsigned long flags;
/* Handle the IRQ */
@@ -67,19 +65,18 @@ static void serve_job_irq(struct work_struct *work)
static void serve_gpu_irq(struct work_struct *work)
{
struct model_irq_data *data = container_of(work, struct model_irq_data,
work);
struct model_irq_data *data = container_of(work, struct model_irq_data, work);
struct kbase_device *kbdev = data->kbdev;
/* Make sure no worker is already serving this IRQ */
while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) {
u32 val;
while ((val = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_IRQ_STATUS)))) {
/* Handle the IRQ */
while ((val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_STATUS)))) {
/* Handle the GPU_IRQ */
kbase_gpu_interrupt(kbdev, val);
}
}
kmem_cache_free(kbdev->irq_slab, data);
@@ -87,16 +84,14 @@ static void serve_gpu_irq(struct work_struct *work)
static void serve_mmu_irq(struct work_struct *work)
{
struct model_irq_data *data = container_of(work, struct model_irq_data,
work);
struct model_irq_data *data = container_of(work, struct model_irq_data, work);
struct kbase_device *kbdev = data->kbdev;
/* Make sure no worker is already serving this IRQ */
if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) {
u32 val;
while ((val = kbase_reg_read(kbdev,
MMU_REG(MMU_IRQ_STATUS)))) {
while ((val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS)))) {
/* Handle the IRQ */
kbase_mmu_interrupt(kbdev, val);
}
@@ -142,30 +137,6 @@ void gpu_device_raise_irq(void *model, u32 irq)
queue_work(kbdev->irq_workq, &data->work);
}
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->reg_op_lock, flags);
midgard_model_write_reg(kbdev->model, offset, value);
spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_reg_write);
u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(&kbdev->reg_op_lock, flags);
midgard_model_read_reg(kbdev->model, offset, &val);
spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
int kbase_install_interrupts(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
@@ -178,8 +149,8 @@ int kbase_install_interrupts(struct kbase_device *kbdev)
if (kbdev->irq_workq == NULL)
return -ENOMEM;
kbdev->irq_slab = kmem_cache_create("dummy_irq_slab",
sizeof(struct model_irq_data), 0, 0, NULL);
kbdev->irq_slab =
kmem_cache_create("dummy_irq_slab", sizeof(struct model_irq_data), 0, 0, NULL);
if (kbdev->irq_slab == NULL) {
destroy_workqueue(kbdev->irq_workq);
return -ENOMEM;
@@ -203,9 +174,8 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
irq_handler_t custom_handler,
int irq_type)
int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler,
int irq_type)
{
return 0;
}
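
The dummy-model workers above (serve_job_irq(), serve_mmu_irq(), serve_gpu_irq()) share a drain-until-clear shape: an atomic flag hands one work item ownership of the pending interrupt, and the simulated status register is re-read until it reports zero. A sketch of that shape; struct my_model_dev and the my_* helpers are stand-ins, stubbed out so the example is self-contained:

#include <linux/workqueue.h>
#include <linux/atomic.h>
#include <linux/types.h>

struct my_model_dev {
        struct work_struct irq_work;
        atomic_t serving_irq;
};

static u32 my_read_irq_status(struct my_model_dev *mdev)
{
        return 0; /* stub: a real model would return its simulated IRQ_STATUS */
}

static void my_handle_irq(struct my_model_dev *mdev, u32 status)
{
        /* stub: dispatch to the job/MMU/GPU interrupt handler */
}

static void my_serve_irq(struct work_struct *work)
{
        struct my_model_dev *mdev = container_of(work, struct my_model_dev, irq_work);
        u32 val;

        /* Claim the pending flag; if another worker got there first, stop. */
        while (atomic_cmpxchg(&mdev->serving_irq, 1, 0) == 1) {
                /* Drain every status bit raised while we were handling. */
                while ((val = my_read_irq_status(mdev)))
                        my_handle_irq(mdev, val);
        }
}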


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,11 +28,13 @@
static bool always_on_shaders_needed(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
return true;
}
static bool always_on_get_core_active(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
return true;
}
@@ -58,15 +60,15 @@ static void always_on_term(struct kbase_device *kbdev)
* and name.
*/
const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
"always_on", /* name */
always_on_init, /* init */
always_on_term, /* term */
always_on_shaders_needed, /* shaders_needed */
always_on_get_core_active, /* get_core_active */
NULL, /* handle_event */
KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
"always_on", /* name */
always_on_init, /* init */
always_on_term, /* term */
always_on_shaders_needed, /* shaders_needed */
always_on_get_core_active, /* get_core_active */
NULL, /* handle_event */
KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
#if MALI_USE_CSF
ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */
ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */
#endif
};


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -75,4 +75,3 @@ struct kbasep_pm_policy_always_on {
extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
#endif /* MALI_KBASE_PM_ALWAYS_ON_H */


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_pm.h>
@@ -52,30 +52,22 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)
callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
if (callbacks) {
kbdev->pm.backend.callback_power_on =
callbacks->power_on_callback;
kbdev->pm.backend.callback_power_off =
callbacks->power_off_callback;
kbdev->pm.backend.callback_power_suspend =
callbacks->power_suspend_callback;
kbdev->pm.backend.callback_power_resume =
callbacks->power_resume_callback;
kbdev->pm.callback_power_runtime_init =
callbacks->power_runtime_init_callback;
kbdev->pm.callback_power_runtime_term =
callbacks->power_runtime_term_callback;
kbdev->pm.backend.callback_power_runtime_on =
callbacks->power_runtime_on_callback;
kbdev->pm.backend.callback_power_on = callbacks->power_on_callback;
kbdev->pm.backend.callback_power_off = callbacks->power_off_callback;
kbdev->pm.backend.callback_power_suspend = callbacks->power_suspend_callback;
kbdev->pm.backend.callback_power_resume = callbacks->power_resume_callback;
kbdev->pm.callback_power_runtime_init = callbacks->power_runtime_init_callback;
kbdev->pm.callback_power_runtime_term = callbacks->power_runtime_term_callback;
kbdev->pm.backend.callback_power_runtime_on = callbacks->power_runtime_on_callback;
kbdev->pm.backend.callback_power_runtime_off =
callbacks->power_runtime_off_callback;
callbacks->power_runtime_off_callback;
kbdev->pm.backend.callback_power_runtime_idle =
callbacks->power_runtime_idle_callback;
kbdev->pm.backend.callback_soft_reset =
callbacks->soft_reset_callback;
callbacks->power_runtime_idle_callback;
kbdev->pm.backend.callback_soft_reset = callbacks->soft_reset_callback;
kbdev->pm.backend.callback_power_runtime_gpu_idle =
callbacks->power_runtime_gpu_idle_callback;
callbacks->power_runtime_gpu_idle_callback;
kbdev->pm.backend.callback_power_runtime_gpu_active =
callbacks->power_runtime_gpu_active_callback;
callbacks->power_runtime_gpu_active_callback;
if (callbacks->power_runtime_init_callback)
return callbacks->power_runtime_init_callback(kbdev);
@@ -83,19 +75,6 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)
return 0;
}
kbdev->pm.backend.callback_power_on = NULL;
kbdev->pm.backend.callback_power_off = NULL;
kbdev->pm.backend.callback_power_suspend = NULL;
kbdev->pm.backend.callback_power_resume = NULL;
kbdev->pm.callback_power_runtime_init = NULL;
kbdev->pm.callback_power_runtime_term = NULL;
kbdev->pm.backend.callback_power_runtime_on = NULL;
kbdev->pm.backend.callback_power_runtime_off = NULL;
kbdev->pm.backend.callback_power_runtime_idle = NULL;
kbdev->pm.backend.callback_soft_reset = NULL;
kbdev->pm.backend.callback_power_runtime_gpu_idle = NULL;
kbdev->pm.backend.callback_power_runtime_gpu_active = NULL;
return 0;
}
@@ -142,24 +121,17 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
mutex_init(&kbdev->pm.lock);
kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
WQ_HIGHPRI | WQ_UNBOUND, 1);
kbdev->pm.backend.gpu_poweroff_wait_wq =
alloc_workqueue("kbase_pm_poweroff_wait", WQ_HIGHPRI | WQ_UNBOUND, 1);
if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
return -ENOMEM;
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
kbase_pm_gpu_poweroff_wait_wq);
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, kbase_pm_gpu_poweroff_wait_wq);
kbdev->pm.backend.ca_cores_enabled = ~0ull;
kbdev->pm.backend.gpu_powered = false;
kbdev->pm.backend.gpu_ready = false;
kbdev->pm.suspending = false;
#ifdef CONFIG_MALI_ARBITER_SUPPORT
kbase_pm_set_gpu_lost(kbdev, false);
#endif
#ifdef CONFIG_MALI_BIFROST_DEBUG
kbdev->pm.backend.driver_ready_for_irqs = false;
#endif /* CONFIG_MALI_BIFROST_DEBUG */
init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
#if !MALI_USE_CSF
@@ -187,6 +159,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
if (kbase_pm_ca_init(kbdev) != 0)
goto workq_fail;
@@ -195,10 +168,8 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
if (kbase_pm_state_machine_init(kbdev) != 0)
goto pm_state_machine_fail;
kbdev->pm.backend.hwcnt_desired = false;
kbdev->pm.backend.hwcnt_disabled = true;
INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work,
kbase_pm_hwcnt_disable_worker);
INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, kbase_pm_hwcnt_disable_worker);
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
@@ -209,35 +180,24 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
kbdev->pm.backend.callback_power_runtime_gpu_idle;
#endif
if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
kbdev->pm.backend.l2_always_on = false;
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED))
return 0;
}
/* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */
if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) {
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336))
kbdev->pm.backend.l2_always_on = true;
else
kbdev->pm.backend.l2_always_on = false;
return 0;
}
/* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */
kbdev->pm.backend.l2_always_on = false;
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) {
kbdev->pm.backend.gpu_clock_slow_down_wa = true;
kbdev->pm.backend.gpu_clock_suspend_freq = 0;
kbdev->pm.backend.gpu_clock_slow_down_desired = true;
kbdev->pm.backend.gpu_clock_slowed_down = false;
INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work,
kbase_pm_gpu_clock_control_worker);
} else
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
kbase_pm_gpu_clock_control_worker);
}
return 0;
@@ -299,8 +259,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev)
return;
}
#endif
WARN_ON(backend->shaders_state !=
KBASE_SHADERS_OFF_CORESTACK_OFF ||
WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF ||
backend->l2_state != KBASE_L2_OFF);
#if MALI_USE_CSF
mcu_state = backend->mcu_state;
@@ -351,8 +310,8 @@ static void pm_handle_power_off(struct kbase_device *kbdev)
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
pm.backend.gpu_poweroff_wait_work);
struct kbase_device *kbdev =
container_of(data, struct kbase_device, pm.backend.gpu_poweroff_wait_work);
struct kbase_pm_device_data *pm = &kbdev->pm;
struct kbase_pm_backend_data *backend = &pm->backend;
unsigned long flags;
@@ -413,8 +372,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev)
kbdev->previous_frequency = kbdev->current_nominal_freq;
/* Slow down GPU clock to the suspend clock*/
kbase_devfreq_force_freq(kbdev,
kbdev->pm.backend.gpu_clock_suspend_freq);
kbase_devfreq_force_freq(kbdev, kbdev->pm.backend.gpu_clock_suspend_freq);
#elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */
@@ -428,8 +386,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev)
kbdev->previous_frequency = clk_get_rate(clk);
/* Slow down GPU clock to the suspend clock*/
if (WARN_ON_ONCE(clk_set_rate(clk,
kbdev->pm.backend.gpu_clock_suspend_freq)))
if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->pm.backend.gpu_clock_suspend_freq)))
dev_err(kbdev->dev, "Failed to set suspend freq\n");
#endif /* CONFIG_MALI_BIFROST_DVFS */
@@ -459,8 +416,7 @@ static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev)
/* Restore GPU clock */
if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency)))
dev_err(kbdev->dev, "Failed to restore freq (%lu)\n",
kbdev->previous_frequency);
dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", kbdev->previous_frequency);
/* Restart the metrics gathering framework */
kbase_pm_metrics_start(kbdev);
@@ -470,8 +426,8 @@ static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev)
static void kbase_pm_gpu_clock_control_worker(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
pm.backend.gpu_clock_control_work);
struct kbase_device *kbdev =
container_of(data, struct kbase_device, pm.backend.gpu_clock_control_work);
struct kbase_pm_device_data *pm = &kbdev->pm;
struct kbase_pm_backend_data *backend = &pm->backend;
unsigned long flags;
@@ -479,12 +435,10 @@ static void kbase_pm_gpu_clock_control_worker(struct work_struct *data)
/* Determine if GPU clock control is required */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (!backend->gpu_clock_slowed_down &&
backend->gpu_clock_slow_down_desired) {
if (!backend->gpu_clock_slowed_down && backend->gpu_clock_slow_down_desired) {
slow_down = true;
backend->gpu_clock_slowed_down = true;
} else if (backend->gpu_clock_slowed_down &&
!backend->gpu_clock_slow_down_desired) {
} else if (backend->gpu_clock_slowed_down && !backend->gpu_clock_slow_down_desired) {
normalize = true;
backend->gpu_clock_slowed_down = false;
}
@@ -507,8 +461,8 @@ static void kbase_pm_gpu_clock_control_worker(struct work_struct *data)
static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
pm.backend.hwcnt_disable_work);
struct kbase_device *kbdev =
container_of(data, struct kbase_device, pm.backend.hwcnt_disable_work);
struct kbase_pm_device_data *pm = &kbdev->pm;
struct kbase_pm_backend_data *backend = &pm->backend;
unsigned long flags;
@@ -575,18 +529,19 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
unsigned long flags;
int ret = 0;
int ret;
WARN_ON(kbdev->pm.active_count);
kbase_pm_wait_for_poweroff_work_complete(kbdev);
ret = kbase_pm_wait_for_poweroff_work_complete(kbdev);
if (ret)
return ret;
kbase_pm_lock(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(backend->poweroff_wait_in_progress);
WARN_ON(backend->gpu_sleep_mode_active);
if (backend->gpu_powered) {
backend->mcu_desired = false;
backend->l2_desired = false;
kbase_pm_update_state(kbdev);
@@ -594,9 +549,8 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
ret = kbase_pm_wait_for_desired_state(kbdev);
if (ret) {
dev_warn(
kbdev->dev,
"Wait for pm state change failed on synchronous power off");
dev_warn(kbdev->dev,
"Wait for pm state change failed on synchronous power off");
ret = -EBUSY;
goto out;
}
@@ -605,8 +559,7 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
* throughout and so need to invoke the idle callback before
* the power down.
*/
if (backend->callback_power_runtime_gpu_idle &&
!backend->gpu_idled) {
if (backend->callback_power_runtime_gpu_idle && !backend->gpu_idled) {
backend->callback_power_runtime_gpu_idle(kbdev);
backend->gpu_idled = true;
}
@@ -664,25 +617,6 @@ unlock_hwaccess:
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
static bool is_poweroff_in_progress(struct kbase_device *kbdev)
{
bool ret;
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return ret;
}
void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
{
wait_event_killable(kbdev->pm.backend.poweroff_wait,
is_poweroff_in_progress(kbdev));
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
/**
* is_gpu_powered_down - Check whether GPU is powered down
*
@@ -704,13 +638,11 @@ static bool is_gpu_powered_down(struct kbase_device *kbdev)
void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev)
{
wait_event_killable(kbdev->pm.backend.poweroff_wait,
is_gpu_powered_down(kbdev));
wait_event_killable(kbdev->pm.backend.poweroff_wait, is_gpu_powered_down(kbdev));
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_gpu_power_down);
int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
unsigned int flags)
int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
{
unsigned long irq_flags;
int ret;
@@ -731,8 +663,7 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
return ret;
}
#if MALI_USE_CSF
kbdev->pm.debug_core_mask =
kbdev->gpu_props.props.raw_props.shader_present;
kbdev->pm.debug_core_mask = kbdev->gpu_props.shader_present;
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
/* Set the initial value for 'shaders_avail'. It would be later
* modified only from the MCU state machine, when the shader core
@@ -744,9 +675,8 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
#else
kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
kbdev->pm.debug_core_mask[1] =
kbdev->pm.debug_core_mask[2] =
kbdev->gpu_props.props.raw_props.shader_present;
kbdev->pm.debug_core_mask[1] = kbdev->pm.debug_core_mask[2] =
kbdev->gpu_props.shader_present;
#endif
/* Pretend the GPU is active to prevent a power policy turning the GPU
@@ -763,13 +693,10 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
}
#endif
spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags);
/* Ensure cycle counter is off */
kbdev->pm.backend.gpu_cycle_counter_requests = 0;
spin_unlock_irqrestore(
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags);
/* We are ready to receive IRQ's now as power policy is set up, so
* enable them now.
@@ -881,23 +808,23 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
}
KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask);
#else
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
u64 new_core_mask_js0, u64 new_core_mask_js1,
u64 new_core_mask_js2)
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0,
u64 new_core_mask_js1, u64 new_core_mask_js2)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
lockdep_assert_held(&kbdev->pm.lock);
if (kbase_dummy_job_wa_enabled(kbdev)) {
dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
dev_warn_once(
kbdev->dev,
"Change of core mask not supported for slot 0 as dummy job WA is enabled");
new_core_mask_js0 = kbdev->pm.debug_core_mask[0];
}
kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
new_core_mask_js2;
kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2;
kbase_pm_update_dynamic_cores_onoff(kbdev);
}
@@ -936,7 +863,15 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
kbase_pm_unlock(kbdev);
kbase_pm_wait_for_poweroff_work_complete(kbdev);
ret = kbase_pm_wait_for_poweroff_work_complete(kbdev);
if (ret) {
#if !MALI_USE_CSF
mutex_lock(&kbdev->js_data.runpool_mutex);
kbase_backend_timer_resume(kbdev);
mutex_unlock(&kbdev->js_data.runpool_mutex);
#endif /* !MALI_USE_CSF */
return ret;
}
#endif
WARN_ON(kbdev->pm.backend.gpu_powered);
@@ -945,13 +880,15 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
if (kbdev->pm.backend.callback_power_suspend)
kbdev->pm.backend.callback_power_suspend(kbdev);
return ret;
return 0;
}
void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
{
kbase_pm_lock(kbdev);
/* System resume callback has begun */
kbdev->pm.resuming = true;
kbdev->pm.suspending = false;
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbase_pm_is_gpu_lost(kbdev)) {
@@ -966,7 +903,6 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
kbase_backend_timer_resume(kbdev);
#endif /* !MALI_USE_CSF */
wake_up_all(&kbdev->pm.resume_wait);
kbase_pm_unlock(kbdev);
}
@@ -982,22 +918,19 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
mutex_lock(&kbdev->pm.lock);
mutex_lock(&arb_vm_state->vm_state_lock);
if (kbdev->pm.backend.gpu_powered &&
!kbase_pm_is_gpu_lost(kbdev)) {
if (kbdev->pm.backend.gpu_powered && !kbase_pm_is_gpu_lost(kbdev)) {
kbase_pm_set_gpu_lost(kbdev, true);
/* GPU is no longer mapped to VM. So no interrupts will
* be received and Mali registers have been replaced by
* dummy RAM
*/
WARN(!kbase_is_gpu_removed(kbdev),
"GPU is still available after GPU lost event\n");
WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n");
/* Full GPU reset will have been done by hypervisor, so
* cancel
*/
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_NOT_PENDING);
atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
kbase_synchronize_irqs(kbdev);
@@ -1012,7 +945,7 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
/* Cancel any pending HWC dumps */
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING ||
kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
@@ -1064,9 +997,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
if (ret) {
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
dev_warn(
kbdev->dev,
"Waiting for MCU to wake up failed on runtime suspend");
dev_warn(kbdev->dev, "Waiting for MCU to wake up failed on runtime suspend");
kbdev->pm.backend.gpu_wakeup_override = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return ret;
@@ -1074,9 +1005,9 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
/* Check if a Doorbell mirror interrupt occurred meanwhile */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.gpu_sleep_mode_active &&
kbdev->pm.backend.exit_gpu_sleep_mode) {
dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend after L2 power up");
if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) {
dev_dbg(kbdev->dev,
"DB mirror interrupt occurred during runtime suspend after L2 power up");
kbdev->pm.backend.gpu_wakeup_override = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return -EBUSY;
@@ -1099,8 +1030,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
/* After re-acquiring the kbdev->pm.lock, check if the device
* became active (or active then idle) meanwhile.
*/
if (kbdev->pm.active_count ||
kbdev->pm.backend.poweroff_wait_in_progress) {
if (kbdev->pm.active_count || kbdev->pm.backend.poweroff_wait_in_progress) {
dev_dbg(kbdev->dev,
"Device became active on runtime suspend after suspending Scheduler");
ret = -EBUSY;
@@ -1175,17 +1105,16 @@ int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev)
* the fact that pm.lock is released before invoking Scheduler function
* to suspend the CSGs.
*/
if (kbdev->pm.active_count ||
kbdev->pm.backend.poweroff_wait_in_progress) {
if (kbdev->pm.active_count || kbdev->pm.backend.poweroff_wait_in_progress) {
dev_dbg(kbdev->dev, "Device became active on runtime suspend");
ret = -EBUSY;
goto unlock;
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.gpu_sleep_mode_active &&
kbdev->pm.backend.exit_gpu_sleep_mode) {
dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend before L2 power up");
if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) {
dev_dbg(kbdev->dev,
"DB mirror interrupt occurred during runtime suspend before L2 power up");
ret = -EBUSY;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
goto unlock;
@@ -1203,7 +1132,8 @@ int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev)
/* Disable interrupts and turn off the GPU clocks */
if (!kbase_pm_clock_off(kbdev)) {
dev_warn(kbdev->dev, "Failed to turn off GPU clocks on runtime suspend, MMU faults pending");
dev_warn(kbdev->dev,
"Failed to turn off GPU clocks on runtime suspend, MMU faults pending");
WARN_ON(!kbdev->poweroff_pending);
/* Previous call to kbase_pm_clock_off() would have disabled


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,8 +37,7 @@ int kbase_pm_ca_init(struct kbase_device *kbdev)
if (kbdev->current_core_mask)
pm_backend->ca_cores_enabled = kbdev->current_core_mask;
else
pm_backend->ca_cores_enabled =
kbdev->gpu_props.props.raw_props.shader_present;
pm_backend->ca_cores_enabled = kbdev->gpu_props.shader_present;
#endif
return 0;
@@ -46,6 +45,7 @@ int kbase_pm_ca_init(struct kbase_device *kbdev)
void kbase_pm_ca_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
#ifdef CONFIG_MALI_BIFROST_DEVFREQ
@@ -70,13 +70,15 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
old_core_mask = pm_backend->ca_cores_enabled;
#else
if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
core_mask, kbdev->pm.debug_core_mask_all);
dev_err(kbdev->dev,
"OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
core_mask, kbdev->pm.debug_core_mask_all);
goto unlock;
}
if (kbase_dummy_job_wa_enabled(kbdev)) {
dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
dev_err_once(kbdev->dev,
"Dynamic core scaling not supported as dummy job WA is enabled");
goto unlock;
}
#endif /* MALI_USE_CSF */
@@ -98,8 +100,7 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
}
#endif
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
pm_backend->ca_cores_enabled);
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", pm_backend->ca_cores_enabled);
return;
unlock:
@@ -125,12 +126,10 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
* to limit it to be a subgroup of the curr config, otherwise the
* shaders state machine on the PM does not evolve.
*/
return kbdev->gpu_props.curr_config.shader_present &
kbdev->pm.backend.ca_cores_enabled &
debug_core_mask;
return kbdev->gpu_props.curr_config.shader_present & kbdev->pm.backend.ca_cores_enabled &
debug_core_mask;
#else
return kbdev->gpu_props.curr_config.shader_present &
debug_core_mask;
return kbdev->gpu_props.curr_config.shader_present & debug_core_mask;
#endif
}
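The mask returned by kbase_pm_ca_get_core_mask() above is simply the bitwise AND of the shader cores in the current configuration, the devfreq-enabled cores and the debug core mask. A minimal standalone sketch, using made-up mask values rather than anything read from real hardware, illustrates the intersection:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical values, chosen only to illustrate the AND above */
	uint64_t shader_present   = 0xFF; /* 8 cores fitted */
	uint64_t ca_cores_enabled = 0x0F; /* devfreq OPP limits use to the lower 4 */
	uint64_t debug_core_mask  = 0x3C; /* sysfs/debugfs debug mask */

	uint64_t usable = shader_present & ca_cores_enabled & debug_core_mask;

	printf("usable core mask: 0x%llx\n", (unsigned long long)usable); /* 0xc */
	return 0;
}

Any core cleared in either the devfreq mask or the debug mask drops out of the usable set, which is why kbase_devfreq_set_core_mask() above rejects an OPP core mask that does not intersect the debug mask.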


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -71,7 +71,7 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
* Calls into the core availability policy
*/
void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
u64 cores_transitioning);
u64 cores_transitioning);
/**
* kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -56,4 +56,3 @@ extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops;
void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -52,15 +52,15 @@ static void coarse_demand_term(struct kbase_device *kbdev)
* and name.
*/
const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
"coarse_demand", /* name */
coarse_demand_init, /* init */
coarse_demand_term, /* term */
coarse_demand_shaders_needed, /* shaders_needed */
coarse_demand_get_core_active, /* get_core_active */
NULL, /* handle_event */
KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
"coarse_demand", /* name */
coarse_demand_init, /* init */
coarse_demand_term, /* term */
coarse_demand_shaders_needed, /* shaders_needed */
coarse_demand_get_core_active, /* get_core_active */
NULL, /* handle_event */
KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
#if MALI_USE_CSF
COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */
COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */
#endif
};


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -49,17 +49,22 @@ struct kbase_jd_atom;
* - kbase_pm_get_present_cores()
* - kbase_pm_get_active_cores()
* - kbase_pm_get_trans_cores()
* - kbase_pm_get_ready_cores().
* - kbase_pm_get_ready_cores()
* - kbase_pm_get_state()
* - core_type_to_reg()
* - pwr_cmd_constructor()
* - valid_to_power_up()
* - valid_to_power_down()
* - kbase_pm_invoke()
*
* They specify which type of core should be acted on. These values are set in
* a manner that allows core_type_to_reg() function to be simpler and more
* efficient.
* They specify which type of core should be acted on.
*/
enum kbase_pm_core_type {
KBASE_PM_CORE_L2 = L2_PRESENT_LO,
KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
KBASE_PM_CORE_STACK = STACK_PRESENT_LO
KBASE_PM_CORE_L2 = GPU_CONTROL_ENUM(L2_PRESENT),
KBASE_PM_CORE_SHADER = GPU_CONTROL_ENUM(SHADER_PRESENT),
KBASE_PM_CORE_TILER = GPU_CONTROL_ENUM(TILER_PRESENT),
KBASE_PM_CORE_STACK = GPU_CONTROL_ENUM(STACK_PRESENT)
};
/*
@@ -67,7 +72,7 @@ enum kbase_pm_core_type {
* state machine.
*/
enum kbase_l2_core_state {
#define KBASEP_L2_STATE(n) KBASE_L2_ ## n,
#define KBASEP_L2_STATE(n) KBASE_L2_##n,
#include "mali_kbase_pm_l2_states.h"
#undef KBASEP_L2_STATE
};
@@ -77,7 +82,7 @@ enum kbase_l2_core_state {
* enum kbase_mcu_state - The states used for the MCU state machine.
*/
enum kbase_mcu_state {
#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n,
#define KBASEP_MCU_STATE(n) KBASE_MCU_##n,
#include "mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
};
@@ -87,7 +92,7 @@ enum kbase_mcu_state {
* enum kbase_shader_core_state - The states used for the shaders' state machine.
*/
enum kbase_shader_core_state {
#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_##n,
#include "mali_kbase_pm_shader_states.h"
#undef KBASEP_SHADER_STATE
};
@@ -353,6 +358,11 @@ union kbase_pm_policy_data {
* mode for the saving the HW state before power down.
* @db_mirror_interrupt_enabled: Flag tracking if the Doorbell mirror interrupt
* is enabled or not.
* @l2_force_off_after_mcu_halt: Flag to indicate that L2 cache power down must
* be done after performing the MCU halt. Flag is set
* immediately after the MCU halt and cleared
* after the L2 cache power down. MCU can't be
* re-enabled whilst the flag is set.
* @in_reset: True if a GPU is resetting and normal power manager operation is
* suspended
* @partial_shaderoff: True if we want to partial power off shader cores,
@@ -470,6 +480,8 @@ struct kbase_pm_backend_data {
bool gpu_wakeup_override;
bool db_mirror_interrupt_enabled;
#endif
bool l2_force_off_after_mcu_halt;
#endif
bool l2_desired;
bool l2_always_on;
@@ -498,16 +510,16 @@ struct kbase_pm_backend_data {
#if MALI_USE_CSF
/* CSF PM flag, signaling that the MCU shader Core should be kept on */
#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0)
#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0)
/* CSF PM flag, signaling no scheduler suspension on idle groups */
#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1)
/* CSF PM flag, signaling no scheduler suspension on no runnable groups */
#define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2)
/* The following flags corresponds to existing defined PM policies */
#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \
CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \
CSF_DYNAMIC_PM_SCHED_NO_SUSPEND)
#define ALWAYS_ON_PM_SCHED_FLAGS \
(CSF_DYNAMIC_PM_CORE_KEEP_ON | CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \
CSF_DYNAMIC_PM_SCHED_NO_SUSPEND)
#define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0)
#if !MALI_CUSTOMER_RELEASE
#define ALWAYS_ON_DEMAND_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE)
@@ -547,7 +559,7 @@ enum kbase_pm_policy_event {
* @KBASE_PM_POLICY_EVENT_TIMER_MISS: Indicates that the GPU did not
* become active before the Shader Tick Timer timeout occurred.
*/
KBASE_PM_POLICY_EVENT_TIMER_MISS,
KBASE_PM_POLICY_EVENT_TIMER_MISS
};
/**
@@ -631,8 +643,7 @@ struct kbase_pm_policy {
* valid pointer)
* @event: The id of the power event that has occurred
*/
void (*handle_event)(struct kbase_device *kbdev,
enum kbase_pm_policy_event event);
void (*handle_event)(struct kbase_device *kbdev, enum kbase_pm_policy_event event);
enum kbase_pm_policy_id id;

File diff suppressed because it is too large


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,7 +31,6 @@
#include "backend/gpu/mali_kbase_pm_ca.h"
#include "mali_kbase_pm_policy.h"
/**
* kbase_pm_dev_idle - The GPU is idle.
*
@@ -56,7 +55,7 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev);
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
* @core_type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) present in the GPU device and also a count of
@@ -64,15 +63,14 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev);
*
* Return: The bit mask of cores present
*/
u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type);
/**
* kbase_pm_get_active_cores - Get details of the cores that are currently
* active in the device.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
* @core_type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are actively processing work (i.e.
@@ -80,15 +78,14 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
*
* Return: The bit mask of active cores
*/
u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type);
/**
* kbase_pm_get_trans_cores - Get details of the cores that are currently
* transitioning between power states.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
* @core_type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are currently transitioning between
@@ -96,15 +93,14 @@ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
*
* Return: The bit mask of transitioning cores
*/
u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type);
/**
* kbase_pm_get_ready_cores - Get details of the cores that are currently
* powered and ready for jobs.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
* @core_type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are powered and ready for jobs (they may
@@ -112,8 +108,7 @@ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
*
* Return: The bit mask of ready cores
*/
u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type);
/**
* kbase_pm_clock_on - Turn the clock for the device on, and enable device
@@ -224,7 +219,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
* power off in progress and kbase_pm_context_active() was called instead of
* kbase_csf_scheduler_pm_active().
*
* Return: 0 on success, error code on error
* Return: 0 on success, or -ETIMEDOUT code on timeout error.
*/
int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
#else
@@ -247,11 +242,26 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
* must ensure that this is not the case by, for example, calling
* kbase_pm_wait_for_poweroff_work_complete()
*
* Return: 0 on success, error code on error
* Return: 0 on success, or -ETIMEDOUT error code on timeout error.
*/
int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
#endif
/**
* kbase_pm_killable_wait_for_desired_state - Wait for the desired power state to be
* reached in a killable state.
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function is the same as kbase_pm_wait_for_desired_state(), except that it
* allows the SIGKILL signal to interrupt the wait.
* It is intended to be called from code that executes in an ioctl or other
* userspace context, wherever it is safe to do so.
*
* Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the wait was
* interrupted.
*/
int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev);
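A caller of these killable wait variants is expected to distinguish an interrupted wait from a timed-out one. The following is only a rough sketch of such a caller, with a hypothetical wrapper name that does not exist in the driver, and it assumes the usual kbase headers:

/* Hypothetical caller sketch, not part of the DDK sources. */
static int example_wait_for_gpu_ready(struct kbase_device *kbdev)
{
	int err = kbase_pm_killable_wait_for_desired_state(kbdev);

	/* Pass -ERESTARTSYS straight back so the interrupted ioctl can be
	 * handled by the syscall layer; treat -ETIMEDOUT as a real failure.
	 */
	if (err == -ETIMEDOUT)
		dev_warn(kbdev->dev, "GPU did not reach the desired power state in time");

	return err;
}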
/**
* kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
*
@@ -333,6 +343,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev);
* shader poweroff timer
* @kbdev: Device pointer
*
* This function must be called only when a kbase device is initialized.
*
* Return: 0 on success, error code on error
*/
int kbase_pm_state_machine_init(struct kbase_device *kbdev);
@@ -360,8 +372,8 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev);
* kbasep_pm_metrics_init - Initialize the metrics gathering framework.
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This must be called before other metric gathering APIs are called.
*
* This function must be called only when a kbase device is initialized and
* also must be called before other metric gathering APIs are called.
*
* Return: 0 on success, error code on error
*/
@@ -467,8 +479,26 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
* This function effectively just waits for the @gpu_poweroff_wait_work work
* item to complete, if it was enqueued. GPU may not have been powered down
* before this function returns.
*
* Return: 0 on success, error code on error
*/
void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev);
int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev);
/**
* kbase_pm_killable_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to
* complete in killable state.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function is the same as kbase_pm_wait_for_poweroff_work_complete(), except
* that it allows the SIGKILL signal to interrupt the wait.
* It is intended to be called from code that executes in an ioctl or other
* userspace context, wherever it is safe to do so.
*
* Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the wait was
* interrupted.
*/
int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev);
/**
* kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete
@@ -484,8 +514,9 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Setup the power management callbacks and initialize/enable the runtime-pm
* for the Mali GPU platform device, using the callback function. This must be
* called before the kbase_pm_register_access_enable() function.
* for the Mali GPU platform device, using the callback function.
* This function must be called only when a kbase device is initialized and
* also must be called before the kbase_pm_register_access_enable() function.
*
* Return: 0 on success, error code on error
*/
@@ -569,8 +600,7 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
void kbase_pm_do_poweroff(struct kbase_device *kbdev);
#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS)
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
struct kbasep_pm_metrics *last,
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, struct kbasep_pm_metrics *last,
struct kbasep_pm_metrics *diff);
#endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */
@@ -605,8 +635,8 @@ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
*
* Return: Returns 0 on failure and non zero on success.
*/
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
u32 util_gl_share, u32 util_cl_share[2]);
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share,
u32 util_cl_share[2]);
#endif
#endif /* CONFIG_MALI_BIFROST_DVFS */
@@ -621,8 +651,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev);
*
* Caller must hold hwaccess_lock
*/
void kbase_pm_metrics_update(struct kbase_device *kbdev,
ktime_t *now);
void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now);
/**
* kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
@@ -790,8 +819,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev);
*
* Return: true if MCU is inactive
*/
bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev,
enum kbase_mcu_state state);
bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state state);
/**
* kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be
@@ -802,13 +830,11 @@ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev,
*
* Return: true if allowed to enter the suspended state.
*/
static inline
bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev)
static inline bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
return !(kbdev->pm.backend.csf_pm_sched_flags &
CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE);
return !(kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE);
}
/**
@@ -820,13 +846,11 @@ bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev)
*
* Return: true if allowed to enter the suspended state.
*/
static inline
bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev)
static inline bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
return !(kbdev->pm.backend.csf_pm_sched_flags &
CSF_DYNAMIC_PM_SCHED_NO_SUSPEND);
return !(kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_SCHED_NO_SUSPEND);
}
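The two inline helpers above only test the CSF_DYNAMIC_PM_* bits defined earlier in this header. A standalone illustration, with the flag values mirrored locally purely for the example, shows why a policy using ALWAYS_ON_PM_SCHED_FLAGS blocks both forms of scheduler suspension while COARSE_ON_DEMAND_PM_SCHED_FLAGS allows them:

#include <stdio.h>

/* Flag values mirrored from the definitions above, for a standalone example */
#define CSF_DYNAMIC_PM_CORE_KEEP_ON      (1 << 0)
#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1)
#define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND  (1 << 2)

#define ALWAYS_ON_PM_SCHED_FLAGS \
	(CSF_DYNAMIC_PM_CORE_KEEP_ON | CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \
	 CSF_DYNAMIC_PM_SCHED_NO_SUSPEND)
#define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0)

int main(void)
{
	unsigned int flags = ALWAYS_ON_PM_SCHED_FLAGS;

	/* always_on: neither idle groups nor "no runnable groups" allow suspension */
	printf("idle groups suspendable:  %d\n", !(flags & CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE)); /* 0 */
	printf("no runnables suspendable: %d\n", !(flags & CSF_DYNAMIC_PM_SCHED_NO_SUSPEND));  /* 0 */

	flags = COARSE_ON_DEMAND_PM_SCHED_FLAGS;

	/* coarse_demand: both checks allow the scheduler to be suspended */
	printf("idle groups suspendable:  %d\n", !(flags & CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE)); /* 1 */
	return 0;
}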
/**
@@ -842,8 +866,7 @@ static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
return kbdev->pm.backend.csf_pm_sched_flags &
CSF_DYNAMIC_PM_CORE_KEEP_ON;
return kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_CORE_KEEP_ON;
}
/**
@@ -857,6 +880,8 @@ static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev)
{
bool in_desired_state = true;
lockdep_assert_held(&kbdev->hwaccess_lock);
if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
in_desired_state = false;
else if (!kbase_pm_is_mcu_desired(kbdev) &&
@@ -961,13 +986,12 @@ static inline void kbase_pm_enable_db_mirror_interrupt(struct kbase_device *kbde
lockdep_assert_held(&kbdev->hwaccess_lock);
if (!kbdev->pm.backend.db_mirror_interrupt_enabled) {
u32 irq_mask = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_IRQ_MASK));
u32 irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK));
WARN_ON(irq_mask & DOORBELL_MIRROR);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask | DOORBELL_MIRROR);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK),
irq_mask | DOORBELL_MIRROR);
kbdev->pm.backend.db_mirror_interrupt_enabled = true;
}
}
@@ -985,11 +1009,10 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd
lockdep_assert_held(&kbdev->hwaccess_lock);
if (kbdev->pm.backend.db_mirror_interrupt_enabled) {
u32 irq_mask = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_IRQ_MASK));
u32 irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~DOORBELL_MIRROR);
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK),
irq_mask & ~DOORBELL_MIRROR);
kbdev->pm.backend.db_mirror_interrupt_enabled = false;
}
}


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,7 +43,7 @@
* This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
* under 11s. Exceeding this will cause overflow
*/
#define KBASE_PM_TIME_SHIFT 8
#define KBASE_PM_TIME_SHIFT 8
#endif
#if MALI_USE_CSF
@@ -111,9 +111,6 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
kbdev->pm.backend.metrics.values.time_busy = 0;
kbdev->pm.backend.metrics.values.time_idle = 0;
kbdev->pm.backend.metrics.values.time_in_protm = 0;
perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR;
@@ -126,39 +123,21 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
/* We need the GPU_ACTIVE counter */
perf_counter.idx = GPU_ACTIVE_CNT_IDX;
err = kbase_ipa_control_register(
kbdev, &perf_counter, NUM_PERF_COUNTERS,
&kbdev->pm.backend.metrics.ipa_control_client);
err = kbase_ipa_control_register(kbdev, &perf_counter, NUM_PERF_COUNTERS,
&kbdev->pm.backend.metrics.ipa_control_client);
if (err) {
dev_err(kbdev->dev,
"Failed to register IPA with kbase_ipa_control: err=%d",
err);
dev_err(kbdev->dev, "Failed to register IPA with kbase_ipa_control: err=%d", err);
return -1;
}
#else
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
kbdev->pm.backend.metrics.gpu_active = false;
kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
kbdev->pm.backend.metrics.active_gl_ctx[2] = 0;
kbdev->pm.backend.metrics.values.time_busy = 0;
kbdev->pm.backend.metrics.values.time_idle = 0;
kbdev->pm.backend.metrics.values.busy_cl[0] = 0;
kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
kbdev->pm.backend.metrics.values.busy_gl = 0;
#endif
spin_lock_init(&kbdev->pm.backend.metrics.lock);
#ifdef CONFIG_MALI_BIFROST_DVFS
hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
kbdev->pm.backend.metrics.timer.function = dvfs_callback;
kbdev->pm.backend.metrics.initialized = true;
atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
@@ -189,8 +168,9 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_BIFROST_DVFS */
#if MALI_USE_CSF
kbase_ipa_control_unregister(
kbdev, kbdev->pm.backend.metrics.ipa_control_client);
kbase_ipa_control_unregister(kbdev, kbdev->pm.backend.metrics.ipa_control_client);
#else
CSTD_UNUSED(kbdev);
#endif
}
@@ -213,9 +193,8 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
/* Query IPA_CONTROL for the latest GPU-active and protected-time
* info.
*/
err = kbase_ipa_control_query(
kbdev, kbdev->pm.backend.metrics.ipa_control_client,
&gpu_active_counter, 1, &protected_time);
err = kbase_ipa_control_query(kbdev, kbdev->pm.backend.metrics.ipa_control_client,
&gpu_active_counter, 1, &protected_time);
/* Read the timestamp after reading the GPU_ACTIVE counter value.
* This ensures the time gap between the 2 reads is consistent for
@@ -226,15 +205,13 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
now = ktime_get_raw();
if (err) {
dev_err(kbdev->dev,
"Failed to query the increment of GPU_ACTIVE counter: err=%d",
dev_err(kbdev->dev, "Failed to query the increment of GPU_ACTIVE counter: err=%d",
err);
} else {
u64 diff_ns;
s64 diff_ns_signed;
u32 ns_time;
ktime_t diff = ktime_sub(
now, kbdev->pm.backend.metrics.time_period_start);
ktime_t diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
diff_ns_signed = ktime_to_ns(diff);
@@ -294,25 +271,21 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
*/
gpu_active_counter = MIN(gpu_active_counter, ns_time);
kbdev->pm.backend.metrics.values.time_busy +=
gpu_active_counter;
kbdev->pm.backend.metrics.values.time_busy += gpu_active_counter;
kbdev->pm.backend.metrics.values.time_idle +=
ns_time - gpu_active_counter;
kbdev->pm.backend.metrics.values.time_idle += ns_time - gpu_active_counter;
/* Also make time in protected mode available explicitly,
* so users of this data have this info, too.
*/
kbdev->pm.backend.metrics.values.time_in_protm +=
protected_time;
kbdev->pm.backend.metrics.values.time_in_protm += protected_time;
}
kbdev->pm.backend.metrics.time_period_start = now;
}
#endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */
#else
static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
ktime_t now)
static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, ktime_t now)
{
ktime_t diff;
@@ -323,7 +296,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
return;
if (kbdev->pm.backend.metrics.gpu_active) {
u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
u32 ns_time = (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
kbdev->pm.backend.metrics.values.time_busy += ns_time;
if (kbdev->pm.backend.metrics.active_cl_ctx[0])
@@ -343,11 +316,10 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
kbdev->pm.backend.metrics.time_period_start = now;
}
#endif /* MALI_USE_CSF */
#endif /* MALI_USE_CSF */
#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS)
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
struct kbasep_pm_metrics *last,
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, struct kbasep_pm_metrics *last,
struct kbasep_pm_metrics *diff)
{
struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values;
@@ -394,11 +366,9 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
diff = &kbdev->pm.backend.metrics.dvfs_diff;
kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last,
diff);
kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
utilisation = (100 * diff->time_busy) /
max(diff->time_busy + diff->time_idle, 1u);
utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u);
#if !MALI_USE_CSF
busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
@@ -407,8 +377,7 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
util_cl_share);
kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
#else
/* Note that, at present, we don't pass protected-mode time to the
* platform here. It's unlikely to be useful, however, as the platform
@@ -451,7 +420,6 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev)
atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED);
}
#endif /* CONFIG_MALI_BIFROST_DVFS */
#if !MALI_USE_CSF
@@ -484,12 +452,12 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
katom = kbase_gpu_inspect(kbdev, js, 1);
if (katom && katom->gpu_rb_state ==
KBASE_ATOM_GPU_RB_SUBMITTED) {
if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) {
if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
int device_nr = (katom->core_req &
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
? katom->device_nr : 0;
int device_nr =
(katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ?
katom->device_nr :
0;
if (!WARN_ON(device_nr >= 2))
kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1;
} else {


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_reset_gpu.h>
@@ -51,9 +51,11 @@ void kbase_pm_policy_init(struct kbase_device *kbdev)
struct device_node *np = kbdev->dev->of_node;
const char *power_policy_name;
unsigned long flags;
int i;
unsigned int i;
if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) {
/* Read "power-policy" property and fallback to "power_policy" if not found */
if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) ||
(of_property_read_string(np, "power_policy", &power_policy_name) == 0)) {
for (i = 0; i < ARRAY_SIZE(all_policy_list); i++)
if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) {
default_policy = all_policy_list[i];
@@ -103,13 +105,13 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
active = backend->pm_current_policy->get_core_active(kbdev);
WARN((kbase_pm_is_active(kbdev) && !active),
"GPU is active but policy '%s' is indicating that it can be powered off",
kbdev->pm.backend.pm_current_policy->name);
"GPU is active but policy '%s' is indicating that it can be powered off",
kbdev->pm.backend.pm_current_policy->name);
if (active) {
/* Power on the GPU and any cores requested by the policy */
if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off &&
pm->backend.poweroff_wait_in_progress) {
pm->backend.poweroff_wait_in_progress) {
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
pm->backend.poweron_required = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -207,7 +209,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
#endif
if (kbdev->pm.backend.shaders_desired != shaders_desired) {
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired);
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL,
kbdev->pm.backend.shaders_desired);
kbdev->pm.backend.shaders_desired = shaders_desired;
kbase_pm_update_state(kbdev);
@@ -225,9 +228,9 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
int kbase_pm_list_policies(struct kbase_device *kbdev,
const struct kbase_pm_policy * const **list)
int kbase_pm_list_policies(struct kbase_device *kbdev, const struct kbase_pm_policy *const **list)
{
CSTD_UNUSED(kbdev);
if (list)
*list = all_policy_list;
@@ -259,32 +262,29 @@ static int policy_change_wait_for_L2_off(struct kbase_device *kbdev)
* for host control of shader cores.
*/
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
remaining = wait_event_killable_timeout(
kbdev->pm.backend.gpu_in_desired_state_wait,
kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout);
remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
kbdev->pm.backend.l2_state == KBASE_L2_OFF,
timeout);
#else
remaining = wait_event_timeout(
kbdev->pm.backend.gpu_in_desired_state_wait,
kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout);
remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout);
#endif
if (!remaining) {
err = -ETIMEDOUT;
} else if (remaining < 0) {
dev_info(kbdev->dev,
"Wait for L2_off got interrupted");
dev_info(kbdev->dev, "Wait for L2_off got interrupted");
err = (int)remaining;
}
dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__,
err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state);
dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, err,
kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state);
return err;
}
#endif
void kbase_pm_set_policy(struct kbase_device *kbdev,
const struct kbase_pm_policy *new_policy)
void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *new_policy)
{
const struct kbase_pm_policy *old_policy;
unsigned long flags;
@@ -294,6 +294,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
bool reset_gpu = false;
bool reset_op_prevented = true;
struct kbase_csf_scheduler *scheduler = NULL;
u32 pwroff;
bool switching_to_always_on;
#endif
KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -302,6 +304,18 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id);
#if MALI_USE_CSF
pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev);
switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops;
if (pwroff == 0 && !switching_to_always_on) {
dev_warn(
kbdev->dev,
"power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n");
dev_warn(
kbdev->dev,
"power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n");
kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev);
}
scheduler = &kbdev->csf.scheduler;
KBASE_DEBUG_ASSERT(scheduler != NULL);
@@ -372,8 +386,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
if (old_policy->term)
old_policy->term(kbdev);
memset(&kbdev->pm.backend.pm_policy_data, 0,
sizeof(union kbase_pm_policy_data));
memset(&kbdev->pm.backend.pm_policy_data, 0, sizeof(union kbase_pm_policy_data));
KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id);
if (new_policy->init)


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,8 +80,7 @@ void kbase_pm_update_cores(struct kbase_device *kbdev);
* Return: true if the request to the HW was successfully made else false if the
* request is still pending.
*/
static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
bool shader_required)
static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, bool shader_required)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -89,14 +88,14 @@ static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
* available, and shaders are definitely not powered.
*/
if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON &&
kbdev->pm.backend.l2_state != KBASE_L2_ON &&
kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE)
kbdev->pm.backend.l2_state != KBASE_L2_ON &&
kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE)
return false;
if (shader_required &&
kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON &&
kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON &&
kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK)
kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON &&
kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON &&
kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK)
return false;
return true;


@@ -29,28 +29,48 @@
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_config_defaults.h>
#include <linux/version_compat_defs.h>
void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
u64 *system_time,
struct timespec64 *ts)
struct kbase_timeout_info {
char *selector_str;
u64 timeout_cycles;
};
#if MALI_USE_CSF
static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = {
[CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES,
CSF_FIRMWARE_PING_TIMEOUT_CYCLES) },
[CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES },
[CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES },
[CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES },
[CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT",
CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES },
[CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT",
CSF_FIRMWARE_PING_TIMEOUT_CYCLES },
[CSF_SCHED_PROTM_PROGRESS_TIMEOUT] = { "CSF_SCHED_PROTM_PROGRESS_TIMEOUT",
DEFAULT_PROGRESS_TIMEOUT_CYCLES },
[MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT",
MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES },
[KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT",
KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES },
};
#else
static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = {
[MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT",
MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES },
[JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT",
JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES },
};
#endif
void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter,
u64 *system_time, struct timespec64 *ts)
{
u32 hi1, hi2;
if (cycle_counter)
*cycle_counter = kbase_backend_get_cycle_cnt(kbdev);
if (system_time) {
/* Read hi, lo, hi to ensure a coherent u64 */
do {
hi1 = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TIMESTAMP_HI));
*system_time = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TIMESTAMP_LO));
hi2 = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TIMESTAMP_HI));
} while (hi1 != hi2);
*system_time |= (((u64) hi1) << 32);
*system_time = kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP));
}
/* Record the CPU's idea of current time */
@@ -80,7 +100,7 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
const unsigned long remaining = jiffies + msecs_to_jiffies(timeout);
while (time_is_after_jiffies(remaining)) {
if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
if ((kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) &
GPU_STATUS_CYCLE_COUNT_ACTIVE)) {
success = true;
break;
@@ -91,129 +111,150 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
}
#endif
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
u64 *system_time, struct timespec64 *ts)
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time,
struct timespec64 *ts)
{
#if !MALI_USE_CSF
kbase_pm_request_gpu_cycle_counter(kbdev);
WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON,
"L2 not powered up");
WARN_ONCE((!timedwait_cycle_count_active(kbdev)),
"Timed out on CYCLE_COUNT_ACTIVE");
WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, "L2 not powered up");
WARN_ONCE((!timedwait_cycle_count_active(kbdev)), "Timed out on CYCLE_COUNT_ACTIVE");
#endif
kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time,
ts);
kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, ts);
#if !MALI_USE_CSF
kbase_pm_release_gpu_cycle_counter(kbdev);
#endif
}
unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
enum kbase_timeout_selector selector)
static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev)
{
u64 freq_khz = kbdev->lowest_gpu_freq_khz;
if (!freq_khz) {
dev_dbg(kbdev->dev,
"Lowest frequency uninitialized! Using reference frequency for scaling");
return DEFAULT_REF_TIMEOUT_FREQ_KHZ;
}
return freq_khz;
}
void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
unsigned int timeout_ms)
{
char *selector_str;
if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
selector = KBASE_DEFAULT_TIMEOUT;
selector_str = timeout_info[selector].selector_str;
dev_warn(kbdev->dev,
"Unknown timeout selector passed, falling back to default: %s\n",
timeout_info[selector].selector_str);
}
selector_str = timeout_info[selector].selector_str;
kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms;
dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms);
}
void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
u64 timeout_cycles, u32 cycle_multiplier)
{
u64 final_cycles;
u64 timeout;
u64 freq_khz = kbase_device_get_scaling_frequency(kbdev);
if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
selector = KBASE_DEFAULT_TIMEOUT;
dev_warn(kbdev->dev,
"Unknown timeout selector passed, falling back to default: %s\n",
timeout_info[selector].selector_str);
}
/* If the multiplication overflows, we will have unsigned wrap-around, and so might
* end up with a shorter timeout. In those cases, we then want to have the largest
* timeout possible that will not run into these issues. Note that this will not
* wait for U64_MAX/frequency ms, as it will be clamped to a max of UINT_MAX
* milliseconds by subsequent steps.
*/
if (check_mul_overflow(timeout_cycles, (u64)cycle_multiplier, &final_cycles))
final_cycles = U64_MAX;
/* Timeout calculation:
* dividing number of cycles by freq in KHz automatically gives value
* in milliseconds. nr_cycles will have to be multiplied by 1e3 to
* get result in microseconds, and 1e6 to get result in nanoseconds.
*/
timeout = div_u64(final_cycles, freq_khz);
u64 timeout, nr_cycles = 0;
u64 freq_khz;
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
if (!kbdev->lowest_gpu_freq_khz) {
if (unlikely(timeout > UINT_MAX)) {
dev_dbg(kbdev->dev,
"Lowest frequency uninitialized! Using reference frequency for scaling");
freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
} else {
freq_khz = kbdev->lowest_gpu_freq_khz;
}
switch (selector) {
case MMU_AS_INACTIVE_WAIT_TIMEOUT:
selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
break;
case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
break;
case JM_DEFAULT_JS_FREE_TIMEOUT:
selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
break;
#else
/* Use Firmware timeout if invalid selection */
WARN(1,
"Invalid timeout selector used! Using CSF Firmware timeout");
fallthrough;
case CSF_FIRMWARE_TIMEOUT:
selector_str = "CSF_FIRMWARE_TIMEOUT";
/* Any FW timeout cannot be longer than the FW ping interval, after which
* the firmware_aliveness_monitor will be triggered and may restart
* the GPU if the FW is unresponsive.
*/
nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES);
if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES)
dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n",
selector_str);
break;
case CSF_PM_TIMEOUT:
selector_str = "CSF_PM_TIMEOUT";
nr_cycles = CSF_PM_TIMEOUT_CYCLES;
break;
case CSF_GPU_RESET_TIMEOUT:
selector_str = "CSF_GPU_RESET_TIMEOUT";
nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
break;
case CSF_CSG_SUSPEND_TIMEOUT:
selector_str = "CSF_CSG_SUSPEND_TIMEOUT";
nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES;
break;
case CSF_FIRMWARE_BOOT_TIMEOUT:
selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT";
nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES;
break;
case CSF_FIRMWARE_PING_TIMEOUT:
selector_str = "CSF_FIRMWARE_PING_TIMEOUT";
nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES;
break;
case CSF_SCHED_PROTM_PROGRESS_TIMEOUT:
selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT";
nr_cycles = kbase_csf_timeout_get(kbdev);
break;
#endif
}
timeout = div_u64(nr_cycles, freq_khz);
if (WARN(timeout > UINT_MAX,
"Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
(unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
"Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
timeout, timeout_info[selector].selector_str,
kbase_device_get_scaling_frequency(kbdev));
timeout = UINT_MAX;
return (unsigned int)timeout;
}
kbase_device_set_timeout_ms(kbdev, selector, (unsigned int)timeout);
}
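The conversion in kbase_device_set_timeout() is plain unit arithmetic: a cycle count divided by a frequency in kHz gives milliseconds. A standalone sketch with invented numbers, 500 million cycles at an assumed lowest frequency of 100,000 kHz (100 MHz), makes that concrete:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t timeout_cycles   = 500000000ULL; /* hypothetical per-selector budget */
	uint32_t cycle_multiplier = 1;
	uint64_t freq_khz         = 100000ULL;    /* hypothetical lowest GPU frequency */

	uint64_t final_cycles = timeout_cycles * cycle_multiplier;

	/* cycles divided by (kilo-cycles per second) yields milliseconds */
	uint64_t timeout_ms = final_cycles / freq_khz;

	printf("%llu cycles at %llu kHz -> %llu ms\n",
	       (unsigned long long)final_cycles,
	       (unsigned long long)freq_khz,
	       (unsigned long long)timeout_ms); /* 5000 ms */
	return 0;
}

A platform with a higher lowest frequency ends up with a proportionally shorter millisecond timeout for the same cycle budget, which is the point of deriving the timeouts from cycle counts rather than hard-coding milliseconds.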
/**
* kbase_timeout_scaling_init - Initialize the table of scaled timeout
* values associated with a @kbase_device.
*
* @kbdev: KBase device pointer.
*
* Return: 0 on success, negative error code otherwise.
*/
static int kbase_timeout_scaling_init(struct kbase_device *kbdev)
{
int err;
enum kbase_timeout_selector selector;
/* First, we initialize the minimum and maximum device frequencies, which
* are used to compute the timeouts.
*/
err = kbase_pm_gpu_freq_init(kbdev);
if (unlikely(err < 0)) {
dev_dbg(kbdev->dev, "Could not initialize GPU frequency\n");
return err;
}
dev_dbg(kbdev->dev, "Scaling kbase timeouts:\n");
for (selector = 0; selector < KBASE_TIMEOUT_SELECTOR_COUNT; selector++) {
u32 cycle_multiplier = 1;
u64 nr_cycles = timeout_info[selector].timeout_cycles;
#if MALI_USE_CSF
/* Special case: the scheduler progress timeout can be set manually,
* and does not have a canonical length defined in the headers. Hence,
* we query it once upon startup to get a baseline, and change it upon
* every invocation of the appropriate functions
*/
if (selector == CSF_SCHED_PROTM_PROGRESS_TIMEOUT)
nr_cycles = kbase_csf_timeout_get(kbdev);
#endif
/* Since we are in control of the iteration bounds for the selector,
* we don't have to worry about bounds checking when setting the timeout.
*/
kbase_device_set_timeout(kbdev, selector, nr_cycles, cycle_multiplier);
}
return 0;
}
unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector)
{
if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
dev_warn(kbdev->dev, "Querying wrong selector, falling back to default\n");
selector = KBASE_DEFAULT_TIMEOUT;
}
return kbdev->backend_time.device_scaled_timeouts[selector];
}
KBASE_EXPORT_TEST_API(kbase_get_timeout_ms);
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
{
u32 hi1, hi2, lo;
/* Read hi, lo, hi to ensure a coherent u64 */
do {
hi1 = kbase_reg_read(kbdev,
GPU_CONTROL_REG(CYCLE_COUNT_HI));
lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(CYCLE_COUNT_LO));
hi2 = kbase_reg_read(kbdev,
GPU_CONTROL_REG(CYCLE_COUNT_HI));
} while (hi1 != hi2);
return lo | (((u64) hi1) << 32);
return kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(CYCLE_COUNT));
}
#if MALI_USE_CSF
@@ -247,18 +288,21 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t
int kbase_backend_time_init(struct kbase_device *kbdev)
{
int err = 0;
#if MALI_USE_CSF
u64 cpu_ts = 0;
u64 gpu_ts = 0;
u64 freq;
u64 common_factor;
kbase_pm_register_access_enable(kbdev);
get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
freq = arch_timer_get_cntfrq();
if (!freq) {
dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
return -EINVAL;
err = -EINVAL;
goto disable_registers;
}
common_factor = gcd(NSEC_PER_SEC, freq);
@@ -268,12 +312,23 @@ int kbase_backend_time_init(struct kbase_device *kbdev)
if (!kbdev->backend_time.divisor) {
dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
return -EINVAL;
err = -EINVAL;
goto disable_registers;
}
kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
kbdev->backend_time.divisor);
#endif
return 0;
if (kbase_timeout_scaling_init(kbdev)) {
dev_warn(kbdev->dev, "Could not initialize timeout scaling");
err = -EINVAL;
}
#if MALI_USE_CSF
disable_registers:
kbase_pm_register_access_disable(kbdev);
#endif
return err;
}

View File

@@ -35,9 +35,6 @@ bob_defaults {
"CONFIG_GPU_HWVER={{.hwver}}",
],
},
mali_platform_dt_pin_rst: {
kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"],
},
gpu_has_csf: {
kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"],
},
@@ -62,11 +59,11 @@ bob_defaults {
mali_dma_buf_legacy_compat: {
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
},
large_page_alloc_override: {
kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"],
page_migration_support: {
kbuild_options: ["CONFIG_PAGE_MIGRATION_SUPPORT=y"],
},
large_page_alloc: {
kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"],
large_page_support: {
kbuild_options: ["CONFIG_LARGE_PAGE_SUPPORT=y"],
},
mali_memory_fully_backed: {
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
@@ -101,9 +98,6 @@ bob_defaults {
mali_system_trace: {
kbuild_options: ["CONFIG_MALI_BIFROST_SYSTEM_TRACE=y"],
},
buslog: {
kbuild_options: ["CONFIG_MALI_BUSLOG=y"],
},
cinstr_vector_dump: {
kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"],
},
@@ -140,6 +134,15 @@ bob_defaults {
mali_coresight: {
kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
},
mali_fw_trace_mode_manual: {
kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_MANUAL=y"],
},
mali_fw_trace_mode_auto_print: {
kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT=y"],
},
mali_fw_trace_mode_auto_discard: {
kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD=y"],
},
kbuild_options: [
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
"MALI_CUSTOMER_RELEASE={{.release}}",
@@ -201,6 +204,7 @@ bob_kernel_module {
"platform/*/*/*/*.h",
"platform/*/*/*/Kbuild",
"thirdparty/*.c",
"thirdparty/*.h",
"thirdparty/Kbuild",
"debug/*.c",
"debug/*.h",
@@ -211,6 +215,11 @@ bob_kernel_module {
"gpu/*.c",
"gpu/*.h",
"gpu/Kbuild",
"hw_access/*.c",
"hw_access/*.h",
"hw_access/*/*.c",
"hw_access/*/*.h",
"hw_access/Kbuild",
"tl/*.c",
"tl/*.h",
"tl/Kbuild",
@@ -272,9 +281,4 @@ bob_kernel_module {
"CONFIG_MALI_BIFROST=m",
"CONFIG_MALI_KUTF=n",
],
buslog: {
extra_symbols: [
"bus_logger",
],
},
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,12 +24,13 @@
*/
#include <context/mali_kbase_context_internal.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
#include <tl/mali_kbase_timeline.h>
#include <mali_kbase_ctx_sched.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <csf/mali_kbase_csf_csg_debugfs.h>
@@ -92,24 +93,20 @@ static const struct kbase_context_init context_init[] = {
"Memory pool group initialization failed" },
{ kbase_mem_evictable_init, kbase_mem_evictable_deinit,
"Memory evictable initialization failed" },
{ kbase_context_mmu_init, kbase_context_mmu_term,
"MMU initialization failed" },
{ kbase_context_mem_alloc_page, kbase_context_mem_pool_free,
"Memory alloc page failed" },
{ kbase_ctx_sched_init_ctx, NULL, NULL },
{ kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" },
{ kbase_context_mem_alloc_page, kbase_context_mem_pool_free, "Memory alloc page failed" },
{ kbase_region_tracker_init, kbase_region_tracker_term,
"Region tracker initialization failed" },
{ kbase_sticky_resource_init, kbase_context_sticky_resource_term,
"Sticky resource initialization failed" },
{ kbase_jit_init, kbase_jit_term, "JIT initialization failed" },
{ kbase_csf_ctx_init, kbase_csf_ctx_term,
"CSF context initialization failed" },
{ kbase_csf_ctx_init, kbase_csf_ctx_term, "CSF context initialization failed" },
{ kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list,
"Adding kctx to device failed" },
};
static void kbase_context_term_partial(
struct kbase_context *kctx,
unsigned int i)
static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int i)
{
while (i-- > 0) {
if (context_init[i].term)
@@ -117,11 +114,10 @@ static void kbase_context_term_partial(
}
}
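Editor's note: the context_init[] table drives both setup and teardown; kbase_context_term_partial() walks the same table backwards and calls each entry's term callback, so init order and rollback order stay in lockstep. Below is a generic, self-contained sketch of that idiom; it assumes nothing from the driver beyond the pattern itself, and all names are illustrative.

#include <stddef.h>
#include <stdio.h>

struct step {
	int (*init)(void);
	void (*term)(void);
	const char *err_msg;
};

static int init_a(void) { puts("init A"); return 0; }
static void term_a(void) { puts("term A"); }
static int init_b(void) { puts("init B"); return -1; /* simulate failure */ }
static void term_b(void) { puts("term B"); }

static const struct step steps[] = {
	{ init_a, term_a, "A failed" },
	{ init_b, term_b, "B failed" },
};

/* Tear down the first 'count' steps in reverse order, skipping entries
 * without a term callback, just like kbase_context_term_partial(). */
static void term_partial(size_t count)
{
	while (count-- > 0) {
		if (steps[count].term)
			steps[count].term();
	}
}

int main(void)
{
	size_t i;

	for (i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
		if (steps[i].init()) {
			fprintf(stderr, "%s\n", steps[i].err_msg);
			term_partial(i); /* roll back only what succeeded */
			return 1;
		}
	}
	term_partial(sizeof(steps) / sizeof(steps[0]));
	return 0;
}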
struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
bool is_compat,
base_context_create_flags const flags,
unsigned long const api_version,
struct file *const filp)
struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat,
base_context_create_flags const flags,
unsigned long const api_version,
struct kbase_file *const kfile)
{
struct kbase_context *kctx;
unsigned int i = 0;
@@ -140,9 +136,11 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
kctx->kbdev = kbdev;
kctx->api_version = api_version;
kctx->filp = filp;
kctx->kfile = kfile;
kctx->create_flags = flags;
memcpy(kctx->comm, current->comm, sizeof(current->comm));
if (is_compat)
kbase_ctx_flag_set(kctx, KCTX_COMPAT);
#if defined(CONFIG_64BIT)
@@ -157,8 +155,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
err = context_init[i].init(kctx);
if (err) {
dev_err(kbdev->dev, "%s error = %d\n",
context_init[i].err_mes, err);
dev_err(kbdev->dev, "%s error = %d\n", context_init[i].err_mes, err);
/* kctx should be freed by kbase_context_free().
* Otherwise it will result in memory leak.
@@ -190,14 +187,22 @@ void kbase_destroy_context(struct kbase_context *kctx)
* Customer side that a hang could occur if context termination is
* not blocked until the resume of GPU device.
*/
while (kbase_pm_context_active_handle_suspend(
kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
dev_info(kbdev->dev,
"Suspend in progress when destroying context");
wait_event(kbdev->pm.resume_wait,
!kbase_pm_is_suspending(kbdev));
while (kbase_pm_context_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
dev_info(kbdev->dev, "Suspend in progress when destroying context");
wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev));
}
/* We have synchronized against System suspend and incremented
 * pm.active_count, so any subsequent invocation of the System suspend
 * callback will be blocked.
 * If a System suspend callback was already in progress, the loop above
 * has waited until the System resume callback began. So also wait for
 * the System resume callback to complete, as context termination must
 * be avoided during System resume too.
 */
wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev));
kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
kbase_context_term_partial(kctx, ARRAY_SIZE(context_init));

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
*/
#include <context/mali_kbase_context_internal.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_kinstr_jm.h>
@@ -81,8 +81,7 @@ static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx)
static int kbase_context_kbase_timer_setup(struct kbase_context *kctx)
{
kbase_timer_setup(&kctx->soft_job_timeout,
kbasep_soft_job_timeout_worker);
kbase_timer_setup(&kctx->soft_job_timeout, kbasep_soft_job_timeout_worker);
return 0;
}
@@ -133,41 +132,33 @@ static const struct kbase_context_init context_init[] = {
"Memory pool group initialization failed" },
{ kbase_mem_evictable_init, kbase_mem_evictable_deinit,
"Memory evictable initialization failed" },
{ kbase_context_mmu_init, kbase_context_mmu_term,
"MMU initialization failed" },
{ kbase_context_mem_alloc_page, kbase_context_mem_pool_free,
"Memory alloc page failed" },
{ kbase_ctx_sched_init_ctx, NULL, NULL },
{ kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" },
{ kbase_context_mem_alloc_page, kbase_context_mem_pool_free, "Memory alloc page failed" },
{ kbase_region_tracker_init, kbase_region_tracker_term,
"Region tracker initialization failed" },
{ kbase_sticky_resource_init, kbase_context_sticky_resource_term,
"Sticky resource initialization failed" },
{ kbase_jit_init, kbase_jit_term, "JIT initialization failed" },
{ kbase_context_kbase_kinstr_jm_init,
kbase_context_kbase_kinstr_jm_term,
{ kbase_context_kbase_kinstr_jm_init, kbase_context_kbase_kinstr_jm_term,
"JM instrumentation initialization failed" },
{ kbase_context_kbase_timer_setup, NULL,
"Timers initialization failed" },
{ kbase_event_init, kbase_event_cleanup,
"Event initialization failed" },
{ kbasep_js_kctx_init, kbasep_js_kctx_term,
"JS kctx initialization failed" },
{ kbase_context_kbase_timer_setup, NULL, "Timers initialization failed" },
{ kbase_event_init, kbase_event_cleanup, "Event initialization failed" },
{ kbasep_js_kctx_init, kbasep_js_kctx_term, "JS kctx initialization failed" },
{ kbase_jd_init, kbase_jd_exit, "JD initialization failed" },
{ kbase_context_submit_check, NULL, "Enabling job submission failed" },
#if IS_ENABLED(CONFIG_DEBUG_FS)
{ kbase_debug_job_fault_context_init,
kbase_debug_job_fault_context_term,
{ kbase_debug_job_fault_context_init, kbase_debug_job_fault_context_term,
"Job fault context initialization failed" },
#endif
{ kbasep_platform_context_init, kbasep_platform_context_term,
"Platform callback for kctx initialization failed" },
{ NULL, kbase_context_flush_jobs, NULL },
{ kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list,
"Adding kctx to device failed" },
{ kbasep_platform_context_init, kbasep_platform_context_term,
"Platform callback for kctx initialization failed" },
};
static void kbase_context_term_partial(
struct kbase_context *kctx,
unsigned int i)
static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int i)
{
while (i-- > 0) {
if (context_init[i].term)
@@ -175,11 +166,10 @@ static void kbase_context_term_partial(
}
}
struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
bool is_compat,
base_context_create_flags const flags,
unsigned long const api_version,
struct file *const filp)
struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat,
base_context_create_flags const flags,
unsigned long const api_version,
struct kbase_file *const kfile)
{
struct kbase_context *kctx;
unsigned int i = 0;
@@ -198,7 +188,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
kctx->kbdev = kbdev;
kctx->api_version = api_version;
kctx->filp = filp;
kctx->kfile = kfile;
kctx->create_flags = flags;
if (is_compat)
@@ -215,8 +205,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
err = context_init[i].init(kctx);
if (err) {
dev_err(kbdev->dev, "%s error = %d\n",
context_init[i].err_mes, err);
dev_err(kbdev->dev, "%s error = %d\n", context_init[i].err_mes, err);
/* kctx should be freed by kbase_context_free().
* Otherwise it will result in memory leak.
@@ -243,7 +232,7 @@ void kbase_destroy_context(struct kbase_context *kctx)
if (WARN_ON(!kbdev))
return;
/* Context termination could happen whilst the system suspend of
/* Context termination could happen whilst the system suspend of
* the GPU device is ongoing or has completed. It has been seen on
* Customer side that a hang could occur if context termination is
* not blocked until the resume of GPU device.
@@ -251,13 +240,22 @@ void kbase_destroy_context(struct kbase_context *kctx)
#ifdef CONFIG_MALI_ARBITER_SUPPORT
atomic_inc(&kbdev->pm.gpu_users_waiting);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
while (kbase_pm_context_active_handle_suspend(
kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
dev_dbg(kbdev->dev,
"Suspend in progress when destroying context");
wait_event(kbdev->pm.resume_wait,
!kbase_pm_is_suspending(kbdev));
while (kbase_pm_context_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
dev_dbg(kbdev->dev, "Suspend in progress when destroying context");
wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev));
}
/* We have synchronized against System suspend and incremented
 * pm.active_count, so any subsequent invocation of the System suspend
 * callback will be blocked.
 * If a System suspend callback was already in progress, the loop above
 * has waited until the System resume callback began. So also wait for
 * the System resume callback to complete, as context termination must
 * be avoided during System resume too.
 */
wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev));
#ifdef CONFIG_MALI_ARBITER_SUPPORT
atomic_dec(&kbdev->pm.gpu_users_waiting);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */

View File

@@ -25,12 +25,16 @@
#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/task.h>
#endif
#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE
#include <linux/sched/signal.h>
#else
#include <linux/sched.h>
#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_mem_pool_group.h>
@@ -53,8 +57,7 @@ static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid)
/* Check if the kctx creation request is from an existing process. */
while (node) {
struct kbase_process *prcs_node =
rb_entry(node, struct kbase_process, kprcs_node);
struct kbase_process *prcs_node = rb_entry(node, struct kbase_process, kprcs_node);
if (prcs_node->tgid == tgid) {
kprcs = prcs_node;
break;
@@ -110,8 +113,7 @@ static int kbase_insert_kctx_to_process(struct kbase_context *kctx)
struct kbase_process *prcs_node;
parent = *new;
prcs_node = rb_entry(parent, struct kbase_process,
kprcs_node);
prcs_node = rb_entry(parent, struct kbase_process, kprcs_node);
if (tgid < prcs_node->tgid)
new = &(*new)->rb_left;
else
@@ -135,19 +137,15 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
kctx->process_mm = NULL;
kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = task_tgid_vnr(current);
kctx->pid = task_pid_vnr(current);
/* Check if this is a Userspace created context */
if (likely(kctx->filp)) {
if (likely(kctx->kfile)) {
struct pid *pid_struct;
rcu_read_lock();
pid_struct = find_get_pid(kctx->tgid);
pid_struct = get_pid(task_tgid(current));
if (likely(pid_struct)) {
struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
@@ -158,16 +156,14 @@ int kbase_context_common_init(struct kbase_context *kctx)
get_task_struct(task);
kctx->task = task;
} else {
dev_err(kctx->kbdev->dev,
"Failed to get task pointer for %s/%d",
dev_err(kctx->kbdev->dev, "Failed to get task pointer for %s/%d",
current->comm, kctx->pid);
err = -ESRCH;
}
put_pid(pid_struct);
} else {
dev_err(kctx->kbdev->dev,
"Failed to get pid pointer for %s/%d",
dev_err(kctx->kbdev->dev, "Failed to get pid pointer for %s/%d",
current->comm, kctx->pid);
err = -ESRCH;
}
@@ -180,8 +176,6 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->process_mm = current->mm;
}
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
spin_lock_init(&kctx->mem_partials_lock);
@@ -190,21 +184,6 @@ int kbase_context_common_init(struct kbase_context *kctx)
spin_lock_init(&kctx->waiting_soft_jobs_lock);
INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
init_waitqueue_head(&kctx->event_queue);
atomic_set(&kctx->event_count, 0);
#if !MALI_USE_CSF
atomic_set(&kctx->event_closed, false);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
atomic_set(&kctx->jctx.work_id, 0);
#endif
#endif
#if MALI_USE_CSF
atomic64_set(&kctx->num_fixable_allocs, 0);
atomic64_set(&kctx->num_fixed_allocs, 0);
#endif
kbase_gpu_vm_lock(kctx);
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
kbase_gpu_vm_unlock(kctx);
@@ -215,9 +194,8 @@ int kbase_context_common_init(struct kbase_context *kctx)
err = kbase_insert_kctx_to_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
if (err) {
dev_err(kctx->kbdev->dev,
"(err:%d) failed to insert kctx to kbase_process", err);
if (likely(kctx->filp)) {
dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err);
if (likely(kctx->kfile)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
@@ -298,8 +276,7 @@ void kbase_context_common_term(struct kbase_context *kctx)
pages = atomic_read(&kctx->used_pages);
if (pages != 0)
dev_warn(kctx->kbdev->dev,
"%s: %d pages in use!\n", __func__, pages);
dev_warn(kctx->kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
@@ -307,7 +284,7 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
if (likely(kctx->filp)) {
if (likely(kctx->kfile)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
@@ -328,9 +305,8 @@ void kbase_context_mem_pool_group_term(struct kbase_context *kctx)
int kbase_context_mmu_init(struct kbase_context *kctx)
{
return kbase_mmu_init(
kctx->kbdev, &kctx->mmu, kctx,
kbase_context_mmu_group_id_get(kctx->create_flags));
return kbase_mmu_init(kctx->kbdev, &kctx->mmu, kctx,
kbase_context_mmu_group_id_get(kctx->create_flags));
}
void kbase_context_mmu_term(struct kbase_context *kctx)
@@ -342,7 +318,7 @@ int kbase_context_mem_alloc_page(struct kbase_context *kctx)
{
struct page *p;
p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]);
p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], false);
if (!p)
return -ENOMEM;
@@ -354,10 +330,8 @@ int kbase_context_mem_alloc_page(struct kbase_context *kctx)
void kbase_context_mem_pool_free(struct kbase_context *kctx)
{
/* drop the aliasing sink page now that it can't be mapped anymore */
kbase_mem_pool_free(
&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK],
as_page(kctx->aliasing_sink_page),
false);
kbase_mem_pool_free(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK],
as_page(kctx->aliasing_sink_page), false);
}
void kbase_context_sticky_resource_term(struct kbase_context *kctx)
@@ -369,18 +343,15 @@ void kbase_context_sticky_resource_term(struct kbase_context *kctx)
/* free pending region setups */
pending_regions_to_clean = KBASE_COOKIE_MASK;
bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean,
kctx->cookies, BITS_PER_LONG);
bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, kctx->cookies,
BITS_PER_LONG);
while (pending_regions_to_clean) {
unsigned int cookie = find_first_bit(&pending_regions_to_clean,
BITS_PER_LONG);
unsigned int cookie = find_first_bit(&pending_regions_to_clean, BITS_PER_LONG);
if (!WARN_ON(!kctx->pending_regions[cookie])) {
dev_dbg(kctx->kbdev->dev, "Freeing pending unmapped region\n");
kbase_mem_phy_alloc_put(
kctx->pending_regions[cookie]->cpu_alloc);
kbase_mem_phy_alloc_put(
kctx->pending_regions[cookie]->gpu_alloc);
kbase_mem_phy_alloc_put(kctx->pending_regions[cookie]->cpu_alloc);
kbase_mem_phy_alloc_put(kctx->pending_regions[cookie]->gpu_alloc);
kfree(kctx->pending_regions[cookie]);
kctx->pending_regions[cookie] = NULL;
@@ -390,3 +361,10 @@ void kbase_context_sticky_resource_term(struct kbase_context *kctx)
}
kbase_gpu_vm_unlock(kctx);
}
bool kbase_ctx_compat_mode(struct kbase_context *kctx)
{
return !IS_ENABLED(CONFIG_64BIT) ||
(IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT));
}
KBASE_EXPORT_TEST_API(kbase_ctx_compat_mode);

View File

@@ -56,8 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx);
* BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
* @api_version: Application program interface version, as encoded in
* a single integer by the KBASE_API_VERSION macro.
* @filp: Pointer to the struct file corresponding to device file
* /dev/malixx instance, passed to the file's open method.
* @kfile: Pointer to the object representing the /dev/malixx device
* file instance. Shall be passed as NULL for internally created
* contexts.
*
* Up to one context can be created for each client that opens the device file
* /dev/malixx. Context creation is deferred until a special ioctl() system call
@@ -65,11 +66,10 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx);
*
* Return: new kbase context or NULL on failure
*/
struct kbase_context *
kbase_create_context(struct kbase_device *kbdev, bool is_compat,
base_context_create_flags const flags,
unsigned long api_version,
struct file *filp);
struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat,
base_context_create_flags const flags,
unsigned long api_version,
struct kbase_file *const kfile);
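Editor's note: for a driver-internal (kernel-created) context, the @kfile argument is simply NULL, as the documentation above states. The following is a hedged sketch of such a call built only on the prototype shown here; the flags and API version are illustrative placeholders, not values taken from the driver headers.

#include <mali_kbase.h>

/* Sketch: create a context for in-kernel use. Passing NULL for @kfile marks
 * it as internally created. The flags and api_version values below are
 * placeholders for illustration only. */
static struct kbase_context *example_create_internal_ctx(struct kbase_device *kbdev)
{
	struct kbase_context *kctx;

	kctx = kbase_create_context(kbdev, false /* is_compat */,
				    0 /* flags: placeholder */,
				    0 /* api_version: placeholder */,
				    NULL /* kfile: internal context */);
	if (!kctx)
		dev_err(kbdev->dev, "internal context creation failed");

	return kctx;
}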
/**
* kbase_destroy_context - Destroy a kernel base context.
@@ -86,8 +86,7 @@ void kbase_destroy_context(struct kbase_context *kctx);
*
* Return: true if @flag is set on @kctx, false if not.
*/
static inline bool kbase_ctx_flag(struct kbase_context *kctx,
enum kbase_context_flags flag)
static inline bool kbase_ctx_flag(struct kbase_context *kctx, enum kbase_context_flags flag)
{
return atomic_read(&kctx->flags) & flag;
}
@@ -99,11 +98,7 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx,
*
* Return: True if needs to maintain compatibility, False otherwise.
*/
static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
{
return !IS_ENABLED(CONFIG_64BIT) ||
(IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT));
}
bool kbase_ctx_compat_mode(struct kbase_context *kctx);
/**
* kbase_ctx_flag_clear - Clear @flag on @kctx
@@ -116,8 +111,7 @@ static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
* Some flags have locking requirements, check the documentation for the
* respective flags.
*/
static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
enum kbase_context_flags flag)
static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, enum kbase_context_flags flag)
{
atomic_andnot(flag, &kctx->flags);
}
@@ -133,8 +127,7 @@ static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
* Some flags have locking requirements, check the documentation for the
* respective flags.
*/
static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
enum kbase_context_flags flag)
static inline void kbase_ctx_flag_set(struct kbase_context *kctx, enum kbase_context_flags flag)
{
atomic_or(flag, &kctx->flags);
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,14 @@ struct kbase_context_init {
char *err_mes;
};
/**
* kbase_context_common_init() - Initialize kbase context
* @kctx: Pointer to the kbase context to be initialized.
*
* This function must be called only when a kbase context is instantiated.
*
* Return: 0 on success.
*/
int kbase_context_common_init(struct kbase_context *kctx);
void kbase_context_common_term(struct kbase_context *kctx);

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -19,6 +19,7 @@
#
bifrost_kbase-y += \
csf/mali_kbase_csf_util.o \
csf/mali_kbase_csf_firmware_cfg.o \
csf/mali_kbase_csf_trace_buffer.o \
csf/mali_kbase_csf.o \
@@ -29,11 +30,15 @@ bifrost_kbase-y += \
csf/mali_kbase_csf_tl_reader.o \
csf/mali_kbase_csf_heap_context_alloc.o \
csf/mali_kbase_csf_reset_gpu.o \
csf/mali_kbase_csf_csg.o \
csf/mali_kbase_csf_csg_debugfs.o \
csf/mali_kbase_csf_kcpu_debugfs.o \
csf/mali_kbase_csf_sync.o \
csf/mali_kbase_csf_sync_debugfs.o \
csf/mali_kbase_csf_kcpu_fence_debugfs.o \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
csf/mali_kbase_csf_cpu_queue.o \
csf/mali_kbase_csf_cpu_queue_debugfs.o \
csf/mali_kbase_csf_event.o \
csf/mali_kbase_csf_firmware_log.o \

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -64,12 +64,19 @@
* struct kbase_ipa_control_listener_data - Data for the GPU clock frequency
* listener
*
* @listener: GPU clock frequency listener.
* @kbdev: Pointer to kbase device.
* @listener: GPU clock frequency listener.
* @kbdev: Pointer to kbase device.
* @clk_chg_wq: Dedicated workqueue to process the work item corresponding to
* a clock rate notification.
* @clk_chg_work: Work item to process the clock rate change.
* @rate: The latest notified rate change, in Hz.
*/
struct kbase_ipa_control_listener_data {
struct kbase_clk_rate_listener listener;
struct kbase_device *kbdev;
struct workqueue_struct *clk_chg_wq;
struct work_struct clk_chg_work;
atomic_t rate;
};
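Editor's note: the clk_chg_wq/clk_chg_work/rate trio implements a common kernel pattern: the rate-change notifier, which may run in a context where heavy work is undesirable, only stores the new rate atomically and queues a work item, and the worker later picks up the most recent rate and does the reprogramming in process context. Below is a stripped-down sketch of that pattern, independent of the kbase types; all names are illustrative.

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct rate_listener {
	struct workqueue_struct *wq;
	struct work_struct work;
	atomic_t rate;	/* latest notified rate, in Hz */
};

/* Worker: runs in process context and picks up the most recent rate. */
static void rate_change_worker(struct work_struct *work)
{
	struct rate_listener *l = container_of(work, struct rate_listener, work);
	u32 rate = (u32)atomic_read(&l->rate);

	/* ...reprogram timers/counters for the new rate here... */
	(void)rate;
}

/* Notifier: cheap and safe to call from the rate-change callback. */
static void rate_change_notify(struct rate_listener *l, u32 new_rate_hz)
{
	atomic_set(&l->rate, new_rate_hz);
	queue_work(l->wq, &l->work);
}

static int rate_listener_init(struct rate_listener *l)
{
	l->wq = alloc_workqueue("rate_chg_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
	if (!l->wq)
		return -ENOMEM;
	INIT_WORK(&l->work, rate_change_worker);
	atomic_set(&l->rate, 0);
	return 0;
}

static void rate_listener_term(struct rate_listener *l)
{
	destroy_workqueue(l->wq);
}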
static u32 timer_value(u32 gpu_rate)
@@ -80,14 +87,14 @@ static u32 timer_value(u32 gpu_rate)
static int wait_status(struct kbase_device *kbdev, u32 flags)
{
unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS;
u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
u32 status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS));
/*
* Wait for the STATUS register to indicate that flags have been
* cleared, in case a transition is pending.
*/
while (--max_loops && (status & flags))
status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS));
if (max_loops == 0) {
dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck");
return -EBUSY;
@@ -100,41 +107,17 @@ static int apply_select_config(struct kbase_device *kbdev, u64 *select)
{
int ret;
u32 select_cshw_lo = (u32)(select[KBASE_IPA_CORE_TYPE_CSHW] & U32_MAX);
u32 select_cshw_hi =
(u32)((select[KBASE_IPA_CORE_TYPE_CSHW] >> 32) & U32_MAX);
u32 select_memsys_lo =
(u32)(select[KBASE_IPA_CORE_TYPE_MEMSYS] & U32_MAX);
u32 select_memsys_hi =
(u32)((select[KBASE_IPA_CORE_TYPE_MEMSYS] >> 32) & U32_MAX);
u32 select_tiler_lo =
(u32)(select[KBASE_IPA_CORE_TYPE_TILER] & U32_MAX);
u32 select_tiler_hi =
(u32)((select[KBASE_IPA_CORE_TYPE_TILER] >> 32) & U32_MAX);
u32 select_shader_lo =
(u32)(select[KBASE_IPA_CORE_TYPE_SHADER] & U32_MAX);
u32 select_shader_hi =
(u32)((select[KBASE_IPA_CORE_TYPE_SHADER] >> 32) & U32_MAX);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), select_cshw_lo);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), select_cshw_hi);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO),
select_memsys_lo);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI),
select_memsys_hi);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO),
select_tiler_lo);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_HI),
select_tiler_hi);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO),
select_shader_lo);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI),
select_shader_hi);
kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_CSHW), select[KBASE_IPA_CORE_TYPE_CSHW]);
kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_MEMSYS),
select[KBASE_IPA_CORE_TYPE_MEMSYS]);
kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_TILER), select[KBASE_IPA_CORE_TYPE_TILER]);
kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_SHADER),
select[KBASE_IPA_CORE_TYPE_SHADER]);
ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
if (!ret) {
kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_APPLY);
ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
} else {
dev_err(kbdev->dev, "Wait for the pending command failed");
@@ -145,48 +128,25 @@ static int apply_select_config(struct kbase_device *kbdev, u64 *select)
static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx)
{
u32 value_lo, value_hi;
switch (type) {
case KBASE_IPA_CORE_TYPE_CSHW:
value_lo = kbase_reg_read(
kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx)));
value_hi = kbase_reg_read(
kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx)));
break;
return kbase_reg_read64(kbdev, IPA_VALUE_CSHW_OFFSET(select_idx));
case KBASE_IPA_CORE_TYPE_MEMSYS:
value_lo = kbase_reg_read(
kbdev,
IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx)));
value_hi = kbase_reg_read(
kbdev,
IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx)));
break;
return kbase_reg_read64(kbdev, IPA_VALUE_MEMSYS_OFFSET(select_idx));
case KBASE_IPA_CORE_TYPE_TILER:
value_lo = kbase_reg_read(
kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx)));
value_hi = kbase_reg_read(
kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx)));
break;
return kbase_reg_read64(kbdev, IPA_VALUE_TILER_OFFSET(select_idx));
case KBASE_IPA_CORE_TYPE_SHADER:
value_lo = kbase_reg_read(
kbdev,
IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx)));
value_hi = kbase_reg_read(
kbdev,
IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx)));
break;
return kbase_reg_read64(kbdev, IPA_VALUE_SHADER_OFFSET(select_idx));
default:
WARN(1, "Unknown core type: %u\n", type);
value_lo = value_hi = 0;
break;
return 0;
}
return (((u64)value_hi << 32) | value_lo);
}
static void build_select_config(struct kbase_ipa_control *ipa_ctrl,
u64 *select_config)
static void build_select_config(struct kbase_ipa_control *ipa_ctrl, u64 *select_config)
{
size_t i;
@@ -200,8 +160,7 @@ static void build_select_config(struct kbase_ipa_control *ipa_ctrl,
&ipa_ctrl->blocks[i].select[j];
select_config[i] |=
((u64)prfcnt_config->idx
<< (IPA_CONTROL_SELECT_BITS_PER_CNT * j));
((u64)prfcnt_config->idx << (IPA_CONTROL_SELECT_BITS_PER_CNT * j));
}
}
}
@@ -218,20 +177,17 @@ static int update_select_registers(struct kbase_device *kbdev)
}
static inline void calc_prfcnt_delta(struct kbase_device *kbdev,
struct kbase_ipa_control_prfcnt *prfcnt,
bool gpu_ready)
struct kbase_ipa_control_prfcnt *prfcnt, bool gpu_ready)
{
u64 delta_value, raw_value;
if (gpu_ready)
raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
prfcnt->select_idx);
raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, prfcnt->select_idx);
else
raw_value = prfcnt->latest_raw_value;
if (raw_value < prfcnt->latest_raw_value) {
delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) +
raw_value;
delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) + raw_value;
} else {
delta_value = raw_value - prfcnt->latest_raw_value;
}
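Editor's note: the wrap-around branch above keeps the delta correct when the hardware counter overflows and restarts from zero between two samples. A tiny self-contained illustration follows, using an 8-bit maximum purely for readability; the real MAX_PRFCNT_VALUE in the driver is a wider constant.

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MAX_CNT_VALUE 0xFFu	/* 8-bit counter, for illustration only */

/* Same logic as calc_prfcnt_delta(): if the new raw value is smaller than
 * the previous one, the counter wrapped, so count the distance from the
 * previous value to the maximum plus the part accumulated after the wrap. */
static uint64_t counter_delta(uint64_t prev, uint64_t raw)
{
	if (raw < prev)
		return (EXAMPLE_MAX_CNT_VALUE - prev) + raw;
	return raw - prev;
}

int main(void)
{
	/* Previous sample 0xF0, new sample 0x10: the counter wrapped, and the
	 * formula gives (0xFF - 0xF0) + 0x10 = 0x1F (31). */
	printf("delta = %llu\n", (unsigned long long)counter_delta(0xF0, 0x10));
	return 0;
}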
@@ -266,63 +222,65 @@ static inline void calc_prfcnt_delta(struct kbase_device *kbdev,
* affect all performance counters which require GPU normalization
* in every session.
*/
static void
kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
u32 clk_index, u32 clk_rate_hz)
static void kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
u32 clk_index, u32 clk_rate_hz)
{
if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) {
size_t i;
unsigned long flags;
struct kbase_ipa_control_listener_data *listener_data =
container_of(listener,
struct kbase_ipa_control_listener_data,
listener);
struct kbase_device *kbdev = listener_data->kbdev;
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
container_of(listener, struct kbase_ipa_control_listener_data, listener);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Save the rate and delegate the job to a work item */
atomic_set(&listener_data->rate, clk_rate_hz);
queue_work(listener_data->clk_chg_wq, &listener_data->clk_chg_work);
}
}
if (!kbdev->pm.backend.gpu_ready) {
dev_err(kbdev->dev,
"%s: GPU frequency cannot change while GPU is off",
__func__);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return;
}
static void kbase_ipa_ctrl_rate_change_worker(struct work_struct *data)
{
struct kbase_ipa_control_listener_data *listener_data =
container_of(data, struct kbase_ipa_control_listener_data, clk_chg_work);
struct kbase_device *kbdev = listener_data->kbdev;
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
unsigned long flags;
u32 rate;
size_t i;
/* Interrupts are already disabled and interrupt state is also saved */
spin_lock(&ipa_ctrl->lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
if (!kbdev->pm.backend.gpu_ready) {
dev_err(kbdev->dev, "%s: GPU frequency cannot change while GPU is off", __func__);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return;
}
if (session->active) {
size_t j;
spin_lock(&ipa_ctrl->lock);
/* Picking up the latest notified rate */
rate = (u32)atomic_read(&listener_data->rate);
for (j = 0; j < session->num_prfcnts; j++) {
struct kbase_ipa_control_prfcnt *prfcnt =
&session->prfcnts[j];
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
if (prfcnt->gpu_norm)
calc_prfcnt_delta(kbdev, prfcnt, true);
}
if (session->active) {
size_t j;
for (j = 0; j < session->num_prfcnts; j++) {
struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[j];
if (prfcnt->gpu_norm)
calc_prfcnt_delta(kbdev, prfcnt, true);
}
}
ipa_ctrl->cur_gpu_rate = clk_rate_hz;
/* Update the timer for automatic sampling if active sessions
* are present. Counters have already been manually sampled.
*/
if (ipa_ctrl->num_active_sessions > 0) {
kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
timer_value(ipa_ctrl->cur_gpu_rate));
}
spin_unlock(&ipa_ctrl->lock);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
ipa_ctrl->cur_gpu_rate = rate;
/* Update the timer for automatic sampling if active sessions
* are present. Counters have already been manually sampled.
*/
if (ipa_ctrl->num_active_sessions > 0)
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER), timer_value(rate));
spin_unlock(&ipa_ctrl->lock);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
void kbase_ipa_control_init(struct kbase_device *kbdev)
@@ -330,39 +288,44 @@ void kbase_ipa_control_init(struct kbase_device *kbdev)
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
struct kbase_ipa_control_listener_data *listener_data;
size_t i, j;
size_t i;
unsigned long flags;
for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) {
for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
ipa_ctrl->blocks[i].select[j].idx = 0;
ipa_ctrl->blocks[i].select[j].refcount = 0;
}
ipa_ctrl->blocks[i].num_available_counters =
KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS;
ipa_ctrl->blocks[i].num_available_counters = KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS;
}
spin_lock_init(&ipa_ctrl->lock);
ipa_ctrl->num_active_sessions = 0;
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++)
ipa_ctrl->sessions[i].active = false;
listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data),
GFP_KERNEL);
listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), GFP_KERNEL);
if (listener_data) {
listener_data->listener.notify =
kbase_ipa_control_rate_change_notify;
listener_data->kbdev = kbdev;
ipa_ctrl->rtm_listener_data = listener_data;
}
listener_data->clk_chg_wq =
alloc_workqueue("ipa_ctrl_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
if (listener_data->clk_chg_wq) {
INIT_WORK(&listener_data->clk_chg_work, kbase_ipa_ctrl_rate_change_worker);
listener_data->listener.notify = kbase_ipa_control_rate_change_notify;
listener_data->kbdev = kbdev;
ipa_ctrl->rtm_listener_data = listener_data;
/* Initialise to 0, which is out of normal notified rates */
atomic_set(&listener_data->rate, 0);
} else {
dev_warn(kbdev->dev,
"%s: failed to allocate workqueue, clock rate update disabled",
__func__);
kfree(listener_data);
listener_data = NULL;
}
} else
dev_warn(kbdev->dev,
"%s: failed to allocate memory, IPA control clock rate update disabled",
__func__);
spin_lock(&clk_rtm->lock);
spin_lock_irqsave(&clk_rtm->lock, flags);
if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP])
ipa_ctrl->cur_gpu_rate =
clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val;
ipa_ctrl->cur_gpu_rate = clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val;
if (listener_data)
kbase_clk_rate_trace_manager_subscribe_no_lock(
clk_rtm, &listener_data->listener);
spin_unlock(&clk_rtm->lock);
kbase_clk_rate_trace_manager_subscribe_no_lock(clk_rtm, &listener_data->listener);
spin_unlock_irqrestore(&clk_rtm->lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_init);
@@ -371,18 +334,19 @@ void kbase_ipa_control_term(struct kbase_device *kbdev)
unsigned long flags;
struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
struct kbase_ipa_control_listener_data *listener_data =
ipa_ctrl->rtm_listener_data;
struct kbase_ipa_control_listener_data *listener_data = ipa_ctrl->rtm_listener_data;
WARN_ON(ipa_ctrl->num_active_sessions);
if (listener_data)
if (listener_data) {
kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener);
destroy_workqueue(listener_data->clk_chg_wq);
}
kfree(ipa_ctrl->rtm_listener_data);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.gpu_powered)
kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER), 0);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_term);
@@ -403,8 +367,7 @@ static void session_read_raw_values(struct kbase_device *kbdev,
for (i = 0; i < session->num_prfcnts; i++) {
struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i];
u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
prfcnt->select_idx);
u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, prfcnt->select_idx);
prfcnt->latest_raw_value = raw_value;
}
@@ -429,12 +392,10 @@ static void session_read_raw_values(struct kbase_device *kbdev,
*
* Return: 0 on success, or error code on failure.
*/
static int session_gpu_start(struct kbase_device *kbdev,
struct kbase_ipa_control *ipa_ctrl,
static int session_gpu_start(struct kbase_device *kbdev, struct kbase_ipa_control *ipa_ctrl,
struct kbase_ipa_control_session *session)
{
bool first_start =
(session != NULL) && (ipa_ctrl->num_active_sessions == 0);
bool first_start = (session != NULL) && (ipa_ctrl->num_active_sessions == 0);
int ret = 0;
lockdep_assert_held(&kbdev->csf.ipa_control.lock);
@@ -455,14 +416,12 @@ static int session_gpu_start(struct kbase_device *kbdev,
* sampling.
*/
if (!session || first_start) {
kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
COMMAND_SAMPLE);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_SAMPLE);
ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
if (ret)
dev_err(kbdev->dev, "%s: failed to sample new counters",
__func__);
kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
timer_value(ipa_ctrl->cur_gpu_rate));
dev_err(kbdev->dev, "%s: failed to sample new counters", __func__);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER),
timer_value(ipa_ctrl->cur_gpu_rate));
}
/*
@@ -482,10 +441,10 @@ static int session_gpu_start(struct kbase_device *kbdev,
} else {
size_t session_idx;
for (session_idx = 0;
session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
session_idx++) {
struct kbase_ipa_control_session *session_to_check = &ipa_ctrl->sessions[session_idx];
struct kbase_ipa_control_session *session_to_check =
&ipa_ctrl->sessions[session_idx];
if (session_to_check->active)
session_read_raw_values(kbdev, session_to_check);
@@ -496,10 +455,9 @@ static int session_gpu_start(struct kbase_device *kbdev,
return ret;
}
int kbase_ipa_control_register(
struct kbase_device *kbdev,
const struct kbase_ipa_control_perf_counter *perf_counters,
size_t num_counters, void **client)
int kbase_ipa_control_register(struct kbase_device *kbdev,
const struct kbase_ipa_control_perf_counter *perf_counters,
size_t num_counters, void **client)
{
int ret = 0;
size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM];
@@ -542,10 +500,8 @@ int kbase_ipa_control_register(
enum kbase_ipa_core_type type = perf_counters[i].type;
u8 idx = perf_counters[i].idx;
if ((type >= KBASE_IPA_CORE_TYPE_NUM) ||
(idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) {
dev_err(kbdev->dev,
"%s: invalid requested type %u and/or index %u",
if ((type >= KBASE_IPA_CORE_TYPE_NUM) || (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) {
dev_err(kbdev->dev, "%s: invalid requested type %u and/or index %u",
__func__, type, idx);
ret = -EINVAL;
goto exit;
@@ -571,8 +527,7 @@ int kbase_ipa_control_register(
}
for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++)
if (req_counters[i] >
ipa_ctrl->blocks[i].num_available_counters) {
if (req_counters[i] > ipa_ctrl->blocks[i].num_available_counters) {
dev_err(kbdev->dev,
"%s: more counters (%zu) than available (%zu) have been requested for type %zu",
__func__, req_counters[i],
@@ -587,8 +542,7 @@ int kbase_ipa_control_register(
* of the session and update the configuration of performance counters
* in the internal state of kbase_ipa_control.
*/
for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
session_idx++) {
for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; session_idx++) {
if (!ipa_ctrl->sessions[session_idx].active) {
session = &ipa_ctrl->sessions[session_idx];
break;
@@ -596,8 +550,7 @@ int kbase_ipa_control_register(
}
if (!session) {
dev_err(kbdev->dev, "%s: wrong or corrupt session state",
__func__);
dev_err(kbdev->dev, "%s: wrong or corrupt session state", __func__);
ret = -EBUSY;
goto exit;
}
@@ -612,8 +565,7 @@ int kbase_ipa_control_register(
prfcnt_config = &ipa_ctrl->blocks[type].select[j];
if (already_configured[i]) {
if ((prfcnt_config->refcount > 0) &&
(prfcnt_config->idx == idx)) {
if ((prfcnt_config->refcount > 0) && (prfcnt_config->idx == idx)) {
break;
}
} else {
@@ -622,8 +574,7 @@ int kbase_ipa_control_register(
}
}
if (WARN_ON((prfcnt_config->refcount > 0 &&
prfcnt_config->idx != idx) ||
if (WARN_ON((prfcnt_config->refcount > 0 && prfcnt_config->idx != idx) ||
(j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) {
dev_err(kbdev->dev,
"%s: invalid internal state: counter already configured or no counter available to configure",
@@ -640,8 +591,7 @@ int kbase_ipa_control_register(
session->prfcnts[i].accumulated_diff = 0;
session->prfcnts[i].type = type;
session->prfcnts[i].select_idx = j;
session->prfcnts[i].scaling_factor =
perf_counters[i].scaling_factor;
session->prfcnts[i].scaling_factor = perf_counters[i].scaling_factor;
session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm;
/* Reports to this client for GPU time spent in protected mode
@@ -663,8 +613,7 @@ int kbase_ipa_control_register(
if (new_config) {
ret = update_select_registers(kbdev);
if (ret)
dev_err(kbdev->dev,
"%s: failed to apply new SELECT configuration",
dev_err(kbdev->dev, "%s: failed to apply new SELECT configuration",
__func__);
}
@@ -730,8 +679,7 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
}
if (!session->active) {
dev_err(kbdev->dev, "%s: session is already inactive",
__func__);
dev_err(kbdev->dev, "%s: session is already inactive", __func__);
ret = -EINVAL;
goto exit;
}
@@ -755,9 +703,7 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
if (new_config) {
ret = update_select_registers(kbdev);
if (ret)
dev_err(kbdev->dev,
"%s: failed to apply SELECT configuration",
__func__);
dev_err(kbdev->dev, "%s: failed to apply SELECT configuration", __func__);
}
session->num_prfcnts = 0;
@@ -771,8 +717,8 @@ exit:
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister);
int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
u64 *values, size_t num_values, u64 *protected_time)
int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, u64 *values,
size_t num_values, u64 *protected_time)
{
struct kbase_ipa_control *ipa_ctrl;
struct kbase_ipa_control_session *session;
@@ -792,14 +738,12 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
session = (struct kbase_ipa_control_session *)client;
if (!session->active) {
dev_err(kbdev->dev,
"%s: attempt to query inactive session", __func__);
dev_err(kbdev->dev, "%s: attempt to query inactive session", __func__);
return -EINVAL;
}
if (WARN_ON(num_values < session->num_prfcnts)) {
dev_err(kbdev->dev,
"%s: not enough space (%zu) to return all counter values (%zu)",
dev_err(kbdev->dev, "%s: not enough space (%zu) to return all counter values (%zu)",
__func__, num_values, session->num_prfcnts);
return -EINVAL;
}
@@ -826,8 +770,7 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
if (kbdev->protected_mode) {
*protected_time +=
time_now - MAX(session->last_query_time,
ipa_ctrl->protm_start);
time_now - MAX(session->last_query_time, ipa_ctrl->protm_start);
}
session->last_query_time = time_now;
session->protm_time = 0;
@@ -857,35 +800,27 @@ void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev)
spin_lock(&ipa_ctrl->lock);
/* First disable the automatic sampling through TIMER */
kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER), 0);
ret = wait_status(kbdev, STATUS_TIMER_ENABLED);
if (ret) {
dev_err(kbdev->dev,
"Wait for disabling of IPA control timer failed: %d",
ret);
dev_err(kbdev->dev, "Wait for disabling of IPA control timer failed: %d", ret);
}
/* Now issue the manual SAMPLE command */
kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_SAMPLE);
ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
if (ret) {
dev_err(kbdev->dev,
"Wait for the completion of manual sample failed: %d",
ret);
dev_err(kbdev->dev, "Wait for the completion of manual sample failed: %d", ret);
}
for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
session_idx++) {
struct kbase_ipa_control_session *session =
&ipa_ctrl->sessions[session_idx];
for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; session_idx++) {
struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[session_idx];
if (session->active) {
size_t i;
for (i = 0; i < session->num_prfcnts; i++) {
struct kbase_ipa_control_prfcnt *prfcnt =
&session->prfcnts[i];
struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i];
calc_prfcnt_delta(kbdev, prfcnt, true);
}
@@ -909,8 +844,7 @@ void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev)
ret = update_select_registers(kbdev);
if (ret) {
dev_err(kbdev->dev,
"Failed to reconfigure the select registers: %d", ret);
dev_err(kbdev->dev, "Failed to reconfigure the select registers: %d", ret);
}
/* Accumulator registers would not contain any sample after GPU power
@@ -943,15 +877,13 @@ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev)
spin_lock(&ipa_ctrl->lock);
/* Check the status reset bit is set before acknowledging it */
status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS));
if (status & STATUS_RESET) {
/* Acknowledge the reset command */
kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_RESET_ACK);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_RESET_ACK);
ret = wait_status(kbdev, STATUS_RESET);
if (ret) {
dev_err(kbdev->dev,
"Wait for the reset ack command failed: %d",
ret);
dev_err(kbdev->dev, "Wait for the reset ack command failed: %d", ret);
}
}
@@ -973,8 +905,7 @@ void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev)
/* SELECT_CSHW register needs to be cleared to prevent any
* IPA control message to be sent to the top level GPU HWCNT.
*/
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0);
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0);
kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_CSHW), 0);
/* No need to issue the APPLY command here */
}
@@ -999,15 +930,15 @@ KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit);
#endif
#if MALI_UNIT_TEST
void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
u32 clk_index, u32 clk_rate_hz)
void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index,
u32 clk_rate_hz)
{
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
struct kbase_ipa_control_listener_data *listener_data =
ipa_ctrl->rtm_listener_data;
struct kbase_ipa_control_listener_data *listener_data = ipa_ctrl->rtm_listener_data;
kbase_ipa_control_rate_change_notify(&listener_data->listener,
clk_index, clk_rate_hz);
kbase_ipa_control_rate_change_notify(&listener_data->listener, clk_index, clk_rate_hz);
/* Ensure the callback has taken effect before returning back to the test caller */
flush_work(&listener_data->clk_chg_work);
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test);
#endif
@@ -1030,13 +961,11 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
struct kbase_ipa_control_session *session =
&ipa_ctrl->sessions[i];
struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
if (session->active) {
u64 protm_time = time_now - MAX(session->last_query_time,
ipa_ctrl->protm_start);
u64 protm_time =
time_now - MAX(session->last_query_time, ipa_ctrl->protm_start);
session->protm_time += protm_time;
}
@@ -1045,19 +974,15 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
/* Acknowledge the protected_mode bit in the IPA_CONTROL STATUS
* register
*/
status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS));
if (status & STATUS_PROTECTED_MODE) {
int ret;
/* Acknowledge the protm command */
kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
COMMAND_PROTECTED_ACK);
kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_PROTECTED_ACK);
ret = wait_status(kbdev, STATUS_PROTECTED_MODE);
if (ret) {
dev_err(kbdev->dev,
"Wait for the protm ack command failed: %d",
ret);
dev_err(kbdev->dev, "Wait for the protm ack command failed: %d", ret);
}
}
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -68,6 +68,8 @@ struct kbase_ipa_control_perf_counter {
* kbase_ipa_control_init - Initialize the IPA Control component
*
* @kbdev: Pointer to Kbase device.
*
* This function must be called only when a kbase device is initialized.
*/
void kbase_ipa_control_init(struct kbase_device *kbdev);
@@ -103,10 +105,9 @@ void kbase_ipa_control_term(struct kbase_device *kbdev);
*
* Return: 0 on success, negative -errno on error
*/
int kbase_ipa_control_register(
struct kbase_device *kbdev,
const struct kbase_ipa_control_perf_counter *perf_counters,
size_t num_counters, void **client);
int kbase_ipa_control_register(struct kbase_device *kbdev,
const struct kbase_ipa_control_perf_counter *perf_counters,
size_t num_counters, void **client);
/**
* kbase_ipa_control_unregister - Unregister a client from IPA Control
@@ -117,8 +118,7 @@ int kbase_ipa_control_register(
*
* Return: 0 on success, negative -errno on error
*/
int kbase_ipa_control_unregister(struct kbase_device *kbdev,
const void *client);
int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client);
/**
* kbase_ipa_control_query - Query performance counters
@@ -152,9 +152,8 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev,
*
* Return: 0 on success, negative -errno on error
*/
int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
u64 *values, size_t num_values,
u64 *protected_time);
int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, u64 *values,
size_t num_values, u64 *protected_time);
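Editor's note: taken together, the register/query/unregister calls form the client lifecycle for this interface. The following is a hedged sketch of that flow built on the prototypes shown above; the counter selection (type and index), scaling factor and buffer size are illustrative assumptions, not values from the driver headers, and the IPA Control header declaring these prototypes is assumed to be included.

#include <mali_kbase.h>

static int example_ipa_control_client(struct kbase_device *kbdev)
{
	/* Placeholder counter choice: one shader-core counter at index 0. */
	struct kbase_ipa_control_perf_counter cnt = {
		.type = KBASE_IPA_CORE_TYPE_SHADER,
		.idx = 0,
		.scaling_factor = 1,
		.gpu_norm = false,
	};
	void *client = NULL;
	u64 value = 0, protected_time = 0;
	int ret;

	ret = kbase_ipa_control_register(kbdev, &cnt, 1, &client);
	if (ret)
		return ret;

	ret = kbase_ipa_control_query(kbdev, client, &value, 1, &protected_time);
	if (!ret)
		dev_info(kbdev->dev, "counter=%llu protm=%llu",
			 (unsigned long long)value,
			 (unsigned long long)protected_time);

	return kbase_ipa_control_unregister(kbdev, client);
}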
/**
* kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event
@@ -236,8 +235,8 @@ void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev);
*
* Notify the IPA Control component about a GPU rate change.
*/
void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
u32 clk_index, u32 clk_rate_hz);
void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index,
u32 clk_rate_hz);
#endif /* MALI_UNIT_TEST */
/**

File diff suppressed because it is too large.

View File

@@ -48,7 +48,7 @@
*/
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */
#define FIRMWARE_IDLE_HYSTERESIS_TIME_NS (10 * 1000 * 1000) /* Default 10 milliseconds */
/* Idle hysteresis time can be scaled down when GPU sleep feature is used */
#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)
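Editor's note: if the scaler is applied as a divisor, which its name and the comment suggest although the use site is not part of this hunk, the effective hysteresis when GPU sleep is in use would be 10 ms / 5 = 2 ms. As a purely illustrative macro:

/* Illustration only, assuming the scaler divides the default value:
 * 10,000,000 ns / 5 = 2,000,000 ns (2 ms). */
#define EXAMPLE_SLEEP_IDLE_HYSTERESIS_NS \
	(FIRMWARE_IDLE_HYSTERESIS_TIME_NS / FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER)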
@@ -72,8 +72,19 @@ int kbase_csf_ctx_init(struct kbase_context *kctx);
* This function terminates all GPU command queue groups in the context and
* notifies the event notification thread of the fault.
*/
void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
struct kbase_fault *fault);
void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault);
/**
* kbase_csf_ctx_report_page_fault_for_active_groups - Notify Userspace about GPU page fault
* for active groups of the faulty context.
*
* @kctx: Pointer to faulty kbase context.
* @fault: Pointer to the fault.
*
* This function notifies the event notification thread of the GPU page fault.
*/
void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx,
struct kbase_fault *fault);
/**
* kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space.
@@ -96,8 +107,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx);
*
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_queue_register(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register *reg);
int kbase_csf_queue_register(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_register *reg);
/**
* kbase_csf_queue_register_ex - Register a GPU command queue with
@@ -113,7 +123,7 @@ int kbase_csf_queue_register(struct kbase_context *kctx,
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_queue_register_ex(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register_ex *reg);
struct kbase_ioctl_cs_queue_register_ex *reg);
/**
* kbase_csf_queue_terminate - Terminate a GPU command queue.
@@ -124,7 +134,7 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
* queue is to be terminated.
*/
void kbase_csf_queue_terminate(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_terminate *term);
struct kbase_ioctl_cs_queue_terminate *term);
/**
* kbase_csf_free_command_stream_user_pages() - Free the resources allocated
@@ -160,7 +170,7 @@ void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
struct kbase_queue *queue);
struct kbase_queue *queue);
/**
* kbase_csf_queue_bind - Bind a GPU command queue to a queue group.
@@ -171,8 +181,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
*
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_queue_bind(struct kbase_context *kctx,
union kbase_ioctl_cs_queue_bind *bind);
int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind);
/**
* kbase_csf_queue_unbind - Unbind a GPU command queue from a queue group
@@ -204,11 +213,10 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue);
*
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick);
int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick);
/**
* kbase_csf_queue_group_handle_is_valid - Find the queue group corresponding
* kbase_csf_find_queue_group - Find the queue group corresponding
* to the indicated handle.
*
* @kctx: The kbase context under which the queue group exists.
@@ -233,8 +241,7 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx,
*
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
u8 group_handle);
int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle);
/**
* kbase_csf_queue_group_create - Create a GPU command queue group.
@@ -248,7 +255,7 @@ int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_queue_group_create(struct kbase_context *kctx,
union kbase_ioctl_cs_queue_group_create *create);
union kbase_ioctl_cs_queue_group_create *create);
/**
* kbase_csf_queue_group_terminate - Terminate a GPU command queue group.
@@ -258,8 +265,7 @@ int kbase_csf_queue_group_create(struct kbase_context *kctx,
* @group_handle: Pointer to the structure which identifies the queue
* group which is to be terminated.
*/
void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
u8 group_handle);
void kbase_csf_queue_group_terminate(struct kbase_context *kctx, u8 group_handle);
/**
* kbase_csf_term_descheduled_queue_group - Terminate a GPU command queue
@@ -291,7 +297,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
* queue group and copy suspend buffer contents.
*/
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
#endif
/**
@@ -300,9 +306,8 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
* @group: GPU command queue group.
* @err_payload: Error payload to report.
*/
void kbase_csf_add_group_fatal_error(
struct kbase_queue_group *const group,
struct base_gpu_queue_group_error const *const err_payload);
void kbase_csf_add_group_fatal_error(struct kbase_queue_group *const group,
struct base_gpu_queue_group_error const *const err_payload);
/**
* kbase_csf_interrupt - Handle interrupts issued by CSF firmware.
@@ -312,6 +317,19 @@ void kbase_csf_add_group_fatal_error(
*/
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val);
/**
* kbase_csf_handle_csg_sync_update - Handle SYNC_UPDATE notification for the group.
*
* @kbdev: The kbase device to handle the SYNC_UPDATE interrupt.
* @ginfo: Pointer to the CSG interface used by the @group
* @group: Pointer to the GPU command queue group.
* @req: CSG_REQ register value corresponding to @group.
* @ack: CSG_ACK register value corresponding to @group.
*/
void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev,
struct kbase_csf_cmd_stream_group_info *ginfo,
struct kbase_queue_group *group, u32 req, u32 ack);
/**
* kbase_csf_doorbell_mapping_init - Initialize the fields that facilitates
* the update of userspace mapping of HW
@@ -360,6 +378,22 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
*/
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
/**
* kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling
* GPU queue kicks.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev);
/**
* kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling
* GPU queue kicks.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev);
/**
* kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
*
@@ -379,8 +413,7 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot);
*
* The function kicks a notification on a set of CSG interfaces to firmware.
*/
void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
u32 slot_bitmap);
void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, u32 slot_bitmap);
/**
* kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI
@@ -400,8 +433,7 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
* The function sends a doorbell interrupt notification to the firmware for
* a CSI assigned to a GPU queue.
*/
void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
int csi_index, int csg_nr,
void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index, int csg_nr,
bool ring_csg_doorbell);
/**
@@ -414,8 +446,7 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
* The function kicks a notification to the firmware on the doorbell assigned
* to the queue.
*/
void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
struct kbase_queue *queue);
void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, struct kbase_queue *queue);
/**
* kbase_csf_active_queue_groups_reset - Reset the state of all active GPU
@@ -431,8 +462,7 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
*
* This is similar to the action taken in response to an unexpected OoM event.
*/
void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
struct kbase_context *kctx);
void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, struct kbase_context *kctx);
/**
* kbase_csf_priority_check - Check the priority requested
@@ -484,12 +514,12 @@ static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority
}
/**
* kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retreive the GPU cycle counter
* kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retrieve the GPU cycle counter
* value for Ktrace purpose.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function is just a wrapper to retreive the GPU cycle counter value, to
* This function is just a wrapper to retrieve the GPU cycle counter value, to
* avoid any overhead on Release builds where Ktrace is disabled by default.
*
* Return: Snapshot of the GPU cycle count register.
@@ -499,8 +529,21 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
#if KBASE_KTRACE_ENABLE
return kbase_backend_get_cycle_cnt(kbdev);
#else
CSTD_UNUSED(kbdev);
return 0;
#endif
}
/**
* kbase_csf_process_queue_kick() - Process a pending kicked GPU command queue.
*
* @queue: Pointer to the queue to process.
*
* This function starts the pending queue, for which the work
* was previously submitted via ioctl call from application thread.
* If the queue is already scheduled and resident, it will be started
* right away, otherwise once the group is made resident.
*/
void kbase_csf_process_queue_kick(struct kbase_queue *queue);
#endif /* _KBASE_CSF_H_ */
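The declarations above are the kernel-side entry points behind the queue ioctls. The following is a hedged sketch (not part of the driver) of the call order they imply, assuming mali_kbase_csf.h is included and the ioctl argument structs have already been copied in from userspace and validated by the caller; the helper name example_queue_lifecycle is hypothetical.

static int example_queue_lifecycle(struct kbase_context *kctx,
				   struct kbase_ioctl_cs_queue_register *reg,
				   union kbase_ioctl_cs_queue_bind *bind,
				   struct kbase_ioctl_cs_queue_kick *kick,
				   struct kbase_ioctl_cs_queue_terminate *term)
{
	int err;

	err = kbase_csf_queue_register(kctx, reg);   /* make the queue known to kbase */
	if (err)
		return err;

	err = kbase_csf_queue_bind(kctx, bind);      /* attach it to a queue group */
	if (err)
		goto out;

	err = kbase_csf_queue_kick(kctx, kick);      /* ask firmware to start processing */

out:
	/* Teardown is shown immediately only for brevity of the sketch. */
	kbase_csf_queue_terminate(kctx, term);
	return err;
}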

View File

@@ -0,0 +1,132 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include "mali_kbase_csf_cpu_queue.h"
#include "mali_kbase_csf_util.h"
#include <mali_kbase.h>
#include <asm/atomic.h>
void kbase_csf_cpu_queue_init(struct kbase_context *kctx)
{
if (WARN_ON(!kctx))
return;
kctx->csf.cpu_queue.buffer = NULL;
kctx->csf.cpu_queue.buffer_size = 0;
atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
}
bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
struct base_csf_notification *req)
{
if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED,
BASE_CSF_CPU_QUEUE_DUMP_PENDING) != BASE_CSF_CPU_QUEUE_DUMP_ISSUED) {
return false;
}
req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP;
return true;
}
bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx)
{
return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
}
int kbase_csf_cpu_queue_dump_buffer(struct kbase_context *kctx, u64 buffer, size_t buf_size)
{
size_t alloc_size = buf_size;
char *dump_buffer;
if (!buffer || !buf_size)
return 0;
if (alloc_size > KBASE_MEM_ALLOC_MAX_SIZE)
return -EINVAL;
alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1);
dump_buffer = kzalloc(alloc_size, GFP_KERNEL);
if (!dump_buffer)
return -ENOMEM;
WARN_ON(kctx->csf.cpu_queue.buffer != NULL);
if (copy_from_user(dump_buffer, u64_to_user_ptr(buffer), buf_size)) {
kfree(dump_buffer);
return -EFAULT;
}
mutex_lock(&kctx->csf.lock);
kfree(kctx->csf.cpu_queue.buffer);
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == BASE_CSF_CPU_QUEUE_DUMP_PENDING) {
kctx->csf.cpu_queue.buffer = dump_buffer;
kctx->csf.cpu_queue.buffer_size = buf_size;
complete_all(&kctx->csf.cpu_queue.dump_cmp);
} else
kfree(dump_buffer);
mutex_unlock(&kctx->csf.lock);
return 0;
}
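The allocation above rounds the user-supplied size up to a whole number of pages before kzalloc(). A small stand-alone illustration of that expression (a PAGE_SIZE of 4096 is assumed here; this is not driver code):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Same expression as the allocation above: the requested size is padded by a
 * page and then truncated to a page boundary, so the result is always at
 * least one page and always leaves head-room past buf_size.
 */
static unsigned long round_alloc(unsigned long buf_size)
{
	return (buf_size + PAGE_SIZE) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	printf("%lu -> %lu\n", 100UL, round_alloc(100UL));     /* 100  -> 4096 */
	printf("%lu -> %lu\n", 4096UL, round_alloc(4096UL));   /* 4096 -> 8192 */
	printf("%lu -> %lu\n", 6000UL, round_alloc(6000UL));   /* 6000 -> 8192 */
	return 0;
}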
int kbasep_csf_cpu_queue_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr)
{
mutex_lock(&kctx->csf.lock);
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) != BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
kbasep_print(kbpr, "Dump request already started! (try again)\n");
mutex_unlock(&kctx->csf.lock);
return -EBUSY;
}
atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
init_completion(&kctx->csf.cpu_queue.dump_cmp);
kbase_event_wakeup(kctx);
mutex_unlock(&kctx->csf.lock);
kbasep_print(kbpr, "CPU Queues table (version:v" __stringify(
MALI_CSF_CPU_QUEUE_DUMP_VERSION) "):\n");
wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp, msecs_to_jiffies(3000));
mutex_lock(&kctx->csf.lock);
if (kctx->csf.cpu_queue.buffer) {
WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
BASE_CSF_CPU_QUEUE_DUMP_PENDING);
/* The CPU queue dump is returned as a single formatted string */
kbasep_puts(kbpr, kctx->csf.cpu_queue.buffer);
kbasep_puts(kbpr, "\n");
kfree(kctx->csf.cpu_queue.buffer);
kctx->csf.cpu_queue.buffer = NULL;
kctx->csf.cpu_queue.buffer_size = 0;
} else
kbasep_print(kbpr, "Dump error! (time out)\n");
atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
mutex_unlock(&kctx->csf.lock);
return 0;
}
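Taken together, these functions implement a three-state handshake around dump_req_status: COMPLETE -> ISSUED (the kernel requests a dump and wakes userspace) -> PENDING (userspace has read the notification) -> COMPLETE (the buffer is delivered and printed, or the wait times out). The compare-and-exchange in kbase_csf_cpu_queue_read_dump_req() is what stops two readers from claiming the same request; a minimal C11 sketch of that step, using the same state values as the header:

#include <stdio.h>
#include <stdatomic.h>

#define DUMP_COMPLETE 0
#define DUMP_PENDING  1
#define DUMP_ISSUED   2

/* Mirrors the read_dump_req transition: only ISSUED -> PENDING may claim the
 * request, so a concurrent reader cannot consume the same dump request twice.
 */
static int claim_dump_request(atomic_int *status)
{
	int expected = DUMP_ISSUED;

	return atomic_compare_exchange_strong(status, &expected, DUMP_PENDING);
}

int main(void)
{
	atomic_int status = DUMP_ISSUED;

	printf("first claim:  %d\n", claim_dump_request(&status)); /* 1: succeeds */
	printf("second claim: %d\n", claim_dump_request(&status)); /* 0: already pending */
	return 0;
}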

View File

@@ -0,0 +1,90 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_CPU_QUEUE_H_
#define _KBASE_CSF_CPU_QUEUE_H_
#include <linux/types.h>
/* Forward declaration */
struct base_csf_notification;
struct kbase_context;
struct kbasep_printer;
#define MALI_CSF_CPU_QUEUE_DUMP_VERSION 0
/* CPU queue dump status */
/* Dumping is done or no dumping is in progress. */
#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0
/* Dumping request is pending. */
#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1
/* Dumping request is issued to Userspace */
#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2
/**
* kbase_csf_cpu_queue_init() - Initialise cpu queue handling per context cpu queue(s)
*
* @kctx: The kbase_context
*/
void kbase_csf_cpu_queue_init(struct kbase_context *kctx);
/**
* kbase_csf_cpu_queue_read_dump_req() - Read cpu queue dump request event
*
* @kctx: The kbase_context to which the dumped cpu queue belongs.
* @req: Notification with cpu queue dump request.
*
* Return: true if a CPU queue dump is needed, or false otherwise.
*/
bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
struct base_csf_notification *req);
/**
* kbase_csf_cpu_queue_dump_needed() - Check the requirement for cpu queue dump
*
* @kctx: The kbase_context to which the dumped cpu queue belongs.
*
* Return: true if a cpu queue dump is needed, or false otherwise.
*/
bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx);
/**
* kbase_csf_cpu_queue_dump_buffer() - dump buffer containing cpu queue information
*
* @kctx: The kbase_context to which the dumped cpu queue belongs.
* @buffer: Buffer containing the cpu queue information.
* @buf_size: Buffer size.
*
* Return: 0 on success, or an error code.
*/
int kbase_csf_cpu_queue_dump_buffer(struct kbase_context *kctx, u64 buffer, size_t buf_size);
/**
* kbasep_csf_cpu_queue_dump_print() - Dump cpu queue information to file
*
* @kctx: The kbase_context to which the dumped cpu queue belongs.
* @kbpr: Pointer to printer instance.
*
* Return: 0 on success, or an error code.
*/
int kbasep_csf_cpu_queue_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr);
#endif /* _KBASE_CSF_CPU_QUEUE_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,24 +20,12 @@
*/
#include "mali_kbase_csf_cpu_queue_debugfs.h"
#include <mali_kbase.h>
#include <linux/seq_file.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
struct base_csf_notification *req)
{
if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status,
BASE_CSF_CPU_QUEUE_DUMP_ISSUED,
BASE_CSF_CPU_QUEUE_DUMP_PENDING) !=
BASE_CSF_CPU_QUEUE_DUMP_ISSUED) {
return false;
}
req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP;
return true;
}
#include "mali_kbase_csf_cpu_queue.h"
#include "mali_kbase_csf_util.h"
#include <mali_kbase.h>
#include <linux/seq_file.h>
/**
* kbasep_csf_cpu_queue_debugfs_show() - Print cpu queue information for per context
@@ -49,45 +37,18 @@ bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
*/
static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
{
struct kbase_context *kctx = file->private;
struct kbasep_printer *kbpr;
struct kbase_context *const kctx = file->private;
int ret = -EINVAL;
CSTD_UNUSED(data);
mutex_lock(&kctx->csf.lock);
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
seq_puts(file, "Dump request already started! (try again)\n");
mutex_unlock(&kctx->csf.lock);
return -EBUSY;
kbpr = kbasep_printer_file_init(file);
if (kbpr != NULL) {
ret = kbasep_csf_cpu_queue_dump_print(kctx, kbpr);
kbasep_printer_term(kbpr);
}
atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
init_completion(&kctx->csf.cpu_queue.dump_cmp);
kbase_event_wakeup(kctx);
mutex_unlock(&kctx->csf.lock);
seq_puts(file,
"CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n");
wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp,
msecs_to_jiffies(3000));
mutex_lock(&kctx->csf.lock);
if (kctx->csf.cpu_queue.buffer) {
WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
BASE_CSF_CPU_QUEUE_DUMP_PENDING);
seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer);
kfree(kctx->csf.cpu_queue.buffer);
kctx->csf.cpu_queue.buffer = NULL;
kctx->csf.cpu_queue.buffer_size = 0;
} else
seq_puts(file, "Dump error! (time out)\n");
atomic_set(&kctx->csf.cpu_queue.dump_req_status,
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
mutex_unlock(&kctx->csf.lock);
return 0;
return ret;
}
static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file)
@@ -109,66 +70,14 @@ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx)
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;
file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry,
kctx, &kbasep_csf_cpu_queue_debugfs_fops);
file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry, kctx,
&kbasep_csf_cpu_queue_debugfs_fops);
if (IS_ERR_OR_NULL(file)) {
dev_warn(kctx->kbdev->dev,
"Unable to create cpu queue debugfs entry");
dev_warn(kctx->kbdev->dev, "Unable to create cpu queue debugfs entry");
}
kctx->csf.cpu_queue.buffer = NULL;
kctx->csf.cpu_queue.buffer_size = 0;
atomic_set(&kctx->csf.cpu_queue.dump_req_status,
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
}
int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
u64 buffer, size_t buf_size)
{
int err = 0;
size_t alloc_size = buf_size;
char *dump_buffer;
if (!buffer || !alloc_size)
goto done;
alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1);
dump_buffer = kzalloc(alloc_size, GFP_KERNEL);
if (ZERO_OR_NULL_PTR(dump_buffer)) {
err = -ENOMEM;
goto done;
}
WARN_ON(kctx->csf.cpu_queue.buffer != NULL);
err = copy_from_user(dump_buffer,
u64_to_user_ptr(buffer),
buf_size);
if (err) {
kfree(dump_buffer);
err = -EFAULT;
goto done;
}
mutex_lock(&kctx->csf.lock);
kfree(kctx->csf.cpu_queue.buffer);
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
BASE_CSF_CPU_QUEUE_DUMP_PENDING) {
kctx->csf.cpu_queue.buffer = dump_buffer;
kctx->csf.cpu_queue.buffer_size = buf_size;
complete_all(&kctx->csf.cpu_queue.dump_cmp);
} else {
kfree(dump_buffer);
}
mutex_unlock(&kctx->csf.lock);
done:
return err;
}
#else
/*
* Stub functions for when debugfs is disabled
@@ -177,15 +86,4 @@ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx)
{
}
bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
struct base_csf_notification *req)
{
return false;
}
int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
u64 buffer, size_t buf_size)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,24 +22,8 @@
#ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_
#define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_
#include <asm/atomic.h>
#include <linux/types.h>
#include "mali_kbase.h"
/* Forward declaration */
struct base_csf_notification;
#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0
/* CPU queue dump status */
/* Dumping is done or no dumping is in progress. */
#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0
/* Dumping request is pending. */
#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1
/* Dumping request is issued to Userspace */
#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2
struct kbase_context;
/**
* kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s)
@@ -48,43 +32,4 @@ struct base_csf_notification;
*/
void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx);
/**
* kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event
*
* @kctx: The kbase_context which cpu queue dumpped belongs to
* @req: Notification with cpu queue dump request.
*
* Return: true if needs CPU queue dump, or false otherwise.
*/
bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
struct base_csf_notification *req);
/**
* kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump
*
* @kctx: The kbase_context which cpu queue dumpped belongs to
*
* Return: true if it needs cpu queue dump, or false otherwise.
*/
static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx)
{
#if IS_ENABLED(CONFIG_DEBUG_FS)
return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
#else
return false;
#endif
}
/**
* kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs
*
* @kctx: The kbase_context which cpu queue dumpped belongs to
* @buffer: Buffer containing the cpu queue information.
* @buf_size: Buffer size.
*
* Return: Return 0 for dump successfully, or error code.
*/
int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
u64 buffer, size_t buf_size);
#endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */

View File

@@ -0,0 +1,648 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include "mali_kbase_csf_csg.h"
#include "mali_kbase_csf_scheduler.h"
#include "mali_kbase_csf_util.h"
#include <mali_kbase.h>
#include <linux/delay.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/* Wait time to be used cumulatively for all the CSG slots.
* Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be
* any other Host request pending on the FW side and usually FW would be responsive
* to the Doorbell IRQs as it won't do any polling for a long time and also it won't
* have to wait for any HW state transition to complete for publishing the status.
* So it is reasonable to expect that handling of STATUS_UPDATE request would be
* relatively very quick.
*/
#define STATUS_UPDATE_WAIT_TIMEOUT_NS 500
/* Number of nearby commands around the "cmd_ptr" of GPU queues.
*
* [cmd_ptr - MAX_NR_NEARBY_INSTR, cmd_ptr + MAX_NR_NEARBY_INSTR].
*/
#define MAX_NR_NEARBY_INSTR 32
/* The bitmask of CSG slots for which the STATUS_UPDATE request completed.
* The access to it is serialized with scheduler lock, so at a time it would
* get used either for "active_groups" or per context "groups".
*/
static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS);
/* String header for dumping cs user I/O status information */
#define KBASEP_CSF_CSG_DUMP_CS_HEADER_USER_IO \
"Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n"
/* String representation of WAITING */
#define WAITING "Waiting"
/* String representation of NOT_WAITING */
#define NOT_WAITING "Not waiting"
/**
* csg_slot_status_update_finish() - Complete STATUS_UPDATE request for a group slot.
*
* @kbdev: Pointer to kbase device.
* @csg_nr: The group slot number.
*
* Return: Non-zero if not complete, otherwise zero.
*/
static bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr)
{
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[csg_nr];
return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
CSG_REQ_STATUS_UPDATE_MASK);
}
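The completion test relies on the usual REQ/ACK toggle protocol: the host flips the STATUS_UPDATE bit in CSG_REQ, and the request remains outstanding until firmware flips the matching bit in CSG_ACK. A stand-alone illustration of the same XOR-and-mask check (the mask bit position used here is illustrative only, not the real register layout):

#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_STATUS_UPDATE_MASK 0x1u  /* illustrative bit position only */

/* The request is outstanding while the REQ and ACK copies of the bit differ;
 * firmware acknowledges by toggling its ACK bit to match REQ.
 */
static int status_update_done(uint32_t req, uint32_t ack)
{
	return !((req ^ ack) & EXAMPLE_STATUS_UPDATE_MASK);
}

int main(void)
{
	printf("%d\n", status_update_done(0x0, 0x0)); /* 1: idle, nothing pending */
	printf("%d\n", status_update_done(0x1, 0x0)); /* 0: host toggled REQ, waiting */
	printf("%d\n", status_update_done(0x1, 0x1)); /* 1: FW toggled ACK, complete */
	return 0;
}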
/**
* csg_slots_status_update_finish() - Complete STATUS_UPDATE requests for all group slots.
*
* @kbdev: Pointer to kbase device.
* @slots_mask: The group slots mask.
*
* Return: Non-zero if not complete, otherwise zero.
*/
static bool csg_slots_status_update_finish(struct kbase_device *kbdev,
const unsigned long *slots_mask)
{
const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
bool changed = false;
u32 csg_nr;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
for_each_set_bit(csg_nr, slots_mask, max_csg_slots) {
if (csg_slot_status_update_finish(kbdev, csg_nr)) {
set_bit(csg_nr, csg_slots_status_updated);
changed = true;
}
}
return changed;
}
/**
* wait_csg_slots_status_update_finish() - Wait completion of STATUS_UPDATE requests for all
* group slots.
*
* @kbdev: Pointer to kbase device.
* @slots_mask: The group slots mask.
*/
static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
unsigned long *slots_mask)
{
const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT_NS);
lockdep_assert_held(&kbdev->csf.scheduler.lock);
bitmap_zero(csg_slots_status_updated, max_csg_slots);
while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) {
remaining = wait_event_timeout(kbdev->csf.event_wait,
csg_slots_status_update_finish(kbdev, slots_mask),
remaining);
if (likely(remaining)) {
bitmap_andnot(slots_mask, slots_mask, csg_slots_status_updated,
max_csg_slots);
} else {
dev_warn(kbdev->dev, "STATUS_UPDATE request timed out for slots 0x%lx",
slots_mask[0]);
}
}
}
/**
* blocked_reason_to_string() - Convert blocking reason id to a string
*
* @reason_id: blocked_reason
*
* Return: Suitable string
*/
static const char *blocked_reason_to_string(u32 reason_id)
{
/* possible blocking reasons of a cs */
static const char *const cs_blocked_reason[] = {
[CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED",
[CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT",
[CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] = "PROGRESS_WAIT",
[CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT",
[CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED",
[CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE",
[CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH"
};
if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason)))
return "UNKNOWN_BLOCKED_REASON_ID";
return cs_blocked_reason[reason_id];
}
/**
* sb_source_supported() - Check SB_SOURCE GLB version support
*
* @glb_version: The GLB version
*
* Return: False or true on success.
*/
static bool sb_source_supported(u32 glb_version)
{
bool supported = false;
if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) &&
(GLB_VERSION_MINOR_GET(glb_version) >= 5)) ||
((GLB_VERSION_MAJOR_GET(glb_version) == 2) &&
(GLB_VERSION_MINOR_GET(glb_version) >= 6)) ||
((GLB_VERSION_MAJOR_GET(glb_version) == 1) &&
(GLB_VERSION_MINOR_GET(glb_version) >= 3)))
supported = true;
return supported;
}
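A quick stand-alone check of the version gate above, mirroring the helper exactly: SB_SOURCE is reported from GLB interface 1.3, 2.6 and 3.5 onwards within each of those major versions (this sketch is an illustration, not driver code):

#include <stdio.h>

static int sb_source_ok(unsigned int major, unsigned int minor)
{
	return (major == 3 && minor >= 5) || (major == 2 && minor >= 6) ||
	       (major == 1 && minor >= 3);
}

int main(void)
{
	printf("1.2 -> %d, 1.3 -> %d, 2.6 -> %d, 3.4 -> %d, 3.5 -> %d\n",
	       sb_source_ok(1, 2), sb_source_ok(1, 3), sb_source_ok(2, 6),
	       sb_source_ok(3, 4), sb_source_ok(3, 5));
	return 0;
}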
/**
* kbasep_csf_csg_active_dump_cs_status_wait() - Dump active queue sync status information.
*
* @kctx: Pointer to kbase context.
* @kbpr: Pointer to printer instance.
* @glb_version: The GLB version.
* @wait_status: The CS_STATUS_WAIT value.
* @wait_sync_value: The queue's cached sync value.
* @wait_sync_live_value: The queue's sync object current value.
* @wait_sync_pointer: The queue's sync object pointer.
* @sb_status: The CS_STATUS_SCOREBOARDS value.
* @blocked_reason: The CS_STATUS_BLOCKED_REASON value.
*/
static void kbasep_csf_csg_active_dump_cs_status_wait(struct kbase_context *kctx,
struct kbasep_printer *kbpr, u32 glb_version,
u32 wait_status, u32 wait_sync_value,
u64 wait_sync_live_value,
u64 wait_sync_pointer, u32 sb_status,
u32 blocked_reason)
{
kbasep_print(kbpr, "SB_MASK: %d\n", CS_STATUS_WAIT_SB_MASK_GET(wait_status));
if (sb_source_supported(glb_version))
kbasep_print(kbpr, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status));
{
kbasep_print(kbpr, "PROGRESS_WAIT: %s\n",
CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? WAITING : NOT_WAITING);
}
kbasep_print(kbpr, "PROTM_PEND: %s\n",
CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ? WAITING : NOT_WAITING);
kbasep_print(kbpr, "SYNC_WAIT: %s\n",
CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ? WAITING : NOT_WAITING);
kbasep_print(kbpr, "WAIT_CONDITION: %s\n",
CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ? "greater than" :
"less or equal");
kbasep_print(kbpr, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer);
kbasep_print(kbpr, "SYNC_VALUE: %d\n", wait_sync_value);
kbasep_print(kbpr, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value);
kbasep_print(kbpr, "SB_STATUS: %u\n", CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status));
kbasep_print(kbpr, "BLOCKED_REASON: %s\n",
blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET(blocked_reason)));
}
/**
* kbasep_csf_csg_active_dump_cs_trace() - Dump active queue CS trace information.
*
* @kctx: Pointer to kbase context.
* @kbpr: Pointer to printer instance.
* @stream: Pointer to command stream information.
*/
static void
kbasep_csf_csg_active_dump_cs_trace(struct kbase_context *kctx, struct kbasep_printer *kbpr,
struct kbase_csf_cmd_stream_info const *const stream)
{
u32 val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_BASE_LO);
u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_BASE_HI) << 32) |
val;
val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_SIZE);
kbasep_print(kbpr, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val);
/* Write offset variable address (pointer) */
val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO);
addr = ((u64)kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI)
<< 32) |
val;
kbasep_print(kbpr, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr);
/* EVENT_SIZE and EVENT_STATEs */
val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG);
kbasep_print(kbpr, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STATES 0x%x\n",
CS_INSTR_CONFIG_EVENT_SIZE_GET(val), CS_INSTR_CONFIG_EVENT_STATE_GET(val));
}
/**
* kbasep_csf_read_cmdbuff_value() - Read a command from a queue offset.
*
* @queue: Address of a GPU command queue to examine.
* @cmdbuff_offset: GPU address offset in queue's memory buffer.
*
* Return: Encoded CSF command (64-bit)
*/
static u64 kbasep_csf_read_cmdbuff_value(struct kbase_queue *queue, u32 cmdbuff_offset)
{
u64 page_off = cmdbuff_offset >> PAGE_SHIFT;
u64 offset_within_page = cmdbuff_offset & ~PAGE_MASK;
struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
u64 *cmdbuff = vmap(&page, 1, VM_MAP, pgprot_noncached(PAGE_KERNEL));
u64 value;
if (!cmdbuff) {
struct kbase_context *kctx = queue->kctx;
dev_info(kctx->kbdev->dev, "%s failed to map the buffer page to read a command!",
__func__);
/* Return an alternative value of 0 for the dumping operation */
value = 0;
} else {
value = cmdbuff[offset_within_page / sizeof(u64)];
vunmap(cmdbuff);
}
return value;
}
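Only one ring-buffer page is mapped per read, so the byte offset is first split into a page index and an offset within that page. A small stand-alone example of the arithmetic (a PAGE_SHIFT of 12 is assumed; not driver code):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* Splitting a byte offset into (page index, offset within that page), as the
 * command read above does before mapping a single ring-buffer page.
 */
int main(void)
{
	uint64_t cmdbuff_offset = 0x2A38;                 /* arbitrary example offset */
	uint64_t page_off = cmdbuff_offset >> PAGE_SHIFT; /* 0x2 -> third page */
	uint64_t within   = cmdbuff_offset & ~PAGE_MASK;  /* 0xA38 bytes into it */

	printf("page %llu, byte %llu, u64 slot %llu\n",
	       (unsigned long long)page_off, (unsigned long long)within,
	       (unsigned long long)(within / sizeof(uint64_t)));
	return 0;
}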
/**
* kbasep_csf_csg_active_dump_cs_status_cmd_ptr() - Dump CMD_PTR information and nearby commands.
*
* @kbpr: Pointer to printer instance.
* @queue: Address of a GPU command queue to examine.
* @cmd_ptr: CMD_PTR address.
*/
static void kbasep_csf_csg_active_dump_cs_status_cmd_ptr(struct kbasep_printer *kbpr,
struct kbase_queue *queue, u64 cmd_ptr)
{
u64 cmd_ptr_offset;
u64 cursor, end_cursor, instr;
u32 nr_nearby_instr_size;
struct kbase_va_region *reg;
kbase_gpu_vm_lock(queue->kctx);
reg = kbase_region_tracker_find_region_enclosing_address(queue->kctx, cmd_ptr);
if (reg && !(reg->flags & KBASE_REG_FREE) && (reg->flags & KBASE_REG_CPU_RD) &&
(reg->gpu_alloc->type == KBASE_MEM_TYPE_NATIVE)) {
kbasep_print(kbpr, "CMD_PTR region nr_pages: %zu\n", reg->nr_pages);
nr_nearby_instr_size = MAX_NR_NEARBY_INSTR * sizeof(u64);
cmd_ptr_offset = cmd_ptr - queue->base_addr;
cursor = (cmd_ptr_offset > nr_nearby_instr_size) ?
cmd_ptr_offset - nr_nearby_instr_size :
0;
end_cursor = cmd_ptr_offset + nr_nearby_instr_size;
if (end_cursor > queue->size)
end_cursor = queue->size;
kbasep_print(kbpr,
"queue:GPU-%u-%u-%u at:0x%.16llx cmd_ptr:0x%.16llx "
"dump_begin:0x%.16llx dump_end:0x%.16llx\n",
queue->kctx->id, queue->group->handle, queue->csi_index,
(queue->base_addr + cursor), cmd_ptr, (queue->base_addr + cursor),
(queue->base_addr + end_cursor));
while ((cursor < end_cursor)) {
instr = kbasep_csf_read_cmdbuff_value(queue, (u32)cursor);
if (instr != 0)
kbasep_print(kbpr,
"queue:GPU-%u-%u-%u at:0x%.16llx cmd:0x%.16llx\n",
queue->kctx->id, queue->group->handle,
queue->csi_index, (queue->base_addr + cursor), instr);
cursor += sizeof(u64);
}
}
kbase_gpu_vm_unlock(queue->kctx);
}
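The dump above prints a window of up to MAX_NR_NEARBY_INSTR 64-bit commands on either side of CMD_PTR, clamped to the start and the size of the ring buffer. A stand-alone sketch of the window computation with the same clamping (illustration only):

#include <stdio.h>
#include <stdint.h>

#define MAX_NR_NEARBY_INSTR 32

static void dump_window(uint64_t cmd_ptr_offset, uint64_t queue_size,
			uint64_t *begin, uint64_t *end)
{
	uint64_t span = MAX_NR_NEARBY_INSTR * sizeof(uint64_t); /* 256 bytes */

	*begin = (cmd_ptr_offset > span) ? cmd_ptr_offset - span : 0;
	*end = cmd_ptr_offset + span;
	if (*end > queue_size)
		*end = queue_size;
}

int main(void)
{
	uint64_t b, e;

	dump_window(0x80, 0x1000, &b, &e);   /* near the start: begin clamps to 0 */
	printf("0x%llx..0x%llx\n", (unsigned long long)b, (unsigned long long)e);
	dump_window(0xF80, 0x1000, &b, &e);  /* near the end: end clamps to size */
	printf("0x%llx..0x%llx\n", (unsigned long long)b, (unsigned long long)e);
	return 0;
}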
/**
* kbasep_csf_csg_active_dump_queue() - Dump GPU command queue debug information.
*
* @kbpr: Pointer to printer instance.
* @queue: Address of a GPU command queue to examine
*/
static void kbasep_csf_csg_active_dump_queue(struct kbasep_printer *kbpr, struct kbase_queue *queue)
{
u64 *addr;
u32 *addr32;
u64 cs_extract;
u64 cs_insert;
u32 cs_active;
u64 wait_sync_pointer;
u32 wait_status, wait_sync_value;
u32 sb_status;
u32 blocked_reason;
struct kbase_vmap_struct *mapping;
u64 *evt;
u64 wait_sync_live_value;
u32 glb_version;
u64 cmd_ptr;
if (!queue)
return;
glb_version = queue->kctx->kbdev->csf.global_iface.version;
if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || !queue->group))
return;
addr = queue->user_io_addr;
cs_insert = addr[CS_INSERT_LO / sizeof(*addr)];
addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr);
cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)];
addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(*addr));
cs_active = addr32[CS_ACTIVE / sizeof(*addr32)];
kbasep_puts(kbpr, KBASEP_CSF_CSG_DUMP_CS_HEADER_USER_IO);
kbasep_print(kbpr, "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n", queue->csi_index,
queue->base_addr, queue->size, queue->priority, cs_insert, cs_extract,
cs_active, queue->doorbell_nr);
/* Print status information for blocked group waiting for sync object. For on-slot queues,
* if cs_trace is enabled, dump the interface's cs_trace configuration.
*/
if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) {
kbasep_print(kbpr, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
wait_status = queue->status_wait;
wait_sync_value = queue->sync_value;
wait_sync_pointer = queue->sync_ptr;
sb_status = queue->sb_status;
blocked_reason = queue->blocked_reason;
evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer,
&mapping);
if (evt) {
wait_sync_live_value = evt[0];
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
} else {
wait_sync_live_value = U64_MAX;
}
kbasep_csf_csg_active_dump_cs_status_wait(
queue->kctx, kbpr, glb_version, wait_status, wait_sync_value,
wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason);
}
kbasep_csf_csg_active_dump_cs_status_cmd_ptr(kbpr, queue, queue->saved_cmd_ptr);
} else {
struct kbase_device const *const kbdev = queue->group->kctx->kbdev;
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[queue->group->csg_nr];
struct kbase_csf_cmd_stream_info const *const stream =
&ginfo->streams[queue->csi_index];
u32 req_res;
if (WARN_ON(!stream))
return;
cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
req_res = kbase_csf_firmware_cs_output(stream, CS_STATUS_REQ_RESOURCE);
kbasep_print(kbpr, "CMD_PTR: 0x%llx\n", cmd_ptr);
kbasep_print(kbpr, "REQ_RESOURCE [COMPUTE]: %d\n",
CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res));
kbasep_print(kbpr, "REQ_RESOURCE [FRAGMENT]: %d\n",
CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res));
kbasep_print(kbpr, "REQ_RESOURCE [TILER]: %d\n",
CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res));
kbasep_print(kbpr, "REQ_RESOURCE [IDVS]: %d\n",
CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res));
wait_status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
wait_sync_value = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_VALUE);
wait_sync_pointer =
kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_LO);
wait_sync_pointer |=
(u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_HI)
<< 32;
sb_status = kbase_csf_firmware_cs_output(stream, CS_STATUS_SCOREBOARDS);
blocked_reason = kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON);
evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping);
if (evt) {
wait_sync_live_value = evt[0];
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
} else {
wait_sync_live_value = U64_MAX;
}
kbasep_csf_csg_active_dump_cs_status_wait(queue->kctx, kbpr, glb_version,
wait_status, wait_sync_value,
wait_sync_live_value, wait_sync_pointer,
sb_status, blocked_reason);
/* Dealing with cs_trace */
if (kbase_csf_scheduler_queue_has_trace(queue))
kbasep_csf_csg_active_dump_cs_trace(queue->kctx, kbpr, stream);
else
kbasep_print(kbpr, "NO CS_TRACE\n");
kbasep_csf_csg_active_dump_cs_status_cmd_ptr(kbpr, queue, cmd_ptr);
}
}
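The insert pointer above is read from the first user I/O page (host-written input) and the extract/active values from the page that follows it (firmware-written output). Because user_io_addr is a u64 pointer, the second page is reached by stepping PAGE_SIZE / sizeof(u64) elements rather than PAGE_SIZE bytes. A stand-alone illustration of that pointer arithmetic (the io array is a stand-in for the mapped pages):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	uint64_t io[2 * PAGE_SIZE / sizeof(uint64_t)];       /* stand-in for user_io_addr */
	uint64_t *input  = io;                               /* CS_INSERT lives here */
	uint64_t *output = io + PAGE_SIZE / sizeof(uint64_t);/* CS_EXTRACT/CS_ACTIVE here */

	printf("byte distance between pages: %zu\n",
	       (size_t)((char *)output - (char *)input));    /* 4096 */
	return 0;
}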
/**
* kbasep_csf_csg_active_dump_group() - Dump an active group.
*
* @kbpr: Pointer to printer instance.
* @group: GPU group.
*/
static void kbasep_csf_csg_active_dump_group(struct kbasep_printer *kbpr,
struct kbase_queue_group *const group)
{
if (kbase_csf_scheduler_group_get_slot(group) >= 0) {
struct kbase_device *const kbdev = group->kctx->kbdev;
u32 ep_c, ep_r;
char exclusive;
char idle = 'N';
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
u8 slot_priority = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority;
ep_c = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_CURRENT);
ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ);
if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r))
exclusive = 'C';
else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r))
exclusive = 'F';
else
exclusive = '0';
if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
CSG_STATUS_STATE_IDLE_MASK)
idle = 'Y';
if (!test_bit(group->csg_nr, csg_slots_status_updated)) {
kbasep_print(kbpr, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
group->csg_nr);
kbasep_print(kbpr, "*** The following group-record is likely stale\n");
}
kbasep_print(
kbpr,
"GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req),"
" F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
kbasep_print(
kbpr,
"%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
group->handle, group->csg_nr, slot_priority, group->run_state,
group->priority, CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c),
CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r),
CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c),
CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), exclusive, idle);
} else {
kbasep_print(kbpr, "GroupID, CSG NR, Run State, Priority\n");
kbasep_print(kbpr, "%7d, %6d, %9d, %8d\n", group->handle, group->csg_nr,
group->run_state, group->priority);
}
if (group->run_state != KBASE_CSF_GROUP_TERMINATED) {
unsigned int i;
kbasep_print(kbpr, "Bound queues:\n");
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++)
kbasep_csf_csg_active_dump_queue(kbpr, group->bound_queues[i]);
}
}
void kbase_csf_csg_update_status(struct kbase_device *kbdev)
{
u32 max_csg_slots = kbdev->csf.global_iface.group_num;
DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
u32 csg_nr;
unsigned long flags;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
* ring for Extract offset update, shall not be made when MCU has been
* put to sleep otherwise it will undesirably make MCU exit the sleep
* state. Also it isn't really needed as FW will implicitly update the
* status of all on-slot groups when MCU sleep request is sent to it.
*/
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
/* Wait for the MCU sleep request to complete. */
kbase_pm_wait_for_desired_state(kbdev);
bitmap_copy(csg_slots_status_updated, kbdev->csf.scheduler.csg_inuse_bitmap,
max_csg_slots);
return;
}
for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
if (!group)
continue;
/* Ring the User doorbell for FW to update the Extract offset */
kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
set_bit(csg_nr, used_csgs);
}
/* Return early if there are no on-slot groups */
if (bitmap_empty(used_csgs, max_csg_slots))
return;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
for_each_set_bit(csg_nr, used_csgs, max_csg_slots) {
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[csg_nr];
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
CSG_REQ_STATUS_UPDATE_MASK);
}
BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE));
kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_csg_slots_status_update_finish(kbdev, used_csgs);
/* Wait for the user doorbell ring to take effect */
msleep(100);
}
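The wait loop driven by this function keeps two bitmaps: the slots that were sent a STATUS_UPDATE request and the slots that have acknowledged so far, removing the latter from the former until nothing is pending or the wait times out. A trivial stand-alone sketch of that bookkeeping with plain bitmasks (illustration only; slot numbers are made up):

#include <stdio.h>

int main(void)
{
	unsigned int pending = 0x0F;    /* slots 0-3 were sent a STATUS_UPDATE request */
	unsigned int finished = 0x05;   /* FW has acknowledged slots 0 and 2 so far */

	pending &= ~finished;           /* bitmap_andnot(): keep only unfinished slots */
	printf("still waiting on slots mask 0x%x\n", pending); /* 0xa -> slots 1 and 3 */
	return 0;
}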
int kbasep_csf_csg_dump_print(struct kbase_context *const kctx, struct kbasep_printer *kbpr)
{
u32 gr;
struct kbase_device *kbdev;
if (WARN_ON(!kctx))
return -EINVAL;
kbdev = kctx->kbdev;
kbasep_print(kbpr,
"CSF groups status (version: v" __stringify(MALI_CSF_CSG_DUMP_VERSION) "):\n");
mutex_lock(&kctx->csf.lock);
kbase_csf_scheduler_lock(kbdev);
kbase_csf_csg_update_status(kbdev);
kbasep_print(kbpr, "Ctx %d_%d\n", kctx->tgid, kctx->id);
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
struct kbase_queue_group *const group = kctx->csf.queue_groups[gr];
if (!group)
continue;
kbasep_csf_csg_active_dump_group(kbpr, group);
}
kbase_csf_scheduler_unlock(kbdev);
mutex_unlock(&kctx->csf.lock);
return 0;
}
int kbasep_csf_csg_active_dump_print(struct kbase_device *kbdev, struct kbasep_printer *kbpr)
{
u32 csg_nr;
u32 num_groups;
if (WARN_ON(!kbdev))
return -EINVAL;
num_groups = kbdev->csf.global_iface.group_num;
kbasep_print(kbpr, "CSF active groups status (version: v" __stringify(
MALI_CSF_CSG_DUMP_VERSION) "):\n");
kbase_csf_scheduler_lock(kbdev);
kbase_csf_csg_update_status(kbdev);
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
if (!group)
continue;
kbasep_print(kbpr, "Ctx %d_%d\n", group->kctx->tgid, group->kctx->id);
kbasep_csf_csg_active_dump_group(kbpr, group);
}
kbase_csf_scheduler_unlock(kbdev);
return 0;
}

View File

@@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_CSG_H_
#define _KBASE_CSF_CSG_H_
/* Forward declaration */
struct kbase_context;
struct kbase_device;
struct kbasep_printer;
#define MALI_CSF_CSG_DUMP_VERSION 0
/**
* kbase_csf_csg_update_status() - Update on-slot gpu group statuses
*
* @kbdev: Pointer to the device.
*/
void kbase_csf_csg_update_status(struct kbase_device *kbdev);
/**
* kbasep_csf_csg_dump_print() - Dump all gpu groups information to file
*
* @kctx: The kbase_context to which the dumped gpu group belongs.
* @kbpr: Pointer to printer instance.
*
* Return: 0 on success, or an error code.
*/
int kbasep_csf_csg_dump_print(struct kbase_context *const kctx, struct kbasep_printer *kbpr);
/**
* kbasep_csf_csg_active_dump_print() - Dump on-slot gpu groups information to file
*
* @kbdev: Pointer to the device.
* @kbpr: Pointer to printer instance.
*
* Return: 0 on success, or an error code.
*/
int kbasep_csf_csg_active_dump_print(struct kbase_device *kbdev, struct kbasep_printer *kbpr);
#endif /* _KBASE_CSF_CSG_H_ */
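A hedged sketch of how a debugfs show callback is expected to drive these helpers, mirroring the cpu_queue debugfs code elsewhere in this change (kbasep_printer_file_init()/kbasep_printer_term() come from mali_kbase_csf_util.h); the callback name is hypothetical and this is an illustration, not driver code:

static int example_csg_show(struct seq_file *file, void *data)
{
	struct kbase_context *const kctx = file->private;
	struct kbasep_printer *kbpr;
	int ret = -EINVAL;

	CSTD_UNUSED(data);

	kbpr = kbasep_printer_file_init(file);   /* wrap the seq_file in a printer */
	if (kbpr != NULL) {
		ret = kbasep_csf_csg_dump_print(kctx, kbpr);
		kbasep_printer_term(kbpr);
	}
	return ret;
}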

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,143 +20,26 @@
*/
#include "mali_kbase_csf_csg_debugfs.h"
#include <mali_kbase.h>
#include <linux/seq_file.h>
#include <linux/delay.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "mali_kbase_csf_csg.h"
#include "mali_kbase_csf_tl_reader.h"
/* Wait time to be used cumulatively for all the CSG slots.
* Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be
* any other Host request pending on the FW side and usually FW would be responsive
* to the Doorbell IRQs as it won't do any polling for a long time and also it won't
* have to wait for any HW state transition to complete for publishing the status.
* So it is reasonable to expect that handling of STATUS_UPDATE request would be
* relatively very quick.
*/
#define STATUS_UPDATE_WAIT_TIMEOUT 500
/* The bitmask of CSG slots for which the STATUS_UPDATE request completed.
* The access to it is serialized with scheduler lock, so at a time it would
* get used either for "active_groups" or per context "groups" debugfs file.
*/
static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS);
static
bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr)
{
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[csg_nr];
return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
CSG_REQ_STATUS_UPDATE_MASK);
}
static
bool csg_slots_status_update_finish(struct kbase_device *kbdev,
const unsigned long *slots_mask)
{
const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
bool changed = false;
u32 csg_nr;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
for_each_set_bit(csg_nr, slots_mask, max_csg_slots) {
if (csg_slot_status_update_finish(kbdev, csg_nr)) {
set_bit(csg_nr, csg_slots_status_updated);
changed = true;
}
}
return changed;
}
static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
unsigned long *slots_mask)
{
const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT);
lockdep_assert_held(&kbdev->csf.scheduler.lock);
bitmap_zero(csg_slots_status_updated, max_csg_slots);
while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) {
remaining = wait_event_timeout(kbdev->csf.event_wait,
csg_slots_status_update_finish(kbdev, slots_mask),
remaining);
if (likely(remaining)) {
bitmap_andnot(slots_mask, slots_mask,
csg_slots_status_updated, max_csg_slots);
} else {
dev_warn(kbdev->dev,
"STATUS_UPDATE request timed out for slots 0x%lx",
slots_mask[0]);
}
}
}
void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev)
{
u32 max_csg_slots = kbdev->csf.global_iface.group_num;
DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
u32 csg_nr;
unsigned long flags;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
* ring for Extract offset update, shall not be made when MCU has been
* put to sleep otherwise it will undesirably make MCU exit the sleep
* state. Also it isn't really needed as FW will implicitly update the
* status of all on-slot groups when MCU sleep request is sent to it.
*/
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
/* Wait for the MCU sleep request to complete. */
kbase_pm_wait_for_desired_state(kbdev);
bitmap_copy(csg_slots_status_updated,
kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots);
return;
}
for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
if (!group)
continue;
/* Ring the User doorbell for FW to update the Extract offset */
kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
set_bit(csg_nr, used_csgs);
}
/* Return early if there are no on-slot groups */
if (bitmap_empty(used_csgs, max_csg_slots))
return;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
for_each_set_bit(csg_nr, used_csgs, max_csg_slots) {
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[csg_nr];
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
CSG_REQ_STATUS_UPDATE_MASK);
}
BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE));
kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_csg_slots_status_update_finish(kbdev, used_csgs);
/* Wait for the User doobell ring to take effect */
msleep(100);
}
#include "mali_kbase_csf_util.h"
#include <mali_kbase.h>
#include <linux/seq_file.h>
#include <linux/version_compat_defs.h>
#define MAX_SCHED_STATE_STRING_LEN (16)
/**
* scheduler_state_to_string() - Get string name of scheduler state.
*
* @kbdev: Pointer to kbase device.
* @sched_state: Scheduler state.
*
* Return: Suitable string.
*/
static const char *scheduler_state_to_string(struct kbase_device *kbdev,
enum kbase_csf_scheduler_state sched_state)
enum kbase_csf_scheduler_state sched_state)
{
switch (sched_state) {
case SCHED_BUSY:
@@ -176,404 +59,62 @@ static const char *scheduler_state_to_string(struct kbase_device *kbdev,
}
/**
* blocked_reason_to_string() - Convert blocking reason id to a string
*
* @reason_id: blocked_reason
*
* Return: Suitable string
*/
static const char *blocked_reason_to_string(u32 reason_id)
{
/* possible blocking reasons of a cs */
static const char *const cs_blocked_reason[] = {
[CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED",
[CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT",
[CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] =
"PROGRESS_WAIT",
[CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT",
[CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED",
[CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE",
[CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH"
};
if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason)))
return "UNKNOWN_BLOCKED_REASON_ID";
return cs_blocked_reason[reason_id];
}
static bool sb_source_supported(u32 glb_version)
{
bool supported = false;
if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) &&
(GLB_VERSION_MINOR_GET(glb_version) >= 5)) ||
((GLB_VERSION_MAJOR_GET(glb_version) == 2) &&
(GLB_VERSION_MINOR_GET(glb_version) >= 6)) ||
((GLB_VERSION_MAJOR_GET(glb_version) == 1) &&
(GLB_VERSION_MINOR_GET(glb_version) >= 3)))
supported = true;
return supported;
}
static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value,
u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason)
{
#define WAITING "Waiting"
#define NOT_WAITING "Not waiting"
seq_printf(file, "SB_MASK: %d\n",
CS_STATUS_WAIT_SB_MASK_GET(wait_status));
if (sb_source_supported(glb_version))
seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status));
seq_printf(file, "PROGRESS_WAIT: %s\n",
CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ?
WAITING : NOT_WAITING);
seq_printf(file, "PROTM_PEND: %s\n",
CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ?
WAITING : NOT_WAITING);
seq_printf(file, "SYNC_WAIT: %s\n",
CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ?
WAITING : NOT_WAITING);
seq_printf(file, "WAIT_CONDITION: %s\n",
CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ?
"greater than" : "less or equal");
seq_printf(file, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer);
seq_printf(file, "SYNC_VALUE: %d\n", wait_sync_value);
seq_printf(file, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value);
seq_printf(file, "SB_STATUS: %u\n",
CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status));
seq_printf(file, "BLOCKED_REASON: %s\n",
blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET(
blocked_reason)));
}
static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file,
struct kbase_csf_cmd_stream_info const *const stream)
{
u32 val = kbase_csf_firmware_cs_input_read(stream,
CS_INSTR_BUFFER_BASE_LO);
u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream,
CS_INSTR_BUFFER_BASE_HI) << 32) | val;
val = kbase_csf_firmware_cs_input_read(stream,
CS_INSTR_BUFFER_SIZE);
seq_printf(file, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val);
/* Write offset variable address (pointer) */
val = kbase_csf_firmware_cs_input_read(stream,
CS_INSTR_BUFFER_OFFSET_POINTER_LO);
addr = ((u64)kbase_csf_firmware_cs_input_read(stream,
CS_INSTR_BUFFER_OFFSET_POINTER_HI) << 32) | val;
seq_printf(file, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr);
/* EVENT_SIZE and EVENT_STATEs */
val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG);
seq_printf(file, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STAES 0x%x\n",
CS_INSTR_CONFIG_EVENT_SIZE_GET(val),
CS_INSTR_CONFIG_EVENT_STATE_GET(val));
}
/**
* kbasep_csf_scheduler_dump_active_queue() - Print GPU command queue
* debug information
*
* @file: seq_file for printing to
* @queue: Address of a GPU command queue to examine
*/
static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
struct kbase_queue *queue)
{
u32 *addr;
u64 cs_extract;
u64 cs_insert;
u32 cs_active;
u64 wait_sync_pointer;
u32 wait_status, wait_sync_value;
u32 sb_status;
u32 blocked_reason;
struct kbase_vmap_struct *mapping;
u64 *evt;
u64 wait_sync_live_value;
u32 glb_version;
if (!queue)
return;
glb_version = queue->kctx->kbdev->csf.global_iface.version;
if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID ||
!queue->group))
return;
addr = (u32 *)queue->user_io_addr;
cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32);
addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32);
cs_active = addr[CS_ACTIVE/4];
#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \
"Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n"
seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n",
queue->csi_index, queue->base_addr,
queue->size,
queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr);
/* Print status information for blocked group waiting for sync object. For on-slot queues,
* if cs_trace is enabled, dump the interface's cs_trace configuration.
*/
if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) {
seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
wait_status = queue->status_wait;
wait_sync_value = queue->sync_value;
wait_sync_pointer = queue->sync_ptr;
sb_status = queue->sb_status;
blocked_reason = queue->blocked_reason;
evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping);
if (evt) {
wait_sync_live_value = evt[0];
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
} else {
wait_sync_live_value = U64_MAX;
}
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
file, glb_version, wait_status, wait_sync_value,
wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason);
}
} else {
struct kbase_device const *const kbdev =
queue->group->kctx->kbdev;
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[queue->group->csg_nr];
struct kbase_csf_cmd_stream_info const *const stream =
&ginfo->streams[queue->csi_index];
u64 cmd_ptr;
u32 req_res;
if (WARN_ON(!stream))
return;
cmd_ptr = kbase_csf_firmware_cs_output(stream,
CS_STATUS_CMD_PTR_LO);
cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream,
CS_STATUS_CMD_PTR_HI) << 32;
req_res = kbase_csf_firmware_cs_output(stream,
CS_STATUS_REQ_RESOURCE);
seq_printf(file, "CMD_PTR: 0x%llx\n", cmd_ptr);
seq_printf(file, "REQ_RESOURCE [COMPUTE]: %d\n",
CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res));
seq_printf(file, "REQ_RESOURCE [FRAGMENT]: %d\n",
CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res));
seq_printf(file, "REQ_RESOURCE [TILER]: %d\n",
CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res));
seq_printf(file, "REQ_RESOURCE [IDVS]: %d\n",
CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res));
wait_status = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT);
wait_sync_value = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_VALUE);
wait_sync_pointer = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_LO);
wait_sync_pointer |= (u64)kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
sb_status = kbase_csf_firmware_cs_output(stream,
CS_STATUS_SCOREBOARDS);
blocked_reason = kbase_csf_firmware_cs_output(
stream, CS_STATUS_BLOCKED_REASON);
evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping);
if (evt) {
wait_sync_live_value = evt[0];
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
} else {
wait_sync_live_value = U64_MAX;
}
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
file, glb_version, wait_status, wait_sync_value, wait_sync_live_value,
wait_sync_pointer, sb_status, blocked_reason);
/* Dealing with cs_trace */
if (kbase_csf_scheduler_queue_has_trace(queue))
kbasep_csf_scheduler_dump_active_cs_trace(file, stream);
else
seq_puts(file, "NO CS_TRACE\n");
}
seq_puts(file, "\n");
}
static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
struct kbase_queue_group *const group)
{
if (kbase_csf_scheduler_group_get_slot(group) >= 0) {
struct kbase_device *const kbdev = group->kctx->kbdev;
u32 ep_c, ep_r;
char exclusive;
char idle = 'N';
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
u8 slot_priority =
kbdev->csf.scheduler.csg_slots[group->csg_nr].priority;
ep_c = kbase_csf_firmware_csg_output(ginfo,
CSG_STATUS_EP_CURRENT);
ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ);
if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r))
exclusive = 'C';
else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r))
exclusive = 'F';
else
exclusive = '0';
if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
CSG_STATUS_STATE_IDLE_MASK)
idle = 'Y';
if (!test_bit(group->csg_nr, csg_slots_status_updated)) {
seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
group->csg_nr);
seq_puts(file, "*** The following group-record is likely stale\n");
}
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
group->handle,
group->csg_nr,
slot_priority,
group->run_state,
group->priority,
CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c),
CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r),
CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c),
CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
exclusive,
idle);
} else {
seq_puts(file, "GroupID, CSG NR, Run State, Priority\n");
seq_printf(file, "%7d, %6d, %9d, %8d\n",
group->handle,
group->csg_nr,
group->run_state,
group->priority);
}
if (group->run_state != KBASE_CSF_GROUP_TERMINATED) {
unsigned int i;
seq_puts(file, "Bound queues:\n");
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
kbasep_csf_scheduler_dump_active_queue(file,
group->bound_queues[i]);
}
}
seq_puts(file, "\n");
}
/**
* kbasep_csf_queue_group_debugfs_show() - Print per-context GPU command queue
* group debug information
* kbasep_csf_queue_show_groups() - Print per-context GPU command queue
* group debug information
*
* @file: The seq_file for printing to
* @data: The debugfs dentry private data, a pointer to kbase context
*
* Return: Negative error code or 0 on success.
*/
static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
void *data)
static int kbasep_csf_queue_show_groups(struct seq_file *file, void *data)
{
u32 gr;
struct kbasep_printer *kbpr;
struct kbase_context *const kctx = file->private;
struct kbase_device *kbdev;
int ret = -EINVAL;
CSTD_UNUSED(data);
if (WARN_ON(!kctx))
return -EINVAL;
kbdev = kctx->kbdev;
seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n",
MALI_CSF_CSG_DEBUGFS_VERSION);
mutex_lock(&kctx->csf.lock);
kbase_csf_scheduler_lock(kbdev);
kbase_csf_debugfs_update_active_groups_status(kbdev);
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
struct kbase_queue_group *const group =
kctx->csf.queue_groups[gr];
if (group)
kbasep_csf_scheduler_dump_active_group(file, group);
kbpr = kbasep_printer_file_init(file);
if (kbpr != NULL) {
ret = kbasep_csf_csg_dump_print(kctx, kbpr);
kbasep_printer_term(kbpr);
}
kbase_csf_scheduler_unlock(kbdev);
mutex_unlock(&kctx->csf.lock);
return 0;
return ret;
}
/**
* kbasep_csf_scheduler_dump_active_groups() - Print debug info for active
* GPU command queue groups
* kbasep_csf_csg_active_show_groups() - Print debug info for active GPU command queue groups
*
* @file: The seq_file for printing to
* @data: The debugfs dentry private data, a pointer to kbase_device
*
* Return: Negative error code or 0 on success.
*/
static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
void *data)
static int kbasep_csf_csg_active_show_groups(struct seq_file *file, void *data)
{
u32 csg_nr;
struct kbase_device *kbdev = file->private;
u32 num_groups = kbdev->csf.global_iface.group_num;
struct kbasep_printer *kbpr;
int ret = -EINVAL;
CSTD_UNUSED(data);
seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n",
MALI_CSF_CSG_DEBUGFS_VERSION);
kbase_csf_scheduler_lock(kbdev);
kbase_csf_debugfs_update_active_groups_status(kbdev);
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
if (!group)
continue;
seq_printf(file, "\nCtx %d_%d\n", group->kctx->tgid,
group->kctx->id);
kbasep_csf_scheduler_dump_active_group(file, group);
kbpr = kbasep_printer_file_init(file);
if (kbpr != NULL) {
ret = kbasep_csf_csg_active_dump_print(kbdev, kbpr);
kbasep_printer_term(kbpr);
}
kbase_csf_scheduler_unlock(kbdev);
return 0;
return ret;
}
static int kbasep_csf_queue_group_debugfs_open(struct inode *in,
struct file *file)
static int kbasep_csf_queue_group_debugfs_open(struct inode *in, struct file *file)
{
return single_open(file, kbasep_csf_queue_group_debugfs_show,
in->i_private);
return single_open(file, kbasep_csf_queue_show_groups, in->i_private);
}
static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in,
struct file *file)
static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in, struct file *file)
{
return single_open(file, kbasep_csf_scheduler_dump_active_groups,
in->i_private);
return single_open(file, kbasep_csf_csg_active_show_groups, in->i_private);
}
static const struct file_operations kbasep_csf_queue_group_debugfs_fops = {
@@ -591,25 +132,23 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx)
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;
file = debugfs_create_file("groups", mode,
kctx->kctx_dentry, kctx, &kbasep_csf_queue_group_debugfs_fops);
file = debugfs_create_file("groups", mode, kctx->kctx_dentry, kctx,
&kbasep_csf_queue_group_debugfs_fops);
if (IS_ERR_OR_NULL(file)) {
dev_warn(kctx->kbdev->dev,
"Unable to create per context queue groups debugfs entry");
"Unable to create per context queue groups debugfs entry");
}
}
static const struct file_operations
kbasep_csf_active_queue_groups_debugfs_fops = {
static const struct file_operations kbasep_csf_active_queue_groups_debugfs_fops = {
.open = kbasep_csf_active_queue_groups_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int kbasep_csf_debugfs_scheduling_timer_enabled_get(
void *data, u64 *val)
static int kbasep_csf_debugfs_scheduling_timer_enabled_get(void *data, u64 *val)
{
struct kbase_device *const kbdev = data;
@@ -618,8 +157,7 @@ static int kbasep_csf_debugfs_scheduling_timer_enabled_get(
return 0;
}
static int kbasep_csf_debugfs_scheduling_timer_enabled_set(
void *data, u64 val)
static int kbasep_csf_debugfs_scheduling_timer_enabled_set(void *data, u64 val)
{
struct kbase_device *const kbdev = data;
@@ -628,10 +166,10 @@ static int kbasep_csf_debugfs_scheduling_timer_enabled_set(
return 0;
}
static int kbasep_csf_debugfs_scheduling_timer_kick_set(
void *data, u64 val)
static int kbasep_csf_debugfs_scheduling_timer_kick_set(void *data, u64 val)
{
struct kbase_device *const kbdev = data;
CSTD_UNUSED(val);
kbase_csf_scheduler_kick(kbdev);
@@ -660,8 +198,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, NULL,
* size of the state string if it was copied successfully to the
* User buffer or a negative value in case of an error.
*/
static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
struct kbase_device *kbdev = file->private_data;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
@@ -674,8 +212,7 @@ static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file,
if (!state_string)
count = 0;
return simple_read_from_buffer(user_buf, count, ppos,
state_string, strlen(state_string));
return simple_read_from_buffer(user_buf, count, ppos, state_string, strlen(state_string));
}
/**
@@ -694,8 +231,8 @@ static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file,
* state or if copy from user buffer failed, otherwise the length of
* the User buffer.
*/
static ssize_t kbase_csf_debugfs_scheduler_state_set(struct file *file,
const char __user *ubuf, size_t count, loff_t *ppos)
static ssize_t kbase_csf_debugfs_scheduler_state_set(struct file *file, const char __user *ubuf,
size_t count, loff_t *ppos)
{
struct kbase_device *kbdev = file->private_data;
char buf[MAX_SCHED_STATE_STRING_LEN];
@@ -735,19 +272,15 @@ static const struct file_operations kbasep_csf_debugfs_scheduler_state_fops = {
void kbase_csf_debugfs_init(struct kbase_device *kbdev)
{
debugfs_create_file("active_groups", 0444,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_active_queue_groups_debugfs_fops);
debugfs_create_file("active_groups", 0444, kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_active_queue_groups_debugfs_fops);
debugfs_create_file("scheduling_timer_enabled", 0644,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduling_timer_enabled_fops);
debugfs_create_file("scheduling_timer_kick", 0200,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduling_timer_kick_fops);
debugfs_create_file("scheduler_state", 0644,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduler_state_fops);
debugfs_create_file("scheduling_timer_enabled", 0644, kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduling_timer_enabled_fops);
debugfs_create_file("scheduling_timer_kick", 0200, kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduling_timer_kick_fops);
debugfs_create_file("scheduler_state", 0644, kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduler_state_fops);
kbase_csf_tl_reader_debugfs_init(kbdev);
}
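/*
 * Illustrative userspace sketch (not part of this driver or this diff):
 * reading the debugfs files created by kbase_csf_debugfs_init() above. The
 * debugfs mount point and the "mali0" directory name are assumptions for a
 * typical single-GPU setup; the real location is wherever
 * kbdev->mali_debugfs_directory was created.
 */
#include <stdio.h>

static void dump_debugfs_file(const char *path)
{
	char line[256];
	FILE *fp = fopen(path, "r");

	if (!fp) {
		perror(path);
		return;
	}
	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);
	fclose(fp);
}

int main(void)
{
	/* Snapshot of all on-slot queue groups, as printed by
	 * kbasep_csf_csg_active_show_groups().
	 */
	dump_debugfs_file("/sys/kernel/debug/mali0/active_groups");
	/* Current scheduler power state string served by
	 * kbase_csf_debugfs_scheduler_state_get().
	 */
	dump_debugfs_file("/sys/kernel/debug/mali0/scheduler_state");
	return 0;
}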


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,12 +22,9 @@
#ifndef _KBASE_CSF_CSG_DEBUGFS_H_
#define _KBASE_CSF_CSG_DEBUGFS_H_
/* Forward declarations */
struct kbase_device;
/* Forward declaration */
struct kbase_context;
struct kbase_queue_group;
#define MALI_CSF_CSG_DEBUGFS_VERSION 0
struct kbase_device;
/**
* kbase_csf_queue_group_debugfs_init() - Add debugfs entry for queue groups
@@ -44,11 +41,4 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx);
*/
void kbase_csf_debugfs_init(struct kbase_device *kbdev);
/**
* kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses
*
* @kbdev: Pointer to the device
*/
void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev);
#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */


@@ -29,6 +29,7 @@
#include <linux/types.h>
#include <linux/wait.h>
#include <hw_access/mali_kbase_hw_access.h>
#include "mali_kbase_csf_firmware.h"
#include "mali_kbase_refcount_defs.h"
#include "mali_kbase_csf_event.h"
@@ -52,13 +53,13 @@
*/
#define MAX_TILER_HEAPS (128)
#define CSF_FIRMWARE_ENTRY_READ (1ul << 0)
#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1)
#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2)
#define CSF_FIRMWARE_ENTRY_READ (1ul << 0)
#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1)
#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2)
#define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3)
#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5)
#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30)
#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31)
#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5)
#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30)
#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31)
/**
* enum kbase_csf_queue_bind_state - bind state of the queue
@@ -265,15 +266,18 @@ enum kbase_queue_group_priority {
* @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
* Shader, L2 and MCU state.
* @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
* @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended.
* @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended.
* @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot.
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
* @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
* of a MMU operation
* of a MMU operation.
* @KCPU_FENCE_SIGNAL_TIMEOUT: Waiting time in ms for triggering a KCPU queue sync state dump
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
* @KBASE_DEFAULT_TIMEOUT: Default timeout used when an invalid selector is passed
* to the pre-computed timeout getter.
*/
enum kbase_timeout_selector {
CSF_FIRMWARE_TIMEOUT,
@@ -284,9 +288,11 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
MMU_AS_INACTIVE_WAIT_TIMEOUT,
KCPU_FENCE_SIGNAL_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
KBASE_TIMEOUT_SELECTOR_COUNT,
KBASE_DEFAULT_TIMEOUT = CSF_FIRMWARE_TIMEOUT
};
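/*
 * Minimal standalone sketch of the "pre-computed timeout getter" pattern that
 * the KBASE_DEFAULT_TIMEOUT comment above refers to: timeouts are looked up
 * by selector, and an out-of-range selector falls back to the default entry.
 * The demo_* names and the millisecond values are illustrative only, not the
 * driver's actual table.
 */
#include <stdio.h>

enum demo_timeout_selector {
	DEMO_FIRMWARE_TIMEOUT,
	DEMO_PM_TIMEOUT,
	DEMO_TIMEOUT_SELECTOR_COUNT,
	DEMO_DEFAULT_TIMEOUT = DEMO_FIRMWARE_TIMEOUT
};

static unsigned int demo_get_timeout_ms(enum demo_timeout_selector sel)
{
	/* Hypothetical pre-computed values, indexed by selector. */
	static const unsigned int timeout_ms[DEMO_TIMEOUT_SELECTOR_COUNT] = {
		[DEMO_FIRMWARE_TIMEOUT] = 3000,
		[DEMO_PM_TIMEOUT] = 500,
	};

	if (sel >= DEMO_TIMEOUT_SELECTOR_COUNT)
		sel = DEMO_DEFAULT_TIMEOUT; /* invalid selector: use default */

	return timeout_ms[sel];
}

int main(void)
{
	printf("%u\n", demo_get_timeout_ms(DEMO_PM_TIMEOUT));
	printf("%u\n", demo_get_timeout_ms(DEMO_TIMEOUT_SELECTOR_COUNT)); /* falls back */
	return 0;
}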
/**
@@ -324,6 +330,14 @@ struct kbase_csf_notification {
* It is in page units.
* @link: Link to the linked list of GPU command queues created per
* GPU address space.
* @pending_kick: Indicates whether there is a pending kick to be handled.
* @pending_kick_link: Link to the linked list of GPU command queues that have
* been kicked, but the kick has not yet been processed.
* This link would be deleted right before the kick is
* handled to allow for future kicks to occur in the mean
* time. For this reason, this must not be used to check
* for the presence of a pending queue kick. @pending_kick
* should be used instead.
* @refcount: Reference count, stands for the number of times the queue
* has been referenced. The reference is taken when it is
* created, when it is bound to the group and also when the
@@ -336,6 +350,7 @@ struct kbase_csf_notification {
* @base_addr: Base address of the CS buffer.
* @size: Size of the CS buffer.
* @priority: Priority of this queue within the group.
* @group_priority: Priority of the group to which this queue has been bound.
* @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state
* @csi_index: The ID of the assigned CS hardware interface.
* @enabled: Indicating whether the CS is running, or not.
@@ -363,7 +378,6 @@ struct kbase_csf_notification {
* @trace_offset_ptr: Pointer to the CS trace buffer offset variable.
* @trace_buffer_size: CS trace buffer size for the queue.
* @trace_cfg: CS trace configuration parameters.
* @error: GPU command queue fatal information to pass to user space.
* @cs_error_work: Work item to handle the CS fatal event reported for this
* queue or the CS fault event if dump on fault is enabled
* and acknowledgment for CS fault event needs to be done
@@ -373,7 +387,6 @@ struct kbase_csf_notification {
* @cs_error: Records information about the CS fatal event or
* about CS fault event if dump on fault is enabled.
* @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred.
* @pending: Indicating whether the queue has new submitted work.
* @extract_ofs: The current EXTRACT offset, this is only updated when handling
* the GLB IDLE IRQ if the idle timeout value is non-0 in order
* to help detect a queue's true idle status.
@@ -386,11 +399,13 @@ struct kbase_queue {
struct kbase_context *kctx;
u64 user_io_gpu_va;
struct tagged_addr phys[2];
char *user_io_addr;
u64 *user_io_addr;
u64 handle;
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
atomic_t pending_kick;
struct list_head pending_kick_link;
kbase_refcount_t refcount;
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
@@ -398,6 +413,7 @@ struct kbase_queue {
u64 base_addr;
u32 size;
u8 priority;
u8 group_priority;
s8 csi_index;
enum kbase_csf_queue_bind_state bind_state;
bool enabled;
@@ -410,16 +426,12 @@ struct kbase_queue {
u64 trace_offset_ptr;
u32 trace_buffer_size;
u32 trace_cfg;
struct kbase_csf_notification error;
struct work_struct cs_error_work;
u64 cs_error_info;
u32 cs_error;
bool cs_error_fatal;
atomic_t pending;
u64 extract_ofs;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u64 saved_cmd_ptr;
#endif /* CONFIG_DEBUG_FS */
};
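/*
 * Minimal userspace model (not driver code) of the pending-kick convention
 * documented for @pending_kick and @pending_kick_link above: the list link is
 * removed right before the kick is processed so that new kicks can be queued
 * in the meantime, which is why only the atomic pending flag is a valid
 * "is a kick outstanding?" test. All demo_* names are hypothetical.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_queue {
	atomic_bool pending_kick; /* authoritative "kick outstanding" flag */
	bool on_pending_list;     /* stand-in for membership of pending_kick_link */
};

static void demo_kick(struct demo_queue *q)
{
	atomic_store(&q->pending_kick, true);
	q->on_pending_list = true; /* enqueue for the submission thread */
}

static void demo_process_kick(struct demo_queue *q)
{
	q->on_pending_list = false; /* delink first, so a new kick can re-queue */
	atomic_store(&q->pending_kick, false);
	/* actual submission work would happen here */
}

int main(void)
{
	struct demo_queue q;

	atomic_init(&q.pending_kick, false);
	q.on_pending_list = false;

	demo_kick(&q);
	printf("after kick:    pending=%d on_list=%d\n",
	       (int)atomic_load(&q.pending_kick), (int)q.on_pending_list);
	demo_process_kick(&q);
	printf("after process: pending=%d on_list=%d\n",
	       (int)atomic_load(&q.pending_kick), (int)q.on_pending_list);
	return 0;
}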
/**
@@ -514,10 +526,6 @@ struct kbase_protected_suspend_buffer {
* have pending protected mode entry requests.
* @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be
* returned to userspace if such an error has occurred.
* @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT
* to be returned to userspace if such an error has occurred.
* @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM
* to be returned to userspace if such an error has occurred.
* @timer_event_work: Work item to handle the progress timeout fatal event
* for the group.
* @deschedule_deferred_cnt: Counter keeping a track of the number of threads
@@ -544,6 +552,7 @@ struct kbase_queue_group {
u8 compute_max;
u8 csi_handlers;
u64 tiler_mask;
u64 fragment_mask;
u64 compute_mask;
@@ -566,8 +575,6 @@ struct kbase_queue_group {
DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP);
struct kbase_csf_notification error_fatal;
struct kbase_csf_notification error_timeout;
struct kbase_csf_notification error_tiler_oom;
struct work_struct timer_event_work;
@@ -582,6 +589,12 @@ struct kbase_queue_group {
#endif
void *csg_reg;
u8 csg_reg_bind_retries;
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
/**
* @prev_act: Previous CSG activity transition in a GPU metrics.
*/
bool prev_act;
#endif
};
/**
@@ -834,7 +847,6 @@ struct kbase_csf_user_reg_context {
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
* @sched: Object representing the scheduler's context
* @pending_submission_work: Work item to process pending kicked GPU command queues.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
* @user_reg: Collective information to support mapping to USER Register page.
@@ -842,8 +854,7 @@ struct kbase_csf_user_reg_context {
struct kbase_csf_context {
struct list_head event_pages_head;
DECLARE_BITMAP(cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
struct kbase_queue *user_pages_info[
KBASE_CSF_NUM_USER_IO_PAGES_HANDLE];
struct kbase_queue *user_pages_info[KBASE_CSF_NUM_USER_IO_PAGES_HANDLE];
struct mutex lock;
struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM];
struct list_head queue_list;
@@ -853,10 +864,7 @@ struct kbase_csf_context {
struct workqueue_struct *wq;
struct list_head link;
struct kbase_csf_scheduler_context sched;
struct work_struct pending_submission_work;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
struct kbase_csf_user_reg_context user_reg;
};
@@ -1002,23 +1010,20 @@ struct kbase_csf_mcu_shared_regions {
* periodic scheduling tasks. If this value is 0
* then it will only perform scheduling under the
* influence of external factors e.g., IRQs, IOCTLs.
* @wq: Dedicated workqueue to execute the @tick_work.
* @tick_timer: High-resolution timer employed to schedule tick
* workqueue items (kernel-provided delayed_work
* items do not use hrtimer and for some reason do
* not provide sufficiently reliable periodicity).
* @tick_work: Work item that performs the "schedule on tick"
* operation to implement timeslice-based scheduling.
* @tock_work: Work item that would perform the schedule on tock
* operation to implement the asynchronous scheduling.
* @pending_tock_work: Indicates that the tock work item should re-execute
* once it's finished instead of going back to sleep.
* @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform
* a scheduling tick.
* @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform
* a scheduling tock.
* @ping_work: Work item that would ping the firmware at regular
* intervals, only if there is a single active CSG
* slot, to check if firmware is alive and would
* initiate a reset if the ping request isn't
* acknowledged.
* @top_ctx: Pointer to the Kbase context corresponding to the
* @top_kctx: Pointer to the Kbase context corresponding to the
* @top_grp.
* @top_grp: Pointer to queue group inside @groups_to_schedule
* list that was assigned the highest slot priority.
@@ -1058,13 +1063,6 @@ struct kbase_csf_mcu_shared_regions {
* after GPU and L2 cache have been powered up. So when
* this count is zero, MCU will not be powered up.
* @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds.
* @tick_timer_active: Indicates whether the @tick_timer is effectively
* active or not, as the callback function of
* @tick_timer will enqueue @tick_work only if this
* flag is true. This is mainly useful for the case
* when scheduling tick needs to be advanced from
* interrupt context, without actually deactivating
* the @tick_timer first and then enqueing @tick_work.
* @tick_protm_pending_seq: Scan out sequence number of the group that has
* protected mode execution pending for the queue(s)
* bound to it and will be considered first for the
@@ -1076,6 +1074,12 @@ struct kbase_csf_mcu_shared_regions {
* @mcu_regs_data: Scheduler MCU shared regions data for managing the
* shared interface mappings for on-slot queues and
* CSG suspend buffers.
* @kthread_signal: Used to wake up the GPU queue submission
* thread when a queue needs attention.
* @kthread_running: Whether the GPU queue submission thread should keep
* executing.
* @gpuq_kthread: High-priority thread used to handle GPU queue
* submissions.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@@ -1097,14 +1101,12 @@ struct kbase_csf_scheduler {
DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS);
DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS);
unsigned long last_schedule;
bool timer_enabled;
struct workqueue_struct *wq;
atomic_t timer_enabled;
struct hrtimer tick_timer;
struct work_struct tick_work;
struct delayed_work tock_work;
atomic_t pending_tick_work;
atomic_t pending_tock_work;
struct delayed_work ping_work;
struct kbase_context *top_ctx;
struct kbase_context *top_kctx;
struct kbase_queue_group *top_grp;
struct kbase_queue_group *active_protm_grp;
struct workqueue_struct *idle_wq;
@@ -1115,11 +1117,37 @@ struct kbase_csf_scheduler {
u32 non_idle_scanout_grps;
u32 pm_active_count;
unsigned int csg_scheduling_period_ms;
bool tick_timer_active;
u32 tick_protm_pending_seq;
ktime_t protm_enter_time;
struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
struct kbase_csf_mcu_shared_regions mcu_regs_data;
struct completion kthread_signal;
bool kthread_running;
struct task_struct *gpuq_kthread;
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
/**
* @gpu_metrics_tb: Handler of firmware trace buffer for gpu_metrics
*/
struct firmware_trace_buffer *gpu_metrics_tb;
/**
* @gpu_metrics_timer: High-resolution timer used to periodically emit the GPU metrics
* tracepoints for applications that are using the GPU. The timer is
* needed for the long duration handling so that the length of work
* period is within the allowed limit.
* Timer callback function will be executed in soft irq context.
*/
struct hrtimer gpu_metrics_timer;
/**
* @gpu_metrics_lock: Lock for the serialization of GPU metrics related code. The lock
* is not acquired in the HARDIRQ-safe way, so shall not be acquired
* after acquiring a lock that can be taken in the hard irq.
* The softirq must be disabled whenever the lock is taken from the
* process context.
*/
spinlock_t gpu_metrics_lock;
#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
};
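/*
 * Schematic kernel-style fragment (an illustration, not code from this
 * driver) of the locking rule described for @gpu_metrics_lock above: the lock
 * is also taken from an hrtimer callback running in softirq context, so
 * process context must take it with softirqs disabled (the _bh variants),
 * while the timer callback itself can take the plain variant.
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_metrics_lock);

/* Called from process context, e.g. when a queue group becomes active. */
static void demo_update_metrics_from_process_ctx(void)
{
	spin_lock_bh(&demo_metrics_lock); /* softirqs off: timer cannot preempt us */
	/* ... update the work-period accounting ... */
	spin_unlock_bh(&demo_metrics_lock);
}

/* Called from the hrtimer callback, i.e. already in softirq context. */
static void demo_emit_metrics_from_timer(void)
{
	spin_lock(&demo_metrics_lock);
	/* ... emit gpu_work_period tracepoints ... */
	spin_unlock(&demo_metrics_lock);
}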
/*
@@ -1130,15 +1158,14 @@ struct kbase_csf_scheduler {
/*
* Maximum value of the global progress timeout.
*/
#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \
((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> \
GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \
GLB_PROGRESS_TIMER_TIMEOUT_SCALE)
#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \
((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \
GLB_PROGRESS_TIMER_TIMEOUT_SCALE)
/*
* Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds.
* Default GLB_PWROFF_TIMER_TIMEOUT value in unit of nanosecond.
*/
#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800)
#define DEFAULT_GLB_PWROFF_TIMEOUT_NS (800 * 1000)
/*
* In typical operations, the management of the shader core power transitions
@@ -1186,7 +1213,7 @@ enum kbase_ipa_core_type {
/*
* Total number of configurable counters existing on the IPA Control interface.
*/
#define KBASE_IPA_CONTROL_MAX_COUNTERS \
#define KBASE_IPA_CONTROL_MAX_COUNTERS \
((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)
/**
@@ -1358,7 +1385,7 @@ struct kbase_csf_mcu_fw {
/*
* Firmware log polling period.
*/
#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25
#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT 25
/**
* enum kbase_csf_firmware_log_mode - Firmware log operating mode
@@ -1370,10 +1397,16 @@ struct kbase_csf_mcu_fw {
* @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log
* will be periodically emptied into dmesg, manual reading through debugfs is
* disabled.
*
* @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: Automatic discarding mode, firmware
* log will be periodically discarded, the remaining log can be read manually by
* the userspace (and it will also be dumped automatically into dmesg on GPU
* reset).
*/
enum kbase_csf_firmware_log_mode {
KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL,
KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT
KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT,
KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD
};
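/*
 * Tiny standalone model (not driver code) of how a periodic poll might treat
 * the three operating modes documented above. The demo_* names are shortened
 * stand-ins and the actions are described rather than implemented.
 */
#include <stdio.h>

enum demo_fw_log_mode { DEMO_LOG_MANUAL, DEMO_LOG_AUTO_PRINT, DEMO_LOG_AUTO_DISCARD };

static void demo_poll_fw_log(enum demo_fw_log_mode mode)
{
	switch (mode) {
	case DEMO_LOG_MANUAL:
		/* Leave the trace buffer alone; userspace reads it via debugfs. */
		puts("manual: nothing to do");
		break;
	case DEMO_LOG_AUTO_PRINT:
		/* Drain the trace buffer into dmesg; manual reading is disabled. */
		puts("auto-print: drain buffer to kernel log");
		break;
	case DEMO_LOG_AUTO_DISCARD:
		/* Only advance the read pointer so the buffer never overflows;
		 * whatever remains can still be read manually.
		 */
		puts("auto-discard: advance read pointer, keep remainder readable");
		break;
	}
}

int main(void)
{
	demo_poll_fw_log(DEMO_LOG_AUTO_DISCARD);
	return 0;
}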
/**
@@ -1387,6 +1420,7 @@ enum kbase_csf_firmware_log_mode {
* @dump_buf: Buffer used for dumping the log.
* @func_call_list_va_start: Virtual address of the start of the call list of FW log functions.
* @func_call_list_va_end: Virtual address of the end of the call list of FW log functions.
* @poll_period_ms: Firmware log polling period in milliseconds.
*/
struct kbase_csf_firmware_log {
enum kbase_csf_firmware_log_mode mode;
@@ -1395,6 +1429,7 @@ struct kbase_csf_firmware_log {
u8 *dump_buf;
u32 func_call_list_va_start;
u32 func_call_list_va_end;
atomic_t poll_period_ms;
};
/**
@@ -1490,7 +1525,7 @@ struct kbase_csf_user_reg {
* image.
* @shared_interface: Pointer to the interface object containing info for
* the memory area shared between firmware & host.
* @shared_reg_rbtree: RB tree of the memory regions allocated from the
* @mcu_shared_zone: Memory zone tracking memory regions allocated from the
* shared interface segment in MCU firmware address
* space.
* @db_filp: Pointer to a dummy file, that alongwith
@@ -1552,22 +1587,28 @@ struct kbase_csf_user_reg {
* @fw_error_work: Work item for handling the firmware internal error
* fatal event.
* @ipa_control: IPA Control component manager.
* @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input
* in unit of micro-seconds. The firmware does not use
* @mcu_core_pwroff_dur_ns: Sysfs attribute for the glb_pwroff timeout input
* in unit of nanoseconds. The firmware does not use
* it directly.
* @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input
* in interface required format, ready to be used
* directly in the firmware.
* @mcu_core_pwroff_dur_count_modifier: Update csffw_glb_req_cfg_pwroff_timer
* to make the shr(10) modifier conditional
* on new flag in GLB_PWROFF_TIMER_CONFIG
* @mcu_core_pwroff_reg_shadow: The actual value that has been programed into
* the glb_pwoff register. This is separated from
* the @p mcu_core_pwroff_dur_count as an update
* to the latter is asynchronous.
* @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
* window in unit of microseconds. The firmware does not
* @gpu_idle_hysteresis_ns: Sysfs attribute for the idle hysteresis time
* window in unit of nanoseconds. The firmware does not
* use it directly.
* @gpu_idle_dur_count: The counterpart of the hysteresis time window in
* interface required format, ready to be used
* directly in the firmware.
* @gpu_idle_dur_count_modifier: Update csffw_glb_req_idle_enable to make the shr(10)
* modifier conditional on the new flag
* in GLB_IDLE_TIMER_CONFIG.
* @fw_timeout_ms: Timeout value (in milliseconds) used when waiting
* for any request sent to the firmware.
* @hwcnt: Contain members required for handling the dump of
@@ -1579,6 +1620,12 @@ struct kbase_csf_user_reg {
* @dof: Structure for dump on fault.
* @user_reg: Collective information to support the mapping to
* USER Register page for user processes.
* @pending_gpuq_kicks: Lists of GPU queue that have been kicked but not
* yet processed, categorised by queue group's priority.
* @pending_gpuq_kicks_lock: Protect @pending_gpu_kicks and
* kbase_queue.pending_kick_link.
* @quirks_ext: Pointer to an allocated buffer containing the firmware
* workarounds configuration.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1588,7 +1635,7 @@ struct kbase_csf_device {
struct kobject *fw_cfg_kobj;
struct kbase_csf_trace_buffers firmware_trace_buffers;
void *shared_interface;
struct rb_root shared_reg_rbtree;
struct kbase_reg_zone mcu_shared_zone;
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
@@ -1609,11 +1656,13 @@ struct kbase_csf_device {
bool glb_init_request_pending;
struct work_struct fw_error_work;
struct kbase_ipa_control ipa_control;
u32 mcu_core_pwroff_dur_us;
u32 mcu_core_pwroff_dur_ns;
u32 mcu_core_pwroff_dur_count;
u32 mcu_core_pwroff_dur_count_modifier;
u32 mcu_core_pwroff_reg_shadow;
u32 gpu_idle_hysteresis_us;
u32 gpu_idle_hysteresis_ns;
u32 gpu_idle_dur_count;
u32 gpu_idle_dur_count_modifier;
unsigned int fw_timeout_ms;
struct kbase_csf_hwcnt hwcnt;
struct kbase_csf_mcu_fw fw;
@@ -1629,6 +1678,9 @@ struct kbase_csf_device {
struct kbase_debug_coresight_device coresight;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
struct kbase_csf_user_reg user_reg;
struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
spinlock_t pending_gpuq_kicks_lock;
u32 *quirks_ext;
};
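/*
 * Illustrative arithmetic only (not the driver's conversion routine): the
 * *_dur_ns sysfs inputs documented above are converted into timer-source
 * cycles before being written to the firmware interface. A first-order
 * conversion from a nanosecond duration to a cycle count at a given timer
 * source frequency is shown below; per the comments above, the driver
 * additionally applies the conditional shr(10) modifier and clamps the result
 * to the register field width. The 24 MHz figure is only an example.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t demo_ns_to_timer_cycles(uint64_t dur_ns, uint64_t timer_freq_hz)
{
	/* cycles = dur_ns * freq / 1e9, in 64-bit to avoid overflow for the
	 * sub-millisecond durations used here.
	 */
	return (dur_ns * timer_freq_hz) / 1000000000ULL;
}

int main(void)
{
	/* e.g. the 800us (800000ns) default power-off timeout at a 24MHz
	 * SYSTEM_TIMESTAMP source gives 19200 cycles.
	 */
	printf("%llu\n",
	       (unsigned long long)demo_ns_to_timer_cycles(800 * 1000, 24000000));
	return 0;
}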
/**
@@ -1645,10 +1697,6 @@ struct kbase_csf_device {
* @bf_data: Data relating to Bus fault.
* @gf_data: Data relating to GPU fault.
* @current_setup: Stores the MMU configuration for this address space.
* @is_unresponsive: Flag to indicate MMU is not responding.
* Set if a MMU command isn't completed within
* &kbase_device:mmu_as_inactive_wait_time_ms.
* Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -1660,7 +1708,6 @@ struct kbase_as {
struct kbase_fault bf_data;
struct kbase_fault gf_data;
struct kbase_mmu_setup current_setup;
bool is_unresponsive;
};
#endif /* _KBASE_CSF_DEFS_H_ */


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,8 +40,8 @@ struct kbase_csf_event_cb {
void *param;
};
int kbase_csf_event_wait_add(struct kbase_context *kctx,
kbase_csf_event_callback *callback, void *param)
int kbase_csf_event_wait_add(struct kbase_context *kctx, kbase_csf_event_callback *callback,
void *param)
{
int err = -ENOMEM;
struct kbase_csf_event_cb *event_cb =
@@ -56,8 +56,7 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx,
spin_lock_irqsave(&kctx->csf.event.lock, flags);
list_add_tail(&event_cb->link, &kctx->csf.event.callback_list);
dev_dbg(kctx->kbdev->dev,
"Added event handler %pK with param %pK\n", event_cb,
dev_dbg(kctx->kbdev->dev, "Added event handler %pK with param %pK\n", event_cb,
event_cb->param);
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
@@ -67,8 +66,8 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx,
return err;
}
void kbase_csf_event_wait_remove(struct kbase_context *kctx,
kbase_csf_event_callback *callback, void *param)
void kbase_csf_event_wait_remove(struct kbase_context *kctx, kbase_csf_event_callback *callback,
void *param)
{
struct kbase_csf_event_cb *event_cb;
unsigned long flags;
@@ -78,8 +77,7 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx,
list_for_each_entry(event_cb, &kctx->csf.event.callback_list, link) {
if ((event_cb->callback == callback) && (event_cb->param == param)) {
list_del(&event_cb->link);
dev_dbg(kctx->kbdev->dev,
"Removed event handler %pK with param %pK\n",
dev_dbg(kctx->kbdev->dev, "Removed event handler %pK with param %pK\n",
event_cb, event_cb->param);
kfree(event_cb);
break;
@@ -113,8 +111,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
struct kbase_csf_event_cb *event_cb, *next_event_cb;
unsigned long flags;
dev_dbg(kctx->kbdev->dev,
"Signal event (%s GPU notify) for context %pK\n",
dev_dbg(kctx->kbdev->dev, "Signal event (%s GPU notify) for context %pK\n",
notify_gpu ? "with" : "without", (void *)kctx);
/* First increment the signal count and wake up event thread.
@@ -136,12 +133,10 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
*/
spin_lock_irqsave(&kctx->csf.event.lock, flags);
list_for_each_entry_safe(
event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
list_for_each_entry_safe(event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
enum kbase_csf_event_callback_action action;
dev_dbg(kctx->kbdev->dev,
"Calling event handler %pK with param %pK\n",
dev_dbg(kctx->kbdev->dev, "Calling event handler %pK with param %pK\n",
(void *)event_cb, event_cb->param);
action = event_cb->callback(event_cb->param);
if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
@@ -160,17 +155,15 @@ void kbase_csf_event_term(struct kbase_context *kctx)
spin_lock_irqsave(&kctx->csf.event.lock, flags);
list_for_each_entry_safe(
event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
list_for_each_entry_safe(event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
list_del(&event_cb->link);
dev_warn(kctx->kbdev->dev,
"Removed event handler %pK with param %pK\n",
(void *)event_cb, event_cb->param);
dev_warn(kctx->kbdev->dev, "Removed event handler %pK with param %pK\n",
(void *)event_cb, event_cb->param);
kfree(event_cb);
}
WARN(!list_empty(&kctx->csf.event.error_list),
"Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id);
WARN(!list_empty(&kctx->csf.event.error_list), "Error list not empty for ctx %d_%d\n",
kctx->tgid, kctx->id);
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
}
@@ -182,8 +175,7 @@ void kbase_csf_event_init(struct kbase_context *const kctx)
spin_lock_init(&kctx->csf.event.lock);
}
void kbase_csf_event_remove_error(struct kbase_context *kctx,
struct kbase_csf_notification *error)
void kbase_csf_event_remove_error(struct kbase_context *kctx, struct kbase_csf_notification *error)
{
unsigned long flags;
@@ -201,19 +193,19 @@ bool kbase_csf_event_read_error(struct kbase_context *kctx,
spin_lock_irqsave(&kctx->csf.event.lock, flags);
if (likely(!list_empty(&kctx->csf.event.error_list))) {
error_data = list_first_entry(&kctx->csf.event.error_list,
struct kbase_csf_notification, link);
struct kbase_csf_notification, link);
list_del_init(&error_data->link);
*event_data = error_data->data;
dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
(void *)error_data, (void *)kctx);
dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", (void *)error_data,
(void *)kctx);
}
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
return !!error_data;
}
void kbase_csf_event_add_error(struct kbase_context *const kctx,
struct kbase_csf_notification *const error,
struct base_csf_notification const *const data)
struct kbase_csf_notification *const error,
struct base_csf_notification const *const data)
{
unsigned long flags;
@@ -230,8 +222,7 @@ void kbase_csf_event_add_error(struct kbase_context *const kctx,
if (list_empty(&error->link)) {
error->data = *data;
list_add_tail(&error->link, &kctx->csf.event.error_list);
dev_dbg(kctx->kbdev->dev,
"Added error %pK of type %d in context %pK\n",
dev_dbg(kctx->kbdev->dev, "Added error %pK of type %d in context %pK\n",
(void *)error, data->type, (void *)kctx);
} else {
dev_dbg(kctx->kbdev->dev, "Error %pK of type %d already pending in context %pK",


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -56,8 +56,8 @@ typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *para
*
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_event_wait_add(struct kbase_context *kctx,
kbase_csf_event_callback *callback, void *param);
int kbase_csf_event_wait_add(struct kbase_context *kctx, kbase_csf_event_callback *callback,
void *param);
/**
* kbase_csf_event_wait_remove - Remove a CSF event callback
@@ -70,8 +70,8 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx,
* This function removes an event callback from the list of CSF event callbacks
* belonging to a given Kbase context.
*/
void kbase_csf_event_wait_remove(struct kbase_context *kctx,
kbase_csf_event_callback *callback, void *param);
void kbase_csf_event_wait_remove(struct kbase_context *kctx, kbase_csf_event_callback *callback,
void *param);
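/*
 * Hedged usage sketch for the two registration helpers declared above, in the
 * driver's header context. The my_waiter type, my_condition_met() and
 * my_register() are hypothetical; KBASE_CSF_EVENT_CALLBACK_KEEP is assumed to
 * be the non-removing counterpart of the KBASE_CSF_EVENT_CALLBACK_REMOVE
 * action handled in kbase_csf_event_signal().
 */
struct my_waiter {
	bool condition;
};

static bool my_condition_met(const struct my_waiter *w)
{
	return w->condition;
}

static enum kbase_csf_event_callback_action my_event_cb(void *param)
{
	struct my_waiter *w = param;

	if (my_condition_met(w)) {
		/* Returning REMOVE lets kbase_csf_event_signal() delink and
		 * free the callback entry for us.
		 */
		return KBASE_CSF_EVENT_CALLBACK_REMOVE;
	}
	return KBASE_CSF_EVENT_CALLBACK_KEEP;
}

static int my_register(struct kbase_context *kctx, struct my_waiter *w)
{
	/* Pairs with kbase_csf_event_wait_remove(kctx, my_event_cb, w) on any
	 * teardown path that does not go through the callback itself.
	 */
	return kbase_csf_event_wait_add(kctx, my_event_cb, w);
}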
/**
* kbase_csf_event_term - Removes all CSF event callbacks
@@ -148,8 +148,8 @@ bool kbase_csf_event_read_error(struct kbase_context *kctx,
*
*/
void kbase_csf_event_add_error(struct kbase_context *const kctx,
struct kbase_csf_notification *const error,
struct base_csf_notification const *const data);
struct kbase_csf_notification *const error,
struct base_csf_notification const *const data);
/**
* kbase_csf_event_remove_error - Remove an error from event error list
@@ -157,8 +157,7 @@ void kbase_csf_event_add_error(struct kbase_context *const kctx,
* @kctx: Address of a base context associated with a GPU address space.
* @error: Address of the item to be removed from the context's event error list.
*/
void kbase_csf_event_remove_error(struct kbase_context *kctx,
struct kbase_csf_notification *error);
void kbase_csf_event_remove_error(struct kbase_context *kctx, struct kbase_csf_notification *error);
/**
* kbase_csf_event_error_pending - Check the error pending status

File diff suppressed because it is too large


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
#include "device/mali_kbase_device.h"
#include <csf/mali_kbase_csf_registers.h>
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h>
/*
* PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in:
@@ -56,7 +57,7 @@
#define CSF_NUM_DOORBELL ((u8)24)
/* Offset to the first HW doorbell page */
#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000)
#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)DOORBELLS_BASE)
/* Size of HW Doorbell page, used to calculate the offset to subsequent pages */
#define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000)
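/*
 * Small worked example of how the two macros above compose: doorbell page N
 * is expected to start at CSF_HW_DOORBELL_PAGE_OFFSET +
 * N * CSF_HW_DOORBELL_PAGE_SIZE within the GPU register space. The constants
 * below mirror the values shown here purely for illustration; in this diff
 * the base is now derived from DOORBELLS_BASE.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_DOORBELL_PAGE_OFFSET 0x80000u
#define DEMO_DOORBELL_PAGE_SIZE   0x10000u

static uint32_t demo_doorbell_page_offset(uint32_t doorbell_nr)
{
	return DEMO_DOORBELL_PAGE_OFFSET + doorbell_nr * DEMO_DOORBELL_PAGE_SIZE;
}

int main(void)
{
	/* Doorbell 0 -> 0x80000, doorbell 1 -> 0x90000, ... */
	printf("0x%x\n", demo_doorbell_page_offset(1));
	return 0;
}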
@@ -80,7 +81,6 @@
struct kbase_device;
/**
* struct kbase_csf_mapping - Memory mapping for CSF memory.
* @phys: Physical memory allocation used by the mapping.
@@ -134,8 +134,8 @@ struct kbase_csf_cmd_stream_info {
* @offset: Offset of the word to be written, in bytes.
* @value: Value to be written.
*/
void kbase_csf_firmware_cs_input(
const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value);
void kbase_csf_firmware_cs_input(const struct kbase_csf_cmd_stream_info *info, u32 offset,
u32 value);
/**
* kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page
@@ -145,8 +145,8 @@ void kbase_csf_firmware_cs_input(
* @info: CSI provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_cs_input_read(
const struct kbase_csf_cmd_stream_info *const info, const u32 offset);
u32 kbase_csf_firmware_cs_input_read(const struct kbase_csf_cmd_stream_info *const info,
const u32 offset);
/**
* kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page
@@ -156,9 +156,8 @@ u32 kbase_csf_firmware_cs_input_read(
* @value: Value to be written.
* @mask: Bitmask with the bits to be modified set.
*/
void kbase_csf_firmware_cs_input_mask(
const struct kbase_csf_cmd_stream_info *info, u32 offset,
u32 value, u32 mask);
void kbase_csf_firmware_cs_input_mask(const struct kbase_csf_cmd_stream_info *info, u32 offset,
u32 value, u32 mask);
/**
* kbase_csf_firmware_cs_output() - Read a word in a CS's output page
@@ -168,8 +167,7 @@ void kbase_csf_firmware_cs_input_mask(
* @info: CSI provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_cs_output(
const struct kbase_csf_cmd_stream_info *info, u32 offset);
u32 kbase_csf_firmware_cs_output(const struct kbase_csf_cmd_stream_info *info, u32 offset);
/**
* struct kbase_csf_cmd_stream_group_info - CSG interface provided by the
* firmware.
@@ -207,9 +205,8 @@ struct kbase_csf_cmd_stream_group_info {
* @offset: Offset of the word to be written, in bytes.
* @value: Value to be written.
*/
void kbase_csf_firmware_csg_input(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset,
u32 value);
void kbase_csf_firmware_csg_input(const struct kbase_csf_cmd_stream_group_info *info, u32 offset,
u32 value);
/**
* kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page
@@ -219,8 +216,8 @@ void kbase_csf_firmware_csg_input(
* @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_csg_input_read(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
u32 kbase_csf_firmware_csg_input_read(const struct kbase_csf_cmd_stream_group_info *info,
u32 offset);
/**
* kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's
@@ -231,9 +228,8 @@ u32 kbase_csf_firmware_csg_input_read(
* @value: Value to be written.
* @mask: Bitmask with the bits to be modified set.
*/
void kbase_csf_firmware_csg_input_mask(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset,
u32 value, u32 mask);
void kbase_csf_firmware_csg_input_mask(const struct kbase_csf_cmd_stream_group_info *info,
u32 offset, u32 value, u32 mask);
/**
* kbase_csf_firmware_csg_output()- Read a word in a CSG's output page
@@ -243,8 +239,8 @@ void kbase_csf_firmware_csg_input_mask(
* @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_csg_output(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
u32 kbase_csf_firmware_csg_output(const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
/**
* struct kbase_csf_global_iface - Global CSF interface
@@ -286,8 +282,8 @@ struct kbase_csf_global_iface {
* @offset: Offset of the word to be written, in bytes.
* @value: Value to be written.
*/
void kbase_csf_firmware_global_input(
const struct kbase_csf_global_iface *iface, u32 offset, u32 value);
void kbase_csf_firmware_global_input(const struct kbase_csf_global_iface *iface, u32 offset,
u32 value);
/**
* kbase_csf_firmware_global_input_mask() - Set part of a word in the global
@@ -298,9 +294,8 @@ void kbase_csf_firmware_global_input(
* @value: Value to be written.
* @mask: Bitmask with the bits to be modified set.
*/
void kbase_csf_firmware_global_input_mask(
const struct kbase_csf_global_iface *iface, u32 offset,
u32 value, u32 mask);
void kbase_csf_firmware_global_input_mask(const struct kbase_csf_global_iface *iface, u32 offset,
u32 value, u32 mask);
/**
* kbase_csf_firmware_global_input_read() - Read a word in a global input page
@@ -310,8 +305,7 @@ void kbase_csf_firmware_global_input_mask(
* @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_global_input_read(
const struct kbase_csf_global_iface *info, u32 offset);
u32 kbase_csf_firmware_global_input_read(const struct kbase_csf_global_iface *info, u32 offset);
/**
* kbase_csf_firmware_global_output() - Read a word in the global output page
@@ -321,8 +315,7 @@ u32 kbase_csf_firmware_global_input_read(
* @iface: CSF interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_global_output(
const struct kbase_csf_global_iface *iface, u32 offset);
u32 kbase_csf_firmware_global_output(const struct kbase_csf_global_iface *iface, u32 offset);
/**
* kbase_csf_ring_doorbell() - Ring the doorbell
@@ -344,8 +337,7 @@ void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr);
* is not permanently mapped on the CPU address space, therefore it maps it
* and then unmaps it to access it independently.
*/
void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value);
void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value);
/**
* kbase_csf_update_firmware_memory - Write a value in a GPU address
@@ -359,8 +351,7 @@ void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
* is not permanently mapped on the CPU address space, therefore it maps it
* and then unmaps it to access it independently.
*/
void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 value);
void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 value);
/**
* kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the
@@ -378,8 +369,7 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
* their final execution location during firmware boot using an address based on the
* final execution location.
*/
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value);
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 *value);
/**
* kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the
@@ -397,8 +387,7 @@ void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
* their final execution location during firmware boot using an address based on the
* final execution location.
*/
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 value);
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 value);
/**
* kbase_csf_firmware_early_init() - Early initialization for the firmware.
@@ -560,11 +549,22 @@ static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
return true;
#else
return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) ==
MCU_STATUS_HALTED);
return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_STATUS)) == MCU_STATUS_VALUE_HALT);
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
}
/**
* kbase_csf_firmware_mcu_halt_req_complete - Check if the MCU Halt request is complete
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function needs to be called after Halt request has been sent to the FW.
*
* Return: true if the Halt request is complete, otherwise false.
*/
bool kbase_csf_firmware_mcu_halt_req_complete(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_trigger_mcu_halt - Send the Global request to firmware to
* halt its operation and bring itself
@@ -641,8 +641,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev);
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @core_mask: Mask of the enabled shader cores.
*/
void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
u64 core_mask);
void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, u64 core_mask);
/**
* kbase_csf_firmware_global_reinit_complete - Check the Global configuration
@@ -668,8 +667,8 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev);
* @core_mask: New core mask value if update_core_mask is true,
* otherwise unused.
*/
void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev,
bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask);
void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, bool update_core_pwroff_timer,
bool update_core_mask, u64 core_mask);
/**
* kbase_csf_firmware_core_attr_updated - Check the Global configuration
@@ -711,11 +710,11 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev);
*
* Return: Total number of CSs, summed across all groups.
*/
u32 kbase_csf_firmware_get_glb_iface(
struct kbase_device *kbdev, struct basep_cs_group_control *group_data,
u32 max_group_num, struct basep_cs_stream_control *stream_data,
u32 max_total_stream_num, u32 *glb_version, u32 *features,
u32 *group_num, u32 *prfcnt_size, u32 *instr_features);
u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev,
struct basep_cs_group_control *group_data, u32 max_group_num,
struct basep_cs_stream_control *stream_data,
u32 max_total_stream_num, u32 *glb_version, u32 *features,
u32 *group_num, u32 *prfcnt_size, u32 *instr_features);
/**
* kbase_csf_firmware_get_timeline_metadata - Get CSF firmware header timeline
@@ -727,8 +726,8 @@ u32 kbase_csf_firmware_get_glb_iface(
*
* Return: The firmware timeline metadata content which match @p name.
*/
const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev,
const char *name, size_t *size);
const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, const char *name,
size_t *size);
/**
* kbase_csf_firmware_mcu_shared_mapping_init - Allocate and map MCU shared memory.
@@ -750,12 +749,10 @@ const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev,
*
* Return: 0 if success, or an error code on failure.
*/
int kbase_csf_firmware_mcu_shared_mapping_init(
struct kbase_device *kbdev,
unsigned int num_pages,
unsigned long cpu_map_properties,
unsigned long gpu_map_properties,
struct kbase_csf_mapping *csf_mapping);
int kbase_csf_firmware_mcu_shared_mapping_init(struct kbase_device *kbdev, unsigned int num_pages,
unsigned long cpu_map_properties,
unsigned long gpu_map_properties,
struct kbase_csf_mapping *csf_mapping);
/**
* kbase_csf_firmware_mcu_shared_mapping_term - Unmap and free MCU shared memory.
@@ -763,8 +760,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
* @kbdev: Device pointer.
* @csf_mapping: Metadata of the memory mapping to terminate.
*/
void kbase_csf_firmware_mcu_shared_mapping_term(
struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping);
void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev,
struct kbase_csf_mapping *csf_mapping);
#ifdef CONFIG_MALI_BIFROST_DEBUG
extern bool fw_debug;
@@ -869,6 +866,22 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev);
*/
u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur);
/**
* kbase_csf_firmware_reset_mcu_core_pwroff_time - Reset the MCU shader Core power-off
* time value
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Sets the MCU Shader Core power-off time value to the default.
*
* The configured MCU shader Core power-off timer will only have effect when the host
* driver has delegated the shader cores' power management to MCU.
*
* Return: the actual internal core power-off timer value in register defined
* format.
*/
u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev);
/**
* kbase_csf_interface_version - Helper function to build the full firmware
* interface version in a format compatible with
@@ -882,8 +895,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
*/
static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch)
{
return ((major << GLB_VERSION_MAJOR_SHIFT) |
(minor << GLB_VERSION_MINOR_SHIFT) |
return ((major << GLB_VERSION_MAJOR_SHIFT) | (minor << GLB_VERSION_MINOR_SHIFT) |
(patch << GLB_VERSION_PATCH_SHIFT));
}


@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,6 +31,8 @@
#define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity"
#define CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME "WA_CFG0"
/**
* struct firmware_config - Configuration item within the MCU firmware
*
@@ -66,10 +68,10 @@ struct firmware_config {
u32 cur_val;
};
#define FW_CFG_ATTR(_name, _mode) \
struct attribute fw_cfg_attr_##_name = { \
.name = __stringify(_name), \
.mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
#define FW_CFG_ATTR(_name, _mode) \
struct attribute fw_cfg_attr_##_name = { \
.name = __stringify(_name), \
.mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
}
static FW_CFG_ATTR(min, 0444);
@@ -78,17 +80,14 @@ static FW_CFG_ATTR(cur, 0644);
static void fw_cfg_kobj_release(struct kobject *kobj)
{
struct firmware_config *config =
container_of(kobj, struct firmware_config, kobj);
struct firmware_config *config = container_of(kobj, struct firmware_config, kobj);
kfree(config);
}
static ssize_t show_fw_cfg(struct kobject *kobj,
struct attribute *attr, char *buf)
static ssize_t show_fw_cfg(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct firmware_config *config =
container_of(kobj, struct firmware_config, kobj);
struct firmware_config *config = container_of(kobj, struct firmware_config, kobj);
struct kbase_device *kbdev = config->kbdev;
u32 val = 0;
@@ -106,22 +105,17 @@ static ssize_t show_fw_cfg(struct kobject *kobj,
val = config->cur_val;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
} else {
dev_warn(kbdev->dev,
"Unexpected read from entry %s/%s",
config->name, attr->name);
dev_warn(kbdev->dev, "Unexpected read from entry %s/%s", config->name, attr->name);
return -EINVAL;
}
return snprintf(buf, PAGE_SIZE, "%u\n", val);
return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t store_fw_cfg(struct kobject *kobj,
struct attribute *attr,
const char *buf,
size_t count)
static ssize_t store_fw_cfg(struct kobject *kobj, struct attribute *attr, const char *buf,
size_t count)
{
struct firmware_config *config =
container_of(kobj, struct firmware_config, kobj);
struct firmware_config *config = container_of(kobj, struct firmware_config, kobj);
struct kbase_device *kbdev = config->kbdev;
if (!kbdev)
@@ -140,6 +134,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
return -EINVAL;
}
if (!strcmp(config->name, CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME))
return -EPERM;
if ((val < config->min) || (val > config->max))
return -EINVAL;
@@ -161,8 +158,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
* the User to retry the write.
*/
if (kbase_reset_gpu_silent(kbdev)) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return -EAGAIN;
}
}
@@ -176,8 +172,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
* in the RONLY section of firmware image, which is not
* reloaded on firmware reboot due to GPU reset.
*/
kbase_csf_update_firmware_memory(
kbdev, config->address, val);
kbase_csf_update_firmware_memory(kbdev, config->address, val);
config->cur_val = val;
@@ -210,9 +205,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
if (!config->updatable)
kbase_reset_gpu_wait(kbdev);
} else {
dev_warn(kbdev->dev,
"Unexpected write to entry %s/%s",
config->name, attr->name);
dev_warn(kbdev->dev, "Unexpected write to entry %s/%s", config->name, attr->name);
return -EINVAL;
}
@@ -248,12 +241,11 @@ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)
{
struct firmware_config *config;
kbdev->csf.fw_cfg_kobj = kobject_create_and_add(
CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj);
kbdev->csf.fw_cfg_kobj =
kobject_create_and_add(CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj);
if (!kbdev->csf.fw_cfg_kobj) {
kobject_put(kbdev->csf.fw_cfg_kobj);
dev_err(kbdev->dev,
"Creation of %s sysfs sub-directory failed\n",
dev_err(kbdev->dev, "Creation of %s sysfs sub-directory failed\n",
CSF_FIRMWARE_CFG_SYSFS_DIR_NAME);
return -ENOMEM;
}
@@ -261,15 +253,25 @@ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)
list_for_each_entry(config, &kbdev->csf.firmware_config, node) {
int err;
kbase_csf_read_firmware_memory(kbdev, config->address,
&config->cur_val);
kbase_csf_read_firmware_memory(kbdev, config->address, &config->cur_val);
err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type,
kbdev->csf.fw_cfg_kobj, "%s", config->name);
if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) &&
(config->cur_val)) {
err = kbase_csf_firmware_log_toggle_logging_calls(config->kbdev,
config->cur_val);
if (err) {
kobject_put(&config->kobj);
dev_err(kbdev->dev, "Failed to enable logging (result: %d)", err);
return err;
}
}
err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, kbdev->csf.fw_cfg_kobj,
"%s", config->name);
if (err) {
kobject_put(&config->kobj);
dev_err(kbdev->dev,
"Creation of %s sysfs sub-directory failed\n",
dev_err(kbdev->dev, "Creation of %s sysfs sub-directory failed\n",
config->name);
return err;
}
@@ -285,8 +287,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev)
while (!list_empty(&kbdev->csf.firmware_config)) {
struct firmware_config *config;
config = list_first_entry(&kbdev->csf.firmware_config,
struct firmware_config, node);
config =
list_first_entry(&kbdev->csf.firmware_config, struct firmware_config, node);
list_del(&config->node);
if (config->kobj_inited) {
@@ -307,6 +309,7 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
const char *name = (char *)&entry[3];
struct firmware_config *config;
const unsigned int name_len = size - CONFIGURATION_ENTRY_NAME_OFFSET;
CSTD_UNUSED(fw);
/* Allocate enough space for struct firmware_config and the
* configuration option name (with NULL termination)
@@ -318,7 +321,7 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
config->kbdev = kbdev;
config->updatable = updatable;
config->name = (char *)(config+1);
config->name = (char *)(config + 1);
config->address = entry[0];
config->min = entry[1];
config->max = entry[2];
@@ -328,12 +331,80 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
list_add(&config->node, &kbdev->csf.firmware_config);
dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u",
config->name, config->address,
config->min, config->max);
dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u", config->name,
config->address, config->min, config->max);
return 0;
}
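As an aside, the show/store handlers above are exercised through per-option sysfs attributes. The following user-space sketch is illustration only, not part of this patch: the "firmware_config" directory name, the "mali0" device index and the "Log verbosity" option are assumptions (the real directory comes from CSF_FIRMWARE_CFG_SYSFS_DIR_NAME and the option names from the firmware image).
/* Illustration only: read the min/max/cur attributes of one firmware
 * configuration option exposed by the handlers above.
 */
#include <stdio.h>

int main(void)
{
	const char *base = "/sys/class/misc/mali0/device/firmware_config/Log verbosity";
	const char *attrs[] = { "min", "max", "cur" };
	char path[256], line[32];
	unsigned int i;

	for (i = 0; i < 3; i++) {
		FILE *f;

		snprintf(path, sizeof(path), "%s/%s", base, attrs[i]);
		f = fopen(path, "r");
		if (!f)
			continue; /* option or attribute not present on this build */
		if (fgets(line, sizeof(line), f))
			printf("%s = %s", attrs[i], line);
		fclose(f);
	}
	return 0;
}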
int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev)
{
struct firmware_config *config;
/* "quirks_ext" property is optional */
if (!kbdev->csf.quirks_ext)
return 0;
list_for_each_entry(config, &kbdev->csf.firmware_config, node) {
if (strcmp(config->name, CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME))
continue;
dev_info(kbdev->dev, "External quirks 0: 0x%08x", kbdev->csf.quirks_ext[0]);
kbase_csf_update_firmware_memory(kbdev, config->address, kbdev->csf.quirks_ext[0]);
return 0;
}
return -ENOENT;
}
int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev)
{
int ret;
int entry_count;
size_t entry_bytes;
/* "quirks-ext" property is optional and may have no value.
* Also try fallback "quirks_ext" property if it doesn't exist.
*/
entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext");
if (entry_count == -EINVAL)
entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext");
if (entry_count == -EINVAL || entry_count == -ENODATA)
return 0;
entry_bytes = entry_count * sizeof(u32);
kbdev->csf.quirks_ext = kzalloc(entry_bytes, GFP_KERNEL);
if (!kbdev->csf.quirks_ext)
return -ENOMEM;
ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks-ext", kbdev->csf.quirks_ext,
entry_count);
if (ret == -EINVAL)
ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks_ext",
kbdev->csf.quirks_ext, entry_count);
if (ret == -EINVAL || ret == -ENODATA) {
/* This is unexpected since the property is already accessed for counting the number
* of its elements.
*/
dev_err(kbdev->dev, "\"quirks_ext\" DTB property data read failed");
return ret;
}
if (ret == -EOVERFLOW) {
dev_err(kbdev->dev, "\"quirks_ext\" DTB property data size exceeds 32 bits");
return ret;
}
return kbase_csf_firmware_cfg_fw_wa_enable(kbdev);
}
void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev)
{
kfree(kbdev->csf.quirks_ext);
}
#else
int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)
{
@@ -351,4 +422,15 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
{
return 0;
}
int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev)
{
return 0;
}
int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev)
{
return 0;
}
#endif /* CONFIG_SYSFS */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -69,4 +69,38 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev);
int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
const struct kbase_csf_mcu_fw *const fw,
const u32 *entry, unsigned int size, bool updatable);
/**
* kbase_csf_firmware_cfg_fw_wa_enable() - Enable firmware workarounds configuration.
*
* @kbdev: Kbase device structure
*
* Look for the config entry that enables support in FW for workarounds and set it according to
* the firmware workaround configuration before the initial boot or reload of firmware.
*
* Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_cfg_fw_wa_init() - Initialize firmware workarounds configuration.
*
* @kbdev: Kbase device structure
*
* Retrieve and save the firmware workarounds configuration from the device-tree "quirks-ext"
* property (falling back to the legacy "quirks_ext" name).

* Then, look for the config entry that enables support in FW for workarounds and set it according
* to the configuration before the initial firmware boot.
*
* Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_cfg_fw_wa_term - Delete local cache for firmware workarounds configuration.
*
* @kbdev: Pointer to the Kbase device
*
*/
void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev);
#endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */
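The three workaround helpers declared above are expected to bracket firmware bring-up: parse and apply the quirks before the first firmware boot, re-apply them on a reload, and free the cached data at teardown. A minimal sketch of that ordering follows; the caller names are illustrative and the header file name is assumed, this is not the driver's actual call path.
/* Illustration only; error handling trimmed. */
#include "mali_kbase_csf_firmware_cfg.h"	/* header name assumed */

static int example_fw_bringup(struct kbase_device *kbdev)
{
	/* Reads the optional "quirks-ext"/"quirks_ext" DT property and, if a
	 * WA_CFG0 entry exists in the firmware image, programs it before the
	 * initial firmware boot.
	 */
	int err = kbase_csf_firmware_cfg_fw_wa_init(kbdev);

	if (err)
		return err;

	/* ... boot the firmware ... */
	return 0;
}

static void example_fw_reload(struct kbase_device *kbdev)
{
	/* On a firmware reload only the enable step is repeated, since the
	 * DT data is already cached in kbdev->csf.quirks_ext.
	 */
	kbase_csf_firmware_cfg_fw_wa_enable(kbdev);
}

static void example_fw_teardown(struct kbase_device *kbdev)
{
	kbase_csf_firmware_cfg_fw_wa_term(kbdev);	/* frees the cached quirks */
}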

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,7 @@
#include <linux/file.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/version_compat_defs.h>
#include "mali_kbase.h"
#include "mali_kbase_csf_firmware_core_dump.h"
@@ -41,7 +42,7 @@
#define FW_CORE_DUMP_DATA_VERSION_MINOR 1
/* Full version of the image header core dump data format */
#define FW_CORE_DUMP_DATA_VERSION \
#define FW_CORE_DUMP_DATA_VERSION \
((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR)
/* Validity flag to indicate if the MCU registers in the buffer are valid */
@@ -90,20 +91,20 @@ struct prstatus32_timeval {
* use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs.
*/
struct elf_prstatus32 {
struct elf_siginfo pr_info; /* Info associated with signal. */
short int pr_cursig; /* Current signal. */
unsigned int pr_sigpend; /* Set of pending signals. */
unsigned int pr_sighold; /* Set of held signals. */
struct elf_siginfo pr_info; /* Info associated with signal. */
short int pr_cursig; /* Current signal. */
unsigned int pr_sigpend; /* Set of pending signals. */
unsigned int pr_sighold; /* Set of held signals. */
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus32_timeval pr_utime; /* User time. */
struct prstatus32_timeval pr_stime; /* System time. */
struct prstatus32_timeval pr_cutime; /* Cumulative user time. */
struct prstatus32_timeval pr_cstime; /* Cumulative system time. */
u32 pr_reg[18]; /* GP registers. */
int pr_fpvalid; /* True if math copro being used. */
struct prstatus32_timeval pr_utime; /* User time. */
struct prstatus32_timeval pr_stime; /* System time. */
struct prstatus32_timeval pr_cutime; /* Cumulative user time. */
struct prstatus32_timeval pr_cstime; /* Cumulative system time. */
u32 pr_reg[18]; /* GP registers. */
int pr_fpvalid; /* True if math copro being used. */
};
/**
@@ -505,7 +506,7 @@ static int fw_core_dump_create(struct kbase_device *kbdev)
/* Ensure MCU is active before requesting the core dump. */
kbase_csf_scheduler_pm_active(kbdev);
err = kbase_csf_scheduler_wait_mcu_active(kbdev);
err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
if (!err)
err = kbase_csf_firmware_req_core_dump(kbdev);
@@ -576,6 +577,7 @@ static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos)
*/
static void fw_core_dump_seq_stop(struct seq_file *m, void *v)
{
CSTD_UNUSED(m);
kfree(v);
}
@@ -664,9 +666,9 @@ static int fw_core_dump_seq_show(struct seq_file *m, void *v)
/* Write the current page. */
page = as_page(data->interface->phys[data->page_num]);
p = kmap_atomic(page);
p = kbase_kmap_atomic(page);
seq_write(m, p, FW_PAGE_SIZE);
kunmap_atomic(p);
kbase_kunmap_atomic(p);
return 0;
}
@@ -746,15 +748,16 @@ open_fail:
static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count,
loff_t *ppos)
{
int err;
ssize_t err;
struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private;
struct kbase_device *const kbdev = dump_data->kbdev;
CSTD_UNUSED(ubuf);
CSTD_UNUSED(ppos);
err = fw_core_dump_create(kbdev);
return err ? err : count;
return err ? err : (ssize_t)count;
}
/**

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,24 +30,24 @@
/*
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address.
*/
#define ARMV7_T1_BL_IMM_INSTR 0xd800f000
#define ARMV7_T1_BL_IMM_INSTR 0xd800f000
/*
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
* negative jump offset.
*/
#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216
#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216
/*
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
* positive jump offset.
*/
#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214
#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214
/*
* ARMv7 instruction: Double NOP instructions.
*/
#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00
#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00
#if defined(CONFIG_DEBUG_FS)
@@ -55,7 +55,7 @@ static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val)
{
struct kbase_device *kbdev = (struct kbase_device *)data;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
@@ -70,7 +70,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val)
{
struct kbase_device *kbdev = (struct kbase_device *)data;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
u64 new_mask;
unsigned int enable_bits_count;
@@ -115,7 +115,7 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us
int ret;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
@@ -125,8 +125,9 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us
if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
return -EBUSY;
/* Reading from userspace is only allowed in manual mode */
if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) {
/* Reading from userspace is only allowed in manual mode or auto-discard mode */
if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL &&
fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) {
ret = -EINVAL;
goto out;
}
@@ -176,8 +177,9 @@ static int kbase_csf_firmware_log_mode_write(void *data, u64 val)
cancel_delayed_work_sync(&fw_log->poll_work);
break;
case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT:
case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD:
schedule_delayed_work(&fw_log->poll_work,
msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms)));
break;
default:
ret = -EINVAL;
@@ -191,6 +193,24 @@ out:
return ret;
}
static int kbase_csf_firmware_log_poll_period_read(void *data, u64 *val)
{
struct kbase_device *kbdev = (struct kbase_device *)data;
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
*val = atomic_read(&fw_log->poll_period_ms);
return 0;
}
static int kbase_csf_firmware_log_poll_period_write(void *data, u64 val)
{
struct kbase_device *kbdev = (struct kbase_device *)data;
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
atomic_set(&fw_log->poll_period_ms, val);
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops,
kbase_csf_firmware_log_enable_mask_read,
kbase_csf_firmware_log_enable_mask_write, "%llx\n");
@@ -204,48 +224,124 @@ static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = {
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read,
kbase_csf_firmware_log_mode_write, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_poll_period_fops,
kbase_csf_firmware_log_poll_period_read,
kbase_csf_firmware_log_poll_period_write, "%llu\n");
#endif /* CONFIG_DEBUG_FS */
static void kbase_csf_firmware_log_discard_buffer(struct kbase_device *kbdev)
{
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware log discard skipped");
return;
}
if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
return;
kbase_csf_firmware_trace_buffer_discard(tb);
atomic_set(&fw_log->busy, 0);
}
static void kbase_csf_firmware_log_poll(struct work_struct *work)
{
struct kbase_device *kbdev =
container_of(work, struct kbase_device, csf.fw_log.poll_work.work);
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
schedule_delayed_work(&fw_log->poll_work,
msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT)
kbase_csf_firmware_log_dump_buffer(kbdev);
else if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD)
kbase_csf_firmware_log_discard_buffer(kbdev);
else
return;
kbase_csf_firmware_log_dump_buffer(kbdev);
schedule_delayed_work(&fw_log->poll_work,
msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms)));
}
int kbase_csf_firmware_log_init(struct kbase_device *kbdev)
{
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
int err = 0;
#if defined(CONFIG_DEBUG_FS)
struct dentry *dentry;
#endif /* CONFIG_DEBUG_FS */
/* Add one byte for null-termination */
fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL);
if (fw_log->dump_buf == NULL)
return -ENOMEM;
if (fw_log->dump_buf == NULL) {
err = -ENOMEM;
goto out;
}
/* Ensure null-termination for all strings */
fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0;
/* Set default log polling period */
atomic_set(&fw_log->poll_period_ms, KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT);
INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll);
#ifdef CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD
fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD;
schedule_delayed_work(&fw_log->poll_work,
msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT));
#elif defined(CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT)
fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT;
schedule_delayed_work(&fw_log->poll_work,
msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT));
#else /* CONFIG_MALI_FW_TRACE_MODE_MANUAL */
fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL;
#endif
atomic_set(&fw_log->busy, 0);
INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll);
#if defined(CONFIG_DEBUG_FS)
debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev,
&kbase_csf_firmware_log_enable_mask_fops);
debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_firmware_log_debugfs_fops);
debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev,
&kbase_csf_firmware_log_mode_fops);
#endif /* CONFIG_DEBUG_FS */
#if !defined(CONFIG_DEBUG_FS)
return 0;
#else /* !CONFIG_DEBUG_FS */
dentry = debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory,
kbdev, &kbase_csf_firmware_log_enable_mask_fops);
if (IS_ERR_OR_NULL(dentry)) {
dev_err(kbdev->dev, "Unable to create fw_trace_enable_mask\n");
err = -ENOENT;
goto free_out;
}
dentry = debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_firmware_log_debugfs_fops);
if (IS_ERR_OR_NULL(dentry)) {
dev_err(kbdev->dev, "Unable to create fw_traces\n");
err = -ENOENT;
goto free_out;
}
dentry = debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev,
&kbase_csf_firmware_log_mode_fops);
if (IS_ERR_OR_NULL(dentry)) {
dev_err(kbdev->dev, "Unable to create fw_trace_mode\n");
err = -ENOENT;
goto free_out;
}
dentry = debugfs_create_file("fw_trace_poll_period_ms", 0644, kbdev->mali_debugfs_directory,
kbdev, &kbase_csf_firmware_log_poll_period_fops);
if (IS_ERR_OR_NULL(dentry)) {
dev_err(kbdev->dev, "Unable to create fw_trace_poll_period_ms");
err = -ENOENT;
goto free_out;
}
return 0;
free_out:
kfree(fw_log->dump_buf);
fw_log->dump_buf = NULL;
#endif /* CONFIG_DEBUG_FS */
out:
return err;
}
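The fw_trace_poll_period_ms file added above takes a plain millisecond value. A short user-space sketch that shortens the period and reads it back; the /sys/kernel/debug/mali0 prefix is an assumption, the real location is wherever mali_debugfs_directory points on the platform.
/* Illustration only: adjust the firmware log polling period via debugfs. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/mali0/fw_trace_poll_period_ms";
	unsigned int period = 0;
	FILE *f = fopen(path, "w");

	if (!f)
		return 1;
	fprintf(f, "200");	/* poll every 200 ms */
	fclose(f);

	f = fopen(path, "r");
	if (f && fscanf(f, "%u", &period) == 1)
		printf("fw_trace_poll_period_ms = %u\n", period);
	if (f)
		fclose(f);
	return 0;
}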
void kbase_csf_firmware_log_term(struct kbase_device *kbdev)
@@ -265,7 +361,7 @@ void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev)
u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf;
unsigned int read_size, remaining_size;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
@@ -349,14 +445,14 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo
kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address);
/* Read callee address */
kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t),
&callee_address);
&callee_address);
diff = callee_address - calling_address - 4;
sign = !!(diff & 0x80000000);
if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff ||
ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) {
ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) {
dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping",
calling_address);
calling_address);
continue;
}
@@ -377,9 +473,9 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo
/* Patch logging func calls in their load location */
dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address,
bl_instruction);
bl_instruction);
kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
bl_instruction);
bl_instruction);
}
} else {
for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) {
@@ -388,7 +484,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo
/* Overwrite logging func calls with 2 NOP instructions */
kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
ARMV7_DOUBLE_NOP_INSTR);
ARMV7_DOUBLE_NOP_INSTR);
}
}
}
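For reference, the BL/NOP patching above only rewrites a call site when the PC-relative displacement fits the Thumb-2 BL immediate; in Thumb state the PC reads as the current instruction address plus 4, which is where the "- 4" in the displacement calculation comes from. A small stand-alone check mirroring that range test (constants copied from the defines above, example addresses arbitrary):
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216
#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214

/* True if a Thumb-2 BL encoded at calling_address can reach callee_address. */
static bool bl_target_in_range(uint32_t calling_address, uint32_t callee_address)
{
	/* PC-relative displacement; PC = calling_address + 4 in Thumb state. */
	int32_t diff = (int32_t)(callee_address - calling_address - 4);

	return diff >= ARMV7_T1_BL_IMM_RANGE_MIN && diff <= ARMV7_T1_BL_IMM_RANGE_MAX;
}

int main(void)
{
	printf("%d\n", bl_target_in_range(0x00010000u, 0x00014000u));	/* 1: within +/-16 MiB */
	printf("%d\n", bl_target_in_range(0x00010000u, 0x01200000u));	/* 0: beyond the window */
	return 0;
}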
@@ -418,17 +514,15 @@ int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32
/* Wait for the MCU to get disabled */
dev_info(kbdev->dev, "Wait for the MCU to get disabled");
ret = kbase_pm_wait_for_desired_state(kbdev);
ret = kbase_pm_killable_wait_for_desired_state(kbdev);
if (ret) {
dev_err(kbdev->dev,
"wait for PM state failed when toggling FW logging calls");
dev_err(kbdev->dev, "wait for PM state failed when toggling FW logging calls");
ret = -EAGAIN;
goto out;
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
mcu_inactive =
kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);
mcu_inactive = kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (!mcu_inactive) {
dev_err(kbdev->dev,

File diff suppressed because it is too large.

View File

@@ -43,23 +43,20 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
lockdep_assert_held(&ctx_alloc->lock);
heap_nr = find_first_zero_bit(ctx_alloc->in_use,
MAX_TILER_HEAPS);
heap_nr = find_first_zero_bit(ctx_alloc->in_use, MAX_TILER_HEAPS);
if (unlikely(heap_nr >= MAX_TILER_HEAPS)) {
dev_dbg(kctx->kbdev->dev,
"No free tiler heap contexts in the pool");
dev_dbg(kctx->kbdev->dev, "No free tiler heap contexts in the pool");
return 0;
}
ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);
ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, ctx_alloc->heap_context_size_aligned,
KBASE_REG_CPU_WR, &mapping);
if (unlikely(!ctx_ptr)) {
dev_err(kctx->kbdev->dev,
"Failed to map tiler heap context %lu (0x%llX)\n",
dev_err(kctx->kbdev->dev, "Failed to map tiler heap context %lu (0x%llX)\n",
heap_nr, heap_gpu_va);
return 0;
}
@@ -69,8 +66,8 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
bitmap_set(ctx_alloc->in_use, heap_nr, 1);
dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
heap_nr, heap_gpu_va);
dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", heap_nr,
heap_gpu_va);
return heap_gpu_va;
}
@@ -88,14 +85,13 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
* for heap context is freed.
*/
static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va)
u64 const heap_gpu_va)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
u32 page_index = offset_in_bytes >> PAGE_SHIFT;
struct tagged_addr page =
kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
struct tagged_addr page = kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;
lockdep_assert_held(&ctx_alloc->lock);
@@ -105,9 +101,8 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct
* disappear whilst this function is executing. Flush type is passed as FLUSH_PT
* to CLN+INV L2 only.
*/
kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
heap_context_pa, ctx_alloc->heap_context_size_aligned,
KBASE_MMU_OP_FLUSH_PT);
kbase_mmu_flush_pa_range(kctx->kbdev, kctx, heap_context_pa,
ctx_alloc->heap_context_size_aligned, KBASE_MMU_OP_FLUSH_PT);
}
/**
@@ -117,7 +112,7 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct
* @heap_gpu_va: The GPU virtual address of a heap context structure to free.
*/
static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va)
u64 const heap_gpu_va)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
u32 ctx_offset = 0;
@@ -134,50 +129,41 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
return;
evict_heap_context(ctx_alloc, heap_gpu_va);
heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
dev_dbg(kctx->kbdev->dev,
"Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
dev_dbg(kctx->kbdev->dev, "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
bitmap_clear(ctx_alloc->in_use, heap_nr, 1);
}
int kbase_csf_heap_context_allocator_init(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
struct kbase_context *const kctx)
int kbase_csf_heap_context_allocator_init(struct kbase_csf_heap_context_allocator *const ctx_alloc,
struct kbase_context *const kctx)
{
const u32 gpu_cache_line_size =
(1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
const u32 gpu_cache_line_size = (1U << kctx->kbdev->gpu_props.log2_line_size);
/* We cannot pre-allocate GPU memory here because the
* custom VA zone may not have been created yet.
*/
ctx_alloc->kctx = kctx;
ctx_alloc->region = NULL;
ctx_alloc->gpu_va = 0;
ctx_alloc->heap_context_size_aligned =
(HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);
ctx_alloc->heap_context_size_aligned = (HEAP_CTX_SIZE + gpu_cache_line_size - 1) &
~(gpu_cache_line_size - 1);
mutex_init(&ctx_alloc->lock);
bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
dev_dbg(kctx->kbdev->dev,
"Initialized a tiler heap context allocator\n");
dev_dbg(kctx->kbdev->dev, "Initialized a tiler heap context allocator\n");
return 0;
}
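The heap_context_size_aligned value computed above is the usual power-of-two round-up, (size + line - 1) & ~(line - 1). A tiny stand-alone illustration with made-up numbers; the real HEAP_CTX_SIZE and cache line size come from the driver and the GPU properties, not from this sketch:
#include <stdio.h>

int main(void)
{
	const unsigned int gpu_cache_line_size = 64;	/* example: log2_line_size == 6 */
	const unsigned int heap_ctx_size = 40;		/* illustrative value only */
	unsigned int aligned =
		(heap_ctx_size + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);

	printf("%u -> %u\n", heap_ctx_size, aligned);	/* prints "40 -> 64" */
	return 0;
}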
void kbase_csf_heap_context_allocator_term(
struct kbase_csf_heap_context_allocator *const ctx_alloc)
void kbase_csf_heap_context_allocator_term(struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
dev_dbg(kctx->kbdev->dev,
"Terminating tiler heap context allocator\n");
dev_dbg(kctx->kbdev->dev, "Terminating tiler heap context allocator\n");
if (ctx_alloc->region) {
kbase_gpu_vm_lock(kctx);
@@ -191,8 +177,7 @@ void kbase_csf_heap_context_allocator_term(
mutex_destroy(&ctx_alloc->lock);
}
u64 kbase_csf_heap_context_allocator_alloc(
struct kbase_csf_heap_context_allocator *const ctx_alloc)
u64 kbase_csf_heap_context_allocator_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
@@ -226,9 +211,8 @@ u64 kbase_csf_heap_context_allocator_alloc(
return heap_gpu_va;
}
void kbase_csf_heap_context_allocator_free(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va)
void kbase_csf_heap_context_allocator_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va)
{
mutex_lock(&ctx_alloc->lock);
sub_free(ctx_alloc, heap_gpu_va);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,19 +30,19 @@
* @ctx_alloc: Pointer to the heap context allocator to initialize.
* @kctx: Pointer to the kbase context.
*
* This function must be called only when a kbase context is instantiated.
*
* Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_heap_context_allocator_init(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
struct kbase_context *const kctx);
int kbase_csf_heap_context_allocator_init(struct kbase_csf_heap_context_allocator *const ctx_alloc,
struct kbase_context *const kctx);
/**
* kbase_csf_heap_context_allocator_term - Terminate an allocator for heap
* contexts
* @ctx_alloc: Pointer to the heap context allocator to terminate.
*/
void kbase_csf_heap_context_allocator_term(
struct kbase_csf_heap_context_allocator *const ctx_alloc);
void kbase_csf_heap_context_allocator_term(struct kbase_csf_heap_context_allocator *const ctx_alloc);
/**
* kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure
@@ -54,8 +54,7 @@ void kbase_csf_heap_context_allocator_term(
*
* Return: GPU virtual address of the allocated heap context or 0 on failure.
*/
u64 kbase_csf_heap_context_allocator_alloc(
struct kbase_csf_heap_context_allocator *const ctx_alloc);
u64 kbase_csf_heap_context_allocator_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc);
/**
* kbase_csf_heap_context_allocator_free - Free a heap context structure
@@ -68,8 +67,7 @@ u64 kbase_csf_heap_context_allocator_alloc(
* contexts for possible reuse by a future call to
* @kbase_csf_heap_context_allocator_alloc.
*/
void kbase_csf_heap_context_allocator_free(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va);
void kbase_csf_heap_context_allocator_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va);
#endif /* _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ */

File diff suppressed because it is too large.

View File

@@ -53,6 +53,7 @@ struct kbase_kcpu_command_import_info {
* @fence_cb: Fence callback
* @fence: Fence
* @kcpu_queue: kcpu command queue
* @fence_has_force_signaled: True if the fence was force signalled after the fence signal timeout expired
*/
struct kbase_kcpu_command_fence_info {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
@@ -63,6 +64,7 @@ struct kbase_kcpu_command_fence_info {
struct dma_fence *fence;
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
struct kbase_kcpu_command_queue *kcpu_queue;
bool fence_has_force_signaled;
};
/**
@@ -181,7 +183,7 @@ struct kbase_kcpu_command_jit_free_info {
struct kbase_suspend_copy_buffer {
size_t size;
struct page **pages;
int nr_pages;
unsigned int nr_pages;
size_t offset;
struct kbase_mem_phy_alloc *cpu_alloc;
};
@@ -252,6 +254,9 @@ struct kbase_kcpu_command {
* the function which handles processing of kcpu
* commands enqueued into a kcpu command queue;
* part of kernel API for processing workqueues
* @timeout_work: struct work_struct which contains a pointer to the
* function which handles the post-timeout actions for the
* queue when a fence signal timeout occurs.
* @start_offset: Index of the command to be executed next
* @id: KCPU command queue ID.
* @num_pending_cmds: The number of commands enqueued but not yet
@@ -283,6 +288,9 @@ struct kbase_kcpu_command {
* @fence_timeout: Timer used to detect the fence wait timeout.
* @metadata: Metadata structure containing basic information about
* this queue for any fence objects associated with this queue.
* @fence_signal_timeout: Timer used to detect that a fence signal command has
* been blocked for too long.
* @fence_signal_pending_cnt: Number of fence signal commands enqueued in the queue.
*/
struct kbase_kcpu_command_queue {
struct mutex lock;
@@ -290,6 +298,7 @@ struct kbase_kcpu_command_queue {
struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
struct workqueue_struct *wq;
struct work_struct work;
struct work_struct timeout_work;
u8 start_offset;
u8 id;
u16 num_pending_cmds;
@@ -307,6 +316,8 @@ struct kbase_kcpu_command_queue {
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
#endif /* CONFIG_SYNC_FILE */
struct timer_list fence_signal_timeout;
atomic_t fence_signal_pending_cnt;
};
/**
@@ -319,8 +330,7 @@ struct kbase_kcpu_command_queue {
*
* Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_new *newq);
int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq);
/**
* kbase_csf_kcpu_queue_delete - Delete KCPU command queue.
@@ -333,7 +343,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
* Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_delete *del);
struct kbase_ioctl_kcpu_queue_delete *del);
/**
* kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command
@@ -356,6 +366,8 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
*
* @kctx: Pointer to the kbase context being initialized.
*
* This function must be called only when a kbase context is instantiated.
*
* Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx);
@@ -381,4 +393,32 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
struct base_fence *fence, struct sync_file **sync_file, int *fd);
#endif /* CONFIG_SYNC_FILE */
/*
* kbase_csf_kcpu_queue_halt_timers - Halt the KCPU fence timers associated with
* the kbase device.
*
* @kbdev: Kbase device
*
* Note that this function assumes that the caller has ensured that the
* kbase_device::kctx_list does not get updated during this function's runtime.
* At the moment, the function is only safe to call during system suspend, when
* the device PM active count has reached zero.
*
* Return: 0 on success, negative value otherwise.
*/
int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev);
/*
* kbase_csf_kcpu_queue_resume_timers - Resume the KCPU fence timers associated
* with the kbase device.
*
* @kbdev: Kbase device
*
* Note that this function assumes that the caller has ensured that the
* kbase_device::kctx_list does not get updated during this function's runtime.
* At the moment, the function is only safe to call during system resume.
*/
void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev);
bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info);
#endif /* _KBASE_CSF_KCPU_H_ */
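The halt/resume helpers documented above are meant to bracket system suspend and resume, when the context list is known to be stable. A rough sketch of where they would sit follows; the callback names are illustrative and the header name is assumed, this is not the driver's actual PM path.
/* Illustration only. */
#include "mali_kbase_csf_kcpu.h"	/* header name assumed */

static int example_system_suspend(struct kbase_device *kbdev)
{
	/* The device PM active count has already reached zero here, so
	 * kbase_device::kctx_list cannot change underneath us.
	 */
	int err = kbase_csf_kcpu_queue_halt_timers(kbdev);

	if (err)
		return err;	/* abort suspend; some timers are still live */

	/* ... remainder of the suspend sequence ... */
	return 0;
}

static void example_system_resume(struct kbase_device *kbdev)
{
	/* ... remainder of the resume sequence ... */
	kbase_csf_kcpu_queue_resume_timers(kbdev);
}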

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,8 +38,8 @@
* @waits: Pointer to the KCPU CQS wait command info
*/
static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file,
struct kbase_context *kctx,
struct kbase_kcpu_command_cqs_wait_info *waits)
struct kbase_context *kctx,
struct kbase_kcpu_command_cqs_wait_info *waits)
{
unsigned int i;
@@ -47,8 +47,8 @@ static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file,
struct kbase_vmap_struct *mapping;
u32 val;
char const *msg;
u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx,
waits->objs[i].addr, &mapping);
u32 *const cpu_ptr =
(u32 *)kbase_phy_alloc_mapping_get(kctx, waits->objs[i].addr, &mapping);
if (!cpu_ptr)
return;
@@ -57,10 +57,9 @@ static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file,
kbase_phy_alloc_mapping_put(kctx, mapping);
msg = (waits->inherit_err_flags && (1U << i)) ? "true" :
"false";
seq_printf(file, " %llx(%u > %u, inherit_err: %s), ",
waits->objs[i].addr, val, waits->objs[i].val, msg);
msg = (waits->inherit_err_flags && (1U << i)) ? "true" : "false";
seq_printf(file, " %llx(%u > %u, inherit_err: %s), ", waits->objs[i].addr, val,
waits->objs[i].val, msg);
}
}
@@ -71,40 +70,34 @@ static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file,
* @kctx: The context of the KCPU queue
* @queue: Pointer to the KCPU queue
*/
static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file,
struct kbase_context *kctx,
struct kbase_kcpu_command_queue *queue)
static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, struct kbase_context *kctx,
struct kbase_kcpu_command_queue *queue)
{
if (WARN_ON(!queue))
return;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
seq_printf(file, "%16u, %11u, %7u, %13llu %8u",
queue->num_pending_cmds, queue->enqueue_failed,
queue->command_started ? 1 : 0,
queue->fence_context, queue->fence_seqno);
seq_printf(file, "%16u, %11u, %7u, %13llu %8u", queue->num_pending_cmds,
queue->enqueue_failed, queue->command_started ? 1 : 0, queue->fence_context,
queue->fence_seqno);
if (queue->command_started) {
struct kbase_kcpu_command *cmd =
&queue->commands[queue->start_offset];
struct kbase_kcpu_command *cmd = &queue->commands[queue->start_offset];
switch (cmd->type) {
#if IS_ENABLED(CONFIG_SYNC_FILE)
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
{
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: {
struct kbase_sync_fence_info info;
kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
seq_printf(file, ", Fence %pK %s %s",
info.fence, info.name,
seq_printf(file, ", Fence %pK %s %s", info.fence, info.name,
kbase_sync_status_string(info.status));
break;
}
#endif
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
seq_puts(file, ", CQS ");
kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx,
&cmd->info.cqs_wait);
kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx, &cmd->info.cqs_wait);
break;
default:
seq_puts(file, ", U, Unknown blocking command");
@@ -128,24 +121,23 @@ static int kbasep_csf_kcpu_debugfs_show(struct seq_file *file, void *data)
struct kbase_context *kctx = file->private;
unsigned long idx;
CSTD_UNUSED(data);
seq_printf(file, "MALI_CSF_KCPU_DEBUGFS_VERSION: v%u\n", MALI_CSF_KCPU_DEBUGFS_VERSION);
seq_puts(file, "Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n");
seq_puts(
file,
"Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n");
mutex_lock(&kctx->csf.kcpu_queues.lock);
idx = find_first_bit(kctx->csf.kcpu_queues.in_use,
KBASEP_MAX_KCPU_QUEUES);
idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
while (idx < KBASEP_MAX_KCPU_QUEUES) {
struct kbase_kcpu_command_queue *queue =
kctx->csf.kcpu_queues.array[idx];
struct kbase_kcpu_command_queue *queue = kctx->csf.kcpu_queues.array[idx];
seq_printf(file, "%9lu( %s ), ", idx,
queue->has_error ? "InErr" : "NoErr");
kbasep_csf_kcpu_debugfs_print_queue(file, kctx,
kctx->csf.kcpu_queues.array[idx]);
seq_printf(file, "%9lu( %s ), ", idx, queue->has_error ? "InErr" : "NoErr");
kbasep_csf_kcpu_debugfs_print_queue(file, kctx, kctx->csf.kcpu_queues.array[idx]);
idx = find_next_bit(kctx->csf.kcpu_queues.in_use,
KBASEP_MAX_KCPU_QUEUES, idx + 1);
idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, idx + 1);
}
mutex_unlock(&kctx->csf.kcpu_queues.lock);
@@ -172,16 +164,14 @@ void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx)
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;
file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry,
kctx, &kbasep_csf_kcpu_debugfs_fops);
file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry, kctx,
&kbasep_csf_kcpu_debugfs_fops);
if (IS_ERR_OR_NULL(file)) {
dev_warn(kctx->kbdev->dev,
"Unable to create KCPU debugfs entry");
dev_warn(kctx->kbdev->dev, "Unable to create KCPU debugfs entry");
}
}
#else
/*
* Stub functions for when debugfs is disabled

View File

@@ -0,0 +1,157 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/module.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <linux/debugfs.h>
#endif
#include <mali_kbase.h>
#include <csf/mali_kbase_csf_kcpu_fence_debugfs.h>
#include <mali_kbase_hwaccess_time.h>
#define BUF_SIZE 10
#if IS_ENABLED(CONFIG_DEBUG_FS)
static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_get(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
int ret;
struct kbase_device *kbdev = file->private_data;
if (atomic_read(&kbdev->fence_signal_timeout_enabled))
ret = simple_read_from_buffer(buf, count, ppos, "1\n", 2);
else
ret = simple_read_from_buffer(buf, count, ppos, "0\n", 2);
return ret;
};
static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_set(struct file *file,
const char __user *buf, size_t count,
loff_t *ppos)
{
int ret;
unsigned int enabled;
struct kbase_device *kbdev = file->private_data;
CSTD_UNUSED(ppos);
ret = kstrtouint_from_user(buf, count, 10, &enabled);
if (ret < 0)
return ret;
atomic_set(&kbdev->fence_signal_timeout_enabled, enabled);
return count;
}
static const struct file_operations kbase_csf_kcpu_queue_fence_signal_fops = {
.owner = THIS_MODULE,
.read = kbase_csf_kcpu_queue_fence_signal_enabled_get,
.write = kbase_csf_kcpu_queue_fence_signal_enabled_set,
.open = simple_open,
.llseek = default_llseek,
};
static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_get(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
int size;
char buffer[BUF_SIZE];
struct kbase_device *kbdev = file->private_data;
unsigned int timeout_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT);
size = scnprintf(buffer, sizeof(buffer), "%u\n", timeout_ms);
return simple_read_from_buffer(buf, count, ppos, buffer, size);
}
static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_set(struct file *file,
const char __user *buf, size_t count,
loff_t *ppos)
{
int ret;
unsigned int timeout_ms;
struct kbase_device *kbdev = file->private_data;
CSTD_UNUSED(ppos);
ret = kstrtouint_from_user(buf, count, 10, &timeout_ms);
if (ret < 0)
return ret;
/* The timeout passed by the user is bounded when trying to insert it into
* the precomputed timeout table, so we don't need to do any more validation
* beforehand.
*/
kbase_device_set_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT, timeout_ms);
return count;
}
static const struct file_operations kbase_csf_kcpu_queue_fence_signal_timeout_fops = {
.owner = THIS_MODULE,
.read = kbase_csf_kcpu_queue_fence_signal_timeout_get,
.write = kbase_csf_kcpu_queue_fence_signal_timeout_set,
.open = simple_open,
.llseek = default_llseek,
};
int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev)
{
struct dentry *file;
const mode_t mode = 0644;
if (WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
return -1;
file = debugfs_create_file("fence_signal_timeout_enable", mode,
kbdev->mali_debugfs_directory, kbdev,
&kbase_csf_kcpu_queue_fence_signal_fops);
if (IS_ERR_OR_NULL(file)) {
dev_warn(kbdev->dev, "Unable to create fence signal timer toggle entry");
return -1;
}
file = debugfs_create_file("fence_signal_timeout_ms", mode, kbdev->mali_debugfs_directory,
kbdev, &kbase_csf_kcpu_queue_fence_signal_timeout_fops);
if (IS_ERR_OR_NULL(file)) {
dev_warn(kbdev->dev, "Unable to create fence signal timeout entry");
return -1;
}
return 0;
}
#else
int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
return 0;
}
#endif
void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
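The two debugfs files created above can be driven from user space. A minimal sketch follows; the /sys/kernel/debug/mali0 prefix is an assumption, the real location is wherever mali_debugfs_directory lives on the platform.
/* Illustration only: enable the fence-signal timeout and set it to 2000 ms. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	if (write_str("/sys/kernel/debug/mali0/fence_signal_timeout_enable", "1"))
		perror("enable");
	if (write_str("/sys/kernel/debug/mali0/fence_signal_timeout_ms", "2000"))
		perror("timeout");
	return 0;
}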

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -18,14 +18,25 @@
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_
#define _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_
#ifndef _KBASE_BITS_H_
#define _KBASE_BITS_H_
struct kbase_device;
#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
#include <linux/bits.h>
#else
#include <linux/bitops.h>
#endif
/*
* kbase_csf_fence_timer_debugfs_init - Initialize fence signal timeout debugfs
* entries.
* @kbdev: Kbase device.
*
* Return: 0 on success, -1 on failure.
*/
int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev);
#endif /* _KBASE_BITS_H_ */
/*
* kbase_csf_fence_timer_debugfs_term - Terminate fence signal timeout debugfs
* entries.
* @kbdev: Kbase device.
*/
void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev);
#endif /* _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ */

View File

@@ -21,6 +21,7 @@
#include <linux/protected_memory_allocator.h>
#include <mali_kbase.h>
#include <mali_kbase_reg_track.h>
#include "mali_kbase_csf.h"
#include "mali_kbase_csf_mcu_shared_reg.h"
#include <mali_kbase_mem_migrate.h>
@@ -40,12 +41,13 @@
#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi))
/* MCU shared segment dummy page mapping flags */
#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX)
#define DUMMY_PAGE_MAP_FLAGS \
(KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX)
/* MCU shared segment suspend buffer mapping flags */
#define SUSP_PAGE_MAP_FLAGS \
(KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT))
#define SUSP_PAGE_MAP_FLAGS \
(KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \
KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT))
/**
* struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime
@@ -72,18 +74,18 @@ static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev)
unsigned long userio_map_flags;
if (kbdev->system_coherency == COHERENCY_NONE)
userio_map_flags =
KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
userio_map_flags = KBASE_REG_GPU_RD |
KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE);
else
userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH |
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_SHARED);
return (userio_map_flags | KBASE_REG_GPU_NX);
}
static void set_page_meta_status_not_movable(struct tagged_addr phy)
{
if (kbase_page_migration_enabled) {
if (kbase_is_page_migration_enabled()) {
struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy));
if (page_md) {
@@ -117,7 +119,7 @@ static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 n
return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
mmu_sync_info, NULL, false);
mmu_sync_info, NULL);
}
/* Reset consecutive retry count to zero */
@@ -607,14 +609,14 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
u32 i;
const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis);
struct kbase_va_region *reg;
u64 vpfn;
int err, i;
int err;
INIT_LIST_HEAD(&csg_reg->link);
reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
KBASE_REG_ZONE_MCU_SHARED);
reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, nr_csg_reg_pages);
if (!reg) {
dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n",
@@ -667,18 +669,19 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
fail_userio_pages_map_fail:
while (i-- > 0) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
MCU_AS_NR, true);
kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn,
shared_regs->dummy_phys,
KBASEP_NUM_CS_USER_IO_PAGES,
KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR);
fail_pmod_map_fail:
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR);
fail_susp_map_fail:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -697,21 +700,22 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
u64 vpfn;
int i;
u32 i;
for (i = 0; i < nr_csis; i++) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
MCU_AS_NR, true);
kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn,
shared_regs->dummy_phys,
KBASEP_NUM_CS_USER_IO_PAGES,
KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR);
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, nr_susp_pages, MCU_AS_NR);
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,27 +31,28 @@ int kbase_csf_protected_memory_init(struct kbase_device *const kbdev)
int err = 0;
#if IS_ENABLED(CONFIG_OF)
struct device_node *pma_node = of_parse_phandle(kbdev->dev->of_node,
"protected-memory-allocator", 0);
struct device_node *pma_node =
of_parse_phandle(kbdev->dev->of_node, "protected-memory-allocator", 0);
if (!pma_node) {
dev_info(kbdev->dev, "Protected memory allocator not available\n");
} else {
struct platform_device *const pdev =
of_find_device_by_node(pma_node);
struct platform_device *const pdev = of_find_device_by_node(pma_node);
kbdev->csf.pma_dev = NULL;
if (!pdev) {
dev_err(kbdev->dev, "Platform device for Protected memory allocator not found\n");
dev_err(kbdev->dev,
"Platform device for Protected memory allocator not found\n");
} else {
kbdev->csf.pma_dev = platform_get_drvdata(pdev);
if (!kbdev->csf.pma_dev) {
dev_info(kbdev->dev, "Protected memory allocator is not ready\n");
err = -EPROBE_DEFER;
} else if (!try_module_get(kbdev->csf.pma_dev->owner)) {
dev_err(kbdev->dev, "Failed to get Protected memory allocator module\n");
dev_err(kbdev->dev,
"Failed to get Protected memory allocator module\n");
err = -ENODEV;
} else {
dev_info(kbdev->dev, "Protected memory allocator successfully loaded\n");
dev_info(kbdev->dev,
"Protected memory allocator successfully loaded\n");
}
}
of_node_put(pma_node);
@@ -68,15 +69,11 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev)
}
struct protected_memory_allocation **
kbase_csf_protected_memory_alloc(
struct kbase_device *const kbdev,
struct tagged_addr *phys,
size_t num_pages,
bool is_small_page)
kbase_csf_protected_memory_alloc(struct kbase_device *const kbdev, struct tagged_addr *phys,
size_t num_pages, bool is_small_page)
{
size_t i;
struct protected_memory_allocator_device *pma_dev =
kbdev->csf.pma_dev;
struct protected_memory_allocator_device *pma_dev = kbdev->csf.pma_dev;
struct protected_memory_allocation **pma = NULL;
unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER;
unsigned int num_pages_order;
@@ -114,9 +111,7 @@ struct protected_memory_allocation **
*phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE);
for (j = 1; j < num_pages_order; j++) {
*phys++ = as_tagged_tag(phys_addr +
PAGE_SIZE * j,
HUGE_PAGE);
*phys++ = as_tagged_tag(phys_addr + PAGE_SIZE * j, HUGE_PAGE);
}
} else {
phys[i] = as_tagged(phys_addr);
@@ -131,15 +126,12 @@ struct protected_memory_allocation **
return pma;
}
void kbase_csf_protected_memory_free(
struct kbase_device *const kbdev,
struct protected_memory_allocation **pma,
size_t num_pages,
bool is_small_page)
void kbase_csf_protected_memory_free(struct kbase_device *const kbdev,
struct protected_memory_allocation **pma, size_t num_pages,
bool is_small_page)
{
size_t i;
struct protected_memory_allocator_device *pma_dev =
kbdev->csf.pma_dev;
struct protected_memory_allocator_device *pma_dev = kbdev->csf.pma_dev;
unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER);
if (is_small_page)

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,9 +25,10 @@
#include "mali_kbase.h"
/**
* kbase_csf_protected_memory_init - Initialise protected memory allocator.
*
* @kbdev: Device pointer.
*
* This function must be called only when a kbase device is initialized.
*
* Return: 0 if success, or an error code on failure.
*/
int kbase_csf_protected_memory_init(struct kbase_device *const kbdev);
@@ -52,11 +53,8 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev);
* or NULL on failure.
*/
struct protected_memory_allocation **
kbase_csf_protected_memory_alloc(
struct kbase_device *const kbdev,
struct tagged_addr *phys,
size_t num_pages,
bool is_small_page);
kbase_csf_protected_memory_alloc(struct kbase_device *const kbdev, struct tagged_addr *phys,
size_t num_pages, bool is_small_page);
/**
* kbase_csf_protected_memory_free - Free the allocated
@@ -67,9 +65,7 @@ struct protected_memory_allocation **
* @num_pages: Number of pages to be freed.
* @is_small_page: Flag used to select the order of protected memory page.
*/
void kbase_csf_protected_memory_free(
struct kbase_device *const kbdev,
struct protected_memory_allocation **pma,
size_t num_pages,
bool is_small_page);
void kbase_csf_protected_memory_free(struct kbase_device *const kbdev,
struct protected_memory_allocation **pma, size_t num_pages,
bool is_small_page);
#endif
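A rough sketch of how the allocate/free pair declared above is used. The header name and the surrounding error handling are illustrative; the key point is that the same num_pages and is_small_page values passed to the allocation must also be passed to the free call.
/* Illustration only. */
#include "mali_kbase_csf_protected_memory.h"	/* header name assumed */

static int example_protected_alloc(struct kbase_device *kbdev,
				   struct tagged_addr *phys, size_t num_pages)
{
	/* Allocate small (order-0) protected pages for the given phys array. */
	struct protected_memory_allocation **pma =
		kbase_csf_protected_memory_alloc(kbdev, phys, num_pages, true);

	if (!pma)
		return -ENOMEM;

	/* ... map and use the protected pages ... */

	/* Free with the same page count and page-size selection. */
	kbase_csf_protected_memory_free(kbdev, pma, num_pages, true);
	return 0;
}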

File diff suppressed because it is too large.

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,21 +38,18 @@ enum kbasep_soft_reset_status {
MCU_REINIT_FAILED
};
static inline bool
kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
static inline bool kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
{
return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT);
}
static inline bool
kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state)
static inline bool kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state)
{
return (state == KBASE_CSF_RESET_GPU_COMMITTED ||
state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT);
}
static inline bool
kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state)
static inline bool kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state)
{
return (state == KBASE_CSF_RESET_GPU_HAPPENING);
}
@@ -100,8 +97,7 @@ int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev)
{
down_read(&kbdev->csf.reset.sem);
if (atomic_read(&kbdev->csf.reset.state) ==
KBASE_CSF_RESET_GPU_FAILED) {
if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) {
up_read(&kbdev->csf.reset.sem);
return -ENOMEM;
}
@@ -120,8 +116,7 @@ int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev)
if (!down_read_trylock(&kbdev->csf.reset.sem))
return -EAGAIN;
if (atomic_read(&kbdev->csf.reset.state) ==
KBASE_CSF_RESET_GPU_FAILED) {
if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) {
up_read(&kbdev->csf.reset.sem);
return -ENOMEM;
}
@@ -166,9 +161,8 @@ void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev)
/* Mark the reset as now happening, and synchronize with other threads that
* might be trying to access the GPU
*/
static void kbase_csf_reset_begin_hw_access_sync(
struct kbase_device *kbdev,
enum kbase_csf_reset_gpu_state initial_reset_state)
static void kbase_csf_reset_begin_hw_access_sync(struct kbase_device *kbdev,
enum kbase_csf_reset_gpu_state initial_reset_state)
{
unsigned long hwaccess_lock_flags;
unsigned long scheduler_spin_lock_flags;
@@ -185,7 +179,7 @@ static void kbase_csf_reset_begin_hw_access_sync(
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING);
atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING);
kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);
}
@@ -193,15 +187,13 @@ static void kbase_csf_reset_begin_hw_access_sync(
/* Mark the reset as finished and allow other threads to once more access the
* GPU
*/
static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev,
int err_during_reset,
static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, int err_during_reset,
bool firmware_inited)
{
unsigned long hwaccess_lock_flags;
unsigned long scheduler_spin_lock_flags;
WARN_ON(!kbase_csf_reset_state_is_active(
atomic_read(&kbdev->csf.reset.state)));
WARN_ON(!kbase_csf_reset_state_is_active(atomic_read(&kbdev->csf.reset.state)));
/* Once again, we synchronize with atomic context threads accessing the
* HW, as otherwise any actions they defer could get lost
@@ -210,8 +202,7 @@ static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev,
kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
if (!err_during_reset) {
atomic_set(&kbdev->csf.reset.state,
KBASE_CSF_RESET_GPU_NOT_PENDING);
atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING);
} else {
dev_err(kbdev->dev, "Reset failed to complete");
atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED);
@@ -236,25 +227,26 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
kbase_io_history_dump(kbdev);
dev_err(kbdev->dev, "Register state:");
dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)));
dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)));
dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x",
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)),
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)),
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_STATUS)));
dev_err(kbdev->dev,
" JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)),
kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_RAWSTAT)),
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_FAULTSTATUS)));
dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)),
kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK)),
kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK)));
dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE0)),
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE1)));
dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x",
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG)),
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG)),
kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG)));
}
/**
@@ -293,8 +285,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
spin_lock(&kbdev->mmu_mask_change);
kbase_pm_reset_start_locked(kbdev);
dev_dbg(kbdev->dev,
"We're about to flush out the IRQs and their bottom halves\n");
dev_dbg(kbdev->dev, "We're about to flush out the IRQs and their bottom halves\n");
kbdev->irq_reset_flush = true;
/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
@@ -312,13 +303,11 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
dev_dbg(kbdev->dev, "Flush out any in-flight work items\n");
kbase_flush_mmu_wqs(kbdev);
dev_dbg(kbdev->dev,
"The flush has completed so reset the active indicator\n");
dev_dbg(kbdev->dev, "The flush has completed so reset the active indicator\n");
kbdev->irq_reset_flush = false;
if (!silent)
dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
RESET_TIMEOUT);
dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT);
/* Output the state of some interesting registers to help in the
* debugging of GPU resets, and dump the firmware trace buffer
@@ -329,9 +318,11 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
kbase_csf_firmware_log_dump_buffer(kbdev);
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_ipa_control_handle_gpu_reset_pre(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
{
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_ipa_control_handle_gpu_reset_pre(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/* Tell hardware counters a reset is about to occur.
* If the backend is in an unrecoverable error state (e.g. due to
@@ -352,7 +343,9 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_ctx_sched_restore_all_as(kbdev);
kbase_ipa_control_handle_gpu_reset_post(kbdev);
{
kbase_ipa_control_handle_gpu_reset_post(kbdev);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -365,10 +358,12 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
mutex_unlock(&kbdev->pm.lock);
if (err) {
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (!kbase_pm_l2_is_in_desired_state(kbdev))
ret = L2_ON_FAILED;
else if (!kbase_pm_mcu_is_in_desired_state(kbdev))
ret = MCU_REINIT_FAILED;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
return ret;
@@ -440,16 +435,14 @@ err:
static void kbase_csf_reset_gpu_worker(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
csf.reset.work);
struct kbase_device *kbdev = container_of(data, struct kbase_device, csf.reset.work);
bool gpu_sleep_mode_active = false;
bool firmware_inited;
unsigned long flags;
int err = 0;
const enum kbase_csf_reset_gpu_state initial_reset_state =
atomic_read(&kbdev->csf.reset.state);
const bool silent =
kbase_csf_reset_state_is_silent(initial_reset_state);
const bool silent = kbase_csf_reset_state_is_silent(initial_reset_state);
/* Ensure any threads (e.g. executing the CSF scheduler) have finished
* using the HW
@@ -479,8 +472,8 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
#endif
} else if (!kbase_pm_context_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
} else if (!kbase_pm_context_active_handle_suspend(
kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
kbase_pm_context_idle(kbdev);
}
@@ -494,22 +487,22 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
{
if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
kbase_hwcnt_backend_csf_on_unrecoverable_error(
&kbdev->hwcnt_gpu_iface);
kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
if (atomic_cmpxchg(&kbdev->csf.reset.state,
KBASE_CSF_RESET_GPU_NOT_PENDING,
KBASE_CSF_RESET_GPU_PREPARED) !=
KBASE_CSF_RESET_GPU_NOT_PENDING)
if (atomic_cmpxchg(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING,
KBASE_CSF_RESET_GPU_PREPARED) != KBASE_CSF_RESET_GPU_NOT_PENDING)
/* Some other thread is already resetting the GPU */
return false;
/* Issue the wake up of threads waiting for PM state transition.
* They might want to exit the wait since GPU reset has been triggered.
*/
wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
return true;
}
KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
unsigned int flags)
bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, unsigned int flags)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -521,8 +514,7 @@ void kbase_reset_gpu(struct kbase_device *kbdev)
/* Note this is a WARN/atomic_set because it is a software issue for
* a race to be occurring here
*/
if (WARN_ON(atomic_read(&kbdev->csf.reset.state) !=
KBASE_RESET_GPU_PREPARED))
if (WARN_ON(atomic_read(&kbdev->csf.reset.state) != KBASE_RESET_GPU_PREPARED))
return;
atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED);
@@ -543,10 +535,9 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev)
int kbase_reset_gpu_silent(struct kbase_device *kbdev)
{
if (atomic_cmpxchg(&kbdev->csf.reset.state,
KBASE_CSF_RESET_GPU_NOT_PENDING,
KBASE_CSF_RESET_GPU_COMMITTED_SILENT) !=
KBASE_CSF_RESET_GPU_NOT_PENDING) {
if (atomic_cmpxchg(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING,
KBASE_CSF_RESET_GPU_COMMITTED_SILENT) !=
KBASE_CSF_RESET_GPU_NOT_PENDING) {
/* Some other thread is already resetting the GPU */
return -EAGAIN;
}
@@ -561,8 +552,7 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent);
bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
{
enum kbase_csf_reset_gpu_state reset_state =
atomic_read(&kbdev->csf.reset.state);
enum kbase_csf_reset_gpu_state reset_state = atomic_read(&kbdev->csf.reset.state);
/* For CSF, the reset is considered active only when the reset worker
* is actually executing and other threads would have to wait for it to
@@ -597,10 +587,8 @@ int kbase_reset_gpu_wait(struct kbase_device *kbdev)
remaining = wait_event_timeout(
kbdev->csf.reset.wait,
(atomic_read(&kbdev->csf.reset.state) ==
KBASE_CSF_RESET_GPU_NOT_PENDING) ||
(atomic_read(&kbdev->csf.reset.state) ==
KBASE_CSF_RESET_GPU_FAILED),
(atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING) ||
(atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED),
wait_timeout);
if (!remaining) {
@@ -608,8 +596,7 @@ int kbase_reset_gpu_wait(struct kbase_device *kbdev)
return -ETIMEDOUT;
} else if (atomic_read(&kbdev->csf.reset.state) ==
KBASE_CSF_RESET_GPU_FAILED) {
} else if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) {
return -ENOMEM;
}
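For context, the reset entry points touched above are typically driven in the pattern sketched below; this is illustrative only and not part of this change. RESET_FLAGS_NONE is assumed to be defined alongside RESET_FLAGS_HWC_UNRECOVERABLE_ERROR elsewhere in the driver, and the wrapper name is hypothetical.

/* Illustrative sketch only, not part of the driver sources. */
static int example_trigger_reset_and_wait(struct kbase_device *kbdev)
{
	/* Claim the reset; returns false if another thread already owns it. */
	if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
		kbase_reset_gpu(kbdev);

	/* Block until the reset worker completes: 0, -ETIMEDOUT or -ENOMEM. */
	return kbase_reset_gpu_wait(kbdev);
}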

File diff suppressed because it is too large

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -108,7 +108,7 @@ int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group);
* Note: Caller must hold the interrupt_lock.
*/
bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
struct kbase_queue_group *group);
struct kbase_queue_group *group);
/**
* kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been
@@ -121,8 +121,8 @@ bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
*
* Note: Caller must hold the interrupt_lock.
*/
struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
struct kbase_device *kbdev, int slot);
struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(struct kbase_device *kbdev,
int slot);
/**
* kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue
@@ -148,8 +148,8 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group);
* on firmware slots from the given Kbase context. The affected groups are
* added to the supplied list_head argument.
*/
void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
struct kbase_context *kctx, struct list_head *evicted_groups);
void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, struct kbase_context *kctx,
struct list_head *evicted_groups);
/**
* kbase_csf_scheduler_context_init() - Initialize the context-specific part
@@ -264,7 +264,7 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev);
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
struct kbase_suspend_copy_buffer *sus_buf);
struct kbase_suspend_copy_buffer *sus_buf);
/**
* kbase_csf_scheduler_lock - Acquire the global Scheduler lock.
@@ -299,8 +299,7 @@ static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev)
* This function will take the global scheduler lock, in order to serialize
* against the Scheduler actions, for access to CS IO pages.
*/
static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev,
unsigned long *flags)
static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, unsigned long *flags)
{
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, *flags);
}
@@ -312,8 +311,7 @@ static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev,
* @flags: Previously stored interrupt state when Scheduler interrupt
* spinlock was acquired.
*/
static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev,
unsigned long flags)
static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, unsigned long flags)
{
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
}
@@ -324,8 +322,7 @@ static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev,
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
static inline void
kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
static inline void kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
}
@@ -338,7 +335,10 @@ kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
*
* Return: true if the scheduler is configured to wake up periodically
*/
bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev);
static inline bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
{
return atomic_read(&kbdev->csf.scheduler.timer_enabled);
}
/**
* kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic
@@ -347,8 +347,7 @@ bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev);
* @kbdev: Pointer to the device
* @enable: Whether to enable periodic scheduler tasks
*/
void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
bool enable);
void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, bool enable);
/**
* kbase_csf_scheduler_kick - Perform pending scheduling tasks once.
@@ -367,8 +366,7 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev);
*
* Return: true if the scheduler is running with protected mode tasks
*/
static inline bool kbase_csf_scheduler_protected_mode_in_use(
struct kbase_device *kbdev)
static inline bool kbase_csf_scheduler_protected_mode_in_use(struct kbase_device *kbdev)
{
return (kbdev->csf.scheduler.active_protm_grp != NULL);
}
@@ -411,6 +409,22 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
*/
int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_killable_wait_mcu_active - Wait for the MCU to actually become
* active in killable state.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function is the same as kbase_csf_scheduler_wait_mcu_active(), except that
* it allows the SIGKILL signal to interrupt the wait.
* It is intended to be called from code executing in ioctl or other userspace
* context, wherever it is safe to do so.
*
* Return: 0 if the MCU was successfully activated, or -ETIMEDOUT code on timeout error or
* -ERESTARTSYS if the wait was interrupted.
*/
int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev);
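/* Illustrative note, not part of the driver sources: when called from ioctl
 * context, the -ERESTARTSYS return must be propagated so the syscall can be
 * restarted once the signal has been handled, e.g.
 *
 *     ret = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
 *     if (ret)
 *             return ret;
 */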
/**
* kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume
*
@@ -473,70 +487,25 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
kbdev->csf.global_iface.group_num);
}
/**
* kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick
*
* @kbdev: Pointer to the device
*
* This function advances the scheduling tick by enqueuing the tick work item for
* immediate execution, but only if the tick hrtimer is active. If the timer
* is inactive then the tick work item is already in flight.
* The caller must hold the interrupt lock.
*/
static inline void
kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->interrupt_lock);
if (scheduler->tick_timer_active) {
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u);
scheduler->tick_timer_active = false;
queue_work(scheduler->wq, &scheduler->tick_work);
} else {
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u);
}
}
/**
* kbase_csf_scheduler_tick_advance() - Advance the scheduling tick
*
* @kbdev: Pointer to the device
*
* This function advances the scheduling tick by enqueuing the tick work item for
* immediate execution, but only if the tick hrtimer is active. If the timer
* is inactive then the tick work item is already in flight.
*/
static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
kbase_csf_scheduler_tick_advance_nolock(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
/**
* kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick
*
* @kbdev: Pointer to the device
*
* This function will queue the scheduling tick work item for immediate
* execution if tick timer is not active. This can be called from interrupt
* context to resume the scheduling after GPU was put to sleep.
* This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling
* tick regardless of whether the tick timer is enabled. This can be called
* from interrupt context to resume scheduling after the GPU was put to sleep.
*
* Caller is expected to check kbase_csf_scheduler.timer_enabled as required
* to see whether it is appropriate before calling this function.
*/
static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
if (!scheduler->tick_timer_active)
queue_work(scheduler->wq, &scheduler->tick_work);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
if (atomic_cmpxchg(&scheduler->pending_tick_work, false, true) == false)
complete(&scheduler->kthread_signal);
}
/**
@@ -544,8 +513,11 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
*
* @kbdev: Pointer to the device
*
* This function will queue the scheduling tock work item for immediate
* execution.
* This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling
* tock.
*
* Caller is expected to check kbase_csf_scheduler.timer_enabled as required
* to see whether it is appropriate before calling this function.
*/
static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
{
@@ -553,7 +525,7 @@ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u);
if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false)
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
complete(&scheduler->kthread_signal);
}
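/* Illustrative sketch, not part of the driver sources: a caller resuming
 * scheduling is expected to honour the timer_enabled check described above,
 * roughly as follows (the wrapper name is hypothetical).
 */
static void example_resume_scheduling(struct kbase_device *kbdev)
{
	if (kbase_csf_scheduler_timer_is_enabled(kbdev))
		kbase_csf_scheduler_invoke_tick(kbdev);
}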
/**

View File

@@ -0,0 +1,838 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include "mali_kbase_csf_csg.h"
#include "mali_kbase_csf_sync.h"
#include "mali_kbase_csf_util.h"
#include <mali_kbase.h>
#include <linux/version_compat_defs.h>
#if IS_ENABLED(CONFIG_SYNC_FILE)
#include "mali_kbase_sync.h"
#endif
#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)"
#define CSF_SYNC_DUMP_SIZE 256
/* Number of nearby commands around the "extract_ptr" of GPU queues.
*
* [extract_ptr - MAX_NR_NEARBY_INSTR, extract_ptr + MAX_NR_NEARBY_INSTR].
*/
#define MAX_NR_NEARBY_INSTR 32
/**
* kbasep_csf_sync_get_cqs_live_u32() - Obtain live (u32) value for a CQS object.
*
* @kctx: The context of the queue.
* @obj_addr: Pointer to the CQS live 32-bit value.
* @live_val: Pointer to the u32 that will be set to the CQS object's current, live
* value.
*
* Return: 0 if successful or a negative error code on failure.
*/
static int kbasep_csf_sync_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, u32 *live_val)
{
struct kbase_vmap_struct *mapping;
u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
if (!cpu_ptr)
return -1;
*live_val = *cpu_ptr;
kbase_phy_alloc_mapping_put(kctx, mapping);
return 0;
}
/**
* kbasep_csf_sync_get_cqs_live_u64() - Obtain live (u64) value for a CQS object.
*
* @kctx: The context of the queue.
* @obj_addr: Pointer to the CQS live value (32 or 64-bit).
* @live_val: Pointer to the u64 that will be set to the CQS object's current, live
* value.
*
* Return: 0 if successful or a negative error code on failure.
*/
static int kbasep_csf_sync_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, u64 *live_val)
{
struct kbase_vmap_struct *mapping;
u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
if (!cpu_ptr)
return -1;
*live_val = *cpu_ptr;
kbase_phy_alloc_mapping_put(kctx, mapping);
return 0;
}
/**
* kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait
* or Fence Signal command, contained in a
* KCPU queue.
*
* @buffer: The buffer to write to.
* @length: The length of text in the buffer.
* @cmd: The KCPU Command to be printed.
* @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT.
*/
static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *length,
struct kbase_kcpu_command *cmd,
const char *cmd_name)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence = NULL;
#else
struct dma_fence *fence = NULL;
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
struct kbase_kcpu_command_fence_info *fence_info;
struct kbase_sync_fence_info info;
const char *timeline_name = NULL;
bool is_signaled = false;
fence_info = &cmd->info.fence;
if (kbase_kcpu_command_fence_has_force_signaled(fence_info))
return;
fence = kbase_fence_get(fence_info);
if (WARN_ON(!fence))
return;
kbase_sync_fence_info_get(fence, &info);
timeline_name = fence->ops->get_timeline_name(fence);
is_signaled = info.status > 0;
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled);
/* Note: fence->seqno was u32 until 5.1 kernel, then u64 */
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
timeline_name, fence->context, (u64)fence->seqno);
kbase_fence_put(fence);
}
/**
* kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command,
* contained in a KCPU queue.
*
* @kctx: The kbase context.
* @buffer: The buffer to write to.
* @length: The length of text in the buffer.
* @cmd: The KCPU Command to be printed.
*/
static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char *buffer,
int *length, struct kbase_kcpu_command *cmd)
{
size_t i;
for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i];
u32 live_val;
int ret = kbasep_csf_sync_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
bool live_val_valid = (ret >= 0);
*length +=
snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
if (live_val_valid)
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"0x%.16llx", (u64)live_val);
else
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
CQS_UNREADABLE_LIVE_VALUE);
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
" | op:gt arg_value:0x%.8x", cqs_obj->val);
}
}
/**
* kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS
* Set command, contained in a KCPU queue.
*
* @kctx: The kbase context.
* @buffer: The buffer to write to.
* @length: The length of text in the buffer.
* @cmd: The KCPU Command to be printed.
*/
static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char *buffer,
int *length, struct kbase_kcpu_command *cmd)
{
size_t i;
for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i];
u32 live_val;
int ret = kbasep_csf_sync_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
bool live_val_valid = (ret >= 0);
*length +=
snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
if (live_val_valid)
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"0x%.16llx", (u64)live_val);
else
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
CQS_UNREADABLE_LIVE_VALUE);
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
" | op:add arg_value:0x%.8x", 1);
}
}
/**
* kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation.
*
* @op: The numerical value of operation.
*
* Return: const static pointer to the command name, or '??' if unknown.
*/
static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op)
{
const char *string;
switch (op) {
case BASEP_CQS_WAIT_OPERATION_LE:
string = "le";
break;
case BASEP_CQS_WAIT_OPERATION_GT:
string = "gt";
break;
default:
string = "??";
break;
}
return string;
}
/**
* kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation.
*
* @op: The numerical value of operation.
*
* Return: const static pointer to the command name, or '??' if unknown.
*/
static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op)
{
const char *string;
switch (op) {
case BASEP_CQS_SET_OPERATION_ADD:
string = "add";
break;
case BASEP_CQS_SET_OPERATION_SET:
string = "set";
break;
default:
string = "???";
break;
}
return string;
}
/**
* kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS
* Wait Operation command, contained
* in a KCPU queue.
*
* @kctx: The kbase context.
* @buffer: The buffer to write to.
* @length: The length of text in the buffer.
* @cmd: The KCPU Command to be printed.
*/
static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, char *buffer,
int *length, struct kbase_kcpu_command *cmd)
{
size_t i;
for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
struct base_cqs_wait_operation_info *wait_op =
&cmd->info.cqs_wait_operation.objs[i];
const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation);
u64 live_val;
int ret = kbasep_csf_sync_get_cqs_live_u64(kctx, wait_op->addr, &live_val);
bool live_val_valid = (ret >= 0);
*length +=
snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);
if (live_val_valid)
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"0x%.16llx", live_val);
else
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
CQS_UNREADABLE_LIVE_VALUE);
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
" | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
}
}
/**
* kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS
* Set Operation command, contained
* in a KCPU queue.
*
* @kctx: The kbase context.
* @buffer: The buffer to write to.
* @length: The length of text in the buffer.
* @cmd: The KCPU Command to be printed.
*/
static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, char *buffer,
int *length, struct kbase_kcpu_command *cmd)
{
size_t i;
for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i];
const char *op_name = kbasep_csf_sync_get_set_op_name(
(basep_cqs_set_operation_op)set_op->operation);
u64 live_val;
int ret = kbasep_csf_sync_get_cqs_live_u64(kctx, set_op->addr, &live_val);
bool live_val_valid = (ret >= 0);
*length +=
snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);
if (live_val_valid)
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
"0x%.16llx", live_val);
else
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
CQS_UNREADABLE_LIVE_VALUE);
*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
" | op:%s arg_value:0x%.16llx", op_name, set_op->val);
}
}
/**
* kbasep_csf_sync_kcpu_print_queue() - Print debug data for a KCPU queue
*
* @kctx: The kbase context.
* @kbpr: Pointer to printer instance.
* @queue: Pointer to the KCPU queue.
*/
static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx,
struct kbase_kcpu_command_queue *queue,
struct kbasep_printer *kbpr)
{
char started_or_pending;
struct kbase_kcpu_command *cmd;
size_t i;
if (WARN_ON(!queue))
return;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
mutex_lock(&queue->lock);
for (i = 0; i != queue->num_pending_cmds; ++i) {
char buffer[CSF_SYNC_DUMP_SIZE];
int length = 0;
started_or_pending = ((i == 0) && queue->command_started) ? 'S' : 'P';
length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ",
kctx->id, queue->id, started_or_pending);
cmd = &queue->commands[(u8)(queue->start_offset + i)];
switch (cmd->type) {
#if IS_ENABLED(CONFIG_SYNC_FILE)
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd,
"FENCE_SIGNAL");
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd,
"FENCE_WAIT");
break;
#endif
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
kbasep_csf_sync_print_kcpu_cqs_wait(kctx, buffer, &length, cmd);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_SET:
kbasep_csf_sync_print_kcpu_cqs_set(kctx, buffer, &length, cmd);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
kbasep_csf_sync_print_kcpu_cqs_wait_op(kctx, buffer, &length, cmd);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd);
break;
default:
length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length,
", U, Unknown blocking command");
break;
}
length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n");
kbasep_print(kbpr, buffer);
}
mutex_unlock(&queue->lock);
}
int kbasep_csf_sync_kcpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr)
{
unsigned long queue_idx;
mutex_lock(&kctx->csf.kcpu_queues.lock);
kbasep_print(kbpr, "CSF KCPU queues sync info (version: v" __stringify(
MALI_CSF_SYNC_DUMP_VERSION) "):\n");
kbasep_print(kbpr, "KCPU queues for ctx %d:\n", kctx->id);
queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
while (queue_idx < KBASEP_MAX_KCPU_QUEUES) {
kbasep_csf_sync_kcpu_print_queue(kctx, kctx->csf.kcpu_queues.array[queue_idx],
kbpr);
queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES,
queue_idx + 1);
}
mutex_unlock(&kctx->csf.kcpu_queues.lock);
return 0;
}
/* GPU queue related values */
#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
#define GPU_CSF_CALL_OPCODE ((u64)0x20)
#define MAX_NR_GPU_CALLS (5)
#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56)
#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL)
#define MOVE_DEST_MASK ((u64)0xFF << 48)
#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48)
#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)
#define SYNC_SRC0_MASK ((u64)0xFF << 40)
#define SYNC_SRC1_MASK ((u64)0xFF << 32)
#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40)
#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32)
#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28)
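/* Illustrative note, not part of the driver sources: for a raw 64-bit command
 * word 'instr', the masks above carve out the fields as follows:
 *
 *   bits [63:56]  INSTR_OPCODE_GET(instr)        - opcode (e.g. 0x27 = SYNC_WAIT)
 *   bits [55:48]  MOVE_DEST_GET(instr)           - destination register of a MOVE
 *   bits [47:0]   instr & MOVE_IMM_MASK          - 48-bit MOVE immediate
 *   bits [47:40]  SYNC_SRC0_GET(instr)           - register holding the CQS address
 *   bits [39:32]  SYNC_SRC1_GET(instr)           - register holding the compare value
 *   bits [31:28]  SYNC_WAIT_CONDITION_GET(instr) - wait condition (SYNC_WAIT only)
 */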
/* Enumeration for types of GPU queue sync events for
* the purpose of dumping them through sync.
*/
enum sync_gpu_sync_type {
CSF_GPU_SYNC_WAIT,
CSF_GPU_SYNC_SET,
CSF_GPU_SYNC_ADD,
NUM_CSF_GPU_SYNC_TYPES
};
/**
* kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations
* from a MOVE instruction.
*
* @move_cmd: Raw MOVE instruction.
* @sync_addr_reg: Register identifier from SYNC_* instruction.
* @compare_val_reg: Register identifier from SYNC_* instruction.
* @sync_val: Pointer to store CQS object address for sync operation.
* @compare_val: Pointer to store compare value for sync operation.
*
* Return: true if the value was obtained from the expected register identifier,
* or false otherwise.
*/
static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg,
u64 compare_val_reg, u64 *sync_val,
u64 *compare_val)
{
u64 imm_mask;
/* Verify MOVE instruction and get immediate mask */
if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE)
imm_mask = MOVE32_IMM_MASK;
else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE)
imm_mask = MOVE_IMM_MASK;
else
/* Error return */
return false;
/* Verify value from MOVE instruction and assign to variable */
if (sync_addr_reg == MOVE_DEST_GET(move_cmd))
*sync_val = move_cmd & imm_mask;
else if (compare_val_reg == MOVE_DEST_GET(move_cmd))
*compare_val = move_cmd & imm_mask;
else
/* Error return */
return false;
return true;
}
/**
* kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided offset.
*
* @queue: Pointer to the queue.
* @ringbuff_offset: Ringbuffer offset.
*
* Return: the u64 in the ringbuffer at the desired offset.
*/
static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset)
{
u64 page_off = ringbuff_offset >> PAGE_SHIFT;
u64 offset_within_page = ringbuff_offset & ~PAGE_MASK;
struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
u64 *ringbuffer = vmap(&page, 1, VM_MAP, pgprot_noncached(PAGE_KERNEL));
u64 value;
if (!ringbuffer) {
struct kbase_context *kctx = queue->kctx;
dev_err(kctx->kbdev->dev, "%s failed to map the buffer page for read a command!",
__func__);
/* Return an alternative 0 for the dumping operation */
value = 0;
} else {
value = ringbuffer[offset_within_page / sizeof(u64)];
vunmap(ringbuffer);
}
return value;
}
/**
* kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command.
*
* @kbpr: Pointer to printer instance.
* @kctx: Pointer to kbase context.
* @queue: Pointer to the GPU command queue.
* @ringbuff_offset: Offset to index the ring buffer with, for the given sync command.
* (Useful for finding preceding MOVE commands)
* @instr_addr: GPU command address.
* @sync_cmd: Entire u64 of the sync command, which has both sync address and
* comparison-value encoded in it.
* @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT).
* @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false).
* @follows_wait: Bool to indicate if the operation follows at least one wait
* operation. Used to determine whether it's pending or started.
*/
static void kbasep_csf_print_gpu_sync_op(struct kbasep_printer *kbpr, struct kbase_context *kctx,
struct kbase_queue *queue, u32 ringbuff_offset,
u64 instr_addr, u64 sync_cmd, enum sync_gpu_sync_type type,
bool is_64bit, bool follows_wait)
{
u64 sync_addr = 0, compare_val = 0, live_val = 0, ringbuffer_boundary_check;
u64 move_cmd;
u8 sync_addr_reg, compare_val_reg, wait_condition = 0;
int err;
static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" };
static const char *const gpu_sync_type_op[] = {
"wait", /* This should never be printed, only included to simplify indexing */
"set", "add"
};
if (type >= NUM_CSF_GPU_SYNC_TYPES) {
dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!");
return;
}
/* 1. Get Register identifiers from SYNC_* instruction */
sync_addr_reg = SYNC_SRC0_GET(sync_cmd);
compare_val_reg = SYNC_SRC1_GET(sync_cmd);
if (ringbuff_offset < sizeof(u64)) {
dev_warn(kctx->kbdev->dev,
"Unexpected wraparound detected between %s & MOVE instruction",
gpu_sync_type_name[type]);
return;
}
/* 2. Get values from first MOVE command */
ringbuff_offset -= sizeof(u64);
move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
/* We expect there to be at least 2 preceding MOVE instructions for CQS, or 3 preceding
* MOVE instructions for Timeline CQS, and Base will always arrange for these
* MOVE + SYNC instructions to be contiguously located, and is therefore never expected
* to be wrapped around the ringbuffer boundary. The following check takes place after
* the ringbuffer has been decremented, and already points to the first MOVE command,
* so that it can be determined if it's a 32-bit MOVE (so 2 vs 1 preceding MOVE commands
* will be checked).
* This is to maintain compatibility with older userspace; a check is done to ensure that
* the MOVE opcode found was a 32-bit MOVE, and if so, it has determined that a newer
* userspace is being used and will continue to read the next 32-bit MOVE to recover the
* compare/set value in the wait/set operation. If not, the single 48-bit value found
* will be used.
*/
ringbuffer_boundary_check =
(INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE && is_64bit) ? 2 : 1;
if (unlikely(ringbuff_offset < (ringbuffer_boundary_check * sizeof(u64)))) {
dev_warn(kctx->kbdev->dev,
"Unexpected wraparound detected between %s & MOVE instruction",
gpu_sync_type_name[type]);
return;
}
/* For 64-bit SYNC commands, the first MOVE command read in will actually use 1 register
* above the compare value register in the sync command, as this will store the higher
* 32-bits of 64-bit compare value. The compare value register read above will be read
* afterwards.
*/
if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg,
compare_val_reg + (is_64bit ? 1 : 0), &sync_addr,
&compare_val))
return;
/* 64-bit WAITs or SETs are split into 2 32-bit MOVEs. sync_val would contain the higher
* 32 bits, so the lower 32-bits are retrieved afterwards, to recover the full u64 value.
*/
if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE && is_64bit) {
u64 compare_val_lower = 0;
ringbuff_offset -= sizeof(u64);
move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
&sync_addr, &compare_val_lower))
return;
/* Mask off upper 32 bits of compare_val_lower, and combine with the higher 32 bits
* to restore the original u64 compare value.
*/
compare_val = (compare_val << 32) | (compare_val_lower & ((u64)U32_MAX));
}
/* 3. Get values from next MOVE command, which should be the CQS object address */
ringbuff_offset -= sizeof(u64);
move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
&sync_addr, &compare_val))
return;
/* 4. Get CQS object value */
if (is_64bit)
err = kbasep_csf_sync_get_cqs_live_u64(kctx, sync_addr, &live_val);
else
err = kbasep_csf_sync_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val));
if (err)
return;
/* 5. Print info */
kbasep_print(kbpr, "queue:GPU-%u-%u-%u exec:%c at:0x%.16llx cmd:%s ", kctx->id,
queue->group->handle, queue->csi_index,
queue->enabled && !follows_wait ? 'S' : 'P', instr_addr,
gpu_sync_type_name[type]);
if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID)
kbasep_print(kbpr, "slot:-");
else
kbasep_print(kbpr, "slot:%d", (int)queue->group->csg_nr);
kbasep_print(kbpr, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
if (type == CSF_GPU_SYNC_WAIT) {
wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd);
kbasep_print(kbpr, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition));
} else
kbasep_print(kbpr, "op:%s ", gpu_sync_type_op[type]);
kbasep_print(kbpr, "arg_value:0x%.16llx\n", compare_val);
}
/**
* kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information.
*
* @kbpr: Pointer to printer instance.
* @queue: Address of a GPU command queue to examine.
*
* This function will iterate through each command in the ring buffer of the given GPU queue from
* CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and
* print relevant information to the sync file.
* This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e.
* when there are no more commands to view) or a number of consumed GPU CALL commands have
* been observed.
*/
static void kbasep_csf_dump_active_queue_sync_info(struct kbasep_printer *kbpr,
struct kbase_queue *queue)
{
struct kbase_context *kctx;
u64 *addr;
u64 cs_extract, cs_insert, instr, cursor, end_cursor;
u32 nr_nearby_instr_size;
bool follows_wait = false;
int nr_calls = 0;
if (!queue)
return;
kctx = queue->kctx;
addr = queue->user_io_addr;
cs_insert = addr[CS_INSERT_LO / sizeof(*addr)];
addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr);
cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)];
nr_nearby_instr_size =
min((MAX_NR_NEARBY_INSTR * (u32)sizeof(u64)), ((queue->size / 2) & ~(0x7u)));
cursor = (cs_extract + queue->size - nr_nearby_instr_size) & ((u64)queue->size - 1);
end_cursor = min(cs_insert, ((cs_extract + nr_nearby_instr_size) & ((u64)queue->size - 1)));
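/* Illustrative note, not part of the driver sources: this selects a dump
 * window of up to MAX_NR_NEARBY_INSTR * sizeof(u64) bytes either side of
 * cs_extract, clamped to half the ring size and to cs_insert. The wrap with
 * "& (queue->size - 1)" is only valid because the ring size is a power of
 * two, which is verified immediately below.
 */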
if (!is_power_of_2(queue->size)) {
dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2",
queue->csi_index, queue->size);
return;
}
kbasep_print(
kbpr,
"queue:GPU-%u-%u-%u size:%u cs_insert:%8llx cs_extract:%8llx dump_begin:%8llx dump_end:%8llx\n",
kctx->id, queue->group->handle, queue->csi_index, queue->size, cs_insert,
cs_extract, cursor, end_cursor);
while ((cs_insert != cs_extract) && (cursor != end_cursor) &&
(nr_calls < MAX_NR_GPU_CALLS)) {
bool instr_is_64_bit = false;
u32 cursor_ringbuff_offset = (u32)cursor;
/* Find instruction that cursor is currently on */
instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset);
switch (INSTR_OPCODE_GET(instr)) {
case GPU_CSF_SYNC_ADD64_OPCODE:
case GPU_CSF_SYNC_SET64_OPCODE:
case GPU_CSF_SYNC_WAIT64_OPCODE:
instr_is_64_bit = true;
break;
default:
break;
}
switch (INSTR_OPCODE_GET(instr)) {
case GPU_CSF_SYNC_ADD_OPCODE:
case GPU_CSF_SYNC_ADD64_OPCODE:
kbasep_csf_print_gpu_sync_op(kbpr, kctx, queue, cursor_ringbuff_offset,
cursor, instr, CSF_GPU_SYNC_ADD,
instr_is_64_bit, follows_wait);
break;
case GPU_CSF_SYNC_SET_OPCODE:
case GPU_CSF_SYNC_SET64_OPCODE:
kbasep_csf_print_gpu_sync_op(kbpr, kctx, queue, cursor_ringbuff_offset,
cursor, instr, CSF_GPU_SYNC_SET,
instr_is_64_bit, follows_wait);
break;
case GPU_CSF_SYNC_WAIT_OPCODE:
case GPU_CSF_SYNC_WAIT64_OPCODE:
kbasep_csf_print_gpu_sync_op(kbpr, kctx, queue, cursor_ringbuff_offset,
cursor, instr, CSF_GPU_SYNC_WAIT,
instr_is_64_bit, follows_wait);
follows_wait = true; /* Future commands will follow at least one wait */
break;
case GPU_CSF_CALL_OPCODE:
nr_calls++;
kbasep_print(kbpr,
"queue:GPU-%u-%u-%u exec:%c at:0x%.16llx cmd:0x%.16llx\n",
kctx->id, queue->group->handle, queue->csi_index,
queue->enabled && !follows_wait ? 'S' : 'P', cursor, instr);
break;
default:
/* NOP instructions without metadata are not printed. */
if (instr) {
kbasep_print(
kbpr,
"queue:GPU-%u-%u-%u exec:%c at:0x%.16llx cmd:0x%.16llx\n",
kctx->id, queue->group->handle, queue->csi_index,
queue->enabled && !follows_wait ? 'S' : 'P', cursor, instr);
}
break;
}
cursor = (cursor + sizeof(u64)) & ((u64)queue->size - 1);
}
}
/**
* kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of
* the provided queue group.
*
* @kctx: The kbase context
* @kbpr: Pointer to printer instance.
* @group: Address of a GPU command group to iterate through.
*
* This function will iterate through each queue in the provided GPU queue group and
* print its SYNC related commands.
*/
static void kbasep_csf_dump_active_group_sync_state(struct kbase_context *kctx,
struct kbasep_printer *kbpr,
struct kbase_queue_group *const group)
{
unsigned int i;
kbasep_print(kbpr, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle,
group->csg_nr, kctx->tgid, kctx->id);
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++)
kbasep_csf_dump_active_queue_sync_info(kbpr, group->bound_queues[i]);
}
int kbasep_csf_sync_gpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr)
{
u32 gr;
struct kbase_device *kbdev;
if (WARN_ON(!kctx))
return -EINVAL;
kbdev = kctx->kbdev;
kbase_csf_scheduler_lock(kbdev);
kbase_csf_csg_update_status(kbdev);
kbasep_print(kbpr, "CSF GPU queues sync info (version: v" __stringify(
MALI_CSF_SYNC_DUMP_VERSION) "):\n");
for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[gr].resident_group;
if (!group || group->kctx != kctx)
continue;
kbasep_csf_dump_active_group_sync_state(kctx, kbpr, group);
}
kbase_csf_scheduler_unlock(kbdev);
return 0;
}

View File

@@ -0,0 +1,51 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_SYNC_H_
#define _KBASE_CSF_SYNC_H_
/* Forward declaration */
struct kbase_context;
struct kbasep_printer;
#define MALI_CSF_SYNC_DUMP_VERSION 0
/**
* kbasep_csf_sync_kcpu_dump_print() - Print CSF KCPU queue sync info
*
* @kctx: The kbase context.
* @kbpr: Pointer to printer instance.
*
* Return: Negative error code or 0 on success.
*/
int kbasep_csf_sync_kcpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr);
/**
* kbasep_csf_sync_gpu_dump_print() - Print CSF GPU queue sync info
*
* @kctx: The kbase context
* @kbpr: Pointer to printer instance.
*
* Return: Negative error code or 0 on success.
*/
int kbasep_csf_sync_gpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr);
#endif /* _KBASE_CSF_SYNC_H_ */
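A minimal sketch of how a debug dump path might drive the two entry points declared above follows; it is illustrative only and not part of this change. The wrapper name is hypothetical, and the kbasep_printer instance is assumed to be created and flushed by the caller.

/* Illustrative sketch only, not part of the driver sources. */
static int example_dump_ctx_sync_state(struct kbase_context *kctx,
				       struct kbasep_printer *kbpr)
{
	int ret = kbasep_csf_sync_kcpu_dump_print(kctx, kbpr);

	if (!ret)
		ret = kbasep_csf_sync_gpu_dump_print(kctx, kbpr);

	return ret;
}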

Some files were not shown because too many files have changed in this diff