From 25b2037af22f36400c9004481b9e44c02c30c8e1 Mon Sep 17 00:00:00 2001
From: Zhen Chen
Date: Fri, 10 Feb 2023 12:01:52 +0800
Subject: [PATCH] MALI: rockchip: upgrade bifrost DDK to g17p0-01eac0, from g15p0-01eac0

In addition, apply some further modifications according to commit
ccf3f0670c36 ("MALI: bifrost: from ARM: Remove references to PageMovable()").

Note: the corresponding mali_csffw.bin for DDK g17 MUST be used.

Change-Id: Ie233cd29d8d169202d5b80b00a97ccb90e6bd3f2
Signed-off-by: Zhen Chen
---
 .../sysfs-device-mali-coresight-source | 113 +++
 Documentation/csf_sync_state_dump.txt | 111 +++
 .../devicetree/bindings/arm/mali-bifrost.txt | 1 +
 .../bindings/arm/mali-coresight-source.txt | 160 +++
 Documentation/dma-buf-test-exporter.txt | 4 +-
 drivers/base/arm/Makefile | 9 +-
 drivers/base/arm/Mconfig | 64 --
 .../memory_group_manager.c | 4 +-
 drivers/gpu/arm/bifrost/Kbuild | 2 +-
 drivers/gpu/arm/bifrost/Kconfig | 71 +-
 drivers/gpu/arm/bifrost/Makefile | 42 +-
 drivers/gpu/arm/bifrost/Mconfig | 326 ------
 drivers/gpu/arm/bifrost/backend/gpu/Kbuild | 8 +-
 .../backend/gpu/mali_kbase_irq_linux.c | 10 +-
 .../bifrost/backend/gpu/mali_kbase_jm_as.c | 8 +-
 .../bifrost/backend/gpu/mali_kbase_jm_hw.c | 46 +-
 .../backend/gpu/mali_kbase_jm_internal.h | 41 +-
 .../bifrost/backend/gpu/mali_kbase_jm_rb.c | 113 +--
 .../bifrost/backend/gpu/mali_kbase_jm_rb.h | 14 +-
 .../backend/gpu/mali_kbase_js_backend.c | 5 +-
 .../backend/gpu/mali_kbase_model_dummy.c | 73 +-
 .../backend/gpu/mali_kbase_model_dummy.h | 29 +-
 .../gpu/mali_kbase_model_error_generator.c | 2 +-
 .../backend/gpu/mali_kbase_model_linux.c | 33 +-
 .../backend/gpu/mali_kbase_model_linux.h | 125 ++-
 .../bifrost/backend/gpu/mali_kbase_pm_ca.c | 4 +-
 .../backend/gpu/mali_kbase_pm_driver.c | 82 +-
 .../backend/gpu/mali_kbase_pm_internal.h | 23 +
 .../backend/gpu/mali_kbase_pm_mcu_states.h | 16 +-
 .../backend/gpu/mali_kbase_pm_metrics.c | 2 +-
 drivers/gpu/arm/bifrost/build.bp | 5 +-
 .../context/backend/mali_kbase_context_csf.c | 2 +
 .../arm/bifrost/context/mali_kbase_context.c | 11 -
 drivers/gpu/arm/bifrost/csf/Kbuild | 14 +-
 drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c | 588 +++++------
 drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h | 41 +-
 .../bifrost/csf/mali_kbase_csf_csg_debugfs.c | 24 +-
 .../bifrost/csf/mali_kbase_csf_csg_debugfs.h | 9 +-
 .../gpu/arm/bifrost/csf/mali_kbase_csf_defs.h | 105 +-
 .../arm/bifrost/csf/mali_kbase_csf_firmware.c | 288 +++++-
 .../arm/bifrost/csf/mali_kbase_csf_firmware.h | 56 +-
 .../csf/mali_kbase_csf_firmware_core_dump.c | 807 ++++++++++++++++
 .../csf/mali_kbase_csf_firmware_core_dump.h | 65 ++
 .../bifrost/csf/mali_kbase_csf_firmware_log.c | 4 +-
 .../bifrost/csf/mali_kbase_csf_firmware_log.h | 3 +
 .../csf/mali_kbase_csf_firmware_no_mali.c | 93 +-
 .../csf/mali_kbase_csf_heap_context_alloc.c | 82 +-
 .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c | 415 ++++++--
 .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h | 22 +-
 .../csf/mali_kbase_csf_mcu_shared_reg.c | 815 ++++++++++++++++
 .../csf/mali_kbase_csf_mcu_shared_reg.h | 139 +++
 .../bifrost/csf/mali_kbase_csf_registers.h | 87 +-
 .../bifrost/csf/mali_kbase_csf_scheduler.c | 212 +++-
 .../bifrost/csf/mali_kbase_csf_sync_debugfs.c | 788 +++++++++++++++
 .../bifrost/csf/mali_kbase_csf_sync_debugfs.h | 37 +
 .../bifrost/csf/mali_kbase_csf_tiler_heap.c | 82 +-
 .../csf/mali_kbase_csf_tiler_heap_reclaim.c | 4 +
 drivers/gpu/arm/bifrost/debug/Kbuild | 3 +-
 .../backend/mali_kbase_debug_coresight_csf.c | 851 ++++++++++++++++
 .../mali_kbase_debug_coresight_internal_csf.h | 182 ++++
.../device/backend/mali_kbase_device_csf.c | 24 +- .../device/backend/mali_kbase_device_hw_csf.c | 8 +- .../device/backend/mali_kbase_device_hw_jm.c | 4 +- .../device/backend/mali_kbase_device_jm.c | 20 +- .../arm/bifrost/device/mali_kbase_device.c | 8 + .../device/mali_kbase_device_internal.h | 12 +- .../mali_kbase_hwcnt_backend_csf_if_fw.c | 12 +- .../backend/mali_kbase_hwcnt_backend_jm.c | 5 +- .../ipa/backend/mali_kbase_ipa_counter_csf.c | 45 +- .../ipa/backend/mali_kbase_ipa_counter_jm.c | 5 +- drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c | 30 +- .../arm/bifrost/ipa/mali_kbase_ipa_simple.c | 6 +- .../gpu/arm/bifrost/jm/mali_kbase_jm_defs.h | 2 +- drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h | 23 +- .../arm/bifrost/mali_base_hwconfig_features.h | 10 - .../arm/bifrost/mali_base_hwconfig_issues.h | 164 +++- drivers/gpu/arm/bifrost/mali_kbase.h | 30 +- .../arm/bifrost/mali_kbase_as_fault_debugfs.c | 10 +- .../gpu/arm/bifrost/mali_kbase_core_linux.c | 59 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.c | 41 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.h | 12 +- drivers/gpu/arm/bifrost/mali_kbase_defs.h | 34 +- drivers/gpu/arm/bifrost/mali_kbase_fence.h | 91 +- .../gpu/arm/bifrost/mali_kbase_fence_ops.c | 50 +- drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c | 53 +- drivers/gpu/arm/bifrost/mali_kbase_hw.c | 12 +- .../gpu/arm/bifrost/mali_kbase_hwaccess_jm.h | 17 +- drivers/gpu/arm/bifrost/mali_kbase_jd.c | 21 +- drivers/gpu/arm/bifrost/mali_kbase_jm.c | 18 +- drivers/gpu/arm/bifrost/mali_kbase_js.c | 210 ++-- .../gpu/arm/bifrost/mali_kbase_kinstr_jm.c | 5 + .../arm/bifrost/mali_kbase_kinstr_prfcnt.c | 515 ++++------ drivers/gpu/arm/bifrost/mali_kbase_linux.h | 4 +- drivers/gpu/arm/bifrost/mali_kbase_mem.c | 407 ++++++-- drivers/gpu/arm/bifrost/mali_kbase_mem.h | 148 ++- .../gpu/arm/bifrost/mali_kbase_mem_linux.c | 241 ++++- .../gpu/arm/bifrost/mali_kbase_mem_linux.h | 2 +- .../gpu/arm/bifrost/mali_kbase_mem_migrate.c | 347 ++++++- .../gpu/arm/bifrost/mali_kbase_mem_migrate.h | 7 +- drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c | 84 +- drivers/gpu/arm/bifrost/mali_kbase_softjobs.c | 41 +- drivers/gpu/arm/bifrost/mali_kbase_vinstr.c | 5 + .../bifrost/mmu/backend/mali_kbase_mmu_csf.c | 13 +- .../bifrost/mmu/backend/mali_kbase_mmu_jm.c | 6 +- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c | 914 +++++++++++++++--- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h | 109 ++- .../gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h | 45 +- .../bifrost/mmu/mali_kbase_mmu_hw_direct.c | 8 + drivers/gpu/arm/bifrost/tests/Mconfig | 73 -- .../bifrost/tests/include/kutf/kutf_helpers.h | 27 +- .../gpu/arm/bifrost/tests/kutf/kutf_helpers.c | 14 +- .../arm/bifrost/tl/mali_kbase_timeline_io.c | 5 + .../gpu/arm/bifrost/tl/mali_kbase_tlstream.h | 12 +- .../arm/bifrost/tl/mali_kbase_tracepoints.c | 32 + .../arm/bifrost/tl/mali_kbase_tracepoints.h | 37 + drivers/hwtracing/coresight/mali/Kbuild | 65 ++ drivers/hwtracing/coresight/mali/Kconfig | 47 + drivers/hwtracing/coresight/mali/Makefile | 101 ++ drivers/hwtracing/coresight/mali/build.bp | 100 ++ .../coresight/mali/coresight_mali_common.c | 62 ++ .../coresight/mali/coresight_mali_common.h | 133 +++ .../mali/sources/coresight_mali_sources.c | 168 ++++ .../mali/sources/coresight_mali_sources.h | 94 ++ .../mali/sources/ela/coresight-ela600.h | 129 +++ .../ela/coresight_mali_source_ela_core.c | 666 +++++++++++++ .../etm/coresight_mali_source_etm_core.c | 280 ++++++ .../itm/coresight_mali_source_itm_core.c | 265 +++++ .../linux/mali_kbase_debug_coresight_csf.h | 241 +++++ 
include/linux/version_compat_defs.h | 6 +- .../backend/gpu/mali_kbase_model_dummy.h | 13 + .../backend/gpu/mali_kbase_model_linux.h | 38 + .../arm/bifrost/csf/mali_base_csf_kernel.h | 18 +- .../arm/bifrost/csf/mali_kbase_csf_ioctl.h | 10 +- .../gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h | 1 - .../gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h | 10 +- .../gpu/arm/bifrost/mali_kbase_hwcnt_reader.h | 4 +- .../uapi/gpu/arm/bifrost/mali_kbase_ioctl.h | 276 +++--- 137 files changed, 11502 insertions(+), 2737 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-device-mali-coresight-source create mode 100644 Documentation/csf_sync_state_dump.txt create mode 100644 Documentation/devicetree/bindings/arm/mali-coresight-source.txt delete mode 100644 drivers/base/arm/Mconfig delete mode 100644 drivers/gpu/arm/bifrost/Mconfig create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h create mode 100644 drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c create mode 100644 drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h delete mode 100644 drivers/gpu/arm/bifrost/tests/Mconfig create mode 100644 drivers/hwtracing/coresight/mali/Kbuild create mode 100644 drivers/hwtracing/coresight/mali/Kconfig create mode 100644 drivers/hwtracing/coresight/mali/Makefile create mode 100644 drivers/hwtracing/coresight/mali/build.bp create mode 100644 drivers/hwtracing/coresight/mali/coresight_mali_common.c create mode 100644 drivers/hwtracing/coresight/mali/coresight_mali_common.h create mode 100644 drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c create mode 100644 drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h create mode 100644 drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h create mode 100644 drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c create mode 100644 drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c create mode 100644 drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c create mode 100644 include/linux/mali_kbase_debug_coresight_csf.h create mode 100644 include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h diff --git a/Documentation/ABI/testing/sysfs-device-mali-coresight-source b/Documentation/ABI/testing/sysfs-device-mali-coresight-source new file mode 100644 index 000000000000..a24a88a824e4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-device-mali-coresight-source @@ -0,0 +1,113 @@ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation) and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program) and can also be obtained + * from Free Software Foundation) Inc.) 51 Franklin Street) Fifth Floor) + * Boston) MA 02110-1301) USA. 
+ * + */ + +What: /sys/bus/coresight/devices/mali-source-etm/enable_source +Description: + Attribute used to enable Coresight Source ETM. + +What: /sys/bus/coresight/devices/mali-source-etm/is_enabled +Description: + Attribute used to check if Coresight Source ITM is enabled. + +What: /sys/bus/coresight/devices/mali-source-etm/trcconfigr +Description: + Coresight Source ETM trace configuration to enable global + timestamping, and data value tracing. + +What: /sys/bus/coresight/devices/mali-source-etm/trctraceidr +Description: + Coresight Source ETM trace ID. + +What: /sys/bus/coresight/devices/mali-source-etm/trcvdarcctlr +Description: + Coresight Source ETM viewData include/exclude address + range comparators. + +What: /sys/bus/coresight/devices/mali-source-etm/trcviiectlr +Description: + Coresight Source ETM viewInst include and exclude control. + +What: /sys/bus/coresight/devices/mali-source-etm/trcstallctlr +Description: + Coresight Source ETM stall control register. + +What: /sys/bus/coresight/devices/mali-source-itm/enable_source +Description: + Attribute used to enable Coresight Source ITM. + +What: /sys/bus/coresight/devices/mali-source-itm/is_enabled +Description: + Attribute used to check if Coresight Source ITM is enabled. + +What: /sys/bus/coresight/devices/mali-source-itm/dwt_ctrl +Description: + Coresight Source DWT configuration: + [0] = 1, enable cycle counter + [4:1] = 4, set PC sample rate pf 256 cycles + [8:5] = 1, set initial post count value + [9] = 1, select position of post count tap on the cycle counter + [10:11] = 1, enable sync packets + [12] = 1, enable periodic PC sample packets + +What: /sys/bus/coresight/devices/mali-source-itm/itm_tcr +Description: + Coresight Source ITM configuration: + [0] = 1, Enable ITM + [1] = 1, Enable Time stamp generation + [2] = 1, Enable sync packet transmission + [3] = 1, Enable HW event forwarding + [11:10] = 1, Generate TS request approx every 128 cycles + [22:16] = 1, Trace bus ID + +What: /sys/bus/coresight/devices/mali-source-ela/enable_source +Description: + Attribute used to enable Coresight Source ELA. + +What: /sys/bus/coresight/devices/mali-source-ela/is_enabled +Description: + Attribute used to check if Coresight Source ELA is enabled. + +What: /sys/bus/coresight/devices/mali-source-ela/select +Description: + Coresight Source ELA select trace mode: + [0], NONE + [1], JCN + [2], CEU_EXEC + [3], CEU_CMDS + [4], MCU_AHBP + [5], HOST_AXI + [6], NR_TRACEMODE + + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigmask0 +Description: + Coresight Source ELA SIGMASK0 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigmask4 +Description: + Coresight Source ELA SIGMASK4 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigcomp0 +Description: + Coresight Source ELA SIGCOMP0 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigcomp4 +Description: + Coresight Source ELA SIGCOMP4 register set/get. + Refer to specification for more details. diff --git a/Documentation/csf_sync_state_dump.txt b/Documentation/csf_sync_state_dump.txt new file mode 100644 index 000000000000..dc1e48774377 --- /dev/null +++ b/Documentation/csf_sync_state_dump.txt @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +DebugFS interface: +------------------ + +A new per-kbase-context debugfs file called csf_sync has been implemented +which captures the current KCPU & GPU queue state of the not-yet-completed +operations and displayed through the debugfs file. +This file is at: +======================================================= +/sys/kernel/debug/mali0/ctx/_/csf_sync +======================================================= + +Output Format: +---------------- + +The csf_sync file contains important data for the currently active queues. +This data is formatted into two segments, which are separated by a +pipe character: the common properties and the operation-specific properties. + +Common Properties: +------------------ + +* Queue type: GPU or KCPU. +* kbase context id and the queue id. +* If the queue type is a GPU queue then the group handle is also noted, +in the middle of the other two IDs. The slot value is also dumped. +* Execution status, which can either be 'P' for pending or 'S' for started. +* Command type is then output which indicates the type of dependency +(i.e. wait or signal). +* Object address which is a pointer to the sync object that the +command operates on. +* The live value, which is the value of the synchronization object +at the time of dumping. This could help to determine why wait +operations might be blocked. + +Operation-Specific Properties: +------------------------------ + +The operation-specific values for KCPU queue fence operations +are as follows: a unique timeline name, timeline context, and a fence +sequence number. The CQS WAIT and CQS SET are denoted in the sync dump +as their OPERATION counterparts, and therefore show the same operation +specific values; the argument value to wait on or set to, and operation type, +being (by definition) op:gt and op:set for CQS_WAIT and CQS_SET respectively. + +There are only two operation-specific values for operations in GPU queues +which are always shown; the argument value to wait on or set/add to, +and the operation type (set/add) or wait condition (e.g. LE, GT, GE). + +Examples +-------- +GPU Queue Example +------------------ + +The following output is of a GPU queue, from a process that has a KCTX ID of 52, +is in Queue Group (CSG) 0, and has Queue ID 0. It has started and is waiting on +the object at address 0x0000007f81ffc800. The live value is 0, +as is the arg value. 
However, the operation "op" is GT, indicating it's waiting +for the live value to surpass the arg value: + +====================================================================================================================================== +queue:GPU-52-0-0 exec:S cmd:SYNC_WAIT slot:4 obj:0x0000007f81ffc800 live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000000 +====================================================================================================================================== + +The following is an example of GPU queue dump, where the SYNC SET operation +is blocked by the preceding SYNC WAIT operation. This shows two GPU queues, +with the same KCTX ID of 8, Queue Group (CSG) 0, and Queue ID 0. The SYNC WAIT +operation has started, while the SYNC SET is pending, blocked by the SYNC WAIT. +Both operations are on the same slot, 2 and have live value of 0. The SYNC WAIT +is waiting on the object at address 0x0000007f81ffc800, while the SYNC SET will +set the object at address 0x00000000a3bad4fb when it is unblocked. +The operation "op" is GT for the SYNC WAIT, indicating it's waiting for the +live value to surpass the arg value, while the operation and arg value for the +SYNC SET is "set" and "1" respectively: + +====================================================================================================================================== +queue:GPU-8-0-0 exec:S cmd:SYNC_WAIT slot:2 obj:0x0000007f81ffc800 live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000000 +queue:GPU-8-0-0 exec:P cmd:SYNC_SET slot:2 obj:0x00000000a3bad4fb live_value:0x0000000000000000 | op:set arg_value:0x0000000000000001 +====================================================================================================================================== + +KCPU Queue Example +------------------ + +The following is an example of a KCPU queue, from a process that has +a KCTX ID of 0 and has Queue ID 1. It has started and is waiting on the +object at address 0x0000007fbf6f2ff8. The live value is currently 0 with +the "op" being GT indicating it is waiting on the live value to +surpass the arg value. + +=============================================================================================================================== +queue:KCPU-0-1 exec:S cmd:CQS_WAIT_OPERATION obj:0x0000007fbf6f2ff8 live_value:0x0000000000000000 | op:gt arg_value: 0x00000000 +=============================================================================================================================== diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index 2b3b1d028ccd..caf2de5e47be 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -235,6 +235,7 @@ gpu@0xfc010000 { ... pbha { int_id_override = <2 0x32>, <9 0x05>, <16 0x32>; + propagate_bits = <0x03>; }; ... }; diff --git a/Documentation/devicetree/bindings/arm/mali-coresight-source.txt b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt new file mode 100644 index 000000000000..87a1ce3b3e85 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt @@ -0,0 +1,160 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# +===================================== +ARM CoreSight Mali Source integration +===================================== + +See Documentation/trace/coresight/coresight.rst for detailed information +about Coresight. + +This documentation will cover Mali specific devicetree integration. + +References to Sink ports are given as examples. Access to Sink is specific +to an implementation and would require dedicated kernel modules. + +ARM Coresight Mali Source ITM +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-itm" +- gpu : phandle to a Mali GPU definition +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example +------- + +mali-source-itm { + compatible = "arm,coresight-mali-source-itm"; + gpu = <&gpu>; + port { + mali_source_itm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port0>; + }; + }; +}; + +ARM Coresight Mali Source ETM +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-etm" +- gpu : phandle to a Mali GPU definition +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example +------- + +mali-source-etm { + compatible = "arm,coresight-mali-source-etm"; + gpu = <&gpu>; + port { + mali_source_etm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; +}; + +ARM Coresight Mali Source ELA +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-ela" +- gpu : phandle to a Mali GPU definition +- signal-groups: Signal groups indexed from 0 to 5. + Used to configure the signal channels. + - sgN: Types of signals attached to one channel. + It can be more than one type in the case of + JCN request/response. + + Types: + - "jcn-request": Can share the channel with "jcn-response" + - "jcn-response": Can share the channel with "jcn-request" + - "ceu-execution": Cannot share the channel with other types + - "ceu-commands": Cannot share the channel with other types + - "mcu-ahbp": Cannot share the channel with other types + - "host-axi": Cannot share the channel with other types + + + If the HW implementation shares a common channel + for JCN response and request (total of 4 channels), + Refer to: + - "Example: Shared JCN request/response channel" + Otherwise (total of 5 channels), refer to: + - "Example: Split JCN request/response channel" +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example: Split JCN request/response channel +-------------------------------------------- + +This examples applies to implementations with a total of 5 signal groups, +where JCN request and response are assigned to independent channels. 
+ +mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + signal-groups { + sg0 = "jcn-request"; + sg1 = "jcn-response"; + sg2 = "ceu-execution"; + sg3 = "ceu-commands"; + sg4 = "mcu-ahbp"; + sg5 = "host-axi"; + }; + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port2>; + }; + }; +}; + +Example: Shared JCN request/response channel +-------------------------------------------- + +This examples applies to implementations with a total of 4 signal groups, +where JCN request and response are assigned to the same channel. + +mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + signal-groups { + sg0 = "jcn-request", "jcn-response"; + sg1 = "ceu-execution"; + sg2 = "ceu-commands"; + sg3 = "mcu-ahbp"; + sg4 = "host-axi"; + }; + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; +}; diff --git a/Documentation/dma-buf-test-exporter.txt b/Documentation/dma-buf-test-exporter.txt index b01020c06751..70a92f7d3e28 100644 --- a/Documentation/dma-buf-test-exporter.txt +++ b/Documentation/dma-buf-test-exporter.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2020-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -38,5 +38,5 @@ The buffers support all of the dma_buf API, including mmap. It supports being compiled as a module both in-tree and out-of-tree. -See include/linux/dma-buf-test-exporter.h for the ioctl interface. +See include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h for the ioctl interface. See Documentation/dma-buf-sharing.txt for details on dma_buf. diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile index c1a61a1106d0..cc4bde71d3e6 100644 --- a/drivers/base/arm/Makefile +++ b/drivers/base/arm/Makefile @@ -90,6 +90,12 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ KBUILD_CFLAGS += -Wall -Werror +ifeq ($(CONFIG_GCOV_KERNEL), y) + KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) + KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + EXTRA_CFLAGS += -DGCOV_PROFILE=1 +endif + # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter @@ -120,7 +126,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization # global variables. KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -KBUILD_CFLAGS += -Wtype-limits +# -Wtype-limits must be disabled due to build failures on kernel 5.x +KBUILD_CFLAGS += -Wno-type-limit KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) diff --git a/drivers/base/arm/Mconfig b/drivers/base/arm/Mconfig deleted file mode 100644 index f7787f0ccd34..000000000000 --- a/drivers/base/arm/Mconfig +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_BASE_MODULES - bool "Mali Base extra modules" - default y if BACKEND_KERNEL - help - Enable this option to build support for a Arm Mali base modules. - Those modules provide extra features or debug interfaces and, - are optional for the use of the Mali GPU modules. - -config DMA_SHARED_BUFFER_TEST_EXPORTER - bool "Build dma-buf framework test exporter module" - depends on MALI_BASE_MODULES - default y - help - This option will build the dma-buf framework test exporter module. - Usable to help test importers. - - Modules: - - dma-buf-test-exporter.ko - -config MALI_MEMORY_GROUP_MANAGER - bool "Build Mali Memory Group Manager module" - depends on MALI_BASE_MODULES - default y - help - This option will build the memory group manager module. - This is an example implementation for allocation and release of pages - for memory pools managed by Mali GPU device drivers. - - Modules: - - memory_group_manager.ko - -config MALI_PROTECTED_MEMORY_ALLOCATOR - bool "Build Mali Protected Memory Allocator module" - depends on MALI_BASE_MODULES && GPU_HAS_CSF - default y - help - This option will build the protected memory allocator module. - This is an example implementation for allocation and release of pages - of secure memory intended to be used by the firmware - of Mali GPU device drivers. - - Modules: - - protected_memory_allocator.ko - diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c index 825893e3cf8e..2acb9faf12d0 100644 --- a/drivers/base/arm/memory_group_manager/memory_group_manager.c +++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -228,8 +228,8 @@ static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) #define ORDER_SMALL_PAGE 0 #define ORDER_LARGE_PAGE 9 -static void update_size(struct memory_group_manager_device *mgm_dev, int - group_id, int order, bool alloc) +static void update_size(struct memory_group_manager_device *mgm_dev, unsigned int group_id, + int order, bool alloc) { struct mgm_groups *data = mgm_dev->data; diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index 70f3997b2bd3..398e102a0af5 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"g15p0-01eac0"' +MALI_RELEASE_NAME ?= '"g17p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index 1bfb59ca14e2..e530e8c85b17 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -41,9 +41,30 @@ config MALI_PLATFORM_NAME include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must exist. 
-config MALI_REAL_HW +choice + prompt "Mali HW backend" depends on MALI_BIFROST - def_bool !MALI_BIFROST_NO_MALI + default MALI_REAL_HW + +config MALI_REAL_HW + bool "Enable build of Mali kernel driver for real HW" + depends on MALI_BIFROST + help + This is the default HW backend. + +config MALI_BIFROST_NO_MALI + bool "Enable build of Mali kernel driver for No Mali" + depends on MALI_BIFROST && MALI_BIFROST_EXPERT + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task. + + +endchoice menu "Platform specific options" source "drivers/gpu/arm/bifrost/platform/Kconfig" @@ -91,6 +112,21 @@ config MALI_BIFROST_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_BIFROST && MALI_CSF_SUPPORT + default n + help + Adds ability to request firmware core dump through the "fw_core_dump" + debugfs file + + Example: + * To explicitly request core dump: + echo 1 > /sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_BIFROST && !MALI_CSF_SUPPORT @@ -127,6 +163,11 @@ config MALI_DMA_BUF_LEGACY_COMPAT flushes in other drivers. This only has an effect for clients using UK 11.18 or older. For later UK versions it is not possible. +config MALI_CORESIGHT + depends on MALI_BIFROST && MALI_CSF_SUPPORT && !MALI_BIFROST_NO_MALI + bool "Enable Kbase CoreSight tracing support" + default n + menuconfig MALI_BIFROST_EXPERT depends on MALI_BIFROST bool "Enable Expert Settings" @@ -174,18 +215,6 @@ config MALI_CORESTACK comment "Platform options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT -config MALI_BIFROST_NO_MALI - bool "Enable No Mali" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This can be used to test the driver in a simulated environment - whereby the hardware is not physically present. If the hardware is physically - present it will not be used. This can be used to test the majority of the - driver without needing actual hardware or for software benchmarking. - All calls to the simulated hardware will complete immediately as if the hardware - completed the task. 
- config MALI_BIFROST_ERROR_INJECT bool "Enable No Mali error injection" depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI @@ -204,20 +233,6 @@ config MALI_GEM5_BUILD comment "Debug options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - config MALI_BIFROST_DEBUG bool "Enable debug build" depends on MALI_BIFROST && MALI_BIFROST_EXPERT diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile index 3fb736d7950e..dfe96d8c37e7 100644 --- a/drivers/gpu/arm/bifrost/Makefile +++ b/drivers/gpu/arm/bifrost/Makefile @@ -39,17 +39,10 @@ ifeq ($(CONFIG_MALI_BIFROST),m) CONFIG_MALI_ARBITRATION ?= n CONFIG_MALI_PARTITION_MANAGER ?= n - ifeq ($(origin CONFIG_MALI_ABITER_MODULES), undefined) - CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION) - endif - - ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined) - CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION) - endif - ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI CONFIG_MALI_REAL_HW ?= y + CONFIG_MALI_CORESIGHT = n endif ifeq ($(CONFIG_MALI_BIFROST_DVFS),y) @@ -64,10 +57,11 @@ ifeq ($(CONFIG_MALI_BIFROST),m) CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n endif - ifeq ($(CONFIG_XEN),y) - ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n) - CONFIG_MALI_XEN ?= m - endif + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_FW_CORE_DUMP ?= y + CONFIG_MALI_CORESIGHT ?= n + else + CONFIG_MALI_FW_CORE_DUMP ?= n endif # @@ -76,12 +70,14 @@ ifeq ($(CONFIG_MALI_BIFROST),m) ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y) ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) CONFIG_MALI_REAL_HW = n + else # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n CONFIG_MALI_REAL_HW = y CONFIG_MALI_BIFROST_ERROR_INJECT = n endif + ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n @@ -143,12 +139,11 @@ ifeq ($(CONFIG_MALI_BIFROST),m) else # Prevent misuse when CONFIG_MALI_BIFROST=n CONFIG_MALI_ARBITRATION = n - CONFIG_MALI_ARBITER_MODULES = n - CONFIG_MALI_GPU_POWER_MODULES = n CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + CONFIG_MALI_FW_CORE_DUMP = n endif # All Mali CONFIG should be listed here @@ -158,8 +153,6 @@ CONFIGS := \ CONFIG_MALI_BIFROST_GATOR_SUPPORT \ CONFIG_MALI_ARBITER_SUPPORT \ CONFIG_MALI_ARBITRATION \ - CONFIG_MALI_ARBITER_MODULES \ - CONFIG_MALI_GPU_POWER_MODULES \ CONFIG_MALI_PARTITION_MANAGER \ CONFIG_MALI_REAL_HW \ CONFIG_MALI_GEM5_BUILD \ @@ -189,10 +182,14 @@ CONFIGS := \ CONFIG_MALI_KUTF_IRQ_TEST \ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN + CONFIG_MALI_XEN \ + CONFIG_MALI_FW_CORE_DUMP \ + CONFIG_MALI_CORESIGHT -# +THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) +-include $(THIS_DIR)/../arbitration/Makefile + # MAKE_ARGS to pass the custom CONFIGs on out-of-tree build # # Generate the list of CONFIGs and values. 
@@ -254,7 +251,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization # global variables. KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -KBUILD_CFLAGS += -Wtype-limits +# -Wtype-limits must be disabled due to build failures on kernel 5.x +KBUILD_CFLAGS += -Wno-type-limit KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) @@ -263,6 +261,12 @@ KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 # This warning is disabled to avoid build failures in some kernel versions KBUILD_CFLAGS += -Wno-ignored-qualifiers +ifeq ($(CONFIG_GCOV_KERNEL),y) + KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) + KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + EXTRA_CFLAGS += -DGCOV_PROFILE=1 +endif + all: $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules diff --git a/drivers/gpu/arm/bifrost/Mconfig b/drivers/gpu/arm/bifrost/Mconfig deleted file mode 100644 index f812bcad639c..000000000000 --- a/drivers/gpu/arm/bifrost/Mconfig +++ /dev/null @@ -1,326 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_BIFROST - bool "Mali Midgard series support" - default y - help - Enable this option to build support for a ARM Mali Midgard GPU. - - To compile this driver as a module, choose M here: - this will generate a single module, called mali_kbase. - -config MALI_PLATFORM_NAME - depends on MALI_BIFROST - string "Platform name" - default "hisilicon" if PLATFORM_HIKEY960 - default "hisilicon" if PLATFORM_HIKEY970 - default "devicetree" - help - Enter the name of the desired platform configuration directory to - include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must - exist. - - When PLATFORM_CUSTOM is set, this needs to be set manually to - pick up the desired platform files. - -config MALI_REAL_HW - bool - depends on MALI_BIFROST - default y - default n if NO_MALI - -config MALI_PLATFORM_DT_PIN_RST - bool "Enable Juno GPU Pin reset" - depends on MALI_BIFROST - default n - default y if BUSLOG - help - Enables support for GPUs pin reset on Juno platforms. - -config MALI_CSF_SUPPORT - bool "Enable Mali CSF based GPU support" - depends on MALI_BIFROST - default y if GPU_HAS_CSF - help - Enables support for CSF based GPUs. - -config MALI_BIFROST_DEVFREQ - bool "Enable devfreq support for Mali" - depends on MALI_BIFROST - default y - help - Support devfreq for Mali. - - Using the devfreq framework and, by default, the simple on-demand - governor, the frequency of Mali will be dynamically selected from the - available OPPs. 
- -config MALI_BIFROST_DVFS - bool "Enable legacy DVFS" - depends on MALI_BIFROST && !MALI_BIFROST_DEVFREQ - default n - help - Choose this option to enable legacy DVFS in the Mali Midgard DDK. - -config MALI_BIFROST_GATOR_SUPPORT - bool "Enable Streamline tracing support" - depends on MALI_BIFROST && !BACKEND_USER - default y - help - Enables kbase tracing used by the Arm Streamline Performance Analyzer. - The tracepoints are used to derive GPU activity charts in Streamline. - -config MALI_BIFROST_ENABLE_TRACE - bool "Enable kbase tracing" - depends on MALI_BIFROST - default y if MALI_BIFROST_DEBUG - default n - help - Enables tracing in kbase. Trace log available through - the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled - -config MALI_ARBITER_SUPPORT - bool "Enable arbiter support for Mali" - depends on MALI_BIFROST && !MALI_CSF_SUPPORT - default n - help - Enable support for the arbiter interface in the driver. - This allows an external arbiter to manage driver access - to GPU hardware in a virtualized environment - - If unsure, say N. - -config DMA_BUF_SYNC_IOCTL_SUPPORTED - bool "Enable Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" - depends on MALI_BIFROST && BACKEND_KERNEL - default y - -config MALI_DMA_BUF_MAP_ON_DEMAND - bool "Enable map imported dma-bufs on demand" - depends on MALI_BIFROST - default n - default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED - help - This option will cause kbase to set up the GPU mapping of imported - dma-buf when needed to run atoms. This is the legacy behavior. - - This is intended for testing and the option will get removed in the - future. - -config MALI_DMA_BUF_LEGACY_COMPAT - bool "Enable legacy compatibility cache flush on dma-buf map" - depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND - default n - help - This option enables compatibility with legacy dma-buf mapping - behavior, then the dma-buf is mapped on import, by adding cache - maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, - including a cache flush. - - This option might work-around issues related to missing cache - flushes in other drivers. This only has an effect for clients using - UK 11.18 or older. For later UK versions it is not possible. - -menuconfig MALI_BIFROST_EXPERT - depends on MALI_BIFROST - bool "Enable Expert Settings" - default y - help - Enabling this option and modifying the default settings may produce - a driver with performance or other limitations. - -config MALI_MEMORY_FULLY_BACKED - bool "Enable memory fully physically-backed" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This option enables full physical backing of all virtual - memory allocations in the kernel. Notice that this build - option only affects allocations of grow-on-GPU-page-fault - memory. - -config MALI_CORESTACK - bool "Enable support of GPU core stack power control" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - Enabling this feature on supported GPUs will let the driver powering - on/off the GPU core stack independently without involving the Power - Domain Controller. This should only be enabled on platforms which - integration of the PDC to the Mali GPU is known to be problematic. - This feature is currently only supported on t-Six and t-HEx GPUs. - - If unsure, say N. 
- -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - -choice - prompt "Error injection level" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default MALI_ERROR_INJECT_NONE - help - Enables insertion of errors to test module failure and recovery mechanisms. - -config MALI_ERROR_INJECT_NONE - bool "disabled" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - help - Error injection is disabled. - -config MALI_ERROR_INJECT_TRACK_LIST - bool "error track list" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI - help - Errors to inject are pre-configured by the user. - -config MALI_ERROR_INJECT_RANDOM - bool "random error injection" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI - help - Injected errors are random, rather than user-driven. - -endchoice - -config MALI_ERROR_INJECT_ON - string - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default "0" if MALI_ERROR_INJECT_NONE - default "1" if MALI_ERROR_INJECT_TRACK_LIST - default "2" if MALI_ERROR_INJECT_RANDOM - -config MALI_BIFROST_ERROR_INJECT - bool - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if !MALI_ERROR_INJECT_NONE - -config MALI_GEM5_BUILD - bool "Enable build of Mali kernel driver for GEM5" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This option is to do a Mali GEM5 build. - If unsure, say N. - -config MALI_BIFROST_DEBUG - bool "Enable debug build" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if DEBUG - default n - help - Select this option for increased checking and reporting of errors. - -config MALI_BIFROST_FENCE_DEBUG - bool "Enable debug sync fence usage" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if MALI_BIFROST_DEBUG - help - Select this option to enable additional checking and reporting on the - use of sync fences in the Mali driver. - - This will add a 3s timeout to all sync fence waits in the Mali - driver, so that when work for Mali has been waiting on a sync fence - for a long time a debug message will be printed, detailing what fence - is causing the block, and which dependent Mali atoms are blocked as a - result of this. - - The timeout can be changed at runtime through the js_soft_timeout - device attribute, where the timeout is specified in milliseconds. - -config MALI_BIFROST_SYSTEM_TRACE - bool "Enable system event tracing support" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if MALI_BIFROST_DEBUG - default n - help - Choose this option to enable system trace events for each - kbase event. This is typically used for debugging but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -# Instrumentation options. - -# config MALI_PRFCNT_SET_PRIMARY exists in the Kernel Kconfig but is configured using CINSTR_PRIMARY_HWC in Mconfig. -# config MALI_BIFROST_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. -# config MALI_PRFCNT_SET_TERTIARY exists in the Kernel Kconfig but is configured using CINSTR_TERTIARY_HWC in Mconfig. 
-# config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS exists in the Kernel Kconfig but is configured using CINSTR_HWC_SET_SELECT_VIA_DEBUG_FS in Mconfig. - -config MALI_JOB_DUMP - bool "Enable system level support needed for job dumping" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - Choose this option to enable system level support needed for - job dumping. This is typically used for instrumentation but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -config MALI_PWRSOFT_765 - bool "Enable workaround for PWRSOFT-765" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged - in kernel v4.10, however if backported into the kernel then this - option must be manually selected. - - If using kernel >= v4.10 then say N, otherwise if devfreq cooling - changes have been backported say Y to avoid compilation errors. - - -config MALI_HW_ERRATA_1485982_NOT_AFFECTED - bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - default y if PLATFORM_JUNO - help - This option disables the default workaround for GPU2017-1336. The - workaround keeps the L2 cache powered up except for powerdown and reset. - - The workaround introduces a limitation that will prevent the running of - protected mode content on fully coherent platforms, as the switch to IO - coherency mode requires the L2 to be turned off. - -config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE - bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED - default n - help - This option uses an alternative workaround for GPU2017-1336. Lowering - the GPU clock to a, platform specific, known good frequeuncy before - powering down the L2 cache. The clock can be specified in the device - tree using the property, opp-mali-errata-1485982. Otherwise the - slowest clock will be selected. 
- - -source "kernel/drivers/gpu/arm/arbitration/Mconfig" -source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index 7eec91ff6631..efebc8a544d1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -47,8 +47,12 @@ endif bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o -# Dummy model +ifneq ($(CONFIG_MALI_REAL_HW),y) + bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o +endif + +# NO_MALI Dummy model interface bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_linux.o # HW error simulation bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o + diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index 15999cbc9126..eb63b2c56c3d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -25,12 +25,12 @@ #include -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) /* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 static void *kbase_tag(void *ptr, u32 tag) { @@ -500,4 +500,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c index 888aa59421a7..258dc6dac6c5 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,9 +67,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, kbase_js_runpool_inc_context_count(kbdev, kctx); } -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { int i; @@ -240,4 +239,3 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, return true; } - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index e17014e45f6b..ab27e8bde40e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -44,9 +44,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, const u64 limited_core_mask); -static u64 kbase_job_write_affinity(struct kbase_device *kbdev, - base_jd_core_req core_req, - int js, const u64 limited_core_mask) +static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, + unsigned int js, const u64 limited_core_mask) { u64 affinity; bool skip_affinity_check = false; @@ -191,7 +190,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js) { struct kbase_context *kctx; u32 cfg; @@ -344,10 +343,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * work out the best estimate (which might still result in an over-estimate to * the calculated time spent) */ -static void kbasep_job_slot_update_head_start_timestamp( - struct kbase_device *kbdev, - int js, - ktime_t end_timestamp) +static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js, + ktime_t end_timestamp) { ktime_t timestamp_diff; struct kbase_jd_atom *katom; @@ -377,8 +374,7 @@ static void kbasep_job_slot_update_head_start_timestamp( * Make a tracepoint call to the instrumentation module informing that * softstop happened on given lpu (job slot). */ -static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, - int js) +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) { KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( kbdev, @@ -387,7 +383,6 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, void kbase_job_done(struct kbase_device *kbdev, u32 done) { - int i; u32 count = 0; ktime_t end_timestamp; @@ -398,6 +393,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp = ktime_get_raw(); while (done) { + unsigned int i; u32 failed = done >> 16; /* treat failed slots as finished slots */ @@ -407,8 +403,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * numbered interrupts before the higher numbered ones. 
*/ i = ffs(finished) - 1; - if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__)) - break; do { int nr_done; @@ -607,11 +601,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); } -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom) +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) { #if KBASE_KTRACE_ENABLE u32 status_reg_before; @@ -669,6 +661,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, struct kbase_context *head_kctx; head = kbase_gpu_inspect(kbdev, js, 0); + if (unlikely(!head)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } head_kctx = head->kctx; if (status_reg_before == BASE_JD_EVENT_ACTIVE) @@ -737,7 +733,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; - int i; + unsigned int i; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -749,7 +745,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; - int target_js = target_katom->slot_nr; + unsigned int target_js = target_katom->slot_nr; int i; bool stop_sent = false; @@ -927,8 +923,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); * * Where possible any job in the next register is evicted before the soft-stop. */ -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags) +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) { dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); @@ -948,8 +944,8 @@ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); } -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom) +void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, + struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev = kctx->kbdev; bool stopped; @@ -1258,7 +1254,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) { - int i; + unsigned int i; int pending_jobs = 0; /* Count the number of jobs */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h index 1ebb8434046c..e4cff1f1e59c 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h @@ -33,21 +33,6 @@ #include #include -/** - * kbase_job_submit_nolock() - Submit a job to a certain job-slot - * @kbdev: Device pointer - * @katom: Atom to submit - * @js: Job slot to submit on - * - * The caller must check kbasep_jm_is_submit_slots_free() != false before - * calling this. 
- * - * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock - */ -void kbase_job_submit_nolock(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, int js); - /** * kbase_job_done_slot() - Complete the head job on a particular job-slot * @kbdev: Device pointer @@ -60,17 +45,16 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp); #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -static inline char *kbasep_make_job_slot_string(int js, char *js_string, - size_t js_size) +static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%i", js); + snprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif #if !MALI_USE_CSF -static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, - struct kbase_context *kctx) +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js, + struct kbase_context *kctx) { return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); } @@ -90,7 +74,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, * * Return: 0 if the job was successfully submitted to hardware, an error otherwise. */ -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js); #if !MALI_USE_CSF /** @@ -106,11 +90,9 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * The following locking conditions are made on the caller: * - it must hold the hwaccess_lock */ -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom); +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); #endif /* !MALI_USE_CSF */ /** @@ -134,11 +116,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * * Return: true if an atom was stopped, false otherwise */ -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action); +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action); /** * kbase_job_slot_init - Initialise job slot framework diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index e5af4ca8fc43..388b37f36a9d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -93,9 +93,8 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, * * Return: Atom removed from ringbuffer */ -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, - int js, - ktime_t *end_timestamp) +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, + ktime_t *end_timestamp) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; struct kbase_jd_atom *katom; @@ -118,8 +117,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, return katom; } -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - 
int idx) +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -131,8 +129,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; } -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js) +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -144,12 +141,13 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) { - int js; - int i; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -160,7 +158,7 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) return false; } -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -178,7 +176,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) return nr; } -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -193,8 +191,8 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) return nr; } -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, - enum kbase_atom_gpu_rb_state min_rb_state) +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, + enum kbase_atom_gpu_rb_state min_rb_state) { int nr = 0; int i; @@ -244,9 +242,11 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, bool secure) { - int js, i; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -261,7 +261,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, return false; } -int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -430,9 +430,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, * * Return: true if any slots other than @js are busy, false otherwise */ -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) { - int slot; + unsigned int slot; for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { if (slot == js) @@ -844,7 +844,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, void kbase_backend_slot_update(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1013,25 +1013,25 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); - if (!kbase_job_hw_submit(kbdev, katom[idx], js)) + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + + /* 
Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. + */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* Inform platform at start/finish of atom */ + kbasep_platform_event_atom_submit(katom[idx]); + } else break; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_GPU_RB_SUBMITTED: - - /* Inform power management at start/finish of - * atom so it can update its GPU utilisation - * metrics. - */ - kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); - - /* Inform platform at start/finish of atom */ - kbasep_platform_event_atom_submit(katom[idx]); - break; case KBASE_ATOM_GPU_RB_RETURN_TO_JS: @@ -1111,8 +1111,7 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, * * Return: true if an atom was evicted, false otherwise. */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code) +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) { struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; @@ -1120,6 +1119,10 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, lockdep_assert_held(&kbdev->hwaccess_lock); katom = kbase_gpu_inspect(kbdev, js, 0); + if (!katom) { + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); + return false; + } next_katom = kbase_gpu_inspect(kbdev, js, 1); if (next_katom && @@ -1184,13 +1187,18 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * on the HW and returned to the JS. */ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp) +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - struct kbase_context *kctx = katom->kctx; + struct kbase_context *kctx = NULL; + + if (unlikely(!katom)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } + + kctx = katom->kctx; dev_dbg(kbdev->dev, "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", @@ -1243,7 +1251,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } } else if (completion_code != BASE_JD_EVENT_DONE) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int i; + unsigned int i; if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", @@ -1388,7 +1396,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1416,7 +1424,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_gpu_in_protected_mode(kbdev)); WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + "Protected atom on JS%u not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1512,10 +1520,8 @@ static bool should_stop_next_atom(struct kbase_device *kbdev, return ret; } -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom, - u32 action) +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *katom, u32 action) { struct 
kbase_context *kctx = katom->kctx; u32 hw_action = action & JS_COMMAND_MASK; @@ -1559,11 +1565,8 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) return -1; } -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action) +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action) { struct kbase_jd_atom *katom_idx0; struct kbase_context *kctx_idx0 = NULL; @@ -1816,7 +1819,7 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, void kbase_gpu_dump_slots(struct kbase_device *kbdev) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1831,12 +1834,10 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) idx); if (katom) - dev_info(kbdev->dev, - " js%d idx%d : katom=%pK gpu_rb_state=%d\n", - js, idx, katom, katom->gpu_rb_state); + dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); else - dev_info(kbdev->dev, " js%d idx%d : empty\n", - js, idx); + dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); } } @@ -1845,7 +1846,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) { - int js; + unsigned int js; bool tracked = false; lockdep_assert_held(&kbdev->hwaccess_lock); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h index d3ff203762f9..32be0bf44655 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,8 +40,7 @@ * * Return: true if job evicted from NEXT registers, false otherwise */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code); +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code); /** * kbase_gpu_complete_hw - Complete an atom on job slot js @@ -53,10 +52,8 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * completed * @end_timestamp: Time of completion */ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp); +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); /** * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer @@ -68,8 +65,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * Return: The atom at that position in the ringbuffer * or NULL if no atom present */ -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx); +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx); /** * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c index a2f824da5e04..cbc88f91a400 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -91,7 +91,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; struct kbase_backend_data *backend; - int s; + unsigned int s; bool reset_needed = false; KBASE_DEBUG_ASSERT(timer != NULL); @@ -375,4 +375,3 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev) backend->timeouts_updated = true; } - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c index 1a0209f702ac..19c345341ea9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c @@ -62,8 +62,9 @@ * document */ #include +#include #include -#include +#include #include #if MALI_USE_CSF @@ -319,7 +320,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -337,21 +338,6 @@ static const struct control_reg_values_t all_control_reg_values[] = { .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, - { - .name = "tDUx", - .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, - .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, - }, { .name = "tODx", .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), @@ -364,7 +350,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -412,7 +398,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, .stack_present = 0xF, }, { @@ -428,7 +414,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, .stack_present = 0xF, }, }; @@ -530,17 +516,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= 64)) + if (WARN_ON(event_index >= + (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) return 0; /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for * IPA counters. If selected, the value returned for them will be zero. 
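A minimal sketch of the index handling that the named constants replace here: the first few dwords of a counter block are header pseudo-counters (PRFCNT_EN, TIMESTAMP_LO/HI) that carry no IPA value, so a selected event index is only usable once it clears the header and stays within the per-core block. The constant values and the function name below are assumptions for illustration, not the driver's definitions.

/* Sketch only: validate an event index and convert it to a block offset. */
#define EXAMPLE_HEADER_DWORDS		4u	/* assumed header size */
#define EXAMPLE_COUNTERS_PER_CORE	60u	/* assumed payload size */

static bool example_event_index_to_offset(u32 event_index, u32 *offset)
{
	/* Header pseudo-counters read back as zero for IPA purposes. */
	if (event_index < EXAMPLE_HEADER_DWORDS)
		return false;

	/* Only the primary counter block is modelled. */
	if (event_index >= EXAMPLE_HEADER_DWORDS + EXAMPLE_COUNTERS_PER_CORE)
		return false;

	*offset = event_index - EXAMPLE_HEADER_DWORDS;
	return true;
}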
*/ - if (WARN_ON(event_index <= 3)) + if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) return 0; - event_index -= 4; + event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; spin_lock_irqsave(&performance_counters.access_lock, flags); @@ -736,7 +723,7 @@ void gpu_model_glb_request_job_irq(void *model) spin_lock_irqsave(&hw_error_status.access_lock, flags); hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ); } #endif /* !MALI_USE_CSF */ @@ -768,7 +755,7 @@ static void init_register_statuses(struct dummy_model_t *dummy) performance_counters.time = 0; } -static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) +static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot) { lockdep_assert_held(&hw_error_status.access_lock); @@ -1101,7 +1088,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp return ret; } -void *midgard_model_create(const void *config) +void *midgard_model_create(struct kbase_device *kbdev) { struct dummy_model_t *dummy = NULL; @@ -1118,7 +1105,12 @@ void *midgard_model_create(const void *config) GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); performance_counters.shader_present = get_implementation_register( GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + + gpu_device_set_data(dummy, kbdev); + + dev_info(kbdev->dev, "Using Dummy Model"); } + return dummy; } @@ -1134,7 +1126,7 @@ static void midgard_model_get_outputs(void *h) lockdep_assert_held(&hw_error_status.access_lock); if (hw_error_status.job_irq_status) - gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || @@ -1145,10 +1137,10 @@ static void midgard_model_get_outputs(void *h) (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || #endif (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) - gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) - gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); } static void midgard_model_update(void *h) @@ -1215,7 +1207,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) } } -u8 midgard_model_write_reg(void *h, u32 addr, u32 value) +void midgard_model_write_reg(void *h, u32 addr, u32 value) { unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -1225,7 +1217,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) #if !MALI_USE_CSF if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { - int slot_idx = (addr >> 7) & 0xf; + unsigned int slot_idx = (addr >> 7) & 0xf; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { @@ -1607,11 +1599,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) midgard_model_update(dummy); midgard_model_get_outputs(dummy); spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - - return 1; } -u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) +void midgard_model_read_reg(void *h, u32 addr, u32 *const value) { unsigned long flags; 
struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -2051,8 +2041,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) spin_unlock_irqrestore(&hw_error_status.access_lock, flags); CSTD_UNUSED(dummy); - - return 1; } static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, @@ -2228,3 +2216,16 @@ int gpu_model_control(void *model, return 0; } + +/** + * kbase_is_gpu_removed - Has the GPU been removed. + * @kbdev: Kbase device pointer + * + * This function would return true if the GPU has been removed. + * It is stubbed here + * Return: Always false + */ +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + return false; +} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 7d370de9f601..84842291c0f7 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -21,11 +21,24 @@ /* * Dummy Model interface + * + * Support for NO_MALI dummy Model interface. + * + * +-----------------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------------+ + * | Model Linux Framework | + * +-----------------------------------+ + * | Model Dummy interface definitions | + * +-----------------+-----------------+ + * | Fake R/W | Fake IRQ | + * +-----------------+-----------------+ */ #ifndef _KBASE_MODEL_DUMMY_H_ #define _KBASE_MODEL_DUMMY_H_ +#include #include #define model_error_log(module, ...) pr_err(__VA_ARGS__) @@ -154,11 +167,6 @@ struct gpu_model_prfcnt_en { u32 shader; }; -void *midgard_model_create(const void *config); -void midgard_model_destroy(void *h); -u8 midgard_model_write_reg(void *h, u32 addr, u32 value); -u8 midgard_model_read_reg(void *h, u32 addr, - u32 * const value); void midgard_set_error(int job_slot); int job_atom_inject_error(struct kbase_error_params *params); int gpu_model_control(void *h, @@ -211,17 +219,6 @@ void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt void gpu_model_glb_request_job_irq(void *model); #endif /* MALI_USE_CSF */ -enum gpu_dummy_irq { - GPU_DUMMY_JOB_IRQ, - GPU_DUMMY_GPU_IRQ, - GPU_DUMMY_MMU_IRQ -}; - -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq); -void gpu_device_set_data(void *model, void *data); -void *gpu_device_get_data(void *model); - extern struct error_status_t hw_error_status; #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c index 972d1c87fb1a..75b1e7e656c0 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c @@ -21,7 +21,7 @@ #include #include -#include "backend/gpu/mali_kbase_model_dummy.h" +#include "backend/gpu/mali_kbase_model_linux.h" static struct kbase_error_atom *error_track_list; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c index 7887cb240d43..b37680ddb29b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c @@ -20,12 +20,12 @@ */ /* - * Model interface + * Model Linux Framework interfaces. 
*/ #include #include -#include + #include "backend/gpu/mali_kbase_model_linux.h" #include "device/mali_kbase_device.h" #include "mali_kbase_irq_internal.h" @@ -105,8 +105,7 @@ static void serve_mmu_irq(struct work_struct *work) kmem_cache_free(kbdev->irq_slab, data); } -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq) +void gpu_device_raise_irq(void *model, enum model_linux_irqs irq) { struct model_irq_data *data; struct kbase_device *kbdev = gpu_device_get_data(model); @@ -120,15 +119,15 @@ void gpu_device_raise_irq(void *model, data->kbdev = kbdev; switch (irq) { - case GPU_DUMMY_JOB_IRQ: + case MODEL_LINUX_JOB_IRQ: INIT_WORK(&data->work, serve_job_irq); atomic_set(&kbdev->serving_job_irq, 1); break; - case GPU_DUMMY_GPU_IRQ: + case MODEL_LINUX_GPU_IRQ: INIT_WORK(&data->work, serve_gpu_irq); atomic_set(&kbdev->serving_gpu_irq, 1); break; - case GPU_DUMMY_MMU_IRQ: + case MODEL_LINUX_MMU_IRQ: INIT_WORK(&data->work, serve_mmu_irq); atomic_set(&kbdev->serving_mmu_irq, 1); break; @@ -165,22 +164,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) return val; } - KBASE_EXPORT_TEST_API(kbase_reg_read); -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. - * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} - int kbase_install_interrupts(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -239,16 +224,12 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); int kbase_gpu_device_create(struct kbase_device *kbdev) { - kbdev->model = midgard_model_create(NULL); + kbdev->model = midgard_model_create(kbdev); if (kbdev->model == NULL) return -ENOMEM; - gpu_device_set_data(kbdev->model, kbdev); - spin_lock_init(&kbdev->reg_op_lock); - dev_warn(kbdev->dev, "Using Dummy Model"); - return 0; } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h index dcb2e7cf7c70..a1c480eaf49d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,13 +20,132 @@ */ /* - * Model interface + * Model Linux Framework interfaces. + * + * This framework is used to provide generic Kbase Models interfaces. + * Note: Backends cannot be used together; the selection is done at build time. 
+ * + * - Without Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | HW interface definitions | + * +-----------------------------+ + * + * - With Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | Model Linux Framework | + * +-----------------------------+ + * | Model interface definitions | + * +-----------------------------+ */ #ifndef _KBASE_MODEL_LINUX_H_ #define _KBASE_MODEL_LINUX_H_ +/* + * Include Model definitions + */ + +#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#include +#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) +/** + * kbase_gpu_device_create() - Generic create function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_create() + * + * Return: 0 on success, error code otherwise. + */ int kbase_gpu_device_create(struct kbase_device *kbdev); + +/** + * kbase_gpu_device_destroy() - Generic create function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_destroy() + */ void kbase_gpu_device_destroy(struct kbase_device *kbdev); -#endif /* _KBASE_MODEL_LINUX_H_ */ +/** + * midgard_model_create() - Private create function. + * + * @kbdev: Kbase device. + * + * This hook is specific to the model built in Kbase. + * + * Return: Model handle. + */ +void *midgard_model_create(struct kbase_device *kbdev); + +/** + * midgard_model_destroy() - Private destroy function. + * + * @h: Model handle. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_destroy(void *h); + +/** + * midgard_model_write_reg() - Private model write function. + * + * @h: Model handle. + * @addr: Address at which to write. + * @value: value to write. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_write_reg(void *h, u32 addr, u32 value); + +/** + * midgard_model_read_reg() - Private model read function. + * + * @h: Model handle. + * @addr: Address from which to read. + * @value: Pointer where to store the read value. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_read_reg(void *h, u32 addr, u32 *const value); + +/** + * gpu_device_raise_irq() - Private IRQ raise function. + * + * @model: Model handle. + * @irq: IRQ type to raise. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_raise_irq(void *model, enum model_linux_irqs irq); + +/** + * gpu_device_set_data() - Private model set data function. + * + * @model: Model handle. + * @data: Data carried by model. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_set_data(void *model, void *data); + +/** + * gpu_device_get_data() - Private model get data function. + * + * @model: Model handle. + * + * This hook is global to the model Linux framework. + * + * Return: Pointer to the data carried by model. 
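A minimal sketch of what a backend behind these hooks can look like, assuming nothing more than a flat register array; it is not the shipped dummy model, and the struct, array size and bounds checks are illustrative only. The gpu_device_set_data() call mirrors what the in-tree model's create hook does.

/* Sketch only: a trivial model backing the hooks declared above. */
struct example_model {
	u32 regs[64];			/* toy register file */
};

void *midgard_model_create(struct kbase_device *kbdev)
{
	struct example_model *m = kzalloc(sizeof(*m), GFP_KERNEL);

	if (m)
		gpu_device_set_data(m, kbdev);	/* let the framework find kbdev */
	return m;
}

void midgard_model_destroy(void *h)
{
	kfree(h);
}

void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
	struct example_model *m = h;

	if (addr / sizeof(u32) < ARRAY_SIZE(m->regs))
		m->regs[addr / sizeof(u32)] = value;
	/* A real model decodes @addr and raises MODEL_LINUX_*_IRQ via
	 * gpu_device_raise_irq() when a write completes an operation.
	 */
}

void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
	struct example_model *m = h;

	*value = (addr / sizeof(u32) < ARRAY_SIZE(m->regs)) ?
			m->regs[addr / sizeof(u32)] : 0;
}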
+ */ +void *gpu_device_get_data(void *model); +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ + +#endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c index 9e38b904b459..e2b0a919282e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c @@ -26,9 +26,7 @@ #include #include #include -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include #include int kbase_pm_ca_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index 8173cf6ba7d7..d86a388c64fb 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -804,6 +804,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) KBASE_MCU_HCTL_SHADERS_PEND_ON; } else backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } else if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ } break; @@ -832,8 +843,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; } @@ -854,9 +864,19 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) backend->mcu_state = KBASE_MCU_HCTL_MCU_ON_RECHECK; } - } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) { + } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + else if (kbdev->csf.coresight.disable_on_pmode_enter) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE; + } else if (kbdev->csf.coresight.enable_on_pmode_exit) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE; } +#endif break; case KBASE_MCU_HCTL_MCU_ON_RECHECK: @@ -947,12 +967,46 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) #ifdef KBASE_PM_RUNTIME if (backend->gpu_sleep_mode_active) backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE; - else + else { #endif backend->mcu_state = KBASE_MCU_ON_HALT; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + } } break; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.disable_on_pmode_enter = false; + } + break; + case 
KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.enable_on_pmode_exit = false; + } + break; + case KBASE_MCU_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) + backend->mcu_state = KBASE_MCU_ON_HALT; + break; + + case KBASE_MCU_CORESIGHT_ENABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + break; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + case KBASE_MCU_ON_HALT: if (!kbase_pm_is_mcu_desired(kbdev)) { kbase_csf_firmware_trigger_mcu_halt(kbdev); @@ -1045,6 +1099,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) /* Reset complete */ if (!backend->in_reset) backend->mcu_state = KBASE_MCU_OFF; + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ break; default: @@ -1142,13 +1201,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #if MALI_USE_CSF /* Due to the HW issue GPU2019-3878, need to prevent L2 power off * whilst MMU command is in progress. + * Also defer the power-down if MMU is in process of page migration. */ - return !kbdev->mmu_hw_operation_in_progress; + return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; #else - return true; + return !kbdev->mmu_page_migrate_in_progress; #endif } +static bool can_power_up_l2(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Avoiding l2 transition if MMU is undergoing page migration */ + return !kbdev->mmu_page_migrate_in_progress; +} + static bool need_tiler_control(struct kbase_device *kbdev) { #if MALI_USE_CSF @@ -1220,7 +1288,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) switch (backend->l2_state) { case KBASE_L2_OFF: - if (kbase_pm_is_l2_desired(kbdev)) { + if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) /* Enable HW timer of IPA control before * L2 cache is powered-up. diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h index 115cd3c34d90..e66ce57d3120 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h @@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd } #endif +/** + * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Check whether the L2 state is in power transition phase or not. If it is, the MMU + * page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU + * page migration is intended, immediately start the MMU migration action without + * dropping the lock. When page migration begins, a flag is set in kbdev that would + * prevent the L2 state machine traversing into power transition phases, until + * the MMU migration action ends. 
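A sketch of the caller-side pattern this helper implies, assuming the migration work itself is done by a hypothetical example_do_mmu_update(); only kbase_pm_l2_allow_mmu_page_migration(), hwaccess_lock and mmu_page_migrate_in_progress are taken from the patch, everything else is illustrative.

/* Sketch only: gate a page migration on the L2 state, then block L2
 * transitions for its duration.
 */
static int example_migrate_one_page(struct kbase_device *kbdev)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	if (!kbase_pm_l2_allow_mmu_page_migration(kbdev)) {
		/* L2 is mid power transition: retry the migration later. */
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
		return -EAGAIN;
	}
	/* Checked and flagged without dropping the lock, so the L2 state
	 * machine cannot start a transition in between.
	 */
	kbdev->mmu_page_migrate_in_progress = true;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	err = example_do_mmu_update(kbdev);	/* hypothetical MMU work */

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbdev->mmu_page_migrate_in_progress = false;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return err;
}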
+ * + * Return: true if MMU page migration is allowed + */ +static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); +} + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h index 5e57c9d0c0e1..3b448e397e72 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,6 +66,13 @@ * is being put to sleep. * @ON_PEND_SLEEP: MCU sleep is in progress. * @IN_SLEEP: Sleep request is completed and MCU has halted. + * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to + * be requested, Coresight is being disabled. + * @ON_PMODE_EXIT_CORESIGHT_ENABLE : The MCU is on, protected mode exit has happened + * Coresight is being enabled. + * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled. + * @CORESIGHT_ENABLE: The MCU is on, host does not have control and + * Coresight is being enabled. */ KBASEP_MCU_STATE(OFF) KBASEP_MCU_STATE(PEND_ON_RELOAD) @@ -92,3 +99,10 @@ KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND) KBASEP_MCU_STATE(ON_SLEEP_INITIATE) KBASEP_MCU_STATE(ON_PEND_SLEEP) KBASEP_MCU_STATE(IN_SLEEP) +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +/* Additional MCU states for Coresight */ +KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE) +KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE) +KBASEP_MCU_STATE(CORESIGHT_DISABLE) +KBASEP_MCU_STATE(CORESIGHT_ENABLE) +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c index 29e945d00fbe..865f526f61f2 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c @@ -464,7 +464,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev) */ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->pm.backend.metrics.lock); diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index a17ff432398c..48c1fb44f494 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -28,7 +28,7 @@ bob_defaults { defaults: [ "kernel_defaults", ], - no_mali: { + mali_no_mali: { kbuild_options: [ "CONFIG_MALI_BIFROST_NO_MALI=y", "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", @@ -140,6 +140,9 @@ bob_defaults { mali_fw_core_dump: { kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"], }, + mali_coresight: { + kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], + }, kbuild_options: [ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c index 
3abc7a2a66f4..07d277b947d2 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c @@ -34,6 +34,7 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) #include #include +#include #include #include #include @@ -50,6 +51,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx) kbase_jit_debugfs_init(kctx); kbase_csf_queue_group_debugfs_init(kctx); kbase_csf_kcpu_debugfs_init(kctx); + kbase_csf_sync_debugfs_init(kctx); kbase_csf_tiler_heap_debugfs_init(kctx); kbase_csf_tiler_heap_total_debugfs_init(kctx); kbase_csf_cpu_queue_debugfs_init(kctx); diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c index b6abfc44d212..792f724f16e4 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -129,10 +129,6 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); @@ -251,15 +247,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild index 1474bdaacb0d..44217dba10c0 100644 --- a/drivers/gpu/arm/bifrost/csf/Kbuild +++ b/drivers/gpu/arm/bifrost/csf/Kbuild @@ -31,20 +31,24 @@ bifrost_kbase-y += \ csf/mali_kbase_csf_reset_gpu.o \ csf/mali_kbase_csf_csg_debugfs.o \ csf/mali_kbase_csf_kcpu_debugfs.o \ + csf/mali_kbase_csf_sync_debugfs.o \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ csf/mali_kbase_csf_event.o \ csf/mali_kbase_csf_firmware_log.o \ - csf/mali_kbase_csf_tiler_heap_reclaim.o + csf/mali_kbase_csf_firmware_core_dump.o \ + csf/mali_kbase_csf_tiler_heap_reclaim.o \ + csf/mali_kbase_csf_mcu_shared_reg.o -bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o - -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o +ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y) +bifrost_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o +else +bifrost_kbase-y += csf/mali_kbase_csf_firmware.o +endif bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o - ifeq ($(KBUILD_EXTMOD),) # in-tree -include $(src)/csf/ipa_control/Kbuild diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index b77007300c5c..5f4061b2ab62 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,15 @@ #include #include #include "mali_kbase_csf_event.h" -#include +#include +#include "mali_kbase_csf_mcu_shared_reg.h" #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) +#define PROTM_ALLOC_MAX_RETRIES ((u8)5) + const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { KBASE_QUEUE_GROUP_PRIORITY_HIGH, KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, @@ -130,21 +133,6 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx, return 0; } -static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg, - struct tagged_addr *phys) -{ - size_t num_pages = 2; - - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys, - num_pages, MCU_AS_NR); - - WARN_ON(reg->flags & KBASE_REG_FREE); - - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); -} - static void init_user_io_pages(struct kbase_queue *queue) { u32 *input_addr = (u32 *)(queue->user_io_addr); @@ -162,76 +150,15 @@ static void init_user_io_pages(struct kbase_queue *queue) output_addr[CS_ACTIVE/4] = 0; } -/* Map the input/output pages in the shared interface segment of MCU firmware - * address space. - */ -static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, - struct tagged_addr *phys, struct kbase_va_region *reg) -{ - unsigned long mem_flags = KBASE_REG_GPU_RD; - const size_t num_pages = 2; - int ret; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. 
- */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - if (kbdev->system_coherency == COHERENCY_NONE) { - mem_flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); - } else { - mem_flags |= KBASE_REG_SHARE_BOTH | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); - } - - mutex_lock(&kbdev->csf.reg_lock); - ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); - - if (ret) - return ret; - - /* Map input page */ - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - &phys[0], 1, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_IO, mmu_sync_info); - if (ret) - goto bad_insert; - - /* Map output page, it needs rw access */ - mem_flags |= KBASE_REG_GPU_WR; - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, - reg->start_pfn + 1, &phys[1], 1, mem_flags, - MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, - mmu_sync_info); - if (ret) - goto bad_insert_output_page; - - return 0; - -bad_insert_output_page: - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR); -bad_insert: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - - return ret; -} - static void kernel_unmap_user_io_pages(struct kbase_context *kctx, struct kbase_queue *queue) { - const size_t num_pages = 2; - kbase_gpu_vm_lock(kctx); vunmap(queue->user_io_addr); - WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages)); - atomic_sub(num_pages, &kctx->permanent_mapped_pages); + WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES); + atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages); kbase_gpu_vm_unlock(kctx); } @@ -312,63 +239,56 @@ static void release_queue(struct kbase_queue *queue); * If an explicit or implicit unbind was missed by the userspace then the * mapping will persist. On process exit kernel itself will remove the mapping. */ -static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { - const size_t num_pages = 2; - - gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]); kernel_unmap_user_io_pages(kctx, queue); kbase_mem_pool_free_pages( &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, true, false); + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); + kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES); - kfree(queue->reg); - queue->reg = NULL; + /* The user_io_gpu_va should have been unmapped inside the scheduler */ + WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping"); /* If the queue has already been terminated by userspace * then the ref count for queue object will drop to 0 here. 
*/ release_queue(queue); } +KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); -int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { struct kbase_device *kbdev = kctx->kbdev; - struct kbase_va_region *reg; - const size_t num_pages = 2; int ret; lockdep_assert_held(&kctx->csf.lock); - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, - num_pages, KBASE_REG_ZONE_MCU_SHARED); - if (!reg) - return -ENOMEM; - - ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages, + ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false); - - if (ret != num_pages) - goto phys_alloc_failed; + if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { + /* Marking both the phys to zero for indicating there is no phys allocated */ + queue->phys[0].tagged_addr = 0; + queue->phys[1].tagged_addr = 0; + return -ENOMEM; + } ret = kernel_map_user_io_pages(kctx, queue); if (ret) goto kernel_map_failed; + kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES); init_user_io_pages(queue); - ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg); - if (ret) - goto gpu_mmap_failed; - - queue->reg = reg; + /* user_io_gpu_va is only mapped when scheduler decides to put the queue + * on slot at runtime. Initialize it to 0, signalling no mapping. + */ + queue->user_io_gpu_va = 0; mutex_lock(&kbdev->csf.reg_lock); - if (kbdev->csf.db_file_offsets > - (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) + if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) kbdev->csf.db_file_offsets = 0; queue->db_file_offset = kbdev->csf.db_file_offsets; @@ -388,19 +308,16 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, return 0; -gpu_mmap_failed: - kernel_unmap_user_io_pages(kctx, queue); - kernel_map_failed: - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, false, false); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false); + /* Marking both the phys to zero for indicating there is no phys allocated */ + queue->phys[0].tagged_addr = 0; + queue->phys[1].tagged_addr = 0; -phys_alloc_failed: - kfree(reg); - - return -ENOMEM; + return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, u8 group_handle) @@ -418,6 +335,12 @@ static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, return NULL; } +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) +{ + return find_queue_group(kctx, group_handle); +} +KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); + int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle) { @@ -468,6 +391,17 @@ static void release_queue(struct kbase_queue *queue) "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, queue->kctx->id); kbase_csf_event_remove_error(queue->kctx, &queue->error); + + /* After this the Userspace would be able to free the + * memory for GPU queue. 
In case the Userspace missed + * terminating the queue, the cleanup will happen on + * context termination where tear down of region tracker + * would free up the GPU queue memory. + */ + kbase_gpu_vm_lock(queue->kctx); + kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg); + kbase_gpu_vm_unlock(queue->kctx); + kfree(queue); } } @@ -521,7 +455,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, region = kbase_region_tracker_find_region_enclosing_address(kctx, queue_addr); - if (kbase_is_region_invalid_or_free(region)) { + if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || + region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ret = -ENOENT; goto out_unlock_vm; } @@ -570,7 +505,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->kctx = kctx; queue->base_addr = queue_addr; - queue->queue_reg = region; + queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region); queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; queue->enabled = false; @@ -608,7 +543,6 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->extract_ofs = 0; - region->flags |= KBASE_REG_NO_USER_FREE; region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -702,16 +636,8 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, unbind_queue(kctx, queue); kbase_gpu_vm_lock(kctx); - if (!WARN_ON(!queue->queue_reg)) { - /* After this the Userspace would be able to free the - * memory for GPU queue. In case the Userspace missed - * terminating the queue, the cleanup will happen on - * context termination where tear down of region tracker - * would free up the GPU queue memory. - */ - queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; + if (!WARN_ON(!queue->queue_reg)) queue->queue_reg->user_data = NULL; - } kbase_gpu_vm_unlock(kctx); release_queue(queue); @@ -875,6 +801,15 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, if (WARN_ON(slot_bitmap > allowed_bitmap)) return; + /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and + * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request + * or 2 CSI requests overlap and FW ends up missing the 2nd request. + * Memory barrier is required, both on Host and FW side, to guarantee the ordering. + * + * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. + */ + dmb(osh); + value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); value ^= slot_bitmap; kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, @@ -913,6 +848,14 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, WARN_ON(csi_index >= ginfo->stream_num)) return; + /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to + * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visibile to + * FW before CS_REQ/ACK is set. + * + * 'osh' is used as CPU and GPU would be in the same outer shareable domain. 
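A sketch of the publish-then-ring ordering the comment above requires: the request word must be visible to the firmware before the doorbell toggle, so an outer-shareable barrier separates the two updates, matching the dmb(osh) the patch inserts before the doorbell register accesses. The function and parameter names below are illustrative, not the driver's.

/* Sketch only: order a CS request update before the doorbell toggle. */
static void example_ring_doorbell(u32 *cs_req, u32 *db_req, u32 new_req, u32 db_bit)
{
	WRITE_ONCE(*cs_req, new_req);				/* publish the request */
	dmb(osh);						/* outer-shareable ordering */
	WRITE_ONCE(*db_req, READ_ONCE(*db_req) ^ db_bit);	/* ring the doorbell */
}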
+ */ + dmb(osh); + value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); value ^= (1 << csi_index); kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, @@ -930,6 +873,8 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_va_region *region; int err = 0; + KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr); + /* GPU work submission happening asynchronously to prevent the contention with * scheduler lock and as the result blocking application thread. For this reason, * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr @@ -1018,6 +963,15 @@ static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue) } } +static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue) +{ + /* The queue's phys are zeroed when allocation fails. Both of them being + * zero is an impossible condition for a successful allocated set of phy pages. + */ + + return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr); +} + void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) { struct kbase_context *kctx = queue->kctx; @@ -1043,8 +997,8 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) unbind_queue(kctx, queue); } - /* Free the resources, if allocated for this queue. */ - if (queue->reg) + /* Free the resources, if allocated phys for this queue */ + if (kbase_csf_queue_phys_allocated(queue)) kbase_csf_free_command_stream_user_pages(kctx, queue); } @@ -1057,8 +1011,8 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue) WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); unbind_stopped_queue(kctx, queue); - /* Free the resources, if allocated for this queue. */ - if (queue->reg) + /* Free the resources, if allocated phys for this queue */ + if (kbase_csf_queue_phys_allocated(queue)) kbase_csf_free_command_stream_user_pages(kctx, queue); } @@ -1121,167 +1075,39 @@ static bool iface_has_enough_streams(struct kbase_device *const kbdev, * @kctx: Pointer to kbase context where the queue group is created at * @s_buf: Pointer to suspend buffer that is attached to queue group * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. + * Return: 0 if phy-pages for the suspend buffer is successfully allocated. + * Otherwise -ENOMEM or error code. */ static int create_normal_suspend_buffer(struct kbase_context *const kctx, struct kbase_normal_suspend_buffer *s_buf) { - struct kbase_va_region *reg = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); - int err = 0; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + int err; lockdep_assert_held(&kctx->csf.lock); - /* Allocate and initialize Region Object */ - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, - nr_pages, KBASE_REG_ZONE_MCU_SHARED); - - if (!reg) - return -ENOMEM; + /* The suspend buffer's mapping address is valid only when the CSG is to + * run on slot, initializing it 0, signalling the buffer is not mapped. 
+ */ + s_buf->gpu_va = 0; s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); - if (!s_buf->phy) { - err = -ENOMEM; - goto phy_alloc_failed; - } + if (!s_buf->phy) + return -ENOMEM; /* Get physical page for a normal suspend buffer */ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, &s_buf->phy[0], false); - if (err < 0) - goto phy_pages_alloc_failed; - - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page - */ - mutex_lock(&kctx->kbdev->csf.reg_lock); - err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kctx->kbdev->csf.reg_lock); - - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - reg->start_pfn, &s_buf->phy[0], nr_pages, - mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - - return 0; - -mmu_insert_failed: - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, - &s_buf->phy[0], false, false); - -phy_pages_alloc_failed: - kfree(s_buf->phy); -phy_alloc_failed: - kfree(reg); - - return err; -} - -/** - * create_protected_suspend_buffer() - Create protected-mode suspend buffer - * per queue group - * - * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to suspend buffer that is attached to queue group - * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. - */ -static int create_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) -{ - struct kbase_va_region *reg = NULL; - struct tagged_addr *phys = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - int err = 0; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. 
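[Editor's note] create_normal_suspend_buffer() now only allocates the backing physical pages; the GPU mapping is established later, when the group is bound to an MCU shared region at on-slot time, so the error path collapses to a single kfree. A user-space sketch of the same allocate-array-then-pages shape, with PFN_UP re-derived from PAGE_SIZE (all names here are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
/* Round a byte count up to whole pages, as PFN_UP() does in the kernel. */
#define PFN_UP(x)  (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

struct suspend_buf_sketch {
	uint64_t gpu_va;	/* 0 while the buffer is not mapped on the GPU */
	void   **phy;		/* one entry per backing page                  */
	size_t   nr_pages;
};

static int create_suspend_buf(struct suspend_buf_sketch *s, size_t suspend_size)
{
	size_t i, nr_pages = PFN_UP(suspend_size);

	s->gpu_va = 0;			/* not mapped yet */
	s->phy = calloc(nr_pages, sizeof(*s->phy));
	if (!s->phy)
		return -1;

	for (i = 0; i < nr_pages; i++) {
		s->phy[i] = calloc(1, PAGE_SIZE);
		if (!s->phy[i]) {
			while (i--)
				free(s->phy[i]);
			free(s->phy);	/* single unwind step, as in the patch */
			s->phy = NULL;
			return -1;
		}
	}
	s->nr_pages = nr_pages;
	return 0;
}

int main(void)
{
	struct suspend_buf_sketch s;

	/* Cleanup omitted for brevity. */
	if (create_suspend_buf(&s, 9300) == 0)
		printf("allocated %zu pages for a 9300-byte suspend buffer\n", s.nr_pages);
	return 0;
}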
- */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - /* Allocate and initialize Region Object */ - reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - nr_pages, KBASE_REG_ZONE_MCU_SHARED); - - if (!reg) - return -ENOMEM; - - phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); - if (!phys) { - err = -ENOMEM; - goto phy_alloc_failed; + if (err < 0) { + kfree(s_buf->phy); + return err; } - s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, - nr_pages, true); - if (s_buf->pma == NULL) { - err = -ENOMEM; - goto pma_alloc_failed; - } - - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page - */ - mutex_lock(&kbdev->csf.reg_lock); - err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); - - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - phys, nr_pages, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - kfree(phys); + kbase_process_page_usage_inc(kctx, nr_pages); return 0; - -mmu_insert_failed: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); -pma_alloc_failed: - kfree(phys); -phy_alloc_failed: - kfree(reg); - - return err; } static void timer_event_worker(struct work_struct *data); @@ -1302,26 +1128,17 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, static int create_suspend_buffers(struct kbase_context *const kctx, struct kbase_queue_group * const group) { - int err = 0; - if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); return -ENOMEM; } - if (kctx->kbdev->csf.pma_dev) { - err = create_protected_suspend_buffer(kctx->kbdev, - &group->protected_suspend_buf); - if (err) { - term_normal_suspend_buffer(kctx, - &group->normal_suspend_buf); - dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n"); - } - } else { - group->protected_suspend_buf.reg = NULL; - } + /* Protected suspend buffer, runtime binding so just initialize it */ + group->protected_suspend_buf.gpu_va = 0; + group->protected_suspend_buf.pma = NULL; + group->protected_suspend_buf.alloc_retries = 0; - return err; + return 0; } /** @@ -1387,6 +1204,9 @@ static int create_queue_group(struct kbase_context *const kctx, group->cs_unrecoverable = false; group->reevaluate_idle_status = false; + group->csg_reg = NULL; + group->csg_reg_bind_retries = 0; + group->dvs_buf = create->in.dvs_buf; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -1518,65 +1338,39 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, * @s_buf: Pointer to queue group suspend buffer to be freed */ static void term_normal_suspend_buffer(struct kbase_context *const kctx, - struct kbase_normal_suspend_buffer *s_buf) + struct kbase_normal_suspend_buffer *s_buf) { - const size_t nr_pages = - PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); + const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); lockdep_assert_held(&kctx->csf.lock); - WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR)); + /* The group should not have a 
bind remaining on any suspend buf region */ + WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination"); - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); - - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, s_buf->reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); - - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - nr_pages, &s_buf->phy[0], false, false); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, + &s_buf->phy[0], false, false); + kbase_process_page_usage_dec(kctx, nr_pages); kfree(s_buf->phy); s_buf->phy = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; } /** - * term_protected_suspend_buffer() - Free normal-mode suspend buffer of + * term_protected_suspend_buffer() - Free protected-mode suspend buffer of * queue group * * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to queue group suspend buffer to be freed + * @sbuf: Pointer to queue group suspend buffer to be freed */ static void term_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) + struct kbase_protected_suspend_buffer *sbuf) { - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); - size_t i = 0; - - for (i = 0; phys && i < nr_pages; i++) - phys[i] = as_tagged(s_buf->pma[i]->pa); - - WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys, - nr_pages, MCU_AS_NR)); - - kfree(phys); - - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); - - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, s_buf->reg); - mutex_unlock(&kbdev->csf.reg_lock); - - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); - s_buf->pma = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; + WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!"); + if (sbuf->pma) { + const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); + sbuf->pma = NULL; + } } void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) @@ -1743,6 +1537,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, kfree(group); } +KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); int kbase_csf_queue_group_suspend(struct kbase_context *kctx, struct kbase_suspend_copy_buffer *sus_buf, @@ -2022,12 +1817,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. */ #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - if (atomic_read(&queue->refcount) != 1) + WARN_ON(atomic_read(&queue->refcount) != 1); #else - if (refcount_read(&queue->refcount) != 1) + WARN_ON(refcount_read(&queue->refcount) != 1); #endif - dev_warn(kctx->kbdev->dev, - "Releasing queue with incorrect refcounting!\n"); list_del_init(&queue->link); release_queue(queue); } @@ -2378,6 +2171,81 @@ static void handle_progress_timer_event(struct kbase_queue_group *const group) queue_work(group->kctx->csf.wq, &group->timer_event_work); } +/** + * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected + * memory for the protected mode suspend buffer. + * @group: Pointer to the GPU queue group. + * + * Return: 0 if suspend buffer allocation is successful or if its already allocated, otherwise + * negative error value. 
+ */ +static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_context *kctx = group->kctx; + struct tagged_addr *phys = NULL; + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + size_t nr_pages; + int err = 0; + + if (likely(sbuf->pma)) + return 0; + + nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); + if (unlikely(!phys)) { + err = -ENOMEM; + goto phys_free; + } + + mutex_lock(&kctx->csf.lock); + kbase_csf_scheduler_lock(kbdev); + + if (unlikely(!group->csg_reg)) { + /* The only chance of the bound csg_reg is removed from the group is + * that it has been put off slot by the scheduler and the csg_reg resource + * is contended by other groups. In this case, it needs another occasion for + * mapping the pma, which needs a bound csg_reg. Since the group is already + * off-slot, returning no error is harmless as the scheduler, when place the + * group back on-slot again would do the required MMU map operation on the + * allocated and retained pma. + */ + WARN_ON(group->csg_nr >= 0); + dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode", + group->kctx->tgid, group->kctx->id, group->handle); + goto unlock; + } + + /* Allocate the protected mode pages */ + sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); + if (unlikely(!sbuf->pma)) { + err = -ENOMEM; + goto unlock; + } + + /* Map the bound susp_reg to the just allocated pma pages */ + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + +unlock: + kbase_csf_scheduler_unlock(kbdev); + mutex_unlock(&kctx->csf.lock); +phys_free: + kfree(phys); + return err; +} + +static void report_group_fatal_error(struct kbase_queue_group *const group) +{ + struct base_gpu_queue_group_error const + err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, + } } }; + + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); +} + /** * protm_event_worker - Protected mode switch request event handler * called from a workqueue. 
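[Editor's note] alloc_grp_protected_suspend_buffer_pages() above makes the protected-mode suspend buffer a lazy, idempotent allocation: it returns immediately if the PMA already exists, and tolerates the group temporarily having no bound csg_reg. A small sketch of that allocate-once shape; locking and the csg_reg check are reduced to a flag, and all names are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct group_sketch {
	void *pma;		/* protected-memory allocation, NULL until first use    */
	int   has_csg_reg;	/* stands in for "group currently has a bound region"  */
};

/* Allocate the protected suspend buffer at most once.
 * - already allocated  -> success, nothing to do
 * - no bound region    -> success, allocation deferred to the next attempt
 * - allocation failure -> error, caller may retry
 */
static int alloc_pmode_buf_once(struct group_sketch *g, size_t nr_pages)
{
	if (g->pma)
		return 0;		/* idempotent: already allocated */

	if (!g->has_csg_reg)
		return 0;		/* harmless: retried when back on slot */

	g->pma = calloc(nr_pages, 4096);
	return g->pma ? 0 : -1;
}

int main(void)
{
	struct group_sketch g = { NULL, 1 };

	printf("first call:  %d\n", alloc_pmode_buf_once(&g, 4));
	printf("second call: %d (no re-allocation)\n", alloc_pmode_buf_once(&g, 4));
	free(g.pma);
	return 0;
}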
@@ -2390,10 +2258,26 @@ static void protm_event_worker(struct work_struct *data) { struct kbase_queue_group *const group = container_of(data, struct kbase_queue_group, protm_event_work); + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - kbase_csf_scheduler_group_protm_enter(group); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + queue_work(group->kctx->csf.wq, &group->protm_event_work); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); } @@ -2517,7 +2401,10 @@ static void cs_error_worker(struct work_struct *const data) struct kbase_queue_group *group; u8 group_handle; bool reset_prevented = false; - int err = kbase_reset_gpu_prevent_and_wait(kbdev); + int err; + + kbase_debug_csf_fault_wait_completion(kbdev); + err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) dev_warn( @@ -2526,7 +2413,6 @@ static void cs_error_worker(struct work_struct *const data) else reset_prevented = true; - kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&kctx->csf.lock); group = get_bound_queue_group(queue); @@ -2724,12 +2610,17 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, get_queue(queue); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, group, queue, cs_req ^ cs_ack); - if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) { + if (!queue_work(wq, &queue->oom_event_work)) { /* The work item shall not have been * already queued, there can be only * one pending OoM event for a * queue. */ + dev_warn( + kbdev->dev, + "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", + queue->csi_index, group->handle, queue->kctx->tgid, + queue->kctx->id); release_queue(queue); } } @@ -2760,6 +2651,9 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, track->protm_grp = group; } + if (!group->protected_suspend_buf.pma) + queue_work(group->kctx->csf.wq, &group->protm_event_work); + if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2801,8 +2695,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) return; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - ginfo = &kbdev->csf.global_iface.groups[csg_nr]; req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2811,7 +2703,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c /* There may not be any pending CSG/CS interrupts to process */ if ((req == ack) && (irqreq == irqack)) - goto out; + return; /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before * examining the CS_ACK & CS_REQ bits. 
This would ensure that Host @@ -2832,10 +2724,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c * slot scheduler spinlock is required. */ if (!group) - goto out; + return; if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) - goto out; + return; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { kbase_csf_firmware_csg_input_mask(ginfo, @@ -2897,8 +2791,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c process_cs_interrupts(group, ginfo, irqreq, irqack, track); -out: - /* group may still be NULL here */ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); } @@ -3058,6 +2950,10 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) kbase_ipa_control_protm_exited(kbdev); kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); } + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_enable_pmode_exit(kbdev); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ } static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h index b2677405761f..9fbc932b7905 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,12 +40,15 @@ */ #define KBASEP_USER_DB_NR_INVALID ((s8)-1) +/* Number of pages used for GPU command queue's User input & output data */ +#define KBASEP_NUM_CS_USER_IO_PAGES (2) + /* Indicates an invalid value for the scan out sequence number, used to * signify there is no group that has protected mode execution pending. */ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */ +#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -123,6 +126,25 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, void kbase_csf_queue_terminate(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_terminate *term); +/** + * kbase_csf_free_command_stream_user_pages() - Free the resources allocated + * for a queue at the time of bind. + * + * @kctx: Address of the kbase context within which the queue was created. + * @queue: Pointer to the queue to be unlinked. + * + * This function will free the pair of physical pages allocated for a GPU + * command queue, and also release the hardware doorbell page, that were mapped + * into the process address space to enable direct submission of commands to + * the hardware. Also releases the reference taken on the queue when the mapping + * was created. + * + * If an explicit or implicit unbind was missed by the userspace then the + * mapping will persist. On process exit kernel itself will remove the mapping. 
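[Editor's note] The reworked protm_event_worker() above retries the protected suspend buffer allocation by re-queuing its own work item, bounded by PROTM_ALLOC_MAX_RETRIES, and escalates to a group-fatal error once the budget is exhausted or a non-ENOMEM error is seen. A stand-alone sketch of that bounded-retry-then-fail pattern follows; the worker re-queue is modelled as a plain loop and the names are illustrative.

#include <errno.h>
#include <stdio.h>

#define PROTM_ALLOC_MAX_RETRIES_SKETCH 4

/* Pretend allocator: fails with -ENOMEM a few times before succeeding. */
static int try_alloc(int attempt)
{
	return (attempt < 3) ? -ENOMEM : 0;
}

static void report_fatal(void)
{
	fprintf(stderr, "fatal: could not allocate protected suspend buffer\n");
}

/* Each iteration models one run of the worker: on -ENOMEM the driver
 * would re-queue itself; here we just loop, counting attempts the same way.
 */
static void protm_worker_sketch(void)
{
	int retries = 0;

	for (;;) {
		int err = try_alloc(retries);

		if (!err) {
			printf("allocation succeeded after %d retries\n", retries);
			return;			/* enter protected mode */
		}
		if (err == -ENOMEM && retries < PROTM_ALLOC_MAX_RETRIES_SKETCH) {
			retries++;		/* re-queue the work item */
			continue;
		}
		report_fatal();			/* budget exhausted or hard error */
		return;
	}
}

int main(void)
{
	protm_worker_sketch();
	return 0;
}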
+ */ +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue); + /** * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a * GPU command queue. @@ -185,6 +207,20 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick); +/** + * kbase_csf_queue_group_handle_is_valid - Find the queue group corresponding + * to the indicated handle. + * + * @kctx: The kbase context under which the queue group exists. + * @group_handle: Handle for the group which uniquely identifies it within + * the context with which it was created. + * + * This function is used to find the queue group when passed a handle. + * + * Return: Pointer to a queue group on success, NULL on failure + */ +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle); + /** * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle * is valid. @@ -464,4 +500,5 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) return 0; #endif } + #endif /* _KBASE_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c index 3afbe6d4005e..e96044ae6239 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c @@ -100,7 +100,7 @@ static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, } } -static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file) +void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev) { u32 max_csg_slots = kbdev->csf.global_iface.group_num; DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; @@ -116,6 +116,8 @@ static void update_active_groups_status(struct kbase_device *kbdev, struct seq_f * status of all on-slot groups when MCU sleep request is sent to it. */ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { + /* Wait for the MCU sleep request to complete. */ + kbase_pm_wait_for_desired_state(kbdev); bitmap_copy(csg_slots_status_updated, kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots); return; @@ -496,23 +498,19 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, { u32 gr; struct kbase_context *const kctx = file->private; - struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_device *kbdev; if (WARN_ON(!kctx)) return -EINVAL; + kbdev = kctx->kbdev; + seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", MALI_CSF_CSG_DEBUGFS_VERSION); mutex_lock(&kctx->csf.lock); kbase_csf_scheduler_lock(kbdev); - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { - /* Wait for the MCU sleep request to complete. Please refer the - * update_active_groups_status() function for the explanation. - */ - kbase_pm_wait_for_desired_state(kbdev); - } - update_active_groups_status(kbdev, file); + kbase_csf_debugfs_update_active_groups_status(kbdev); for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { struct kbase_queue_group *const group = kctx->csf.queue_groups[gr]; @@ -546,13 +544,7 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, MALI_CSF_CSG_DEBUGFS_VERSION); kbase_csf_scheduler_lock(kbdev); - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { - /* Wait for the MCU sleep request to complete. Please refer the - * update_active_groups_status() function for the explanation. 
- */ - kbase_pm_wait_for_desired_state(kbdev); - } - update_active_groups_status(kbdev, file); + kbase_csf_debugfs_update_active_groups_status(kbdev); for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { struct kbase_queue_group *const group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h index 397e657d2cb6..16a548bf8acb 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,4 +44,11 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx); */ void kbase_csf_debugfs_init(struct kbase_device *kbdev); +/** + * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses + * + * @kbdev: Pointer to the device + */ +void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev); + #endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 32a1c557e387..e4a69cb169c3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -33,6 +33,10 @@ #include "mali_kbase_csf_event.h" #include +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +#include +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + /* Maximum number of KCPU command queues to be created per GPU address space. */ #define KBASEP_MAX_KCPU_QUEUES ((size_t)256) @@ -298,9 +302,9 @@ struct kbase_csf_notification { * * @kctx: Pointer to the base context with which this GPU command queue * is associated. - * @reg: Pointer to the region allocated from the shared - * interface segment for mapping the User mode - * input/output pages in MCU firmware address space. + * @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only + * valid (i.e. not 0 ) when the queue is enabled and its owner + * group has a runtime bound csg_reg (group region). * @phys: Pointer to the physical pages allocated for the * pair or User mode input/output page * @user_io_addr: Pointer to the permanent kernel mapping of User mode @@ -376,7 +380,7 @@ struct kbase_csf_notification { */ struct kbase_queue { struct kbase_context *kctx; - struct kbase_va_region *reg; + u64 user_io_gpu_va; struct tagged_addr phys[2]; char *user_io_addr; u64 handle; @@ -421,26 +425,33 @@ struct kbase_queue { /** * struct kbase_normal_suspend_buffer - Object representing a normal * suspend buffer for queue group. - * @reg: Memory region allocated for the normal-mode suspend buffer. + * @gpu_va: The start GPU VA address of the bound suspend buffer. Note, this + * field is only valid when the owner group has a region bound at + * runtime. * @phy: Array of physical memory pages allocated for the normal- * mode suspend buffer. */ struct kbase_normal_suspend_buffer { - struct kbase_va_region *reg; + u64 gpu_va; struct tagged_addr *phy; }; /** * struct kbase_protected_suspend_buffer - Object representing a protected * suspend buffer for queue group. - * @reg: Memory region allocated for the protected-mode suspend buffer. 
+ * @gpu_va: The start GPU VA address of the bound protected mode suspend buffer. + * Note, this field is only valid when the owner group has a region + * bound at runtime. * @pma: Array of pointer to protected mode allocations containing * information about memory pages allocated for protected mode * suspend buffer. + * @alloc_retries: Number of times we retried allocing physical pages + * for protected suspend buffers. */ struct kbase_protected_suspend_buffer { - struct kbase_va_region *reg; + u64 gpu_va; struct protected_memory_allocation **pma; + u8 alloc_retries; }; /** @@ -512,6 +523,13 @@ struct kbase_protected_suspend_buffer { * @deschedule_deferred_cnt: Counter keeping a track of the number of threads * that tried to deschedule the group and had to defer * the descheduling due to the dump on fault. + * @csg_reg: An opaque pointer to the runtime bound shared regions. It is + * dynamically managed by the scheduler and can be NULL if the + * group is off-slot. + * @csg_reg_bind_retries: Runtime MCU shared region map operation attempted counts. + * It is accumulated on consecutive mapping attempt failures. On + * reaching a preset limit, the group is regarded as suffered + * a fatal error and triggers a fatal error notification. */ struct kbase_queue_group { struct kbase_context *kctx; @@ -562,6 +580,8 @@ struct kbase_queue_group { #if IS_ENABLED(CONFIG_DEBUG_FS) u32 deschedule_deferred_cnt; #endif + void *csg_reg; + u8 csg_reg_bind_retries; }; /** @@ -623,6 +643,8 @@ struct kbase_csf_cpu_queue_context { * @lock: Lock preventing concurrent access to the @in_use bitmap. * @in_use: Bitmap that indicates which heap context structures are currently * allocated (in @region). + * @heap_context_size_aligned: Size of a heap context structure, in bytes, + * aligned to GPU cacheline size. * * Heap context structures are allocated by the kernel for use by the firmware. * The current implementation subdivides a single GPU memory region for use as @@ -634,6 +656,7 @@ struct kbase_csf_heap_context_allocator { u64 gpu_va; struct mutex lock; DECLARE_BITMAP(in_use, MAX_TILER_HEAPS); + u32 heap_context_size_aligned; }; /** @@ -874,6 +897,33 @@ struct kbase_csf_sched_heap_reclaim_mgr { atomic_t unused_pages; }; +/** + * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared + * interface segment regions for scheduler + * operations + * + * @array_csg_regs: Base pointer of an internally created array_csg_regs[]. + * @unused_csg_regs: List contains unused csg_regs items. When an item is bound to a + * group that is placed onto on-slot by the scheduler, it is dropped + * from the list (i.e busy active). The Scheduler will put an active + * item back when it's becoming off-slot (not in use). + * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal + * and pmode suspend buffers, as a default replacement of a CSG's pages + * for the MMU mapping when the csg_reg is not bound to a group. + * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with + * protected suspend buffer MMU map operations. + * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags. + * @dummy_phys_allocated: Indicating the @p dummy_phy page is allocated when true. 
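[Editor's note] The defs changes above replace per-queue and per-buffer struct kbase_va_region pointers with plain GPU VAs that are only non-zero while the owning group holds a runtime-bound MCU shared region. A compact sketch of that convention, with a helper expressing the "0 means unbound" rule; types and names are simplified stand-ins.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Runtime-bound mapping: gpu_va is valid only while the owner group is
 * bound to an MCU shared region; 0 means "currently unmapped".
 */
struct runtime_bound_buf {
	uint64_t gpu_va;
};

static bool buf_is_bound(const struct runtime_bound_buf *b)
{
	return b->gpu_va != 0;
}

static void bind_buf(struct runtime_bound_buf *b, uint64_t va)
{
	b->gpu_va = va;		/* scheduler maps the buffer when going on-slot */
}

static void unbind_buf(struct runtime_bound_buf *b)
{
	b->gpu_va = 0;		/* scheduler unmaps it when the group goes off-slot */
}

int main(void)
{
	struct runtime_bound_buf suspend_buf = { 0 };

	printf("bound? %d\n", buf_is_bound(&suspend_buf));
	bind_buf(&suspend_buf, 0x6000000000ULL);
	printf("bound? %d\n", buf_is_bound(&suspend_buf));
	unbind_buf(&suspend_buf);
	printf("bound? %d\n", buf_is_bound(&suspend_buf));
	return 0;
}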
+ */ +struct kbase_csf_mcu_shared_regions { + void *array_csg_regs; + struct list_head unused_csg_regs; + struct tagged_addr *dummy_phys; + struct tagged_addr *pma_phys; + unsigned long userio_mem_rd_flags; + bool dummy_phys_allocated; +}; + /** * struct kbase_csf_scheduler - Object representing the scheduler used for * CSF for an instance of GPU platform device. @@ -1008,6 +1058,9 @@ struct kbase_csf_sched_heap_reclaim_mgr { * @interrupt_lock is used to serialize the access. * @protm_enter_time: GPU protected mode enter time. * @reclaim_mgr: CSGs tiler heap manager object. + * @mcu_regs_data: Scheduler MCU shared regions data for managing the + * shared interface mappings for on-slot queues and + * CSG suspend buffers. */ struct kbase_csf_scheduler { struct mutex lock; @@ -1051,6 +1104,7 @@ struct kbase_csf_scheduler { u32 tick_protm_pending_seq; ktime_t protm_enter_time; struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; + struct kbase_csf_mcu_shared_regions mcu_regs_data; }; /* @@ -1328,6 +1382,24 @@ struct kbase_csf_firmware_log { u32 func_call_list_va_end; }; +/** + * struct kbase_csf_firmware_core_dump - Object containing members for handling + * firmware core dump. + * + * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer + * in Firmware. + * @version: Version of the FW image header core dump data format. Bits + * 7:0 specify version minor and 15:8 specify version major. + * @available: Flag to identify if the FW core dump buffer is available. + * True if entry is available in the FW image header and version + * is supported, False otherwise. + */ +struct kbase_csf_firmware_core_dump { + u32 mcu_regs_addr; + u16 version; + bool available; +}; + #if IS_ENABLED(CONFIG_DEBUG_FS) /** * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon @@ -1458,9 +1530,9 @@ struct kbase_csf_dump_on_fault { * the glb_pwoff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time - * window in unit of ms. The firmware does not use it - * directly. + * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time + * window in unit of microseconds. The firmware does not + * use it directly. * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. @@ -1470,6 +1542,8 @@ struct kbase_csf_dump_on_fault { * HW counters. * @fw: Copy of the loaded MCU firmware image. * @fw_log: Contain members required for handling firmware log. + * @fw_core_dump: Contain members required for handling the firmware + * core dump. * @dof: Structure for dump on fault. */ struct kbase_csf_device { @@ -1507,15 +1581,22 @@ struct kbase_csf_device { u32 mcu_core_pwroff_dur_us; u32 mcu_core_pwroff_dur_count; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_ms; + u32 gpu_idle_hysteresis_us; u32 gpu_idle_dur_count; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; struct kbase_csf_firmware_log fw_log; + struct kbase_csf_firmware_core_dump fw_core_dump; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_dump_on_fault dof; #endif /* CONFIG_DEBUG_FS */ +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + /** + * @coresight: Coresight device structure. 
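[Editor's note] The new kbase_csf_firmware_core_dump state documented above stores the dump-data format version packed with the major number in bits 15:8 and the minor number in bits 7:0. A tiny sketch of packing and unpacking that field; the macro names are mine, not the driver's.

#include <stdint.h>
#include <stdio.h>

/* Version layout used by the FW image header core dump entry:
 * bits 15:8 = major, bits 7:0 = minor.
 */
#define CORE_DUMP_VERSION_PACK(major, minor)	((uint16_t)(((major) << 8) | ((minor) & 0xff)))
#define CORE_DUMP_VERSION_MAJOR(v)		(((v) >> 8) & 0xff)
#define CORE_DUMP_VERSION_MINOR(v)		((v) & 0xff)

int main(void)
{
	uint16_t v = CORE_DUMP_VERSION_PACK(0, 1);	/* the only supported version: 0.1 */

	printf("packed 0x%04x -> %u.%u\n", v,
	       CORE_DUMP_VERSION_MAJOR(v), CORE_DUMP_VERSION_MINOR(v));
	return 0;
}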
+ */ + struct kbase_debug_coresight_device coresight; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ }; /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c index 1f4a4d9b6876..548657bc0a38 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c @@ -22,6 +22,7 @@ #include "mali_kbase.h" #include "mali_kbase_csf_firmware_cfg.h" #include "mali_kbase_csf_firmware_log.h" +#include "mali_kbase_csf_firmware_core_dump.h" #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" @@ -38,7 +39,6 @@ #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include #include - #include #include #include @@ -81,7 +81,7 @@ MODULE_PARM_DESC(fw_debug, #define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) #define FIRMWARE_HEADER_VERSION_MAJOR (0ul) -#define FIRMWARE_HEADER_VERSION_MINOR (2ul) +#define FIRMWARE_HEADER_VERSION_MINOR (3ul) #define FIRMWARE_HEADER_LENGTH (0x14ul) #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ @@ -93,12 +93,13 @@ MODULE_PARM_DESC(fw_debug, CSF_FIRMWARE_ENTRY_ZERO | \ CSF_FIRMWARE_ENTRY_CACHE_MODE) -#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) -#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) -#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) -#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -120,7 +121,6 @@ MODULE_PARM_DESC(fw_debug, (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -286,6 +286,13 @@ static void boot_csf_firmware(struct kbase_device *kbdev) { kbase_csf_firmware_enable_mcu(kbdev); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + + if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + wait_for_firmware_boot(kbdev); } @@ -488,6 +495,7 @@ out: * @kbdev: Kbase device structure * @virtual_start: Start of the virtual address range required for an entry allocation * @virtual_end: End of the virtual address range required for an entry allocation + * @flags: Firmware entry flags for comparison with the reusable pages found * @phys: Pointer to the array of physical (tagged) addresses making up the new * FW interface entry. It is an output parameter which would be made to * point to an already existing array allocated for the previously parsed @@ -508,10 +516,12 @@ out: * * Return: true if a large page can be reused, false otherwise. 
*/ -static inline bool entry_find_large_page_to_reuse( - struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end, - struct tagged_addr **phys, struct protected_memory_allocation ***pma, - u32 num_pages, u32 *num_pages_aligned, bool *is_small_page) +static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, + const u32 virtual_start, const u32 virtual_end, + const u32 flags, struct tagged_addr **phys, + struct protected_memory_allocation ***pma, + u32 num_pages, u32 *num_pages_aligned, + bool *is_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -557,7 +567,7 @@ static inline bool entry_find_large_page_to_reuse( if (interface->virtual & (SZ_2M - 1)) continue; - if (virtual_diff < virtual_diff_min) { + if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { target_interface = interface; virtual_diff_min = virtual_diff; } @@ -620,6 +630,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; + bool ignore_page_migration = true; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -662,9 +673,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - reuse_pages = entry_find_large_page_to_reuse( - kbdev, virtual_start, virtual_end, &phys, &pma, - num_pages, &num_pages_aligned, &is_small_page); + reuse_pages = + entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, + &pma, num_pages, &num_pages_aligned, &is_small_page); if (!reuse_pages) phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -685,6 +696,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false); + ignore_page_migration = false; } } @@ -794,7 +806,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, + ignore_page_migration); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -1023,20 +1036,26 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs return parse_build_info_metadata_entry(kbdev, fw, entry, size); case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: /* Function call list section */ - if (size < 2 * sizeof(*entry)) { + if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", size); return -EINVAL; } kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); - break; - } - - if (!optional) { - dev_err(kbdev->dev, - "Unsupported non-optional entry type %u in firmware\n", - type); - return -EINVAL; + return 0; + case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: + /* Core Dump section */ + if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); + return -EINVAL; + } + return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); + default: + if (!optional) { + dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", + type); + return -EINVAL; + } } return 0; @@ 
-1687,6 +1706,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
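[Editor's note] The new GLB_DEBUG plumbing above uses the CSF toggle-bit handshake: the host XORs the request bit against the last ACK value, rings the doorbell, and the request is complete once REQ and ACK agree under the mask. A minimal host-side model of that protocol, with two plain words standing in for the shared interface pages:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEBUG_RUN_MASK 0x1u

/* Shared-interface stand-ins: host writes REQ, firmware writes ACK. */
static uint32_t glb_debug_req;
static uint32_t glb_debug_ack;

/* Host: toggle the request bit relative to the current ACK value. */
static void set_debug_request(uint32_t mask)
{
	uint32_t req = glb_debug_ack ^ mask;

	glb_debug_req = (glb_debug_req & ~mask) | (req & mask);
}

/* Request is complete when REQ and ACK agree under the mask. */
static bool debug_request_complete(uint32_t mask)
{
	return (glb_debug_ack & mask) == (glb_debug_req & mask);
}

/* Firmware side (modelled): acknowledge by copying REQ into ACK. */
static void fw_acknowledge(uint32_t mask)
{
	glb_debug_ack = (glb_debug_ack & ~mask) | (glb_debug_req & mask);
}

int main(void)
{
	set_debug_request(DEBUG_RUN_MASK);
	printf("complete before ack: %d\n", debug_request_complete(DEBUG_RUN_MASK));
	fw_acknowledge(DEBUG_RUN_MASK);
	printf("complete after ack:  %d\n", debug_request_complete(DEBUG_RUN_MASK));
	return 0;
}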
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} /** * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core @@ -1714,7 +1798,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1740,6 +1824,14 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + /* Enable FW MCU read/write debug interfaces */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_DEBUG_ACK_IRQ_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1890,12 +1982,12 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_ms; + u64 dur_val = dur_us; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -1913,9 +2005,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000); + dur_val = div_u64(dur_val, 1000000); /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; @@ -1938,7 +2030,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -1955,7 +2047,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -1986,7 +2078,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -2166,14 +2258,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -2353,6 +2445,10 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#ifdef CONFIG_MALI_FW_CORE_DUMP + kbase_csf_firmware_core_dump_init(kbdev); +#endif + /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | @@ -2470,6 +2566,119 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) kbdev->as_free |= MCU_AS_BITMASK; } +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address and value to write */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val); + + /* Set the Global Debug request for FW MCU write */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_WRITE_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = 
wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, + u32 *reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + if (WARN_ON(reg_val == NULL)) + return -EINVAL; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address to read */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + + /* Set the Global Debug request for FW MCU read */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_READ_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + if (!err) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const val_mask, u32 const reg_val) +{ + unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies; + u32 read_val; + + dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask); + + while (time_before(jiffies, remaining)) { + int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val); + + if (err) { + dev_err(kbdev->dev, + "Error reading MCU register value (read_val = %u, expect = %u)\n", + read_val, reg_val); + return err; + } + + if ((read_val & val_mask) == reg_val) + return 0; + } + + dev_err(kbdev->dev, + "Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n", + read_val, reg_val); + + return -ETIMEDOUT; +} +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -2848,7 +3057,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -2909,4 +3118,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h index 7560a298ac9c..714a14001189 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h @@ -246,7 +246,6 @@ void kbase_csf_firmware_csg_input_mask( u32 kbase_csf_firmware_csg_output( const struct kbase_csf_cmd_stream_group_info *info, u32 offset); - /** * struct kbase_csf_global_iface - Global CSF 
interface * provided by the firmware. @@ -450,6 +449,50 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev); */ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +/** + * kbase_csf_firmware_mcu_register_write - Write to MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to write into + * @reg_val: Value to be written + * + * Write a desired value to a register in MCU address space. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const reg_val); +/** + * kbase_csf_firmware_mcu_register_read - Read from MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to read from + * @reg_val: Value as present in reg_addr register + * + * Read a value from MCU address space. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, + u32 *reg_val); + +/** + * kbase_csf_firmware_mcu_register_poll - Poll MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to read from + * @val_mask: Value to mask the read value for comparison + * @reg_val: Value to be compared against + * + * Continue to read a value from MCU address space until it matches given mask and value. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const val_mask, u32 const reg_val); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + /** * kbase_csf_firmware_ping - Send the ping request to firmware. * @@ -858,5 +901,16 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) */ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); +/** + * kbase_csf_firmware_req_core_dump - Request a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Request a firmware core dump and wait for for firmware to acknowledge. + * Firmware will enter infinite loop after the firmware core dump is created. + * + * Return: 0 if success, or negative error code on failure. + */ +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); #endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c new file mode 100644 index 000000000000..f0a10d197eec --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
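[Editor's note] kbase_csf_firmware_mcu_register_poll(), added above, repeatedly reads an MCU register until a masked comparison matches or a jiffies-based deadline expires. A user-space sketch of the same poll-until-match-or-timeout loop, using a monotonic clock in place of jiffies; the fake read function and timeout value are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Pretend register read: reports "busy" for the first few polls. */
static int mcu_register_read_sketch(uint32_t addr, uint32_t *val)
{
	static int calls;

	(void)addr;
	*val = (++calls < 3) ? 0x0 : 0x1;	/* bit 0 set once "ready" */
	return 0;
}

static double now_seconds(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec + ts.tv_nsec / 1e9;
}

/* Poll until (value & mask) == expected, or the timeout expires. */
static int mcu_register_poll_sketch(uint32_t addr, uint32_t mask,
				    uint32_t expected, double timeout_s)
{
	const double deadline = now_seconds() + timeout_s;
	uint32_t val;

	while (now_seconds() < deadline) {
		int err = mcu_register_read_sketch(addr, &val);

		if (err)
			return err;
		if ((val & mask) == expected)
			return 0;
	}
	return -1;				/* timed out, like -ETIMEDOUT */
}

int main(void)
{
	int ret = mcu_register_poll_sketch(0x40000, 0x1, 0x1, 0.5);

	printf("poll result: %d\n", ret);
	return 0;
}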
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "mali_kbase.h" +#include "mali_kbase_csf_firmware_core_dump.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +/* Page size in bytes in use by MCU. */ +#define FW_PAGE_SIZE 4096 + +/* + * FW image header core dump data format supported. + * Currently only version 0.1 is supported. + */ +#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0 +#define FW_CORE_DUMP_DATA_VERSION_MINOR 1 + +/* Full version of the image header core dump data format */ +#define FW_CORE_DUMP_DATA_VERSION \ + ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) + +/* Validity flag to indicate if the MCU registers in the buffer are valid */ +#define FW_MCU_STATUS_MASK 0x1 +#define FW_MCU_STATUS_VALID (1 << 0) + +/* Core dump entry fields */ +#define FW_CORE_DUMP_VERSION_INDEX 0 +#define FW_CORE_DUMP_START_ADDR_INDEX 1 + +/* MCU registers stored by a firmware core dump */ +struct fw_core_dump_mcu { + u32 r0; + u32 r1; + u32 r2; + u32 r3; + u32 r4; + u32 r5; + u32 r6; + u32 r7; + u32 r8; + u32 r9; + u32 r10; + u32 r11; + u32 r12; + u32 sp; + u32 lr; + u32 pc; +}; + +/* Any ELF definitions used in this file are from elf.h/elfcore.h except + * when specific 32-bit versions are required (mainly for the + * ELF_PRSTATUS32 note that is used to contain the MCU registers). + */ + +/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */ +struct prstatus32_timeval { + int tv_sec; + int tv_usec; +}; + +/* - Structure defining ELF32 PRSTATUS note contents, as defined by the + * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h. + * Note: GDB checks for the size of this structure to be 0x94. + * Modified pr_reg (array containing the Arm 32-bit MCU registers) to + * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. + */ +struct elf_prstatus32 { + struct elf_siginfo pr_info; /* Info associated with signal. */ + short int pr_cursig; /* Current signal. */ + unsigned int pr_sigpend; /* Set of pending signals. */ + unsigned int pr_sighold; /* Set of held signals. */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus32_timeval pr_utime; /* User time. */ + struct prstatus32_timeval pr_stime; /* System time. */ + struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ + struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ + u32 pr_reg[18]; /* GP registers. */ + int pr_fpvalid; /* True if math copro being used. */ +}; + +/** + * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +struct fw_core_dump_data { + struct kbase_device *kbdev; +}; + +/* + * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @interface: current firmware memory interface + * @page_num: current page number (0..) within @interface + */ +struct fw_core_dump_seq_off { + struct kbase_csf_firmware_interface *interface; + u32 page_num; +}; + +/** + * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @regs: Pointer to a core dump mcu struct where the MCU registers are copied + * to. 
Should be allocated by the called. + * + * Return: 0 if successfully copied the MCU registers, negative error code otherwise. + */ +static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs) +{ + unsigned int i; + u32 status = 0; + u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr; + u32 *data = (u32 *)regs; + + /* Check if the core dump entry exposed the buffer */ + if (!regs || !kbdev->csf.fw_core_dump.available) + return -EPERM; + + /* Check if the data in the buffer is valid, if not, return error */ + kbase_csf_read_firmware_memory(kbdev, data_addr, &status); + if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID) + return -EPERM; + + /* According to image header documentation, the MCU registers core dump + * buffer is 32-bit aligned. + */ + for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i) + kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]); + + return 0; +} + +/** + * fw_core_dump_fill_elf_header - Initializes an ELF32 header + * @hdr: ELF32 header to initialize + * @sections: Number of entries in the ELF program header table + * + * Initializes an ELF32 header for an ARM 32-bit little-endian + * 'Core file' object file. + */ +static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections) +{ + /* Reset all members in header. */ + memset(hdr, 0, sizeof(*hdr)); + + /* Magic number identifying file as an ELF object. */ + memcpy(hdr->e_ident, ELFMAG, SELFMAG); + + /* Identify file as 32-bit, little-endian, using current + * ELF header version, with no OS or ABI specific ELF + * extensions used. + */ + hdr->e_ident[EI_CLASS] = ELFCLASS32; + hdr->e_ident[EI_DATA] = ELFDATA2LSB; + hdr->e_ident[EI_VERSION] = EV_CURRENT; + hdr->e_ident[EI_OSABI] = ELFOSABI_NONE; + + /* 'Core file' type of object file. */ + hdr->e_type = ET_CORE; + + /* ARM 32-bit architecture (AARCH32) */ + hdr->e_machine = EM_ARM; + + /* Object file version: the original format. */ + hdr->e_version = EV_CURRENT; + + /* Offset of program header table in file. */ + hdr->e_phoff = sizeof(struct elf32_hdr); + + /* No processor specific flags. */ + hdr->e_flags = 0; + + /* Size of the ELF header in bytes. */ + hdr->e_ehsize = sizeof(struct elf32_hdr); + + /* Size of the ELF program header entry in bytes. */ + hdr->e_phentsize = sizeof(struct elf32_phdr); + + /* Number of entries in the program header table. */ + hdr->e_phnum = sections; +} + +/** + * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header + * for holding auxiliary information + * @phdr: ELF32 program header + * @file_offset: Location of the note in the file in bytes + * @size: Size of the note in bytes. + * + * Initializes an ELF32 program header describing auxiliary information (containing + * one or more notes) of @size bytes alltogether located in the file at offset + * @file_offset. + */ +static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset, + u32 size) +{ + /* Auxiliary information (note) in program header. */ + phdr->p_type = PT_NOTE; + + /* Location of first note in file in bytes. */ + phdr->p_offset = file_offset; + + /* Size of all notes combined in bytes. */ + phdr->p_filesz = size; + + /* Other members not relevant for a note. 
*/ + phdr->p_vaddr = 0; + phdr->p_paddr = 0; + phdr->p_memsz = 0; + phdr->p_align = 0; + phdr->p_flags = 0; +} + +/** + * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment + * @phdr: ELF32 program header to initialize. + * @file_offset: Location of loadable segment in file in bytes + * (aligned to FW_PAGE_SIZE bytes) + * @vaddr: 32-bit virtual address where to write the segment + * (aligned to FW_PAGE_SIZE bytes) + * @size: Size of the segment in bytes. + * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions. + * + * Initializes an ELF32 program header describing a loadable segment of + * @size bytes located in the file at offset @file_offset to be loaded + * at virtual address @vaddr with access permissions as described by + * CSF_FIRMWARE_ENTRY_* flags in @flags. + */ +static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset, + u32 vaddr, u32 size, u32 flags) +{ + /* Loadable segment in program header. */ + phdr->p_type = PT_LOAD; + + /* Location of segment in file in bytes. Aligned to p_align bytes. */ + phdr->p_offset = file_offset; + + /* Virtual address of segment. Aligned to p_align bytes. */ + phdr->p_vaddr = vaddr; + + /* Physical address of segment. Not relevant. */ + phdr->p_paddr = 0; + + /* Size of segment in file and memory. */ + phdr->p_filesz = size; + phdr->p_memsz = size; + + /* Alignment of segment in the file and memory in bytes (integral power of 2). */ + phdr->p_align = FW_PAGE_SIZE; + + /* Set segment access permissions. */ + phdr->p_flags = 0; + if (flags & CSF_FIRMWARE_ENTRY_READ) + phdr->p_flags |= PF_R; + if (flags & CSF_FIRMWARE_ENTRY_WRITE) + phdr->p_flags |= PF_W; + if (flags & CSF_FIRMWARE_ENTRY_EXECUTE) + phdr->p_flags |= PF_X; +} + +/** + * fw_core_dump_get_prstatus_note_size - Calculates size of a ELF32 PRSTATUS note + * @name: Name given to the PRSTATUS note. + * + * Calculates the size of a 32-bit PRSTATUS note (which contains information + * about a process like the current MCU registers) taking into account + * @name must be padded to a 4-byte multiple. + * + * Return: size of 32-bit PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_get_prstatus_note_size(char *name) +{ + return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) + + sizeof(struct elf_prstatus32); +} + +/** + * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure + * @prs: ELF32 PRSTATUS note to initialize + * @regs: MCU registers to copy into the PRSTATUS note + * + * Initializes an ELF32 PRSTATUS structure with MCU registers @regs. + * Other process information is N/A for CSF Firmware. + */ +static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs, + struct fw_core_dump_mcu *regs) +{ + /* Only fill in registers (32-bit) of PRSTATUS note. 
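+	 * The pr_reg layout follows the common Arm ELF core note ordering:
+	 * r0-r12, then sp, lr and pc. Any remaining pr_reg entries (for
+	 * example CPSR) are left at zero, as they are not captured in the
+	 * firmware core dump buffer.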
*/ + memset(prs, 0, sizeof(*prs)); + prs->pr_reg[0] = regs->r0; + prs->pr_reg[1] = regs->r1; + prs->pr_reg[2] = regs->r2; + prs->pr_reg[3] = regs->r3; + prs->pr_reg[4] = regs->r4; + prs->pr_reg[5] = regs->r5; + prs->pr_reg[6] = regs->r0; + prs->pr_reg[7] = regs->r7; + prs->pr_reg[8] = regs->r8; + prs->pr_reg[9] = regs->r9; + prs->pr_reg[10] = regs->r10; + prs->pr_reg[11] = regs->r11; + prs->pr_reg[12] = regs->r12; + prs->pr_reg[13] = regs->sp; + prs->pr_reg[14] = regs->lr; + prs->pr_reg[15] = regs->pc; +} + +/** + * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note + * @name: Name for the PRSTATUS note + * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note + * @created_prstatus_note: + * Pointer to the allocated ELF32 PRSTATUS note + * + * Creates an ELF32 note with one PRSTATUS entry containing the + * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in + * @created_prstatus_note. + * + * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs, + struct elf32_note **created_prstatus_note) +{ + struct elf32_note *note; + unsigned int note_name_sz; + unsigned int note_sz; + + /* Allocate memory for ELF32 note containing a PRSTATUS note. */ + note_name_sz = strlen(name) + 1; + note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) + + sizeof(struct elf_prstatus32); + note = kmalloc(note_sz, GFP_KERNEL); + if (!note) + return 0; + + /* Fill in ELF32 note with one entry for a PRSTATUS note. */ + note->n_namesz = note_name_sz; + note->n_descsz = sizeof(struct elf_prstatus32); + note->n_type = NT_PRSTATUS; + memcpy(note + 1, name, note_name_sz); + memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs)); + + /* Return pointer and size of the created ELF32 note. */ + *created_prstatus_note = note; + return note_sz; +} + +/** + * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump + * @m: the seq_file handle + * + * Writes the ELF header of the core dump including program headers for + * memory sections and a note containing the current MCU register + * values. + * + * Excludes memory sections without read access permissions or + * are for protected memory. + * + * The data written is as follows: + * - ELF header + * - ELF PHDRs for memory sections + * - ELF PHDR for program header NOTE + * - ELF PRSTATUS note + * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE + * + * The actual memory section dumps should follow this (not written + * by this function). + * + * Retrieves the necessary information via the struct + * fw_core_dump_data stored in the private member of the seq_file + * handle. + * + * Return: + * * 0 - success + * * -ENOMEM - not enough memory for allocating ELF32 note + */ +static int fw_core_dump_write_elf_header(struct seq_file *m) +{ + struct elf32_hdr hdr; + struct elf32_phdr phdr; + struct fw_core_dump_data *dump_data = m->private; + struct kbase_device *const kbdev = dump_data->kbdev; + struct kbase_csf_firmware_interface *interface; + struct elf_prstatus32 elf_prs; + struct elf32_note *elf_prstatus_note; + unsigned int sections = 0; + unsigned int elf_prstatus_note_size; + u32 elf_prstatus_offset; + u32 elf_phdr_note_offset; + u32 elf_memory_sections_data_offset; + u32 total_pages = 0; + u32 padding_size, *padding; + struct fw_core_dump_mcu regs = { 0 }; + + /* Count number of memory sections. 
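+	 * Each readable, non-protected memory section becomes one PT_LOAD
+	 * program header in the dump; a single additional PT_NOTE program
+	 * header is emitted for the PRSTATUS note, hence "sections + 1" is
+	 * passed to fw_core_dump_fill_elf_header() below.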
*/ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + sections++; + } + + /* Prepare ELF header. */ + fw_core_dump_fill_elf_header(&hdr, sections + 1); + seq_write(m, &hdr, sizeof(struct elf32_hdr)); + + elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE"); + /* PHDRs of PT_LOAD type. */ + elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr); + /* PHDR of PT_NOTE type. */ + elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr); + elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size; + + /* Calculate padding size to page offset. */ + padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) - + elf_memory_sections_data_offset; + elf_memory_sections_data_offset += padding_size; + + /* Prepare ELF program header table. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset, + interface->virtual, + interface->num_pages * FW_PAGE_SIZE, + interface->flags); + + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE; + total_pages += interface->num_pages; + } + + /* Prepare PHDR of PT_NOTE type. */ + fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset, + elf_prstatus_note_size); + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + /* Prepare ELF note of PRSTATUS type. */ + if (fw_get_core_dump_mcu(kbdev, ®s)) + dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero"); + /* Even if MCU Registers are not available the ELF prstatus is still + * filled with the registers equal to zero. + */ + fw_core_dump_fill_elf_prstatus(&elf_prs, ®s); + elf_prstatus_note_size = + fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note); + if (elf_prstatus_note_size == 0) + return -ENOMEM; + + seq_write(m, elf_prstatus_note, elf_prstatus_note_size); + kfree(elf_prstatus_note); + + /* Pad file to page size. */ + padding = kzalloc(padding_size, GFP_KERNEL); + seq_write(m, padding, padding_size); + kfree(padding); + + return 0; +} + +/** + * fw_core_dump_create - Requests firmware to save state for a firmware core dump + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_create(struct kbase_device *kbdev) +{ + int err; + + /* Ensure MCU is active before requesting the core dump. */ + kbase_csf_scheduler_pm_active(kbdev); + err = kbase_csf_scheduler_wait_mcu_active(kbdev); + if (!err) + err = kbase_csf_firmware_req_core_dump(kbdev); + + kbase_csf_scheduler_pm_idle(kbdev); + + return err; +} + +/** + * fw_core_dump_seq_start - seq_file start operation for firmware core dump file + * @m: the seq_file handle + * @_pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1 + * within the firmware interface memory sections. 
@_pos value 0 is used to indicate the + * position of the ELF header at the start of the file. + * + * Retrieves the necessary information via the struct fw_core_dump_data stored in + * the private member of the seq_file handle. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * SEQ_START_TOKEN - special iterator pointer indicating its is the start of the file + * * NULL - iterator could not be allocated + */ +static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data; + struct kbase_csf_firmware_interface *interface; + loff_t pos = *_pos; + + if (pos == 0) + return SEQ_START_TOKEN; + + /* Move iterator in the right position based on page number within + * available pages of firmware interface memory sections. + */ + pos--; /* ignore start token */ + list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + if (pos >= interface->num_pages) { + pos -= interface->num_pages; + } else { + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = interface; + data->page_num = pos; + return data; + } + } + + return NULL; +} + +/** + * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Closes the current session and frees any memory related. + */ +static void fw_core_dump_seq_stop(struct seq_file *m, void *v) +{ + kfree(v); +} + +/** + * fw_core_dump_seq_next - seq_file next operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * @pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Moves the iterator @v forward to the next page within the firmware interface + * memory sections and returns the updated position in @pos. + * @v value SEQ_START_TOKEN indicates the ELF header position. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * NULL - iterator could not be allocated + */ +static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data = v; + struct kbase_csf_firmware_interface *interface; + struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces; + + /* Is current position at the ELF header ? */ + if (v == SEQ_START_TOKEN) { + if (list_empty(interfaces)) + return NULL; + + /* Prepare iterator for starting at first page in firmware interface + * memory sections. + */ + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = + list_first_entry(interfaces, struct kbase_csf_firmware_interface, node); + data->page_num = 0; + ++*pos; + return data; + } + + /* First attempt to satisfy from current firmware interface memory section. */ + interface = data->interface; + if (data->page_num + 1 < interface->num_pages) { + data->page_num++; + ++*pos; + return data; + } + + /* Need next firmware interface memory section. This could be the last one. 
*/ + if (list_is_last(&interface->node, interfaces)) { + kfree(data); + return NULL; + } + + /* Move to first page in next firmware interface memory section. */ + data->interface = list_next_entry(interface, node); + data->page_num = 0; + ++*pos; + + return data; +} + +/** + * fw_core_dump_seq_show - seq_file show operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Writes the current page in a firmware interface memory section indicated + * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF + * header is written. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_seq_show(struct seq_file *m, void *v) +{ + struct fw_core_dump_seq_off *data = v; + struct page *page; + u32 *p; + + /* Either write the ELF header or current page. */ + if (v == SEQ_START_TOKEN) + return fw_core_dump_write_elf_header(m); + + /* Write the current page. */ + page = as_page(data->interface->phys[data->page_num]); + p = kmap_atomic(page); + seq_write(m, p, FW_PAGE_SIZE); + kunmap_atomic(p); + + return 0; +} + +/* Sequence file operations for firmware core dump file. */ +static const struct seq_operations fw_core_dump_seq_ops = { + .start = fw_core_dump_seq_start, + .next = fw_core_dump_seq_next, + .stop = fw_core_dump_seq_stop, + .show = fw_core_dump_seq_show, +}; + +/** + * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Prepares for servicing a write request to request a core dump from firmware and + * a read request to retrieve the core dump. + * + * Returns an error if the firmware is not initialized yet. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *const kbdev = inode->i_private; + struct fw_core_dump_data *dump_data; + int ret; + + /* Fail if firmware is not initialized yet. */ + if (!kbdev->csf.firmware_inited) { + ret = -ENODEV; + goto open_fail; + } + + /* Open a sequence file for iterating through the pages in the + * firmware interface memory pages. seq_open stores a + * struct seq_file * in the private_data field of @file. + */ + ret = seq_open(file, &fw_core_dump_seq_ops); + if (ret) + goto open_fail; + + /* Allocate a context for sequence file operations. */ + dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL); + if (!dump_data) { + ret = -ENOMEM; + goto out; + } + + /* Kbase device will be shared with sequence file operations. */ + dump_data->kbdev = kbdev; + + /* Link our sequence file context. */ + ((struct seq_file *)file->private_data)->private = dump_data; + + return 0; +out: + seq_release(inode, file); +open_fail: + return ret; +} + +/** + * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file + * @file: file pointer + * @ubuf: user buffer containing data to store + * @count: number of bytes in user buffer + * @ppos: file position + * + * Any data written to the file triggers a firmware core dump request which + * subsequently can be retrieved by reading from the file. + * + * Return: @count if the function succeeded. An error code on failure. 
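+ *
+ * For example, writing any value requests a dump, which can then be read
+ * back from the same file:
+ *
+ *   echo 1 > /sys/kernel/debug/mali0/fw_core_dump
+ *   cat /sys/kernel/debug/mali0/fw_core_dump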
+ */ +static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) +{ + int err; + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + struct kbase_device *const kbdev = dump_data->kbdev; + + CSTD_UNUSED(ppos); + + err = fw_core_dump_create(kbdev); + + return err ? err : count; +} + +/** + * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file) +{ + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + + seq_release(inode, file); + + kfree(dump_data); + + return 0; +} +/* Debugfs file operations for firmware core dump file. */ +static const struct file_operations kbase_csf_fw_core_dump_fops = { + .owner = THIS_MODULE, + .open = fw_core_dump_debugfs_open, + .read = seq_read, + .write = fw_core_dump_debugfs_write, + .llseek = seq_lseek, + .release = fw_core_dump_debugfs_release, +}; + +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_fw_core_dump_fops); +#endif /* CONFIG_DEBUG_FS */ +} + +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry) +{ + /* Casting to u16 as version is defined by bits 15:0 */ + kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX]; + + if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION) + return -EPERM; + + kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX]; + kbdev->csf.fw_core_dump.available = true; + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h new file mode 100644 index 000000000000..0537dca4f37f --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ +#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ + +struct kbase_device; + +/** Offset of the last field of core dump entry from the image header */ +#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4) + +/** + * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from + * the image header. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @entry: Pointer to section. 
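+ *         The entry is expected to hold the core dump data format version
+ *         in bits 15:0 of its first word, and the address in firmware
+ *         memory of the MCU registers buffer in its second word.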
+ * + * Read a "core dump" entry from the image header, check the version for + * compatibility and store the address pointer. + * + * Return: 0 if successfully parse entry, negative error code otherwise. + */ +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry); + +/** + * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * Must be zero-initialized. + * + * Creates the fw_core_dump debugfs file through which to request a firmware + * core dump. The created debugfs file is cleaned up as part of kbdev debugfs + * cleanup. + * + * The fw_core_dump debugs file that case be used in the following way: + * + * To explicitly request core dump: + * echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * + * To output current core dump (after explicitly requesting a core dump, or + * kernel driver reported an internal firmware error): + * cat /sys/kernel/debug/mali0/fw_core_dump + */ +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev); + +#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c index 20d8c0d4fdb1..6e0d3c2f5071 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c @@ -85,7 +85,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); enable_bits_count = 64; } - new_mask = val & ((1 << enable_bits_count) - 1); + new_mask = val & (UINT64_MAX >> (64 - enable_bits_count)); if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); @@ -353,7 +353,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo diff = callee_address - calling_address - 4; sign = !!(diff & 0x80000000); - if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff && + if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", calling_address); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h index 8d7a2210a457..1008320464a9 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h @@ -24,6 +24,9 @@ #include +/** Offset of the last field of functions call list entry from the image header */ +#define FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8) + /* * Firmware log dumping buffer size. 
*/ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index f414d8894306..ab25ed4429e3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -32,7 +32,8 @@ #include "mali_kbase_csf_scheduler.h" #include "mmu/mali_kbase_mmu.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" -#include +#include +#include #include #include @@ -104,7 +105,6 @@ struct dummy_firmware_interface { (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -716,6 +716,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
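+	 * The GLB ack alone is not treated as proof of completion: the
+	 * DEBUG_RUN bit of GLB_DEBUG_ACK is also expected to match
+	 * GLB_DEBUG_REQ, which is what the WARN_ON below verifies.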
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} static void global_init(struct kbase_device *const kbdev, u64 core_mask) { @@ -724,8 +789,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | - 0; + GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -917,7 +981,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -934,7 +998,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -965,7 +1029,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1076,14 +1140,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -1166,8 +1230,6 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) /* NO_MALI: Don't stop firmware or unload MMU tables */ - kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); - kbase_csf_scheduler_term(kbdev); kbase_csf_free_dummy_user_reg_page(kbdev); @@ -1197,6 +1259,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) * entry parsed from the firmware image. 
*/ kbase_mcu_shared_interface_region_tracker_term(kbdev); + + kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); } void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) @@ -1533,7 +1597,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -1594,4 +1658,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c index 1876d505dd5b..42d19e1b6ad7 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c @@ -23,10 +23,7 @@ #include "mali_kbase_csf_heap_context_alloc.h" /* Size of one heap context structure, in bytes. */ -#define HEAP_CTX_SIZE ((size_t)32) - -/* Total size of the GPU memory region allocated for heap contexts, in bytes. */ -#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE) +#define HEAP_CTX_SIZE ((u32)32) /** * sub_alloc - Sub-allocate a heap context from a GPU memory region @@ -38,8 +35,8 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; - int heap_nr = 0; - size_t ctx_offset = 0; + unsigned long heap_nr = 0; + u32 ctx_offset = 0; u64 heap_gpu_va = 0; struct kbase_vmap_struct mapping; void *ctx_ptr = NULL; @@ -55,29 +52,64 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) return 0; } - ctx_offset = heap_nr * HEAP_CTX_SIZE; + ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned; heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, - HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping); + ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping); if (unlikely(!ctx_ptr)) { dev_err(kctx->kbdev->dev, - "Failed to map tiler heap context %d (0x%llX)\n", + "Failed to map tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va); return 0; } - memset(ctx_ptr, 0, HEAP_CTX_SIZE); + memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned); kbase_vunmap(ctx_ptr, &mapping); bitmap_set(ctx_alloc->in_use, heap_nr, 1); - dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n", + dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va); return heap_gpu_va; } +/** + * evict_heap_context - Evict the data of heap context from GPU's L2 cache. + * + * @ctx_alloc: Pointer to the heap context allocator. + * @heap_gpu_va: The GPU virtual address of a heap context structure to free. + * + * This function is called when memory for the heap context is freed. It uses the + * FLUSH_PA_RANGE command to evict the data of heap context, so on older CSF GPUs + * there is nothing done. The whole GPU cache is anyways expected to be flushed + * on older GPUs when initial chunks of the heap are freed just before the memory + * for heap context is freed. 
+ */ +static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va) +{ + struct kbase_context *const kctx = ctx_alloc->kctx; + u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va); + u32 offset_within_page = offset_in_bytes & ~PAGE_MASK; + u32 page_index = offset_in_bytes >> PAGE_SHIFT; + struct tagged_addr page = + kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; + phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page; + + lockdep_assert_held(&ctx_alloc->lock); + + /* There is no need to take vm_lock here as the ctx_alloc region is no_user_free + * refcounted. The region and the backing page can't disappear whilst this + * function is executing. + * Flush type is passed as FLUSH_PT to CLN+INV L2 only. + */ + kbase_mmu_flush_pa_range(kctx->kbdev, kctx, + heap_context_pa, ctx_alloc->heap_context_size_aligned, + KBASE_MMU_OP_FLUSH_PT); +} + /** * sub_free - Free a heap context sub-allocated from a GPU memory region * @@ -88,7 +120,7 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, u64 const heap_gpu_va) { struct kbase_context *const kctx = ctx_alloc->kctx; - u64 ctx_offset = 0; + u32 ctx_offset = 0; unsigned int heap_nr = 0; lockdep_assert_held(&ctx_alloc->lock); @@ -99,13 +131,15 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va)) return; - ctx_offset = heap_gpu_va - ctx_alloc->gpu_va; + ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va); - if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) || - WARN_ON(ctx_offset % HEAP_CTX_SIZE)) + if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) || + WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) return; - heap_nr = ctx_offset / HEAP_CTX_SIZE; + evict_heap_context(ctx_alloc, heap_gpu_va); + + heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned; dev_dbg(kctx->kbdev->dev, "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); @@ -116,12 +150,17 @@ int kbase_csf_heap_context_allocator_init( struct kbase_csf_heap_context_allocator *const ctx_alloc, struct kbase_context *const kctx) { + const u32 gpu_cache_line_size = + (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + /* We cannot pre-allocate GPU memory here because the * custom VA zone may not have been created yet. */ ctx_alloc->kctx = kctx; ctx_alloc->region = NULL; ctx_alloc->gpu_va = 0; + ctx_alloc->heap_context_size_aligned = + (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1); mutex_init(&ctx_alloc->lock); bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); @@ -142,7 +181,14 @@ void kbase_csf_heap_context_allocator_term( if (ctx_alloc->region) { kbase_gpu_vm_lock(kctx); - ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * We can't enforce (nor check) the no_user_free refcount + * to be 0 here as other code regions can take such a reference. + * Anyway, this isn't an issue as the region will eventually + * be freed by the region tracker if its refcount didn't drop + * to 0. 
+ */ + kbase_va_region_no_user_free_put(kctx, ctx_alloc->region); kbase_mem_free_region(kctx, ctx_alloc->region); kbase_gpu_vm_unlock(kctx); } @@ -156,7 +202,7 @@ u64 kbase_csf_heap_context_allocator_alloc( struct kbase_context *const kctx = ctx_alloc->kctx; u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; - u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE); + u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned); u64 heap_gpu_va = 0; /* Calls to this function are inherently asynchronous, with respect to diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index 0b3f1334a9e6..f1727224b243 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -80,7 +80,14 @@ static int kbase_kcpu_map_import_prepare( * on the physical pages tracking object. When the last * reference to the tracking object is dropped the pages * would be unpinned if they weren't unpinned before. + * + * Region should be CPU cached: abort if it isn't. */ + if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) { + ret = -EINVAL; + goto out; + } + ret = kbase_jd_user_buf_pin_pages(kctx, reg); if (ret) goto out; @@ -674,9 +681,8 @@ static int kbase_csf_queue_group_suspend_prepare( (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || - (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) || - (reg->flags & KBASE_REG_NO_USER_FREE)) { + (kbase_is_region_shrinkable(reg)) || + (kbase_va_region_is_no_user_free(kctx, reg))) { ret = -EINVAL; goto out_clean_pages; } @@ -784,13 +790,14 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, return -EINVAL; } - sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val; + sig_set = + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val; if (sig_set) { bool error = false; bitmap_set(cqs_wait->signaled, i, 1); if ((cqs_wait->inherit_err_flags & (1U << i)) && - evt[BASEP_EVENT_ERR_INDEX] > 0) { + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) { queue->has_error = true; error = true; } @@ -800,7 +807,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, error); KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( - kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]); + kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); queue->command_started = false; } @@ -817,12 +824,34 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs); } +static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type) +{ + return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64; +} + +static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type) +{ + BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES); + BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES); + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type)); + + switch (data_type) { + default: + return false; + case BASEP_CQS_DATA_TYPE_U32: + return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0; + case BASEP_CQS_DATA_TYPE_U64: + return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0; + } +} + static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, struct base_kcpu_command_cqs_wait_info *cqs_wait_info, struct 
kbase_kcpu_command *current_command) { struct base_cqs_wait_info *objs; unsigned int nr_objs = cqs_wait_info->nr_objs; + unsigned int i; lockdep_assert_held(&queue->lock); @@ -842,6 +871,17 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } + } + if (++queue->cqs_wait_count == 1) { if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { @@ -897,14 +937,13 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, "Sync memory %llx already freed", cqs_set->objs[i].addr); queue->has_error = true; } else { - evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error; /* Set to signaled */ - evt[BASEP_EVENT_VAL_INDEX]++; + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++; kbase_phy_alloc_mapping_put(queue->kctx, mapping); - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, - queue, cqs_set->objs[i].addr, - evt[BASEP_EVENT_ERR_INDEX]); + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr, + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); } } @@ -921,6 +960,7 @@ static int kbase_kcpu_cqs_set_prepare( { struct base_cqs_set *objs; unsigned int nr_objs = cqs_set_info->nr_objs; + unsigned int i; lockdep_assert_held(&kcpu_queue->lock); @@ -940,6 +980,17 @@ static int kbase_kcpu_cqs_set_prepare( return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. 
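+	 * Because the required alignment equals the object size, an aligned
+	 * object can never straddle a page boundary; a misaligned address is
+	 * rejected here with -EINVAL rather than being detected at execution
+	 * time.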
+ */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } + } + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; current_command->info.cqs_set.nr_objs = nr_objs; current_command->info.cqs_set.objs = objs; @@ -982,8 +1033,9 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, if (!test_bit(i, cqs_wait_operation->signaled)) { struct kbase_vmap_struct *mapping; bool sig_set; - u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, - cqs_wait_operation->objs[i].addr, &mapping); + uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_wait_operation->objs[i].addr, &mapping); + u64 val = 0; /* GPUCORE-28172 RDT to review */ if (!queue->command_started) @@ -996,12 +1048,29 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return -EINVAL; } + switch (cqs_wait_operation->objs[i].data_type) { + default: + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid( + cqs_wait_operation->objs[i].data_type)); + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + queue->has_error = true; + return -EINVAL; + case BASEP_CQS_DATA_TYPE_U32: + val = *(u32 *)evt; + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; + break; + case BASEP_CQS_DATA_TYPE_U64: + val = *(u64 *)evt; + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; + break; + } + switch (cqs_wait_operation->objs[i].operation) { case BASEP_CQS_WAIT_OPERATION_LE: - sig_set = *evt <= cqs_wait_operation->objs[i].val; + sig_set = val <= cqs_wait_operation->objs[i].val; break; case BASEP_CQS_WAIT_OPERATION_GT: - sig_set = *evt > cqs_wait_operation->objs[i].val; + sig_set = val > cqs_wait_operation->objs[i].val; break; default: dev_dbg(kbdev->dev, @@ -1013,24 +1082,10 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return -EINVAL; } - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_wait_operation->objs[i].data_type) { - default: - dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ - fallthrough; - case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); - break; - case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); - break; - } - if (sig_set) { bitmap_set(cqs_wait_operation->signaled, i, 1); if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && - *evt > 0) { + *(u32 *)evt > 0) { queue->has_error = true; } @@ -1058,6 +1113,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue { struct base_cqs_wait_operation_info *objs; unsigned int nr_objs = cqs_wait_operation_info->nr_objs; + unsigned int i; lockdep_assert_held(&queue->lock); @@ -1077,6 +1133,18 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. 
+ */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } + } + if (++queue->cqs_wait_count == 1) { if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { @@ -1107,6 +1175,44 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue return 0; } +static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u32 *)evt += (u32)val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u32 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + +static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u64 *)evt += val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u64 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + static void kbase_kcpu_cqs_set_operation_process( struct kbase_device *kbdev, struct kbase_kcpu_command_queue *queue, @@ -1121,9 +1227,9 @@ static void kbase_kcpu_cqs_set_operation_process( for (i = 0; i < cqs_set_operation->nr_objs; i++) { struct kbase_vmap_struct *mapping; - u64 *evt; + uintptr_t evt; - evt = (u64 *)kbase_phy_alloc_mapping_get( + evt = (uintptr_t)kbase_phy_alloc_mapping_get( queue->kctx, cqs_set_operation->objs[i].addr, &mapping); /* GPUCORE-28172 RDT to review */ @@ -1133,39 +1239,31 @@ static void kbase_kcpu_cqs_set_operation_process( "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); queue->has_error = true; } else { - switch (cqs_set_operation->objs[i].operation) { - case BASEP_CQS_SET_OPERATION_ADD: - *evt += cqs_set_operation->objs[i].val; - break; - case BASEP_CQS_SET_OPERATION_SET: - *evt = cqs_set_operation->objs[i].val; - break; - default: - dev_dbg(kbdev->dev, - "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation); - queue->has_error = true; - break; - } + struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i]; - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_set_operation->objs[i].data_type) { + switch (obj->data_type) { default: - dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ - fallthrough; + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type)); + queue->has_error = true; + goto skip_err_propagation; case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); + kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; break; case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); + kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; break; } /* GPUCORE-28172 RDT to review */ /* Always propagate errors */ - *evt = queue->has_error; + *(u32 *)evt = queue->has_error; +skip_err_propagation: 
kbase_phy_alloc_mapping_put(queue->kctx, mapping); } } @@ -1183,6 +1281,7 @@ static int kbase_kcpu_cqs_set_operation_prepare( { struct base_cqs_set_operation_info *objs; unsigned int nr_objs = cqs_set_operation_info->nr_objs; + unsigned int i; lockdep_assert_held(&kcpu_queue->lock); @@ -1202,6 +1301,18 @@ static int kbase_kcpu_cqs_set_operation_prepare( return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } + } + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; current_command->info.cqs_set_operation.nr_objs = nr_objs; current_command->info.cqs_set_operation.objs = objs; @@ -1234,9 +1345,8 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, queue_work(kcpu_queue->wq, &kcpu_queue->work); } -static void kbase_kcpu_fence_wait_cancel( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; @@ -1410,15 +1520,14 @@ static int kbase_kcpu_fence_wait_process( */ if (fence_status) - kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); + kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); return fence_status; } -static int kbase_kcpu_fence_wait_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; @@ -1429,8 +1538,7 @@ static int kbase_kcpu_fence_wait_prepare( lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) return -ENOMEM; fence_in = sync_file_get_fence(fence.basep.fd); @@ -1444,9 +1552,8 @@ static int kbase_kcpu_fence_wait_prepare( return 0; } -static int kbase_kcpu_fence_signal_process( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; @@ -1467,37 +1574,37 @@ static int kbase_kcpu_fence_signal_process( fence_info->fence->seqno); /* dma_fence refcount needs to be decreased to release it. 
*/ - dma_fence_put(fence_info->fence); + kbase_fence_put(fence_info->fence); fence_info->fence = NULL; return ret; } -static int kbase_kcpu_fence_signal_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, + int *fd) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else struct dma_fence *fence_out; #endif - struct base_fence fence; - struct sync_file *sync_file; + struct kbase_kcpu_dma_fence *kcpu_fence; int ret = 0; - int fd; lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) - return -EFAULT; - - fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); - if (!fence_out) + kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); + if (!kcpu_fence) return -ENOMEM; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + fence_out = (struct fence *)kcpu_fence; +#else + fence_out = (struct dma_fence *)kcpu_fence; +#endif + dma_fence_init(fence_out, &kbase_fence_ops, &kbase_csf_fence_lock, @@ -1513,28 +1620,70 @@ static int kbase_kcpu_fence_signal_prepare( dma_fence_get(fence_out); #endif + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount)); +#else + WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); +#endif + /* create a sync_file fd representing the fence */ - sync_file = sync_file_create(fence_out); - if (!sync_file) { + *sync_file = sync_file_create(fence_out); + if (!(*sync_file)) { ret = -ENOMEM; goto file_create_fail; } - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - ret = fd; + *fd = get_unused_fd_flags(O_CLOEXEC); + if (*fd < 0) { + ret = *fd; goto fd_flags_fail; } - fence.basep.fd = fd; + fence->basep.fd = *fd; current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; + return 0; + +fd_flags_fail: + fput((*sync_file)->file); +file_create_fail: + /* + * Upon failure, dma_fence refcount that was increased by + * dma_fence_get() or sync_file_create() needs to be decreased + * to release it. 
+ */ + kbase_fence_put(fence_out); + current_command->info.fence.fence = NULL; + + return ret; +} + +static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct base_fence fence; + struct sync_file *sync_file = NULL; + int fd; + int ret = 0; + + lockdep_assert_held(&kcpu_queue->lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) + return -EFAULT; + + ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); + if (ret) + return ret; + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, sizeof(fence))) { ret = -EFAULT; - goto fd_flags_fail; + goto fail; } /* 'sync_file' pointer can't be safely dereferenced once 'fd' is @@ -1544,21 +1693,34 @@ static int kbase_kcpu_fence_signal_prepare( fd_install(fd, sync_file->file); return 0; -fd_flags_fail: +fail: fput(sync_file->file); -file_create_fail: - /* - * Upon failure, dma_fence refcount that was increased by - * dma_fence_get() or sync_file_create() needs to be decreased - * to release it. - */ - dma_fence_put(fence_out); - + kbase_fence_put(current_command->info.fence.fence); current_command->info.fence.fence = NULL; - kfree(fence_out); return ret; } + +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + if (!kcpu_queue || !fence_info) + return -EINVAL; + + return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd) +{ + if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) + return -EINVAL; + + return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ static void kcpu_queue_process_worker(struct work_struct *data) @@ -1595,6 +1757,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_lock(&queue->lock); + /* Metadata struct may outlive KCPU queue. */ + kbase_kcpu_dma_fence_meta_put(queue->metadata); + /* Drain the remaining work for this queue first and go past * all the waits. */ @@ -1701,8 +1866,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) if (drain_queue) { - kbase_kcpu_fence_wait_cancel(queue, - &cmd->info.fence); + kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); } else { status = kbase_kcpu_fence_wait_process(queue, &cmd->info.fence); @@ -1732,8 +1896,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) - status = kbase_kcpu_fence_signal_process( - queue, &cmd->info.fence); + status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); if (status < 0) queue->has_error = true; @@ -2103,14 +2266,30 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, return -EINVAL; } + /* There might be a race between one thread trying to enqueue commands to the queue + * and other thread trying to delete the same queue. + * This racing could lead to use-after-free problem by enqueuing thread if + * resources for the queue has already been freed by deleting thread. 
+ * + * To prevent the issue, two mutexes are acquired/release asymmetrically as follows. + * + * Lock A (kctx mutex) + * Lock B (queue mutex) + * Unlock A + * Unlock B + * + * With the kctx mutex being held, enqueuing thread will check the queue + * and will return error code if the queue had already been deleted. + */ mutex_lock(&kctx->csf.kcpu_queues.lock); queue = kctx->csf.kcpu_queues.array[enq->id]; - mutex_unlock(&kctx->csf.kcpu_queues.lock); - - if (queue == NULL) + if (queue == NULL) { + dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id); + mutex_unlock(&kctx->csf.kcpu_queues.lock); return -EINVAL; - + } mutex_lock(&queue->lock); + mutex_unlock(&kctx->csf.kcpu_queues.lock); if (kcpu_queue_get_space(queue) < enq->nr_commands) { ret = -EBUSY; @@ -2275,6 +2454,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del) @@ -2288,7 +2468,9 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_kcpu_command_queue *queue; int idx; int ret = 0; - +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. @@ -2334,7 +2516,27 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, queue->fence_context = dma_fence_context_alloc(1); queue->fence_seqno = 0; queue->fence_wait_processed = false; + + metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + if (!metadata) { + kfree(queue); + ret = -ENOMEM; + goto out; + } + + metadata->kbdev = kctx->kbdev; + metadata->kctx_id = kctx->id; + snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id, + kctx->tgid, kctx->id, queue->fence_context); + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_set(&metadata->refcount, 1); +#else + refcount_set(&metadata->refcount, 1); #endif + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); +#endif /* CONFIG_SYNC_FILE */ queue->enqueue_failed = false; queue->command_started = false; INIT_LIST_HEAD(&queue->jit_blocked); @@ -2360,3 +2562,4 @@ out: return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h index 5f9b8e0684bc..85db53867c06 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h @@ -22,6 +22,9 @@ #ifndef _KBASE_CSF_KCPU_H_ #define _KBASE_CSF_KCPU_H_ +#include +#include + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) #include #else @@ -44,8 +47,8 @@ struct kbase_kcpu_command_import_info { }; /** - * struct kbase_kcpu_command_fence_info - Structure which holds information - * about the fence object enqueued in the kcpu command queue + * struct kbase_kcpu_command_fence_info - Structure which holds information about the + * fence object enqueued in the kcpu command queue * * @fence_cb: Fence callback * @fence: Fence @@ -274,6 +277,8 @@ struct kbase_kcpu_command { * @jit_blocked: Used to keep track of command queues blocked * by a pending JIT allocation command. * @fence_timeout: Timer used to detect the fence wait timeout. 
+ * @metadata: Metadata structure containing basic information about + * this queue for any fence objects associated with this queue. */ struct kbase_kcpu_command_queue { struct mutex lock; @@ -295,6 +300,9 @@ struct kbase_kcpu_command_queue { #ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG struct timer_list fence_timeout; #endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* CONFIG_SYNC_FILE */ }; /** @@ -359,4 +367,14 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); */ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); +#if IS_ENABLED(CONFIG_SYNC_FILE) +/* Test wrappers for dma fence operations. */ +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd); +#endif /* CONFIG_SYNC_FILE */ + #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c new file mode 100644 index 000000000000..77e19dba4262 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c @@ -0,0 +1,815 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include "mali_kbase_csf.h" +#include "mali_kbase_csf_mcu_shared_reg.h" +#include + +/* Scaling factor in pre-allocating shared regions for suspend bufs and userios */ +#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8) + +/* MCU shared region map attempt limit */ +#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4) + +/* Convert a VPFN to its start addr */ +#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT) + +/* Macros for extract the corresponding VPFNs from a CSG_REG */ +#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn) +#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn + nr_susp_pages) +#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi)) + +/* MCU shared segment dummy page mapping flags */ +#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX) + +/* MCU shared segment suspend buffer mapping flags */ +#define SUSP_PAGE_MAP_FLAGS \ + (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \ + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT)) + +/** + * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime + * resources for suspend buffer pages, userio pages + * and their corresponding mapping GPU VA addresses + * from the MCU shared interface segment + * + * @link: Link to the managing list for the wrapper object. + * @reg: pointer to the region allocated from the shared interface segment, which + * covers the normal/P-mode suspend buffers, userio pages of the queues + * @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free). + * @pmode_mapped: Boolean for indicating the region has MMU mapped with the bound group's + * protected mode suspend buffer pages. + */ +struct kbase_csg_shared_region { + struct list_head link; + struct kbase_va_region *reg; + struct kbase_queue_group *grp; + bool pmode_mapped; +}; + +static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) +{ + unsigned long userio_map_flags; + + if (kbdev->system_coherency == COHERENCY_NONE) + userio_map_flags = + KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + else + userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH | + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + + return (userio_map_flags | KBASE_REG_GPU_NX); +} + +static void set_page_meta_status_not_movable(struct tagged_addr phy) +{ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } +} + +static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group) +{ + return (struct kbase_csg_shared_region *)group->csg_reg; +} + +static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn, + u32 nr_pages) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; + + return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages, + mem_flags, KBASE_MEM_GROUP_CSF_FW); +} + +static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; + const enum 
kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, + mmu_sync_info, NULL, false); +} + +/* Reset consecutive retry count to zero */ +static void notify_group_csg_reg_map_done(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + /* Just clear the internal map retry count */ + group->csg_reg_bind_retries = 0; +} + +/* Return true if a fatal group error has already been triggered */ +static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (group->csg_reg_bind_retries < U8_MAX) + group->csg_reg_bind_retries++; + + /* Allow only one fatal error notification */ + if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) { + struct base_gpu_queue_group_error const err_payload = { + .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } } + }; + + dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit", + group->kctx->tgid, group->kctx->id, group->handle); + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); + } + + return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT; +} + +/* Replace the given phys at vpfn (reflecting a queue's userio_pages) mapping. + * If phys is NULL, the internal dummy_phys is used, which effectively + * restores back to the initialized state for the given queue's userio_pages + * (i.e. mapped to the default dummy page). + * In case of CSF mmu update error on a queue, the dummy phy is used to restore + * back the default 'unbound' (i.e. mapped to dummy) condition. + * + * It's the caller's responsibility to ensure that the given vpfn is extracted + * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN(). 
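/*
 * Editor's illustrative note (not part of the patch): how one pre-allocated
 * csg_reg region is partitioned by the CSG_REG_*_VPFN() macros above. For a
 * region of 2 * (nr_susp_pages + nr_csis) pages starting at start_pfn:
 *
 *   [ normal suspend buf ][ P-mode suspend buf ][ CSI0 in|out ][ CSI1 in|out ] ...
 *      nr_susp_pages          nr_susp_pages        2 pages         2 pages
 *
 * The helper below is hypothetical and only demonstrates the arithmetic that
 * callers of userio_pages_replace_phys() perform.
 */
static void example_dump_csg_reg_layout(struct kbase_device *kbdev, struct kbase_va_region *reg,
					u32 nr_susp_pages, u32 nr_csis)
{
	u32 csi;

	dev_dbg(kbdev->dev, "normal suspend buf VA: 0x%llx",
		GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages)));
	dev_dbg(kbdev->dev, "P-mode suspend buf VA: 0x%llx",
		GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages)));

	for (csi = 0; csi < nr_csis; csi++) {
		u64 vpfn = CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages);

		/* Input page first, output page immediately after it. */
		dev_dbg(kbdev->dev, "CSI%u userio input VA: 0x%llx output VA: 0x%llx", csi,
			GET_VPFN_VA(vpfn), GET_VPFN_VA(vpfn) + PAGE_SIZE);
	}
}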
+ */ +static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (phys) { + unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags; + unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR; + + /* Dealing with a queue's INPUT page */ + err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input, + KBASE_MEM_GROUP_CSF_IO); + /* Dealing with a queue's OUTPUT page */ + err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1, + mem_flags_output, KBASE_MEM_GROUP_CSF_IO); + if (unlikely(err1)) + err = err1; + } + + if (unlikely(err) || !phys) { + /* Restore back to dummy_userio_phy */ + update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + } + + return err; +} + +/* Update a group's queues' mappings for a group with its runtime bound group region */ +static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group, + struct kbase_queue_group *prev_grp) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + struct tagged_addr *phy; + int err = 0, err1; + u32 i; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg")) + return -EINVAL; + + for (i = 0; i < nr_csis; i++) { + struct kbase_queue *queue = group->bound_queues[i]; + struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL; + + /* Set the phy if the group's queue[i] needs mapping, otherwise NULL */ + phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? queue->phys : NULL; + + /* Either phy is valid, or this update is for a transition change from + * prev_group, and the prev_queue was mapped, so an update is required. + */ + if (phy || (prev_queue && prev_queue->user_io_gpu_va)) { + u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages); + + err1 = userio_pages_replace_phys(kbdev, vpfn, phy); + + if (unlikely(err1)) { + dev_warn(kbdev->dev, + "%s: Error in update queue-%d mapping for csg_%d_%d_%d", + __func__, i, group->kctx->tgid, group->kctx->id, + group->handle); + err = err1; + } else if (phy) + queue->user_io_gpu_va = GET_VPFN_VA(vpfn); + + /* Mark prev_group's queue has lost its mapping */ + if (prev_queue) + prev_queue->user_io_gpu_va = 0; + } + } + + return err; +} + +/* Bind a group to a given csg_reg, any previous mappings with the csg_reg are replaced + * with the given group's phy pages, or, if no replacement, the default dummy pages. + * Note, the csg_reg's fields are in transition step-by-step from the prev_grp to its + * new binding owner in this function. At the end, the prev_grp would be completely + * detached away from the previously bound csg_reg. 
+ */ +static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group, + struct kbase_csg_shared_region *csg_reg) +{ + const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + struct kbase_queue_group *prev_grp = csg_reg->grp; + struct kbase_va_region *reg = csg_reg->reg; + struct tagged_addr *phy; + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* The csg_reg is expected still on the unused list so its link is not empty */ + if (WARN_ON_ONCE(list_empty(&csg_reg->link))) { + dev_dbg(kbdev->dev, "csg_reg is marked in active use"); + return -EINVAL; + } + + if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) { + dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group"); + prev_grp->csg_reg = NULL; + return -EINVAL; + } + + /* Replacing the csg_reg bound group to the newly given one */ + csg_reg->grp = group; + group->csg_reg = csg_reg; + + /* Resolving mappings, deal with protected mode first */ + if (group->protected_suspend_buf.pma) { + /* We are binding a new group with P-mode ready, the prev_grp's P-mode mapping + * status is now stale during this transition of ownership. For the new owner, + * its mapping would have been updated away when it lost its binding previously. + * So it needs an update to this pma map. By clearing here the mapped flag + * ensures it reflects the new owner's condition. + */ + csg_reg->pmode_mapped = false; + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + } else if (csg_reg->pmode_mapped) { + /* Need to unmap the previous one, use the dummy pages */ + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + + if (unlikely(err)) + dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, group->handle); + + csg_reg->pmode_mapped = false; + } + + /* Unlike the normal suspend buf, the mapping of the protected mode suspend buffer is + * actually reflected by a specific mapped flag (due to phys[] is only allocated on + * in-need basis). So the GPU_VA is always updated to the bound region's corresponding + * VA, as a reflection of the binding to the csg_reg. 
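/*
 * Editor's illustrative sketch (not part of the patch) of the asymmetry the
 * comment above describes: for the normal suspend buffer a non-zero GPU VA
 * means "really mapped", while for the protected-mode suspend buffer the VA is
 * always set on binding and only csg_reg->pmode_mapped says whether real PMA
 * pages are behind it. The helper is hypothetical and, like the surrounding
 * code, assumes the scheduler lock is held.
 */
static bool example_group_suspend_bufs_mapped(struct kbase_queue_group *group)
{
	struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
	bool normal_mapped = group->normal_suspend_buf.gpu_va != 0;
	bool pmode_mapped = csg_reg && csg_reg->pmode_mapped;

	return normal_mapped && pmode_mapped;
}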
+ */ + group->protected_suspend_buf.gpu_va = + GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages)); + + /* Deal with normal mode suspend buffer */ + phy = group->normal_suspend_buf.phy; + err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy, + nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW); + + if (unlikely(err1)) { + dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, group->handle); + + /* Attempt a restore to default dummy for removing previous mapping */ + if (prev_grp) + update_mapping_with_dummy_pages( + kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + err = err1; + /* Marking the normal suspend buffer is not mapped (due to error) */ + group->normal_suspend_buf.gpu_va = 0; + } else { + /* Marking the normal suspend buffer is actually mapped */ + group->normal_suspend_buf.gpu_va = + GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages)); + } + + /* Deal with queue uerio_pages */ + err1 = csg_reg_update_on_csis(kbdev, group, prev_grp); + if (likely(!err1)) + err = err1; + + /* Reset the previous group's suspend buffers' GPU_VAs as it has lost its bound */ + if (prev_grp) { + prev_grp->normal_suspend_buf.gpu_va = 0; + prev_grp->protected_suspend_buf.gpu_va = 0; + prev_grp->csg_reg = NULL; + } + + return err; +} + +/* Notify the group is placed on-slot, hence the bound csg_reg is active in use */ +void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bounding", + group->kctx->tgid, group->kctx->id, group->handle)) + return; + + /* By dropping out the csg_reg from the unused list, it becomes active and is tracked + * by its bound group that is on-slot. The design is that, when this on-slot group is + * moved to off-slot, the scheduler slot-clean up will add it back to the tail of the + * unused list. + */ + if (!WARN_ON_ONCE(list_empty(&csg_reg->link))) + list_del_init(&csg_reg->link); +} + +/* Notify the group is placed off-slot, hence the bound csg_reg is not in active use + * anymore. Existing bounding/mappings are left untouched. These would only be dealt with + * if the bound csg_reg is to be reused with another group. + */ +void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound", + group->kctx->tgid, group->kctx->id, group->handle)) + return; + + /* By adding back the csg_reg to the unused list, it becomes available for another + * group to break its existing binding and set up a new one. 
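/*
 * Editor's illustrative summary (not part of the patch) of how membership of
 * shared_regs->unused_csg_regs encodes a csg_reg's state in this lazy
 * unbinding scheme:
 *
 *   - off the list (link empty)   : bound to an on-slot group, in active use
 *   - on the list, grp != NULL    : bound to an off-slot group; mappings are
 *                                   kept and reused if the same group returns,
 *                                   otherwise lazily replaced when the region
 *                                   is handed to a new group
 *   - on the list, grp == NULL    : free (never bound, or cleared on eviction)
 *
 * The hypothetical helper below mirrors how
 * kbase_csf_mcu_shared_group_bind_csg_reg() picks a region to (re)bind.
 */
static struct kbase_csg_shared_region *
example_pick_csg_reg(struct kbase_csf_mcu_shared_regions *shared_regs,
		     struct kbase_queue_group *group)
{
	/* Prefer the group's existing binding so its mappings can be reused. */
	if (group->csg_reg)
		return (struct kbase_csg_shared_region *)group->csg_reg;

	/* Otherwise take the head of the unused list: evicted (already cleared)
	 * regions are queued at the front, recently vacated ones at the tail.
	 */
	return list_first_entry_or_null(&shared_regs->unused_csg_regs,
					struct kbase_csg_shared_region, link);
}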
+ */ + if (!list_empty(&csg_reg->link)) { + WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot"); + list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs); + } else + list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs); +} + +/* Adding a new queue to an existing on-slot group */ +int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ + struct kbase_queue_group *group = queue->group; + struct kbase_csg_shared_region *csg_reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u64 vpfn; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot")) + return -EIO; + + csg_reg = get_group_bound_csg_reg(group); + if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link), + "No bound csg_reg, or in wrong state")) + return -EIO; + + vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); + err = userio_pages_replace_phys(kbdev, vpfn, queue->phys); + if (likely(!err)) { + /* Mark the queue has been successfully mapped */ + queue->user_io_gpu_va = GET_VPFN_VA(vpfn); + } else { + /* Mark the queue has no mapping on its phys[] */ + queue->user_io_gpu_va = 0; + dev_dbg(kbdev->dev, + "%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__, + queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle); + + /* notify the error for the bound group */ + if (notify_group_csg_reg_map_error(group)) + err = -EIO; + } + + return err; +} + +/* Unmap a given queue's userio pages, when the queue is deleted */ +void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ + struct kbase_queue_group *group; + struct kbase_csg_shared_region *csg_reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u64 vpfn; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* The queue has no existing mapping, nothing to do */ + if (!queue || !queue->user_io_gpu_va) + return; + + group = queue->group; + if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region")) + return; + + csg_reg = get_group_bound_csg_reg(group); + + vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); + + WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL), + "Unexpected restoring to dummy map update error"); + queue->user_io_gpu_va = 0; +} + +int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL")) + return -EINVAL; + + /* If the pmode already mapped, nothing to do */ + if (csg_reg->pmode_mapped) + return 0; + + /* P-mode map not in place and the group has allocated P-mode pages, map it */ + if (group->protected_suspend_buf.pma) { + unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; + struct tagged_addr *phy = shared_regs->pma_phys; + struct kbase_va_region *reg = csg_reg->reg; + u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + u32 i; + + /* Populate the protected phys from pma to phy[] */ + for (i = 0; i < nr_susp_pages; i++) + phy[i] = 
as_tagged(group->protected_suspend_buf.pma[i]->pa); + + /* Add the P-mode suspend buffer mapping */ + err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags, + KBASE_MEM_GROUP_CSF_FW); + + /* If error, restore to default dummpy */ + if (unlikely(err)) { + err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages); + if (unlikely(err1)) + dev_warn( + kbdev->dev, + "%s: Failed in recovering to P-mode dummy for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, + group->handle); + + csg_reg->pmode_mapped = false; + } else + csg_reg->pmode_mapped = true; + } + + return err; +} + +void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + struct kbase_va_region *reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + int err = 0; + u32 i; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* Nothing to do for clearing up if no bound csg_reg */ + if (!csg_reg) + return; + + reg = csg_reg->reg; + /* Restore mappings default dummy pages for any mapped pages */ + if (csg_reg->pmode_mapped) { + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping"); + + csg_reg->pmode_mapped = false; + } + + if (group->normal_suspend_buf.gpu_va) { + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping"); + } + + /* Deal with queue uerio pages */ + for (i = 0; i < nr_csis; i++) + kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]); + + group->normal_suspend_buf.gpu_va = 0; + group->protected_suspend_buf.gpu_va = 0; + + /* Break the binding */ + group->csg_reg = NULL; + csg_reg->grp = NULL; + + /* Put the csg_reg to the front of the unused list */ + if (WARN_ON_ONCE(list_empty(&csg_reg->link))) + list_add(&csg_reg->link, &shared_regs->unused_csg_regs); + else + list_move(&csg_reg->link, &shared_regs->unused_csg_regs); +} + +int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + csg_reg = get_group_bound_csg_reg(group); + if (!csg_reg) + csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs, + struct kbase_csg_shared_region, link); + + if (!WARN_ON_ONCE(!csg_reg)) { + struct kbase_queue_group *prev_grp = csg_reg->grp; + + /* Deal with the previous binding and lazy unmap, i.e if the previous mapping not + * the required one, unmap it. 
+ */ + if (prev_grp == group) { + /* Update existing bindings, if there have been some changes */ + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + if (likely(!err)) + err = csg_reg_update_on_csis(kbdev, group, NULL); + } else + err = group_bind_csg_reg(kbdev, group, csg_reg); + } else { + /* This should not have been possible if the code operates rightly */ + dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d", + __func__, group->handle, group->kctx->tgid, group->kctx->id); + return -EIO; + } + + if (likely(!err)) + notify_group_csg_reg_map_done(group); + else + notify_group_csg_reg_map_error(group); + + return err; +} + +static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, + struct kbase_csg_shared_region *csg_reg) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis); + struct kbase_va_region *reg; + u64 vpfn; + int err, i; + + INIT_LIST_HEAD(&csg_reg->link); + reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, + KBASE_REG_ZONE_MCU_SHARED); + + if (!reg) { + dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n", + __func__, nr_csg_reg_pages); + return -ENOMEM; + } + + /* Insert the region into rbtree, so it becomes ready to use */ + mutex_lock(&kbdev->csf.reg_lock); + err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1); + reg->flags &= ~KBASE_REG_FREE; + mutex_unlock(&kbdev->csf.reg_lock); + if (err) { + kfree(reg); + dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__, + nr_csg_reg_pages); + return err; + } + + /* Initialize the mappings so MMU only need to update the the corresponding + * mapped phy-pages at runtime. + * Map the normal suspend buffer pages to the prepared dummy phys[]. 
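/*
 * Editor's illustrative sketch (not part of the patch): the split between
 * init-time and runtime MMU work that the dummy-page pre-mapping above is
 * built around. At init the whole region is populated once with the shared
 * dummy page, so later (re)binding a group only swaps the physical pages
 * behind an existing mapping and never allocates VA space or page tables on a
 * hot path. The helper is hypothetical; the two kbase_mmu_* calls and flag
 * macros are the ones already used in this file.
 */
static int example_premap_then_rebind(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages,
				      struct tagged_addr *dummy_phys,
				      struct tagged_addr *real_phys)
{
	int err;

	/* Init time: create the page tables, pointing every page at the dummy. */
	err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, dummy_phys, nr_pages,
				     DUMMY_PAGE_MAP_FLAGS, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
				     CALLER_MMU_ASYNC, NULL, false);
	if (err)
		return err;

	/* Runtime (scheduler lock held): only swap the physical pages in place. */
	return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, real_phys, nr_pages,
					      SUSP_PAGE_MAP_FLAGS, KBASE_MEM_GROUP_CSF_FW);
}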
+ */ + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + + if (unlikely(err)) + goto fail_susp_map_fail; + + /* Map the protected suspend buffer pages to the prepared dummy phys[] */ + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + + if (unlikely(err)) + goto fail_pmod_map_fail; + + for (i = 0; i < nr_csis; i++) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + + if (unlikely(err)) + goto fail_userio_pages_map_fail; + } + + /* Replace the previous NULL-valued field with the successully initialized reg */ + csg_reg->reg = reg; + + return 0; + +fail_userio_pages_map_fail: + while (i-- > 0) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + } + + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); +fail_pmod_map_fail: + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); +fail_susp_map_fail: + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, reg); + mutex_unlock(&kbdev->csf.reg_lock); + kfree(reg); + + return err; +} + +/* Note, this helper can only be called on scheduler shutdown */ +static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, + struct kbase_csg_shared_region *csg_reg) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_va_region *reg = csg_reg->reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + u64 vpfn; + int i; + + for (i = 0; i < nr_csis; i++) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + } + + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); + + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, reg); + mutex_unlock(&kbdev->csf.reg_lock); + kfree(reg); +} + +int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; + struct kbase_csg_shared_region *array_csg_regs; + const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_groups = kbdev->csf.global_iface.group_num; + const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; + const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES); + u32 i; + int err; + + shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev); + INIT_LIST_HEAD(&shared_regs->unused_csg_regs); + + shared_regs->dummy_phys = + kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL); + if (!shared_regs->dummy_phys) + 
return -ENOMEM; + + if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, + &shared_regs->dummy_phys[0], false) <= 0) + return -ENOMEM; + + shared_regs->dummy_phys_allocated = true; + set_page_meta_status_not_movable(shared_regs->dummy_phys[0]); + + /* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */ + for (i = 1; i < nr_dummy_phys; i++) + shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0]; + + shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL); + if (!shared_regs->pma_phys) + return -ENOMEM; + + array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL); + if (!array_csg_regs) + return -ENOMEM; + shared_regs->array_csg_regs = array_csg_regs; + + /* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs + * are properly populated and ready to use. Now initialize the items in + * shared_regs->array_csg_regs[] + */ + for (i = 0; i < nr_csg_regs; i++) { + err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]); + if (err) + return err; + + list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs); + } + + return 0; +} + +void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; + struct kbase_csg_shared_region *array_csg_regs = + (struct kbase_csg_shared_region *)shared_regs->array_csg_regs; + const u32 nr_groups = kbdev->csf.global_iface.group_num; + const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; + + if (array_csg_regs) { + struct kbase_csg_shared_region *csg_reg; + u32 i, cnt_csg_regs = 0; + + for (i = 0; i < nr_csg_regs; i++) { + csg_reg = &array_csg_regs[i]; + /* There should not be any group mapping bindings */ + WARN_ONCE(csg_reg->grp, "csg_reg has a bound group"); + + if (csg_reg->reg) { + shared_mcu_csg_reg_term(kbdev, csg_reg); + cnt_csg_regs++; + } + } + + /* The nr_susp_regs counts should match the array_csg_regs' length */ + list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link) + cnt_csg_regs--; + + WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs"); + kfree(shared_regs->array_csg_regs); + } + + if (shared_regs->dummy_phys_allocated) { + struct page *page = as_page(shared_regs->dummy_phys[0]); + + kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); + } + + kfree(shared_regs->dummy_phys); + kfree(shared_regs->pma_phys); +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h new file mode 100644 index 000000000000..61943cbbf083 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_MCU_SHARED_REG_H_ +#define _KBASE_CSF_MCU_SHARED_REG_H_ + +/** + * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with + * scheduling action. Essential runtime resources + * are bound with the group for it to run + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that is placed into active on-slot running by the scheduler. + * + */ +void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with + * scheduling action. Some of bound runtime + * resources can be reallocated for others to use + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that is placed off-slot by the scheduler. + * + */ +void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected + * suspend buffer pages to be mapped for supporting + * protected mode operations. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping. + * + * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an + * error code is returned. + */ +int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the + * given group is evicted out of the runtime + * operations. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that has been evicted out of set of operational groups. + * + * This function will taken away any of the bindings/mappings immediately so the resources + * are not tied up to the given group, which has been evicted out of scheduling action for + * termination. + */ +void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue for a group to be + * run on slot. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @queue: Pointer to the queue that requires some runtime resource to be bound for joining + * others that are already running on-slot with their bound group. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + +/** + * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped + * from its operational state from a group. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @queue: Pointer to the queue that has been stopped from operational state. 
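/*
 * Editor's illustrative sketch (not part of the header): the order in which
 * the CSF scheduler drives these interfaces, condensed from
 * mali_kbase_csf_scheduler.c. The function is hypothetical, assumes it lives
 * in a source file that includes this header, and ignores locking and most
 * error handling so that only the call sequence is visible.
 */
static int example_csg_reg_call_sequence(struct kbase_device *kbdev,
					 struct kbase_queue_group *group,
					 struct kbase_queue *queue)
{
	int err;

	/* Before programming the group on a CSG slot: bind/map its resources. */
	err = kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group);
	if (err)
		return err;

	/* Once the slot is programmed, the bound region is in active use. */
	kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);

	/* A queue started while its group is already on slot gets its own map. */
	err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
	if (err)
		return err;

	/* A stopped queue drops its userio mapping again. */
	kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);

	/* Off slot: the region becomes lazily reusable by other groups... */
	kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);

	/* ...and on eviction every binding/mapping is cleared immediately. */
	kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);

	return 0;
}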
+ * + */ +void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + +/** + * kbase_csf_mcu_shared_group_bind_csg_reg - Bind some required runtime resources to the given + * group for ready to run on-slot. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the queue group that requires the runtime resources. + * + * This function binds/maps the required suspend buffer pages and userio pages for the given + * group, readying it to run on-slot. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for + * the given device. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function allocate and initialize the MCU shared VA regions for runtime operations + * of the CSF scheduler. + * + * Return: 0 on success, or an error code. + */ +int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev); + +/** + * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for + * the given device. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function terminates the MCU shared VA regions allocated for runtime operations + * of the CSF scheduler. + */ +void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev); + +#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index 6dde56cb161a..82389e5bf2a3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -229,22 +229,44 @@ #define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ #define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ -#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ -#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ -#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ -#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ +#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ +#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ +#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ +#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */ + +/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */ +#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */ +#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */ +#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */ +#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */ + #define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ #define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ /* GLB_OUTPUT_BLOCK register offsets */ +#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */ +#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */ +#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 
*/ +#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */ + #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ #define GLB_HALT_STATUS 0x0010 /* () Global halt status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ -#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ +#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ #define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ +#ifdef CONFIG_MALI_CORESIGHT +#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4 +#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT) +#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5 +#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT) +#define GLB_DEBUG_ARG_IN0 0x0FE0 +#define GLB_DEBUG_ARG_IN1 0x0FE4 +#define GLB_DEBUG_ARG_OUT0 0x0FE0 +#endif /* CONFIG_MALI_CORESIGHT */ + /* USER register offsets */ #define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ @@ -304,10 +326,17 @@ #define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 #define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) #define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ - (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) + (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) #define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) +#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12 +#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) +#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \ + (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) +#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \ + (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK)) #define CS_REQ_TILER_OOM_SHIFT 26 #define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) #define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) @@ -582,6 +611,13 @@ #define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30 +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 #define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) #define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ @@ -1590,4 +1626,43 @@ ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) +/* GLB_DEBUG_REQ register */ +#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define 
GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) + +/* GLB_DEBUG_ACK register */ +#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + +/* RUN_MODE values */ +#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 +#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 +/* End of RUN_MODE values */ + #endif /* _KBASE_CSF_REGISTERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index b3cdef7dae52..135d3b01a2ff 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -32,6 +32,7 @@ #include #include #include "mali_kbase_csf_tiler_heap_reclaim.h" +#include "mali_kbase_csf_mcu_shared_reg.h" /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -57,6 +58,9 @@ /* Time to wait for completion of PING req before considering MCU as hung */ #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) +/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */ +#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT + static int scheduler_group_schedule(struct kbase_queue_group *group); static void remove_group_from_idle_wait(struct kbase_queue_group *const group); static @@ -561,7 +565,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. 
*/ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_ms == 0) || + (kbdev->csf.gpu_idle_hysteresis_us == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -1458,6 +1462,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) err = sched_halt_stream(queue); unassign_user_doorbell_from_queue(kbdev, queue); + kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue); } mutex_unlock(&kbdev->csf.scheduler.lock); @@ -1575,17 +1580,15 @@ static void program_cs(struct kbase_device *kbdev, kbase_csf_firmware_cs_input(stream, CS_SIZE, queue->size); - user_input = (queue->reg->start_pfn << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, - user_input & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, - user_input >> 32); + user_input = queue->user_io_gpu_va; + WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va"); - user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, - user_output & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, - user_output >> 32); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32); + + user_output = user_input + PAGE_SIZE; + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32); kbase_csf_firmware_cs_input(stream, CS_CONFIG, (queue->doorbell_nr << 8) | (queue->priority & 0xF)); @@ -1616,8 +1619,10 @@ static void program_cs(struct kbase_device *kbdev, * or protected mode switch. */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK, + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK); /* Set state to START/STOP */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, @@ -1632,6 +1637,20 @@ static void program_cs(struct kbase_device *kbdev, update_hw_active(queue, true); } +static int onslot_csg_add_new_queue(struct kbase_queue *queue) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + err = kbase_csf_mcu_shared_add_queue(kbdev, queue); + if (!err) + program_cs(kbdev, queue, true); + + return err; +} + int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) { struct kbase_queue_group *group = queue->group; @@ -1687,8 +1706,28 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) * user door-bell on such a case. */ kbase_csf_ring_cs_user_doorbell(kbdev, queue); - } else - program_cs(kbdev, queue, true); + } else { + err = onslot_csg_add_new_queue(queue); + /* For an on slot CSG, the only error in adding a new + * queue to run is that the scheduler could not map + * the required userio pages due to likely some resource + * issues. In such a case, and if the group is yet + * to enter its fatal error state, we return a -EBUSY + * to the submitter for another kick. The queue itself + * has yet to be programmed hence needs to remain its + * previous (disabled) state. 
If the error persists, + * the group will eventually reports a fatal error by + * the group's error reporting mechanism, when the MCU + * shared region map retry limit of the group is + * exceeded. For such a case, the expected error value + * is -EIO. + */ + if (unlikely(err)) { + queue->enabled = cs_enabled; + mutex_unlock(&kbdev->csf.scheduler.lock); + return (err != -EIO) ? -EBUSY : err; + } + } } queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work, msecs_to_jiffies(kbase_get_timeout_ms( @@ -1899,9 +1938,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue) struct kbase_vmap_struct *mapping; bool updated = false; u32 *sync_ptr; + u32 sync_wait_size; + u32 sync_wait_align_mask; u32 sync_wait_cond; u32 sync_current_val; struct kbase_device *kbdev; + bool sync_wait_align_valid = false; bool sync_wait_cond_valid = false; if (WARN_ON(!queue)) @@ -1911,6 +1953,16 @@ static bool evaluate_sync_update(struct kbase_queue *queue) lockdep_assert_held(&kbdev->csf.scheduler.lock); + sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait); + sync_wait_align_mask = + (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1; + sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0; + if (!sync_wait_align_valid) { + dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned", + queue->sync_ptr); + goto out; + } + sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, &mapping); @@ -1995,7 +2047,7 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, status); - if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { + if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) { queue->status_wait = status; queue->sync_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_LO); @@ -2011,7 +2063,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON)); - if (!evaluate_sync_update(queue)) { + if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) || + !evaluate_sync_update(queue)) { is_waiting = true; } else { /* Sync object already got updated & met the condition @@ -2305,7 +2358,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, insert_group_to_idle_wait(group); } -static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) +static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; @@ -2442,9 +2495,14 @@ static void save_csg_slot(struct kbase_queue_group *group) if (!queue || !queue->enabled) continue; - if (save_slot_cs(ginfo, queue)) - sync_wait = true; - else { + if (save_slot_cs(ginfo, queue)) { + /* sync_wait is only true if the queue is blocked on + * a CQS and not a scoreboard. + */ + if (queue->blocked_reason != + CS_STATUS_BLOCKED_ON_SB_WAIT) + sync_wait = true; + } else { /* Need to confirm if ringbuffer of the GPU * queue is empty or not. 
A race can arise * between the flush of GPU queue and suspend @@ -2558,6 +2616,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot); + /* Notify the group is off-slot and the csg_reg might be available for + * resue with other groups in a 'lazy unbinding' style. + */ + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return as_fault; } @@ -2641,8 +2704,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, u32 state; int i; unsigned long flags; - const u64 normal_suspend_buf = - group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; + u64 normal_suspend_buf; + u64 protm_suspend_buf; struct kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; @@ -2654,6 +2717,19 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); + if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) { + dev_warn(kbdev->dev, + "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u", + group->handle, group->kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return; + } + + /* The suspend buf has already been mapped through binding to csg_reg */ + normal_suspend_buf = group->normal_suspend_buf.gpu_va; + protm_suspend_buf = group->protected_suspend_buf.gpu_va; + WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped"); + ginfo = &global_iface->groups[slot]; /* Pick an available address space for this context */ @@ -2666,6 +2742,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, if (kctx->as_nr == KBASEP_AS_NR_INVALID) { dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", group->handle, kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); return; } @@ -2716,15 +2793,15 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, normal_suspend_buf >> 32); - if (group->protected_suspend_buf.reg) { - const u64 protm_suspend_buf = - group->protected_suspend_buf.reg->start_pfn << - PAGE_SHIFT; - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, - protm_suspend_buf & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, - protm_suspend_buf >> 32); - } + /* Note, we program the P-mode buffer pointer here, but actual runtime + * enter into pmode execution is controlled by the P-mode phy pages are + * allocated and mapped with the bound csg_reg, which has a specific flag + * for indicating this P-mode runnable condition before a group is + * granted its p-mode section entry. Without a P-mode entry, the buffer + * pointed is not going to be accessed at all. 
+ */ + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32); if (group->dvs_buf) { kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, @@ -2777,6 +2854,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, /* Programming a slot consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); + + /* Notify the group's bound csg_reg is now in active use */ + kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group); } static void remove_scheduled_group(struct kbase_device *kbdev, @@ -2797,7 +2877,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev, } static void sched_evict_group(struct kbase_queue_group *group, bool fault, - bool update_non_idle_offslot_grps_cnt) + bool update_non_idle_offslot_grps_cnt_from_run_state) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -2808,7 +2888,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (queue_group_scheduled_locked(group)) { u32 i; - if (update_non_idle_offslot_grps_cnt && + if (update_non_idle_offslot_grps_cnt_from_run_state && (group->run_state == KBASE_CSF_GROUP_SUSPENDED || group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { int new_val = atomic_dec_return( @@ -2823,8 +2903,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } if (group->prepared_seq_num != - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { + if (!update_non_idle_offslot_grps_cnt_from_run_state) + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); + } if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) remove_group_from_idle_wait(group); @@ -2851,6 +2934,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group); + + /* Clear all the bound shared regions and unmap any in-place MMU maps */ + kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group); } static int term_group_sync(struct kbase_queue_group *group) @@ -3230,8 +3316,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, scheduler->remaining_tick_slots--; } } else { - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); } } @@ -3421,8 +3506,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ clear_bit(i, slot_mask); set_bit(i, scheduler->csgs_events_enable_mask); - update_offslot_non_idle_cnt_for_onslot_grp( - group); } suspend_wait_failed = true; @@ -3882,11 +3965,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, struct kbase_queue_group *const input_grp) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; unsigned long flags; bool protm_in_use; lockdep_assert_held(&scheduler->lock); + /* Return early if the physical pages have not been allocated yet */ + if (unlikely(!sbuf->pma)) + return; + /* This lock is taken to prevent the issuing of MMU command during the * transition to protected mode. 
This helps avoid the scenario where the * entry to protected mode happens with a memory region being locked and @@ -3945,6 +4033,15 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp, 0u); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* Coresight must be disabled before entering protected mode. */ + kbase_debug_coresight_csf_disable_pmode_enter(kbdev); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + kbase_csf_enter_protected_mode(kbdev); /* Set the pending protm seq number to the next one */ protm_enter_set_next_pending_seq(kbdev); @@ -4057,8 +4154,7 @@ static void scheduler_apply(struct kbase_device *kbdev) if (!kctx_as_enabled(group->kctx) || group->faulted) { /* Drop the head group and continue */ - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); continue; } @@ -4337,6 +4433,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, set_bit(i, csg_bitmap); } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } } @@ -5170,16 +5268,12 @@ redo_local_tock: * queue jobs. */ if (protm_grp && scheduler->top_grp == protm_grp) { - int new_val; - dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); - new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp, - new_val); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); + remove_scheduled_group(kbdev, protm_grp); scheduler_check_pmode_progress(kbdev); } else if (scheduler->top_grp) { if (protm_grp) @@ -5993,8 +6087,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) + if (group->run_state == KBASE_CSF_GROUP_IDLE) { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); + } /* Check if the group is now eligible for execution in protected mode. 
*/ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -6262,6 +6359,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int priority; int err; + kbase_ctx_sched_init_ctx(kctx); + for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++priority) { INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); @@ -6278,7 +6377,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (!kctx->csf.sched.sync_update_wq) { dev_err(kctx->kbdev->dev, "Failed to initialize scheduler context workqueue"); - return -ENOMEM; + err = -ENOMEM; + goto alloc_wq_failed; } INIT_WORK(&kctx->csf.sched.sync_update_work, @@ -6291,10 +6391,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - destroy_workqueue(kctx->csf.sched.sync_update_wq); + goto event_wait_add_failed; } return err; + +event_wait_add_failed: + destroy_workqueue(kctx->csf.sched.sync_update_wq); +alloc_wq_failed: + kbase_ctx_sched_remove_ctx(kctx); + return err; } void kbase_csf_scheduler_context_term(struct kbase_context *kctx) @@ -6302,6 +6408,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); cancel_work_sync(&kctx->csf.sched.sync_update_work); destroy_workqueue(kctx->csf.sched.sync_update_wq); + + kbase_ctx_sched_remove_ctx(kctx); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) @@ -6320,7 +6428,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) return -ENOMEM; } - return 0; + return kbase_csf_mcu_shared_regs_data_init(kbdev); } int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) @@ -6420,6 +6528,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) } KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL, kbase_csf_scheduler_get_nr_active_csgs(kbdev)); + /* Terminating the MCU shared regions, following the release of slots */ + kbase_csf_mcu_shared_regs_data_term(kbdev); } void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c new file mode 100644 index 000000000000..a5e0ab5eaf17 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c @@ -0,0 +1,788 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_sync_debugfs.h" +#include "mali_kbase_csf_csg_debugfs.h" +#include +#include + +#if IS_ENABLED(CONFIG_SYNC_FILE) +#include "mali_kbase_sync.h" +#endif + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" + +/* GPU queue related values */ +#define GPU_CSF_MOVE_OPCODE ((u64)0x1) +#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) +#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) +#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) +#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) +#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) +#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) +#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) +#define GPU_CSF_CALL_OPCODE ((u64)0x20) + +#define MAX_NR_GPU_CALLS (5) +#define INSTR_OPCODE_MASK ((u64)0xFF << 56) +#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) +#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) +#define MOVE_DEST_MASK ((u64)0xFF << 48) +#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) +#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) +#define SYNC_SRC0_MASK ((u64)0xFF << 40) +#define SYNC_SRC1_MASK ((u64)0xFF << 32) +#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) +#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) +#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) +#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) + +/* Enumeration for types of GPU queue sync events for + * the purpose of dumping them through debugfs. + */ +enum debugfs_gpu_sync_type { + DEBUGFS_GPU_SYNC_WAIT, + DEBUGFS_GPU_SYNC_SET, + DEBUGFS_GPU_SYNC_ADD, + NUM_DEBUGFS_GPU_SYNC_TYPES +}; + +/** + * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live 32-bit value. + * @live_val: Pointer to the u32 that will be set to the CQS object's current, live + * value. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, + u32 *live_val) +{ + struct kbase_vmap_struct *mapping; + u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live value (32 or 64-bit). + * @live_val: Pointer to the u64 that will be set to the CQS object's current, live + * value. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, + u64 *live_val) +{ + struct kbase_vmap_struct *mapping; + u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait + * or Fence Signal command, contained in a + * KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. 
+ */ +static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, + struct kbase_kcpu_command *cmd, + const char *cmd_name) +{ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = NULL; +#else + struct dma_fence *fence = NULL; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + + struct kbase_sync_fence_info info; + const char *timeline_name = NULL; + bool is_signaled = false; + + fence = cmd->info.fence.fence; + if (WARN_ON(!fence)) + return; + + kbase_sync_fence_info_get(cmd->info.fence.fence, &info); + timeline_name = fence->ops->get_timeline_name(fence); + is_signaled = info.status > 0; + + seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, + is_signaled); + + /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ + seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, + * contained in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *kctx = file->private; + size_t i; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i]; + + u32 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", (u64)live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS + * Set command, contained in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *kctx = file->private; + size_t i; + + for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { + struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i]; + + u32 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", (u64)live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:add arg_value:0x%.8x", 1); + } +} + +/** + * kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '??' if unknown. + */ +static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_WAIT_OPERATION_LE: + string = "le"; + break; + case BASEP_CQS_WAIT_OPERATION_GT: + string = "gt"; + break; + default: + string = "??"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '??' if unknown. 
+ */ +static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_SET_OPERATION_ADD: + string = "add"; + break; + case BASEP_CQS_SET_OPERATION_SET: + string = "set"; + break; + default: + string = "???"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS + * Wait Operation command, contained + * in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + size_t i; + struct kbase_context *kctx = file->private; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + struct base_cqs_wait_operation_info *wait_op = + &cmd->info.cqs_wait_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation); + + u64 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS + * Set Operation command, contained + * in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + size_t i; + struct kbase_context *kctx = file->private; + + for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { + struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_set_op_name( + (basep_cqs_set_operation_op)set_op->operation); + + u64 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + } +} + +/** + * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue + * + * @file: The seq_file to print to. + * @queue: Pointer to the KCPU queue. + */ +static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, + struct kbase_kcpu_command_queue *queue) +{ + char started_or_pending; + struct kbase_kcpu_command *cmd; + struct kbase_context *kctx = file->private; + size_t i; + + if (WARN_ON(!queue)) + return; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + mutex_lock(&queue->lock); + + for (i = 0; i != queue->num_pending_cmds; ++i) { + started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; + seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, + started_or_pending); + + cmd = &queue->commands[queue->start_offset + i]; + switch (cmd->type) { +#if IS_ENABLED(CONFIG_SYNC_FILE) + case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); + break; + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); + break; +#endif + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: + kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET: + kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); + break; + default: + seq_puts(file, ", U, Unknown blocking command"); + break; + } + + seq_puts(file, "\n"); + } + + mutex_unlock(&queue->lock); +} + +/** + * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info + * + * @file: The seq_file for printing to. + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file) +{ + struct kbase_context *kctx = file->private; + unsigned long queue_idx; + + mutex_lock(&kctx->csf.kcpu_queues.lock); + seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id); + + queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); + + while (queue_idx < KBASEP_MAX_KCPU_QUEUES) { + kbasep_csf_sync_kcpu_debugfs_print_queue(file, + kctx->csf.kcpu_queues.array[queue_idx]); + + queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, + queue_idx + 1); + } + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + return 0; +} + +/** + * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations + * from a MOVE instruction. + * + * @move_cmd: Raw MOVE instruction. + * @sync_addr_reg: Register identifier from SYNC_* instruction. + * @compare_val_reg: Register identifier from SYNC_* instruction. + * @sync_val: Pointer to store CQS object address for sync operation. + * @compare_val: Pointer to store compare value for sync operation. + * + * Return: True if value is obtained by checking for correct register identifier, + * or false otherwise. + */ +static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg, + u64 compare_val_reg, u64 *sync_val, + u64 *compare_val) +{ + u64 imm_mask; + + /* Verify MOVE instruction and get immediate mask */ + if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE) + imm_mask = MOVE32_IMM_MASK; + else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE) + imm_mask = MOVE_IMM_MASK; + else + /* Error return */ + return false; + + /* Verify value from MOVE instruction and assign to variable */ + if (sync_addr_reg == MOVE_DEST_GET(move_cmd)) + *sync_val = move_cmd & imm_mask; + else if (compare_val_reg == MOVE_DEST_GET(move_cmd)) + *compare_val = move_cmd & imm_mask; + else + /* Error return */ + return false; + + return true; +} + +/** kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided + * offset. + * + * @queue: Pointer to the queue. + * @ringbuff_offset: Ringbuffer offset. + * + * Return: the u64 in the ringbuffer at the desired offset. 
+ */ +static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset) +{ + u64 page_off = ringbuff_offset >> PAGE_SHIFT; + u64 offset_within_page = ringbuff_offset & ~PAGE_MASK; + struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); + u64 *ringbuffer = kmap_atomic(page); + u64 value = ringbuffer[offset_within_page / sizeof(u64)]; + + kunmap_atomic(ringbuffer); + return value; +} + +/** + * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command. + * + * @file: Pointer to debugfs seq_file file struct for writing output. + * @kctx: Pointer to kbase context. + * @queue: Pointer to the GPU command queue. + * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command. + * (Useful for finding preceding MOVE commands) + * @sync_cmd: Entire u64 of the sync command, which has both sync address and + * comparison-value encoded in it. + * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT). + * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false). + * @follows_wait: Bool to indicate if the operation follows at least one wait + * operation. Used to determine whether it's pending or started. + */ +static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx, + struct kbase_queue *queue, u32 ringbuff_offset, + u64 sync_cmd, enum debugfs_gpu_sync_type type, + bool is_64bit, bool follows_wait) +{ + u64 sync_addr = 0, compare_val = 0, live_val = 0; + u64 move_cmd; + u8 sync_addr_reg, compare_val_reg, wait_condition = 0; + int err; + + static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" }; + static const char *const gpu_sync_type_op[] = { + "wait", /* This should never be printed, only included to simplify indexing */ + "set", "add" + }; + + if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) { + dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!"); + return; + } + + /* We expect there to be at least 2 preceding MOVE instructions, and + * Base will always arrange for the 2 MOVE + SYNC instructions to be + * contiguously located, and is therefore never expected to be wrapped + * around the ringbuffer boundary. + */ + if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) { + dev_warn(kctx->kbdev->dev, + "Unexpected wraparound detected between %s & MOVE instruction", + gpu_sync_type_name[type]); + return; + } + + /* 1. Get Register identifiers from SYNC_* instruction */ + sync_addr_reg = SYNC_SRC0_GET(sync_cmd); + compare_val_reg = SYNC_SRC1_GET(sync_cmd); + + /* 2. Get values from first MOVE command */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val)) + return; + + /* 3. Get values from next MOVE command */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val)) + return; + + /* 4. Get CQS object value */ + if (is_64bit) + err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val); + else + err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val)); + + if (err) + return; + + /* 5. Print info */ + seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle, + queue->csi_index, queue->enabled && !follows_wait ? 
'S' : 'P', + gpu_sync_type_name[type]); + + if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID) + seq_puts(file, "slot:-"); + else + seq_printf(file, "slot:%d", (int)queue->group->csg_nr); + + seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val); + + if (type == DEBUGFS_GPU_SYNC_WAIT) { + wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd); + seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition)); + } else + seq_printf(file, "op:%s ", gpu_sync_type_op[type]); + + seq_printf(file, "arg_value:0x%.16llx\n", compare_val); +} + +/** + * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information. + * + * @file: seq_file for printing to. + * @queue: Address of a GPU command queue to examine. + * + * This function will iterate through each command in the ring buffer of the given GPU queue from + * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and + * print relevant information to the debugfs file. + * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e. + * when there are no more commands to view) or a number of consumed GPU CALL commands have + * been observed. + */ +static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue) +{ + struct kbase_context *kctx; + u32 *addr; + u64 cs_extract, cs_insert, instr, cursor; + bool follows_wait = false; + int nr_calls = 0; + + if (!queue) + return; + + kctx = queue->kctx; + + addr = (u32 *)queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32); + + addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32); + + cursor = cs_extract; + + if (!is_power_of_2(queue->size)) { + dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2", + queue->csi_index, queue->size); + return; + } + + while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) { + bool instr_is_64_bit = false; + /* Calculate offset into ringbuffer from the absolute cursor, + * by finding the remainder of the cursor divided by the + * ringbuffer size. The ringbuffer size is guaranteed to be + * a power of 2, so the remainder can be calculated without an + * explicit modulo. queue->size - 1 is the ringbuffer mask. 
+ */ + u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1)); + + /* Find instruction that cursor is currently on */ + instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset); + + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD64_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + instr_is_64_bit = true; + default: + break; + } + + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD_OPCODE: + case GPU_CSF_SYNC_ADD64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit, + follows_wait); + break; + case GPU_CSF_SYNC_SET_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit, + follows_wait); + break; + case GPU_CSF_SYNC_WAIT_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit, + follows_wait); + follows_wait = true; /* Future commands will follow at least one wait */ + break; + case GPU_CSF_CALL_OPCODE: + nr_calls++; + /* Fallthrough */ + default: + /* Unrecognized command, skip past it */ + break; + } + + cursor += sizeof(u64); + } +} + +/** + * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of + * the provided queue group. + * + * @file: seq_file for printing to. + * @group: Address of a GPU command group to iterate through. + * + * This function will iterate through each queue in the provided GPU queue group and + * print its SYNC related commands. + */ +static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, + struct kbase_queue_group *const group) +{ + struct kbase_context *kctx = file->private; + unsigned int i; + + seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, + group->csg_nr, kctx->tgid, kctx->id); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) + kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); +} + +/** + * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info + * + * @file: The seq_file for printing to. + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) +{ + u32 gr; + struct kbase_context *kctx = file->private; + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return -EINVAL; + + kbdev = kctx->kbdev; + kbase_csf_scheduler_lock(kbdev); + kbase_csf_debugfs_update_active_groups_status(kbdev); + + for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[gr].resident_group; + if (!group || group->kctx != kctx) + continue; + kbasep_csf_dump_active_group_sync_state(file, group); + } + + kbase_csf_scheduler_unlock(kbdev); + return 0; +} + +/** + * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information + * + * @file: The seq_file for printing to. + * @data: The debugfs dentry private data, a pointer to kbase_context. + * + * Return: Negative error code or 0 on success. 
+ */ +static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) +{ + seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); + + kbasep_csf_sync_kcpu_debugfs_show(file); + kbasep_csf_sync_gpu_debugfs_show(file); + return 0; +} + +static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private); +} + +static const struct file_operations kbasep_csf_sync_debugfs_fops = { + .open = kbasep_csf_sync_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * kbase_csf_sync_debugfs_init() - Initialise debugfs file. + * + * @kctx: Kernel context pointer. + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; + const mode_t mode = 0444; + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + file = debugfs_create_file("csf_sync", mode, kctx->kctx_dentry, kctx, + &kbasep_csf_sync_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) + dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry"); +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h new file mode 100644 index 000000000000..177e15d85341 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_ +#define _KBASE_CSF_SYNC_DEBUGFS_H_ + +/* Forward declaration */ +struct kbase_context; + +#define MALI_CSF_SYNC_DEBUGFS_VERSION 0 + +/** + * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info + * + * @kctx: The kbase_context for which to create the debugfs entry + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx); + +#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c index 909362da0047..14d80970ff70 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c @@ -101,7 +101,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk( * @kctx: kbase context the chunk belongs to. * @chunk: The chunk whose external mappings are going to be removed. * - * This function marks the region as DONT NEED. Along with KBASE_REG_NO_USER_FREE, this indicates + * This function marks the region as DONT NEED. 
Along with NO_USER_FREE, this indicates * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other * parts of kbase outside of tiler heap management should not take references on its physical * pages, and should not modify them. @@ -227,12 +227,14 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, kbase_gpu_vm_lock(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT - * regions), and so we must clear that flag too before freeing + * regions), and so we must clear that flag too before freeing. + * For "no user free", we check that the refcount is 1 as it is a shrinkable region; + * no other code part within kbase can take a reference to it. */ + WARN_ON(chunk->region->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -297,7 +299,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * kbase_gpu_vm_lock(kctx); - /* Some checks done here as KBASE_REG_NO_USER_FREE still allows such things to be made + /* Some checks done here as NO_USER_FREE still allows such things to be made * whilst we had dropped the region lock */ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { @@ -305,32 +307,45 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * goto unroll_region; } + /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another + * thread can have the "no user free" refcount increased between kbase_mem_alloc + * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by + * remove_external_chunk_mappings (below). + * + * It should be fine and not a security risk if we let the region leak till + * region tracker termination in such a case. + */ + if (unlikely(chunk->region->no_user_free_refcnt > 1)) { + dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n"); + goto unroll_region; + } + /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE * being requested, it's useful to document in code what those restrictions are, and ensure * they remain in place in future. 
*/ if (WARN(!chunk->region->gpu_alloc, - "KBASE_REG_NO_USER_FREE chunks should not have had their alloc freed")) { + "NO_USER_FREE chunks should not have had their alloc freed")) { goto unroll_region; } if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), - "KBASE_REG_NO_USER_FREE chunks should not have been made ephemeral")) { + "NO_USER_FREE chunks should not have been made ephemeral")) { goto unroll_region; } if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, - "KBASE_REG_NO_USER_FREE chunks should not have been aliased")) { + "NO_USER_FREE chunks should not have been aliased")) { goto unroll_region; } @@ -344,16 +359,21 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * remove_external_chunk_mappings(kctx, chunk); kbase_gpu_vm_unlock(kctx); + /* If page migration is enabled, we don't want to migrate tiler heap pages. + * This does not change if the constituent pages are already marked as isolated. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); + return chunk; unroll_region: /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. */ + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -511,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) if (heap->buf_desc_reg) { kbase_vunmap(kctx, &heap->buf_desc_map); kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } @@ -629,8 +649,8 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co return false; } - if (!(reg->flags & KBASE_REG_CPU_RD) || (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_PF_GROW) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)) { + if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || + (reg->flags & KBASE_REG_PF_GROW)) { dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); return false; } @@ -719,14 +739,17 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ /* If we don't prevent userspace from unmapping this, we may run into * use-after-free, as we don't check for the existence of the region throughout. 
*/ - buf_desc_reg->flags |= KBASE_REG_NO_USER_FREE; heap->buf_desc_va = buf_desc_va; - heap->buf_desc_reg = buf_desc_reg; + heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg); vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, KBASE_REG_CPU_RD, &heap->buf_desc_map, KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); + + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); + kbase_gpu_vm_unlock(kctx); if (unlikely(!vmap_ptr)) { @@ -811,7 +834,7 @@ heap_context_alloc_failed: buf_desc_vmap_failed: if (heap->buf_desc_reg) { kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } buf_desc_not_suitable: @@ -866,6 +889,25 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, return err; } +/** + * validate_allocation_request - Check whether the chunk allocation request + * received on tiler OOM should be handled at + * current time. + * + * @heap: The tiler heap the OOM is associated with + * @nr_in_flight: Number of fragment jobs in flight + * @pending_frag_count: Number of pending fragment jobs + * + * Context: must hold the tiler heap lock to guarantee its lifetime + * + * Return: + * * 0 - allowed to allocate an additional chunk + * * -EINVAL - invalid + * * -EBUSY - there are fragment jobs still in flight, which may free chunks + * after completing + * * -ENOMEM - the targeted number of in-flight chunks has been reached and + * no new ones will be allocated + */ static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, u32 pending_frag_count) { diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c index bcab31d27945..069e827d16ff 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -346,7 +346,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->batch = HEAP_SHRINKER_BATCH; #if !defined(CONFIG_MALI_VECTOR_DUMP) +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(reclaim); +#else + register_shrinker(reclaim, "mali-csf-tiler-heap"); +#endif #endif } diff --git a/drivers/gpu/arm/bifrost/debug/Kbuild b/drivers/gpu/arm/bifrost/debug/Kbuild index 6e1f0f75c43e..ebf3ddb763a2 100644 --- a/drivers/gpu/arm/bifrost/debug/Kbuild +++ b/drivers/gpu/arm/bifrost/debug/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,7 @@ bifrost_kbase-y += debug/mali_kbase_debug_ktrace.o ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o + bifrost_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o else bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o endif diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c new file mode 100644 index 000000000000..ff5f947e2da5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state) +{ + switch (state) { + case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: + return "DISABLED"; + case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: + return "ENABLED"; + default: + break; + } + + return "UNKNOWN"; +} + +static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client, + struct kbase_device *kbdev, u32 reg_addr, u8 op_type) +{ + int i; + + if (reg_addr & 0x3) { + dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned", + op_type, reg_addr); + return false; + } + + for (i = 0; i < client->nr_ranges; i++) { + struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[i]; + + if ((range->start <= reg_addr) && (reg_addr <= range->end)) + return true; + } + + dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type, + reg_addr); + + return false; +} + +static bool validate_op(struct kbase_debug_coresight_csf_client *client, + struct kbase_debug_coresight_csf_op *op) +{ + struct kbase_device *kbdev; + u32 reg; + + if (!op) + return false; + + if (!client) + return false; + + kbdev = (struct kbase_device *)client->drv_data; + + switch (op->type) { + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: + return true; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: + if (validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: + for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; + reg += sizeof(u32)) { + if (!validate_reg_addr(client, kbdev, reg, op->type)) + return false; + } + + return true; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: + if (!op->op.write.ptr) { + dev_err(kbdev->dev, "Invalid operation %d: ptr not set", 
op->type); + break; + } + + if (validate_reg_addr(client, kbdev, op->op.write.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: + if (!op->op.read.ptr) { + dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); + break; + } + + if (validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: + if (validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: + if (op->op.bitw.ptr != NULL) + return true; + + dev_err(kbdev->dev, "Invalid bitwise operation pointer"); + + break; + default: + dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; + } + + return false; +} + +static bool validate_seq(struct kbase_debug_coresight_csf_client *client, + struct kbase_debug_coresight_csf_sequence *seq) +{ + struct kbase_debug_coresight_csf_op *ops = seq->ops; + int nr_ops = seq->nr_ops; + int i; + + for (i = 0; i < nr_ops; i++) { + if (!validate_op(client, &ops[i])) + return false; + } + + return true; +} + +static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op) +{ + int result = -EINVAL; + u32 reg; + + dev_dbg(kbdev->dev, "Execute operation %d", op->type); + + switch (op->type) { + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: + result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr, + op->op.write_imm.val); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: + for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; + reg += sizeof(u32)) { + result = kbase_csf_firmware_mcu_register_write(kbdev, reg, + op->op.write_imm_range.val); + if (!result) + break; + } + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: + result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr, + *op->op.write.ptr); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: + result = kbase_csf_firmware_mcu_register_read(kbdev, op->op.read.reg_addr, + op->op.read.ptr); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: + result = kbase_csf_firmware_mcu_register_poll(kbdev, op->op.poll.reg_addr, + op->op.poll.mask, op->op.poll.val); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: + *op->op.bitw.ptr &= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: + *op->op.bitw.ptr |= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: + *op->op.bitw.ptr ^= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: + *op->op.bitw.ptr = ~(*op->op.bitw.ptr); + result = 0; + break; + default: + dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; + } + + return result; +} + +static int coresight_config_enable(struct kbase_device *kbdev, + struct kbase_debug_coresight_csf_config *config) +{ + int ret = 0; + int i; + + if (!config) + return -EINVAL; + + if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED) + return ret; + + for (i = 0; config->enable_seq && !ret && i < config->enable_seq->nr_ops; i++) + ret = execute_op(kbdev, &config->enable_seq->ops[i]); + + if (!ret) { + dev_dbg(kbdev->dev, "Coresight config 
(0x%pK) state transition: %s to %s", config, + coresight_state_to_string(config->state), + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); + config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED; + } + + /* Always assign the return code during config enable. + * It gets propagated when calling config disable. + */ + config->error = ret; + + return ret; +} + +static int coresight_config_disable(struct kbase_device *kbdev, + struct kbase_debug_coresight_csf_config *config) +{ + int ret = 0; + int i; + + if (!config) + return -EINVAL; + + if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED) + return ret; + + for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++) + ret = execute_op(kbdev, &config->disable_seq->ops[i]); + + if (!ret) { + dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, + coresight_state_to_string(config->state), + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); + config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; + } else { + /* Only assign the error if ret is not 0. + * As we don't want to overwrite an error from config enable + */ + if (!config->error) + config->error = ret; + } + + return ret; +} + +void *kbase_debug_coresight_csf_register(void *drv_data, + struct kbase_debug_coresight_csf_address_range *ranges, + int nr_ranges) +{ + struct kbase_debug_coresight_csf_client *client, *client_entry; + struct kbase_device *kbdev; + unsigned long flags; + int k; + + if (unlikely(!drv_data)) { + pr_err("NULL drv_data"); + return NULL; + } + + kbdev = (struct kbase_device *)drv_data; + + if (unlikely(!ranges)) { + dev_err(kbdev->dev, "NULL ranges"); + return NULL; + } + + if (unlikely(!nr_ranges)) { + dev_err(kbdev->dev, "nr_ranges is 0"); + return NULL; + } + + for (k = 0; k < nr_ranges; k++) { + if (ranges[k].end < ranges[k].start) { + dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x", + ranges[k].start, ranges[k].end); + return NULL; + } + } + + client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL); + + if (!client) + return NULL; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) { + struct kbase_debug_coresight_csf_address_range *client_ranges = + client_entry->addr_ranges; + int i; + + for (i = 0; i < client_entry->nr_ranges; i++) { + int j; + + for (j = 0; j < nr_ranges; j++) { + if ((ranges[j].start < client_ranges[i].end) && + (client_ranges[i].start < ranges[j].end)) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + kfree(client); + dev_err(kbdev->dev, + "Client with range 0x%08x - 0x%08x already present at address range 0x%08x - 0x%08x", + client_ranges[i].start, client_ranges[i].end, + ranges[j].start, ranges[j].end); + + return NULL; + } + } + } + } + + client->drv_data = drv_data; + client->addr_ranges = ranges; + client->nr_ranges = nr_ranges; + list_add(&client->link, &kbdev->csf.coresight.clients); + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return client; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_register); + +void kbase_debug_coresight_csf_unregister(void *client_data) +{ + struct kbase_debug_coresight_csf_client *client; + struct kbase_debug_coresight_csf_config *config_entry; + struct kbase_device *kbdev; + unsigned long flags; + bool retry = true; + + if (unlikely(!client_data)) { + pr_err("NULL client"); + return; + } + + client = (struct kbase_debug_coresight_csf_client *)client_data; + + kbdev = (struct kbase_device 
*)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return; + } + + /* check for active config from client */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_del_init(&client->link); + + while (retry && !list_empty(&kbdev->csf.coresight.configs)) { + retry = false; + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry->client == client) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + kbase_debug_coresight_csf_config_free(config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + retry = true; + break; + } + } + } + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + kfree(client); +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister); + +void * +kbase_debug_coresight_csf_config_create(void *client_data, + struct kbase_debug_coresight_csf_sequence *enable_seq, + struct kbase_debug_coresight_csf_sequence *disable_seq) +{ + struct kbase_debug_coresight_csf_client *client; + struct kbase_debug_coresight_csf_config *config; + struct kbase_device *kbdev; + + if (unlikely(!client_data)) { + pr_err("NULL client"); + return NULL; + } + + client = (struct kbase_debug_coresight_csf_client *)client_data; + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return NULL; + } + + if (enable_seq) { + if (!validate_seq(client, enable_seq)) { + dev_err(kbdev->dev, "Invalid enable_seq"); + return NULL; + } + } + + if (disable_seq) { + if (!validate_seq(client, disable_seq)) { + dev_err(kbdev->dev, "Invalid disable_seq"); + return NULL; + } + } + + config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL); + if (WARN_ON(!config)) + return NULL; + + config->client = client; + config->enable_seq = enable_seq; + config->disable_seq = disable_seq; + config->error = 0; + config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; + + INIT_LIST_HEAD(&config->link); + + return config; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create); + +void kbase_debug_coresight_csf_config_free(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + + kbase_debug_coresight_csf_config_disable(config); + + kfree(config); +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free); + +int kbase_debug_coresight_csf_config_enable(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + struct kbase_debug_coresight_csf_client *client; + struct kbase_device *kbdev; + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + int ret = 0; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return -EINVAL; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + client = (struct kbase_debug_coresight_csf_client *)config->client; + + if (unlikely(!client)) { + pr_err("NULL client in config"); + return -EINVAL; + } + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return -EINVAL; + } + + /* Check to prevent double entry of config */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry == config) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + dev_err(kbdev->dev, "Config already enabled"); + return -EINVAL; + } + } +
spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + kbase_csf_scheduler_lock(kbdev); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Check the state of Scheduler to confirm the desired state of MCU */ + if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && + (kbdev->csf.scheduler.state != SCHED_SLEEPING) && + !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || + kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Wait for MCU to reach the stable ON state */ + ret = kbase_pm_wait_for_desired_state(kbdev); + + if (ret) + dev_err(kbdev->dev, + "Wait for PM state failed when enabling coresight config"); + else + ret = coresight_config_enable(kbdev, config); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + } + + /* Add config to next enable sequence */ + if (!ret) { + spin_lock(&kbdev->csf.coresight.lock); + list_add(&config->link, &kbdev->csf.coresight.configs); + spin_unlock(&kbdev->csf.coresight.lock); + } + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + kbase_csf_scheduler_unlock(kbdev); + + return ret; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable); + +int kbase_debug_coresight_csf_config_disable(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + struct kbase_debug_coresight_csf_client *client; + struct kbase_device *kbdev; + struct kbase_debug_coresight_csf_config *config_entry; + bool found_in_list = false; + unsigned long flags; + int ret = 0; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return -EINVAL; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + + /* Exit early if not enabled prior */ + if (list_empty(&config->link)) + return ret; + + client = (struct kbase_debug_coresight_csf_client *)config->client; + + if (unlikely(!client)) { + pr_err("NULL client in config"); + return -EINVAL; + } + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return -EINVAL; + } + + /* Check if the config is in the correct list */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry == config) { + found_in_list = true; + break; + } + } + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + if (!found_in_list) { + dev_err(kbdev->dev, "Config looks corrupted"); + return -EINVAL; + } + + kbase_csf_scheduler_lock(kbdev); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Check the state of Scheduler to confirm the desired state of MCU */ + if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && + (kbdev->csf.scheduler.state != SCHED_SLEEPING) && + !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || + kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Wait for MCU to reach the stable ON state */ + ret = kbase_pm_wait_for_desired_state(kbdev); + + if (ret) + dev_err(kbdev->dev, + "Wait for PM state failed when disabling coresight config"); + else + ret = coresight_config_disable(kbdev, config); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + } else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) { + /* MCU is OFF, so the disable sequence was already executed. + * + * Propagate any error that would have occurred during the enable + * or disable sequence. + * + * This is done as part of the disable sequence, since the call from + * client is synchronous. 
+ */ + ret = config->error; + } + + /* Remove config from next disable sequence */ + spin_lock(&kbdev->csf.coresight.lock); + list_del_init(&config->link); + spin_unlock(&kbdev->csf.coresight.lock); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + kbase_csf_scheduler_unlock(kbdev); + + return ret; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable); + +static void coresight_config_enable_all(struct work_struct *data) +{ + struct kbase_device *kbdev = + container_of(data, struct kbase_device, csf.coresight.enable_work); + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + if (coresight_config_enable(kbdev, config_entry)) + dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + wake_up_all(&kbdev->csf.coresight.event_wait); +} + +static void coresight_config_disable_all(struct work_struct *data) +{ + struct kbase_device *kbdev = + container_of(data, struct kbase_device, csf.coresight.disable_work); + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + if (coresight_config_disable(kbdev, config_entry)) + dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + wake_up_all(&kbdev->csf.coresight.event_wait); +} + +void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev) +{ + unsigned long flags; + + dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter", + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + kbase_pm_lock(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbdev->csf.coresight.disable_on_pmode_enter = true; + kbdev->csf.coresight.enable_on_pmode_exit = false; + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_wait_for_desired_state(kbdev); + + kbase_pm_unlock(kbdev); +} + +void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit", + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter); + + kbdev->csf.coresight.enable_on_pmode_exit = true; + kbase_pm_update_state(kbdev); +} + +void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + if (unlikely(!kbdev)) + return; + + if (unlikely(!kbdev->csf.coresight.workq)) + return; + + dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state)); + + switch (state) { + case 
KBASE_DEBUG_CORESIGHT_CSF_DISABLED: + queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.disable_work); + break; + case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: + queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.enable_work); + break; + default: + dev_err(kbdev->dev, "Invalid Coresight state %d", state); + break; + } +} + +bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + bool success = true; + + dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state)); + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (state != config_entry->state) { + success = false; + break; + } + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return success; +} +KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check); + +bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; + unsigned long flags; + bool success = true; + + dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state)); + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, + link) { + const enum kbase_debug_coresight_csf_state prev_state = config_entry->state; + long remaining; + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + remaining = wait_event_timeout(kbdev->csf.coresight.event_wait, + state == config_entry->state, wait_timeout); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + if (!remaining) { + success = false; + dev_err(kbdev->dev, + "Timeout waiting for Coresight state transition %s to %s", + coresight_state_to_string(prev_state), + coresight_state_to_string(state)); + } + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return success; +} +KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait); + +int kbase_debug_coresight_csf_init(struct kbase_device *kbdev) +{ + kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0); + if (kbdev->csf.coresight.workq == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&kbdev->csf.coresight.clients); + INIT_LIST_HEAD(&kbdev->csf.coresight.configs); + INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all); + INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all); + init_waitqueue_head(&kbdev->csf.coresight.event_wait); + spin_lock_init(&kbdev->csf.coresight.lock); + + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; + + return 0; +} + +void kbase_debug_coresight_csf_term(struct kbase_device *kbdev) +{ + struct kbase_debug_coresight_csf_client *client_entry, *next_client_entry; + struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; + unsigned long flags; + + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; + + cancel_work_sync(&kbdev->csf.coresight.enable_work); + cancel_work_sync(&kbdev->csf.coresight.disable_work); + destroy_workqueue(kbdev->csf.coresight.workq); + kbdev->csf.coresight.workq = NULL; + + 
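+	/* The workqueue is destroyed, so no enable or disable work can run
+	 * any more: release any configs and clients still left on the lists.
+	 */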
spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, + link) { + list_del_init(&config_entry->link); + kfree(config_entry); + } + + list_for_each_entry_safe(client_entry, next_client_entry, &kbdev->csf.coresight.clients, + link) { + list_del_init(&client_entry->link); + kfree(client_entry); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); +} diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h new file mode 100644 index 000000000000..06d62dc70182 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ +#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ + +#include +#include + +/** + * struct kbase_debug_coresight_csf_client - Coresight client definition + * + * @drv_data: Pointer to driver device data. + * @addr_ranges: Arrays of address ranges used by the registered client. + * @nr_ranges: Size of @addr_ranges array. + * @link: Link item of a Coresight client. + * Linked to &struct_kbase_device.csf.coresight.clients. + */ +struct kbase_debug_coresight_csf_client { + void *drv_data; + struct kbase_debug_coresight_csf_address_range *addr_ranges; + u32 nr_ranges; + struct list_head link; +}; + +/** + * enum kbase_debug_coresight_csf_state - Coresight configuration states + * + * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled. + * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED: Coresight configuration is enabled. + */ +enum kbase_debug_coresight_csf_state { + KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0, + KBASE_DEBUG_CORESIGHT_CSF_ENABLED, +}; + +/** + * struct kbase_debug_coresight_csf_config - Coresight configuration definition + * + * @client: Pointer to the client for which the configuration is created. + * @enable_seq: Array of operations for Coresight client enable sequence. Can be NULL. + * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL. + * @state: Current Coresight configuration state. + * @error: Error code used to know if an error occurred during the execution + * of the enable or disable sequences. + * @link: Link item of a Coresight configuration. + * Linked to &struct_kbase_device.csf.coresight.configs. 
+ */ +struct kbase_debug_coresight_csf_config { + void *client; + struct kbase_debug_coresight_csf_sequence *enable_seq; + struct kbase_debug_coresight_csf_sequence *disable_seq; + enum kbase_debug_coresight_csf_state state; + int error; + struct list_head link; +}; + +/** + * struct kbase_debug_coresight_device - Object representing the Coresight device + * + * @clients: List head to maintain Coresight clients. + * @configs: List head to maintain Coresight configs. + * @lock: A lock to protect client/config lists. + * Lists can be accessed concurrently by + * Coresight kernel modules and kernel threads. + * @workq: Work queue for Coresight enable/disable execution. + * @enable_work: Work item used to enable Coresight. + * @disable_work: Work item used to disable Coresight. + * @event_wait: Wait queue for Coresight events. + * @enable_on_pmode_exit: Flag used by the PM state machine to + * identify if Coresight enable is needed. + * @disable_on_pmode_enter: Flag used by the PM state machine to + * identify if Coresight disable is needed. + */ +struct kbase_debug_coresight_device { + struct list_head clients; + struct list_head configs; + spinlock_t lock; + struct workqueue_struct *workq; + struct work_struct enable_work; + struct work_struct disable_work; + wait_queue_head_t event_wait; + bool enable_on_pmode_exit; + bool disable_on_pmode_enter; +}; + +/** + * kbase_debug_coresight_csf_init - Initialize Coresight resources. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called once at device initialization. + * + * Return: 0 on success. + */ +int kbase_debug_coresight_csf_init(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_term - Terminate Coresight resources. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called at device termination to prevent any + * memory leaks if the Coresight module is removed without calling + * kbasep_debug_coresight_csf_trace_disable(). + */ +void kbase_debug_coresight_csf_term(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected + * mode enter. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called just before requesting to enter protected mode. + * It will trigger a PM state machine transition from MCU_ON + * to ON_PMODE_ENTER_CORESIGHT_DISABLE. + */ +void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected + * mode exit. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called after protected mode exit is acknowledged. + * It will trigger a PM state machine transition from MCU_ON + * to ON_PMODE_EXIT_CORESIGHT_ENABLE. + */ +void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_state_request - Request Coresight state transition. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @state: Coresight state to transition to. + */ +void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +/** + * kbase_debug_coresight_csf_state_check - Check Coresight state. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
+ * @state: Coresight state to check for. + * + * Return: true if all states of configs are @state. + */ +bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +/** + * kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @state: Coresight state to wait for. + * + * Return: true if all configs become @state in pre-defined time period. + */ +bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c index 277569381292..e123b3ac57ac 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c @@ -29,10 +29,7 @@ #include #include #include - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif #include #include @@ -92,13 +89,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) goto fail_timer; #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); err = -EINVAL; goto fail_interrupt_test; } -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ kbase_ipa_control_init(kbdev); @@ -142,9 +139,9 @@ fail_pm_metrics_init: kbase_ipa_control_term(kbdev); #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) fail_interrupt_test: -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ kbase_backend_timer_term(kbdev); @@ -283,12 +280,13 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) } static const struct kbase_device_init dev_init[] = { -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, -#else +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, -#endif +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, @@ -344,6 +342,10 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed" }, { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + { kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term, + "Coresight initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ }; static void kbase_device_term_partial(struct kbase_device *kbdev, diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c 
index 3b792968a7d7..2abd62aaa8b1 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -149,9 +150,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -#ifdef CONFIG_MALI_BIFROST_DEBUG - WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -#endif kbase_pm_disable_db_mirror_interrupt(kbdev); kbdev->pm.backend.exit_gpu_sleep_mode = true; kbase_csf_scheduler_invoke_tick(kbdev); @@ -189,7 +187,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) } #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -static bool kbase_is_register_accessible(u32 offset) +bool kbase_is_register_accessible(u32 offset) { #ifdef CONFIG_MALI_BIFROST_DEBUG if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) || @@ -201,7 +199,9 @@ static bool kbase_is_register_accessible(u32 offset) return true; } +#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_REAL_HW) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { if (WARN_ON(!kbdev->pm.backend.gpu_powered)) diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c index 52063fb0f533..38223af213d1 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c @@ -106,7 +106,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { WARN_ON(!kbdev->pm.backend.gpu_powered); @@ -140,4 +140,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) return val; } KBASE_EXPORT_TEST_API(kbase_reg_read); -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c index 129b4e430c52..6f0ec7d933c4 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -30,10 +30,7 @@ #include #include #include - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include @@ -74,13 +71,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) goto fail_timer; #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); err = -EINVAL; goto fail_interrupt_test; } -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ err = kbase_job_slot_init(kbdev); @@ -119,9 +116,9 @@ fail_devfreq_init: fail_job_slot: #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) fail_interrupt_test: -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* 
CONFIG_MALI_BIFROST_DEBUG */ kbase_backend_timer_term(kbdev); @@ -213,12 +210,13 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd } static const struct kbase_device_init dev_init[] = { -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, -#else +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, -#endif +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c index 053400bd63f0..fb3e4176395e 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -328,6 +328,9 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) kbdev->num_of_atoms_hw_completed = 0; #endif +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&kbdev->live_fence_metadata, 0); +#endif return 0; term_as: @@ -351,6 +354,11 @@ void kbase_device_misc_term(struct kbase_device *kbdev) if (kbdev->oom_notifier_block.notifier_call) unregister_oom_notifier(&kbdev->oom_notifier_block); + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + if (atomic_read(&kbdev->live_fence_metadata) > 0) + dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!"); +#endif } #if !MALI_USE_CSF diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h index 36b4698ca2f8..2c1c6ecec15f 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -99,3 +99,13 @@ int kbase_device_late_init(struct kbase_device *kbdev); * @kbdev: Device pointer */ void kbase_device_late_term(struct kbase_device *kbdev); + +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +/** + * kbase_is_register_accessible - Checks if register is accessible + * @offset: Register offset + * + * Return: true if the register is accessible, false otherwise. 
+ */ +bool kbase_is_register_accessible(u32 offset); +#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index 9985752a3748..f412531ab03a 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -34,13 +34,11 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h" #include "mali_kbase_hwaccess_time.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include #include #include "mali_kbase_ccswe.h" -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ /* Ring buffer virtual address start at 4GB */ #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) @@ -103,6 +101,8 @@ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_i static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags) + __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) + ctx->kbdev->csf.scheduler.interrupt_lock) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -117,6 +117,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_i static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) + __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) + ctx->kbdev->csf.scheduler.interrupt_lock) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -345,7 +347,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info); + mmu_sync_info, NULL, false); if (ret) goto mmu_insert_failed; @@ -480,7 +482,7 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, MCU_AS_NR)); + fw_ring_buf->num_pages, MCU_AS_NR, true)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 9d9889a0e426..669701c29152 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -27,10 +27,7 @@ #include "mali_kbase_hwaccess_instr.h" #include "mali_kbase_hwaccess_time.h" #include "mali_kbase_ccswe.h" - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include "backend/gpu/mali_kbase_model_linux.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include "backend/gpu/mali_kbase_pm_internal.h" diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c index 43cdf18a5e3b..21b4e52884c5 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -23,10 +23,13 @@ #include "mali_kbase.h" /* MEMSYS counter block offsets */ +#define 
L2_RD_MSG_IN_CU (13) #define L2_RD_MSG_IN (16) #define L2_WR_MSG_IN (18) +#define L2_SNP_MSG_IN (20) #define L2_RD_MSG_OUT (22) #define L2_READ_LOOKUP (26) +#define L2_EXT_READ_NOSNP (30) #define L2_EXT_WRITE_NOSNP_FULL (43) /* SC counter block offsets */ @@ -36,17 +39,23 @@ #define FULL_QUAD_WARPS (21) #define EXEC_INSTR_FMA (27) #define EXEC_INSTR_CVT (28) +#define EXEC_INSTR_SFU (29) #define EXEC_INSTR_MSG (30) #define TEX_FILT_NUM_OPS (39) #define LS_MEM_READ_SHORT (45) #define LS_MEM_WRITE_SHORT (47) #define VARY_SLOT_16 (51) +#define BEATS_RD_LSC_EXT (57) +#define BEATS_RD_TEX (58) +#define BEATS_RD_TEX_EXT (59) +#define FRAG_QUADS_COARSE (68) /* Tiler counter block offsets */ #define IDVS_POS_SHAD_STALL (23) #define PREFETCH_STALL (25) #define VFETCH_POS_READ_WAIT (29) #define VFETCH_VERTEX_WAIT (30) +#define PRIMASSY_STALL (32) #define IDVS_VAR_SHAD_STALL (38) #define ITER_STALL (40) #define PMGR_PTR_RD_STALL (48) @@ -111,6 +120,15 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = { TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT), }; +static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { + TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL), + TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL), + + MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU), + MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN), + MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), +}; + /* These tables provide a description of each performance counter * used by the shader cores counter model for energy estimation. */ @@ -150,6 +168,17 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE), }; +static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { + SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA), + SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG), + SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX), + SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT), + SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE), + SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT), + SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT), + SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), +}; + #define IPA_POWER_MODEL_OPS(gpu, init_token) \ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ .name = "mali-" #gpu "-power-model", \ @@ -181,13 +210,13 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { #define ALIAS_POWER_MODEL(gpu, as_gpu) \ IPA_POWER_MODEL_OPS(gpu, as_gpu) -/* Reference voltage value is 750 mV. - */ +/* Reference voltage value is 750 mV. */ STANDARD_POWER_MODEL(todx, 750); STANDARD_POWER_MODEL(tgrx, 750); STANDARD_POWER_MODEL(tvax, 750); - STANDARD_POWER_MODEL(ttux, 750); +/* Reference voltage value is 550 mV. 
*/ +STANDARD_POWER_MODEL(ttix, 550); /* Assuming LODX is an alias of TODX for IPA */ ALIAS_POWER_MODEL(lodx, todx); @@ -195,10 +224,14 @@ ALIAS_POWER_MODEL(lodx, todx); /* Assuming LTUX is an alias of TTUX for IPA */ ALIAS_POWER_MODEL(ltux, ttux); +/* Assuming LTIX is an alias of TTIX for IPA */ +ALIAS_POWER_MODEL(ltix, ttix); + static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, - &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops + &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, + &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, }; const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( @@ -237,6 +270,10 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) return "mali-ttux-power-model"; case GPU_ID2_PRODUCT_LTUX: return "mali-ltux-power-model"; + case GPU_ID2_PRODUCT_TTIX: + return "mali-ttix-power-model"; + case GPU_ID2_PRODUCT_LTIX: + return "mali-ltix-power-model"; default: return NULL; } diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c index a32a2c207163..cc61f642399c 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -23,10 +23,7 @@ #include "mali_kbase_ipa_counter_common_jm.h" #include "mali_kbase.h" - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include /* Performance counter blocks base offsets */ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c index d15e98a54c40..b2e6bc459f22 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -84,11 +84,11 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, bool dt_required) { - struct device_node *model_dt_node; + struct device_node *model_dt_node = NULL; char compat_string[64]; - snprintf(compat_string, sizeof(compat_string), "arm,%s", - model->ops->name); + if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name))) + return NULL; /* of_find_compatible_node() will call of_node_put() on the root node, * so take a reference on it first. @@ -111,12 +111,12 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, const char *name, s32 *addr, size_t num_elems, bool dt_required) { - int err, i; + int err = -EINVAL, i; struct device_node *model_dt_node = get_model_dt_node(model, dt_required); char *origin; - err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); + err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems); /* We're done with model_dt_node now, so drop the reference taken in * get_model_dt_node()/of_find_compatible_node(). 
*/ @@ -138,11 +138,17 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, for (i = 0; i < num_elems; ++i) { char elem_name[32]; - if (num_elems == 1) - snprintf(elem_name, sizeof(elem_name), "%s", name); - else - snprintf(elem_name, sizeof(elem_name), "%s.%d", - name, i); + if (num_elems == 1) { + if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) { + err = -ENOMEM; + goto exit; + } + } else { + if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) { + err = -ENOMEM; + goto exit; + } + } dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", model->ops->name, elem_name, addr[i], origin); @@ -164,7 +170,7 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, int err; struct device_node *model_dt_node = get_model_dt_node(model, dt_required); - const char *string_prop_value; + const char *string_prop_value = ""; char *origin; err = of_property_read_string(model_dt_node, name, diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c index 57508eb24749..8557fe8723cf 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c @@ -236,14 +236,12 @@ static int add_params(struct kbase_ipa_model *model) (struct kbase_ipa_model_simple_data *)model->model_data; err = kbase_ipa_model_add_param_s32(model, "static-coefficient", - &model_data->static_coefficient, - 1, true); + (s32 *)&model_data->static_coefficient, 1, true); if (err) goto end; err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", - &model_data->dynamic_coefficient, - 1, true); + (s32 *)&model_data->dynamic_coefficient, 1, true); if (err) goto end; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h index e4316981e635..fe8995aefc37 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h @@ -578,7 +578,7 @@ struct kbase_jd_atom { #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) int work_id; #endif - int slot_nr; + unsigned int slot_nr; u32 atom_flags; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h index d03bcc0f27d8..53819caaf616 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h @@ -132,15 +132,15 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); * Atoms of higher priority might still be able to be pulled from the context * on @js. This helps with starting a high priority atom as soon as possible. 
*/ -static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, - int js, int sched_prio) +static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js, + int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); WARN(!slot_tracking->atoms_pulled_pri[sched_prio], - "When marking slot %d as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked", + "When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked", js, sched_prio); slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio; @@ -509,19 +509,6 @@ void kbasep_js_resume(struct kbase_device *kbdev); bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom); -/** - * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. - * @kctx: Context Pointer - * @prio: Priority (specifies the queue together with js). - * @js: Job slot (specifies the queue together with prio). - * - * Pushes all possible atoms from the linked list to the ringbuffer. - * Number of atoms are limited to free space in the ringbuffer and - * number of available atoms in the linked list. - * - */ -void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); - /** * kbase_js_pull - Pull an atom from a context in the job scheduler for * execution. @@ -536,7 +523,7 @@ void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); * Return: a pointer to an atom, or NULL if there are no atoms for this * slot that can be currently run. */ -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js); /** * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. @@ -617,7 +604,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); * been used. 
* */ -void kbase_js_sched(struct kbase_device *kbdev, int js_mask); +void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask); /** * kbase_js_zap_context - Attempt to deschedule a context that is being diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index ea143ab49642..c6fea791b8c9 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -131,16 +131,6 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[ BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_END -}; - __attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index a360984acca5..2dc0402197de 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -64,6 +64,9 @@ enum base_hw_issue { BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -88,6 +91,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -108,6 +113,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -128,6 +135,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -143,6 +152,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMI BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -156,6 +167,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -169,6 +182,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -182,6 +197,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -194,6 +211,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3 
BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -204,6 +223,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHE BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -217,6 +238,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -230,6 +253,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -242,6 +267,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -253,6 +280,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -263,6 +292,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSI BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -274,6 +305,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -284,6 +317,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDV BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -296,6 +331,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -306,6 +343,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNO BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -318,6 +357,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -330,6 +371,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -340,6 +383,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGO BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -356,6 
+401,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -372,6 +419,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -387,6 +436,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -399,6 +450,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTR BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -415,6 +468,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -430,6 +485,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -442,6 +499,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNA BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -456,6 +515,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -469,6 +530,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -482,6 +545,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -495,6 +560,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -507,6 +574,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBE BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -521,6 +590,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -534,6 +605,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + 
BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -547,6 +620,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -560,6 +635,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -572,90 +649,74 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBA BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, @@ -663,70 +724,97 @@ __attribute__((unused)) static 
const enum base_hw_issue base_hw_issues_tTUx_r0p0 BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 8e4d36141368..542e8f63fb5b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -339,21 +339,8 @@ int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom); -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags); - -/** - * kbase_job_slot_hardstop - Hard-stop the specified job slot - * @kctx: The kbase context that contains 
the job(s) that should - * be hard-stopped - * @js: The job slot to hard-stop - * @target_katom: The job that should be hard-stopped (or NULL for all - * jobs from the context) - * Context: - * The job slot lock must be held when calling this function. - */ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom, u32 sw_flags); /** * kbase_job_check_enter_disjoint - potentiall enter disjoint mode @@ -448,19 +435,6 @@ static inline void kbase_free_user_buffer( } } -/** - * kbase_mem_copy_from_extres() - Copy from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @buf_data: Pointer to the information about external resources: - * pages pertaining to the external resource, number of - * pages to copy. - * - * Return: 0 on success, error code otherwise. - */ -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data); - #if !MALI_USE_CSF int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c index 08a9a3cd0479..10dbeee02e40 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -98,11 +98,9 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) "unable to create address_spaces debugfs directory"); } else { for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); - debugfs_create_file(as_name, 0444, - debugfs_directory, - (void *)(uintptr_t)i, - &as_fault_fops); + if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i))) + debugfs_create_file(as_name, 0444, debugfs_directory, + (void *)(uintptr_t)i, &as_fault_fops); } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index fa094ab36b1f..7eb6b5a798ce 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -31,10 +31,7 @@ #include #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include "backend/gpu/mali_kbase_model_linux.h" -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ #include "uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" @@ -632,7 +629,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); #if IS_ENABLED(CONFIG_DEBUG_FS) - snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id))) + return -ENOMEM; mutex_init(&kctx->mem_profile_lock); @@ -671,8 +669,10 @@ static int kbase_open(struct inode *inode, struct file *filp) if (!kbdev) return -ENODEV; - /* Set address space operation for page migration */ +#if 
(KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + /* Set address space operations for page migration */ kbase_mem_migrate_set_address_space_ops(kbdev, filp); +#endif /* Device-wide firmware load is moved here from probing to comply with * Android GKI vendor guideline. @@ -1467,6 +1467,9 @@ static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, union kbase_ioctl_cs_tiler_heap_init *heap_init) { + if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, @@ -1479,6 +1482,9 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init) { + if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, @@ -4278,7 +4284,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev) kfree(kbdev->protected_dev); } -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) static int kbase_common_reg_map(struct kbase_device *kbdev) { return 0; @@ -4286,7 +4292,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { } -#else /* CONFIG_MALI_BIFROST_NO_MALI */ +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ static int kbase_common_reg_map(struct kbase_device *kbdev) { int err = 0; @@ -4322,7 +4328,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) kbdev->reg_size = 0; } } -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ int registers_map(struct kbase_device * const kbdev) { @@ -4585,8 +4591,18 @@ int power_control_init(struct kbase_device *kbdev) * from completing its initialization. 
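/*
 * Illustrative sketch, not part of the patch: the tiler-heap ioctl handlers
 * above now reject an out-of-range group_id before caching it in the context.
 * This standalone C fragment mirrors that "validate the untrusted index
 * first" pattern; NR_GROUPS and ctx_cache_group() are invented names that
 * only stand in for MEMORY_GROUP_MANAGER_NR_GROUPS and the real handlers.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define NR_GROUPS 16u  /* stands in for MEMORY_GROUP_MANAGER_NR_GROUPS */

struct ctx_cache { uint8_t group_id; };

static int ctx_cache_group(struct ctx_cache *ctx, uint8_t group_id_from_user)
{
	/* Reject before any state is modified, as the patched handlers do. */
	if (group_id_from_user >= NR_GROUPS)
		return -EINVAL;

	ctx->group_id = group_id_from_user;
	return 0;
}

int main(void)
{
	struct ctx_cache ctx = { 0 };

	printf("group 3  -> %d\n", ctx_cache_group(&ctx, 3));   /* 0 */
	printf("group 42 -> %d\n", ctx_cache_group(&ctx, 42));  /* -EINVAL */
	return 0;
}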
*/ #if defined(CONFIG_PM_OPP) -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->nr_regulators > 0) { + kbdev->token = dev_pm_opp_set_regulators(kbdev->dev, regulator_names); + + if (kbdev->token < 0) { + err = kbdev->token; + goto regulators_probe_defer; + } + + } +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (kbdev->nr_regulators > 0) { kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, regulator_names, @@ -4605,7 +4621,9 @@ int power_control_init(struct kbase_device *kbdev) return 0; } } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ + #ifdef CONFIG_ARCH_ROCKCHIP err = kbase_platform_rk_init_opp_table(kbdev); if (err) @@ -4645,13 +4663,17 @@ void power_control_term(struct kbase_device *kbdev) #if defined(CONFIG_PM_OPP) dev_pm_opp_of_remove_table(kbdev->dev); -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) - if (!IS_ERR_OR_NULL(kbdev->opp_table)) { +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->token > -EPERM) { dev_pm_opp_unregister_set_opp_helper(kbdev->opp_table); - dev_pm_opp_put_regulators(kbdev->opp_table); + dev_pm_opp_put_regulators(kbdev->token); } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) + if (!IS_ERR_OR_NULL(kbdev->opp_table)) + dev_pm_opp_put_regulators(kbdev->opp_table); +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ #endif /* CONFIG_PM_OPP */ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { @@ -5514,6 +5536,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->dev = &pdev->dev; + +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + kbdev->token = -EPERM; +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ + dev_set_drvdata(kbdev->dev, kbdev); #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) mutex_lock(&kbase_probe_mutex); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c index 60afde2ceb7f..beb292862b21 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c @@ -69,6 +69,12 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -113,7 +119,7 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) if (atomic_inc_return(&kctx->refcount) == 1) { int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); - if (free_as != KBASEP_AS_NR_INVALID) { + if (free_as >= 0) { kbdev->as_free &= ~(1u << free_as); /* Only program the MMU if the context has not been * assigned the same address space before. 
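/*
 * Illustrative sketch, not part of the patch: on >= 6.0 kernels the regulator
 * hookup above stores an integer token (0 or greater) instead of an opp_table
 * pointer, with negative values carrying an errno. Because 0 is a valid
 * token, the field is pre-set to -EPERM so teardown can tell "never acquired"
 * from "token 0". fake_set_regulators()/fake_put_regulators() below are
 * hypothetical stubs; only the convention is being shown.
 */
#include <errno.h>
#include <stdio.h>

struct fake_dev { int token; };

static int fake_set_regulators(int make_it_fail)
{
	return make_it_fail ? -ENODEV : 0;  /* 0 is a legitimate token id */
}

static void fake_put_regulators(int token)
{
	printf("released regulators for token %d\n", token);
}

static int probe(struct fake_dev *d, int make_it_fail)
{
	d->token = -EPERM;  /* "not acquired" sentinel, as in the patch */

	d->token = fake_set_regulators(make_it_fail);
	return d->token < 0 ? d->token : 0;
}

static void term(struct fake_dev *d)
{
	/* Mirrors power_control_term(): only put if a token was obtained. */
	if (d->token > -EPERM)
		fake_put_regulators(d->token);
}

int main(void)
{
	struct fake_dev d;

	if (!probe(&d, 0))
		term(&d);
	return 0;
}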
@@ -167,8 +173,10 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) */ WARN_ON(!atomic_read(&kctx->refcount)); #endif - WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); - WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) + WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + else + WARN(true, "Invalid as_nr(%d)", kctx->as_nr); atomic_inc(&kctx->refcount); } @@ -182,16 +190,17 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) new_ref_count = atomic_dec_return(&kctx->refcount); if (new_ref_count == 0) { - kbdev->as_free |= (1u << kctx->as_nr); - if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { - KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( - kbdev, kctx->id); - kbdev->as_to_kctx[kctx->as_nr] = NULL; - kctx->as_nr = KBASEP_AS_NR_INVALID; - kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); + if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) { + kbdev->as_free |= (1u << kctx->as_nr); + if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); #if !MALI_USE_CSF - kbase_backend_slot_kctx_purge_locked(kbdev, kctx); + kbase_backend_slot_kctx_purge_locked(kbdev, kctx); #endif + } } } @@ -201,13 +210,14 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(atomic_read(&kctx->refcount) != 0); - if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) { if (kbdev->pm.backend.gpu_powered) kbase_mmu_disable(kctx); @@ -215,6 +225,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h index f787cc34ba48..5a8d17547b7b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h @@ -59,6 +59,15 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); */ void kbase_ctx_sched_term(struct kbase_device *kbdev); +/** + * kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx + */ +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + /** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference @@ -113,9 +122,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. 
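/*
 * Illustrative sketch, not part of the patch: the context-scheduler changes
 * above guard every use of kctx->as_nr with a 0 <= as_nr < BASE_MAX_NR_AS
 * check before touching the as_free bitmask or as_to_kctx[]. This toy
 * allocator shows the same bitmask-plus-bounds-check idea with invented
 * names (MAX_AS, as_alloc, as_release).
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_AS 8  /* stands in for BASE_MAX_NR_AS */

static uint32_t as_free = (1u << MAX_AS) - 1u;  /* bit set => slot free */

static int as_alloc(void)
{
	int as;

	for (as = 0; as < MAX_AS; as++) {
		if (as_free & (1u << as)) {
			as_free &= ~(1u << as);  /* claim it */
			return as;
		}
	}
	return -1;  /* no address space available (the KBASEP_AS_NR_INVALID role) */
}

static void as_release(int as)
{
	/* Ignore obviously invalid numbers instead of corrupting the mask. */
	if (as < 0 || as >= MAX_AS)
		return;
	as_free |= (1u << as);
}

int main(void)
{
	int a = as_alloc(), b = as_alloc();

	printf("got %d and %d\n", a, b);
	as_release(b);
	as_release(42);  /* silently rejected, as the bounds checks intend */
	as_release(a);
	return 0;
}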
- * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. */ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index 15fa0d71387a..80f76145e393 100755 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -650,7 +650,6 @@ struct kbase_process { * struct kbase_mem_migrate - Object representing an instance for managing * page migration. * - * @mapping: Pointer to address space struct used for page migration. * @free_pages_list: List of deferred pages to free. Mostly used when page migration * is enabled. Pages in memory pool that require migrating * will be freed instead. However page cannot be freed @@ -661,13 +660,17 @@ struct kbase_process { * @free_pages_workq: Work queue to process the work items queued to free * pages in @free_pages_list. * @free_pages_work: Work item to free pages in @free_pages_list. + * @inode: Pointer to inode whose address space operations are used + * for page migration purposes. */ struct kbase_mem_migrate { - struct address_space *mapping; struct list_head free_pages_list; spinlock_t free_pages_lock; struct workqueue_struct *free_pages_workq; struct work_struct free_pages_work; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + struct inode *inode; +#endif }; /** @@ -709,6 +712,10 @@ struct kbase_mem_migrate { * @opp_table: Pointer to the device OPP structure maintaining the * link to OPPs attached to a device. This is obtained * after setting regulator names for the device. + * @token: Integer replacement for opp_table in kernel versions + * 6 and greater. Value is a token id number when 0 or greater, + * and a linux errno when negative. Must be initialised + * to an non-zero value as 0 is valid token id. * @devname: string containing the name used for GPU device instance, * miscellaneous device is registered using the same name. * @id: Unique identifier for the device, indicates the number of @@ -906,6 +913,10 @@ struct kbase_mem_migrate { * GPU2019-3878. PM state machine is invoked after * clearing this flag and @hwaccess_lock is used to * serialize the access. + * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction + * and cleared after the transaction completes. PM L2 state is + * prevented from entering powering up/down transitions when the + * flag is set, @hwaccess_lock is used to serialize the access. * @poweroff_pending: Set when power off operation for GPU is started, reset when * power on for GPU is started. * @infinite_cache_active_default: Set to enable using infinite cache for all the @@ -986,6 +997,10 @@ struct kbase_mem_migrate { * @oom_notifier_block: notifier_block containing kernel-registered out-of- * memory handler. * @mem_migrate: Per device object for managing page migration. + * @live_fence_metadata: Count of live fence metadata structures created by + * KCPU queue. These structures may outlive kbase module + * itself. Therefore, in such a case, a warning should be + * be produced. 
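/*
 * Illustrative sketch, not part of the patch: @live_fence_metadata above is a
 * plain counter of metadata objects that may outlive their creator, so a leak
 * can at least be reported at teardown. The standalone version below uses C11
 * atomics and invented names (live_meta, meta_alloc, meta_free,
 * module_exit_check).
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_int live_meta = 0;

static void *meta_alloc(void)
{
	atomic_fetch_add(&live_meta, 1);
	return malloc(32);
}

static void meta_free(void *m)
{
	free(m);
	atomic_fetch_sub(&live_meta, 1);
}

static void module_exit_check(void)
{
	int live = atomic_load(&live_meta);

	if (live)  /* the driver would emit a warning here */
		fprintf(stderr, "warning: %d metadata object(s) still live\n", live);
}

int main(void)
{
	void *a = meta_alloc();
	void *b = meta_alloc();

	meta_free(a);
	(void)b;              /* deliberately leaked for the demo */
	module_exit_check();  /* reports 1 live object */
	return 0;
}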
*/ struct kbase_device { u32 hw_quirks_sc; @@ -1010,14 +1025,16 @@ struct kbase_device { #if IS_ENABLED(CONFIG_REGULATOR) struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; unsigned int nr_regulators; -#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + int token; +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) struct opp_table *opp_table; -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ #endif /* CONFIG_REGULATOR */ char devname[DEVNAME_SIZE]; u32 id; -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) void *model; struct kmem_cache *irq_slab; struct workqueue_struct *irq_workq; @@ -1025,7 +1042,7 @@ struct kbase_device { atomic_t serving_gpu_irq; atomic_t serving_mmu_irq; spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ struct kbase_pm_device_data pm; struct kbase_mem_pool_group mem_pools; @@ -1186,6 +1203,7 @@ struct kbase_device { #if MALI_USE_CSF bool mmu_hw_operation_in_progress; #endif + bool mmu_page_migrate_in_progress; bool poweroff_pending; bool infinite_cache_active_default; @@ -1286,6 +1304,10 @@ struct kbase_device { #endif struct kbase_mem_migrate mem_migrate; + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_t live_fence_metadata; +#endif }; /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index dfe33e52b4ce..25986f604c6c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -33,6 +33,49 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" +#if MALI_USE_CSF +/* Maximum number of characters in DMA fence timeline name. */ +#define MAX_TIMELINE_NAME (32) + +/** + * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing + * information about KCPU queue. One instance per KCPU + * queue. + * + * @refcount: Atomic value to keep track of number of references to an instance. + * An instance can outlive the KCPU queue itself. + * @kbdev: Pointer to Kbase device. + * @kctx_id: Kbase context ID. + * @timeline_name: String of timeline name for associated fence object. + */ +struct kbase_kcpu_dma_fence_meta { +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_t refcount; +#else + refcount_t refcount; +#endif + struct kbase_device *kbdev; + int kctx_id; + char timeline_name[MAX_TIMELINE_NAME]; +}; + +/** + * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a + * reference to metadata containing more informaiton about it. + * + * @base: Fence object itself. + * @metadata: Pointer to metadata structure. 
+ */ +struct kbase_kcpu_dma_fence { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence base; +#else + struct dma_fence base; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + struct kbase_kcpu_dma_fence_meta *metadata; +}; +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; #else @@ -167,12 +210,56 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, */ #define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) +#if MALI_USE_CSF +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence) +#else +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence) +#endif +{ + if (fence->ops == &kbase_fence_ops) + return (struct kbase_kcpu_dma_fence *)fence; + + return NULL; +} + +static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) +{ +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + if (atomic_dec_and_test(&metadata->refcount)) { +#else + if (refcount_dec_and_test(&metadata->refcount)) { +#endif + atomic_dec(&metadata->kbdev->live_fence_metadata); + kfree(metadata); + } +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_kcpu_dma_fence_put(struct fence *fence) +#else +static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence); + + if (kcpu_fence) + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); +} +#endif /* MALI_USE_CSF */ + /** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. */ -#define kbase_fence_put(fence) dma_fence_put(fence) - +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_fence_put(struct fence *fence) +#else +static inline void kbase_fence_put(struct dma_fence *fence) +#endif +{ + dma_fence_put(fence); +} #endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c index be141553c674..25b4c9c03b53 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include static const char * @@ -41,7 +41,13 @@ kbase_fence_get_timeline_name(struct fence *fence) kbase_fence_get_timeline_name(struct dma_fence *fence) #endif { +#if MALI_USE_CSF + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + return kcpu_fence->metadata->timeline_name; +#else return kbase_timeline_name; +#endif /* MALI_USE_CSF */ } static bool @@ -62,24 +68,44 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) #endif { #if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - snprintf(str, size, "%u", fence->seqno); + const char *format = "%u"; #else - snprintf(str, size, "%llu", fence->seqno); + const char *format = "%llu"; #endif + if (unlikely(!scnprintf(str, size, format, fence->seqno))) + pr_err("Fail to encode fence seqno to string"); } +#if MALI_USE_CSF +static void +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_release(struct fence *fence) +#else +kbase_fence_release(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); + kfree(kcpu_fence); +} +#endif 
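/*
 * Illustrative sketch, not part of the patch: each KCPU fence above carries a
 * pointer to a shared, refcounted metadata block, and the last put both frees
 * the block and drops the device-wide live counter. The same get/put shape is
 * shown in portable C11; struct meta, meta_get and meta_put are invented
 * names, and the driver itself uses refcount_t/atomic_t rather than
 * stdatomic.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_int live_meta = 0;

struct meta {
	atomic_int refcount;
	char timeline_name[32];
};

static struct meta *meta_create(void)
{
	struct meta *m = calloc(1, sizeof(*m));

	if (!m)
		return NULL;
	atomic_init(&m->refcount, 1);
	atomic_fetch_add(&live_meta, 1);
	return m;
}

static void meta_get(struct meta *m)
{
	atomic_fetch_add(&m->refcount, 1);
}

static void meta_put(struct meta *m)
{
	/* fetch_sub returns the old value; 1 means this was the last holder. */
	if (atomic_fetch_sub(&m->refcount, 1) == 1) {
		atomic_fetch_sub(&live_meta, 1);
		free(m);
	}
}

int main(void)
{
	struct meta *m = meta_create();

	meta_get(m);   /* second fence on the same queue */
	meta_put(m);   /* first fence released */
	meta_put(m);   /* last fence released: block freed, counter drops */
	printf("live metadata: %d\n", atomic_load(&live_meta));
	return 0;
}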
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ -const struct fence_ops kbase_fence_ops = { - .wait = fence_default_wait, +const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, #else extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ -const struct dma_fence_ops kbase_fence_ops = { - .wait = dma_fence_default_wait, +const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, +#if MALI_USE_CSF + .fence_value_str = kbase_fence_fence_value_str, + .release = kbase_fence_release +#else + .fence_value_str = kbase_fence_fence_value_str #endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, - .fence_value_str = kbase_fence_fence_value_str }; - +KBASE_EXPORT_TEST_API(kbase_fence_ops); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 0282aaf8eb3a..7a7d17ea5f26 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -311,7 +311,6 @@ static void kbase_gpuprops_calculate_props( struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) { int i; - u32 gpu_id; /* Populate the base_gpu_props structure */ kbase_gpuprops_update_core_props_gpu_id(gpu_props); @@ -361,49 +360,23 @@ static void kbase_gpuprops_calculate_props( gpu_props->thread_props.tls_alloc = gpu_props->raw_props.thread_tls_alloc; - /* MIDHARC-2364 was intended for tULx. - * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. 
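/*
 * Illustrative sketch, not part of the patch: kbase_fence_fence_value_str()
 * above now picks the format once (the fence seqno is 32-bit before kernel
 * 5.1 and 64-bit from 5.1 on) and checks the encode result instead of
 * ignoring it. This standalone version uses snprintf and a fixed 64-bit
 * seqno; seqno_to_str is an invented name.
 */
#include <inttypes.h>
#include <stdio.h>

static int seqno_to_str(char *str, size_t size, uint64_t seqno)
{
	/* The driver swaps "%u"/"%llu" by kernel version; one format suffices here. */
	int n = snprintf(str, size, "%" PRIu64, seqno);

	if (n <= 0 || (size_t)n >= size) {
		fprintf(stderr, "failed to encode fence seqno\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	char buf[8];

	seqno_to_str(buf, sizeof(buf), 1234);                /* fits */
	seqno_to_str(buf, sizeof(buf), 123456789012345ULL);  /* truncated: reported */
	return 0;
}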
- */ - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - #if MALI_USE_CSF - CSTD_UNUSED(gpu_id); gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 22); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22); gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 22U, 2); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2); gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 8); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8); gpu_props->thread_props.max_thread_group_split = 0; #else - if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) { - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 22); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 22U, 2); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 8); - gpu_props->thread_props.max_thread_group_split = 0; - } else { - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 16); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 16U, 8); - gpu_props->thread_props.max_thread_group_split = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 6); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 30U, 2); - } + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); + gpu_props->thread_props.max_thread_group_split = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); #endif /* If values are not specified, then use defaults */ @@ -539,7 +512,7 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); static u32 l2_hash_values[ASN_HASH_COUNT] = { 0, }; -static int num_override_l2_hash_values; +static unsigned int num_override_l2_hash_values; module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); @@ -593,7 +566,7 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) kbdev->l2_hash_values_override = false; if (num_override_l2_hash_values) { - int i; + unsigned int i; kbdev->l2_hash_values_override = true; for (i = 0; i < num_override_l2_hash_values; i++) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index b6a8a2e5608f..c658fb79429b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -68,9 +68,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBAX: features = base_hw_features_tBAx; break; - case GPU_ID2_PRODUCT_TDUX: - features = base_hw_features_tDUx; - break; case GPU_ID2_PRODUCT_TODX: case GPU_ID2_PRODUCT_LODX: features = base_hw_features_tODx; @@ -211,10 +208,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TDUX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 }, - { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TODX, { { 
GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, @@ -235,6 +228,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_PRODUCT_TTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, @@ -393,9 +387,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBAX: issues = base_hw_issues_model_tBAx; break; - case GPU_ID2_PRODUCT_TDUX: - issues = base_hw_issues_model_tDUx; - break; case GPU_ID2_PRODUCT_TODX: case GPU_ID2_PRODUCT_LODX: issues = base_hw_issues_model_tODx; @@ -414,7 +405,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_LTIX: issues = base_hw_issues_model_tTIx; break; - default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h index 124a6d643e42..ca77c192deea 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h @@ -97,8 +97,8 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, * Return: true if context is now active, false otherwise (ie if context does * not have an address space assigned) */ -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, int js); +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js); /** * kbase_backend_release_ctx_irq - Release a context from the GPU. This will @@ -183,8 +183,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); * * Return: Atom currently at the head of slot @js, or NULL */ -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js); +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a @@ -194,7 +193,7 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, * * Return: Number of atoms currently on slot */ -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot @@ -204,7 +203,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); * * Return: Number of atoms currently on slot @js that are currently on the GPU. */ -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs @@ -233,7 +232,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev); * * Return: Number of jobs that can be submitted. */ -int kbase_backend_slot_free(struct kbase_device *kbdev, int js); +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js); /** * kbase_job_check_leave_disjoint - potentially leave disjoint state @@ -287,8 +286,8 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); * Context: * The job slot lock must be held when calling this function. 
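/*
 * Illustrative sketch, not part of the patch: the hardware-issue tables above
 * are selected per GPU version, with a { U32_MAX, NULL } sentinel closing
 * each product's list (the tTUx r0p1 row is the newly added entry). This
 * fragment shows the sentinel-terminated scan; VERSION_MAKE and the issue
 * arrays here are invented stand-ins, not the driver's encoding.
 */
#include <stdint.h>
#include <stdio.h>

#define VERSION_MAKE(major, minor, status) \
	(((uint32_t)(major) << 16) | ((uint32_t)(minor) << 8) | (uint32_t)(status))

static const int issues_r0p0[] = { 2716, 3901, 0 };
static const int issues_r0p1[] = { 2716, 3901, 148, 0 };

static const struct { uint32_t version; const int *issues; } version_map[] = {
	{ VERSION_MAKE(0, 0, 0), issues_r0p0 },
	{ VERSION_MAKE(0, 1, 0), issues_r0p1 },
	{ UINT32_MAX, NULL },  /* sentinel, as in the driver tables */
};

static const int *issues_for(uint32_t version)
{
	size_t i;

	for (i = 0; version_map[i].issues; i++)
		if (version_map[i].version == version)
			return version_map[i].issues;
	return NULL;  /* unknown version */
}

int main(void)
{
	printf("r0p1 known: %s\n", issues_for(VERSION_MAKE(0, 1, 0)) ? "yes" : "no");
	printf("r9p9 known: %s\n", issues_for(VERSION_MAKE(9, 9, 0)) ? "yes" : "no");
	return 0;
}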
*/ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); +void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, + struct kbase_jd_atom *target_katom); /** * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c index f5faa92525c5..f44426a736ca 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c @@ -28,6 +28,11 @@ #include #include #include +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #include #include @@ -1074,11 +1079,19 @@ int kbase_jd_submit(struct kbase_context *kctx, return -EINVAL; } + if (nr_atoms > BASE_JD_ATOM_COUNT) { + dev_dbg(kbdev->dev, "Invalid attempt to submit %u atoms at once for kctx %d_%d", + nr_atoms, kctx->tgid, kctx->id); + return -EINVAL; + } + /* All atoms submitted in this call have the same flush ID */ latest_flush = kbase_backend_get_current_flush_id(kbdev); for (i = 0; i < nr_atoms; i++) { - struct base_jd_atom user_atom; + struct base_jd_atom user_atom = { + .seq_nr = 0, + }; struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; @@ -1202,6 +1215,12 @@ while (false) kbase_disjoint_event_potential(kbdev); mutex_unlock(&jctx->lock); + if (fatal_signal_pending(current)) { + dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d", + kctx->tgid, kctx->id); + /* We're being killed so the result code doesn't really matter */ + return 0; + } } if (need_to_try_schedule_context) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c index 6cbd6f1a423e..1ac5cd3eafff 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,15 +37,13 @@ * * Return: true if slot can still be submitted on, false if slot is now full. 
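/*
 * Illustrative sketch, not part of the patch: kbase_jd_submit() above now
 * caps the batch size up front, zero-initialises each atom descriptor before
 * copying it from user space, and stops early when the submitting task has a
 * fatal signal pending. The plain-C version below fakes the signal with a
 * flag; MAX_ATOMS, submit_batch and abort_requested are invented names.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_ATOMS 256  /* stands in for BASE_JD_ATOM_COUNT */

struct atom { unsigned int seq_nr; unsigned int flags; };

static bool abort_requested(unsigned int i)
{
	return i == 3;  /* pretend a fatal signal arrives mid-batch */
}

static int submit_batch(unsigned int nr_atoms)
{
	unsigned int i;

	if (nr_atoms > MAX_ATOMS)
		return -EINVAL;  /* reject oversized batches before any work */

	for (i = 0; i < nr_atoms; i++) {
		struct atom a = { .seq_nr = 0 };  /* no stale stack contents */

		/* ...copy from user and queue the atom here... */
		(void)a;
		if (abort_requested(i)) {
			printf("stopping after %u atoms\n", i + 1);
			return 0;  /* being killed: result code no longer matters */
		}
	}
	return 0;
}

int main(void)
{
	printf("oversized batch -> %d\n", submit_batch(100000));
	printf("small batch     -> %d\n", submit_batch(8));
	return 0;
}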
*/ -static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, - int nr_jobs_to_submit) +static bool kbase_jm_next_job(struct kbase_device *kbdev, unsigned int js, int nr_jobs_to_submit) { struct kbase_context *kctx; int i; kctx = kbdev->hwaccess.active_kctx[js]; - dev_dbg(kbdev->dev, - "Trying to run the next %d jobs in kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "Trying to run the next %d jobs in kctx %pK (s:%u)\n", nr_jobs_to_submit, (void *)kctx, js); if (!kctx) @@ -60,7 +58,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kbase_backend_run_atom(kbdev, katom); } - dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js); + dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%u)\n", js); return false; } @@ -72,7 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); while (js_mask) { - int js = ffs(js_mask) - 1; + unsigned int js = ffs(js_mask) - 1; int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) @@ -111,14 +109,14 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev) void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", (void *)kctx, + js); kbdev->hwaccess.active_kctx[js] = NULL; } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index a64d7327a76b..78f2d7d47b3b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -77,8 +77,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom); static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_ctx_job_cb *callback); @@ -151,8 +150,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev) * * Return: true if there are no atoms to pull, false otherwise. */ -static inline bool -jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) +static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, unsigned int js, int prio) { bool none_to_pull; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; @@ -161,9 +159,8 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); - dev_dbg(kctx->kbdev->dev, - "Slot %d (prio %d) is %spullable in kctx %pK\n", - js, prio, none_to_pull ? "not " : "", kctx); + dev_dbg(kctx->kbdev->dev, "Slot %u (prio %d) is %spullable in kctx %pK\n", js, prio, + none_to_pull ? "not " : "", kctx); return none_to_pull; } @@ -179,8 +176,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) * Return: true if the ring buffers for all priorities have no pullable atoms, * false otherwise. 
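/*
 * Illustrative sketch, not part of the patch: the scheduler loops above
 * repeatedly take the lowest set bit of the slot mask with ffs() and clear
 * bits as slots are dealt with, so each requested job slot is visited exactly
 * once. Same loop below in standalone form; kick_slot is an invented stand-in
 * for the per-slot work.
 */
#include <stdio.h>
#include <strings.h>  /* ffs() */

static void kick_slot(unsigned int js)
{
	printf("kicking job slot %u\n", js);
}

static void kick_mask(unsigned int js_mask)
{
	while (js_mask) {
		unsigned int js = (unsigned int)ffs((int)js_mask) - 1u;

		kick_slot(js);
		js_mask &= ~(1u << js);  /* done with this slot */
	}
}

int main(void)
{
	kick_mask(0x0Du);  /* slots 0, 2, 3 */
	return 0;
}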
*/ -static inline bool -jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) +static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned int js) { int prio; @@ -212,8 +208,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) * * The HW access lock must always be held when calling this function. */ -static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, - int prio, kbasep_js_ctx_job_cb *callback) +static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js, int prio, + kbasep_js_ctx_job_cb *callback) { struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; @@ -272,7 +268,7 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback * for each entry, and remove the entry from the queue. */ -static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, +static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int js, kbasep_js_ctx_job_cb *callback) { int prio; @@ -293,15 +289,14 @@ static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ -static inline struct kbase_jd_atom * -jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) +static inline struct kbase_jd_atom *jsctx_rb_peek_prio(struct kbase_context *kctx, unsigned int js, + int prio) { struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; struct rb_node *node; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kctx->kbdev->dev, - "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n", + dev_dbg(kctx->kbdev->dev, "Peeking runnable tree of kctx %pK for prio %d (s:%u)\n", (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); @@ -326,8 +321,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * * Return: Pointer to next atom in buffer, or NULL if there is no atom. 
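/*
 * Illustrative sketch, not part of the patch: jsctx_rb_peek() above walks the
 * per-slot queues in priority order and returns the head of the first
 * non-empty one (the driver keeps each queue as an rbtree; plain arrays are
 * used here, with index 0 treated as the highest priority in this toy
 * version). queue_peek and NR_PRIO are invented names.
 */
#include <stddef.h>
#include <stdio.h>

#define NR_PRIO 3

struct queue { const char *head; };  /* NULL head => empty queue */

static const char *queue_peek(struct queue q[NR_PRIO])
{
	int prio;

	for (prio = 0; prio < NR_PRIO; prio++)
		if (q[prio].head)
			return q[prio].head;  /* first runnable atom found */
	return NULL;  /* nothing to pull on this slot */
}

int main(void)
{
	struct queue slot[NR_PRIO] = { { NULL }, { "atom-medium" }, { "atom-low" } };

	printf("next: %s\n", queue_peek(slot));  /* atom-medium */
	return 0;
}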
*/ -static inline struct kbase_jd_atom * -jsctx_rb_peek(struct kbase_context *kctx, int js) +static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, unsigned int js) { int prio; @@ -358,7 +352,7 @@ static inline void jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -377,14 +371,14 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbase_device *kbdev = kctx->kbdev; int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n", - (void *)katom, (void *)kctx, js); + dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%u)\n", (void *)katom, + (void *)kctx, js); while (*new) { struct kbase_jd_atom *entry = container_of(*new, @@ -425,15 +419,11 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) jsctx_tree_add(kctx, katom); } -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, - int js, - bool is_scheduled); +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled); static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); + struct kbase_context *kctx, unsigned int js); static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); + struct kbase_context *kctx, unsigned int js); typedef bool(katom_ordering_func)(const struct kbase_jd_atom *, const struct kbase_jd_atom *); @@ -645,6 +635,8 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) KBASE_DEBUG_ASSERT(kctx != NULL); + kbase_ctx_sched_init_ctx(kctx); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); @@ -683,7 +675,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) { struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; - int js; + unsigned int js; bool update_ctx_count = false; unsigned long flags; CSTD_UNUSED(js_kctx_info); @@ -722,6 +714,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /* @@ -729,8 +723,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) */ /* Should not normally use directly - use kbase_jsctx_slot_atom_pulled_dec() instead */ -static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, - int js, int sched_prio) +static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js, + int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; @@ -742,7 +736,7 @@ static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, NULL, 0, js, (unsigned int)sched_prio); } -static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js) +static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js) { return atomic_read(&kctx->slot_tracking[js].atoms_pulled); } @@ -752,7 +746,7 @@ static int 
kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js) * - that priority level is blocked * - or, any higher priority level is blocked */ -static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js, +static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js, int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = @@ -792,7 +786,7 @@ static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js, static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, const struct kbase_jd_atom *katom) { - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; @@ -801,7 +795,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, lockdep_assert_held(&kctx->kbdev->hwaccess_lock); WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio), - "Should not have pulled atoms for slot %d from a context that is blocked at priority %d or higher", + "Should not have pulled atoms for slot %u from a context that is blocked at priority %d or higher", js, sched_prio); nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots); @@ -830,7 +824,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, const struct kbase_jd_atom *katom) { - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; int atoms_pulled_pri; struct kbase_jsctx_slot_tracking *slot_tracking = @@ -879,14 +873,12 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%u)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -921,14 +913,13 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable_head_nolock( - struct kbase_device *kbdev, struct kbase_context *kctx, int js) +static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%u)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -966,8 +957,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret; unsigned long flags; @@ -997,14 +987,12 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device 
*kbdev, * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); @@ -1039,9 +1027,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { bool ret = false; @@ -1077,9 +1064,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, * Return: Context to use for specified slot. * NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( - struct kbase_device *kbdev, - int js) +static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_device *kbdev, + unsigned int js) { struct kbase_context *kctx; int i; @@ -1095,9 +1081,8 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( jctx.sched_info.ctx.ctx_list_entry[js]); list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - dev_dbg(kbdev->dev, - "Popped %pK from the pullable queue (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx, + js); return kctx; } return NULL; @@ -1112,8 +1097,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( * Return: Context to use for specified slot. 
* NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head( - struct kbase_device *kbdev, int js) +static struct kbase_context *kbase_js_ctx_list_pop_head(struct kbase_device *kbdev, unsigned int js) { struct kbase_context *kctx; unsigned long flags; @@ -1137,8 +1121,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( * Return: true if context can be pulled from on specified slot * false otherwise */ -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, - bool is_scheduled) +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled) { struct kbasep_js_device_data *js_devdata; struct kbase_jd_atom *katom; @@ -1157,8 +1140,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, } katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); return false; /* No pullable atoms */ } if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { @@ -1166,7 +1148,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom, katom->jc, js, (unsigned int)katom->sched_priority); dev_dbg(kbdev->dev, - "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", (void *)kctx, katom->sched_priority, js); return false; } @@ -1187,14 +1169,14 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); return false; } } - dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n", - (void *)katom, (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%u)\n", (void *)katom, + (void *)kctx, js); return true; } @@ -1205,7 +1187,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_device *kbdev = kctx->kbdev; bool ret = true; bool has_dep = false, has_x_dep = false; - int js = kbase_js_get_slot(kbdev, katom); + unsigned int js = kbase_js_get_slot(kbdev, katom); int prio = katom->sched_priority; int i; @@ -1213,7 +1195,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_jd_atom *dep_atom = katom->dep[i].atom; if (dep_atom) { - int dep_js = kbase_js_get_slot(kbdev, dep_atom); + unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom); int dep_prio = dep_atom->sched_priority; dev_dbg(kbdev->dev, @@ -1368,7 +1350,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) { struct kbase_device *kbdev = kctx->kbdev; - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -2074,9 +2056,8 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev) kbase_backend_timeouts_changed(kbdev); } -static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { struct kbasep_js_device_data *js_devdata; struct 
kbasep_js_kctx_info *js_kctx_info; @@ -2084,7 +2065,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; - dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js); + dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%u)\n", kctx, js); js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -2111,8 +2092,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, WARN_ON(as_nr == KBASEP_AS_NR_INVALID); } } - if (as_nr == KBASEP_AS_NR_INVALID) - return false; /* No address spaces currently available */ + if ((as_nr < 0) || (as_nr >= BASE_MAX_NR_AS)) + return false; /* No address space currently available */ /* * Atomic transaction on the Context and Run Pool begins @@ -2219,9 +2200,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, return true; } -static bool kbase_js_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { unsigned long flags; @@ -2229,9 +2209,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && kbase_backend_use_ctx_sched(kbdev, kctx, js)) { - - dev_dbg(kbdev->dev, - "kctx %pK already has ASID - mark as active (s:%d)\n", + dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n", (void *)kctx, js); if (kbdev->hwaccess.active_kctx[js] != kctx) { @@ -2498,8 +2476,7 @@ bool kbase_js_is_atom_valid(struct kbase_device *kbdev, return true; } -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->core_req & BASE_JD_REQ_JOB_SLOT) return katom->jobslot; @@ -2538,11 +2515,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, (katom->pre_dep && (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n", - (void *)katom, js); + dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%u)\n", (void *)katom, js); list_add_tail(&katom->queue, &queue->x_dep_head); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; @@ -2633,8 +2609,8 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) * * Context: Caller must hold the HW access lock */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, int prio) +static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom, + unsigned int js, int prio) { struct kbase_jd_atom *x_dep = katom->x_post_dep; struct kbase_jd_atom *next_katom = katom->post_dep; @@ -2666,7 +2642,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, } } -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) { struct kbase_jd_atom *katom; struct kbasep_js_device_data *js_devdata; @@ -2676,8 +2652,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%u)\n", (void *)kctx, js); js_devdata = 
&kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -2696,13 +2671,12 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); return NULL; } if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { dev_dbg(kbdev->dev, - "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", (void *)kctx, katom->sched_priority, js); return NULL; } @@ -2736,7 +2710,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); return NULL; } @@ -2759,7 +2733,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom->ticks = 0; - dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%u)\n", (void *)katom, (void *)kctx, js); return katom; @@ -3362,7 +3336,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && !kbase_jsctx_atoms_pulled(kctx) && !kbase_ctx_flag(kctx, KCTX_DYING)) { - int js; + unsigned int js; kbasep_js_set_submit_allowed(js_devdata, kctx); @@ -3374,7 +3348,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, } } else if (katom->x_post_dep && kbasep_js_is_submit_allowed(js_devdata, kctx)) { - int js; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) @@ -3604,13 +3578,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) return false; } -void kbase_js_sched(struct kbase_device *kbdev, int js_mask) +void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) { struct kbasep_js_device_data *js_devdata; struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; bool timer_sync = false; bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; - int js; + unsigned int js; KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0); @@ -3639,24 +3613,20 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (!kctx) { js_mask &= ~(1 << js); - dev_dbg(kbdev->dev, - "No kctx on pullable list (s:%d)\n", - js); + dev_dbg(kbdev->dev, "No kctx on pullable list (s:%u)\n", js); break; } if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { context_idle = true; - dev_dbg(kbdev->dev, - "kctx %pK is not active (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "kctx %pK is not active (s:%u)\n", (void *)kctx, + js); if (kbase_pm_context_active_handle_suspend( kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - dev_dbg(kbdev->dev, - "Suspend pending (s:%d)\n", js); + dev_dbg(kbdev->dev, "Suspend pending (s:%u)\n", js); /* Suspend pending - return context to * queue and stop scheduling */ @@ -3714,16 +3684,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) kbase_ctx_flag_clear(kctx, KCTX_PULLED); if (!kbase_jm_kick(kbdev, 1 << js)) { - dev_dbg(kbdev->dev, - "No more jobs can be submitted (s:%d)\n", - js); + dev_dbg(kbdev->dev, "No more jobs can be submitted 
(s:%u)\n", js); js_mask &= ~(1 << js); } if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { bool pullable; - dev_dbg(kbdev->dev, - "No atoms pulled from kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n", (void *)kctx, js); pullable = kbase_js_ctx_pullable(kctx, js, @@ -3807,8 +3774,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && ctx_waiting[js]) { - dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", - (void *)last_active[js], js); + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", + (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; } } @@ -3879,7 +3846,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { @@ -4003,7 +3970,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, { struct kbase_device *kbdev; unsigned long flags; - u32 js; + unsigned int js; kbdev = kctx->kbdev; @@ -4035,4 +4002,3 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr req_priority); return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); } - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c index 7b8961679a10..14a730dc5a12 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c @@ -48,6 +48,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* Define static_assert(). * * The macro was introduced in kernel 5.1. But older vendor kernels may define diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c index 5fb11b7b94c5..823f9156e19e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c @@ -36,9 +36,15 @@ #include #include #include +#include #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* The minimum allowed interval between dumps, in nanoseconds * (equivalent to 10KHz) */ @@ -115,16 +121,31 @@ struct kbase_kinstr_prfcnt_client_config { }; /** - * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to - * carry out for a kinstr_prfcnt_client. - * @dump_work: Worker for performing asynchronous counter dumps. - * @user_data: User data for asynchronous dump in progress. - * @ts_end_ns: End timestamp of most recent async dump. + * enum kbase_kinstr_prfcnt_client_init_state - A list of + * initialisation states that the + * kinstr_prfcnt client can be at + * during initialisation. Useful + * for terminating a partially + * initialised client. 
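/*
 * Illustrative sketch, not part of the patch itself: the staged-init pattern
 * this enum enables, reduced to two hypothetical resources (all demo_* names
 * are made up). Creation walks the states in ascending order; on failure,
 * teardown replays only the states that completed, in reverse, starting from
 * whichever state was reached.
 */
#include <stdlib.h>

enum demo_init_state { DEMO_UNINIT, DEMO_BUF_A, DEMO_BUF_B, DEMO_INITIALISED };

struct demo_client { void *buf_a; void *buf_b; };

void demo_destroy_partial(struct demo_client *c, enum demo_init_state reached)
{
	/* Post-decrement: the body only ever sees states that completed. */
	while (reached-- > DEMO_UNINIT) {
		switch (reached) {
		case DEMO_BUF_B: free(c->buf_b); break;
		case DEMO_BUF_A: free(c->buf_a); break;
		default: break;
		}
	}
	free(c);
}

struct demo_client *demo_create(void)
{
	enum demo_init_state s;
	struct demo_client *c = calloc(1, sizeof(*c));

	if (!c)
		return NULL;

	for (s = DEMO_UNINIT; s < DEMO_INITIALISED; s++) {
		int err = 0;

		switch (s) {
		case DEMO_BUF_A: err = !(c->buf_a = malloc(64)); break;
		case DEMO_BUF_B: err = !(c->buf_b = malloc(64)); break;
		default: break;
		}
		if (err) {
			demo_destroy_partial(c, s);	/* 's' records how far we got */
			return NULL;
		}
	}
	return c;	/* full teardown later: demo_destroy_partial(c, DEMO_INITIALISED) */
}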
+ * + * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised + * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session + * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map + * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer + * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array + * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client + * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue + * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised */ -struct kbase_kinstr_prfcnt_async { - struct work_struct dump_work; - u64 user_data; - u64 ts_end_ns; +enum kbase_kinstr_prfcnt_client_init_state { + KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_ENABLE_MAP, + KINSTR_PRFCNT_DUMP_BUFFER, + KINSTR_PRFCNT_SAMPLE_ARRAY, + KINSTR_PRFCNT_VIRTUALIZER_CLIENT, + KINSTR_PRFCNT_WAITQ_MUTEX, + KINSTR_PRFCNT_INITIALISED }; /** @@ -134,9 +155,7 @@ struct kbase_kinstr_prfcnt_async { * @hvcli: Hardware counter virtualizer client. * @node: Node used to attach this client to list in * kinstr_prfcnt context. - * @cmd_sync_lock: Lock coordinating the reader interface for commands - * that need interacting with the async sample dump - * worker thread. + * @cmd_sync_lock: Lock coordinating the reader interface for commands. * @next_dump_time_ns: Time in ns when this client's next periodic dump must * occur. If 0, not a periodic client. * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic @@ -157,15 +176,10 @@ struct kbase_kinstr_prfcnt_async { * @waitq: Client's notification queue. * @sample_size: Size of the data required for one sample, in bytes. * @sample_count: Number of samples the client is able to capture. - * @sync_sample_count: Number of available spaces for synchronous samples. - * It can differ from sample_count if asynchronous - * sample requests are reserving space in the buffer. * @user_data: User data associated with the session. * This is set when the session is started and stopped. * This value is ignored for control commands that * provide another value. - * @async: Asynchronous sampling operations to carry out in this - * client's session. */ struct kbase_kinstr_prfcnt_client { struct kbase_kinstr_prfcnt_context *kinstr_ctx; @@ -186,9 +200,7 @@ struct kbase_kinstr_prfcnt_client { wait_queue_head_t waitq; size_t sample_size; size_t sample_count; - atomic_t sync_sample_count; u64 user_data; - struct kbase_kinstr_prfcnt_async async; }; static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { @@ -423,6 +435,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena size_t grp, blk, blk_inst; struct prfcnt_metadata **ptr_md = block_meta_base; const struct kbase_hwcnt_metadata *metadata; + uint8_t block_idx = 0; if (!dst || !*block_meta_base) return -EINVAL; @@ -431,6 +444,10 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { u8 *dst_blk; + /* Block indices must be reported with no gaps. 
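/*
 * Illustrative sketch, not part of the patch itself: why blk_inst cannot be
 * reported directly as block_idx. Skipped (reserved, unavailable or disabled)
 * instances would leave holes, so the reported index is a separate counter
 * that only advances for blocks actually emitted. The driver additionally
 * resets the counter whenever a new block type starts (blk_inst == 0).
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const int emitted[6] = { 1, 0, 1, 1, 0, 1 };	/* made-up enable pattern */
	uint8_t block_idx = 0;
	int blk_inst;

	for (blk_inst = 0; blk_inst < 6; blk_inst++) {
		if (!emitted[blk_inst])
			continue;	/* using blk_inst here would leave gaps at 1 and 4 */
		/* ...fill one metadata item with .block_idx = block_idx... */
		block_idx++;
	}
	assert(block_idx == 4);		/* indices handed out: 0, 1, 2, 3 with no gaps */
	return 0;
}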
*/ + if (blk_inst == 0) + block_idx = 0; + /* Skip unavailable or non-enabled blocks */ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || @@ -444,13 +461,14 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( kbase_hwcnt_metadata_block_type(metadata, grp, blk)); - (*ptr_md)->u.block_md.block_idx = (u8)blk_inst; + (*ptr_md)->u.block_md.block_idx = block_idx; (*ptr_md)->u.block_md.set = counter_set; (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr); /* update the buf meta data block pointer to next item */ (*ptr_md)++; + block_idx++; } return 0; @@ -502,33 +520,6 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata( ptr_md->hdr.item_version = 0; } -/** - * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample - * for output. - * @cli: Non-NULL pointer to a kinstr_prfcnt client. - * @buf_idx: The index to the sample array for saving the sample. - */ -static void kbasep_kinstr_prfcnt_client_output_empty_sample( - struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx) -{ - struct kbase_hwcnt_dump_buffer *dump_buf; - struct prfcnt_metadata *ptr_md; - - if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) - return; - - dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; - ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; - - kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map); - - /* Use end timestamp from most recent async dump */ - ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns; - ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns; - - kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); -} - /** * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output. * @cli: Non-NULL pointer to a kinstr_prfcnt client. @@ -578,16 +569,11 @@ static void kbasep_kinstr_prfcnt_client_output_sample( * @cli: Non-NULL pointer to a kinstr_prfcnt client. * @event_id: Event type that triggered the dump. * @user_data: User data to return to the user. - * @async_dump: Whether this is an asynchronous dump or not. - * @empty_sample: Sample block data will be 0 if this is true. * * Return: 0 on success, else error code. */ -static int -kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, - enum base_hwcnt_reader_event event_id, - u64 user_data, bool async_dump, - bool empty_sample) +static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, + enum base_hwcnt_reader_event event_id, u64 user_data) { int ret; u64 ts_start_ns = 0; @@ -605,17 +591,11 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, /* Check if there is a place to copy HWC block into. Calculate the * number of available samples count, by taking into account the type * of dump. - * Asynchronous dumps have the ability to reserve space in the samples - * array for future dumps, unlike synchronous dumps. Because of that, - * the samples count for synchronous dumps is managed by a variable - * called sync_sample_count, that originally is defined as equal to the - * size of the whole array but later decreases every time an - * asynchronous dump request is pending and then re-increased every - * time an asynchronous dump request is completed. */ - available_samples_count = async_dump ? 
- cli->sample_arr.sample_count : - atomic_read(&cli->sync_sample_count); + available_samples_count = cli->sample_arr.sample_count; + WARN_ON(available_samples_count < 1); + /* Reserve one slot to store the implicit sample taken on CMD_STOP */ + available_samples_count -= 1; if (write_idx - read_idx == available_samples_count) { /* For periodic sampling, the current active dump * will be accumulated in the next sample, when @@ -631,38 +611,19 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, */ write_idx %= cli->sample_arr.sample_count; - if (!empty_sample) { - ret = kbase_hwcnt_virtualizer_client_dump( - cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf); - /* HWC dump error, set the sample with error flag */ - if (ret) - cli->sample_flags |= SAMPLE_FLAG_ERROR; + ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns, + &cli->tmp_buf); + /* HWC dump error, set the sample with error flag */ + if (ret) + cli->sample_flags |= SAMPLE_FLAG_ERROR; - /* Make the sample ready and copy it to the userspace mapped buffer */ - kbasep_kinstr_prfcnt_client_output_sample( - cli, write_idx, user_data, ts_start_ns, ts_end_ns); - } else { - if (!async_dump) { - struct prfcnt_metadata *ptr_md; - /* User data will not be updated for empty samples. */ - ptr_md = cli->sample_arr.samples[write_idx].sample_meta; - ptr_md->u.sample_md.user_data = user_data; - } - - /* Make the sample ready and copy it to the userspace mapped buffer */ - kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx); - } + /* Make the sample ready and copy it to the userspace mapped buffer */ + kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns, + ts_end_ns); /* Notify client. Make sure all changes to memory are visible. */ wmb(); atomic_inc(&cli->write_idx); - if (async_dump) { - /* Remember the end timestamp of async dump for empty samples */ - if (!empty_sample) - cli->async.ts_end_ns = ts_end_ns; - - atomic_inc(&cli->sync_sample_count); - } wake_up_interruptible(&cli->waitq); /* Reset the flags for the next sample dump */ cli->sample_flags = 0; @@ -676,6 +637,9 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, { int ret; u64 tm_start, tm_end; + unsigned int write_idx; + unsigned int read_idx; + size_t available_samples_count; WARN_ON(!cli); lockdep_assert_held(&cli->cmd_sync_lock); @@ -684,6 +648,16 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, if (cli->active) return 0; + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + /* Check whether there is space to store atleast an implicit sample + * corresponding to CMD_STOP. 
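/*
 * Illustrative sketch, not part of the patch itself: the read_idx/write_idx
 * arithmetic relied on above. The counters only ever increase and are reduced
 * modulo sample_count when indexing the array, so the unsigned difference
 * "write_idx - read_idx" is the number of queued samples even after 32-bit
 * wrap-around, and one slot stays reserved for the implicit sample produced
 * on CMD_STOP.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t sample_count = 4;
	const uint32_t capacity = sample_count - 1;	/* one slot kept for CMD_STOP */

	/* Values straddling the wrap point on purpose. */
	uint32_t read_idx = UINT32_MAX - 1;
	uint32_t write_idx = read_idx + 3;		/* wraps past zero */
	uint32_t queued = write_idx - read_idx;		/* modular arithmetic: 3 */

	assert(queued == 3);
	assert(queued == capacity);	/* ring treated as full: a new dump is refused */
	return 0;
}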
+ */ + available_samples_count = cli->sample_count - (write_idx - read_idx); + if (!available_samples_count) + return -EBUSY; + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); @@ -696,7 +670,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); if (!ret) { - atomic_set(&cli->sync_sample_count, cli->sample_count); cli->active = true; cli->user_data = user_data; cli->sample_flags = 0; @@ -710,16 +683,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, return ret; } -static int kbasep_kinstr_prfcnt_client_wait_async_done( - struct kbase_kinstr_prfcnt_client *cli) -{ - lockdep_assert_held(&cli->cmd_sync_lock); - - return wait_event_interruptible(cli->waitq, - atomic_read(&cli->sync_sample_count) == - cli->sample_count); -} - static int kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) @@ -728,7 +691,7 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, u64 tm_start = 0; u64 tm_end = 0; struct kbase_hwcnt_physical_enable_map phys_em; - struct kbase_hwcnt_dump_buffer *tmp_buf = NULL; + size_t available_samples_count; unsigned int write_idx; unsigned int read_idx; @@ -739,12 +702,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, if (!cli->active) return -EINVAL; - /* Wait until pending async sample operation done */ - ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); - - if (ret < 0) - return -ERESTARTSYS; + mutex_lock(&cli->kinstr_ctx->lock); + /* Disable counters under the lock, so we do not race with the + * sampling thread. + */ phys_em.fe_bm = 0; phys_em.tiler_bm = 0; phys_em.mmu_l2_bm = 0; @@ -752,15 +714,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); - mutex_lock(&cli->kinstr_ctx->lock); - /* Check whether one has the buffer to hold the last sample */ write_idx = atomic_read(&cli->write_idx); read_idx = atomic_read(&cli->read_idx); - /* Check if there is a place to save the last stop produced sample */ - if (write_idx - read_idx < cli->sample_arr.sample_count) - tmp_buf = &cli->tmp_buf; + available_samples_count = cli->sample_count - (write_idx - read_idx); ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, &cli->enable_map, @@ -770,7 +728,8 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, if (ret) cli->sample_flags |= SAMPLE_FLAG_ERROR; - if (tmp_buf) { + /* There must be a place to save the last stop produced sample */ + if (!WARN_ON(!available_samples_count)) { write_idx %= cli->sample_arr.sample_count; /* Handle the last stop sample */ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, @@ -800,50 +759,6 @@ kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) { int ret; - bool empty_sample = false; - - lockdep_assert_held(&cli->cmd_sync_lock); - - /* If the client is not started, or not manual, the command invalid */ - if (!cli->active || cli->dump_interval_ns) - return -EINVAL; - - /* Wait until pending async sample operation done, this is required to - * satisfy the stated sample sequence following their issuing order, - * reflected by the sample start timestamp. - */ - if (atomic_read(&cli->sync_sample_count) != cli->sample_count) { - /* Return empty sample instead of performing real dump. 
- * As there is an async dump currently in-flight which will - * have the desired information. - */ - empty_sample = true; - ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); - - if (ret < 0) - return -ERESTARTSYS; - } - - mutex_lock(&cli->kinstr_ctx->lock); - - ret = kbasep_kinstr_prfcnt_client_dump(cli, - BASE_HWCNT_READER_EVENT_MANUAL, - user_data, false, empty_sample); - - mutex_unlock(&cli->kinstr_ctx->lock); - - return ret; -} - -static int -kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, - u64 user_data) -{ - unsigned int write_idx; - unsigned int read_idx; - unsigned int active_async_dumps; - unsigned int new_async_buf_idx; - int ret; lockdep_assert_held(&cli->cmd_sync_lock); @@ -853,45 +768,7 @@ kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, mutex_lock(&cli->kinstr_ctx->lock); - write_idx = atomic_read(&cli->write_idx); - read_idx = atomic_read(&cli->read_idx); - active_async_dumps = - cli->sample_count - atomic_read(&cli->sync_sample_count); - new_async_buf_idx = write_idx + active_async_dumps; - - /* Check if there is a place to copy HWC block into. - * If successful, reserve space in the buffer for the asynchronous - * operation to make sure that it can actually take place. - * Because we reserve space for asynchronous dumps we need to take that - * in consideration here. - */ - ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ? - -EBUSY : - 0; - - if (ret == -EBUSY) { - mutex_unlock(&cli->kinstr_ctx->lock); - return ret; - } - - if (active_async_dumps > 0) { - struct prfcnt_metadata *ptr_md; - unsigned int buf_idx = - new_async_buf_idx % cli->sample_arr.sample_count; - /* Instead of storing user_data, write it directly to future - * empty sample. - */ - ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; - ptr_md->u.sample_md.user_data = user_data; - - atomic_dec(&cli->sync_sample_count); - } else { - cli->async.user_data = user_data; - atomic_dec(&cli->sync_sample_count); - - kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt, - &cli->async.dump_work); - } + ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data); mutex_unlock(&cli->kinstr_ctx->lock); @@ -948,10 +825,6 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, ret = kbasep_kinstr_prfcnt_client_sync_dump( cli, control_cmd->user_data); break; - case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: - ret = kbasep_kinstr_prfcnt_client_async_dump( - cli, control_cmd->user_data); - break; case PRFCNT_CONTROL_CMD_DISCARD: ret = kbasep_kinstr_prfcnt_client_discard(cli); break; @@ -1006,17 +879,6 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, sample_meta = cli->sample_arr.samples[read_idx].sample_meta; sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf; - /* Verify that a valid sample has been dumped in the read_idx. - * There are situations where this may not be the case, - * for instance if the client is trying to get an asynchronous - * sample which has not been dumped yet. 
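/*
 * Illustrative sketch, not part of the patch itself (generic demo_* names,
 * kernel-style): the publish/consume ordering the reader relies on once the
 * async path is gone. The producer side mirrors what
 * kbasep_kinstr_prfcnt_client_dump() does above: finish writing the sample,
 * issue a write barrier, and only then advance write_idx, so any index the
 * reader observes refers to a fully written sample.
 */
#include <linux/atomic.h>
#include <linux/types.h>
#include <asm/barrier.h>

struct demo_sample { u64 seq; char payload[64]; };

struct demo_ring {
	atomic_t write_idx;
	unsigned int count;
	struct demo_sample *samples;
};

static void demo_publish(struct demo_ring *r, const struct demo_sample *src)
{
	unsigned int slot = (unsigned int)atomic_read(&r->write_idx) % r->count;

	r->samples[slot] = *src;	/* write the whole sample first */
	wmb();				/* order the sample data before the index */
	atomic_inc(&r->write_idx);	/* only now is it visible to the reader */
}

static bool demo_fetch(struct demo_ring *r, unsigned int read_idx, struct demo_sample *dst)
{
	if ((unsigned int)atomic_read(&r->write_idx) == read_idx)
		return false;		/* nothing published yet */
	rmb();				/* pair with the producer's wmb() */
	*dst = r->samples[read_idx % r->count];
	return true;
}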
- */ - if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE || - sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) { - err = -EINVAL; - goto error_out; - } - sample_access->sequence = sample_meta->u.sample_md.seq; sample_access->sample_offset_bytes = sample_offset_bytes; @@ -1163,19 +1025,46 @@ static void kbasep_kinstr_prfcnt_sample_array_free( memset(sample_arr, 0, sizeof(*sample_arr)); } -void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +static void +kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli, + enum kbase_kinstr_prfcnt_client_init_state init_state) { if (!cli) return; - kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); - kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); - kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); - kbase_hwcnt_enable_map_free(&cli->enable_map); - mutex_destroy(&cli->cmd_sync_lock); + while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) { + switch (init_state) { + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + case KINSTR_PRFCNT_WAITQ_MUTEX: + mutex_destroy(&cli->cmd_sync_lock); + break; + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); + break; + case KINSTR_PRFCNT_SAMPLE_ARRAY: + kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); + break; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); + break; + case KINSTR_PRFCNT_ENABLE_MAP: + kbase_hwcnt_enable_map_free(&cli->enable_map); + break; + case KINSTR_PRFCNT_PARSE_SETUP: + /* Nothing to do here */ + break; + } + } kfree(cli); } +void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +{ + kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED); +} + /** * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release. * @inode: Non-NULL pointer to inode structure. @@ -1279,9 +1168,8 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) list_for_each_entry(pos, &kinstr_ctx->clients, node) { if (pos->active && (pos->next_dump_time_ns != 0) && (pos->next_dump_time_ns < cur_time_ns)) - kbasep_kinstr_prfcnt_client_dump( - pos, BASE_HWCNT_READER_EVENT_PERIODIC, - pos->user_data, false, false); + kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC, + pos->user_data); } kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx); @@ -1289,48 +1177,6 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) mutex_unlock(&kinstr_ctx->lock); } -/** - * kbasep_kinstr_prfcnt_async_dump_worker()- Dump worker for a manual client - * to take a single asynchronous - * sample. - * @work: Work structure. - */ -static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work) -{ - struct kbase_kinstr_prfcnt_async *cli_async = - container_of(work, struct kbase_kinstr_prfcnt_async, dump_work); - struct kbase_kinstr_prfcnt_client *cli = container_of( - cli_async, struct kbase_kinstr_prfcnt_client, async); - - mutex_lock(&cli->kinstr_ctx->lock); - /* While the async operation is in flight, a sync stop might have been - * executed, for which the dump should be skipped. Further as we are - * doing an async dump, we expect that there is reserved buffer for - * this to happen. This is to avoid the rare corner case where the - * user side has issued a stop/start pair before the async work item - * get the chance to execute. 
- */ - if (cli->active && - (atomic_read(&cli->sync_sample_count) < cli->sample_count)) - kbasep_kinstr_prfcnt_client_dump(cli, - BASE_HWCNT_READER_EVENT_MANUAL, - cli->async.user_data, true, - false); - - /* While the async operation is in flight, more async dump requests - * may have been submitted. In this case, no more async dumps work - * will be queued. Instead space will be reserved for that dump and - * an empty sample will be return after handling the current async - * dump. - */ - while (cli->active && - (atomic_read(&cli->sync_sample_count) < cli->sample_count)) { - kbasep_kinstr_prfcnt_client_dump( - cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true); - } - mutex_unlock(&cli->kinstr_ctx->lock); -} - /** * kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for * execution as soon as possible. @@ -1790,83 +1636,100 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst { int err; struct kbase_kinstr_prfcnt_client *cli; + enum kbase_kinstr_prfcnt_client_init_state init_state; - WARN_ON(!kinstr_ctx); - WARN_ON(!setup); - WARN_ON(!req_arr); + if (WARN_ON(!kinstr_ctx)) + return -EINVAL; + + if (WARN_ON(!setup)) + return -EINVAL; + + if (WARN_ON(!req_arr)) + return -EINVAL; cli = kzalloc(sizeof(*cli), GFP_KERNEL); if (!cli) return -ENOMEM; - cli->kinstr_ctx = kinstr_ctx; - err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, req_arr); + for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED; + init_state++) { + err = 0; + switch (init_state) { + case KINSTR_PRFCNT_PARSE_SETUP: + cli->kinstr_ctx = kinstr_ctx; + err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, + req_arr); - if (err < 0) - goto error; + break; - cli->config.buffer_count = MAX_BUFFER_COUNT; - cli->dump_interval_ns = cli->config.period_ns; - cli->next_dump_time_ns = 0; - cli->active = false; - atomic_set(&cli->write_idx, 0); - atomic_set(&cli->read_idx, 0); - atomic_set(&cli->fetch_idx, 0); + case KINSTR_PRFCNT_ENABLE_MAP: + cli->config.buffer_count = MAX_BUFFER_COUNT; + cli->dump_interval_ns = cli->config.period_ns; + cli->next_dump_time_ns = 0; + cli->active = false; + atomic_set(&cli->write_idx, 0); + atomic_set(&cli->read_idx, 0); + atomic_set(&cli->fetch_idx, 0); - err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, - &cli->enable_map); + err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); + cli->sample_count = cli->config.buffer_count; + cli->sample_size = + kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); - cli->sample_count = cli->config.buffer_count; - atomic_set(&cli->sync_sample_count, cli->sample_count); - cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); + /* Use virtualizer's metadata to alloc tmp buffer which interacts with + * the HWC virtualizer. + */ + err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf); + break; - /* Use virtualizer's metadata to alloc tmp buffer which interacts with - * the HWC virtualizer. 
- */ - err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, - &cli->tmp_buf); + case KINSTR_PRFCNT_SAMPLE_ARRAY: + /* Disable clock map in setup, and enable clock map when start */ + cli->enable_map.clk_enable_map = 0; - if (err < 0) - goto error; + /* Use metadata from virtualizer to allocate dump buffers if + * kinstr_prfcnt doesn't have the truncated metadata. + */ + err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); - /* Disable clock map in setup, and enable clock map when start */ - cli->enable_map.clk_enable_map = 0; + break; - /* Use metadata from virtualizer to allocate dump buffers if - * kinstr_prfcnt doesn't have the truncated metadata. - */ - err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + /* Set enable map to be 0 to prevent virtualizer to init and kick the + * backend to count. + */ + kbase_hwcnt_gpu_enable_map_from_physical( + &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 }); - if (err < 0) - goto error; + err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt, + &cli->enable_map, &cli->hvcli); + break; - /* Set enable map to be 0 to prevent virtualizer to init and kick the backend to count */ - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, - &(struct kbase_hwcnt_physical_enable_map){ 0 }); + case KINSTR_PRFCNT_WAITQ_MUTEX: + init_waitqueue_head(&cli->waitq); + mutex_init(&cli->cmd_sync_lock); + break; - err = kbase_hwcnt_virtualizer_client_create( - kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli); + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + } - if (err < 0) - goto error; - - init_waitqueue_head(&cli->waitq); - INIT_WORK(&cli->async.dump_work, - kbasep_kinstr_prfcnt_async_dump_worker); - mutex_init(&cli->cmd_sync_lock); + if (err < 0) { + kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state); + return err; + } + } *out_vcli = cli; return 0; -error: - kbasep_kinstr_prfcnt_client_destroy(cli); - return err; } static size_t kbasep_kinstr_prfcnt_get_block_info_count( @@ -2085,17 +1948,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, union kbase_ioctl_kinstr_prfcnt_setup *setup) { int err; - unsigned int item_count; - unsigned long bytes; - struct prfcnt_request_item *req_arr; + size_t item_count; + size_t bytes; + struct prfcnt_request_item *req_arr = NULL; struct kbase_kinstr_prfcnt_client *cli = NULL; + const size_t max_bytes = 32 * sizeof(*req_arr); if (!kinstr_ctx || !setup) return -EINVAL; item_count = setup->in.request_item_count; - /* Limiting the request items to 2x of the expected: acommodating + /* Limiting the request items to 2x of the expected: accommodating * moderate duplications but rejecting excessive abuses. */ if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) || @@ -2103,7 +1967,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, return -EINVAL; } - bytes = item_count * sizeof(*req_arr); + if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes)) + return -EINVAL; + + /* Further limiting the max bytes to copy from userspace by setting it in the following + * fashion: a maximum of 1 mode item, 4 types of 3 sets for a total of 12 enable items, + * each currently at the size of prfcnt_request_item. + * + * Note: if more request types get added, this max limit needs to be updated. 
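/*
 * Illustrative sketch, not part of the patch itself: the overflow-checked,
 * upper-bounded copy pattern applied above to the prfcnt_request_item array.
 * The helper name and parameters here are hypothetical.
 */
#include <linux/overflow.h>
#include <linux/string.h>
#include <linux/err.h>
#include <linux/uaccess.h>

static void *demo_copy_bounded_array_from_user(const void __user *uptr, size_t count,
					       size_t elem_size, size_t max_bytes)
{
	size_t bytes;

	if (!uptr || !count)
		return ERR_PTR(-EINVAL);

	/* Reject multiplication overflow instead of silently truncating. */
	if (check_mul_overflow(count, elem_size, &bytes))
		return ERR_PTR(-EINVAL);

	/* Cap the worst-case allocation independently of what userspace claims. */
	if (bytes > max_bytes)
		return ERR_PTR(-EINVAL);

	return memdup_user(uptr, bytes);	/* itself returns ERR_PTR() on failure */
}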
+ */ + if (bytes > max_bytes) + return -EINVAL; + req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes); if (IS_ERR(req_arr)) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h index 1d8d1967eee9..e5c6f7a0b217 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,7 @@ #include #include -#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) +#if IS_ENABLED(MALI_KERNEL_TEST_API) #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) #else #define KBASE_EXPORT_TEST_API(func) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index 3743b4df999f..b18b1e25267e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -803,6 +803,40 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, } #endif /* MALI_USE_CSF */ +static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = NULL; + struct rb_root *rbtree = reg->rbtree; + + switch (reg->flags & KBASE_REG_ZONE_MASK) { + case KBASE_REG_ZONE_CUSTOM_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); + break; + case KBASE_REG_ZONE_SAME_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); + break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); + break; +#if MALI_USE_CSF + case KBASE_REG_ZONE_EXEC_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); + break; + case KBASE_REG_ZONE_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); + break; + case KBASE_REG_ZONE_MCU_SHARED: + /* This is only expected to be called on driver unload. */ + break; +#endif + default: + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); + break; + } + + return kctx; +} + static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; @@ -814,6 +848,8 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(reg->va_refcnt != 1); + if (kbase_page_migration_enabled) + kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. 
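/*
 * Illustrative sketch, not part of the patch itself: the container_of trick
 * behind kbase_reg_flags_to_kctx(), which was moved above the rbtree erase
 * path. Each zone's rb_root is embedded in the owning context, so a pointer
 * to the embedded root plus a tag saying which root it is suffices to recover
 * the context. The demo_* types below are stand-ins.
 */
#include <assert.h>
#include <stddef.h>

#define demo_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_root { int dummy; };
enum demo_zone { DEMO_ZONE_SAME_VA, DEMO_ZONE_CUSTOM_VA };

struct demo_ctx {
	struct demo_root reg_rbtree_same;
	struct demo_root reg_rbtree_custom;
};

struct demo_ctx *demo_root_to_ctx(struct demo_root *root, enum demo_zone zone)
{
	switch (zone) {
	case DEMO_ZONE_SAME_VA:
		return demo_container_of(root, struct demo_ctx, reg_rbtree_same);
	case DEMO_ZONE_CUSTOM_VA:
		return demo_container_of(root, struct demo_ctx, reg_rbtree_custom);
	}
	return NULL;
}

int main(void)
{
	struct demo_ctx ctx;

	assert(demo_root_to_ctx(&ctx.reg_rbtree_same, DEMO_ZONE_SAME_VA) == &ctx);
	assert(demo_root_to_ctx(&ctx.reg_rbtree_custom, DEMO_ZONE_CUSTOM_VA) == &ctx);
	return 0;
}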
@@ -830,6 +866,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1554,6 +1594,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, return NULL; new_reg->va_refcnt = 1; + new_reg->no_user_free_refcnt = 0; new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ new_reg->rbtree = rbtree; @@ -1572,41 +1613,6 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, KBASE_EXPORT_TEST_API(kbase_alloc_free_region); -static struct kbase_context *kbase_reg_flags_to_kctx( - struct kbase_va_region *reg) -{ - struct kbase_context *kctx = NULL; - struct rb_root *rbtree = reg->rbtree; - - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } - - return kctx; -} - /** * kbase_free_alloced_region - Free a region object. 
* @@ -1720,6 +1726,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long gwt_mask = ~0; int group_id; struct kbase_mem_phy_alloc *alloc; + bool ignore_page_migration = false; #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) @@ -1749,15 +1756,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, for (i = 0; i < alloc->imported.alias.nents; i++) { if (alloc->imported.alias.aliased[i].alloc) { err = kbase_mmu_insert_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * stride), - alloc->imported.alias.aliased[i] - .alloc->pages + - alloc->imported.alias.aliased[i] - .offset, + kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset, alloc->imported.alias.aliased[i].length, - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, + NULL, ignore_page_migration); if (err) goto bad_insert; @@ -1777,12 +1781,15 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } } } else { - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + ignore_page_migration = true; + + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, + mmu_sync_info, reg, ignore_page_migration); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1816,7 +1823,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, bad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - reg->nr_pages, kctx->as_nr); + reg->nr_pages, kctx->as_nr, ignore_page_migration); kbase_remove_va_region(kctx->kbdev, reg); @@ -1845,7 +1852,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: { size_t i = 0; - /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped * should be passed to the kbase_mmu_teardown_pages() function, @@ -1853,27 +1859,37 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) * separately. 
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { - if (alloc->imported.alias.aliased[i].alloc) { - int err_loop = kbase_mmu_teardown_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * alloc->imported.alias.stride), - alloc->pages + (i * alloc->imported.alias.stride), - alloc->imported.alias.aliased[i].length, - kctx->as_nr); - if (WARN_ON_ONCE(err_loop)) - err = err_loop; - } + struct tagged_addr *phys_alloc = NULL; + int err_loop; + + if (alloc->imported.alias.aliased[i].alloc != NULL) + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset; + + err_loop = kbase_mmu_teardown_pages( + kctx->kbdev, &kctx->mmu, + reg->start_pfn + (i * alloc->imported.alias.stride), + phys_alloc, alloc->imported.alias.aliased[i].length, + kctx->as_nr, false); + + if (WARN_ON_ONCE(err_loop)) + err = err_loop; } } break; case KBASE_MEM_TYPE_IMPORTED_UMM: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr, true); break; default: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, false); break; } @@ -2046,6 +2062,7 @@ void kbase_sync_single(struct kbase_context *kctx, src = ((unsigned char *)kmap(gpu_page)) + offset; dst = ((unsigned char *)kmap(cpu_page)) + offset; } + memcpy(dst, src, size); kunmap(gpu_page); kunmap(cpu_page); @@ -2197,7 +2214,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); - if (reg->flags & KBASE_REG_NO_USER_FREE) { + if (kbase_va_region_is_no_user_free(kctx, reg)) { dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -2416,8 +2433,11 @@ int kbase_update_region_flags(struct kbase_context *kctx, if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; - if (flags & BASEP_MEM_NO_USER_FREE) - reg->flags |= KBASE_REG_NO_USER_FREE; + if (flags & BASEP_MEM_NO_USER_FREE) { + kbase_gpu_vm_lock(kctx); + kbase_va_region_no_user_free_get(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; @@ -3206,9 +3226,32 @@ out_rollback: out_term: return -1; } - KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status) +{ + u32 i = 0; + + for (; i < alloc->nents; i++) { + struct tagged_addr phys = alloc->pages[i]; + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys) || is_partial(phys)) + continue; + + if (!page_md) + continue; + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); + spin_unlock(&page_md->migrate_lock); + } +} + bool kbase_check_alloc_flags(unsigned long flags) { /* Only known input flags should be set. 
*/ @@ -3766,7 +3809,15 @@ static void kbase_jit_destroy_worker(struct work_struct *work) mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_lock(kctx); - reg->flags &= ~KBASE_REG_NO_USER_FREE; + + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); kbase_gpu_vm_unlock(kctx); } while (1); @@ -4419,7 +4470,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (ret < 0) { /* * An update to an allocation from the pool failed, - * chances are slim a new allocation would fair any + * chances are slim a new allocation would fare any * better so return the allocation to the pool and * return the function with failure. */ @@ -4441,6 +4492,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); reg = NULL; goto end; + } else { + /* A suitable JIT allocation existed on the evict list, so we need + * to make sure that the NOT_MOVABLE property is cleared. + */ + if (kbase_page_migration_enabled) { + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + } } } else { /* No suitable JIT allocation was found so create a new one */ @@ -4497,6 +4559,29 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } + /* Similarly to tiler heap init, there is a short window of time + * where the (either recycled or newly allocated, in our case) region has + * "no user free" refcount incremented but is still missing the DONT_NEED flag, and + * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the + * allocation is the least bad option that doesn't lead to a security issue down the + * line (it will eventually be cleaned up during context termination). + * + * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region + * flags. + */ + kbase_gpu_vm_lock(kctx); + if (unlikely(reg->no_user_free_refcnt > 1)) { + kbase_gpu_vm_unlock(kctx); + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n"); + + mutex_lock(&kctx->jit_evict_lock); + list_move(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + + reg = NULL; + goto end; + } + trace_mali_jit_alloc(reg, info->id); kctx->jit_current_allocations++; @@ -4514,6 +4599,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_report_update_pressure(kctx, reg, info->va_pages, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); end: for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) @@ -4584,6 +4670,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) list_move(®->jit_node, &kctx->jit_pool_head); + /* Inactive JIT regions should be freed by the shrinker and not impacted + * by page migration. Once freed, they will enter into the page migration + * state machine via the mempools. 
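/*
 * Illustrative sketch, not part of the patch itself: a condensed view of how
 * the new kbase_set_phy_alloc_page_status() helper is driven by this patch.
 * The wrapper name below is hypothetical; the real call sites are in
 * kbase_jit_allocate(), kbase_jit_free() and kbase_mem_evictable_make().
 */
static void demo_set_jit_pages_movable(struct kbase_va_region *reg, bool in_use)
{
	/* Locking elided: the real call sites hold kctx->reg_lock and
	 * kctx->jit_evict_lock around this.
	 */
	if (!kbase_page_migration_enabled)
		return;

	/* Recycled from the JIT pool: pages may migrate again.
	 * Parked for the shrinker: pages must stay put until freed.
	 */
	kbase_set_phy_alloc_page_status(reg->gpu_alloc,
					in_use ? ALLOCATED_MAPPED : NOT_MOVABLE);
}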
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } @@ -4630,7 +4722,14 @@ bool kbase_jit_evict(struct kbase_context *kctx) mutex_unlock(&kctx->jit_evict_lock); if (reg) { - reg->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); } @@ -4652,7 +4751,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4664,7 +4770,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4873,10 +4986,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct page **pages; struct tagged_addr *pa; long i, dma_mapped_pages; - unsigned long address; struct device *dev; - unsigned long offset_within_page; - unsigned long remaining_size; unsigned long gwt_mask = ~0; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. @@ -4892,19 +5002,29 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc = reg->gpu_alloc; pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset_within_page = address & ~PAGE_MASK; - remaining_size = alloc->imported.user_buf.size; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. 
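/*
 * Illustrative sketch, not part of the patch itself: the map-side half of the
 * manual cache maintenance described above, reduced to a single page (helper
 * name is hypothetical, unwind handling trimmed). Automatic sync is
 * suppressed with DMA_ATTR_SKIP_CPU_SYNC so that neighbouring, non-imported
 * bytes in the same page are never invalidated behind the user's back; the
 * whole page is then explicitly cleaned for the device so the GPU sees the
 * current memory content.
 */
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/errno.h>

static int demo_map_imported_page(struct device *dev, struct page *pg, dma_addr_t *out)
{
	dma_addr_t dma_addr = dma_map_page_attrs(dev, pg, 0, PAGE_SIZE,
						 DMA_BIDIRECTIONAL,
						 DMA_ATTR_SKIP_CPU_SYNC);

	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	/* Commit any dirty CPU cache lines covering the whole page. */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	*out = dma_addr;
	return 0;
}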
+ */ for (i = 0; i < pinned_pages; i++) { - unsigned long map_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); - dma_addr_t dma_addr = dma_map_page(dev, pages[i], - offset_within_page, map_size, - DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); err = dma_mapping_error(dev, dma_addr); if (err) @@ -4913,8 +5033,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - remaining_size -= map_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4922,29 +5041,31 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - pa, kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err == 0) return 0; /* fall down */ unwind: alloc->nents = 0; - offset_within_page = address & ~PAGE_MASK; - remaining_size = alloc->imported.user_buf.size; dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop */ + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This is precautionary measure in case a GPU job has taken + * advantage of a partially GPU-mapped range to write and corrupt the + * content of memory, either inside or outside the imported region. + * + * Notice that this error recovery path doesn't try to be optimal and just + * flushes the entire page range. + */ for (i = 0; i < dma_mapped_pages; i++) { - unsigned long unmap_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - unmap_size, DMA_BIDIRECTIONAL); - remaining_size -= unmap_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); } /* The user buffer could already have been previously pinned before @@ -4985,12 +5106,85 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem #endif for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long unmap_size = - MIN(remaining_size, PAGE_SIZE - offset_within_page); + unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); + /* Notice: this is a temporary variable that is used for DMA sync + * operations, and that could be incremented by an offset if the + * current page contains both imported and non-imported memory + * sub-regions. + * + * It is valid to add an offset to this value, because the offset + * is always kept within the physically contiguous dma-mapped range + * and there's no need to translate to physical address to offset it. + * + * This variable is not going to be used for the actual DMA unmap + * operation, that shall always use the original DMA address of the + * whole memory page. 
+ */ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size, - DMA_BIDIRECTIONAL); + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expect value of the variables + * used in this loop in the corner case of an imported region encloed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - offset_within_page + * | |/ + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. + */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } + + /* Notice: use the original DMA address to unmap the whole memory page. 
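/*
 * Illustrative sketch, not part of the patch itself: the head/imported/tail
 * split computed by the loop above, checked with plain integer arithmetic for
 * a single-page import like the one drawn in the diagram (4 KiB page, import
 * starting at offset 0x300 and spanning 0x900 bytes).
 */
#include <assert.h>
#include <stddef.h>

int main(void)
{
	const size_t page_size = 4096;
	const size_t offset_within_page = 0x300;	/* non-imported head */
	const size_t remaining_size = 0x900;		/* bytes of the import left */

	size_t imported_size = remaining_size < page_size - offset_within_page ?
				       remaining_size : page_size - offset_within_page;
	size_t tail = page_size - imported_size - offset_within_page;

	/* Head is cleaned for the device, the imported middle is invalidated
	 * for the CPU, and the tail is cleaned for the device again.
	 */
	assert(imported_size == 0x900);			/* fits entirely in this page */
	assert(tail == page_size - 0x900 - 0x300);
	assert(offset_within_page + imported_size + tail == page_size);
	return 0;
}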
*/ + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + if (writeable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF @@ -4998,7 +5192,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem pages[i] = NULL; #endif - remaining_size -= unmap_size; + remaining_size -= imported_size; offset_within_page = 0; } #if !MALI_USE_CSF @@ -5079,8 +5273,9 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi break; } default: - WARN(1, "Invalid external resource GPU allocation type (%x) on mapping", - alloc->type); + dev_dbg(kctx->kbdev->dev, + "Invalid external resource GPU allocation type (%x) on mapping", + alloc->type); return -EINVAL; } @@ -5113,7 +5308,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, true); } if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index 5820f6d8a556..f0f5f92c793c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -193,10 +193,11 @@ struct kbase_mem_phy_alloc { * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory * pool of a dying context are being moved to the device * memory pool. + * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is + * not movable, but may return to be movable when the object + * is freed. * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU * and has reference to kbase_mem_phy_alloc object. - * @MULTI_MAPPED: Stable state. This state is used to manage all use cases - * where a page may have "unusual" mappings. * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't * reference kbase_mem_phy_alloc object. Used as a page in MMU * page table. @@ -205,9 +206,11 @@ struct kbase_mem_phy_alloc { * unmapping it. This status means that a memory release is * happening and it's still not complete. * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. - * A page is isolated while it is in ALLOCATED_MAPPED or - * PT_MAPPED state, but then the driver tries to destroy the - * allocation. + * A page is isolated while it is in ALLOCATED_MAPPED state, + * but then the driver tries to destroy the allocation. + * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. + * A page is isolated while it is in PT_MAPPED state, but + * then the driver tries to destroy the allocation. * * Pages can only be migrated in stable states. 
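/*
 * Illustrative sketch, not part of the patch itself: a standalone round-trip
 * check of the PGD_VPFN_LEVEL_* helpers introduced just below in this header,
 * which pack a page-table level into the two low bits of a PGD's virtual PFN.
 * The macro bodies are copied from the header; the typedef is local to keep
 * the demo self-contained.
 */
#include <assert.h>
#include <stdint.h>

typedef uint64_t u64;

#define PGD_VPFN_LEVEL_MASK ((u64)0x3)
#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK)
#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK)
#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \
	((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK))

int main(void)
{
	const u64 vpfn = 0x12340ULL;	/* low two bits already clear */
	const u64 level = 2;
	u64 packed = PGD_VPFN_LEVEL_SET(vpfn, level);

	assert(PGD_VPFN_LEVEL_GET_VPFN(packed) == vpfn);
	assert(PGD_VPFN_LEVEL_GET_LEVEL(packed) == level);
	return 0;
}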
*/ @@ -215,23 +218,32 @@ enum kbase_page_status { MEM_POOL = 0, ALLOCATE_IN_PROGRESS, SPILL_IN_PROGRESS, + NOT_MOVABLE, ALLOCATED_MAPPED, - MULTI_MAPPED, PT_MAPPED, FREE_IN_PROGRESS, FREE_ISOLATED_IN_PROGRESS, + FREE_PT_ISOLATED_IN_PROGRESS, }; +#define PGD_VPFN_LEVEL_MASK ((u64)0x3) +#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ + ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) + /** * struct kbase_page_metadata - Metadata for each page in kbase * * @kbdev: Pointer to kbase device. * @dma_addr: DMA address mapped to page. * @migrate_lock: A spinlock to protect the private metadata. + * @data: Member in union valid based on @status. * @status: Status to keep track if page can be migrated at any * given moment. MSB will indicate if page is isolated. * Protected by @migrate_lock. - * @data: Member in union valid based on @status. + * @vmap_count: Counter of kernel mappings. + * @group_id: Memory group ID obtained at the time of page allocation. * * Each 4KB page will have a reference to this struct in the private field. * This will be used to keep track of information required for Linux page @@ -240,7 +252,6 @@ enum kbase_page_status { struct kbase_page_metadata { dma_addr_t dma_addr; spinlock_t migrate_lock; - u8 status; union { struct { @@ -251,19 +262,25 @@ struct kbase_page_metadata { struct kbase_device *kbdev; } mem_pool; struct { - struct kbase_mem_phy_alloc *phy_alloc; struct kbase_va_region *reg; struct kbase_mmu_table *mmut; - struct page *pgd; u64 vpfn; - size_t page_array_index; } mapped; struct { struct kbase_mmu_table *mmut; - struct page *pgd; - u16 entry_info; + u64 pgd_vpfn_level; } pt_mapped; + struct { + struct kbase_device *kbdev; + } free_isolated; + struct { + struct kbase_device *kbdev; + } free_pt_isolated; } data; + + u8 status; + u8 vmap_count; + u8 group_id; }; /* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is @@ -288,6 +305,20 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; +/** + * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying + * physical allocation. + * @alloc: the physical allocation containing the pages whose metadata is going + * to be modified + * @status: the status the pages should end up in + * + * Note that this function does not go through all of the checking to ensure that + * proper states are set. Instead, it is only used when we change the allocation + * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED + */ +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status); + static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) { KBASE_DEBUG_ASSERT(alloc); @@ -388,6 +419,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @jit_usage_id: The last just-in-time memory usage ID for this region. * @jit_bin_id: The just-in-time memory bin this region came from. * @va_refcnt: Number of users of this region. Protected by reg_lock. + * @no_user_free_refcnt: Number of users that want to prevent the region from + * being freed by userspace. 
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of * an allocated region * The object can be one of: @@ -508,10 +541,7 @@ struct kbase_va_region { #define KBASE_REG_RESERVED_BIT_23 (1ul << 23) #endif /* !MALI_USE_CSF */ -/* Whilst this flag is set the GPU allocation is not supposed to be freed by - * user space. The flag will remain set for the lifetime of JIT allocations. - */ -#define KBASE_REG_NO_USER_FREE (1ul << 24) +/* Bit 24 is currently unused and is available for use for a new flag */ /* Memory has permanent kernel side mapping */ #define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) @@ -652,6 +682,7 @@ struct kbase_va_region { #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ int va_refcnt; + int no_user_free_refcnt; }; /** @@ -694,6 +725,23 @@ static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); } +/** + * kbase_is_region_shrinkable - Check if a region is "shrinkable". + * A shrinkable regions is a region for which its backing pages (reg->gpu_alloc->pages) + * can be freed at any point, even though the kbase_va_region structure itself + * may have been refcounted. + * Regions that aren't on a shrinker, but could be shrunk at any point in future + * without warning are still considered "shrinkable" (e.g. Active JIT allocs) + * + * @reg: Pointer to region + * + * Return: true if the region is "shrinkable", false if not. + */ +static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg) +{ + return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC); +} + void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg); static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, @@ -714,6 +762,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( lockdep_assert_held(&kctx->reg_lock); WARN_ON(!region->va_refcnt); + WARN_ON(region->va_refcnt == INT_MAX); /* non-atomic as kctx->reg_lock is held */ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", @@ -741,6 +790,69 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( return NULL; } +/** + * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region. + * A region that must not be freed by userspace indicates that it is owned by some other + * kbase subsystem, for example tiler heaps, JIT memory or CSF queues. + * Such regions must not be shrunk (i.e. have their backing pages freed), except by the + * current owner. + * Hence, callers cannot rely on this check alone to determine if a region might be shrunk + * by any part of kbase. Instead they should use kbase_is_region_shrinkable(). + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region. + * + * Return: true if userspace cannot free the region, false if userspace can free the region. + */ +static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + return region->no_user_free_refcnt > 0; +} + +/** + * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region. + * Calling this function will prevent the region to be shrunk by parts of kbase that + * don't own the region (as long as the refcount stays above zero). Refer to + * kbase_va_region_is_no_user_free() for more information. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). 
+ * + * Return: the pointer to the region passed as argument. + */ +static inline struct kbase_va_region * +kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(kbase_is_region_shrinkable(region)); + WARN_ON(region->no_user_free_refcnt == INT_MAX); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt++; + + return region; +} + +/** + * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + */ +static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!kbase_va_region_is_no_user_free(kctx, region)); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt--; +} + /* Common functions */ static inline struct tagged_addr *kbase_get_cpu_phy_pages( struct kbase_va_region *reg) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 9899cef317ac..998849fa4cc2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -791,7 +792,11 @@ int kbase_mem_evictable_init(struct kbase_context *kctx) * struct shrinker does not define batch */ kctx->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&kctx->reclaim); +#else + register_shrinker(&kctx->reclaim, "mali-mem"); +#endif return 0; } @@ -855,6 +860,9 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) lockdep_assert_held(&kctx->reg_lock); + /* Memory is in the process of transitioning to the shrinker, and + * should ignore migration attempts + */ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); @@ -862,12 +870,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* This allocation can't already be on a list. */ WARN_ON(!list_empty(&gpu_alloc->evict_node)); - /* - * Add the allocation to the eviction list, after this point the shrink + /* Add the allocation to the eviction list, after this point the shrink * can reclaim it. */ list_add(&gpu_alloc->evict_node, &kctx->evict_list); atomic_add(gpu_alloc->nents, &kctx->evict_nents); + + /* Indicate to page migration that the memory can be reclaimed by the shrinker. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -919,6 +932,15 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; + + /* Since the allocation is no longer evictable, and we ensure that + * it grows back to its pre-eviction size, we will consider the + * state of it to be ALLOCATED_MAPPED, as that is the only state + * in which a physical allocation could transition to NOT_MOVABLE + * from. 
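Later in this file, kbase_mem_flags_change(), kbase_mem_alias() and kbase_mem_commit() replace their direct KBASE_REG_DONT_NEED / KBASE_REG_NO_USER_FREE flag tests with the two helpers added to mali_kbase_mem.h above. A condensed usage sketch of that API (illustrative only, not part of the patch; example_take_region_ownership() is hypothetical, and kbase_gpu_vm_lock()/kbase_gpu_vm_unlock() are assumed to take the kctx->reg_lock that the helpers assert with lockdep):

/* Sketch of a subsystem pinning a region against user free, then releasing it. */
static int example_take_region_ownership(struct kbase_context *kctx,
					 struct kbase_va_region *reg)
{
	kbase_gpu_vm_lock(kctx);

	/* Shrinkable regions (DONT_NEED or active JIT) may lose their backing
	 * pages at any time and cannot be pinned this way.
	 */
	if (kbase_is_region_shrinkable(reg)) {
		kbase_gpu_vm_unlock(kctx);
		return -EINVAL;
	}

	/* Pin: userspace can no longer free the region until the matching put. */
	kbase_va_region_no_user_free_get(kctx, reg);
	kbase_gpu_vm_unlock(kctx);

	/* ... use the region ... */

	kbase_gpu_vm_lock(kctx);
	kbase_va_region_no_user_free_put(kctx, reg);
	kbase_gpu_vm_unlock(kctx);
	return 0;
}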
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -977,7 +999,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in * & GPU queue ringbuffer and none of them needs to be explicitly marked * as evictable by Userspace. */ - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? */ @@ -1302,9 +1324,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err) goto bad_insert; @@ -1330,7 +1351,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, bad_pad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, kctx->as_nr); + alloc->nents, kctx->as_nr, true); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1359,7 +1380,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int err; err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); WARN_ON(err); } @@ -1559,10 +1580,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( int zone = KBASE_REG_ZONE_CUSTOM_VA; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); - unsigned long offset_within_page; - unsigned long remaining_size; struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + struct tagged_addr *pa; + struct device *dev; int write; /* Flag supported only for dma-buf imported memory */ @@ -1704,20 +1725,33 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( reg->gpu_alloc->nents = 0; reg->extension = 0; - if (pages) { - struct device *dev = kctx->kbdev->dev; - struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); + pa = kbase_get_gpu_phy_pages(reg); + dev = kctx->kbdev->dev; + if (pages) { /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; - offset_within_page = user_buf->address & ~PAGE_MASK; - remaining_size = user_buf->size; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. 
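The ownership rule spelled out in this comment maps directly onto the streaming DMA API calls used in the loop that follows. A condensed sketch of the whole pattern for one page (illustrative only; example_import_page_dma() is hypothetical, and in the driver the map and unmap halves live in different functions, with the dma_sync_single_for_cpu() counterpart shown here only for completeness):

/* Kernel context: requires linux/dma-mapping.h. */
static int example_import_page_dma(struct device *dev, struct page *page)
{
	dma_addr_t dma_addr;

	/* Map without automatic CPU cache maintenance. */
	dma_addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	/* GPU claims ownership: commit dirty CPU cache lines for the whole page. */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	/* ... GPU uses the page ... */

	/* CPU claims ownership back before reading anything the GPU wrote. */
	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	/* Unmap, again with maintenance suppressed, so the CPU caches covering
	 * the non-imported parts of the page are not invalidated behind the
	 * user process's back.
	 */
	dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
			     DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}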
+ */ for (i = 0; i < faulted_pages; i++) { - unsigned long map_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); - dma_addr_t dma_addr = dma_map_page(dev, pages[i], - offset_within_page, map_size, DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = + dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; @@ -1725,8 +1759,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - remaining_size -= map_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } reg->gpu_alloc->nents = faulted_pages; @@ -1735,19 +1768,19 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( return reg; unwind_dma_map: - offset_within_page = user_buf->address & ~PAGE_MASK; - remaining_size = user_buf->size; dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop */ + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This precautionary measure is kept here to keep this code + * aligned with kbase_jd_user_buf_map() to allow for a potential refactor + * in the future. + */ for (i = 0; i < dma_mapped_pages; i++) { - unsigned long unmap_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = user_buf->dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, - user_buf->dma_addrs[i], - unmap_size, DMA_BIDIRECTIONAL); - remaining_size -= unmap_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); } fault_mismatch: if (pages) { @@ -1767,7 +1800,6 @@ no_alloc_obj: no_region: bad_size: return NULL; - } @@ -1888,9 +1920,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate found region */ if (kbase_is_region_invalid_or_free(aliasing_reg)) goto bad_handle; /* Not found/already free */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(aliasing_reg)) goto bad_handle; /* Ephemeral region */ - if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, aliasing_reg)) goto bad_handle; /* JIT regions can't be * aliased. 
NO_USER_FREE flag * covers the entire lifetime @@ -2050,7 +2082,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - + if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { + dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); + goto bad_flags; + } if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { dev_warn(kctx->kbdev->dev, "padding is only supported for UMM"); @@ -2164,11 +2199,9 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags, - kctx->as_nr, reg->gpu_alloc->group_id, - mmu_sync_info); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, kctx->as_nr, + reg->gpu_alloc->group_id, mmu_sync_info, reg, false); return ret; } @@ -2197,7 +2230,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, kctx->as_nr); + alloc->pages + new_pages, delta, kctx->as_nr, false); return ret; } @@ -2262,10 +2295,10 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) goto out_unlock; - if (reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(reg)) goto out_unlock; - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -2662,6 +2695,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size_t size; int err = 0; + lockdep_assert_held(&kctx->reg_lock); + dev_dbg(kctx->kbdev->dev, "%s\n", __func__); size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; @@ -2734,7 +2769,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, size_t *nr_pages, size_t *aligned_offset) { - int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); struct kbase_va_region *reg; int err = 0; @@ -2775,7 +2810,6 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, /* adjust down nr_pages to what we have physically */ *nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); @@ -3016,6 +3050,99 @@ void kbase_sync_mem_regions(struct kbase_context *kctx, } } +/** + * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * @flags: Region flags. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is incremented by 1. Errors are handled by making pages + * not movable. Permanent kernel mappings will be marked as not movable, too. 
+ */ +static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, + size_t page_count, unsigned long flags) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely + * to stay mapped for a long time. However, keep on counting the number + * of mappings even for them: they don't represent an exception for the + * vmap_count. + * + * At the same time, errors need to be handled if a client tries to add + * too many mappings, hence a page may end up in the NOT_MOVABLE state + * anyway even if it's not a permanent kernel mapping. + */ + if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + if (page_md->vmap_count < U8_MAX) + page_md->vmap_count++; + else + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } +} + +/** + * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is decremented by 1. Errors are handled by making pages + * not movable. + */ +static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, + size_t page_count) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Decrement the number of mappings for all kinds of pages, including + * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). + * However, errors still need to be handled if a client tries to remove + * more mappings than created. + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + else + page_md->vmap_count--; + spin_unlock(&page_md->migrate_lock); + } +} + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, kbase_vmap_flag vmap_flags) @@ -3088,6 +3215,13 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + /* If page migration is enabled, increment the number of VMA mappings + * of all physical pages. In case of errors, e.g. too many mappings, + * make the page not movable to prevent trouble. 
+ */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); + kfree(pages); if (!cpu_addr) @@ -3111,6 +3245,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi atomic_add(page_count, &kctx->permanent_mapped_pages); kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); + return 0; } @@ -3162,6 +3297,9 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto out_unlock; + addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u); out_unlock: @@ -3189,6 +3327,17 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, vunmap(addr); + /* If page migration is enabled, decrement the number of VMA mappings + * for all physical pages. Now is a good time to do it because references + * haven't been released yet. + */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + const size_t page_count = PFN_UP(map->offset_in_page + map->size); + struct tagged_addr *pages_array = map->cpu_pages; + + kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); + } + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h index 5b12e181bf4c..6dda44b9f128 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h @@ -284,7 +284,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_reg * have been released in the mean time. * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region * lock is now held again. - * * Or, @reg has had KBASE_REG_NO_USER_FREE set at creation time or under the region lock, and the + * * Or, @reg has had NO_USER_FREE set at creation time or under the region lock, and the * region lock is now held again. * * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c index 8526688b7b12..737f7da5595d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c @@ -22,11 +22,11 @@ /** * DOC: Base kernel page migration implementation. */ - #include #include #include +#include /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. 
@@ -36,7 +36,12 @@ int kbase_page_migration_enabled; module_param(kbase_page_migration_enabled, int, 0444); KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops; +#endif + +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id) { struct kbase_page_metadata *page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); @@ -48,19 +53,43 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a set_page_private(p, (unsigned long)page_md); page_md->dma_addr = dma_addr; page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); + page_md->vmap_count = 0; + page_md->group_id = group_id; spin_lock_init(&page_md->migrate_lock); lock_page(p); - if (kbdev->mem_migrate.mapping) { - __SetPageMovable(p, kbdev->mem_migrate.mapping); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(p, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + /* In some corner cases, the driver may attempt to allocate memory pages + * even before the device file is open and the mapping for address space + * operations is created. In that case, it is impossible to assign address + * space operations to memory pages: simply pretend that they are movable, + * even if they are not. + * + * The page will go through all state transitions but it will never be + * actually considered movable by the kernel. This is due to the fact that + * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the + * memory pool will always refuse to add it to the pool and schedule + * a worker thread to free it later. + * + * Page metadata may seem redundant in this case, but they are not, + * because memory pools expect metadata to be present when page migration + * is enabled and because the pages may always return to memory pools and + * gain the movable property later on in their life cycle. 
+ */ + if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping); page_md->status = PAGE_MOVABLE_SET(page_md->status); } +#endif unlock_page(p); return true; } -static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p) +static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id) { struct device *const dev = kbdev->dev; struct kbase_page_metadata *page_md; @@ -70,10 +99,13 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p) if (!page_md) return; + if (group_id) + *group_id = page_md->group_id; dma_addr = kbase_dma_addr(p); dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kfree(page_md); + set_page_private(p, 0); ClearPagePrivate(p); } @@ -91,6 +123,7 @@ static void kbase_free_pages_worker(struct work_struct *work) spin_unlock(&mem_migrate->free_pages_lock); list_for_each_entry_safe(p, tmp, &free_list, lru) { + u8 group_id = 0; list_del_init(&p->lru); lock_page(p); @@ -101,8 +134,8 @@ static void kbase_free_pages_worker(struct work_struct *work) } unlock_page(p); - kbase_free_page_metadata(kbdev, p); - __free_pages(p, 0); + kbase_free_page_metadata(kbdev, p, &group_id); + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); } } @@ -115,6 +148,145 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) spin_unlock(&mem_migrate->free_pages_lock); } +/** + * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped + * in a PGD of kbase_mmu_table. + * + * @old_page: Existing PGD page to remove + * @new_page: Destination for migrating the existing PGD page to + * + * Replace an existing PGD page with a new page by migrating its content. More specifically: + * the new page shall replace the existing PGD page in the MMU page table. Before returning, + * the new page shall be set as movable and not isolated, while the old page shall lose + * the movable property. The meta data attached to the PGD page is transferred to the + * new (replacement) page. + * + * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure + * and the migration is aborted. + */ +static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx; + struct kbase_device *kbdev = kctx->kbdev; + dma_addr_t old_dma_addr = page_md->dma_addr; + dma_addr_t new_dma_addr; + int ret; + + /* Create a new dma map for the new page */ + new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to the page in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + * Only MCU's mmut is device wide, i.e. no corresponding kctx. 
+ */ + kbase_gpu_vm_lock(kctx); + + ret = kbase_mmu_migrate_page( + as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, + new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level)); + + if (ret == 0) { + dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + ClearPagePrivate(old_page); + put_page(old_page); + + page_md = kbase_page_private(new_page); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + if (kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } +#endif + SetPagePrivate(new_page); + get_page(new_page); + } else + dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/* + * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both + * allocated and mapped. + * + * @old_page: Page to remove. + * @new_page: Page to add. + * + * Replace an old page with a new page by migrating its content and all its + * CPU and GPU mappings. More specifically: the new page shall replace the + * old page in the MMU page table, as well as in the page array of the physical + * allocation, which is used to create CPU mappings. Before returning, the new + * page shall be set as movable and not isolated, while the old page shall lose + * the movable property. + */ +static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.mapped.mmut->kctx; + dma_addr_t old_dma_addr, new_dma_addr; + int ret; + + old_dma_addr = page_md->dma_addr; + new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to array of pages in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + */ + kbase_gpu_vm_lock(kctx); + + /* Unmap the old physical range. */ + unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, + PAGE_SIZE, 1); + + ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), + as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, + MIDGARD_MMU_BOTTOMLEVEL); + + if (ret == 0) { + dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + SetPagePrivate(new_page); + get_page(new_page); + + /* Clear PG_movable from the old page and release reference. */ + ClearPagePrivate(old_page); + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + put_page(old_page); + + page_md = kbase_page_private(new_page); + /* Set PG_movable to the new page. 
*/ +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + if (kctx->kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } +#endif + } else + dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + /** * kbase_page_isolate - Isolate a page for migration. * @@ -133,6 +305,9 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) CSTD_UNUSED(mode); + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + return false; + if (!spin_trylock(&page_md->migrate_lock)) return false; @@ -152,17 +327,29 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) atomic_inc(&mem_pool->isolation_in_progress_cnt); break; case ALLOCATED_MAPPED: + /* Mark the page into isolated state, but only if it has no + * kernel CPU mappings + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); + break; case PT_MAPPED: - /* Only pages in a memory pool can be isolated for now. */ + /* Mark the page into isolated state. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); break; case SPILL_IN_PROGRESS: case ALLOCATE_IN_PROGRESS: case FREE_IN_PROGRESS: - /* Transitory state: do nothing. */ + break; + case NOT_MOVABLE: + /* Opportunistically clear the movable property for these pages */ + __ClearPageMovable(p); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); break; default: /* State should always fall in one of the previous cases! - * Also notice that FREE_ISOLATED_IN_PROGRESS is impossible because + * Also notice that FREE_ISOLATED_IN_PROGRESS or + * FREE_PT_ISOLATED_IN_PROGRESS is impossible because * that state only applies to pages that are already isolated. */ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); @@ -210,17 +397,31 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Return: 0 on success, error code otherwise. */ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, struct page *old_page, enum migrate_mode mode) +#else +static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode) +#endif { int err = 0; bool status_mem_pool = false; + bool status_free_pt_isolated_in_progress = false; + bool status_free_isolated_in_progress = false; + bool status_pt_mapped = false; + bool status_mapped = false; + bool status_not_movable = false; struct kbase_page_metadata *page_md = kbase_page_private(old_page); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) CSTD_UNUSED(mapping); +#endif CSTD_UNUSED(mode); + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + return -EINVAL; + if (!spin_trylock(&page_md->migrate_lock)) return -EAGAIN; @@ -235,10 +436,22 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + status_mapped = true; + break; case PT_MAPPED: + status_pt_mapped = true; + break; case FREE_ISOLATED_IN_PROGRESS: - case MULTI_MAPPED: - /* So far, only pages in a memory pool can be migrated. 
*/ + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; + break; + case NOT_MOVABLE: + status_not_movable = true; + break; default: /* State should always fall in one of the previous cases! */ err = -EAGAIN; @@ -247,18 +460,37 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa spin_unlock(&page_md->migrate_lock); - if (status_mem_pool) { + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - kbase_free_page_metadata(kbdev, old_page); + kbase_free_page_metadata(kbdev, old_page, NULL); __ClearPageMovable(old_page); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + put_page(old_page); /* Just free new page to avoid lock contention. */ INIT_LIST_HEAD(&new_page->lru); + get_page(new_page); set_page_private(new_page, 0); kbase_free_page_later(kbdev, new_page); queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } else if (status_not_movable) { + err = -EINVAL; + } else if (status_mapped) { + err = kbasep_migrate_page_allocated_mapped(old_page, new_page); + } else if (status_pt_mapped) { + err = kbasep_migrate_page_pt_mapped(old_page, new_page); + } + + /* While we want to preserve the movability of pages for which we return + * EAGAIN, according to the kernel docs, movable pages for which a critical + * error is returned are called putback on, which may not be what we + * expect. + */ + if (err < 0 && err != -EAGAIN) { + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); } return err; @@ -277,13 +509,23 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa static void kbase_page_putback(struct page *p) { bool status_mem_pool = false; + bool status_free_isolated_in_progress = false; + bool status_free_pt_isolated_in_progress = false; struct kbase_page_metadata *page_md = kbase_page_private(p); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; + + /* If we don't have page metadata, the page may not belong to the + * driver or may already have been freed, and there's nothing we can do + */ + if (!page_md) + return; spin_lock(&page_md->migrate_lock); - /* Page must have been isolated to reach here but metadata is incorrect. */ - WARN_ON(!IS_PAGE_ISOLATED(page_md->status)); + if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { + spin_unlock(&page_md->migrate_lock); + return; + } switch (PAGE_STATUS_GET(page_md->status)) { case MEM_POOL: @@ -291,11 +533,22 @@ static void kbase_page_putback(struct page *p) kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; case PT_MAPPED: - case FREE_ISOLATED_IN_PROGRESS: - /* Only pages in a memory pool can be isolated for now. - * Therefore only pages in a memory pool can be 'putback'. + case NOT_MOVABLE: + /* Pages should no longer be isolated if they are in a stable state + * and used by the driver. 
*/ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; + case FREE_ISOLATED_IN_PROGRESS: + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; break; default: /* State should always fall in one of the previous cases! */ @@ -304,35 +557,59 @@ static void kbase_page_putback(struct page *p) spin_unlock(&page_md->migrate_lock); - /* If page was in a memory pool then just free it to avoid lock contention. */ - if (!WARN_ON(!status_mem_pool)) { - struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - + /* If page was in a memory pool then just free it to avoid lock contention. The + * same is also true to status_free_pt_isolated_in_progress. + */ + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { __ClearPageMovable(p); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); - list_del_init(&p->lru); - kbase_free_page_later(kbdev, p); - queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + + if (!WARN_ON_ONCE(!kbdev)) { + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + kbase_free_page_later(kbdev, p); + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } } } +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops = { + .isolate_page = kbase_page_isolate, + .migrate_page = kbase_page_migrate, + .putback_page = kbase_page_putback, +}; +#else static const struct address_space_operations kbase_address_space_ops = { .isolate_page = kbase_page_isolate, .migratepage = kbase_page_migrate, .putback_page = kbase_page_putback, }; +#endif +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + mutex_lock(&kbdev->fw_load_lock); + if (filp) { filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; - if (!kbdev->mem_migrate.mapping) - kbdev->mem_migrate.mapping = filp->f_inode->i_mapping; - else - WARN_ON(kbdev->mem_migrate.mapping != filp->f_inode->i_mapping); + if (!kbdev->mem_migrate.inode) { + kbdev->mem_migrate.inode = filp->f_inode; + /* This reference count increment is balanced by iput() + * upon termination. 
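Since the open file pins filp->f_inode, the reference taken by the atomic_inc() that follows could equivalently be written with the VFS helper ihold(), which performs the same increment plus a sanity check. A one-line sketch, not part of the patch:

	ihold(filp->f_inode); /* requires an existing reference, which the open file provides */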
+ */ + atomic_inc(&filp->f_inode->i_count); + } else { + WARN_ON(kbdev->mem_migrate.inode != filp->f_inode); + } } + + mutex_unlock(&kbdev->fw_load_lock); } +#endif void kbase_mem_migrate_init(struct kbase_device *kbdev) { @@ -344,6 +621,9 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + mem_migrate->inode = NULL; +#endif mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); @@ -355,4 +635,7 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + iput(mem_migrate->inode); +#endif } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h index d4796327b8d7..76bbc999e110 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h @@ -50,6 +50,8 @@ extern int kbase_page_migration_enabled; * @kbdev: Pointer to kbase device. * @p: Page to assign metadata to. * @dma_addr: DMA address mapped to paged. + * @group_id: Memory group ID associated with the entity that is + * allocating the page metadata. * * This will allocate memory for the page's metadata, initialize it and * assign a reference to the page's private field. Importantly, once @@ -58,7 +60,8 @@ extern int kbase_page_migration_enabled; * * Return: true if successful or false otherwise. */ -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr); +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id); /** * kbase_free_page_later - Defer freeing of given page. @@ -70,6 +73,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a */ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) /* * kbase_mem_migrate_set_address_space_ops - Set address space operations * @@ -81,6 +85,7 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); * add a reference to @kbdev. */ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); +#endif /* * kbase_mem_migrate_init - Initialise kbase page migration diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index dce066db7385..75569cc51c52 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -57,37 +57,59 @@ static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) return kbase_mem_pool_size(pool) == 0; } -static void set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, +static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, struct list_head *page_list, size_t *list_size) { struct kbase_page_metadata *page_md = kbase_page_private(p); + bool not_movable = false; lockdep_assert_held(&pool->pool_lock); + /* Free the page instead of adding it to the pool if it's not movable. + * Only update page status and add the page to the memory pool if + * it is not isolated. 
+ */ spin_lock(&page_md->migrate_lock); - /* Only update page status and add the page to the memory pool if it is not isolated */ - if (!WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { + if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { + not_movable = true; + } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); page_md->data.mem_pool.pool = pool; page_md->data.mem_pool.kbdev = pool->kbdev; - list_move(&p->lru, page_list); + list_add(&p->lru, page_list); (*list_size)++; } spin_unlock(&page_md->migrate_lock); + + if (not_movable) { + kbase_free_page_later(pool->kbdev, p); + pool_dbg(pool, "skipping a not movable page\n"); + } + + return not_movable; } static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, struct page *p) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); - if (!pool->order && kbase_page_migration_enabled) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); - else { + if (!pool->order && kbase_page_migration_enabled) { + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } else { list_add(&p->lru, &pool->page_list); pool->cur_size++; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added page\n"); } @@ -101,18 +123,29 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, size_t nr_pages) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); if (!pool->order && kbase_page_migration_enabled) { struct page *p, *tmp; - list_for_each_entry_safe(p, tmp, page_list, lru) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); + list_for_each_entry_safe(p, tmp, page_list, lru) { + list_del_init(&p->lru); + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } } else { list_splice(page_list, &pool->page_list); pool->cur_size += nr_pages; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added %zu pages\n", nr_pages); } @@ -226,7 +259,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) /* Setup page metadata for 4KB pages when page migration is enabled */ if (!pool->order && kbase_page_migration_enabled) { INIT_LIST_HEAD(&p->lru); - if (!kbase_alloc_page_metadata(kbdev, p, dma_addr)) { + if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); @@ -251,7 +284,14 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) { - struct kbase_device *kbdev = pool->kbdev; + struct kbase_device *kbdev; + + if (WARN_ON(!pool)) + return; + if (WARN_ON(!p)) + return; + + kbdev = pool->kbdev; if (!pool->order && kbase_page_migration_enabled) { kbase_free_page_later(kbdev, p); @@ -460,7 +500,11 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool * struct shrinker does not define batch */ 
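The registration just below repeats the kernel-version split already applied to the per-context shrinker earlier in this patch: from Linux 6.0, register_shrinker() also takes a name, which is reported through the shrinker debugfs interface. A small compatibility wrapper (a sketch only; kbase_register_shrinker() is a hypothetical name, not part of the patch) would let both call sites read the same on all supported kernels:

/* Sketch: hide the register_shrinker() signature change introduced in Linux 6.0. */
#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
#define kbase_register_shrinker(shrinker, name) register_shrinker(shrinker)
#else
#define kbase_register_shrinker(shrinker, name) register_shrinker(shrinker, name)
#endif

/* Call sites then read identically on all supported kernels, e.g.:
 *   kbase_register_shrinker(&pool->reclaim, "mali-mem-pool");
 */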
pool->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&pool->reclaim); +#else + register_shrinker(&pool->reclaim, "mali-mem-pool"); +#endif pool_dbg(pool, "initialized\n"); @@ -499,14 +543,16 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Zero pages first without holding the next_pool lock */ for (i = 0; i < nr_to_spill; i++) { p = kbase_mem_pool_remove_locked(pool, SPILL_IN_PROGRESS); - list_add(&p->lru, &spill_list); + if (p) + list_add(&p->lru, &spill_list); } } while (!kbase_mem_pool_is_empty(pool)) { /* Free remaining pages to kernel */ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); - list_add(&p->lru, &free_list); + if (p) + list_add(&p->lru, &free_list); } kbase_mem_pool_unlock(pool); @@ -558,17 +604,10 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) { - struct page *p; - lockdep_assert_held(&pool->pool_lock); pool_dbg(pool, "alloc_locked()\n"); - p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); - - if (p) - return p; - - return NULL; + return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); } void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, @@ -636,10 +675,12 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, /* Get pages from this pool */ kbase_mem_pool_lock(pool); nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); + while (nr_from_pool--) { int j; p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); + if (pool->order) { pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); @@ -867,7 +908,6 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pages[i] = as_tagged(0); continue; } - p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index b0c5126afcbe..212a61f68372 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -502,6 +502,7 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) kbase_js_sched_all(katom->kctx->kbdev); } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -673,8 +674,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; - unsigned long nr_pages = - alloc->imported.user_buf.nr_pages; + const unsigned long nr_pages = alloc->imported.user_buf.nr_pages; + const unsigned long start = alloc->imported.user_buf.address; if (alloc->imported.user_buf.mm != current->mm) { ret = -EINVAL; @@ -686,11 +687,9 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ret = -ENOMEM; goto out_unlock; } - - ret = get_user_pages_fast( - alloc->imported.user_buf.address, - nr_pages, 0, - buffers[i].extres_pages); + kbase_gpu_vm_unlock(katom->kctx); + ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages); + kbase_gpu_vm_lock(katom->kctx); if (ret != nr_pages) { /* Adjust number of pages, so that we only * attempt to release pages in the array that we @@ -728,7 +727,6 @@ out_cleanup: return ret; } -#endif /* !MALI_USE_CSF */ #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc 
*gpu_alloc, @@ -760,8 +758,18 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, } #endif -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) { unsigned int i; unsigned int target_page_nr = 0; @@ -848,7 +856,6 @@ out_unlock: return ret; } -#if !MALI_USE_CSF static int kbase_debug_copy(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -866,6 +873,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ #endif /* !MALI_USE_CSF */ #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) @@ -963,11 +971,6 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ret = -EINVAL; goto free_info; } - /* Clear any remaining bytes when user struct is smaller than - * kernel struct. For jit version 1, this also clears the - * padding bytes - */ - memset(((u8 *)info) + sizeof(*info), 0, sizeof(*info) - sizeof(*info)); ret = kbasep_jit_alloc_validate(kctx, info); if (ret) @@ -1541,6 +1544,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_EVENT_RESET: kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: { int res = kbase_debug_copy(katom); @@ -1549,6 +1553,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) katom->event_code = BASE_JD_EVENT_JOB_INVALID; break; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: ret = kbase_jit_allocate_process(katom); break; @@ -1654,8 +1659,10 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) if (katom->jc == 0) return -EINVAL; break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: return kbase_debug_copy_prepare(katom); +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_EXT_RES_MAP: return kbase_ext_res_prepare(katom); case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: @@ -1687,9 +1694,11 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) kbase_sync_fence_in_remove(katom); break; #endif /* CONFIG_SYNC_FILE */ +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: kbase_debug_copy_finish(katom); break; +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: kbase_jit_allocate_finish(katom); break; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c index 853c89796d44..d770913e9da5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c @@ -41,6 +41,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. 
*/ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* Hwcnt reader API version */ #define HWCNT_READER_API 1 diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c index db2086079c14..4a0926531af2 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c @@ -88,12 +88,11 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, * context's address space, when the page fault occurs for * MCU's address space. */ - if (!queue_work(as->pf_wq, &as->work_pagefault)) - kbase_ctx_sched_release_ctx(kctx); - else { + if (!queue_work(as->pf_wq, &as->work_pagefault)) { dev_dbg(kbdev->dev, - "Page fault is already pending for as %u\n", - as_nr); + "Page fault is already pending for as %u", as_nr); + kbase_ctx_sched_release_ctx(kctx); + } else { atomic_inc(&kbdev->faults_pending); } } @@ -552,14 +551,14 @@ void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, } KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); -int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c index 22786f0748ce..83605c3dc56f 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c @@ -328,7 +328,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) while (bf_bits | pf_bits) { struct kbase_as *as; - int as_no; + unsigned int as_no; struct kbase_context *kctx; struct kbase_fault *fault; @@ -423,13 +423,13 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, return kbase_job_slot_softstop_start_rp(kctx, reg); } -int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index 8f261d439909..3131d57ef330 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -156,7 +157,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz } else if (op == KBASE_MMU_OP_FLUSH_MEM) { flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; } else { - dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", op); + dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); return; } @@ -167,7 +168,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz * perform a reset to recover */ dev_err(kbdev->dev, - "Flush for physical address range did not complete. 
Issuing GPU soft-reset to recover\n"); + "Flush for physical address range did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -230,9 +231,8 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -326,7 +326,7 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct * perform a reset to recover. */ dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -340,15 +340,7 @@ static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_cont phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op) { -#if MALI_USE_CSF - unsigned long irq_flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && - kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) - mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -#endif + kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op); } static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) @@ -398,9 +390,9 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context * a 4kB physical page. */ -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, unsigned long flags, - int group_id, u64 *dirty_pgds); +static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id, u64 *dirty_pgds); /** * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and @@ -420,6 +412,65 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, u64 vpfn, int level, enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, struct list_head *free_pgds_list); + +static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
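The change in mmu_flush_invalidate_as() above swaps in the _locked reset variants because that function requests the reset while already holding hwaccess_lock. A minimal sketch of the pairing, assuming the usual kbase reset API semantics:

    unsigned long flags;

    /* Outside any hwaccess_lock critical section: */
    if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
            kbase_reset_gpu(kbdev);

    /* From code that already holds hwaccess_lock: */
    spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
    if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
            kbase_reset_gpu_locked(kbdev);
    spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);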
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p) +{ + struct kbase_page_metadata *page_md = kbase_page_private(p); + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + if (!kbase_page_migration_enabled) + return false; + + spin_lock(&page_md->migrate_lock); + if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { + WARN_ON_ONCE(!mmut->kctx); + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET(page_md->status, + FREE_PT_ISOLATED_IN_PROGRESS); + page_md->data.free_pt_isolated.kbdev = kbdev; + page_is_isolated = true; + } else { + page_md->status = + PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); + } + } else { + WARN_ON_ONCE(mmut->kctx); + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); + } + spin_unlock(&page_md->migrate_lock); + + if (unlikely(page_is_isolated)) { + /* Do the CPU cache flush and accounting here for the isolated + * PGD page, which is done inside kbase_mmu_free_pgd() for the + * PGD page that did not get isolated. + */ + dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, + DMA_BIDIRECTIONAL); + kbase_mmu_account_freed_pgd(kbdev, mmut); + } + + return page_is_isolated; +} + /** * kbase_mmu_free_pgd() - Free memory of the page directory * @@ -441,17 +492,7 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. - */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + kbase_mmu_account_freed_pgd(kbdev, mmut); } /** @@ -482,6 +523,20 @@ static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mm mutex_unlock(&mmut->mmu_lock); } +static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p, struct list_head *free_pgds_list) +{ + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + list_add(&p->lru, free_pgds_list); +} + /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault @@ -509,7 +564,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, if (!multiple) { dev_warn( kbdev->dev, - "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; } @@ -692,8 +747,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, } /* Now make this faulting page writable to GPU. 
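Stepping back to the PGD-freeing helpers introduced above: the rule they implement is that an isolated PGD page (one currently grabbed by the page-migration core) only has its accounting updated here, and the migration machinery frees it later, while every other PGD page is returned to the memory pool straight away. The teardown path later in this patch applies the rule along these lines (sketch mirroring that call site):

    struct page *p = phys_to_page(pgd);

    /* Isolated PGD pages are handed over to the migration code instead
     * of being freed immediately.
     */
    if (!kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p))
            kbase_mmu_free_pgd(kbdev, mmut, pgd);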
*/ - kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1, + region->flags, region->gpu_alloc->group_id, &dirty_pgds); kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, kctx->id, dirty_pgds); @@ -917,7 +972,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, */ dev_warn( kctx->kbdev->dev, - "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available\n", + "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, total_mempools_free_4k); *pages_to_grow = 0; @@ -985,9 +1040,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - dev_dbg(kbdev->dev, - "Entering %s %pK, fault_pfn %lld, as_no %d\n", - __func__, (void *)data, fault_pfn, as_no); + dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data, + fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() * Therefore, it cannot be scheduled out of this AS until we explicitly @@ -1010,8 +1064,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); goto fault_done; } #endif @@ -1206,8 +1259,7 @@ page_fault_retry: /* cap to max vsize */ new_pages = min(new_pages, region->nr_pages - current_backed_size); - dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", - new_pages); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages); if (new_pages == 0) { struct kbase_mmu_hw_op_param op_param; @@ -1284,11 +1336,10 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. 
*/ - err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, - region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + err = kbase_mmu_insert_pages_no_flush( + kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, false); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -1314,16 +1365,11 @@ page_fault_retry: if (region->threshold_pages && kbase_reg_current_backed_size(region) > region->threshold_pages) { - - dev_dbg(kctx->kbdev->dev, - "%zu pages exceeded IR threshold %zu\n", - new_pages + current_backed_size, - region->threshold_pages); + dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", + new_pages + current_backed_size, region->threshold_pages); if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { - dev_dbg(kctx->kbdev->dev, - "Get region %pK for IR\n", - (void *)region); + dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); kbase_va_region_alloc_get(kctx, region); } } @@ -1441,7 +1487,7 @@ page_fault_retry: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page allocation failure", fault); } else { - dev_dbg(kbdev->dev, "Try again after pool_grow\n"); + dev_dbg(kbdev->dev, "Try again after pool_grow"); goto page_fault_retry; } } @@ -1468,7 +1514,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -1532,11 +1578,10 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * u64 *dirty_pgds) { u64 *page; + u64 pgd_vpfn = vpfn; phys_addr_t target_pgd; struct page *p; - KBASE_DEBUG_ASSERT(*pgd); - lockdep_assert_held(&mmut->mmu_lock); /* @@ -1549,7 +1594,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (page == NULL) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; } @@ -1559,8 +1604,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", - __func__); + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__); kunmap(p); return -ENOMEM; } @@ -1585,9 +1629,32 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * * GPU cache is still needed. For explanation, please refer * the comment in kbase_mmu_insert_pages_no_flush(). 
*/ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, *pgd + (vpfn * sizeof(u64)), - kbase_dma_addr(p) + (vpfn * sizeof(u64)), sizeof(u64), - KBASE_MMU_OP_FLUSH_PT); + kbase_mmu_sync_pgd(kbdev, mmut->kctx, + *pgd + (vpfn * sizeof(u64)), + kbase_dma_addr(p) + (vpfn * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + + /* Update the new target_pgd page to its stable state */ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = + kbase_page_private(phys_to_page(target_pgd)); + + spin_lock(&page_md->migrate_lock); + + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || + IS_PAGE_ISOLATED(page_md->status)); + + if (mmut->kctx) { + page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); + page_md->data.pt_mapped.mmut = mmut; + page_md->data.pt_mapped.pgd_vpfn_level = + PGD_VPFN_LEVEL_SET(pgd_vpfn, level); + } else { + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + } + + spin_unlock(&page_md->migrate_lock); + } } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( @@ -1618,9 +1685,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { - dev_dbg(kbdev->dev, - "%s: mmu_get_next_pgd failure at level %d\n", - __func__, l); + dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__, + l); return err; } } @@ -1640,7 +1706,8 @@ static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, u64 to_vpfn, u64 *dirty_pgds, - struct list_head *free_pgds_list) + struct list_head *free_pgds_list, + struct tagged_addr *phys, bool ignore_page_migration) { u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1693,8 +1760,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, pcount = count; break; default: - dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level); goto next; } @@ -1713,7 +1779,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, KBASE_MMU_OP_NONE, dirty_pgds, @@ -1734,12 +1800,34 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, next: vpfn += count; } + + /* If page migration is enabled: the only way to recover from failure + * is to mark all pages as not movable. It is not predictable what's + * going to happen to these pages at this stage. They might return + * movable once they are returned to a memory pool. 
+ */ + if (kbase_page_migration_enabled && !ignore_page_migration && phys) { + const u64 num_pages = to_vpfn - from_vpfn + 1; + u64 i; + + for (i = 0; i < num_pages; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + } } static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 vpfn, size_t nr, u64 dirty_pgds, - enum kbase_caller_mmu_sync_info mmu_sync_info) + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool insert_pages_failed) { struct kbase_mmu_hw_op_param op_param; int as_nr = 0; @@ -1764,8 +1852,12 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * * Operations that affect the whole GPU cache shall only be done if it's * impossible to update physical ranges. + * + * On GPUs where flushing by physical address range is supported, + * full cache flush is done when an error occurs during + * insert_pages() to keep the error handling simpler. */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed) mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); else mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); @@ -1806,6 +1898,20 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; + /* If page migration is enabled, pages involved in multiple GPU mappings + * are always treated as not movable. + */ + if (kbase_page_migration_enabled) { + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + mutex_lock(&kctx->mmu.mmu_lock); while (remain) { @@ -1842,15 +1948,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n", - __func__); + dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } goto fail_unlock; } @@ -1858,14 +1964,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } err = -ENOMEM; goto fail_unlock; @@ -1917,7 +2024,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_unlock(&kctx->mmu.mmu_lock); mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info); + mmu_sync_info, false); return 0; @@ -1925,12 +2032,91 @@ fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, 
start_vpfn, nr, dirty_pgds, - mmu_sync_info); + mmu_sync_info, true); kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); return err; } +static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, + struct kbase_va_region *reg, + struct kbase_mmu_table *mmut, const u64 vpfn) +{ + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + spin_lock(&page_md->migrate_lock); + + /* If no GPU va region is given: the metadata provided are + * invalid. + * + * If the page is already allocated and mapped: this is + * an additional GPU mapping, probably to create a memory + * alias, which means it is no longer possible to migrate + * the page easily because tracking all the GPU mappings + * would be too costly. + * + * In any case: the page becomes not movable. It is kept + * alive, but attempts to migrate it will fail. The page + * will be freed if it is still not movable when it returns + * to a memory pool. Notice that the movable flag is not + * cleared because that would require taking the page lock. + */ + if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); + page_md->data.mapped.reg = reg; + page_md->data.mapped.mmut = mmut; + page_md->data.mapped.vpfn = vpfn; + } + + spin_unlock(&page_md->migrate_lock); +} + +static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, + struct tagged_addr *phys, size_t requested_nr) +{ + size_t i; + + for (i = 0; i < requested_nr; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. 
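The NOT_MOVABLE updates in kbase_mmu_insert_single_page() and kbase_mmu_progress_migration_on_insert() above follow one locking pattern. Written as a stand-alone helper (hypothetical, not part of the patch) it would read:

    static void example_mark_page_not_movable(struct tagged_addr phys)
    {
            struct page *phys_page = as_page(phys);
            struct kbase_page_metadata *page_md = kbase_page_private(phys_page);

            /* Pages without migration metadata need no update. */
            if (!page_md)
                    return;

            spin_lock(&page_md->migrate_lock);
            page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
            spin_unlock(&page_md->migrate_lock);
    }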
+ */ + if (is_huge(phys[i]) || is_partial(phys[i])) + continue; + + if (page_md) { + u8 status; + + spin_lock(&page_md->migrate_lock); + status = PAGE_STATUS_GET(page_md->status); + + if (status == ALLOCATED_MAPPED) { + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET( + page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); + page_md->data.free_isolated.kbdev = kbdev; + /* At this point, we still have a reference + * to the page via its page migration metadata, + * and any page with the FREE_ISOLATED_IN_PROGRESS + * status will subsequently be freed in either + * kbase_page_migrate() or kbase_page_putback() + */ + phys[i] = as_tagged(0); + } else + page_md->status = PAGE_STATUS_SET(page_md->status, + (u8)FREE_IN_PROGRESS); + } + + spin_unlock(&page_md->migrate_lock); + } + } +} + u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, unsigned long const flags, int const level, int const group_id) @@ -1944,7 +2130,8 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds) + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2006,14 +2193,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__); + dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } goto fail_unlock; } @@ -2021,15 +2209,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", - __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } err = -ENOMEM; goto fail_unlock; @@ -2060,6 +2248,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu *target = kbase_mmu_create_ate(kbdev, phys[i], flags, cur_level, group_id); + + /* If page migration is enabled, this is the right time + * to update the status of the page. + */ + if (kbase_page_migration_enabled && !ignore_page_migration && + !is_huge(phys[i]) && !is_partial(phys[i])) + kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, + insert_vpfn + i); } num_of_valid_entries += count; } @@ -2104,8 +2300,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu fail_unlock: mutex_unlock(&mmut->mmu_lock); - mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, - CALLER_MMU_ASYNC); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, + dirty_pgds ? 
*dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2115,11 +2311,10 @@ fail_unlock: * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. */ -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration) { int err; u64 dirty_pgds = 0; @@ -2130,11 +2325,11 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, return 0; err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds); + &dirty_pgds, reg, ignore_page_migration); if (err) return err; - mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); return 0; } @@ -2285,7 +2480,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_pgd + (index * sizeof(u64)), sizeof(u64), flush_op); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); } else { current_valid_entries--; @@ -2361,11 +2556,12 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * @mmut: Pointer to GPU MMU page table. * @vpfn: Start page frame number of the GPU virtual pages to unmap. * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is only used for GPU cache - * maintenance. + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. * @nr: Number of pages to unmap. * @as_nr: Address space number, for GPU cache maintenance operations * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. * * We actually discard the ATE and free the page table pages if no valid entries * exist in PGD. @@ -2384,10 +2580,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * Return: 0 on success, otherwise an error code. 
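A hypothetical call site, to show how the new @ignore_page_migration argument reads in practice (the region and context names are assumed): passing false lets the function update migration metadata for the unmapped pages, whereas callers whose pages never take part in migration pass true.

    err = kbase_mmu_teardown_pages(kbdev, &kctx->mmu, reg->start_pfn,
                                   kbase_get_gpu_phy_pages(reg),
                                   kbase_reg_current_backed_size(reg),
                                   kctx->as_nr, false);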
*/ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr) + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration) { + const size_t requested_nr = nr; u64 start_vpfn = vpfn; - size_t requested_nr = nr; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; struct kbase_mmu_hw_op_param op_param; @@ -2478,9 +2675,8 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table switch (level) { case MIDGARD_MMU_LEVEL(0): case MIDGARD_MMU_LEVEL(1): - dev_warn(kbdev->dev, - "%s: No support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, + level); kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): @@ -2488,9 +2684,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table if (count >= 512) { pcount = 1; } else { - dev_warn(kbdev->dev, - "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", - __func__, count); + dev_warn( + kbdev->dev, + "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", + __func__, count); pcount = 0; } break; @@ -2499,9 +2696,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pcount = count; break; default: - dev_err(kbdev->dev, - "%s: found non-mapped memory, early out\n", - __func__); + dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); vpfn += count; nr -= count; continue; @@ -2530,7 +2725,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pgd + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); - list_add(&p->lru, &free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, flush_op, &dirty_pgds, @@ -2553,7 +2748,6 @@ next: } err = 0; out: - mutex_unlock(&mmut->mmu_lock); /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ .vpfn = start_vpfn, @@ -2566,6 +2760,16 @@ out: }; mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + /* If page migration is enabled: the status of all physical pages involved + * shall be updated, unless they are not movable. Their status shall be + * updated before releasing the lock to protect against concurrent + * requests to migrate the pages, if they have been isolated. + */ + if (kbase_page_migration_enabled && phys && !ignore_page_migration) + kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr); + + mutex_unlock(&mmut->mmu_lock); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2574,9 +2778,11 @@ out: KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); /** - * kbase_mmu_update_pages_no_flush() - Update attributes data in GPU page table entries + * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU + * page table entries * - * @kctx: Kbase context + * @kbdev: Pointer to kbase device. 
+ * @mmut: The involved MMU table * @vpfn: Virtual PFN (Page Frame Number) of the first page to update * @phys: Pointer to the array of tagged physical addresses of the physical * pages that are pointed to by the page table entries (that need to @@ -2589,26 +2795,22 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); * @dirty_pgds: Flags to track every level where a PGD has been updated. * * This will update page table entries that already exist on the GPU based on - * the new flags that are passed (the physical pages pointed to by the page - * table entries remain unchanged). It is used as a response to the changes of - * the memory attributes. + * new flags and replace any existing phy pages that are passed (the PGD pages + * remain unchanged). It is used as a response to the changes of phys as well + * as the the memory attributes. * * The caller is responsible for validating the memory attributes. * * Return: 0 if the attributes data in page table entries were updated * successfully, otherwise an error code. */ -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, unsigned long flags, - int const group_id, u64 *dirty_pgds) +static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; int err; - struct kbase_device *kbdev; - - if (WARN_ON(kctx == NULL)) - return -EINVAL; KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); @@ -2616,9 +2818,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; - mutex_lock(&kctx->mmu.mmu_lock); - - kbdev = kctx->kbdev; + mutex_lock(&mmut->mmu_lock); while (nr) { unsigned int i; @@ -2634,8 +2834,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, if (is_huge(*phys) && (index == index_in_large_page(*phys))) cur_level = MIDGARD_MMU_LEVEL(2); - err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL, - dirty_pgds); + err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds); if (WARN_ON(err)) goto fail_unlock; @@ -2662,7 +2861,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, pgd_page[level_index] = kbase_mmu_create_ate(kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id); - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (level_index * sizeof(u64)), + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)), kbase_dma_addr(p) + (level_index * sizeof(u64)), sizeof(u64), KBASE_MMU_OP_NONE); } else { @@ -2680,7 +2879,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, /* MMU cache flush strategy is NONE because GPU cache maintenance * will be done by the caller. 
*/ - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)), + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64), KBASE_MMU_OP_NONE); } @@ -2698,60 +2897,446 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, kunmap(p); } - mutex_unlock(&kctx->mmu.mmu_lock); + mutex_unlock(&mmut->mmu_lock); return 0; fail_unlock: - mutex_unlock(&kctx->mmu.mmu_lock); + mutex_unlock(&mmut->mmu_lock); return err; } -int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id) +static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id) { int err; struct kbase_mmu_hw_op_param op_param; u64 dirty_pgds = 0; - + struct kbase_mmu_table *mmut; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + int as_nr; - err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds); +#if !MALI_USE_CSF + if (unlikely(kctx == NULL)) + return -EINVAL; + + as_nr = kctx->as_nr; + mmut = &kctx->mmu; +#else + if (kctx) { + mmut = &kctx->mmu; + as_nr = kctx->as_nr; + } else { + mmut = &kbdev->csf.mcu_mmu; + as_nr = MCU_AS_NR; + } +#endif + + err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds); op_param = (const struct kbase_mmu_hw_op_param){ .vpfn = vpfn, .nr = nr, .op = KBASE_MMU_OP_FLUSH_MEM, - .kctx_id = kctx->id, + .kctx_id = kctx ? kctx->id : 0xFFFFFFFF, .mmu_sync_info = mmu_sync_info, .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), }; - if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev)) - mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param); + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param); else - mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param); + mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param); + return err; } -static void mmu_teardown_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - int level) +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id) +{ + if (unlikely(kctx == NULL)) + return -EINVAL; + + return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id); +} + +#if MALI_USE_CSF +int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id) +{ + return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id); +} +#endif /* MALI_USE_CSF */ + +static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = true; +} + +static void mmu_page_migration_transaction_end(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = false; + /* Invoke the PM state machine, as the MMU page migration session + * may have deferred a transition in L2 state machine. 
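The two wrappers above pick the MMU table for the shared helper: a user context's own table for kbase_mmu_update_pages(), or the MCU table and MCU_AS_NR for kbase_mmu_update_csf_mcu_pages() on CSF GPUs. Hypothetical call sites:

    /* User-context mapping, updated through the context's MMU table: */
    err = kbase_mmu_update_pages(kctx, vpfn, phys, nr, flags, group_id);

    /* Firmware (MCU) mapping on CSF GPUs; no context is involved: */
    err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phys, nr, flags, group_id);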
+ */ + kbase_pm_update_state(kbdev); +} + +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level) +{ + struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); + struct kbase_mmu_hw_op_param op_param; + struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + page_md->data.mapped.mmut : + page_md->data.pt_mapped.mmut; + struct kbase_device *kbdev; + phys_addr_t pgd; + u64 *old_page, *new_page, *pgd_page, *target, vpfn; + int index, check_state, ret = 0; + unsigned long hwaccess_flags = 0; + unsigned int num_of_valid_entries; + u8 vmap_count = 0; + + /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, + * here we skip the no kctx case, which is only used with MCU's mmut. + */ + if (!mmut->kctx) + return -EINVAL; + + if (level > MIDGARD_MMU_BOTTOMLEVEL) + return -EINVAL; + else if (level == MIDGARD_MMU_BOTTOMLEVEL) + vpfn = page_md->data.mapped.vpfn; + else + vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level); + + kbdev = mmut->kctx->kbdev; + index = (vpfn >> ((3 - level) * 9)) & 0x1FF; + + /* Create all mappings before copying content. + * This is done as early as possible because is the only operation that may + * fail. It is possible to do this before taking any locks because the + * pages to migrate are not going to change and even the parent PGD is not + * going to be affected by any other concurrent operation, since the page + * has been isolated before migration and therefore it cannot disappear in + * the middle of this function. + */ + old_page = kmap(as_page(old_phys)); + if (!old_page) { + dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); + ret = -EINVAL; + goto old_page_map_error; + } + + new_page = kmap(as_page(new_phys)); + if (!new_page) { + dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); + ret = -EINVAL; + goto new_page_map_error; + } + + /* GPU cache maintenance affects both memory content and page table, + * but at two different stages. A single virtual memory page is affected + * by the migration. + * + * Notice that the MMU maintenance is done in the following steps: + * + * 1) The MMU region is locked without performing any other operation. + * This lock must cover the entire migration process, in order to + * prevent any GPU access to the virtual page whose physical page + * is being migrated. + * 2) Immediately after locking: the MMU region content is flushed via + * GPU control while the lock is taken and without unlocking. + * The region must stay locked for the duration of the whole page + * migration procedure. + * This is necessary to make sure that pending writes to the old page + * are finalized before copying content to the new page. + * 3) Before unlocking: changes to the page table are flushed. + * Finer-grained GPU control operations are used if possible, otherwise + * the whole GPU cache shall be flushed again. + * This is necessary to make sure that the GPU accesses the new page + * after migration. + * 4) The MMU region is unlocked. 
+ */ +#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1)) + op_param.mmu_sync_info = CALLER_MMU_ASYNC; + op_param.kctx_id = mmut->kctx->id; + op_param.vpfn = vpfn & PGD_VPFN_MASK(level); + op_param.nr = 1 << ((3 - level) * 9); + op_param.op = KBASE_MMU_OP_FLUSH_PT; + /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ + op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + pgd_level_to_skip_flush(1ULL << level) : + pgd_level_to_skip_flush(3ULL << level); + + mutex_lock(&mmut->mmu_lock); + + /* The state was evaluated before entering this function, but it could + * have changed before the mmu_lock was taken. However, the state + * transitions which are possible at this point are only two, and in both + * cases it is a stable state progressing to a "free in progress" state. + * + * After taking the mmu_lock the state can no longer change: read it again + * and make sure that it hasn't changed before continuing. + */ + spin_lock(&page_md->migrate_lock); + check_state = PAGE_STATUS_GET(page_md->status); + if (level == MIDGARD_MMU_BOTTOMLEVEL) + vmap_count = page_md->vmap_count; + spin_unlock(&page_md->migrate_lock); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + if (check_state != ALLOCATED_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort page migration", __func__, + check_state, ALLOCATED_MAPPED); + ret = -EAGAIN; + goto page_state_change_out; + } else if (vmap_count > 0) { + dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration", + __func__); + ret = -EAGAIN; + goto page_state_change_out; + } + } else { + if (check_state != PT_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort PGD page migration", + __func__, check_state, PT_MAPPED); + WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS); + ret = -EAGAIN; + goto page_state_change_out; + } + } + + ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL); + if (ret) { + dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__); + goto get_pgd_at_level_error; + } + + pgd_page = kmap(phys_to_page(pgd)); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); + ret = -EINVAL; + goto pgd_page_map_error; + } + + mutex_lock(&kbdev->pm.lock); + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Lock MMU region and flush GPU cache by using GPU control, + * in order to keep MMU region locked. 
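The shift-by-nine arithmetic above (the per-level index computed in kbase_mmu_migrate_page(), PGD_VPFN_MASK(), and op_param.nr) follows from the 512-entry, four-level page table layout, where level 3 is the bottom level holding the ATEs for 4kB pages. A self-contained illustration, not driver code:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t vpfn = 0x123456789ULL; /* arbitrary example GPU page frame number */
            int level;

            for (level = 0; level <= 3; level++) {
                    unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
                    uint64_t nr_pages = 1ULL << ((3 - level) * 9); /* pages covered by one entry */
                    uint64_t base_vpfn = vpfn & ~(nr_pages - 1);   /* PGD_VPFN_MASK(level) applied */

                    printf("level %d: index %3u, entry covers %llu pages, region base vpfn 0x%llx\n",
                           level, index, (unsigned long long)nr_pages,
                           (unsigned long long)base_vpfn);
            }
            return 0;
    }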
+ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) { + /* Defer the migration as L2 is in a transitional phase */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__); + ret = -EAGAIN; + goto l2_state_defer_out; + } + /* Prevent transitional phases in L2 by starting the transaction */ + mmu_page_migration_transaction_begin(kbdev); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); + if (!ret) { + ret = kbase_gpu_cache_flush_and_busy_wait( + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + } + if (ret) + mmu_page_migration_transaction_end(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + if (ret < 0) { + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_err(kbdev->dev, + "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.", + __func__); + goto gpu_reset; + } + + /* Copy memory content. + * + * It is necessary to claim the ownership of the DMA buffer for the old + * page before performing the copy, to make sure of reading a consistent + * version of its content, before copying. After the copy, ownership of + * the DMA buffer for the new page is given to the GPU in order to make + * the content visible to potential GPU access that may happen as soon as + * this function releases the lock on the MMU region. + */ + dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + memcpy(new_page, old_page, PAGE_SIZE); + dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Remap GPU virtual page. + * + * This code rests on the assumption that page migration is only enabled + * for 4 kB pages, that necessarily live in the bottom level of the MMU + * page table. For this reason, the PGD level tells us inequivocably + * whether the page being migrated is a "content page" or another PGD + * of the page table: + * + * - Bottom level implies ATE (Address Translation Entry) + * - Any other level implies PTE (Page Table Entry) + * + * The current implementation doesn't handle the case of a level 0 PGD, + * that is: the root PGD of the page table. + */ + target = &pgd_page[index]; + + /* Certain entries of a page table page encode the count of valid entries + * present in that page. So need to save & restore the count information + * when updating the PTE/ATE to point to the new page. 
+ */ + num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + WARN_ON_ONCE((*target & 1UL) == 0); + *target = + kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, + level, page_md->data.mapped.reg->gpu_alloc->group_id); + } else { + u64 managed_pte; + +#ifdef CONFIG_MALI_BIFROST_DEBUG + /* The PTE should be pointing to the page being migrated */ + WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( + kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); +#endif + kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); + *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + } + + kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + + /* This function always updates a single entry inside an existing PGD, + * therefore cache maintenance is necessary and affects a single entry. + */ + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + + /* Unlock MMU region. + * + * Notice that GPUs which don't issue flush commands via GPU control + * still need an additional GPU cache flush here, this time only + * for the page table, because the function call above to sync PGDs + * won't have any effect on them. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); + } else { + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2); + if (!ret) + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); + } + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + /* Releasing locks before checking the migration transaction error state */ + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + /* Release the transition prevention in L2 by ending the transaction */ + mmu_page_migration_transaction_end(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + /* Checking the final migration transaction error state */ + if (ret < 0) { + dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); + goto gpu_reset; + } + + /* Undertaking metadata transfer, while we are holding the mmu_lock */ + spin_lock(&page_md->migrate_lock); + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + size_t page_array_index = + page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn; + + WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); + + /* Replace page in array of pages of the physical allocation. 
*/ + page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; + } + /* Update the new page dma_addr with the transferred metadata from the old_page */ + page_md->dma_addr = new_dma_addr; + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + spin_unlock(&page_md->migrate_lock); + set_page_private(as_page(new_phys), (unsigned long)page_md); + /* Old page metatdata pointer cleared as it now owned by the new page */ + set_page_private(as_page(old_phys), 0); + +l2_state_defer_out: + kunmap(phys_to_page(pgd)); +pgd_page_map_error: +get_pgd_at_level_error: +page_state_change_out: + mutex_unlock(&mmut->mmu_lock); + + kunmap(as_page(new_phys)); +new_page_map_error: + kunmap(as_page(old_phys)); +old_page_map_error: + return ret; + +gpu_reset: + /* Unlock the MMU table before resetting the GPU and undo + * mappings. + */ + mutex_unlock(&mmut->mmu_lock); + kunmap(phys_to_page(pgd)); + kunmap(as_page(new_phys)); + kunmap(as_page(old_phys)); + + /* Reset the GPU because of an unrecoverable error in locking or flushing. */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + + return ret; +} + +static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t pgd, unsigned int level) { u64 *pgd_page; int i; struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; u64 *pgd_page_buffer = NULL; + bool page_is_isolated = false; + struct page *p = phys_to_page(pgd); lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kmap_atomic(p); /* kmap_atomic should NEVER fail. */ - if (WARN_ON(pgd_page == NULL)) + if (WARN_ON_ONCE(pgd_page == NULL)) return; if (level < MIDGARD_MMU_BOTTOMLEVEL) { /* Copy the page to our preallocated buffer so that we can minimize @@ -2761,6 +3346,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); } + /* When page migration is enabled, kbase_region_tracker_term() would ensure + * there are no pages left mapped on the GPU for a context. Hence the count + * of valid entries is expected to be zero here. + */ + if (kbase_page_migration_enabled && mmut->kctx) + WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); @@ -2779,7 +3370,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, } } - kbase_mmu_free_pgd(kbdev, mmut, pgd); + /* Top level PGD page is excluded from migration process. 
*/ + if (level != MIDGARD_MMU_TOPLEVEL) + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, @@ -2836,6 +3432,10 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { int level; + WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", + mmut->kctx->tgid, mmut->kctx->id); + if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); @@ -2855,11 +3455,26 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) mutex_destroy(&mmut->mmu_lock); } -void kbase_mmu_as_term(struct kbase_device *kbdev, int i) +void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i) { destroy_workqueue(kbdev->as[i].pf_wq); } +void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op) +{ +#if MALI_USE_CSF + unsigned long irq_flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && + kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +#endif +} + #ifdef CONFIG_MALI_VECTOR_DUMP static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) @@ -2881,7 +3496,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; } @@ -3035,8 +3650,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); return; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h index 848570f2a6dd..247a67c50da8 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h @@ -29,6 +29,7 @@ struct kbase_context; struct kbase_mmu_table; +struct kbase_va_region; /** * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. 
@@ -49,6 +50,26 @@ enum kbase_caller_mmu_sync_info { CALLER_MMU_ASYNC }; +/** + * enum kbase_mmu_op_type - enum for MMU operations + * @KBASE_MMU_OP_NONE: To help catch uninitialized struct + * @KBASE_MMU_OP_FIRST: The lower boundary of enum + * @KBASE_MMU_OP_LOCK: Lock memory region + * @KBASE_MMU_OP_UNLOCK: Unlock memory region + * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) + * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) + * @KBASE_MMU_OP_COUNT: The upper boundary of enum + */ +enum kbase_mmu_op_type { + KBASE_MMU_OP_NONE = 0, /* Must be zero */ + KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ + KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, + KBASE_MMU_OP_UNLOCK, + KBASE_MMU_OP_FLUSH_PT, + KBASE_MMU_OP_FLUSH_MEM, + KBASE_MMU_OP_COUNT /* Must be the last in enum */ +}; + /** * kbase_mmu_as_init() - Initialising GPU address space object. * @@ -60,7 +81,7 @@ enum kbase_caller_mmu_sync_info { * * Return: 0 on success and non-zero value on failure. */ -int kbase_mmu_as_init(struct kbase_device *kbdev, int i); +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i); /** * kbase_mmu_as_term() - Terminate address space object. @@ -71,7 +92,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i); * This is called upon device termination to destroy * the address space object of the device. */ -void kbase_mmu_as_term(struct kbase_device *kbdev, int i); +void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i); /** * kbase_mmu_init - Initialise an object representing GPU page tables @@ -132,22 +153,92 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id, u64 *dirty_pgds); -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info); + unsigned long flags, int group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr); + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); +#if MALI_USE_CSF +/** + * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags + * + * @kbdev: Pointer to kbase device. + * @vpfn: Virtual PFN (Page Frame Number) of the first page to update + * @phys: Pointer to the array of tagged physical addresses of the physical + * pages that are pointed to by the page table entries (that need to + * be updated). + * @nr: Number of pages to update + * @flags: Flags + * @group_id: The physical memory group in which the page was allocated. 
+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: 0 on success, otherwise an error code. + */ +int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id); +#endif + +/** + * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages + * + * @old_phys: Old physical page to be replaced. + * @new_phys: New physical page used to replace old physical page. + * @old_dma_addr: DMA address of the old page. + * @new_dma_addr: DMA address of the new page. + * @level: MMU page table level of the provided PGD. + * + * The page migration process is made of 2 big steps: + * + * 1) Copy the content of the old page to the new page. + * 2) Remap the virtual page, that is: replace either the ATE (if the old page + * was a regular page) or the PTE (if the old page was used as a PGD) in the + * MMU page table with the new page. + * + * During the process, the MMU region is locked to prevent GPU access to the + * virtual memory page that is being remapped. + * + * Before copying the content of the old page to the new page and while the + * MMU region is locked, a GPU cache flush is performed to make sure that + * pending GPU writes are finalized to the old page before copying. + * That is necessary because otherwise there's a risk that GPU writes might + * be finalized to the old page, and not new page, after migration. + * The MMU region is unlocked only at the end of the migration operation. + * + * Return: 0 on success, otherwise an error code. + */ +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level); + +/** + * kbase_mmu_flush_pa_range() - Flush physical address range from the GPU caches + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @kctx: Pointer to kbase context, it can be NULL if the physical address + * range is not associated with User created context. + * @phys: Starting address of the physical range to start the operation on. + * @size: Number of bytes to work on. + * @flush_op: Type of cache flush operation to perform. + * + * Issue a cache flush physical range command. This function won't perform any + * flush if the GPU doesn't support FLUSH_PA_RANGE command. The flush would be + * performed only if the context has a JASID assigned to it. + * This function is basically a wrapper for kbase_gpu_cache_flush_pa_range_and_busy_wait(). + */ +void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op); /** * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. 
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h index 09b3fa809bea..50d2ea5d07c8 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h @@ -54,26 +54,6 @@ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED }; -/** - * enum kbase_mmu_op_type - enum for MMU operations - * @KBASE_MMU_OP_NONE: To help catch uninitialized struct - * @KBASE_MMU_OP_FIRST: The lower boundary of enum - * @KBASE_MMU_OP_LOCK: Lock memory region - * @KBASE_MMU_OP_UNLOCK: Unlock memory region - * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) - * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) - * @KBASE_MMU_OP_COUNT: The upper boundary of enum - */ -enum kbase_mmu_op_type { - KBASE_MMU_OP_NONE = 0, /* Must be zero */ - KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ - KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, - KBASE_MMU_OP_UNLOCK, - KBASE_MMU_OP_FLUSH_PT, - KBASE_MMU_OP_FLUSH_MEM, - KBASE_MMU_OP_COUNT /* Must be the last in enum */ -}; - /** * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions * @vpfn: MMU Virtual Page Frame Number to start the operation on. @@ -104,6 +84,22 @@ struct kbase_mmu_hw_op_param { void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); +/** + * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program + * the LOCKADDR register. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * hwaccess_lock needs to be held when calling this function. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without * programming the LOCKADDR register and wait @@ -114,6 +110,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * @op_param: Pointer to struct containing information about the MMU * operation to perform. * + * This function should be called for GPU where GPU command is used to flush + * the cache(s) instead of MMU command. + * * Return: 0 if issuing the command was successful, otherwise an error code. */ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, @@ -145,7 +144,7 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, * GPUs where MMU command to flush the cache(s) is deprecated. * mmu_hw_mutex needs to be held when calling this function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -164,7 +163,7 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this * function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. 
*/ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -181,7 +180,7 @@ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as * specified inside @op_param. GPU command is used to flush the cache(s) * instead of the MMU command. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c index cc764b483f05..858d4bf6edcd 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c @@ -424,6 +424,14 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, return ret; } +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_lock(kbdev, as, op_param); +} + int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { diff --git a/drivers/gpu/arm/bifrost/tests/Mconfig b/drivers/gpu/arm/bifrost/tests/Mconfig deleted file mode 100644 index 67b38a28cf96..000000000000 --- a/drivers/gpu/arm/bifrost/tests/Mconfig +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_KUTF - bool "Build Mali Kernel Unit Test Framework modules" - depends on MALI_BIFROST && MALI_BIFROST_DEBUG - default y if BACKEND_KERNEL && MALI_BIFROST_DEBUG - help - This option will build the Mali testing framework modules. - - Modules: - - kutf.ko - - kutf_test.ko - -config MALI_KUTF_IRQ_TEST - bool "Build Mali KUTF IRQ test module" - depends on MALI_KUTF - default y - help - This option will build the IRQ latency measurement test module. - It can determine the latency of the Mali GPU IRQ on your system. - - Modules: - - mali_kutf_irq_test.ko - -config MALI_KUTF_CLK_RATE_TRACE - bool "Build Mali KUTF Clock rate trace test module" - depends on MALI_KUTF - default y - help - This option will build the clock rate trace portal test module. - It can test the clocks integration into the platform and exercise some - basic trace test in the system. - - Modules: - - mali_kutf_clk_rate_trace_test_portal.ko - -config MALI_KUTF_MGM_INTEGRATION_TEST - bool "Build Mali KUTF MGM integration test module" - depends on MALI_KUTF - default y - help - This option will build the MGM integration test module. 
- It can test the implementation of PTE translation for specific - group ids. - - Modules: - - mali_kutf_mgm_integration_test.ko - - -# Enable MALI_BIFROST_DEBUG for KUTF modules support - -config UNIT_TEST_KERNEL_MODULES - bool - default y if UNIT_TEST_CODE && BACKEND_KERNEL - default n diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h index c4c713c838cf..3f68efa4257d 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,7 @@ */ #include +#include /** * kutf_helper_pending_input() - Check any pending lines sent by user space @@ -81,4 +82,28 @@ int kutf_helper_input_enqueue(struct kutf_context *context, */ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); +/** + * kutf_helper_ignore_dmesg() - Write message in dmesg to instruct parser + * to ignore errors, until the counterpart + * is written to dmesg to stop ignoring errors. + * @dev: Device pointer to write to dmesg using. + * + * This function writes "Start ignoring dmesg warnings" to dmesg, which + * the parser will read and not log any errors. Only to be used in cases where + * we expect an error to be produced in dmesg but that we do not want to be + * flagged as an error. + */ +void kutf_helper_ignore_dmesg(struct device *dev); + +/** + * kutf_helper_stop_ignoring_dmesg() - Write message in dmesg to instruct parser + * to stop ignoring errors. + * @dev: Device pointer to write to dmesg using. + * + * This function writes "Stop ignoring dmesg warnings" to dmesg, which + * the parser will read and continue to log any errors. Counterpart to + * kutf_helper_ignore_dmesg(). + */ +void kutf_helper_stop_ignoring_dmesg(struct device *dev); + #endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c index d207d1c6e04f..42736195e071 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,3 +127,15 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) { kutf_helper_input_enqueue(context, NULL, 0); } + +void kutf_helper_ignore_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Start ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_ignore_dmesg); + +void kutf_helper_stop_ignoring_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg); diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c index 644d69bc209d..359d06371aff 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c @@ -30,6 +30,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos); diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h index 6660cf5bc276..c1428495b11c 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,17 +27,13 @@ #include /* The maximum size of a single packet used by timeline. */ -#define PACKET_SIZE 4096 /* bytes */ +#define PACKET_SIZE 4096 /* bytes */ /* The number of packets used by one timeline stream. */ -#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) - #define PACKET_COUNT 64 -#else - #define PACKET_COUNT 32 -#endif +#define PACKET_COUNT 128 /* The maximum expected length of string in tracepoint descriptor. 
*/ -#define STRLEN_MAX 64 /* bytes */ +#define STRLEN_MAX 64 /* bytes */ /** * struct kbase_tlstream - timeline stream structure diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index fd0d0c01adde..e8a74e9dafa6 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -84,6 +84,7 @@ enum tl_msg_id_obj { KBASE_TL_ATTRIB_ATOM_PRIORITIZED, KBASE_TL_ATTRIB_ATOM_JIT, KBASE_TL_KBASE_NEW_DEVICE, + KBASE_TL_KBASE_GPUCMDQUEUE_KICK, KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, KBASE_TL_KBASE_DEVICE_HALT_CSG, @@ -352,6 +353,10 @@ enum tl_msg_id_obj { "New KBase Device", \ "@IIIIIII", \ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \ + "Kernel receives a request to process new GPU queue instructions", \ + "@IL", \ + "kernel_ctx_id,buffer_gpu_addr") \ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ "CSG is programmed to a slot", \ "@IIIII", \ @@ -2092,6 +2097,33 @@ void __kbase_tlstream_tl_kbase_new_device( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u64 buffer_gpu_addr +) +{ + const u32 msg_id = KBASE_TL_KBASE_GPUCMDQUEUE_KICK; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(buffer_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_kbase_device_program_csg( struct kbase_tlstream *stream, u32 kbase_device_id, diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h index be0c62edecd7..c690a75fe22c 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -396,6 +396,12 @@ void __kbase_tlstream_tl_kbase_new_device( u32 kbase_device_supports_gpu_sleep ); +void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u64 buffer_gpu_addr +); + void __kbase_tlstream_tl_kbase_device_program_csg( struct kbase_tlstream *stream, u32 kbase_device_id, @@ -1981,6 +1987,37 @@ struct kbase_tlstream; do { } while (0) #endif /* MALI_USE_CSF */ +/** + * KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK - Kernel receives a request to process new GPU queue instructions + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @buffer_gpu_addr: Address of the GPU queue's command buffer + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ + kbdev, \ + kernel_ctx_id, \ + buffer_gpu_addr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_gpucmdqueue_kick( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kernel_ctx_id, \ + 
buffer_gpu_addr \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ + kbdev, \ + kernel_ctx_id, \ + buffer_gpu_addr \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + /** * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot * diff --git a/drivers/hwtracing/coresight/mali/Kbuild b/drivers/hwtracing/coresight/mali/Kbuild new file mode 100644 index 000000000000..4d7d665f5652 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Kbuild @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +# make $(src) as absolute path if it is not already, by prefixing $(srctree) +# This is to prevent any build issue due to wrong path. +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) + +# +# ccflags +# +ccflags-y += \ + -I$(srctree)/include/linux \ + -I$(src) \ + -I$(srctree)/drivers/hwtracing/coresight/ \ + -I$(src)/../../../../include + +subdir-ccflags-y += $(ccflags-y) + +ifneq ($(CONFIG_CORESIGHT), n) +ifneq ($(CONFIG_CORESIGHT), ) + + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ETM),y) + obj-m += coresight_mali_source_etm.o + coresight_mali_source_etm-y := \ + sources/etm/coresight_mali_source_etm_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ITM),y) + obj-m += coresight_mali_source_itm.o + coresight_mali_source_itm-y := \ + sources/itm/coresight_mali_source_itm_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ELA),y) + obj-m += coresight_mali_source_ela.o + coresight_mali_source_ela-y := \ + sources/ela/coresight_mali_source_ela_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +endif +endif diff --git a/drivers/hwtracing/coresight/mali/Kconfig b/drivers/hwtracing/coresight/mali/Kconfig new file mode 100644 index 000000000000..283e2b56b641 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Kconfig @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + + +config CORESIGHT_MALI_SOURCES_ETM + depends on MALI_CORESIGHT && ARM64 + bool "Enable CoreSight Mali Sources ETM module" + default y + help + This option will build coresight ETM source driver, + that is used for configuring, enabling and disabling + the ETM component. + +config CORESIGHT_MALI_SOURCES_ITM + depends on MALI_CORESIGHT + bool "Enable CoreSight Mali Sources ITM module" + default y + help + This option will build coresight ITM source driver, + that is used for configuring, enabling and disabling + the ITM component. + +config CORESIGHT_MALI_SOURCES_ELA + depends on MALI_CORESIGHT + bool "Enable CoreSight Mali Sources ELA module" + default y + help + This option will build coresight ELA source driver, + that is used for configuring, enabling and disabling + the ELA component. diff --git a/drivers/hwtracing/coresight/mali/Makefile b/drivers/hwtracing/coresight/mali/Makefile new file mode 100644 index 000000000000..a6b5622c92ae --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Makefile @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build +KDIR ?= $(KERNEL_SRC) + +ifeq ($(KDIR),) + $(error Must specify KDIR to point to the kernel to target)) +endif + +CONFIG_MALI_CORESIGHT ?= n +ifeq ($(CONFIG_MALI_CORESIGHT),y) + + ifeq ($(CONFIG_ARM64), y) + CONFIG_CORESIGHT_MALI_SOURCES_ETM ?= y + endif + + CONFIG_CORESIGHT_MALI_SOURCES_ITM ?= y + CONFIG_CORESIGHT_MALI_SOURCES_ELA ?= y +endif + +CONFIGS := \ + CONFIG_MALI_CORESIGHT \ + CONFIG_CORESIGHT_MALI_SOURCES_ETM \ + CONFIG_CORESIGHT_MALI_SOURCES_ITM \ + CONFIG_CORESIGHT_MALI_SOURCES_ELA + + +# +# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs and values. +# $(value config) is the name of the CONFIG option. +# $(value $(value config)) is its value (y, m). +# When the CONFIG is not set to y or m, it defaults to n. +MAKE_ARGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + $(value config)=$(value $(value config)), \ + $(value config)=n)) + +# +# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs defines with values from CONFIGS. +# $(value config) is the name of the CONFIG option. +# When set to y or m, the CONFIG gets defined to 1. 
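# For illustration: with CONFIG_MALI_CORESIGHT=y on an arm64 kernel, the
# defaults above select all three source drivers, so the two generated
# variables expand roughly to:
#
#   MAKE_ARGS    = CONFIG_MALI_CORESIGHT=y CONFIG_CORESIGHT_MALI_SOURCES_ETM=y
#                  CONFIG_CORESIGHT_MALI_SOURCES_ITM=y CONFIG_CORESIGHT_MALI_SOURCES_ELA=y
#   EXTRA_CFLAGS = -DCONFIG_MALI_CORESIGHT=1 -DCONFIG_CORESIGHT_MALI_SOURCES_ETM=1
#                  -DCONFIG_CORESIGHT_MALI_SOURCES_ITM=1 -DCONFIG_CORESIGHT_MALI_SOURCES_ELA=1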
+EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + -D$(value config)=1)) + +# +# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +# +EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/gpu/arm/midgard/Module.symvers +EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/hwtracing/coresight/mali/Module.symvers + +# The following were added to align with W=1 in scripts/Makefile.extrawarn +# from the Linux source tree +KBUILD_CFLAGS += -Wall -Werror +KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter +KBUILD_CFLAGS += -Wmissing-declarations +KBUILD_CFLAGS += -Wmissing-format-attribute +KBUILD_CFLAGS += -Wmissing-prototypes +KBUILD_CFLAGS += -Wold-style-definition +KBUILD_CFLAGS += -Wmissing-include-dirs +KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) +KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) +KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) +KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-missing-field-initializers +KBUILD_CFLAGS += -Wno-sign-compare +KBUILD_CFLAGS += -Wno-type-limits + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 + +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +modules_install: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean diff --git a/drivers/hwtracing/coresight/mali/build.bp b/drivers/hwtracing/coresight/mali/build.bp new file mode 100644 index 000000000000..824ae54c9e43 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/build.bp @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +bob_defaults { + name: "coresight_mali_defaults", + srcs: [ + "Makefile", + "Kbuild", + "coresight_mali_common.c", + "coresight_mali_common.h", + ], +} + +bob_defaults { + name: "coresight_mali_source_defaults", + srcs: [ + "sources/coresight_mali_sources.c", + "sources/coresight_mali_sources.h", + ], +} + + +bob_kernel_module { + name: "coresight_mali_source_etm", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/etm/coresight_mali_source_etm_core.c", + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ETM=y"], + enabled: true, + }, +} + +bob_kernel_module { + name: "coresight_mali_source_itm", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/itm/coresight_mali_source_itm_core.c", + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ITM=y"], + enabled: true, + }, +} + +bob_kernel_module { + name: "coresight_mali_source_ela", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/ela/coresight_mali_source_ela_core.c", + "sources/ela/coresight-ela600.h" + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ELA=y"], + enabled: true, + }, +} diff --git a/drivers/hwtracing/coresight/mali/coresight_mali_common.c b/drivers/hwtracing/coresight/mali/coresight_mali_common.c new file mode 100644 index 000000000000..8e3af76c5267 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/coresight_mali_common.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include + +#include +#include "coresight_mali_common.h" + +int coresight_mali_enable_component(struct coresight_device *csdev, u32 mode) +{ + struct coresight_mali_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int res = 0; + + if (mode != CS_MODE_SYSFS) { + dev_err(drvdata->dev, "Unsupported Mali CS_MODE: %d, expected: %d\n", mode, + CS_MODE_SYSFS); + return -EINVAL; + } + + drvdata->mode = mode; + + res = kbase_debug_coresight_csf_config_enable(drvdata->config); + if (res) { + dev_err(drvdata->dev, "Config failed to enable with error code %d\n", res); + drvdata->mode = CS_MODE_DISABLED; + } + + return res; +} + +int coresight_mali_disable_component(struct coresight_device *csdev) +{ + struct coresight_mali_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int res = 0; + + res = kbase_debug_coresight_csf_config_disable(drvdata->config); + if (res) + dev_err(drvdata->dev, "config failed to disable with error code %d\n", res); + + drvdata->mode = CS_MODE_DISABLED; + + return res; +} diff --git a/drivers/hwtracing/coresight/mali/coresight_mali_common.h b/drivers/hwtracing/coresight/mali/coresight_mali_common.h new file mode 100644 index 000000000000..43154c1f639d --- /dev/null +++ b/drivers/hwtracing/coresight/mali/coresight_mali_common.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _CORESIGHT_MALI_COMMON_H +#define _CORESIGHT_MALI_COMMON_H + +#include +#include + +/* Macros for CoreSight OP types. 
*/ +#define WRITE_IMM_OP(_reg_addr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM, \ + .op.write_imm.reg_addr = _reg_addr, .op.write_imm.val = _val \ + } + +#define WRITE_RANGE_OP(_reg_start, _reg_end, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE, \ + .op.write_imm_range.reg_start = _reg_start, \ + .op.write_imm_range.reg_end = _reg_end, .op.write_imm_range.val = _val \ + } + +#define WRITE_PTR_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE, .op.write.reg_addr = _reg_addr, \ + .op.write.ptr = _ptr \ + } + +#define READ_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ, .op.read.reg_addr = _reg_addr, \ + .op.read.ptr = _ptr \ + } + +#define POLL_OP(_reg_addr, _mask, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL, .op.poll.reg_addr = _reg_addr, \ + .op.poll.mask = _mask, .op.poll.val = _val \ + } + +#define BIT_OR_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_XOR_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_AND_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_NOT_OP(_ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT, .op.bitw.ptr = _ptr, \ + } + +#ifndef CS_MALI_UNLOCK_COMPONENT +/** + * CS_MALI_UNLOCK_COMPONENT - A write of 0xC5ACCE55 enables write access to the block + */ +#define CS_MALI_UNLOCK_COMPONENT 0xC5ACCE55 +#endif + +/** + * struct coresight_mali_drvdata - Coresight mali driver data + * + * @csdev: Coresight device pointer + * @dev: Device pointer + * @kbase_client: Pointer to coresight mali client + * @config: Pointer to coresight mali config, used for enabling and + * disabling the coresight component + * @enable_seq: Enable sequence needed to enable coresight block + * @disable_seq: Disable sequence needed to enable coresight block + * @gpu_dev: Pointer to gpu device structure + * @mode: Mode in which the driver operates + */ +struct coresight_mali_drvdata { + struct coresight_device *csdev; + struct device *dev; + void *kbase_client; + void *config; + struct kbase_debug_coresight_csf_sequence enable_seq; + struct kbase_debug_coresight_csf_sequence disable_seq; + void *gpu_dev; + u32 mode; +}; + +/** + * coresight_mali_enable_component - Generic enable for a coresight block + * + * @csdev: Coresight device to be enabled + * @mode: Mode in which the block should start operating in + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_enable_component(struct coresight_device *csdev, u32 mode); + +/** + * coresight_mali_disable_component - Generic disable for a coresight block + * + * @csdev: Coresight device to be disabled + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_disable_component(struct coresight_device *csdev); + +#endif /* _CORESIGHT_MALI_COMMON_H */ diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c new file mode 100644 index 000000000000..e6d2dc71096b --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "sources/coresight_mali_sources.h" + +static int coresight_mali_source_trace_id(struct coresight_device *csdev) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return drvdata->trcid; +} + +static int coresight_mali_enable_source(struct coresight_device *csdev, struct perf_event *event, + u32 mode) +{ + return coresight_mali_enable_component(csdev, mode); +} + +static void coresight_mali_disable_source(struct coresight_device *csdev, struct perf_event *event) +{ + coresight_mali_disable_component(csdev); +} + +static const struct coresight_ops_source coresight_mali_source_ops = { + .trace_id = coresight_mali_source_trace_id, + .enable = coresight_mali_enable_source, + .disable = coresight_mali_disable_source +}; + +static const struct coresight_ops mali_cs_ops = { + .source_ops = &coresight_mali_source_ops, +}; + +int coresight_mali_sources_probe(struct platform_device *pdev) +{ + int ret = 0; + struct coresight_platform_data *pdata = NULL; + struct coresight_mali_source_drvdata *drvdata = NULL; + struct coresight_desc desc = { 0 }; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct platform_device *gpu_pdev = NULL; + struct device_node *gpu_node = NULL; + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + dev_set_drvdata(dev, drvdata); + drvdata->base.dev = dev; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + pdata = coresight_get_platform_data(dev); +#else + if (np) + pdata = of_get_coresight_platform_data(dev, np); +#endif + if (IS_ERR(pdata)) { + dev_err(drvdata->base.dev, "Failed to get platform data\n"); + ret = PTR_ERR(pdata); + goto devm_kfree_drvdata; + } + + dev->platform_data = pdata; + + gpu_node = of_parse_phandle(np, "gpu", 0); + if (!gpu_node) { + dev_err(drvdata->base.dev, "GPU node not available\n"); + goto devm_kfree_drvdata; + } + gpu_pdev = of_find_device_by_node(gpu_node); + if (gpu_pdev == NULL) { + dev_err(drvdata->base.dev, "Couldn't find GPU device from node\n"); + goto devm_kfree_drvdata; + } + + drvdata->base.gpu_dev = platform_get_drvdata(gpu_pdev); + if (!drvdata->base.gpu_dev) { + dev_err(drvdata->base.dev, "GPU dev not available\n"); + goto devm_kfree_drvdata; + } + + ret = coresight_mali_sources_init_drvdata(drvdata); + if (ret) { + dev_err(drvdata->base.dev, "Failed to init source driver data\n"); + goto kbase_client_unregister; + } + + desc.type = CORESIGHT_DEV_TYPE_SOURCE; + desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE; + desc.ops = &mali_cs_ops; + desc.pdata = pdata; + desc.dev = dev; + desc.groups = coresight_mali_source_groups_get(); + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + desc.name = 
devm_kasprintf(dev, GFP_KERNEL, "%s", drvdata->type_name); + if (!desc.name) { + ret = -ENOMEM; + goto devm_kfree_drvdata; + } +#endif + drvdata->base.csdev = coresight_register(&desc); + if (IS_ERR(drvdata->base.csdev)) { + dev_err(drvdata->base.dev, "Failed to register coresight device\n"); + ret = PTR_ERR(drvdata->base.csdev); + goto devm_kfree_drvdata; + } + + return ret; + +kbase_client_unregister: + if (drvdata->base.csdev != NULL) + coresight_unregister(drvdata->base.csdev); + + coresight_mali_sources_deinit_drvdata(drvdata); + +devm_kfree_drvdata: + devm_kfree(dev, drvdata); + + return ret; +} + +int coresight_mali_sources_remove(struct platform_device *pdev) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(&pdev->dev); + + if (drvdata->base.csdev != NULL) + coresight_unregister(drvdata->base.csdev); + + coresight_mali_sources_deinit_drvdata(drvdata); + + devm_kfree(&pdev->dev, drvdata); + + return 0; +} + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h new file mode 100644 index 000000000000..0915dd107488 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _CORESIGHT_MALI_SOURCES_H +#define _CORESIGHT_MALI_SOURCES_H + +#include +#include +#include + +#include "coresight_mali_common.h" + +/** + * struct coresight_mali_source_drvdata - Coresight mali source driver data + * + * @base: Common driver data structure between coresight mali sources and sinks + * @trcid: Trace id + * @type_name: Type name of the driver, for example "itm" or "etm" + */ +struct coresight_mali_source_drvdata { + struct coresight_mali_drvdata base; + u32 trcid; +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + char *type_name; +#endif +}; + +/** + * coresight_mali_sources_probe - Generic probe for a coresight mali source + * + * @pdev: Pointer to a platform device + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_sources_probe(struct platform_device *pdev); + +/** + * coresight_mali_sources_remove - Generic remove for a coresight mali source + * + * @pdev: Pointer to a platform device + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_sources_remove(struct platform_device *pdev); + +/** + * coresight_mali_sources_init_drvdata - Driver data initialization hook. + * + * @drvdata: Driver data structure to initialize + * + * Used for initializing source specific enable and disable sequences and other relevant data. + * + * Return: 0 if success. 
Error code on failure. + */ +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata); + +/** + * coresight_mali_sources_deinit_drvdata - Driver data deinitialization hook. + * + * @drvdata: Driver data structure to deinitialize + * + * Used for releasing source specific enable and disable sequences and other relevant data. + */ +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata); + +/** + * coresight_mali_source_groups_get - Getter for source groups. + * + * Return: a pointer to an array of attribute groups of the driver. Can also be NULL. + * + * Groups are drivers sysfs subnodes that can be used to read state of the coresight component + * or write component configuration. + */ +const struct attribute_group **coresight_mali_source_groups_get(void); + +#endif /* _CORESIGHT_MALI_SOURCES_H */ diff --git a/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h b/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h new file mode 100644 index 000000000000..0ee96bc2f047 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _CORESIGHT_ELA600_H +#define _CORESIGHT_ELA600_H + +#include + +#define ELA_CTRL 0x000 +#define ELA_TIMECTRL 0x004 +#define ELA_TSSR 0x008 +#define ELA_ATBCTRL 0x00C +#define ELA_PTACTION 0x010 +#define ELA_AUXCTRL 0x014 +#define ELA_CNTSEL 0x018 + +#define ELA_CTSR 0x020 +#define ELA_CCVR 0x024 +#define ELA_CAVR 0x028 +#define ELA_RDCAPTID 0x02C +#define ELA_RDCAPTIDEXT 0x030 + +#define ELA_RRAR 0x040 +#define ELA_RRDR 0x044 +#define ELA_RWAR 0x048 +#define ELA_RWDR 0x04C + +#define ELA_SIGSEL(x) (0x100 + 0x100 * (x)) +#define ELA_TRIGCTRL(x) (ELA_SIGSEL(x) + 0x004) +#define ELA_NEXTSTATE(x) (ELA_SIGSEL(x) + 0x008) +#define ELA_ACTION(x) (ELA_SIGSEL(x) + 0x00C) +#define ELA_ALTNEXTSTATE(x) (ELA_SIGSEL(x) + 0x010) +#define ELA_ALTACTION(x) (ELA_SIGSEL(x) + 0x014) +#define ELA_COMPCTRL(x) (ELA_SIGSEL(x) + 0x018) +#define ELA_ALTCOMPCTRL(x) (ELA_SIGSEL(x) + 0x01C) +#define ELA_COUNTCOMP(x) (ELA_SIGSEL(x) + 0x020) +#define ELA_TWBSEL(x) (ELA_SIGSEL(x) + 0x028) +#define ELA_EXTMASK(x) (ELA_SIGSEL(x) + 0x030) +#define ELA_EXTCOMP(x) (ELA_SIGSEL(x) + 0x034) +#define ELA_QUALMASK(x) (ELA_SIGSEL(x) + 0x038) +#define ELA_QUALCOMP(x) (ELA_SIGSEL(x) + 0x03C) +#define ELA_SIGMASK(x, y) (ELA_SIGSEL(x) + 0x040 + 4 * (y)) +#define ELA_SIGCOMP(x, y) (ELA_SIGSEL(x) + 0x080 + 4 * (y)) + +#define ELA_ITTRIGOUT 0xEE8 +#define ELA_ITATBDATA 0xEEC +#define ELA_ITATBCTR1 0xEF0 +#define ELA_ITATBCTR0 0xEF4 +#define ELA_ITTRIGIN 0xEF8 +#define ELA_ITCTRL 0xF00 + +#define ELA_AUTHSTATUS 0xFB8 + +#define ELA_DEVARCH 0xFBC +#define ELA_DEVID2 0xFC0 +#define ELA_DEVID1 0xFC4 +#define ELA_DEVID 0xFC8 +#define ELA_DEVTYPE 0xFCC + +#define ELA_PIDR4 0xFD0 +#define ELA_PIDR5 0xFD4 +#define ELA_PIDR6 0xFD8 +#define ELA_PIDR7 0xFDC +#define ELA_PIDR0 0xFE0 +#define ELA_PIDR1 0xFE4 +#define ELA_PIDR2 0xFE8 +#define ELA_PIDR3 0xFEC +#define ELA_CIDR0 0xFF0 +#define ELA_CIDR1 0xFF4 +#define ELA_CIDR2 0xFF8 +#define ELA_CIDR3 0xFFC + +/* REGISTER MASKS */ +#define ELA_CTRL_RUN BIT(0) +#define ELA_CTRL_TRACE_BUSY BIT(1) + +#define ELA_TIMECTRL_TSEN BIT(16) +#define ELA_TIMECTRL_TSINT GEN_MASK(15, 12) +#define ELA_TIMECTRL_TCSEL1 GEN_MASK(7, 4) +#define ELA_TIMECTRL_TCSEL0 GEN_MASK(3, 0) + +#define ELA_ATBCTRL_PREDICT BIT(31) +#define ELA_ATBCTRL_ATID_TRIG_EN BIT(15) +#define ELA_ATBCTRL_ATID_VALUE GEN_MASK(14, 8) +#define ELA_ATBCTRL_ASYNC_INTERVAL GEN_MASK(7, 0) + +#define ELA_ACTION_ELAOUTPUT GEN_MASK(7, 4) +#define ELA_ACTION_TRACE BIT(3) +#define ELA_ACTION_STOPCLOCK BIT(2) +#define ELA_ACTION_CTTRIGOUT GEN_MASK(1, 0) + +#define ELA_AUXCTRL_FLUSH_DIS BIT(0) + +#define ELA_SIGSEL_JCN_REQUEST BIT(0) +#define ELA_SIGSEL_JCN_RESPONSE BIT(1) +#define ELA_SIGSEL_CEU_EXECUTION BIT(2) +#define ELA_SIGSEL_MCU_AHBP BIT(3) +#define ELA_SIGSEL_HOST_AXI BIT(4) + +#define ELA_TRIGCTRL_ALTCOMPSEL BIT(15) +#define ELA_TRIGCTRL_ALTCOMP GEN_MASK(14, 12) +#define ELA_TRIGCTRL_CAPTID GEN_MASK(11, 10) +#define ELA_TRIGCTRL_COUNTBRK BIT(9) +#define ELA_TRIGCTRL_COUNTCLR BIT(8) +#define ELA_TRIGCTRL_TRACE GEN_MASK(7, 6) +#define ELA_TRIGCTRL_COUNTSRC BIT(5) +#define ELA_TRIGCTRL_WATCHRST BIT(4) +#define ELA_TRIGCTRL_COMPSEL BIT(3) +#define ELA_TRIGCTRL_COMP GEN_MASK(2, 0) + +#endif /* _CORESIGHT_ELA600_H */ diff --git a/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c new file mode 100644 index 000000000000..0da37a75ecfb --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c @@ 
-0,0 +1,666 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include "sources/coresight_mali_sources.h" +#include "coresight-ela600.h" + +/* Linux Coresight framework does not support multiple sources enabled + * at the same time. + * + * To avoid Kernel instability, all Mali Coresight sources use the + * same trace ID value as the mandatory ETM one. + */ +#define CS_MALI_TRACE_ID 0x00000010 + +#define CS_ELA_BASE_ADDR 0xE0043000 +#define CS_GPU_COMMAND_ADDR 0x40003030 +#define CS_GPU_COMMAND_TRACE_CONTROL_EN 0x000001DC +#define CS_ELA_MAX_SIGNAL_GROUPS 12U +#define CS_SG_NAME_MAX_LEN 10U +#define CS_ELA_NR_SIG_REGS 8U + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +#define CS_ELA_SIGREGS_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf_regs(buf, CS_ELA_##_b##_0, CS_ELA_##_b##_7); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_8_regs(dev, buf, count, CS_ELA_##_b##_0); \ + } \ + static DEVICE_ATTR_RW(_a) + +enum cs_ela_dynamic_regs { + CS_ELA_TIMECTRL, + CS_ELA_TSSR, + + CS_ELA_SIGSEL0, + CS_ELA_COMPCTRL0, + CS_ELA_ALTCOMPCTRL0, + CS_ELA_TWBSEL0, + CS_ELA_QUALMASK0, + CS_ELA_QUALCOMP0, + CS_ELA_SIGMASK0_0, + CS_ELA_SIGMASK0_1, + CS_ELA_SIGMASK0_2, + CS_ELA_SIGMASK0_3, + CS_ELA_SIGMASK0_4, + CS_ELA_SIGMASK0_5, + CS_ELA_SIGMASK0_6, + CS_ELA_SIGMASK0_7, + CS_ELA_SIGCOMP0_0, + CS_ELA_SIGCOMP0_1, + CS_ELA_SIGCOMP0_2, + CS_ELA_SIGCOMP0_3, + CS_ELA_SIGCOMP0_4, + CS_ELA_SIGCOMP0_5, + CS_ELA_SIGCOMP0_6, + CS_ELA_SIGCOMP0_7, + + CS_ELA_SIGSEL4, + CS_ELA_NEXTSTATE4, + CS_ELA_ACTION4, + CS_ELA_ALTNEXTSTATE4, + CS_ELA_COMPCTRL4, + CS_ELA_TWBSEL4, + CS_ELA_SIGMASK4_0, + CS_ELA_SIGMASK4_1, + CS_ELA_SIGMASK4_2, + CS_ELA_SIGMASK4_3, + CS_ELA_SIGMASK4_4, + CS_ELA_SIGMASK4_5, + CS_ELA_SIGMASK4_6, + CS_ELA_SIGMASK4_7, + CS_ELA_SIGCOMP4_0, + CS_ELA_SIGCOMP4_1, + CS_ELA_SIGCOMP4_2, + CS_ELA_SIGCOMP4_3, + CS_ELA_SIGCOMP4_4, + CS_ELA_SIGCOMP4_5, + CS_ELA_SIGCOMP4_6, + CS_ELA_SIGCOMP4_7, + + CS_ELA_NR_DYN_REGS +}; + +enum cs_ela_tracemodes { + CS_ELA_TRACEMODE_NONE, + CS_ELA_TRACEMODE_JCN, + CS_ELA_TRACEMODE_CEU_EXEC, + CS_ELA_TRACEMODE_CEU_CMDS, + CS_ELA_TRACEMODE_MCU_AHBP, + CS_ELA_TRACEMODE_HOST_AXI, + CS_ELA_NR_TRACEMODE +}; + +enum cs_ela_signal_types { + CS_ELA_SIGTYPE_JCN_REQ, + CS_ELA_SIGTYPE_JCN_RES, + CS_ELA_SIGTYPE_CEU_EXEC, + CS_ELA_SIGTYPE_CEU_CMDS, + CS_ELA_SIGTYPE_MCU_AHBP, + CS_ELA_SIGTYPE_HOST_AXI, + CS_ELA_NR_SIGTYPE, +}; + +struct cs_ela_state { + enum cs_ela_tracemodes tracemode; + u32 supported_tracemodes; + int enabled; + u32 signal_types[CS_ELA_NR_SIGTYPE]; + u32 regs[CS_ELA_NR_DYN_REGS]; +}; + +#if 
KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-ela"; +#endif + +static struct cs_ela_state ela_state = { 0 }; + +/* Setup ELA sysfs attributes */ +static char *tracemode_names[] = { + [CS_ELA_TRACEMODE_NONE] = "NONE", [CS_ELA_TRACEMODE_JCN] = "JCN", + [CS_ELA_TRACEMODE_CEU_EXEC] = "CEU_EXEC", [CS_ELA_TRACEMODE_CEU_CMDS] = "CEU_CMDS", + [CS_ELA_TRACEMODE_MCU_AHBP] = "MCU_AHBP", [CS_ELA_TRACEMODE_HOST_AXI] = "HOST_AXI", +}; + +static char *signal_type_names[] = { + [CS_ELA_SIGTYPE_JCN_REQ] = "jcn-request", [CS_ELA_SIGTYPE_JCN_RES] = "jcn-response", + [CS_ELA_SIGTYPE_CEU_EXEC] = "ceu-execution", [CS_ELA_SIGTYPE_CEU_CMDS] = "ceu-commands", + [CS_ELA_SIGTYPE_MCU_AHBP] = "mcu-ahbp", [CS_ELA_SIGTYPE_HOST_AXI] = "host-axi", +}; + +static int signal_type_tracemode_map[] = { + [CS_ELA_SIGTYPE_JCN_REQ] = CS_ELA_TRACEMODE_JCN, + [CS_ELA_SIGTYPE_JCN_RES] = CS_ELA_TRACEMODE_JCN, + [CS_ELA_SIGTYPE_CEU_EXEC] = CS_ELA_TRACEMODE_CEU_EXEC, + [CS_ELA_SIGTYPE_CEU_CMDS] = CS_ELA_TRACEMODE_CEU_CMDS, + [CS_ELA_SIGTYPE_MCU_AHBP] = CS_ELA_TRACEMODE_MCU_AHBP, + [CS_ELA_SIGTYPE_HOST_AXI] = CS_ELA_TRACEMODE_HOST_AXI, +}; + +static void setup_tracemode_registers(int tracemode) +{ + switch (tracemode) { + case CS_ELA_TRACEMODE_NONE: + /* Perform full reset of all dynamic registers */ + memset(ela_state.regs, 0x00000000, sizeof(u32) * CS_ELA_NR_DYN_REGS); + + ela_state.tracemode = CS_ELA_TRACEMODE_NONE; + break; + case CS_ELA_TRACEMODE_JCN: + + if (ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ] == + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES]) { + ela_state.regs[CS_ELA_TSSR] = 0x00000000; + + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ]; + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000010; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000; + ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + ela_state.regs[CS_ELA_SIGMASK0_1] = 0x80000000; + ela_state.regs[CS_ELA_SIGMASK0_3] = 0x80000000; + ela_state.regs[CS_ELA_SIGCOMP0_1] = 0x80000000; + ela_state.regs[CS_ELA_SIGCOMP0_3] = 0x80000000; + + memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); + + ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; + + } else { + ela_state.regs[CS_ELA_TSSR] = 0x00000010; + + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ]; + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000100; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; + ela_state.regs[CS_ELA_TWBSEL0] = 0x00000FFF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + ela_state.regs[CS_ELA_SIGMASK0_2] |= 0x80000000; + ela_state.regs[CS_ELA_SIGCOMP0_2] |= 0x80000000; + + ela_state.regs[CS_ELA_SIGSEL4] = + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES]; + ela_state.regs[CS_ELA_NEXTSTATE4] = 0x00000010; + ela_state.regs[CS_ELA_ACTION4] = 0x00000008; + ela_state.regs[CS_ELA_ALTNEXTSTATE4] = 0x00000001; + ela_state.regs[CS_ELA_COMPCTRL4] = 0x00000100; + ela_state.regs[CS_ELA_TWBSEL4] = 0x00000FFF; + + memset(&ela_state.regs[CS_ELA_SIGMASK4_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGMASK4_0 + 1)); + ela_state.regs[CS_ELA_SIGMASK4_2] |= 
0x80000000; + ela_state.regs[CS_ELA_SIGCOMP4_2] |= 0x80000000; + } + + break; + case CS_ELA_TRACEMODE_CEU_EXEC: + case CS_ELA_TRACEMODE_CEU_CMDS: + ela_state.regs[CS_ELA_TSSR] = 0x00000000; + + if (tracemode == CS_ELA_TRACEMODE_CEU_EXEC) { + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_CEU_EXEC]; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000; + } else if (tracemode == CS_ELA_TRACEMODE_CEU_CMDS) { + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_CEU_CMDS]; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; + } + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001; + ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x0000000F; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x0000000F; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + + memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); + + ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; + + break; + case CS_ELA_TRACEMODE_MCU_AHBP: + case CS_ELA_TRACEMODE_HOST_AXI: + ela_state.regs[CS_ELA_TSSR] = 0x00000000; + + if (tracemode == CS_ELA_TRACEMODE_MCU_AHBP) + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_MCU_AHBP]; + else if (tracemode == CS_ELA_TRACEMODE_HOST_AXI) + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_HOST_AXI]; + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; + ela_state.regs[CS_ELA_TWBSEL0] = 0x000000FF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x00000003; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000003; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + + memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); + + ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; + + break; + } + ela_state.tracemode = tracemode; +} + +static ssize_t select_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + ssize_t ret = 0; + unsigned int mode; + + for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) { + if (ela_state.supported_tracemodes & (1U << mode)) { + if (ela_state.tracemode == mode) + ret += sprintf(buf + ret, "[%s]\n", tracemode_names[mode]); + else + ret += sprintf(buf + ret, "%s\n", tracemode_names[mode]); + } + } + return ret; +} + +static ssize_t select_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + unsigned int mode = 0; + + /* Check if enabled and return error */ + if (ela_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers"); + return -EINVAL; + } + + for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) { + if (sysfs_streq(tracemode_names[mode], buf) && + (ela_state.supported_tracemodes & (1U << mode))) { + setup_tracemode_registers(mode); + return count; + } + } + + dev_err(drvdata->base.dev, "Invalid tracemode: %s", buf); + return -EINVAL; +} + +static DEVICE_ATTR_RW(select); + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", ela_state.enabled); +} + +static DEVICE_ATTR_RO(is_enabled); + +static ssize_t sprintf_regs(char *const buf, int from_reg, int to_reg) +{ + ssize_t ret 
= 0; + unsigned int i = 0; + + for (i = from_reg; i <= to_reg; i++) + ret += sprintf(buf + ret, "0x%08X ", ela_state.regs[i]); + + ret += sprintf(buf + ret, "\n"); + return ret; +} + +static ssize_t verify_store_8_regs(struct device *dev, const char *buf, size_t count, int from_reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 regs[CS_ELA_NR_SIG_REGS] = { 0 }; + int items; + unsigned int i; + + if (ela_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers"); + return -EINVAL; + } + + items = sscanf(buf, "%x %x %x %x %x %x %x %x", ®s[0], ®s[1], ®s[2], ®s[3], + ®s[4], ®s[5], ®s[6], ®s[7]); + if (items <= 0) { + dev_err(drvdata->base.dev, "Invalid register value"); + return -EINVAL; + } + if (items != CS_ELA_NR_SIG_REGS) { + dev_err(drvdata->base.dev, "Incorrect number of registers set (%d != %d)", items, + CS_ELA_NR_SIG_REGS); + return -EINVAL; + } + for (i = 0; i < CS_ELA_NR_SIG_REGS; i++) + ela_state.regs[from_reg + i] = regs[i]; + + return count; +} + +CS_ELA_SIGREGS_ATTR_RW(sigmask0, SIGMASK0); +CS_ELA_SIGREGS_ATTR_RW(sigcomp0, SIGCOMP0); +CS_ELA_SIGREGS_ATTR_RW(sigmask4, SIGMASK4); +CS_ELA_SIGREGS_ATTR_RW(sigcomp4, SIGCOMP4); + +static struct attribute *coresight_ela_attrs[] = { + &dev_attr_select.attr, + &dev_attr_is_enabled.attr, + &dev_attr_sigmask0.attr, + &dev_attr_sigcomp0.attr, + &dev_attr_sigmask4.attr, + &dev_attr_sigcomp4.attr, + NULL, +}; + +static struct attribute_group coresight_ela_group = { + .attrs = coresight_ela_attrs, +}; + +static const struct attribute_group *coresight_ela_groups[] = { + &coresight_ela_group, + NULL, +}; + +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_ela_groups; +} + +/* Initialize ELA coresight driver */ + +static struct kbase_debug_coresight_csf_address_range ela_range[] = { + { CS_ELA_BASE_ADDR, CS_ELA_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_GPU_COMMAND_ADDR, CS_GPU_COMMAND_ADDR } +}; + +static struct kbase_debug_coresight_csf_op ela_enable_ops[] = { + /* Clearing CTRL.RUN and the read only CTRL.TRACE_BUSY. */ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, 0x00000000), + /* Poll CTRL.TRACE_BUSY until it becomes low to ensure that trace has stopped. */ + POLL_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_TRACE_BUSY, 0x0), + /* 0 for now. TSEN = 1 or TSINT = 8 in future */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TIMECTRL, &ela_state.regs[CS_ELA_TIMECTRL]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TSSR, &ela_state.regs[CS_ELA_TSSR]), + /* ATID[6:0] = 4; valid range 0x1-0x6F, value must be unique and needs to be + * known for trace extraction + */ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ATBCTRL, 0x00000400), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_PTACTION, ELA_ACTION_TRACE), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_AUXCTRL, 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CNTSEL, 0x00000000), + + /* Trigger State 0 */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(0), &ela_state.regs[CS_ELA_SIGSEL0]), + /* May need to be configurable in future. 
*/ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(0), 0x00000000), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(0), 0x00000001), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ACTION(0), ELA_ACTION_TRACE), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(0), 0x00000001), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(0), ELA_ACTION_TRACE), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(0), &ela_state.regs[CS_ELA_COMPCTRL0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(0), &ela_state.regs[CS_ELA_ALTCOMPCTRL0]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(0), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(0), &ela_state.regs[CS_ELA_TWBSEL0]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(0), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(0), 0x00000000), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(0), &ela_state.regs[CS_ELA_QUALMASK0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(0), &ela_state.regs[CS_ELA_QUALCOMP0]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 0), &ela_state.regs[CS_ELA_SIGMASK0_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 1), &ela_state.regs[CS_ELA_SIGMASK0_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 2), &ela_state.regs[CS_ELA_SIGMASK0_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 3), &ela_state.regs[CS_ELA_SIGMASK0_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 4), &ela_state.regs[CS_ELA_SIGMASK0_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 5), &ela_state.regs[CS_ELA_SIGMASK0_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 6), &ela_state.regs[CS_ELA_SIGMASK0_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 7), &ela_state.regs[CS_ELA_SIGMASK0_7]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 0), &ela_state.regs[CS_ELA_SIGCOMP0_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 1), &ela_state.regs[CS_ELA_SIGCOMP0_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 2), &ela_state.regs[CS_ELA_SIGCOMP0_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 3), &ela_state.regs[CS_ELA_SIGCOMP0_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 4), &ela_state.regs[CS_ELA_SIGCOMP0_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 5), &ela_state.regs[CS_ELA_SIGCOMP0_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 6), &ela_state.regs[CS_ELA_SIGCOMP0_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 7), &ela_state.regs[CS_ELA_SIGCOMP0_7]), + + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(1), CS_ELA_BASE_ADDR + ELA_SIGCOMP(1, 7), + 0x00000000), + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(2), CS_ELA_BASE_ADDR + ELA_SIGCOMP(2, 7), + 0x00000000), + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(3), CS_ELA_BASE_ADDR + ELA_SIGCOMP(3, 7), + 0x00000000), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(1), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(2), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(3), 0x11111111), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(1), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(2), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(3), 0x11111111), + + /* Trigger State 4 */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(4), &ela_state.regs[CS_ELA_SIGSEL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(4), &ela_state.regs[CS_ELA_NEXTSTATE4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ACTION(4), &ela_state.regs[CS_ELA_ACTION4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(4), 
&ela_state.regs[CS_ELA_ALTNEXTSTATE4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(4), ELA_ACTION_TRACE), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(4), &ela_state.regs[CS_ELA_COMPCTRL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(4), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(4), &ela_state.regs[CS_ELA_TWBSEL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 0), &ela_state.regs[CS_ELA_SIGMASK4_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 1), &ela_state.regs[CS_ELA_SIGMASK4_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 2), &ela_state.regs[CS_ELA_SIGMASK4_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 3), &ela_state.regs[CS_ELA_SIGMASK4_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 4), &ela_state.regs[CS_ELA_SIGMASK4_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 5), &ela_state.regs[CS_ELA_SIGMASK4_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 6), &ela_state.regs[CS_ELA_SIGMASK4_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 7), &ela_state.regs[CS_ELA_SIGMASK4_7]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 0), &ela_state.regs[CS_ELA_SIGCOMP4_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 1), &ela_state.regs[CS_ELA_SIGCOMP4_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 2), &ela_state.regs[CS_ELA_SIGCOMP4_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 3), &ela_state.regs[CS_ELA_SIGCOMP4_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 4), &ela_state.regs[CS_ELA_SIGCOMP4_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 5), &ela_state.regs[CS_ELA_SIGCOMP4_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 6), &ela_state.regs[CS_ELA_SIGCOMP4_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 7), &ela_state.regs[CS_ELA_SIGCOMP4_7]), + + WRITE_IMM_OP(CS_GPU_COMMAND_ADDR, CS_GPU_COMMAND_TRACE_CONTROL_EN), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_RUN), + + BIT_OR_OP(&ela_state.enabled, 0x1), +}; + +static struct kbase_debug_coresight_csf_op ela_disable_ops[] = { + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, 0x00000000), + /* Poll CTRL.TRACE_BUSY until it becomes low to ensure that trace has stopped. 
*/ + POLL_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_TRACE_BUSY, 0x0), + + BIT_AND_OP(&ela_state.enabled, 0x0), +}; + +static int parse_signal_groups(struct coresight_mali_source_drvdata *drvdata) +{ + struct device_node *signal_groups = NULL; + unsigned int siggrp_idx; + + if (drvdata->base.dev->of_node) + signal_groups = of_get_child_by_name(drvdata->base.dev->of_node, "signal-groups"); + + if (!signal_groups) { + dev_err(drvdata->base.dev, "Failed to find signal groups OF node"); + return -EINVAL; + } + + for (siggrp_idx = 0; siggrp_idx < CS_ELA_MAX_SIGNAL_GROUPS; siggrp_idx++) { + char buf[CS_SG_NAME_MAX_LEN]; + ssize_t res; + const char *name; + struct property *prop; + + res = snprintf(buf, CS_SG_NAME_MAX_LEN, "sg%d", siggrp_idx); + if (res <= 0) { + dev_err(drvdata->base.dev, + "Signal group name %d snprintf failed unexpectedly", siggrp_idx); + return -EINVAL; + } + + of_property_for_each_string(signal_groups, buf, prop, name) { + int sig_type; + + for (sig_type = 0; sig_type < CS_ELA_NR_SIGTYPE; sig_type++) { + if (!strncmp(signal_type_names[sig_type], name, + strlen(signal_type_names[sig_type]))) { + ela_state.signal_types[sig_type] = (1U << siggrp_idx); + ela_state.supported_tracemodes |= + (1U << signal_type_tracemode_map[sig_type]); + } + } + } + } + + /* Add TRACEMODE_NONE as supported to allow printing */ + ela_state.supported_tracemodes |= (1U << CS_ELA_TRACEMODE_NONE); + + return 0; +} + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + int res = 0; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, ela_range, NELEMS(ela_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly"); + return -EINVAL; + } + + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = ela_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(ela_enable_ops); + + drvdata->base.disable_seq.ops = ela_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(ela_disable_ops); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "config create failed unexpectedly"); + return -EINVAL; + } + + res = parse_signal_groups(drvdata); + if (res) { + dev_err(drvdata->base.dev, "Failed to parse signal groups"); + return res; + } + + return 0; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if (drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-ela" }, + {} }; + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-ela", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + 
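Note on the CS_ELA_SIGREGS_ATTR_RW() invocations above: the macro itself is defined earlier in this file, before the lines quoted here. For illustration only, and assuming it mirrors the CS_ETM_REG_ATTR_RW pattern used by the ETM source later in this patch, a plausible expansion that simply wires sprintf_regs() and verify_store_8_regs() to the eight-register window named by its second argument would be:

#define CS_ELA_SIGREGS_ATTR_RW(_a, _b)                                                 \
	static ssize_t _a##_show(struct device *dev, struct device_attribute *attr,   \
				 char *const buf)                                      \
	{                                                                              \
		/* Dump the eight _b##_0.._b##_7 registers as space-separated hex */   \
		return sprintf_regs(buf, CS_ELA_##_b##_0, CS_ELA_##_b##_7);            \
	}                                                                              \
	static ssize_t _a##_store(struct device *dev, struct device_attribute *attr,  \
				  const char *buf, size_t count)                       \
	{                                                                              \
		/* Parse eight hex values and store them starting at _b##_0 */         \
		return verify_store_8_regs(dev, buf, count, CS_ELA_##_b##_0);          \
	}                                                                              \
	static DEVICE_ATTR_RW(_a)

Under that assumption, each invocation creates one read-write sysfs attribute (sigmask0, sigcomp0, sigmask4, sigcomp4) backed by a contiguous slice of ela_state.regs[].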
+module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("Arm Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ELA"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c new file mode 100644 index 000000000000..ae9c2f7f2a8c --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include "sources/coresight_mali_sources.h" + +#define CS_ETM_BASE_ADDR 0xE0041000 +#define CS_MALI_TRACE_ID 0x00000010 + +#ifndef TRCVICTLR_SSSTATUS +#define TRCVICTLR_SSSTATUS BIT(9) +#endif + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-etm"; +#endif + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +enum cs_etm_dynamic_regs { + CS_ETM_TRCCONFIGR, + CS_ETM_TRCTRACEIDR, + CS_ETM_TRCVDARCCTLR, + CS_ETM_TRCSTALLCTLR, + CS_ETM_TRCVIIECTLR, + CS_ETM_NR_DYN_REGS +}; + +struct cs_etm_state { + int enabled; + u32 regs[CS_ETM_NR_DYN_REGS]; +}; + +static struct cs_etm_state etm_state = { 0 }; + +static struct kbase_debug_coresight_csf_address_range etm_range[] = { + { CS_ETM_BASE_ADDR, CS_ETM_BASE_ADDR + CORESIGHT_DEVTYPE }, +}; + +struct kbase_debug_coresight_csf_op etm_enable_ops[] = { + // Unlock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Power up request + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPDCR, TRCPDCR_PU), + // Disable Tracing + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x00000000), + // Check the tracing unit is inactive before programming + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), BIT(TRCSTATR_IDLE_BIT)), + // Set trace configuration to enable global timestamping, and data value tracing + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCCONFIGR, &etm_state.regs[CS_ETM_TRCCONFIGR]), + // Set event control 0 register + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCEVENTCTL0R, 0x00000000), + // Set event control 1 register + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCEVENTCTL1R, 0x00000000), + // Set trace ID + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCTRACEIDR, &etm_state.regs[CS_ETM_TRCTRACEIDR]), + // Configure stall control register + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCSTALLCTLR, &etm_state.regs[CS_ETM_TRCSTALLCTLR]), + // Synchronization period register - sync every 2^11 bytes + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCSYNCPR, 0x0000000C), + // Set global timestamp control register to select resource 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCTSCTLR, 0x00000000), + // Set viewData include/exclude address range comparators to 0 + WRITE_PTR_OP(CS_ETM_BASE_ADDR 
+ TRCVDARCCTLR, &etm_state.regs[CS_ETM_TRCVDARCCTLR]), + // Set viewData main control to select resource 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVDCTLR, 0x00000001), + // Set viewData comparators to 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVDSACCTLR, 0x00000000), + // Set stop/start logic to started state, select resource 1 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVICTLR, TRCVICTLR_SSSTATUS | BIT(0)), + // Set viewInst start and stop control + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVISSCTLR, 0x00000000), + // Set viewInst include and exclude control to match all addresses in range + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCVIIECTLR, &etm_state.regs[CS_ETM_TRCVIIECTLR]), + // Enable trace + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x1), + // Wait until the trace unit is no longer idle (tracing has started) + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), 0), + // Lock the ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit on at the end of sequence + BIT_OR_OP(&etm_state.enabled, 0x1), +}; + +struct kbase_debug_coresight_csf_op etm_disable_ops[] = { + // Unlock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Disable trace unit + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x00000000), + // Poll until idle + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), BIT(TRCSTATR_IDLE_BIT)), + // Lock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit off at the end of sequence + BIT_AND_OP(&etm_state.enabled, 0x0), +}; + +static void set_default_regs(void) +{ + // Turn on instruction tracing + etm_state.regs[CS_ETM_TRCCONFIGR] = 0x00000800; + // Set ID + etm_state.regs[CS_ETM_TRCTRACEIDR] = CS_MALI_TRACE_ID; + // Set data comparators to none + etm_state.regs[CS_ETM_TRCVDARCCTLR] = 0x00000000; + // Set instructions address filter to none + etm_state.regs[CS_ETM_TRCVIIECTLR] = 0x00000000; + // Set stall configuration to a basic setting + etm_state.regs[CS_ETM_TRCSTALLCTLR] = 0x00000000; +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-etm" }, + {} }; + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + int ret = 0; + + if (drvdata == NULL) + return -EINVAL; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + etm_state.enabled = 0x0; + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, etm_range, NELEMS(etm_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly\n"); + return -EINVAL; + } + + set_default_regs(); + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = etm_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(etm_enable_ops); + + drvdata->base.disable_seq.ops = etm_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(etm_disable_ops); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "Config create failed unexpectedly\n"); + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); + return -EINVAL; + } + + return ret; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if
(drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static int verify_store_reg(struct device *dev, const char *buf, size_t count, int reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 val; + int err; + + if (buf == NULL) + return -EINVAL; + + if (etm_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers\n"); + return -EINVAL; + } + + err = kstrtou32(buf, 0, &val); + if (err) { + dev_err(drvdata->base.dev, "Invalid input value\n"); + return -EINVAL; + } + + etm_state.regs[reg] = val; + return count; +} + +#define CS_ETM_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", etm_state.regs[CS_ETM_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_ETM_##_b); \ + } \ + static DEVICE_ATTR_RW(_a) + +CS_ETM_REG_ATTR_RW(trcconfigr, TRCCONFIGR); +CS_ETM_REG_ATTR_RW(trctraceidr, TRCTRACEIDR); +CS_ETM_REG_ATTR_RW(trcvdarcctlr, TRCVDARCCTLR); +CS_ETM_REG_ATTR_RW(trcviiectlr, TRCVIIECTLR); +CS_ETM_REG_ATTR_RW(trcstallctlr, TRCSTALLCTLR); + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", etm_state.enabled); +} +static DEVICE_ATTR_RO(is_enabled); + +static struct attribute *coresight_etm_attrs[] = { + &dev_attr_is_enabled.attr, + &dev_attr_trcconfigr.attr, + &dev_attr_trctraceidr.attr, + &dev_attr_trcvdarcctlr.attr, + &dev_attr_trcviiectlr.attr, + &dev_attr_trcstallctlr.attr, + NULL, +}; +static struct attribute_group coresight_etm_group = { .attrs = coresight_etm_attrs, + .name = "mgmt" }; +static const struct attribute_group *coresight_etm_groups[] = { + &coresight_etm_group, + NULL, +}; +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_etm_groups; +} + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-etm", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + +module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ETM"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c new file mode 100644 index 000000000000..9f60192a682b --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include "sources/coresight_mali_sources.h" + +/* Linux Coresight framework does not support multiple sources enabled + * at the same time. + * + * To avoid Kernel instability, all Mali Coresight sources use the + * same trace ID value as the mandatory ETM one. + */ +#define CS_MALI_TRACE_ID 0x00000010 + +#define CS_SCS_BASE_ADDR 0xE000E000 +#define SCS_DEMCR 0xDFC +#define CS_ITM_BASE_ADDR 0xE0000000 +#define ITM_TCR 0xE80 +#define ITM_TCR_BUSY_BIT (0x1 << 22) +#define CS_DWT_BASE_ADDR 0xE0001000 +#define DWT_CTRL 0x000 +#define DWT_CYCCNT 0x004 + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-itm"; +#endif + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +enum cs_itm_dwt_dynamic_regs { CS_DWT_CTRL, CS_ITM_TCR, CS_ITM_DWT_NR_DYN_REGS }; + +struct cs_itm_state { + int enabled; + u32 regs[CS_ITM_DWT_NR_DYN_REGS]; +}; + +static struct cs_itm_state itm_state = { 0 }; + +static struct kbase_debug_coresight_csf_address_range dwt_itm_range[] = { + { CS_SCS_BASE_ADDR, CS_SCS_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_ITM_BASE_ADDR, CS_ITM_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_DWT_BASE_ADDR, CS_DWT_BASE_ADDR + CORESIGHT_DEVTYPE } +}; + +static struct kbase_debug_coresight_csf_op dwt_itm_enable_ops[] = { + // Enable ITM/DWT functionality via DEMCR register + WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x01000000), + // Unlock DWT configuration + WRITE_IMM_OP(CS_DWT_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Prep DWT counter to immediately send sync packet ((1 << 24) - 1) + WRITE_IMM_OP(CS_DWT_BASE_ADDR + DWT_CYCCNT, 0x00ffffff), + // Write initial value of post count counter + WRITE_IMM_OP(CS_DWT_BASE_ADDR + DWT_CTRL, 0x00000020), + // Set DWT configuration: + WRITE_PTR_OP(CS_DWT_BASE_ADDR + DWT_CTRL, &itm_state.regs[CS_DWT_CTRL]), + // Lock DWT configuration + WRITE_IMM_OP(CS_DWT_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Unlock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Set ITM configuration: + WRITE_PTR_OP(CS_ITM_BASE_ADDR + ITM_TCR, &itm_state.regs[CS_ITM_TCR]), + // Lock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit on at the end of sequence + BIT_OR_OP(&itm_state.enabled, 0x1), +}; + +static struct kbase_debug_coresight_csf_op dwt_itm_disable_ops[] = { + // Disable ITM/DWT functionality via DEMCR register + WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x00000000), + // Unlock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Check ITM is disabled + POLL_OP(CS_ITM_BASE_ADDR + ITM_TCR, ITM_TCR_BUSY_BIT, 0x0), + // Lock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit off at the end of sequence + BIT_AND_OP(&itm_state.enabled, 0x0), +}; + +static void set_default_regs(void) +{ + // DWT configuration: + // [0] = 1, enable cycle counter + // [4:1] = 4, set PC sample rate of 256 cycles + // [8:5] = 1, set initial post count value + // [9] = 1, select position of post count tap on the
cycle counter + // [10:11] = 1, enable sync packets + // [12] = 1, enable periodic PC sample packets + itm_state.regs[CS_DWT_CTRL] = 0x00001629; + // ITM configuration: + // [0] = 1, Enable ITM + // [1] = 1, Enable Time stamp generation + // [2] = 1, Enable sync packet transmission + // [3] = 1, Enable HW event forwarding + // [11:10] = 1, Generate TS request approx every 128 cycles + // [22:16] = 1, Trace bus ID + itm_state.regs[CS_ITM_TCR] = 0x0001040F; +} + +static int verify_store_reg(struct device *dev, const char *buf, size_t count, int reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 val; + int err; + + if (buf == NULL) + return -EINVAL; + + if (itm_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers\n"); + return -EINVAL; + } + + err = kstrtou32(buf, 0, &val); + if (err) { + dev_err(drvdata->base.dev, "Invalid input value\n"); + return -EINVAL; + } + + itm_state.regs[reg] = val; + return count; +} + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", itm_state.enabled); +} +static DEVICE_ATTR_RO(is_enabled); + +#define CS_ITM_DWT_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", itm_state.regs[CS_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_##_b); \ + } \ + static DEVICE_ATTR_RW(_a) + +CS_ITM_DWT_REG_ATTR_RW(dwt_ctrl, DWT_CTRL); +CS_ITM_DWT_REG_ATTR_RW(itm_tcr, ITM_TCR); + +static struct attribute *coresight_mali_source_attrs[] = { + &dev_attr_is_enabled.attr, + &dev_attr_dwt_ctrl.attr, + &dev_attr_itm_tcr.attr, + NULL, +}; + +static const struct attribute_group coresight_mali_source_group = { + .attrs = coresight_mali_source_attrs, + .name = "mgmt" +}; + +static const struct attribute_group *coresight_mali_source_groups[] = { + &coresight_mali_source_group, + NULL, +}; + +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_mali_source_groups; +} + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata == NULL) + return -EINVAL; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, dwt_itm_range, NELEMS(dwt_itm_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly\n"); + return -EINVAL; + } + + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = dwt_itm_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(dwt_itm_enable_ops); + + drvdata->base.disable_seq.ops = dwt_itm_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(dwt_itm_disable_ops); + + set_default_regs(); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "config create failed unexpectedly\n"); + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); + return -EINVAL; + } + + return 0; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if 
(drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if (drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-itm" }, + {} }; + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-itm", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + +module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ITM"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mali_kbase_debug_coresight_csf.h b/include/linux/mali_kbase_debug_coresight_csf.h new file mode 100644 index 000000000000..8356fd497e74 --- /dev/null +++ b/include/linux/mali_kbase_debug_coresight_csf.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_CORESIGHT_CSF_ +#define _KBASE_DEBUG_CORESIGHT_CSF_ + +#include +#include + +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP 0U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM 1U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE 2U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE 3U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ 4U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL 5U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR 6U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR 7U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND 8U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT 9U + +/** + * struct kbase_debug_coresight_csf_write_imm_op - Coresight immediate write operation structure + * + * @reg_addr: Register address to write to. + * @val: Value to write at @reg_addr. + */ +struct kbase_debug_coresight_csf_write_imm_op { + __u32 reg_addr; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_write_imm_range_op - Coresight immediate write range + * operation structure + * + * @reg_start: Register address to start writing from. + * @reg_end: Register address to stop writing from. End address included in the write range. + * @val: Value to write at @reg_addr. 
+ */ +struct kbase_debug_coresight_csf_write_imm_range_op { + __u32 reg_start; + __u32 reg_end; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_write_op - Coresight write operation structure + * + * @reg_addr: Register address to write to. + * @ptr: Pointer to the value to write at @reg_addr. + */ +struct kbase_debug_coresight_csf_write_op { + __u32 reg_addr; + __u32 *ptr; +}; + +/** + * struct kbase_debug_coresight_csf_read_op - Coresight read operation structure + * + * @reg_addr: Register address to read. + * @ptr: Pointer where to store the read value. + */ +struct kbase_debug_coresight_csf_read_op { + __u32 reg_addr; + __u32 *ptr; +}; + +/** + * struct kbase_debug_coresight_csf_poll_op - Coresight poll operation structure + * + * @reg_addr: Register address to poll. + * @val: Expected value after poll. + * @mask: Mask to apply on the read value from @reg_addr when comparing against @val. + */ +struct kbase_debug_coresight_csf_poll_op { + __u32 reg_addr; + __u32 val; + __u32 mask; +}; + +/** + * struct kbase_debug_coresight_csf_bitw_op - Coresight bitwise operation structure + * + * @ptr: Pointer to the variable on which to execute the bit operation. + * @val: Value with which the operation should be executed against @ptr value. + */ +struct kbase_debug_coresight_csf_bitw_op { + __u32 *ptr; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_op - Coresight supported operations + * + * @type: Operation type. + * @padding: Padding for 64bit alignment. + * @op: Operation union. + * @op.write_imm: Parameters for immediate write operation. + * @op.write_imm_range: Parameters for immediate range write operation. + * @op.write: Parameters for write operation. + * @op.read: Parameters for read operation. + * @op.poll: Parameters for poll operation. + * @op.bitw: Parameters for bitwise operation. + * @op.padding: Padding for 64bit alignment. + * + * All operation structures should include padding to ensure they are the same size. + */ +struct kbase_debug_coresight_csf_op { + __u8 type; + __u8 padding[7]; + union { + struct kbase_debug_coresight_csf_write_imm_op write_imm; + struct kbase_debug_coresight_csf_write_imm_range_op write_imm_range; + struct kbase_debug_coresight_csf_write_op write; + struct kbase_debug_coresight_csf_read_op read; + struct kbase_debug_coresight_csf_poll_op poll; + struct kbase_debug_coresight_csf_bitw_op bitw; + u32 padding[3]; + } op; +}; + +/** + * struct kbase_debug_coresight_csf_sequence - Coresight sequence of operations + * + * @ops: Arrays containing Coresight operations. + * @nr_ops: Size of @ops. + */ +struct kbase_debug_coresight_csf_sequence { + struct kbase_debug_coresight_csf_op *ops; + int nr_ops; +}; + +/** + * struct kbase_debug_coresight_csf_address_range - Coresight client address range + * + * @start: Start offset of the address range. + * @end: End offset of the address range. + */ +struct kbase_debug_coresight_csf_address_range { + __u32 start; + __u32 end; +}; + +/** + * kbase_debug_coresight_csf_register - Register as a client for set ranges of MCU memory. + * + * @drv_data: Pointer to driver device data. + * @ranges: Pointer to an array of struct kbase_debug_coresight_csf_address_range + * that contains start and end addresses that the client will manage. + * @nr_ranges: Size of @ranges array. + * + * This function checks @ranges against current client claimed ranges. If there + * are no overlaps, a new client is created and added to the list. + * + * Return: A pointer of the registered client instance on success. 
NULL on failure. + */ +void *kbase_debug_coresight_csf_register(void *drv_data, + struct kbase_debug_coresight_csf_address_range *ranges, + int nr_ranges); + +/** + * kbase_debug_coresight_csf_unregister - Removes a coresight client. + * + * @client_data: A pointer to a coresight client. + * + * This function removes a client from the client list and frees the client struct. + */ +void kbase_debug_coresight_csf_unregister(void *client_data); + +/** + * kbase_debug_coresight_csf_config_create - Creates a configuration containing + * enable and disable sequences. + * + * @client_data: Pointer to a coresight client. + * @enable_seq: Pointer to a struct containing the ops needed to enable coresight blocks. + * It is optional, so it could be NULL. + * @disable_seq: Pointer to a struct containing ops to run to disable coresight blocks. + * It is optional, so it could be NULL. + * + * Return: Valid pointer on success. NULL on failure. + */ +void * +kbase_debug_coresight_csf_config_create(void *client_data, + struct kbase_debug_coresight_csf_sequence *enable_seq, + struct kbase_debug_coresight_csf_sequence *disable_seq); +/** + * kbase_debug_coresight_csf_config_free - Frees a configuration containing + * enable and disable sequences. + * + * @config_data: Pointer to a coresight configuration. + */ +void kbase_debug_coresight_csf_config_free(void *config_data); + +/** + * kbase_debug_coresight_csf_config_enable - Enables a coresight configuration + * + * @config_data: Pointer to a coresight configuration. + * + * If the GPU is turned on, the configuration is immediately applied to the CoreSight blocks. + * If the GPU is turned off, the configuration is scheduled to be applied the next + * time the GPU is turned on. + * + * A configuration is enabled by executing read/write/poll ops defined in config->enable_seq. + * + * Return: 0 on success. Error code on failure. + */ +int kbase_debug_coresight_csf_config_enable(void *config_data); +/** + * kbase_debug_coresight_csf_config_disable - Disables a coresight configuration + * + * @config_data: Pointer to a coresight configuration. + * + * If the GPU is turned off, this is effectively a NOP, as kbase should have disabled + * the configuration when the GPU was turned off. + * If the GPU is on, the configuration will be disabled. + * + * A configuration is disabled by executing read/write/poll ops defined in config->disable_seq. + * + * Return: 0 on success. Error code on failure.
+ */ +int kbase_debug_coresight_csf_config_disable(void *config_data); + +#endif /* _KBASE_DEBUG_CORESIGHT_CSF_ */ diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h index d0a09985c5ca..335147cada2c 100644 --- a/include/linux/version_compat_defs.h +++ b/include/linux/version_compat_defs.h @@ -24,10 +24,12 @@ #include -#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; #endif +#if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE + #ifndef EPOLLHUP #define EPOLLHUP POLLHUP #endif @@ -44,4 +46,6 @@ typedef unsigned int __poll_t; #define EPOLLRDNORM POLLRDNORM #endif +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 613eb1fdd081..a44da7beb041 100644 --- a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -29,7 +29,11 @@ #include #define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#if MALI_USE_CSF +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65) +#else /* MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#endif /* !MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) #define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) @@ -56,7 +60,16 @@ #define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) +/* + * Bit mask - no. bits set is no. cores + * Values obtained from talking to HW team + * Example: tODx has 10 cores, 0b11 1111 1111 -> 0x3FF + */ #define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX (0x7FFFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull) #define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h new file mode 100644 index 000000000000..69bc44c26361 --- /dev/null +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * Dummy Model interface + */ + +#ifndef _UAPI_KBASE_MODEL_LINUX_H_ +#define _UAPI_KBASE_MODEL_LINUX_H_ + +/* Generic model IRQs */ +enum model_linux_irqs { + MODEL_LINUX_JOB_IRQ, + MODEL_LINUX_GPU_IRQ, + MODEL_LINUX_MMU_IRQ, + MODEL_LINUX_NONE_IRQ, + MODEL_LINUX_NUM_TYPE_IRQ +}; + +#endif /* _UAPI_KBASE_MODEL_LINUX_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h index d9813c055809..ec8c02f18e16 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h @@ -118,9 +118,21 @@ #define BASE_QUEUE_MAX_PRIORITY (15U) -/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */ -#define BASEP_EVENT_VAL_INDEX (0U) -#define BASEP_EVENT_ERR_INDEX (1U) +/* Sync32 object fields definition */ +#define BASEP_EVENT32_VAL_OFFSET (0U) +#define BASEP_EVENT32_ERR_OFFSET (4U) +#define BASEP_EVENT32_SIZE_BYTES (8U) + +/* Sync64 object fields definition */ +#define BASEP_EVENT64_VAL_OFFSET (0U) +#define BASEP_EVENT64_ERR_OFFSET (8U) +#define BASEP_EVENT64_SIZE_BYTES (16U) + +/* Sync32 object alignment, equal to its size */ +#define BASEP_EVENT32_ALIGN_BYTES (8U) + +/* Sync64 object alignment, equal to its size */ +#define BASEP_EVENT64_ALIGN_BYTES (16U) /* The upper limit for number of objects that could be waited/set per command. * This limit is now enforced as internally the error inherit inputs are diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h index d9a1867e13c3..642ca3465ead 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h @@ -72,10 +72,18 @@ * - base_jit_alloc_info_11_5 * - kbase_ioctl_mem_jit_init_10_2 * - kbase_ioctl_mem_jit_init_11_5 + * 1.17: + * - Fix kinstr_prfcnt issues: + * - Missing implicit sample for CMD_STOP when HWCNT buffer is full. + * - Race condition when stopping periodic sampling. + * - prfcnt_block_metadata::block_idx gaps. + * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. + * 1.18: + * - CPU mappings of USER_BUFFER imported memory handles must be cached. 
*/ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 16 +#define BASE_UK_VERSION_MINOR 17 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index 1f34d99830fe..784e09a7edc2 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -119,7 +119,6 @@ #define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) #define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) #define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) -#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) #define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) #define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h index 9c7553ff2bd2..902d0ce9145a 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h @@ -133,9 +133,17 @@ * - base_jit_alloc_info_11_5 * - kbase_ioctl_mem_jit_init_10_2 * - kbase_ioctl_mem_jit_init_11_5 + * 11.37: + * - Fix kinstr_prfcnt issues: + * - Missing implicit sample for CMD_STOP when HWCNT buffer is full. + * - Race condition when stopping periodic sampling. + * - prfcnt_block_metadata::block_idx gaps. + * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. + * 11.38: + * - CPU mappings of USER_BUFFER imported memory handles must be cached. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 36 +#define BASE_UK_VERSION_MINOR 37 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h index 962decc10efc..5089bf249528 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h @@ -445,7 +445,7 @@ struct prfcnt_metadata { * @PRFCNT_CONTROL_CMD_STOP: Stop the counter data dump run for the * calling client session. * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC: Trigger a synchronous manual sample. - * @PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: Trigger an asynchronous manual sample. + * @PRFCNT_CONTROL_CMD_RESERVED: Previously SAMPLE_ASYNC not supported any more. * @PRFCNT_CONTROL_CMD_DISCARD: Discard all samples which have not yet * been consumed by userspace. 
Note that * this can race with new samples if @@ -455,7 +455,7 @@ enum prfcnt_control_cmd_code { PRFCNT_CONTROL_CMD_START = 1, PRFCNT_CONTROL_CMD_STOP, PRFCNT_CONTROL_CMD_SAMPLE_SYNC, - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC, + PRFCNT_CONTROL_CMD_RESERVED, PRFCNT_CONTROL_CMD_DISCARD, }; diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h index 63bf48b603ef..c8a54f91165e 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h @@ -46,8 +46,7 @@ struct kbase_ioctl_set_flags { __u32 create_flags; }; -#define KBASE_IOCTL_SET_FLAGS \ - _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) +#define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) /** * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel @@ -81,8 +80,7 @@ struct kbase_ioctl_get_gpuprops { __u32 flags; }; -#define KBASE_IOCTL_GET_GPUPROPS \ - _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) +#define KBASE_IOCTL_GET_GPUPROPS _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) /** * union kbase_ioctl_mem_alloc - Allocate memory on the GPU @@ -108,8 +106,7 @@ union kbase_ioctl_mem_alloc { } out; }; -#define KBASE_IOCTL_MEM_ALLOC \ - _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) +#define KBASE_IOCTL_MEM_ALLOC _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) /** * struct kbase_ioctl_mem_query - Query properties of a GPU memory region @@ -131,12 +128,11 @@ union kbase_ioctl_mem_query { } out; }; -#define KBASE_IOCTL_MEM_QUERY \ - _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) +#define KBASE_IOCTL_MEM_QUERY _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) -#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) -#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) -#define KBASE_MEM_QUERY_FLAGS ((__u64)3) +#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) +#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) +#define KBASE_MEM_QUERY_FLAGS ((__u64)3) /** * struct kbase_ioctl_mem_free - Free a memory region @@ -146,8 +142,7 @@ struct kbase_ioctl_mem_free { __u64 gpu_addr; }; -#define KBASE_IOCTL_MEM_FREE \ - _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) +#define KBASE_IOCTL_MEM_FREE _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) /** * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader @@ -167,7 +162,7 @@ struct kbase_ioctl_hwcnt_reader_setup { __u32 mmu_l2_bm; }; -#define KBASE_IOCTL_HWCNT_READER_SETUP \ +#define KBASE_IOCTL_HWCNT_READER_SETUP \ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) /** @@ -182,8 +177,7 @@ struct kbase_ioctl_hwcnt_values { __u32 padding; }; -#define KBASE_IOCTL_HWCNT_SET \ - _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) +#define KBASE_IOCTL_HWCNT_SET _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) /** * struct kbase_ioctl_disjoint_query - Query the disjoint counter @@ -193,8 +187,7 @@ struct kbase_ioctl_disjoint_query { __u32 counter; }; -#define KBASE_IOCTL_DISJOINT_QUERY \ - _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) +#define KBASE_IOCTL_DISJOINT_QUERY _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) /** * struct kbase_ioctl_get_ddk_version - Query the kernel version @@ -215,8 +208,7 @@ struct kbase_ioctl_get_ddk_version { __u32 padding; }; -#define KBASE_IOCTL_GET_DDK_VERSION \ - _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) +#define KBASE_IOCTL_GET_DDK_VERSION _IOW(KBASE_IOCTL_TYPE, 13, struct 
kbase_ioctl_get_ddk_version) /** * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory @@ -241,8 +233,7 @@ struct kbase_ioctl_mem_jit_init { __u64 phys_pages; }; -#define KBASE_IOCTL_MEM_JIT_INIT \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) +#define KBASE_IOCTL_MEM_JIT_INIT _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) /** * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory @@ -262,8 +253,7 @@ struct kbase_ioctl_mem_sync { __u8 padding[7]; }; -#define KBASE_IOCTL_MEM_SYNC \ - _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) +#define KBASE_IOCTL_MEM_SYNC _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) /** * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer @@ -286,7 +276,7 @@ union kbase_ioctl_mem_find_cpu_offset { } out; }; -#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ +#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) /** @@ -298,8 +288,7 @@ struct kbase_ioctl_get_context_id { __u32 id; }; -#define KBASE_IOCTL_GET_CONTEXT_ID \ - _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) +#define KBASE_IOCTL_GET_CONTEXT_ID _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) /** * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd @@ -312,11 +301,9 @@ struct kbase_ioctl_tlstream_acquire { __u32 flags; }; -#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ - _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) +#define KBASE_IOCTL_TLSTREAM_ACQUIRE _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) -#define KBASE_IOCTL_TLSTREAM_FLUSH \ - _IO(KBASE_IOCTL_TYPE, 19) +#define KBASE_IOCTL_TLSTREAM_FLUSH _IO(KBASE_IOCTL_TYPE, 19) /** * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region @@ -333,8 +320,7 @@ struct kbase_ioctl_mem_commit { __u64 pages; }; -#define KBASE_IOCTL_MEM_COMMIT \ - _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) +#define KBASE_IOCTL_MEM_COMMIT _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) /** * union kbase_ioctl_mem_alias - Create an alias of memory regions @@ -362,8 +348,7 @@ union kbase_ioctl_mem_alias { } out; }; -#define KBASE_IOCTL_MEM_ALIAS \ - _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) +#define KBASE_IOCTL_MEM_ALIAS _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) /** * union kbase_ioctl_mem_import - Import memory for use by the GPU @@ -391,8 +376,7 @@ union kbase_ioctl_mem_import { } out; }; -#define KBASE_IOCTL_MEM_IMPORT \ - _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) +#define KBASE_IOCTL_MEM_IMPORT _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) /** * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region @@ -406,8 +390,7 @@ struct kbase_ioctl_mem_flags_change { __u64 mask; }; -#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ - _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) +#define KBASE_IOCTL_MEM_FLAGS_CHANGE _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) /** * struct kbase_ioctl_stream_create - Create a synchronisation stream @@ -424,8 +407,7 @@ struct kbase_ioctl_stream_create { char name[32]; }; -#define KBASE_IOCTL_STREAM_CREATE \ - _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) +#define KBASE_IOCTL_STREAM_CREATE _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) /** * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence @@ -435,8 +417,7 @@ struct kbase_ioctl_fence_validate { int fd; }; -#define 
KBASE_IOCTL_FENCE_VALIDATE \
-	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+#define KBASE_IOCTL_FENCE_VALIDATE _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
 
 /**
  * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
@@ -452,8 +433,7 @@ struct kbase_ioctl_mem_profile_add {
 	__u32 padding;
 };
 
-#define KBASE_IOCTL_MEM_PROFILE_ADD \
-	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+#define KBASE_IOCTL_MEM_PROFILE_ADD _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
 
 /**
  * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
@@ -465,7 +445,7 @@ struct kbase_ioctl_sticky_resource_map {
 	__u64 address;
 };
 
-#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
 	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
 
 /**
@@ -479,7 +459,7 @@ struct kbase_ioctl_sticky_resource_unmap {
 	__u64 address;
 };
 
-#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
 	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
 
 /**
@@ -507,14 +487,12 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
 	} out;
 };
 
-#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
 	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
 
-#define KBASE_IOCTL_CINSTR_GWT_START \
-	_IO(KBASE_IOCTL_TYPE, 33)
+#define KBASE_IOCTL_CINSTR_GWT_START _IO(KBASE_IOCTL_TYPE, 33)
 
-#define KBASE_IOCTL_CINSTR_GWT_STOP \
-	_IO(KBASE_IOCTL_TYPE, 34)
+#define KBASE_IOCTL_CINSTR_GWT_STOP _IO(KBASE_IOCTL_TYPE, 34)
 
 /**
  * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault
@@ -547,8 +525,7 @@ union kbase_ioctl_cinstr_gwt_dump {
 	} out;
 };
 
-#define KBASE_IOCTL_CINSTR_GWT_DUMP \
-	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+#define KBASE_IOCTL_CINSTR_GWT_DUMP _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
 
 /**
  * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
@@ -559,8 +536,7 @@ struct kbase_ioctl_mem_exec_init {
 	__u64 va_pages;
 };
 
-#define KBASE_IOCTL_MEM_EXEC_INIT \
-	_IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+#define KBASE_IOCTL_MEM_EXEC_INIT _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
 
 /**
  * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
@@ -589,7 +565,7 @@ union kbase_ioctl_get_cpu_gpu_timeinfo {
 	} out;
 };
 
-#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
+#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
 	_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
 
 /**
@@ -601,7 +577,7 @@ struct kbase_ioctl_context_priority_check {
 	__u8 priority;
 };
 
-#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
+#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
 	_IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
 
 /**
@@ -613,7 +589,7 @@ struct kbase_ioctl_set_limited_core_count {
 	__u8 max_core_count;
 };
 
-#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
+#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
 	_IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
 
 /**
@@ -634,7 +610,7 @@ struct kbase_ioctl_kinstr_prfcnt_enum_info {
 	__u64 info_list_ptr;
 };
 
-#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \
+#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \
 	_IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info)
 
 /**
@@ -663,7 +639,7 @@ union kbase_ioctl_kinstr_prfcnt_setup {
 	} out;
 };
 
-#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \
+#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \
 	_IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup)
 
 /***************
@@ -687,8 +663,7 @@ struct kbase_ioctl_tlstream_stats {
 	__u32 bytes_generated;
 };
 
-#define KBASE_IOCTL_TLSTREAM_STATS \
-	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+#define KBASE_IOCTL_TLSTREAM_STATS _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
 
 #endif /* MALI_UNIT_TEST */
 
@@ -706,108 +681,107 @@ struct kbase_ioctl_tlstream_stats
  * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
  */
-
 /**********************************
  * Definitions for GPU properties *
  **********************************/
-#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
-#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
-#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
-#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
 
-#define KBASE_GPUPROP_PRODUCT_ID 1
-#define KBASE_GPUPROP_VERSION_STATUS 2
-#define KBASE_GPUPROP_MINOR_REVISION 3
-#define KBASE_GPUPROP_MAJOR_REVISION 4
+#define KBASE_GPUPROP_PRODUCT_ID 1
+#define KBASE_GPUPROP_VERSION_STATUS 2
+#define KBASE_GPUPROP_MINOR_REVISION 3
+#define KBASE_GPUPROP_MAJOR_REVISION 4
 /* 5 previously used for GPU speed */
-#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
 /* 7 previously used for minimum GPU speed */
-#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
-#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
-#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
-#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
-#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
 
-#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
-#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
-#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
 
-#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
-#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
 
-#define KBASE_GPUPROP_MAX_THREADS 18
-#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
-#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
-#define KBASE_GPUPROP_MAX_REGISTERS 21
-#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
-#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
-#define KBASE_GPUPROP_IMPL_TECH 24
+#define KBASE_GPUPROP_MAX_THREADS 18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
+#define KBASE_GPUPROP_MAX_REGISTERS 21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
+#define KBASE_GPUPROP_IMPL_TECH 24
 
-#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
-#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
-#define KBASE_GPUPROP_RAW_L2_PRESENT 27
-#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
-#define KBASE_GPUPROP_RAW_L2_FEATURES 29
-#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
-#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
-#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
-#define KBASE_GPUPROP_RAW_AS_PRESENT 33
-#define KBASE_GPUPROP_RAW_JS_PRESENT 34
-#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
-#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
-#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
-#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
-#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
-#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
-#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
-#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
-#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
-#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
-#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
-#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
-#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
-#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
-#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
-#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
-#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
-#define KBASE_GPUPROP_RAW_GPU_ID 55
-#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
-#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
-#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
-#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
-#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
+#define KBASE_GPUPROP_RAW_L2_PRESENT 27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
+#define KBASE_GPUPROP_RAW_L2_FEATURES 29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
+#define KBASE_GPUPROP_RAW_AS_PRESENT 33
+#define KBASE_GPUPROP_RAW_JS_PRESENT 34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
+#define KBASE_GPUPROP_RAW_GPU_ID 55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
 
-#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
-#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
-#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
-#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
-#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
-#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
-#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
-#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
-#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
-#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
-#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
-#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
-#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
-#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
-#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
-#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
-#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
-#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
-#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
 
-#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
 
-#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
 
-#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
-#define KBASE_GPUPROP_TLS_ALLOC 84
-#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
+#define KBASE_GPUPROP_TLS_ALLOC 84
+#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
 
 #ifdef __cplusplus
 }
 #endif
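
The KBASE_GPUPROP_* values above are property keys, and KBASE_GPUPROP_VALUE_SIZE_U8..U64 are the 2-bit size codes that accompany them in the property buffer kbase hands back to userspace. Below is a minimal decoder sketch in C, assuming the usual kbase packing of each entry as a little-endian u32 key of (property_id << 2) | size_code followed by a value of the indicated width; the buffer itself is assumed to have been obtained elsewhere (e.g. through the GET_GPUPROPS ioctl, which is not part of this hunk), and a little-endian host is assumed.

/*
 * Illustrative sketch, not part of the patch: walk a GPU-properties buffer
 * using the KBASE_GPUPROP_* keys and KBASE_GPUPROP_VALUE_SIZE_* codes above.
 * Assumed layout: u32 key = (property_id << 2) | size_code, then a value of
 * 1, 2, 4 or 8 bytes. Little-endian host assumed for the memcpy into u64.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void dump_gpuprops(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (off + sizeof(uint32_t) <= len) {
		uint32_t key;
		uint64_t value = 0;
		size_t vsize;

		memcpy(&key, buf + off, sizeof(key));
		off += sizeof(key);

		switch (key & 0x3) {            /* KBASE_GPUPROP_VALUE_SIZE_* */
		case 0x0: vsize = 1; break;     /* ..._U8  */
		case 0x1: vsize = 2; break;     /* ..._U16 */
		case 0x2: vsize = 4; break;     /* ..._U32 */
		default:  vsize = 8; break;     /* ..._U64 */
		}
		if (off + vsize > len)
			break;                  /* truncated buffer */
		memcpy(&value, buf + off, vsize);
		off += vsize;

		/* key >> 2 is the property id, e.g. KBASE_GPUPROP_PRODUCT_ID (1) */
		printf("prop %u = 0x%llx\n", key >> 2, (unsigned long long)value);
	}
}

Only the loop's use of the size codes and property ids depends on the definitions in this hunk; where the buffer comes from and how it is sized are assumptions made to keep the sketch self-contained.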