Merge remote branch 'android-4.19' of https://android.googlesource.com/kernel/common
* android-4.19: (2854 commits) ANDROID: move up spin_unlock_bh() ahead of remove_proc_entry() BACKPORT: arm64: tags: Preserve tags for addresses translated via TTBR1 UPSTREAM: arm64: memory: Implement __tag_set() as common function UPSTREAM: arm64/mm: fix variable 'tag' set but not used UPSTREAM: arm64: avoid clang warning about self-assignment ANDROID: sdcardfs: evict dentries on fscrypt key removal ANDROID: fscrypt: add key removal notifier chain ANDROID: refactor build.config files to remove duplication ANDROID: Move from clang r353983c to r365631c ANDROID: gki_defconfig: remove PWRSEQ_EMMC and PWRSEQ_SIMPLE ANDROID: unconditionally compile sig_ok in struct module Linux 4.19.80 perf/hw_breakpoint: Fix arch_hw_breakpoint use-before-initialization PCI: vmd: Fix config addressing when using bus offsets x86/asm: Fix MWAITX C-state hint value hwmon: Fix HWMON_P_MIN_ALARM mask tracing: Get trace_array reference for available_tracers files ftrace: Get a reference counter for the trace_array on filter files tracing/hwlat: Don't ignore outer-loop duration when calculating max_latency tracing/hwlat: Report total time spent in all NMIs during the sample ... Conflicts: drivers/clk/rockchip/clk-mmc-phase.c drivers/gpu/drm/rockchip/rockchip_drm_vop.c drivers/regulator/core.c drivers/tty/serial/8250/8250_port.c drivers/usb/dwc3/core.h drivers/usb/dwc3/gadget.c drivers/usb/dwc3/gadget.h Change-Id: I65599d770d6613caba14251b890fcfd1cfa0f100
This commit is contained in:
@@ -29,7 +29,7 @@ Contact: Bjørn Mork <bjorn@mork.no>
|
||||
Description:
|
||||
Unsigned integer.
|
||||
|
||||
Write a number ranging from 1 to 127 to add a qmap mux
|
||||
Write a number ranging from 1 to 254 to add a qmap mux
|
||||
based network device, supported by recent Qualcomm based
|
||||
modems.
|
||||
|
||||
@@ -46,5 +46,5 @@ Contact: Bjørn Mork <bjorn@mork.no>
|
||||
Description:
|
||||
Unsigned integer.
|
||||
|
||||
Write a number ranging from 1 to 127 to delete a previously
|
||||
Write a number ranging from 1 to 254 to delete a previously
|
||||
created qmap mux based network device.
|
||||
|
||||
76
Documentation/ABI/testing/sysfs-class-wakeup
Normal file
76
Documentation/ABI/testing/sysfs-class-wakeup
Normal file
@@ -0,0 +1,76 @@
|
||||
What: /sys/class/wakeup/
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
The /sys/class/wakeup/ directory contains pointers to all
|
||||
wakeup sources in the kernel at that moment in time.
|
||||
|
||||
What: /sys/class/wakeup/.../name
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the name of the wakeup source.
|
||||
|
||||
What: /sys/class/wakeup/.../active_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of times the wakeup source was
|
||||
activated.
|
||||
|
||||
What: /sys/class/wakeup/.../event_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of signaled wakeup events
|
||||
associated with the wakeup source.
|
||||
|
||||
What: /sys/class/wakeup/.../wakeup_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of times the wakeup source might
|
||||
abort suspend.
|
||||
|
||||
What: /sys/class/wakeup/.../expire_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of times the wakeup source's
|
||||
timeout has expired.
|
||||
|
||||
What: /sys/class/wakeup/.../active_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the amount of time the wakeup source has
|
||||
been continuously active, in milliseconds. If the wakeup
|
||||
source is not active, this file contains '0'.
|
||||
|
||||
What: /sys/class/wakeup/.../total_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the total amount of time this wakeup source
|
||||
has been active, in milliseconds.
|
||||
|
||||
What: /sys/class/wakeup/.../max_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the maximum amount of time this wakeup
|
||||
source has been continuously active, in milliseconds.
|
||||
|
||||
What: /sys/class/wakeup/.../last_change_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the monotonic clock time when the wakeup
|
||||
source was touched last time, in milliseconds.
|
||||
|
||||
What: /sys/class/wakeup/.../prevent_suspend_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
The file contains the total amount of time this wakeup source
|
||||
has been preventing autosleep, in milliseconds.
|
||||
@@ -243,3 +243,18 @@ Description:
|
||||
- Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
|
||||
- [h] means add/del hot file extension
|
||||
- [c] means add/del cold file extension
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/unusable
|
||||
Date: April 2019
|
||||
Contact: "Daniel Rosenberg" <drosen@google.com>
|
||||
Description:
|
||||
If checkpoint=disable, it displays the number of blocks that are unusable.
|
||||
If checkpoint=enable it displays the number of blocks that would be unusable
|
||||
if checkpoint=disable were to be set.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/encoding
|
||||
Date: July 2019
|
||||
Contact: "Daniel Rosenberg" <drosen@google.com>
|
||||
Description:
|
||||
Displays name and version of the encoding set for the filesystem.
|
||||
If no encoding is set, displays (none)
|
||||
|
||||
@@ -301,3 +301,109 @@ Description:
|
||||
|
||||
Using this sysfs file will override any values that were
|
||||
set using the kernel command line for disk offset.
|
||||
|
||||
What: /sys/power/suspend_stats
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats directory contains suspend related
|
||||
statistics.
|
||||
|
||||
What: /sys/power/suspend_stats/success
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/success file contains the number
|
||||
of times entering system sleep state succeeded.
|
||||
|
||||
What: /sys/power/suspend_stats/fail
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/fail file contains the number
|
||||
of times entering system sleep state failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_freeze
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_freeze file contains the
|
||||
number of times freezing processes failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_prepare
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_prepare file contains the
|
||||
number of times preparing all non-sysdev devices for
|
||||
a system PM transition failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_resume
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_resume file contains the
|
||||
number of times executing "resume" callbacks of
|
||||
non-sysdev devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_resume_early
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_resume_early file contains
|
||||
the number of times executing "early resume" callbacks
|
||||
of devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_resume_noirq
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_resume_noirq file contains
|
||||
the number of times executing "noirq resume" callbacks
|
||||
of devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_suspend
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_suspend file contains
|
||||
the number of times executing "suspend" callbacks
|
||||
of all non-sysdev devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_suspend_late
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_suspend_late file contains
|
||||
the number of times executing "late suspend" callbacks
|
||||
of all devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_suspend_noirq
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_suspend_noirq file contains
|
||||
the number of times executing "noirq suspend" callbacks
|
||||
of all devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/last_failed_dev
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/last_failed_dev file contains
|
||||
the last device for which a suspend/resume callback failed.
|
||||
|
||||
What: /sys/power/suspend_stats/last_failed_errno
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/last_failed_errno file contains
|
||||
the errno of the last failed attempt at entering
|
||||
system sleep state.
|
||||
|
||||
What: /sys/power/suspend_stats/last_failed_step
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/last_failed_step file contains
|
||||
the last failed step in the suspend/resume path.
|
||||
|
||||
@@ -9,5 +9,6 @@ are configurable at compile, boot or run time.
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
spectre
|
||||
l1tf
|
||||
mds
|
||||
|
||||
769
Documentation/admin-guide/hw-vuln/spectre.rst
Normal file
769
Documentation/admin-guide/hw-vuln/spectre.rst
Normal file
@@ -0,0 +1,769 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Spectre Side Channels
|
||||
=====================
|
||||
|
||||
Spectre is a class of side channel attacks that exploit branch prediction
|
||||
and speculative execution on modern CPUs to read memory, possibly
|
||||
bypassing access controls. Speculative execution side channel exploits
|
||||
do not modify memory but attempt to infer privileged data in the memory.
|
||||
|
||||
This document covers Spectre variant 1 and Spectre variant 2.
|
||||
|
||||
Affected processors
|
||||
-------------------
|
||||
|
||||
Speculative execution side channel methods affect a wide range of modern
|
||||
high performance processors, since most modern high speed processors
|
||||
use branch prediction and speculative execution.
|
||||
|
||||
The following CPUs are vulnerable:
|
||||
|
||||
- Intel Core, Atom, Pentium, and Xeon processors
|
||||
|
||||
- AMD Phenom, EPYC, and Zen processors
|
||||
|
||||
- IBM POWER and zSeries processors
|
||||
|
||||
- Higher end ARM processors
|
||||
|
||||
- Apple CPUs
|
||||
|
||||
- Higher end MIPS CPUs
|
||||
|
||||
- Likely most other high performance CPUs. Contact your CPU vendor for details.
|
||||
|
||||
Whether a processor is affected or not can be read out from the Spectre
|
||||
vulnerability files in sysfs. See :ref:`spectre_sys_info`.
|
||||
|
||||
Related CVEs
|
||||
------------
|
||||
|
||||
The following CVE entries describe Spectre variants:
|
||||
|
||||
============= ======================= ==========================
|
||||
CVE-2017-5753 Bounds check bypass Spectre variant 1
|
||||
CVE-2017-5715 Branch target injection Spectre variant 2
|
||||
CVE-2019-1125 Spectre v1 swapgs Spectre variant 1 (swapgs)
|
||||
============= ======================= ==========================
|
||||
|
||||
Problem
|
||||
-------
|
||||
|
||||
CPUs use speculative operations to improve performance. That may leave
|
||||
traces of memory accesses or computations in the processor's caches,
|
||||
buffers, and branch predictors. Malicious software may be able to
|
||||
influence the speculative execution paths, and then use the side effects
|
||||
of the speculative execution in the CPUs' caches and buffers to infer
|
||||
privileged data touched during the speculative execution.
|
||||
|
||||
Spectre variant 1 attacks take advantage of speculative execution of
|
||||
conditional branches, while Spectre variant 2 attacks use speculative
|
||||
execution of indirect branches to leak privileged memory.
|
||||
See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[7] <spec_ref7>`
|
||||
:ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
|
||||
|
||||
Spectre variant 1 (Bounds Check Bypass)
|
||||
---------------------------------------
|
||||
|
||||
The bounds check bypass attack :ref:`[2] <spec_ref2>` takes advantage
|
||||
of speculative execution that bypasses conditional branch instructions
|
||||
used for memory access bounds check (e.g. checking if the index of an
|
||||
array results in memory access within a valid range). This results in
|
||||
memory accesses to invalid memory (with out-of-bound index) that are
|
||||
done speculatively before validation checks resolve. Such speculative
|
||||
memory accesses can leave side effects, creating side channels which
|
||||
leak information to the attacker.
|
||||
|
||||
There are some extensions of Spectre variant 1 attacks for reading data
|
||||
over the network, see :ref:`[12] <spec_ref12>`. However such attacks
|
||||
are difficult, low bandwidth, fragile, and are considered low risk.
|
||||
|
||||
Note that, despite "Bounds Check Bypass" name, Spectre variant 1 is not
|
||||
only about user-controlled array bounds checks. It can affect any
|
||||
conditional checks. The kernel entry code interrupt, exception, and NMI
|
||||
handlers all have conditional swapgs checks. Those may be problematic
|
||||
in the context of Spectre v1, as kernel code can speculatively run with
|
||||
a user GS.
|
||||
|
||||
Spectre variant 2 (Branch Target Injection)
|
||||
-------------------------------------------
|
||||
|
||||
The branch target injection attack takes advantage of speculative
|
||||
execution of indirect branches :ref:`[3] <spec_ref3>`. The indirect
|
||||
branch predictors inside the processor used to guess the target of
|
||||
indirect branches can be influenced by an attacker, causing gadget code
|
||||
to be speculatively executed, thus exposing sensitive data touched by
|
||||
the victim. The side effects left in the CPU's caches during speculative
|
||||
execution can be measured to infer data values.
|
||||
|
||||
.. _poison_btb:
|
||||
|
||||
In Spectre variant 2 attacks, the attacker can steer speculative indirect
|
||||
branches in the victim to gadget code by poisoning the branch target
|
||||
buffer of a CPU used for predicting indirect branch addresses. Such
|
||||
poisoning could be done by indirect branching into existing code,
|
||||
with the address offset of the indirect branch under the attacker's
|
||||
control. Since the branch prediction on impacted hardware does not
|
||||
fully disambiguate branch address and uses the offset for prediction,
|
||||
this could cause privileged code's indirect branch to jump to a gadget
|
||||
code with the same offset.
|
||||
|
||||
The most useful gadgets take an attacker-controlled input parameter (such
|
||||
as a register value) so that the memory read can be controlled. Gadgets
|
||||
without input parameters might be possible, but the attacker would have
|
||||
very little control over what memory can be read, reducing the risk of
|
||||
the attack revealing useful data.
|
||||
|
||||
One other variant 2 attack vector is for the attacker to poison the
|
||||
return stack buffer (RSB) :ref:`[13] <spec_ref13>` to cause speculative
|
||||
subroutine return instruction execution to go to a gadget. An attacker's
|
||||
imbalanced subroutine call instructions might "poison" entries in the
|
||||
return stack buffer which are later consumed by a victim's subroutine
|
||||
return instructions. This attack can be mitigated by flushing the return
|
||||
stack buffer on context switch, or virtual machine (VM) exit.
|
||||
|
||||
On systems with simultaneous multi-threading (SMT), attacks are possible
|
||||
from the sibling thread, as level 1 cache and branch target buffer
|
||||
(BTB) may be shared between hardware threads in a CPU core. A malicious
|
||||
program running on the sibling thread may influence its peer's BTB to
|
||||
steer its indirect branch speculations to gadget code, and measure the
|
||||
speculative execution's side effects left in level 1 cache to infer the
|
||||
victim's data.
|
||||
|
||||
Attack scenarios
|
||||
----------------
|
||||
|
||||
The following attack scenarios have been anticipated, but may
|
||||
not cover all possible attack vectors.
|
||||
|
||||
1. A user process attacking the kernel
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Spectre variant 1
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
The attacker passes a parameter to the kernel via a register or
|
||||
via a known address in memory during a syscall. Such parameter may
|
||||
be used later by the kernel as an index to an array or to derive
|
||||
a pointer for a Spectre variant 1 attack. The index or pointer
|
||||
is invalid, but bound checks are bypassed in the code branch taken
|
||||
for speculative execution. This could cause privileged memory to be
|
||||
accessed and leaked.
|
||||
|
||||
For kernel code that has been identified where data pointers could
|
||||
potentially be influenced for Spectre attacks, new "nospec" accessor
|
||||
macros are used to prevent speculative loading of data.
|
||||
|
||||
Spectre variant 1 (swapgs)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
An attacker can train the branch predictor to speculatively skip the
|
||||
swapgs path for an interrupt or exception. If they initialize
|
||||
the GS register to a user-space value and the swapgs is speculatively
|
||||
skipped, subsequent GS-related percpu accesses in the speculation
|
||||
window will be done with the attacker-controlled GS value. This
|
||||
could cause privileged memory to be accessed and leaked.
|
||||
|
||||
For example:
|
||||
|
||||
::
|
||||
|
||||
if (coming from user space)
|
||||
swapgs
|
||||
mov %gs:<percpu_offset>, %reg
|
||||
mov (%reg), %reg1
|
||||
|
||||
When coming from user space, the CPU can speculatively skip the
|
||||
swapgs, and then do a speculative percpu load using the user GS
|
||||
value. So the user can speculatively force a read of any kernel
|
||||
value. If a gadget exists which uses the percpu value as an address
|
||||
in another load/store, then the contents of the kernel value may
|
||||
become visible via an L1 side channel attack.
|
||||
|
||||
A similar attack exists when coming from kernel space. The CPU can
|
||||
speculatively do the swapgs, causing the user GS to get used for the
|
||||
rest of the speculative window.
|
||||
|
||||
Spectre variant 2
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
A spectre variant 2 attacker can :ref:`poison <poison_btb>` the branch
|
||||
target buffer (BTB) before issuing a syscall to launch an attack.
|
||||
After entering the kernel, the kernel could use the poisoned branch
|
||||
target buffer on indirect jump and jump to gadget code in speculative
|
||||
execution.
|
||||
|
||||
If an attacker tries to control the memory addresses leaked during
|
||||
speculative execution, he would also need to pass a parameter to the
|
||||
gadget, either through a register or a known address in memory. After
|
||||
the gadget has executed, he can measure the side effect.
|
||||
|
||||
The kernel can protect itself against consuming poisoned branch
|
||||
target buffer entries by using return trampolines (also known as
|
||||
"retpoline") :ref:`[3] <spec_ref3>` :ref:`[9] <spec_ref9>` for all
|
||||
indirect branches. Return trampolines trap speculative execution paths
|
||||
to prevent jumping to gadget code during speculative execution.
|
||||
x86 CPUs with Enhanced Indirect Branch Restricted Speculation
|
||||
(Enhanced IBRS) available in hardware should use the feature to
|
||||
mitigate Spectre variant 2 instead of retpoline. Enhanced IBRS is
|
||||
more efficient than retpoline.
|
||||
|
||||
There may be gadget code in firmware which could be exploited with
|
||||
a Spectre variant 2 attack by a rogue user process. To mitigate such
|
||||
attacks on x86, Indirect Branch Restricted Speculation (IBRS) feature
|
||||
is turned on before the kernel invokes any firmware code.
|
||||
|
||||
2. A user process attacking another user process
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
A malicious user process can try to attack another user process,
|
||||
either via a context switch on the same hardware thread, or from the
|
||||
sibling hyperthread sharing a physical processor core on a simultaneous
|
||||
multi-threading (SMT) system.
|
||||
|
||||
Spectre variant 1 attacks generally require passing parameters
|
||||
between the processes, which needs a data passing relationship, such
|
||||
as remote procedure calls (RPC). Those parameters are used in gadget
|
||||
code to derive invalid data pointers accessing privileged memory in
|
||||
the attacked process.
|
||||
|
||||
Spectre variant 2 attacks can be launched from a rogue process by
|
||||
:ref:`poisoning <poison_btb>` the branch target buffer. This can
|
||||
influence the indirect branch targets for a victim process that either
|
||||
runs later on the same hardware thread, or runs concurrently on
|
||||
a sibling hardware thread sharing the same physical core.
|
||||
|
||||
A user process can protect itself against Spectre variant 2 attacks
|
||||
by using the prctl() syscall to disable indirect branch speculation
|
||||
for itself. An administrator can also cordon off an unsafe process
|
||||
from polluting the branch target buffer by disabling the process's
|
||||
indirect branch speculation. This comes with a performance cost
|
||||
from not using indirect branch speculation and clearing the branch
|
||||
target buffer. When SMT is enabled on x86, for a process that has
|
||||
indirect branch speculation disabled, Single Threaded Indirect Branch
|
||||
Predictors (STIBP) :ref:`[4] <spec_ref4>` are turned on to prevent the
|
||||
sibling thread from controlling the branch target buffer. In addition,
|
||||
the Indirect Branch Prediction Barrier (IBPB) is issued to clear the
|
||||
branch target buffer when context switching to and from such a process.
|
||||
|
||||
On x86, the return stack buffer is stuffed on context switch.
|
||||
This prevents the branch target buffer from being used for branch
|
||||
prediction when the return stack buffer underflows while switching to
|
||||
a deeper call stack. Any poisoned entries in the return stack buffer
|
||||
left by the previous process will also be cleared.
|
||||
|
||||
User programs should use address space randomization to make attacks
|
||||
more difficult (Set /proc/sys/kernel/randomize_va_space = 1 or 2).
|
||||
|
||||
3. A virtualized guest attacking the host
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The attack mechanism is similar to how user processes attack the
|
||||
kernel. The kernel is entered via hyper-calls or other virtualization
|
||||
exit paths.
|
||||
|
||||
For Spectre variant 1 attacks, rogue guests can pass parameters
|
||||
(e.g. in registers) via hyper-calls to derive invalid pointers to
|
||||
speculate into privileged memory after entering the kernel. For places
|
||||
where such kernel code has been identified, nospec accessor macros
|
||||
are used to stop speculative memory access.
|
||||
|
||||
For Spectre variant 2 attacks, rogue guests can :ref:`poison
|
||||
<poison_btb>` the branch target buffer or return stack buffer, causing
|
||||
the kernel to jump to gadget code in the speculative execution paths.
|
||||
|
||||
To mitigate variant 2, the host kernel can use return trampolines
|
||||
for indirect branches to bypass the poisoned branch target buffer,
|
||||
and flush the return stack buffer on VM exit. This prevents rogue
|
||||
guests from affecting indirect branching in the host kernel.
|
||||
|
||||
To protect host processes from rogue guests, host processes can have
|
||||
indirect branch speculation disabled via prctl(). The branch target
|
||||
buffer is cleared before context switching to such processes.
|
||||
|
||||
4. A virtualized guest attacking other guest
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
A rogue guest may attack another guest to get data accessible by the
|
||||
other guest.
|
||||
|
||||
Spectre variant 1 attacks are possible if parameters can be passed
|
||||
between guests. This may be done via mechanisms such as shared memory
|
||||
or message passing. Such parameters could be used to derive data
|
||||
pointers to privileged data in guest. The privileged data could be
|
||||
accessed by gadget code in the victim's speculation paths.
|
||||
|
||||
Spectre variant 2 attacks can be launched from a rogue guest by
|
||||
:ref:`poisoning <poison_btb>` the branch target buffer or the return
|
||||
stack buffer. Such poisoned entries could be used to influence
|
||||
speculation execution paths in the victim guest.
|
||||
|
||||
The Linux kernel mitigates attacks to other guests running in the same
|
||||
CPU hardware thread by flushing the return stack buffer on VM exit,
|
||||
and clearing the branch target buffer before switching to a new guest.
|
||||
|
||||
If SMT is used, Spectre variant 2 attacks from an untrusted guest
|
||||
in the sibling hyperthread can be mitigated by the administrator,
|
||||
by turning off the unsafe guest's indirect branch speculation via
|
||||
prctl(). A guest can also protect itself by turning on microcode
|
||||
based mitigations (such as IBPB or STIBP on x86) within the guest.
|
||||
|
||||
.. _spectre_sys_info:
|
||||
|
||||
Spectre system information
|
||||
--------------------------
|
||||
|
||||
The Linux kernel provides a sysfs interface to enumerate the current
|
||||
mitigation status of the system for Spectre: whether the system is
|
||||
vulnerable, and which mitigations are active.
|
||||
|
||||
The sysfs file showing Spectre variant 1 mitigation status is:
|
||||
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v1
|
||||
|
||||
The possible values in this file are:
|
||||
|
||||
.. list-table::
|
||||
|
||||
* - 'Not affected'
|
||||
- The processor is not vulnerable.
|
||||
* - 'Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers'
|
||||
- The swapgs protections are disabled; otherwise it has
|
||||
protection in the kernel on a case by case basis with explicit
|
||||
pointer sanitization and usercopy LFENCE barriers.
|
||||
* - 'Mitigation: usercopy/swapgs barriers and __user pointer sanitization'
|
||||
- Protection in the kernel on a case by case basis with explicit
|
||||
pointer sanitization, usercopy LFENCE barriers, and swapgs LFENCE
|
||||
barriers.
|
||||
|
||||
However, the protections are put in place on a case by case basis,
|
||||
and there is no guarantee that all possible attack vectors for Spectre
|
||||
variant 1 are covered.
|
||||
|
||||
The spectre_v2 kernel file reports if the kernel has been compiled with
|
||||
retpoline mitigation or if the CPU has hardware mitigation, and if the
|
||||
CPU has support for additional process-specific mitigation.
|
||||
|
||||
This file also reports CPU features enabled by microcode to mitigate
|
||||
attack between user processes:
|
||||
|
||||
1. Indirect Branch Prediction Barrier (IBPB) to add additional
|
||||
isolation between processes of different users.
|
||||
2. Single Thread Indirect Branch Predictors (STIBP) to add additional
|
||||
isolation between CPU threads running on the same core.
|
||||
|
||||
These CPU features may impact performance when used and can be enabled
|
||||
per process on a case-by-case basis.
|
||||
|
||||
The sysfs file showing Spectre variant 2 mitigation status is:
|
||||
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v2
|
||||
|
||||
The possible values in this file are:
|
||||
|
||||
- Kernel status:
|
||||
|
||||
==================================== =================================
|
||||
'Not affected' The processor is not vulnerable
|
||||
'Vulnerable' Vulnerable, no mitigation
|
||||
'Mitigation: Full generic retpoline' Software-focused mitigation
|
||||
'Mitigation: Full AMD retpoline' AMD-specific software mitigation
|
||||
'Mitigation: Enhanced IBRS' Hardware-focused mitigation
|
||||
==================================== =================================
|
||||
|
||||
- Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
|
||||
used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
|
||||
|
||||
========== =============================================================
|
||||
'IBRS_FW' Protection against user program attacks when calling firmware
|
||||
========== =============================================================
|
||||
|
||||
- Indirect branch prediction barrier (IBPB) status for protection between
|
||||
processes of different users. This feature can be controlled through
|
||||
prctl() per process, or through kernel command line options. This is
|
||||
an x86 only feature. For more details see below.
|
||||
|
||||
=================== ========================================================
|
||||
'IBPB: disabled' IBPB unused
|
||||
'IBPB: always-on' Use IBPB on all tasks
|
||||
'IBPB: conditional' Use IBPB on SECCOMP or indirect branch restricted tasks
|
||||
=================== ========================================================
|
||||
|
||||
- Single threaded indirect branch prediction (STIBP) status for protection
|
||||
between different hyper threads. This feature can be controlled through
|
||||
prctl() per process, or through kernel command line options. This is an x86
|
||||
only feature. For more details see below.
|
||||
|
||||
==================== ========================================================
|
||||
'STIBP: disabled' STIBP unused
|
||||
'STIBP: forced' Use STIBP on all tasks
|
||||
'STIBP: conditional' Use STIBP on SECCOMP or indirect branch restricted tasks
|
||||
==================== ========================================================
|
||||
|
||||
- Return stack buffer (RSB) protection status:
|
||||
|
||||
============= ===========================================
|
||||
'RSB filling' Protection of RSB on context switch enabled
|
||||
============= ===========================================
|
||||
|
||||
Full mitigation might require a microcode update from the CPU
|
||||
vendor. When the necessary microcode is not available, the kernel will
|
||||
report vulnerability.
|
||||
|
||||
Turning on mitigation for Spectre variant 1 and Spectre variant 2
|
||||
-----------------------------------------------------------------
|
||||
|
||||
1. Kernel mitigation
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Spectre variant 1
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
For the Spectre variant 1, vulnerable kernel code (as determined
|
||||
by code audit or scanning tools) is annotated on a case by case
|
||||
basis to use nospec accessor macros for bounds clipping :ref:`[2]
|
||||
<spec_ref2>` to avoid any usable disclosure gadgets. However, it may
|
||||
not cover all attack vectors for Spectre variant 1.
|
||||
|
||||
Copy-from-user code has an LFENCE barrier to prevent the access_ok()
|
||||
check from being mis-speculated. The barrier is done by the
|
||||
barrier_nospec() macro.
|
||||
|
||||
For the swapgs variant of Spectre variant 1, LFENCE barriers are
|
||||
added to interrupt, exception and NMI entry where needed. These
|
||||
barriers are done by the FENCE_SWAPGS_KERNEL_ENTRY and
|
||||
FENCE_SWAPGS_USER_ENTRY macros.
|
||||
|
||||
Spectre variant 2
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
For Spectre variant 2 mitigation, the compiler turns indirect calls or
|
||||
jumps in the kernel into equivalent return trampolines (retpolines)
|
||||
:ref:`[3] <spec_ref3>` :ref:`[9] <spec_ref9>` to go to the target
|
||||
addresses. Speculative execution paths under retpolines are trapped
|
||||
in an infinite loop to prevent any speculative execution jumping to
|
||||
a gadget.
|
||||
|
||||
To turn on retpoline mitigation on a vulnerable CPU, the kernel
|
||||
needs to be compiled with a gcc compiler that supports the
|
||||
-mindirect-branch=thunk-extern -mindirect-branch-register options.
|
||||
If the kernel is compiled with a Clang compiler, the compiler needs
|
||||
to support the -mretpoline-external-thunk option. The kernel config
|
||||
CONFIG_RETPOLINE needs to be turned on, and the CPU needs to run with
|
||||
the latest updated microcode.
|
||||
|
||||
On Intel Skylake-era systems the mitigation covers most, but not all,
|
||||
cases. See :ref:`[3] <spec_ref3>` for more details.
|
||||
|
||||
On CPUs with hardware mitigation for Spectre variant 2 (e.g. Enhanced
|
||||
IBRS on x86), retpoline is automatically disabled at run time.
|
||||
|
||||
The retpoline mitigation is turned on by default on vulnerable
|
||||
CPUs. It can be forced on or off by the administrator
|
||||
via the kernel command line and sysfs control files. See
|
||||
:ref:`spectre_mitigation_control_command_line`.
|
||||
|
||||
On x86, indirect branch restricted speculation is turned on by default
|
||||
before invoking any firmware code to prevent Spectre variant 2 exploits
|
||||
using the firmware.
|
||||
|
||||
Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y
|
||||
and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes
|
||||
attacks on the kernel generally more difficult.
|
||||
|
||||
2. User program mitigation
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
User programs can mitigate Spectre variant 1 using LFENCE or "bounds
|
||||
clipping". For more details see :ref:`[2] <spec_ref2>`.
|
||||
|
||||
For Spectre variant 2 mitigation, individual user programs
|
||||
can be compiled with return trampolines for indirect branches.
|
||||
This protects them from consuming poisoned entries in the branch
|
||||
target buffer left by malicious software. Alternatively, the
|
||||
programs can disable their indirect branch speculation via prctl()
|
||||
(See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
|
||||
On x86, this will turn on STIBP to guard against attacks from the
|
||||
sibling thread when the user program is running, and use IBPB to
|
||||
flush the branch target buffer when switching to/from the program.
|
||||
|
||||
Restricting indirect branch speculation on a user program will
|
||||
also prevent the program from launching a variant 2 attack
|
||||
on x86. All sand-boxed SECCOMP programs have indirect branch
|
||||
speculation restricted by default. Administrators can change
|
||||
that behavior via the kernel command line and sysfs control files.
|
||||
See :ref:`spectre_mitigation_control_command_line`.
|
||||
|
||||
Programs that disable their indirect branch speculation will have
|
||||
more overhead and run slower.
|
||||
|
||||
User programs should use address space randomization
|
||||
(/proc/sys/kernel/randomize_va_space = 1 or 2) to make attacks more
|
||||
difficult.
|
||||
|
||||
3. VM mitigation
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Within the kernel, Spectre variant 1 attacks from rogue guests are
|
||||
mitigated on a case by case basis in VM exit paths. Vulnerable code
|
||||
uses nospec accessor macros for "bounds clipping", to avoid any
|
||||
usable disclosure gadgets. However, this may not cover all variant
|
||||
1 attack vectors.
|
||||
|
||||
For Spectre variant 2 attacks from rogue guests to the kernel, the
|
||||
Linux kernel uses retpoline or Enhanced IBRS to prevent consumption of
|
||||
poisoned entries in branch target buffer left by rogue guests. It also
|
||||
flushes the return stack buffer on every VM exit to prevent a return
|
||||
stack buffer underflow so poisoned branch target buffer could be used,
|
||||
or attacker guests leaving poisoned entries in the return stack buffer.
|
||||
|
||||
To mitigate guest-to-guest attacks in the same CPU hardware thread,
|
||||
the branch target buffer is sanitized by flushing before switching
|
||||
to a new guest on a CPU.
|
||||
|
||||
The above mitigations are turned on by default on vulnerable CPUs.
|
||||
|
||||
To mitigate guest-to-guest attacks from sibling thread when SMT is
|
||||
in use, an untrusted guest running in the sibling thread can have
|
||||
its indirect branch speculation disabled by administrator via prctl().
|
||||
|
||||
The kernel also allows guests to use any microcode based mitigation
|
||||
they choose to use (such as IBPB or STIBP on x86) to protect themselves.
|
||||
|
||||
.. _spectre_mitigation_control_command_line:
|
||||
|
||||
Mitigation control on the kernel command line
|
||||
---------------------------------------------
|
||||
|
||||
Spectre variant 2 mitigation can be disabled or force enabled at the
|
||||
kernel command line.
|
||||
|
||||
nospectre_v1
|
||||
|
||||
[X86,PPC] Disable mitigations for Spectre Variant 1
|
||||
(bounds check bypass). With this option data leaks are
|
||||
possible in the system.
|
||||
|
||||
nospectre_v2
|
||||
|
||||
[X86] Disable all mitigations for the Spectre variant 2
|
||||
(indirect branch prediction) vulnerability. System may
|
||||
allow data leaks with this option, which is equivalent
|
||||
to spectre_v2=off.
|
||||
|
||||
|
||||
spectre_v2=
|
||||
|
||||
[X86] Control mitigation of Spectre variant 2
|
||||
(indirect branch speculation) vulnerability.
|
||||
The default operation protects the kernel from
|
||||
user space attacks.
|
||||
|
||||
on
|
||||
unconditionally enable, implies
|
||||
spectre_v2_user=on
|
||||
off
|
||||
unconditionally disable, implies
|
||||
spectre_v2_user=off
|
||||
auto
|
||||
kernel detects whether your CPU model is
|
||||
vulnerable
|
||||
|
||||
Selecting 'on' will, and 'auto' may, choose a
|
||||
mitigation method at run time according to the
|
||||
CPU, the available microcode, the setting of the
|
||||
CONFIG_RETPOLINE configuration option, and the
|
||||
compiler with which the kernel was built.
|
||||
|
||||
Selecting 'on' will also enable the mitigation
|
||||
against user space to user space task attacks.
|
||||
|
||||
Selecting 'off' will disable both the kernel and
|
||||
the user space protections.
|
||||
|
||||
Specific mitigations can also be selected manually:
|
||||
|
||||
retpoline
|
||||
replace indirect branches
|
||||
retpoline,generic
|
||||
Google's original retpoline
|
||||
retpoline,amd
|
||||
AMD-specific minimal thunk
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
spectre_v2=auto.
|
||||
|
||||
For user space mitigation:
|
||||
|
||||
spectre_v2_user=
|
||||
|
||||
[X86] Control mitigation of Spectre variant 2
|
||||
(indirect branch speculation) vulnerability between
|
||||
user space tasks
|
||||
|
||||
on
|
||||
Unconditionally enable mitigations. Is
|
||||
enforced by spectre_v2=on
|
||||
|
||||
off
|
||||
Unconditionally disable mitigations. Is
|
||||
enforced by spectre_v2=off
|
||||
|
||||
prctl
|
||||
Indirect branch speculation is enabled,
|
||||
but mitigation can be enabled via prctl
|
||||
per thread. The mitigation control state
|
||||
is inherited on fork.
|
||||
|
||||
prctl,ibpb
|
||||
Like "prctl" above, but only STIBP is
|
||||
controlled per thread. IBPB is issued
|
||||
always when switching between different user
|
||||
space processes.
|
||||
|
||||
seccomp
|
||||
Same as "prctl" above, but all seccomp
|
||||
threads will enable the mitigation unless
|
||||
they explicitly opt out.
|
||||
|
||||
seccomp,ibpb
|
||||
Like "seccomp" above, but only STIBP is
|
||||
controlled per thread. IBPB is issued
|
||||
always when switching between different
|
||||
user space processes.
|
||||
|
||||
auto
|
||||
Kernel selects the mitigation depending on
|
||||
the available CPU features and vulnerability.
|
||||
|
||||
Default mitigation:
|
||||
If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
spectre_v2_user=auto.
|
||||
|
||||
In general the kernel by default selects
|
||||
reasonable mitigations for the current CPU. To
|
||||
disable Spectre variant 2 mitigations, boot with
|
||||
spectre_v2=off. Spectre variant 1 mitigations
|
||||
cannot be disabled.
|
||||
|
||||
Mitigation selection guide
|
||||
--------------------------
|
||||
|
||||
1. Trusted userspace
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
If all userspace applications are from trusted sources and do not
|
||||
execute externally supplied untrusted code, then the mitigations can
|
||||
be disabled.
|
||||
|
||||
2. Protect sensitive programs
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
For security-sensitive programs that have secrets (e.g. crypto
|
||||
keys), protection against Spectre variant 2 can be put in place by
|
||||
disabling indirect branch speculation when the program is running
|
||||
(See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
|
||||
|
||||
3. Sandbox untrusted programs
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Untrusted programs that could be a source of attacks can be cordoned
|
||||
off by disabling their indirect branch speculation when they are run
|
||||
(See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
|
||||
This prevents untrusted programs from polluting the branch target
|
||||
buffer. All programs running in SECCOMP sandboxes have indirect
|
||||
branch speculation restricted by default. This behavior can be
|
||||
changed via the kernel command line and sysfs control files. See
|
||||
:ref:`spectre_mitigation_control_command_line`.
|
||||
|
||||
4. High security mode
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
All Spectre variant 2 mitigations can be forced on
|
||||
at boot time for all programs (See the "on" option in
|
||||
:ref:`spectre_mitigation_control_command_line`). This will add
|
||||
overhead as indirect branch speculations for all programs will be
|
||||
restricted.
|
||||
|
||||
On x86, branch target buffer will be flushed with IBPB when switching
|
||||
to a new program. STIBP is left on all the time to protect programs
|
||||
against variant 2 attacks originating from programs running on
|
||||
sibling threads.
|
||||
|
||||
Alternatively, STIBP can be used only when running programs
|
||||
whose indirect branch speculation is explicitly disabled,
|
||||
while IBPB is still used all the time when switching to a new
|
||||
program to clear the branch target buffer (See "ibpb" option in
|
||||
:ref:`spectre_mitigation_control_command_line`). This "ibpb" option
|
||||
has less performance cost than the "on" option, which leaves STIBP
|
||||
on all the time.
|
||||
|
||||
References on Spectre
|
||||
---------------------
|
||||
|
||||
Intel white papers:
|
||||
|
||||
.. _spec_ref1:
|
||||
|
||||
[1] `Intel analysis of speculative execution side channels <https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf>`_.
|
||||
|
||||
.. _spec_ref2:
|
||||
|
||||
[2] `Bounds check bypass <https://software.intel.com/security-software-guidance/software-guidance/bounds-check-bypass>`_.
|
||||
|
||||
.. _spec_ref3:
|
||||
|
||||
[3] `Deep dive: Retpoline: A branch target injection mitigation <https://software.intel.com/security-software-guidance/insights/deep-dive-retpoline-branch-target-injection-mitigation>`_.
|
||||
|
||||
.. _spec_ref4:
|
||||
|
||||
[4] `Deep Dive: Single Thread Indirect Branch Predictors <https://software.intel.com/security-software-guidance/insights/deep-dive-single-thread-indirect-branch-predictors>`_.
|
||||
|
||||
AMD white papers:
|
||||
|
||||
.. _spec_ref5:
|
||||
|
||||
[5] `AMD64 technology indirect branch control extension <https://developer.amd.com/wp-content/resources/Architecture_Guidelines_Update_Indirect_Branch_Control.pdf>`_.
|
||||
|
||||
.. _spec_ref6:
|
||||
|
||||
[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/90343-B_SoftwareTechniquesforManagingSpeculation_WP_7-18Update_FNL.pdf>`_.
|
||||
|
||||
ARM white papers:
|
||||
|
||||
.. _spec_ref7:
|
||||
|
||||
[7] `Cache speculation side-channels <https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/download-the-whitepaper>`_.
|
||||
|
||||
.. _spec_ref8:
|
||||
|
||||
[8] `Cache speculation issues update <https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/latest-updates/cache-speculation-issues-update>`_.
|
||||
|
||||
Google white paper:
|
||||
|
||||
.. _spec_ref9:
|
||||
|
||||
[9] `Retpoline: a software construct for preventing branch-target-injection <https://support.google.com/faqs/answer/7625886>`_.
|
||||
|
||||
MIPS white paper:
|
||||
|
||||
.. _spec_ref10:
|
||||
|
||||
[10] `MIPS: response on speculative execution and side channel vulnerabilities <https://www.mips.com/blog/mips-response-on-speculative-execution-and-side-channel-vulnerabilities/>`_.
|
||||
|
||||
Academic papers:
|
||||
|
||||
.. _spec_ref11:
|
||||
|
||||
[11] `Spectre Attacks: Exploiting Speculative Execution <https://spectreattack.com/spectre.pdf>`_.
|
||||
|
||||
.. _spec_ref12:
|
||||
|
||||
[12] `NetSpectre: Read Arbitrary Memory over Network <https://arxiv.org/abs/1807.10535>`_.
|
||||
|
||||
.. _spec_ref13:
|
||||
|
||||
[13] `Spectre Returns! Speculation Attacks using the Return Stack Buffer <https://www.usenix.org/system/files/conference/woot18/woot18-paper-koruyeh.pdf>`_.
|
||||
@@ -126,6 +126,7 @@ parameter is applicable::
|
||||
NET Appropriate network support is enabled.
|
||||
NUMA NUMA support is enabled.
|
||||
NFS Appropriate NFS support is enabled.
|
||||
OF Devicetree is enabled.
|
||||
OSS OSS sound support is enabled.
|
||||
PV_OPS A paravirtualized kernel is enabled.
|
||||
PARIDE The ParIDE (parallel port IDE) subsystem is enabled.
|
||||
|
||||
@@ -1631,6 +1631,15 @@
|
||||
|
||||
initrd= [BOOT] Specify the location of the initial ramdisk
|
||||
|
||||
init_on_alloc= [MM] Fill newly allocated pages and heap objects with
|
||||
zeroes.
|
||||
Format: 0 | 1
|
||||
Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
|
||||
|
||||
init_on_free= [MM] Fill freed pages and heap objects with zeroes.
|
||||
Format: 0 | 1
|
||||
Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
|
||||
|
||||
init_pkru= [x86] Specify the default memory protection keys rights
|
||||
register contents for all processes. 0x55555554 by
|
||||
default (disallow access to all but pkey 0). Can
|
||||
@@ -2503,8 +2512,8 @@
|
||||
http://repo.or.cz/w/linux-2.6/mini2440.git
|
||||
|
||||
mitigations=
|
||||
[X86,PPC,S390] Control optional mitigations for CPU
|
||||
vulnerabilities. This is a set of curated,
|
||||
[X86,PPC,S390,ARM64] Control optional mitigations for
|
||||
CPU vulnerabilities. This is a set of curated,
|
||||
arch-independent options, each of which is an
|
||||
aggregation of existing arch-specific options.
|
||||
|
||||
@@ -2513,11 +2522,14 @@
|
||||
improves system performance, but it may also
|
||||
expose users to several CPU vulnerabilities.
|
||||
Equivalent to: nopti [X86,PPC]
|
||||
kpti=0 [ARM64]
|
||||
nospectre_v1 [PPC]
|
||||
nobp=0 [S390]
|
||||
nospectre_v2 [X86,PPC,S390]
|
||||
nospectre_v1 [X86]
|
||||
nospectre_v2 [X86,PPC,S390,ARM64]
|
||||
spectre_v2_user=off [X86]
|
||||
spec_store_bypass_disable=off [X86,PPC]
|
||||
ssbd=force-off [ARM64]
|
||||
l1tf=off [X86]
|
||||
mds=off [X86]
|
||||
|
||||
@@ -2861,14 +2873,14 @@
|
||||
nosmt=force: Force disable SMT, cannot be undone
|
||||
via the sysfs control file.
|
||||
|
||||
nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds
|
||||
check bypass). With this option data leaks are possible
|
||||
in the system.
|
||||
nospectre_v1 [X86,PPC] Disable mitigations for Spectre Variant 1
|
||||
(bounds check bypass). With this option data leaks
|
||||
are possible in the system.
|
||||
|
||||
nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
|
||||
(indirect branch prediction) vulnerability. System may
|
||||
allow data leaks with this option, which is equivalent
|
||||
to spectre_v2=off.
|
||||
nospectre_v2 [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for
|
||||
the Spectre variant 2 (indirect branch prediction)
|
||||
vulnerability. System may allow data leaks with this
|
||||
option.
|
||||
|
||||
nospec_store_bypass_disable
|
||||
[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
|
||||
@@ -3066,6 +3078,12 @@
|
||||
This can be set from sysctl after boot.
|
||||
See Documentation/sysctl/vm.txt for details.
|
||||
|
||||
of_devlink [OF, KNL] Create device links between consumer and
|
||||
supplier devices by scanning the devicetree to infer the
|
||||
consumer/supplier relationships. A consumer device
|
||||
will not be probed until all the supplier devices have
|
||||
probed successfully.
|
||||
|
||||
ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
|
||||
See Documentation/debugging-via-ohci1394.txt for more
|
||||
info.
|
||||
@@ -3951,6 +3969,13 @@
|
||||
Run specified binary instead of /init from the ramdisk,
|
||||
used for early userspace startup. See initrd.
|
||||
|
||||
rdrand= [X86]
|
||||
force - Override the decision by the kernel to hide the
|
||||
advertisement of RDRAND support (this affects
|
||||
certain AMD processors because of buggy BIOS
|
||||
support, specifically around the suspend/resume
|
||||
path).
|
||||
|
||||
rdt= [HW,X86,RDT]
|
||||
Turn on/off individual RDT features. List is:
|
||||
cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
|
||||
@@ -4980,12 +5005,6 @@
|
||||
emulate [default] Vsyscalls turn into traps and are
|
||||
emulated reasonably safely.
|
||||
|
||||
native Vsyscalls are native syscall instructions.
|
||||
This is a little bit faster than trapping
|
||||
and makes a few dynamic recompilers work
|
||||
better than they would in emulation mode.
|
||||
It also makes exploits much easier to write.
|
||||
|
||||
none Vsyscalls don't work at all. This makes
|
||||
them quite hard to use for exploits but
|
||||
might break your system.
|
||||
|
||||
@@ -178,3 +178,7 @@ HWCAP_ILRCPC
|
||||
HWCAP_FLAGM
|
||||
|
||||
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
|
||||
|
||||
HWCAP_SSBS
|
||||
|
||||
Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
|
||||
|
||||
156
Documentation/arm64/tagged-address-abi.rst
Normal file
156
Documentation/arm64/tagged-address-abi.rst
Normal file
@@ -0,0 +1,156 @@
|
||||
==========================
|
||||
AArch64 TAGGED ADDRESS ABI
|
||||
==========================
|
||||
|
||||
Authors: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||
Catalin Marinas <catalin.marinas@arm.com>
|
||||
|
||||
Date: 21 August 2019
|
||||
|
||||
This document describes the usage and semantics of the Tagged Address
|
||||
ABI on AArch64 Linux.
|
||||
|
||||
1. Introduction
|
||||
---------------
|
||||
|
||||
On AArch64 the ``TCR_EL1.TBI0`` bit is set by default, allowing
|
||||
userspace (EL0) to perform memory accesses through 64-bit pointers with
|
||||
a non-zero top byte. This document describes the relaxation of the
|
||||
syscall ABI that allows userspace to pass certain tagged pointers to
|
||||
kernel syscalls.
|
||||
|
||||
2. AArch64 Tagged Address ABI
|
||||
-----------------------------
|
||||
|
||||
From the kernel syscall interface perspective and for the purposes of
|
||||
this document, a "valid tagged pointer" is a pointer with a potentially
|
||||
non-zero top-byte that references an address in the user process address
|
||||
space obtained in one of the following ways:
|
||||
|
||||
- ``mmap()`` syscall where either:
|
||||
|
||||
- flags have the ``MAP_ANONYMOUS`` bit set or
|
||||
- the file descriptor refers to a regular file (including those
|
||||
returned by ``memfd_create()``) or ``/dev/zero``
|
||||
|
||||
- ``brk()`` syscall (i.e. the heap area between the initial location of
|
||||
the program break at process creation and its current location).
|
||||
|
||||
- any memory mapped by the kernel in the address space of the process
|
||||
during creation and with the same restrictions as for ``mmap()`` above
|
||||
(e.g. data, bss, stack).
|
||||
|
||||
The AArch64 Tagged Address ABI has two stages of relaxation depending on
|
||||
how the user addresses are used by the kernel:
|
||||
|
||||
1. User addresses not accessed by the kernel but used for address space
|
||||
management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use
|
||||
of valid tagged pointers in this context is always allowed.
|
||||
|
||||
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
|
||||
relaxation is disabled by default and the application thread needs to
|
||||
explicitly enable it via ``prctl()`` as follows:
|
||||
|
||||
- ``PR_SET_TAGGED_ADDR_CTRL``: enable or disable the AArch64 Tagged
|
||||
Address ABI for the calling thread.
|
||||
|
||||
The ``(unsigned int) arg2`` argument is a bit mask describing the
|
||||
control mode used:
|
||||
|
||||
- ``PR_TAGGED_ADDR_ENABLE``: enable AArch64 Tagged Address ABI.
|
||||
Default status is disabled.
|
||||
|
||||
Arguments ``arg3``, ``arg4``, and ``arg5`` must be 0.
|
||||
|
||||
- ``PR_GET_TAGGED_ADDR_CTRL``: get the status of the AArch64 Tagged
|
||||
Address ABI for the calling thread.
|
||||
|
||||
Arguments ``arg2``, ``arg3``, ``arg4``, and ``arg5`` must be 0.
|
||||
|
||||
The ABI properties described above are thread-scoped, inherited on
|
||||
clone() and fork() and cleared on exec().
|
||||
|
||||
Calling ``prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0)``
|
||||
returns ``-EINVAL`` if the AArch64 Tagged Address ABI is globally
|
||||
disabled by ``sysctl abi.tagged_addr_disabled=1``. The default
|
||||
``sysctl abi.tagged_addr_disabled`` configuration is 0.
|
||||
|
||||
When the AArch64 Tagged Address ABI is enabled for a thread, the
|
||||
following behaviours are guaranteed:
|
||||
|
||||
- All syscalls except the cases mentioned in section 3 can accept any
|
||||
valid tagged pointer.
|
||||
|
||||
- The syscall behaviour is undefined for invalid tagged pointers: it may
|
||||
result in an error code being returned, a (fatal) signal being raised,
|
||||
or other modes of failure.
|
||||
|
||||
- The syscall behaviour for a valid tagged pointer is the same as for
|
||||
the corresponding untagged pointer.
|
||||
|
||||
|
||||
A definition of the meaning of tagged pointers on AArch64 can be found
|
||||
in Documentation/arm64/tagged-pointers.rst.
|
||||
|
||||
3. AArch64 Tagged Address ABI Exceptions
|
||||
-----------------------------------------
|
||||
|
||||
The following system call parameters must be untagged regardless of the
|
||||
ABI relaxation:
|
||||
|
||||
- ``prctl()`` other than pointers to user data either passed directly or
|
||||
indirectly as arguments to be accessed by the kernel.
|
||||
|
||||
- ``ioctl()`` other than pointers to user data either passed directly or
|
||||
indirectly as arguments to be accessed by the kernel.
|
||||
|
||||
- ``shmat()`` and ``shmdt()``.
|
||||
|
||||
Any attempt to use non-zero tagged pointers may result in an error code
|
||||
being returned, a (fatal) signal being raised, or other modes of
|
||||
failure.
|
||||
|
||||
4. Example of correct usage
|
||||
---------------------------
|
||||
.. code-block:: c
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#define PR_SET_TAGGED_ADDR_CTRL 55
|
||||
#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
|
||||
|
||||
#define TAG_SHIFT 56
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int tbi_enabled = 0;
|
||||
unsigned long tag = 0;
|
||||
char *ptr;
|
||||
|
||||
/* check/enable the tagged address ABI */
|
||||
if (!prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0))
|
||||
tbi_enabled = 1;
|
||||
|
||||
/* memory allocation */
|
||||
ptr = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (ptr == MAP_FAILED)
|
||||
return 1;
|
||||
|
||||
/* set a non-zero tag if the ABI is available */
|
||||
if (tbi_enabled)
|
||||
tag = rand() & 0xff;
|
||||
ptr = (char *)((unsigned long)ptr | (tag << TAG_SHIFT));
|
||||
|
||||
/* memory access to a tagged address */
|
||||
strcpy(ptr, "tagged pointer\n");
|
||||
|
||||
/* syscall with a tagged pointer */
|
||||
write(1, ptr, strlen(ptr));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -18,7 +18,9 @@ Passing tagged addresses to the kernel
|
||||
--------------------------------------
|
||||
|
||||
All interpretation of userspace memory addresses by the kernel assumes
|
||||
an address tag of 0x00.
|
||||
an address tag of 0x00, unless the application enables the AArch64
|
||||
Tagged Address ABI explicitly
|
||||
(Documentation/arm64/tagged-address-abi.rst).
|
||||
|
||||
This includes, but is not limited to, addresses found in:
|
||||
|
||||
@@ -31,13 +33,15 @@ This includes, but is not limited to, addresses found in:
|
||||
- the frame pointer (x29) and frame records, e.g. when interpreting
|
||||
them to generate a backtrace or call graph.
|
||||
|
||||
Using non-zero address tags in any of these locations may result in an
|
||||
error code being returned, a (fatal) signal being raised, or other modes
|
||||
of failure.
|
||||
Using non-zero address tags in any of these locations when the
|
||||
userspace application did not enable the AArch64 Tagged Address ABI may
|
||||
result in an error code being returned, a (fatal) signal being raised,
|
||||
or other modes of failure.
|
||||
|
||||
For these reasons, passing non-zero address tags to the kernel via
|
||||
system calls is forbidden, and using a non-zero address tag for sp is
|
||||
strongly discouraged.
|
||||
For these reasons, when the AArch64 Tagged Address ABI is disabled,
|
||||
passing non-zero address tags to the kernel via system calls is
|
||||
forbidden, and using a non-zero address tag for sp is strongly
|
||||
discouraged.
|
||||
|
||||
Programs maintaining a frame pointer and frame records that use non-zero
|
||||
address tags may suffer impaired or inaccurate debug and profiling
|
||||
@@ -57,6 +61,9 @@ be preserved.
|
||||
The architecture prevents the use of a tagged PC, so the upper byte will
|
||||
be set to a sign-extension of bit 55 on exception return.
|
||||
|
||||
This behaviour is maintained when the AArch64 Tagged Address ABI is
|
||||
enabled.
|
||||
|
||||
|
||||
Other considerations
|
||||
--------------------
|
||||
|
||||
@@ -177,6 +177,9 @@ These helper barriers exist because architectures have varying implicit
|
||||
ordering on their SMP atomic primitives. For example our TSO architectures
|
||||
provide full ordered atomics and these barriers are no-ops.
|
||||
|
||||
NOTE: when the atomic RmW ops are fully ordered, they should also imply a
|
||||
compiler barrier.
|
||||
|
||||
Thus:
|
||||
|
||||
atomic_fetch_add();
|
||||
|
||||
@@ -4,15 +4,25 @@ The Kernel Address Sanitizer (KASAN)
|
||||
Overview
|
||||
--------
|
||||
|
||||
KernelAddressSANitizer (KASAN) is a dynamic memory error detector. It provides
|
||||
a fast and comprehensive solution for finding use-after-free and out-of-bounds
|
||||
bugs.
|
||||
KernelAddressSANitizer (KASAN) is a dynamic memory error detector designed to
|
||||
find out-of-bounds and use-after-free bugs. KASAN has two modes: generic KASAN
|
||||
(similar to userspace ASan) and software tag-based KASAN (similar to userspace
|
||||
HWASan).
|
||||
|
||||
KASAN uses compile-time instrumentation for checking every memory access,
|
||||
therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
|
||||
required for detection of out-of-bounds accesses to stack or global variables.
|
||||
KASAN uses compile-time instrumentation to insert validity checks before every
|
||||
memory access, and therefore requires a compiler version that supports that.
|
||||
|
||||
Currently KASAN is supported only for the x86_64 and arm64 architectures.
|
||||
Generic KASAN is supported in both GCC and Clang. With GCC it requires version
|
||||
4.9.2 or later for basic support and version 5.0 or later for detection of
|
||||
out-of-bounds accesses for stack and global variables and for inline
|
||||
instrumentation mode (see the Usage section). With Clang it requires version
|
||||
7.0.0 or later and it doesn't support detection of out-of-bounds accesses for
|
||||
global variables yet.
|
||||
|
||||
Tag-based KASAN is only supported in Clang and requires version 7.0.0 or later.
|
||||
|
||||
Currently generic KASAN is supported for the x86_64, arm64, xtensa and s390
|
||||
architectures, and tag-based KASAN is supported only for arm64.
|
||||
|
||||
Usage
|
||||
-----
|
||||
@@ -21,12 +31,14 @@ To enable KASAN configure kernel with::
|
||||
|
||||
CONFIG_KASAN = y
|
||||
|
||||
and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline and
|
||||
inline are compiler instrumentation types. The former produces smaller binary
|
||||
the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
|
||||
version 5.0 or later.
|
||||
and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN) and
|
||||
CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN).
|
||||
|
||||
KASAN works with both SLUB and SLAB memory allocators.
|
||||
You also need to choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE.
|
||||
Outline and inline are compiler instrumentation types. The former produces
|
||||
a smaller binary while the latter is 1.1 - 2 times faster.
|
||||
|
||||
Both KASAN modes work with both SLUB and SLAB memory allocators.
|
||||
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
|
||||
|
||||
To disable instrumentation for specific files or directories, add a line
|
||||
@@ -43,85 +55,85 @@ similar to the following to the respective kernel Makefile:
|
||||
Error reports
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
A typical out of bounds access report looks like this::
|
||||
A typical out-of-bounds access generic KASAN report looks like this::
|
||||
|
||||
==================================================================
|
||||
BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3
|
||||
Write of size 1 by task modprobe/1689
|
||||
=============================================================================
|
||||
BUG kmalloc-128 (Not tainted): kasan error
|
||||
-----------------------------------------------------------------------------
|
||||
BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan]
|
||||
Write of size 1 at addr ffff8801f44ec37b by task insmod/2760
|
||||
|
||||
Disabling lock debugging due to kernel taint
|
||||
INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689
|
||||
__slab_alloc+0x4b4/0x4f0
|
||||
kmem_cache_alloc_trace+0x10b/0x190
|
||||
kmalloc_oob_right+0x3d/0x75 [test_kasan]
|
||||
init_module+0x9/0x47 [test_kasan]
|
||||
do_one_initcall+0x99/0x200
|
||||
load_module+0x2cb3/0x3b20
|
||||
SyS_finit_module+0x76/0x80
|
||||
system_call_fastpath+0x12/0x17
|
||||
INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080
|
||||
INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720
|
||||
|
||||
Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
|
||||
Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
|
||||
Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc ........
|
||||
Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
|
||||
CPU: 0 PID: 1689 Comm: modprobe Tainted: G B 3.18.0-rc1-mm1+ #98
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
|
||||
ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78
|
||||
ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8
|
||||
ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558
|
||||
CPU: 1 PID: 2760 Comm: insmod Not tainted 4.19.0-rc3+ #698
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
|
||||
Call Trace:
|
||||
[<ffffffff81cc68ae>] dump_stack+0x46/0x58
|
||||
[<ffffffff811fd848>] print_trailer+0xf8/0x160
|
||||
[<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
|
||||
[<ffffffff811ff0f5>] object_err+0x35/0x40
|
||||
[<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
|
||||
[<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0
|
||||
[<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
|
||||
[<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40
|
||||
[<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
|
||||
[<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
|
||||
[<ffffffff8120a995>] __asan_store1+0x75/0xb0
|
||||
[<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan]
|
||||
[<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
|
||||
[<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan]
|
||||
[<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan]
|
||||
[<ffffffff810002d9>] do_one_initcall+0x99/0x200
|
||||
[<ffffffff811e4e5c>] ? __vunmap+0xec/0x160
|
||||
[<ffffffff81114f63>] load_module+0x2cb3/0x3b20
|
||||
[<ffffffff8110fd70>] ? m_show+0x240/0x240
|
||||
[<ffffffff81115f06>] SyS_finit_module+0x76/0x80
|
||||
[<ffffffff81cd3129>] system_call_fastpath+0x12/0x17
|
||||
dump_stack+0x94/0xd8
|
||||
print_address_description+0x73/0x280
|
||||
kasan_report+0x144/0x187
|
||||
__asan_report_store1_noabort+0x17/0x20
|
||||
kmalloc_oob_right+0xa8/0xbc [test_kasan]
|
||||
kmalloc_tests_init+0x16/0x700 [test_kasan]
|
||||
do_one_initcall+0xa5/0x3ae
|
||||
do_init_module+0x1b6/0x547
|
||||
load_module+0x75df/0x8070
|
||||
__do_sys_init_module+0x1c6/0x200
|
||||
__x64_sys_init_module+0x6e/0xb0
|
||||
do_syscall_64+0x9f/0x2c0
|
||||
entry_SYSCALL_64_after_hwframe+0x44/0xa9
|
||||
RIP: 0033:0x7f96443109da
|
||||
RSP: 002b:00007ffcf0b51b08 EFLAGS: 00000202 ORIG_RAX: 00000000000000af
|
||||
RAX: ffffffffffffffda RBX: 000055dc3ee521a0 RCX: 00007f96443109da
|
||||
RDX: 00007f96445cff88 RSI: 0000000000057a50 RDI: 00007f9644992000
|
||||
RBP: 000055dc3ee510b0 R08: 0000000000000003 R09: 0000000000000000
|
||||
R10: 00007f964430cd0a R11: 0000000000000202 R12: 00007f96445cff88
|
||||
R13: 000055dc3ee51090 R14: 0000000000000000 R15: 0000000000000000
|
||||
|
||||
Allocated by task 2760:
|
||||
save_stack+0x43/0xd0
|
||||
kasan_kmalloc+0xa7/0xd0
|
||||
kmem_cache_alloc_trace+0xe1/0x1b0
|
||||
kmalloc_oob_right+0x56/0xbc [test_kasan]
|
||||
kmalloc_tests_init+0x16/0x700 [test_kasan]
|
||||
do_one_initcall+0xa5/0x3ae
|
||||
do_init_module+0x1b6/0x547
|
||||
load_module+0x75df/0x8070
|
||||
__do_sys_init_module+0x1c6/0x200
|
||||
__x64_sys_init_module+0x6e/0xb0
|
||||
do_syscall_64+0x9f/0x2c0
|
||||
entry_SYSCALL_64_after_hwframe+0x44/0xa9
|
||||
|
||||
Freed by task 815:
|
||||
save_stack+0x43/0xd0
|
||||
__kasan_slab_free+0x135/0x190
|
||||
kasan_slab_free+0xe/0x10
|
||||
kfree+0x93/0x1a0
|
||||
umh_complete+0x6a/0xa0
|
||||
call_usermodehelper_exec_async+0x4c3/0x640
|
||||
ret_from_fork+0x35/0x40
|
||||
|
||||
The buggy address belongs to the object at ffff8801f44ec300
|
||||
which belongs to the cache kmalloc-128 of size 128
|
||||
The buggy address is located 123 bytes inside of
|
||||
128-byte region [ffff8801f44ec300, ffff8801f44ec380)
|
||||
The buggy address belongs to the page:
|
||||
page:ffffea0007d13b00 count:1 mapcount:0 mapping:ffff8801f7001640 index:0x0
|
||||
flags: 0x200000000000100(slab)
|
||||
raw: 0200000000000100 ffffea0007d11dc0 0000001a0000001a ffff8801f7001640
|
||||
raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000
|
||||
page dumped because: kasan: bad access detected
|
||||
|
||||
Memory state around the buggy address:
|
||||
ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc
|
||||
ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00
|
||||
>ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc
|
||||
^
|
||||
ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb
|
||||
ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
|
||||
ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
|
||||
ffff8801f44ec200: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
|
||||
ffff8801f44ec280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
>ffff8801f44ec300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03
|
||||
^
|
||||
ffff8801f44ec380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
|
||||
ffff8801f44ec400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
==================================================================
|
||||
|
||||
The header of the report describes what kind of bug happened and what kind of
|
||||
access caused it. It's followed by the description of the accessed slub object
|
||||
(see 'SLUB Debug output' section in Documentation/vm/slub.rst for details) and
|
||||
the description of the accessed memory page.
|
||||
The header of the report provides a short summary of what kind of bug happened
|
||||
and what kind of access caused it. It's followed by a stack trace of the bad
|
||||
access, a stack trace of where the accessed memory was allocated (in case bad
|
||||
access happens on a slab object), and a stack trace of where the object was
|
||||
freed (in case of a use-after-free bug report). Next comes a description of
|
||||
the accessed slab object and information about the accessed memory page.
|
||||
|
||||
In the last section the report shows memory state around the accessed address.
|
||||
Reading this part requires some understanding of how KASAN works.
|
||||
@@ -138,18 +150,24 @@ inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h).
|
||||
In the report above the arrows point to the shadow byte 03, which means that
|
||||
the accessed address is partially accessible.
|
||||
|
||||
For tag-based KASAN this last report section shows the memory tags around the
|
||||
accessed address (see Implementation details section).
|
||||
|
||||
|
||||
Implementation details
|
||||
----------------------
|
||||
|
||||
Generic KASAN
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
From a high level, our approach to memory error detection is similar to that
|
||||
of kmemcheck: use shadow memory to record whether each byte of memory is safe
|
||||
to access, and use compile-time instrumentation to check shadow memory on each
|
||||
memory access.
|
||||
to access, and use compile-time instrumentation to insert checks of shadow
|
||||
memory on each memory access.
|
||||
|
||||
AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory
|
||||
(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and
|
||||
offset to translate a memory address to its corresponding shadow address.
|
||||
Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (e.g. 16TB
|
||||
to cover 128TB on x86_64) and uses direct mapping with a scale and offset to
|
||||
translate a memory address to its corresponding shadow address.
|
||||
|
||||
Here is the function which translates an address to its corresponding shadow
|
||||
address::
|
||||
@@ -162,12 +180,38 @@ address::
|
||||
|
||||
where ``KASAN_SHADOW_SCALE_SHIFT = 3``.
|
||||
|
||||
Compile-time instrumentation used for checking memory accesses. Compiler inserts
|
||||
function calls (__asan_load*(addr), __asan_store*(addr)) before each memory
|
||||
access of size 1, 2, 4, 8 or 16. These functions check whether memory access is
|
||||
valid or not by checking corresponding shadow memory.
|
||||
Compile-time instrumentation is used to insert memory access checks. Compiler
|
||||
inserts function calls (__asan_load*(addr), __asan_store*(addr)) before each
|
||||
memory access of size 1, 2, 4, 8 or 16. These functions check whether memory
|
||||
access is valid or not by checking corresponding shadow memory.
|
||||
|
||||
GCC 5.0 can perform inline instrumentation. Instead of making
|
||||
function calls GCC directly inserts the code to check the shadow memory.
|
||||
This option significantly enlarges kernel but it gives x1.1-x2 performance
|
||||
boost over outline instrumented kernel.
|
||||
|
||||
Software tag-based KASAN
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Tag-based KASAN uses the Top Byte Ignore (TBI) feature of modern arm64 CPUs to
|
||||
store a pointer tag in the top byte of kernel pointers. Like generic KASAN it
|
||||
uses shadow memory to store memory tags associated with each 16-byte memory
|
||||
cell (therefore it dedicates 1/16th of the kernel memory for shadow memory).
|
||||
|
||||
On each memory allocation tag-based KASAN generates a random tag, tags the
|
||||
allocated memory with this tag, and embeds this tag into the returned pointer.
|
||||
Software tag-based KASAN uses compile-time instrumentation to insert checks
|
||||
before each memory access. These checks make sure that tag of the memory that
|
||||
is being accessed is equal to tag of the pointer that is used to access this
|
||||
memory. In case of a tag mismatch tag-based KASAN prints a bug report.
|
||||
|
||||
Software tag-based KASAN also has two instrumentation modes (outline, that
|
||||
emits callbacks to check memory accesses; and inline, that performs the shadow
|
||||
memory checks inline). With outline instrumentation mode, a bug report is
|
||||
simply printed from the function that performs the access check. With inline
|
||||
instrumentation a brk instruction is emitted by the compiler, and a dedicated
|
||||
brk handler is used to print bug reports.
|
||||
|
||||
A potential expansion of this mode is a hardware tag-based mode, which would
|
||||
use hardware memory tagging support instead of compiler instrumentation and
|
||||
manual shadow memory manipulation.
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
Armadeus ST0700 Adapt. A Santek ST0700I5Y-RBSLW 7.0" WVGA (800x480) TFT with
|
||||
an adapter board.
|
||||
|
||||
Required properties:
|
||||
- compatible: "armadeus,st0700-adapt"
|
||||
- power-supply: see panel-common.txt
|
||||
|
||||
Optional properties:
|
||||
- backlight: see panel-common.txt
|
||||
@@ -11,11 +11,13 @@ New driver handles the following
|
||||
|
||||
Required properties:
|
||||
- compatible: Must be "samsung,exynos-adc-v1"
|
||||
for exynos4412/5250 and s5pv210 controllers.
|
||||
for Exynos5250 controllers.
|
||||
Must be "samsung,exynos-adc-v2" for
|
||||
future controllers.
|
||||
Must be "samsung,exynos3250-adc" for
|
||||
controllers compatible with ADC of Exynos3250.
|
||||
Must be "samsung,exynos4212-adc" for
|
||||
controllers compatible with ADC of Exynos4212 and Exynos4412.
|
||||
Must be "samsung,exynos7-adc" for
|
||||
the ADC in Exynos7 and compatibles
|
||||
Must be "samsung,s3c2410-adc" for
|
||||
@@ -28,6 +30,8 @@ Required properties:
|
||||
the ADC in s3c2443 and compatibles
|
||||
Must be "samsung,s3c6410-adc" for
|
||||
the ADC in s3c6410 and compatibles
|
||||
Must be "samsung,s5pv210-adc" for
|
||||
the ADC in s5pv210 and compatibles
|
||||
- reg: List of ADC register address range
|
||||
- The base address and range of ADC register
|
||||
- The base address and range of ADC_PHY register (every
|
||||
|
||||
@@ -62,6 +62,10 @@ Optional properties:
|
||||
be referred to mmc-pwrseq-simple.txt. But now it's reused as a tunable delay
|
||||
waiting for I/O signalling and card power supply to be stable, regardless of
|
||||
whether pwrseq-simple is used. Defaults to 10ms if not specified.
|
||||
- supports-cqe : The presence of this property indicates that the corresponding
|
||||
MMC host controller supports HW command queue feature.
|
||||
- disable-cqe-dcmd: This property indicates that the MMC controller's command
|
||||
queue engine (CQE) does not support direct commands (DCMDs).
|
||||
|
||||
*NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line
|
||||
polarity properties, we have to fix the meaning of the "normal" and "inverted"
|
||||
|
||||
@@ -4,6 +4,7 @@ Required properties:
|
||||
- compatible: Should be one of the following:
|
||||
- "microchip,mcp2510" for MCP2510.
|
||||
- "microchip,mcp2515" for MCP2515.
|
||||
- "microchip,mcp25625" for MCP25625.
|
||||
- reg: SPI chip select.
|
||||
- clocks: The clock feeding the CAN controller.
|
||||
- interrupts: Should contain IRQ line for the CAN controller.
|
||||
|
||||
@@ -16,7 +16,7 @@ Required properties:
|
||||
|
||||
Optional properties:
|
||||
- interrupts: interrupt line number for the SMI error/done interrupt
|
||||
- clocks: phandle for up to three required clocks for the MDIO instance
|
||||
- clocks: phandle for up to four required clocks for the MDIO instance
|
||||
|
||||
The child nodes of the MDIO driver are the individual PHY devices
|
||||
connected to this MDIO bus. They must have a "reg" property given the
|
||||
|
||||
@@ -177,6 +177,7 @@ mkprep
|
||||
mkregtable
|
||||
mktables
|
||||
mktree
|
||||
mkutf8data
|
||||
modpost
|
||||
modules.builtin
|
||||
modules.order
|
||||
@@ -255,6 +256,7 @@ vsyscall_32.lds
|
||||
wanxlfw.inc
|
||||
uImage
|
||||
unifdef
|
||||
utf8data.h
|
||||
wakeup.bin
|
||||
wakeup.elf
|
||||
wakeup.lds
|
||||
|
||||
@@ -242,7 +242,8 @@ State machine
|
||||
:c:func:`driver_bound()`.)
|
||||
|
||||
* Before a consumer device is probed, presence of supplier drivers is
|
||||
verified by checking that links to suppliers are in ``DL_STATE_AVAILABLE``
|
||||
verified by checking the consumer device is not in the wait_for_suppliers
|
||||
list and by checking that links to suppliers are in ``DL_STATE_AVAILABLE``
|
||||
state. The state of the links is updated to ``DL_STATE_CONSUMER_PROBE``.
|
||||
(Call to :c:func:`device_links_check_suppliers()` from
|
||||
:c:func:`really_probe()`.)
|
||||
|
||||
@@ -24,3 +24,4 @@ order.
|
||||
.. include:: bigalloc.rst
|
||||
.. include:: inlinedata.rst
|
||||
.. include:: eainode.rst
|
||||
.. include:: verity.rst
|
||||
|
||||
41
Documentation/filesystems/ext4/verity.rst
Normal file
41
Documentation/filesystems/ext4/verity.rst
Normal file
@@ -0,0 +1,41 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Verity files
|
||||
------------
|
||||
|
||||
ext4 supports fs-verity, which is a filesystem feature that provides
|
||||
Merkle tree based hashing for individual readonly files. Most of
|
||||
fs-verity is common to all filesystems that support it; see
|
||||
:ref:`Documentation/filesystems/fsverity.rst <fsverity>` for the
|
||||
fs-verity documentation. However, the on-disk layout of the verity
|
||||
metadata is filesystem-specific. On ext4, the verity metadata is
|
||||
stored after the end of the file data itself, in the following format:
|
||||
|
||||
- Zero-padding to the next 65536-byte boundary. This padding need not
|
||||
actually be allocated on-disk, i.e. it may be a hole.
|
||||
|
||||
- The Merkle tree, as documented in
|
||||
:ref:`Documentation/filesystems/fsverity.rst
|
||||
<fsverity_merkle_tree>`, with the tree levels stored in order from
|
||||
root to leaf, and the tree blocks within each level stored in their
|
||||
natural order.
|
||||
|
||||
- Zero-padding to the next filesystem block boundary.
|
||||
|
||||
- The verity descriptor, as documented in
|
||||
:ref:`Documentation/filesystems/fsverity.rst <fsverity_descriptor>`,
|
||||
with optionally appended signature blob.
|
||||
|
||||
- Zero-padding to the next offset that is 4 bytes before a filesystem
|
||||
block boundary.
|
||||
|
||||
- The size of the verity descriptor in bytes, as a 4-byte little
|
||||
endian integer.
|
||||
|
||||
Verity inodes have EXT4_VERITY_FL set, and they must use extents, i.e.
|
||||
EXT4_EXTENTS_FL must be set and EXT4_INLINE_DATA_FL must be clear.
|
||||
They can have EXT4_ENCRYPT_FL set, in which case the verity metadata
|
||||
is encrypted as well as the data itself.
|
||||
|
||||
Verity files cannot have blocks allocated past the end of the verity
|
||||
metadata.
|
||||
@@ -157,6 +157,11 @@ noinline_data Disable the inline data feature, inline data feature is
|
||||
enabled by default.
|
||||
data_flush Enable data flushing before checkpoint in order to
|
||||
persist data of regular files and symlinks.
|
||||
reserve_root=%d Support configuring reserved space which is used for
|
||||
allocation from a privileged user with specified uid or
|
||||
gid, unit: 4KB, the default limit is 0.2% of user blocks.
|
||||
resuid=%d The user ID which may use the reserved blocks.
|
||||
resgid=%d The group ID which may use the reserved blocks.
|
||||
fault_injection=%d Enable fault injection in all supported types with
|
||||
specified injection rate.
|
||||
fault_type=%d Support configuring fault injection type, should be
|
||||
@@ -214,11 +219,22 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix",
|
||||
non-atomic files likewise "nobarrier" mount option.
|
||||
test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt
|
||||
context. The fake fscrypt context is used by xfstests.
|
||||
checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable"
|
||||
checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
|
||||
to reenable checkpointing. Is enabled by default. While
|
||||
disabled, any unmounting or unexpected shutdowns will cause
|
||||
the filesystem contents to appear as they did when the
|
||||
filesystem was mounted with that option.
|
||||
While mounting with checkpoint=disabled, the filesystem must
|
||||
run garbage collection to ensure that all available space can
|
||||
be used. If this takes too much time, the mount may return
|
||||
EAGAIN. You may optionally add a value to indicate how much
|
||||
of the disk you would be willing to temporarily give up to
|
||||
avoid additional garbage collection. This can be given as a
|
||||
number of blocks, or as a percent. For instance, mounting
|
||||
with checkpoint=disable:100% would always succeed, but it may
|
||||
hide up to all remaining free space. The actual space that
|
||||
would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
|
||||
This space is reclaimed once checkpoint=enable.
|
||||
|
||||
================================================================================
|
||||
DEBUGFS ENTRIES
|
||||
@@ -246,11 +262,14 @@ Files in /sys/fs/f2fs/<devname>
|
||||
..............................................................................
|
||||
File Content
|
||||
|
||||
gc_max_sleep_time This tuning parameter controls the maximum sleep
|
||||
gc_urgent_sleep_time This parameter controls sleep time for gc_urgent.
|
||||
500 ms is set by default. See above gc_urgent.
|
||||
|
||||
gc_min_sleep_time This tuning parameter controls the minimum sleep
|
||||
time for the garbage collection thread. Time is
|
||||
in milliseconds.
|
||||
|
||||
gc_min_sleep_time This tuning parameter controls the minimum sleep
|
||||
gc_max_sleep_time This tuning parameter controls the maximum sleep
|
||||
time for the garbage collection thread. Time is
|
||||
in milliseconds.
|
||||
|
||||
@@ -270,9 +289,6 @@ Files in /sys/fs/f2fs/<devname>
|
||||
to 1, background thread starts to do GC by given
|
||||
gc_urgent_sleep_time interval.
|
||||
|
||||
gc_urgent_sleep_time This parameter controls sleep time for gc_urgent.
|
||||
500 ms is set by default. See above gc_urgent.
|
||||
|
||||
reclaim_segments This parameter controls the number of prefree
|
||||
segments to be reclaimed. If the number of prefree
|
||||
segments is larger than the number of segments
|
||||
@@ -287,7 +303,16 @@ Files in /sys/fs/f2fs/<devname>
|
||||
checkpoint is triggered, and issued during the
|
||||
checkpoint. By default, it is disabled with 0.
|
||||
|
||||
trim_sections This parameter controls the number of sections
|
||||
discard_granularity This parameter controls the granularity of discard
|
||||
command size. It will issue discard commands only if
|
||||
the size is larger than given granularity. Its
|
||||
unit size is 4KB, and 4 (=16KB) is set by default.
|
||||
The maximum value is 128 (=512KB).
|
||||
|
||||
reserved_blocks This parameter indicates the number of blocks that
|
||||
f2fs reserves internally for root.
|
||||
|
||||
batched_trim_sections This parameter controls the number of sections
|
||||
to be trimmed out in batch mode when FITRIM
|
||||
conducts. 32 sections is set by default.
|
||||
|
||||
@@ -309,11 +334,35 @@ Files in /sys/fs/f2fs/<devname>
|
||||
the number is less than this value, it triggers
|
||||
in-place-updates.
|
||||
|
||||
min_seq_blocks This parameter controls the threshold to serialize
|
||||
write IOs issued by multiple threads in parallel.
|
||||
|
||||
min_hot_blocks This parameter controls the threshold to allocate
|
||||
a hot data log for pending data blocks to write.
|
||||
|
||||
min_ssr_sections This parameter adds the threshold when deciding
|
||||
SSR block allocation. If this is large, SSR mode
|
||||
will be enabled early.
|
||||
|
||||
ram_thresh This parameter controls the memory footprint used
|
||||
by free nids and cached nat entries. By default,
|
||||
10 is set, which indicates 10 MB / 1 GB RAM.
|
||||
|
||||
ra_nid_pages When building free nids, F2FS reads NAT blocks
|
||||
ahead for speed up. Default is 0.
|
||||
|
||||
dirty_nats_ratio Given dirty ratio of cached nat entries, F2FS
|
||||
determines flushing them in background.
|
||||
|
||||
max_victim_search This parameter controls the number of trials to
|
||||
find a victim segment when conducting SSR and
|
||||
cleaning operations. The default value is 4096
|
||||
which covers 8GB block address range.
|
||||
|
||||
migration_granularity For large-sized sections, F2FS can stop GC given
|
||||
this granularity instead of reclaiming entire
|
||||
section.
|
||||
|
||||
dir_level This parameter controls the directory level to
|
||||
support large directory. If a directory has a
|
||||
number of files, it can reduce the file lookup
|
||||
@@ -321,9 +370,56 @@ Files in /sys/fs/f2fs/<devname>
|
||||
Otherwise, it needs to decrease this value to
|
||||
reduce the space overhead. The default value is 0.
|
||||
|
||||
ram_thresh This parameter controls the memory footprint used
|
||||
by free nids and cached nat entries. By default,
|
||||
10 is set, which indicates 10 MB / 1 GB RAM.
|
||||
cp_interval F2FS tries to do checkpoint periodically, 60 secs
|
||||
by default.
|
||||
|
||||
idle_interval F2FS detects system is idle, if there's no F2FS
|
||||
operations during given interval, 5 secs by
|
||||
default.
|
||||
|
||||
discard_idle_interval F2FS detects the discard thread is idle, given
|
||||
time interval. Default is 5 secs.
|
||||
|
||||
gc_idle_interval F2FS detects the GC thread is idle, given time
|
||||
interval. Default is 5 secs.
|
||||
|
||||
umount_discard_timeout When unmounting the disk, F2FS waits for finishing
|
||||
queued discard commands which can take huge time.
|
||||
This gives time out for it, 5 secs by default.
|
||||
|
||||
iostat_enable This controls to enable/disable iostat in F2FS.
|
||||
|
||||
readdir_ra This enables/disables readahead of inode blocks
|
||||
in readdir, and default is enabled.
|
||||
|
||||
gc_pin_file_thresh This indicates how many GC attempts can fail for the
|
||||
pinned file. If it exceeds this, F2FS doesn't
|
||||
guarantee its pinning state. 2048 trials is set
|
||||
by default.
|
||||
|
||||
extension_list This enables to change extension_list for hot/cold
|
||||
files in runtime.
|
||||
|
||||
inject_rate This controls injection rate of arbitrary faults.
|
||||
|
||||
inject_type This controls injection type of arbitrary faults.
|
||||
|
||||
dirty_segments This shows # of dirty segments.
|
||||
|
||||
lifetime_write_kbytes This shows # of data written to the disk.
|
||||
|
||||
features This shows current features enabled on F2FS.
|
||||
|
||||
current_reserved_blocks This shows # of blocks currently reserved.
|
||||
|
||||
unusable If checkpoint=disable, this shows the number of
|
||||
blocks that are unusable.
|
||||
If checkpoint=enable it shows the number of blocks
|
||||
that would be unusable if checkpoint=disable were
|
||||
to be set.
|
||||
|
||||
encoding This shows the encoding used for casefolding.
|
||||
If casefolding is not enabled, returns (none)
|
||||
|
||||
================================================================================
|
||||
USAGE
|
||||
@@ -716,3 +812,28 @@ WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
|
||||
WRITE_LIFE_NONE " WRITE_LIFE_NONE
|
||||
WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
|
||||
WRITE_LIFE_LONG " WRITE_LIFE_LONG
|
||||
|
||||
Fallocate(2) Policy
|
||||
-------------------
|
||||
|
||||
The default policy follows the below posix rule.
|
||||
|
||||
Allocating disk space
|
||||
The default operation (i.e., mode is zero) of fallocate() allocates
|
||||
the disk space within the range specified by offset and len. The
|
||||
file size (as reported by stat(2)) will be changed if offset+len is
|
||||
greater than the file size. Any subregion within the range specified
|
||||
by offset and len that did not contain data before the call will be
|
||||
initialized to zero. This default behavior closely resembles the
|
||||
behavior of the posix_fallocate(3) library function, and is intended
|
||||
as a method of optimally implementing that function.
|
||||
|
||||
However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) prior to
|
||||
fallocate(fd, DEFAULT_MODE), it allocates on-disk block addresses having
|
||||
zero or random data, which is useful to the below scenario where:
|
||||
1. create(fd)
|
||||
2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
|
||||
3. fallocate(fd, 0, 0, size)
|
||||
4. address = fibmap(fd, offset)
|
||||
5. open(blkdev)
|
||||
6. write(blkdev, address)
|
||||
|
||||
@@ -72,6 +72,9 @@ Online attacks
|
||||
fscrypt (and storage encryption in general) can only provide limited
|
||||
protection, if any at all, against online attacks. In detail:
|
||||
|
||||
Side-channel attacks
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
fscrypt is only resistant to side-channel attacks, such as timing or
|
||||
electromagnetic attacks, to the extent that the underlying Linux
|
||||
Cryptographic API algorithms are. If a vulnerable algorithm is used,
|
||||
@@ -80,29 +83,90 @@ attacker to mount a side channel attack against the online system.
|
||||
Side channel attacks may also be mounted against applications
|
||||
consuming decrypted data.
|
||||
|
||||
After an encryption key has been provided, fscrypt is not designed to
|
||||
hide the plaintext file contents or filenames from other users on the
|
||||
same system, regardless of the visibility of the keyring key.
|
||||
Instead, existing access control mechanisms such as file mode bits,
|
||||
POSIX ACLs, LSMs, or mount namespaces should be used for this purpose.
|
||||
Also note that as long as the encryption keys are *anywhere* in
|
||||
memory, an online attacker can necessarily compromise them by mounting
|
||||
a physical attack or by exploiting any kernel security vulnerability
|
||||
which provides an arbitrary memory read primitive.
|
||||
Unauthorized file access
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
While it is ostensibly possible to "evict" keys from the system,
|
||||
recently accessed encrypted files will remain accessible at least
|
||||
until the filesystem is unmounted or the VFS caches are dropped, e.g.
|
||||
using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the
|
||||
RAM is compromised before being powered off, it will likely still be
|
||||
possible to recover portions of the plaintext file contents, if not
|
||||
some of the encryption keys as well. (Since Linux v4.12, all
|
||||
in-kernel keys related to fscrypt are sanitized before being freed.
|
||||
However, userspace would need to do its part as well.)
|
||||
After an encryption key has been added, fscrypt does not hide the
|
||||
plaintext file contents or filenames from other users on the same
|
||||
system. Instead, existing access control mechanisms such as file mode
|
||||
bits, POSIX ACLs, LSMs, or namespaces should be used for this purpose.
|
||||
|
||||
Currently, fscrypt does not prevent a user from maliciously providing
|
||||
an incorrect key for another user's existing encrypted files. A
|
||||
protection against this is planned.
|
||||
(For the reasoning behind this, understand that while the key is
|
||||
added, the confidentiality of the data, from the perspective of the
|
||||
system itself, is *not* protected by the mathematical properties of
|
||||
encryption but rather only by the correctness of the kernel.
|
||||
Therefore, any encryption-specific access control checks would merely
|
||||
be enforced by kernel *code* and therefore would be largely redundant
|
||||
with the wide variety of access control mechanisms already available.)
|
||||
|
||||
Kernel memory compromise
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
An attacker who compromises the system enough to read from arbitrary
|
||||
memory, e.g. by mounting a physical attack or by exploiting a kernel
|
||||
security vulnerability, can compromise all encryption keys that are
|
||||
currently in use.
|
||||
|
||||
However, fscrypt allows encryption keys to be removed from the kernel,
|
||||
which may protect them from later compromise.
|
||||
|
||||
In more detail, the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl (or the
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl) can wipe a master
|
||||
encryption key from kernel memory. If it does so, it will also try to
|
||||
evict all cached inodes which had been "unlocked" using the key,
|
||||
thereby wiping their per-file keys and making them once again appear
|
||||
"locked", i.e. in ciphertext or encrypted form.
|
||||
|
||||
However, these ioctls have some limitations:
|
||||
|
||||
- Per-file keys for in-use files will *not* be removed or wiped.
|
||||
Therefore, for maximum effect, userspace should close the relevant
|
||||
encrypted files and directories before removing a master key, as
|
||||
well as kill any processes whose working directory is in an affected
|
||||
encrypted directory.
|
||||
|
||||
- The kernel cannot magically wipe copies of the master key(s) that
|
||||
userspace might have as well. Therefore, userspace must wipe all
|
||||
copies of the master key(s) it makes as well; normally this should
|
||||
be done immediately after FS_IOC_ADD_ENCRYPTION_KEY, without waiting
|
||||
for FS_IOC_REMOVE_ENCRYPTION_KEY. Naturally, the same also applies
|
||||
to all higher levels in the key hierarchy. Userspace should also
|
||||
follow other security precautions such as mlock()ing memory
|
||||
containing keys to prevent it from being swapped out.
|
||||
|
||||
- In general, decrypted contents and filenames in the kernel VFS
|
||||
caches are freed but not wiped. Therefore, portions thereof may be
|
||||
recoverable from freed memory, even after the corresponding key(s)
|
||||
were wiped. To partially solve this, you can set
|
||||
CONFIG_PAGE_POISONING=y in your kernel config and add page_poison=1
|
||||
to your kernel command line. However, this has a performance cost.
|
||||
|
||||
- Secret keys might still exist in CPU registers, in crypto
|
||||
accelerator hardware (if used by the crypto API to implement any of
|
||||
the algorithms), or in other places not explicitly considered here.
|
||||
|
||||
Limitations of v1 policies
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
v1 encryption policies have some weaknesses with respect to online
|
||||
attacks:
|
||||
|
||||
- There is no verification that the provided master key is correct.
|
||||
Therefore, a malicious user can temporarily associate the wrong key
|
||||
with another user's encrypted files to which they have read-only
|
||||
access. Because of filesystem caching, the wrong key will then be
|
||||
used by the other user's accesses to those files, even if the other
|
||||
user has the correct key in their own keyring. This violates the
|
||||
meaning of "read-only access".
|
||||
|
||||
- A compromise of a per-file key also compromises the master key from
|
||||
which it was derived.
|
||||
|
||||
- Non-root users cannot securely remove encryption keys.
|
||||
|
||||
All the above problems are fixed with v2 encryption policies. For
|
||||
this reason among others, it is recommended to use v2 encryption
|
||||
policies on all new encrypted directories.
|
||||
|
||||
Key hierarchy
|
||||
=============
|
||||
@@ -123,11 +187,52 @@ appropriate master key. There can be any number of master keys, each
|
||||
of which protects any number of directory trees on any number of
|
||||
filesystems.
|
||||
|
||||
Userspace should generate master keys either using a cryptographically
|
||||
secure random number generator, or by using a KDF (Key Derivation
|
||||
Function). Note that whenever a KDF is used to "stretch" a
|
||||
lower-entropy secret such as a passphrase, it is critical that a KDF
|
||||
designed for this purpose be used, such as scrypt, PBKDF2, or Argon2.
|
||||
Master keys must be real cryptographic keys, i.e. indistinguishable
|
||||
from random bytestrings of the same length. This implies that users
|
||||
**must not** directly use a password as a master key, zero-pad a
|
||||
shorter key, or repeat a shorter key. Security cannot be guaranteed
|
||||
if userspace makes any such error, as the cryptographic proofs and
|
||||
analysis would no longer apply.
|
||||
|
||||
Instead, users should generate master keys either using a
|
||||
cryptographically secure random number generator, or by using a KDF
|
||||
(Key Derivation Function). The kernel does not do any key stretching;
|
||||
therefore, if userspace derives the key from a low-entropy secret such
|
||||
as a passphrase, it is critical that a KDF designed for this purpose
|
||||
be used, such as scrypt, PBKDF2, or Argon2.
|
||||
|
||||
Key derivation function
|
||||
-----------------------
|
||||
|
||||
With one exception, fscrypt never uses the master key(s) for
|
||||
encryption directly. Instead, they are only used as input to a KDF
|
||||
(Key Derivation Function) to derive the actual keys.
|
||||
|
||||
The KDF used for a particular master key differs depending on whether
|
||||
the key is used for v1 encryption policies or for v2 encryption
|
||||
policies. Users **must not** use the same key for both v1 and v2
|
||||
encryption policies. (No real-world attack is currently known on this
|
||||
specific case of key reuse, but its security cannot be guaranteed
|
||||
since the cryptographic proofs and analysis would no longer apply.)
|
||||
|
||||
For v1 encryption policies, the KDF only supports deriving per-file
|
||||
encryption keys. It works by encrypting the master key with
|
||||
AES-128-ECB, using the file's 16-byte nonce as the AES key. The
|
||||
resulting ciphertext is used as the derived key. If the ciphertext is
|
||||
longer than needed, then it is truncated to the needed length.
|
||||
|
||||
For v2 encryption policies, the KDF is HKDF-SHA512. The master key is
|
||||
passed as the "input keying material", no salt is used, and a distinct
|
||||
"application-specific information string" is used for each distinct
|
||||
key to be derived. For example, when a per-file encryption key is
|
||||
derived, the application-specific information string is the file's
|
||||
nonce prefixed with "fscrypt\\0" and a context byte. Different
|
||||
context bytes are used for other types of derived keys.
|
||||
|
||||
HKDF-SHA512 is preferred to the original AES-128-ECB based KDF because
|
||||
HKDF is more flexible, is nonreversible, and evenly distributes
|
||||
entropy from the master key. HKDF is also standardized and widely
|
||||
used by other software, whereas the AES-128-ECB based KDF is ad-hoc.
|
||||
|
||||
Per-file keys
|
||||
-------------
|
||||
@@ -138,29 +243,9 @@ files doesn't map to the same ciphertext, or vice versa. In most
|
||||
cases, fscrypt does this by deriving per-file keys. When a new
|
||||
encrypted inode (regular file, directory, or symlink) is created,
|
||||
fscrypt randomly generates a 16-byte nonce and stores it in the
|
||||
inode's encryption xattr. Then, it uses a KDF (Key Derivation
|
||||
Function) to derive the file's key from the master key and nonce.
|
||||
|
||||
The Adiantum encryption mode (see `Encryption modes and usage`_) is
|
||||
special, since it accepts longer IVs and is suitable for both contents
|
||||
and filenames encryption. For it, a "direct key" option is offered
|
||||
where the file's nonce is included in the IVs and the master key is
|
||||
used for encryption directly. This improves performance; however,
|
||||
users must not use the same master key for any other encryption mode.
|
||||
|
||||
Below, the KDF and design considerations are described in more detail.
|
||||
|
||||
The current KDF works by encrypting the master key with AES-128-ECB,
|
||||
using the file's nonce as the AES key. The output is used as the
|
||||
derived key. If the output is longer than needed, then it is
|
||||
truncated to the needed length.
|
||||
|
||||
Note: this KDF meets the primary security requirement, which is to
|
||||
produce unique derived keys that preserve the entropy of the master
|
||||
key, assuming that the master key is already a good pseudorandom key.
|
||||
However, it is nonstandard and has some problems such as being
|
||||
reversible, so it is generally considered to be a mistake! It may be
|
||||
replaced with HKDF or another more standard KDF in the future.
|
||||
inode's encryption xattr. Then, it uses a KDF (as described in `Key
|
||||
derivation function`_) to derive the file's key from the master key
|
||||
and nonce.
|
||||
|
||||
Key derivation was chosen over key wrapping because wrapped keys would
|
||||
require larger xattrs which would be less likely to fit in-line in the
|
||||
@@ -176,6 +261,37 @@ rejected as it would have prevented ext4 filesystems from being
|
||||
resized, and by itself still wouldn't have been sufficient to prevent
|
||||
the same key from being directly reused for both XTS and CTS-CBC.
|
||||
|
||||
DIRECT_KEY and per-mode keys
|
||||
----------------------------
|
||||
|
||||
The Adiantum encryption mode (see `Encryption modes and usage`_) is
|
||||
suitable for both contents and filenames encryption, and it accepts
|
||||
long IVs --- long enough to hold both an 8-byte logical block number
|
||||
and a 16-byte per-file nonce. Also, the overhead of each Adiantum key
|
||||
is greater than that of an AES-256-XTS key.
|
||||
|
||||
Therefore, to improve performance and save memory, for Adiantum a
|
||||
"direct key" configuration is supported. When the user has enabled
|
||||
this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy,
|
||||
per-file keys are not used. Instead, whenever any data (contents or
|
||||
filenames) is encrypted, the file's 16-byte nonce is included in the
|
||||
IV. Moreover:
|
||||
|
||||
- For v1 encryption policies, the encryption is done directly with the
|
||||
master key. Because of this, users **must not** use the same master
|
||||
key for any other purpose, even for other v1 policies.
|
||||
|
||||
- For v2 encryption policies, the encryption is done with a per-mode
|
||||
key derived using the KDF. Users may use the same master key for
|
||||
other v2 encryption policies.
|
||||
|
||||
Key identifiers
|
||||
---------------
|
||||
|
||||
For master keys used for v2 encryption policies, a unique 16-byte "key
|
||||
identifier" is also derived using the KDF. This value is stored in
|
||||
the clear, since it is needed to reliably identify the key itself.
|
||||
|
||||
Encryption modes and usage
|
||||
==========================
|
||||
|
||||
@@ -191,7 +307,9 @@ Currently, the following pairs of encryption modes are supported:
|
||||
If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
|
||||
|
||||
AES-128-CBC was added only for low-powered embedded devices with
|
||||
crypto accelerators such as CAAM or CESA that do not support XTS.
|
||||
crypto accelerators such as CAAM or CESA that do not support XTS. To
|
||||
use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256
|
||||
implementation) must be enabled so that ESSIV can be used.
|
||||
|
||||
Adiantum is a (primarily) stream cipher-based mode that is fast even
|
||||
on CPUs without dedicated crypto instructions. It's also a true
|
||||
@@ -223,9 +341,10 @@ a little endian number, except that:
|
||||
is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
|
||||
of the file's data encryption key.
|
||||
|
||||
- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in
|
||||
the fscrypt_policy), the file's nonce is also appended to the IV.
|
||||
Currently this is only allowed with the Adiantum encryption mode.
|
||||
- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY
|
||||
set in the fscrypt_policy), the file's nonce is also appended to the
|
||||
IV. Currently this is only allowed with the Adiantum encryption
|
||||
mode.
|
||||
|
||||
Filenames encryption
|
||||
--------------------
|
||||
@@ -267,49 +386,77 @@ User API
|
||||
Setting an encryption policy
|
||||
----------------------------
|
||||
|
||||
FS_IOC_SET_ENCRYPTION_POLICY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an
|
||||
empty directory or verifies that a directory or regular file already
|
||||
has the specified encryption policy. It takes in a pointer to a
|
||||
:c:type:`struct fscrypt_policy`, defined as follows::
|
||||
:c:type:`struct fscrypt_policy_v1` or a :c:type:`struct
|
||||
fscrypt_policy_v2`, defined as follows::
|
||||
|
||||
#define FS_KEY_DESCRIPTOR_SIZE 8
|
||||
|
||||
struct fscrypt_policy {
|
||||
#define FSCRYPT_POLICY_V1 0
|
||||
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
|
||||
struct fscrypt_policy_v1 {
|
||||
__u8 version;
|
||||
__u8 contents_encryption_mode;
|
||||
__u8 filenames_encryption_mode;
|
||||
__u8 flags;
|
||||
__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
|
||||
__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
|
||||
};
|
||||
#define fscrypt_policy fscrypt_policy_v1
|
||||
|
||||
#define FSCRYPT_POLICY_V2 2
|
||||
#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
|
||||
struct fscrypt_policy_v2 {
|
||||
__u8 version;
|
||||
__u8 contents_encryption_mode;
|
||||
__u8 filenames_encryption_mode;
|
||||
__u8 flags;
|
||||
__u8 __reserved[4];
|
||||
__u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
|
||||
};
|
||||
|
||||
This structure must be initialized as follows:
|
||||
|
||||
- ``version`` must be 0.
|
||||
- ``version`` must be FSCRYPT_POLICY_V1 (0) if the struct is
|
||||
:c:type:`fscrypt_policy_v1` or FSCRYPT_POLICY_V2 (2) if the struct
|
||||
is :c:type:`fscrypt_policy_v2`. (Note: we refer to the original
|
||||
policy version as "v1", though its version code is really 0.) For
|
||||
new encrypted directories, use v2 policies.
|
||||
|
||||
- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must
|
||||
be set to constants from ``<linux/fs.h>`` which identify the
|
||||
encryption modes to use. If unsure, use
|
||||
FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode``
|
||||
and FS_ENCRYPTION_MODE_AES_256_CTS (4) for
|
||||
``filenames_encryption_mode``.
|
||||
be set to constants from ``<linux/fscrypt.h>`` which identify the
|
||||
encryption modes to use. If unsure, use FSCRYPT_MODE_AES_256_XTS
|
||||
(1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
|
||||
(4) for ``filenames_encryption_mode``.
|
||||
|
||||
- ``flags`` must contain a value from ``<linux/fs.h>`` which
|
||||
- ``flags`` must contain a value from ``<linux/fscrypt.h>`` which
|
||||
identifies the amount of NUL-padding to use when encrypting
|
||||
filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3).
|
||||
In addition, if the chosen encryption modes are both
|
||||
FS_ENCRYPTION_MODE_ADIANTUM, this can contain
|
||||
FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be
|
||||
used directly, without key derivation.
|
||||
filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3).
|
||||
Additionally, if the encryption modes are both
|
||||
FSCRYPT_MODE_ADIANTUM, this can contain
|
||||
FSCRYPT_POLICY_FLAG_DIRECT_KEY; see `DIRECT_KEY and per-mode keys`_.
|
||||
|
||||
- ``master_key_descriptor`` specifies how to find the master key in
|
||||
the keyring; see `Adding keys`_. It is up to userspace to choose a
|
||||
unique ``master_key_descriptor`` for each master key. The e4crypt
|
||||
and fscrypt tools use the first 8 bytes of
|
||||
- For v2 encryption policies, ``__reserved`` must be zeroed.
|
||||
|
||||
- For v1 encryption policies, ``master_key_descriptor`` specifies how
|
||||
to find the master key in a keyring; see `Adding keys`_. It is up
|
||||
to userspace to choose a unique ``master_key_descriptor`` for each
|
||||
master key. The e4crypt and fscrypt tools use the first 8 bytes of
|
||||
``SHA-512(SHA-512(master_key))``, but this particular scheme is not
|
||||
required. Also, the master key need not be in the keyring yet when
|
||||
FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added
|
||||
before any files can be created in the encrypted directory.
|
||||
|
||||
For v2 encryption policies, ``master_key_descriptor`` has been
|
||||
replaced with ``master_key_identifier``, which is longer and cannot
|
||||
be arbitrarily chosen. Instead, the key must first be added using
|
||||
`FS_IOC_ADD_ENCRYPTION_KEY`_. Then, the ``key_spec.u.identifier``
|
||||
the kernel returned in the :c:type:`struct fscrypt_add_key_arg` must
|
||||
be used as the ``master_key_identifier`` in the :c:type:`struct
|
||||
fscrypt_policy_v2`.
|
||||
|
||||
If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY
|
||||
verifies that the file is an empty directory. If so, the specified
|
||||
encryption policy is assigned to the directory, turning it into an
|
||||
@@ -325,6 +472,15 @@ policy exactly matches the actual one. If they match, then the ioctl
|
||||
returns 0. Otherwise, it fails with EEXIST. This works on both
|
||||
regular files and directories, including nonempty directories.
|
||||
|
||||
When a v2 encryption policy is assigned to a directory, it is also
|
||||
required that either the specified key has been added by the current
|
||||
user or that the caller has CAP_FOWNER in the initial user namespace.
|
||||
(This is needed to prevent a user from encrypting their data with
|
||||
another user's key.) The key must remain added while
|
||||
FS_IOC_SET_ENCRYPTION_POLICY is executing. However, if the new
|
||||
encrypted directory does not need to be accessed immediately, then the
|
||||
key can be removed right away afterwards.
|
||||
|
||||
Note that the ext4 filesystem does not allow the root directory to be
|
||||
encrypted, even if it is empty. Users who want to encrypt an entire
|
||||
filesystem with one key should consider using dm-crypt instead.
|
||||
@@ -337,7 +493,11 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
|
||||
- ``EEXIST``: the file is already encrypted with an encryption policy
|
||||
different from the one specified
|
||||
- ``EINVAL``: an invalid encryption policy was specified (invalid
|
||||
version, mode(s), or flags)
|
||||
version, mode(s), or flags; or reserved bits were set)
|
||||
- ``ENOKEY``: a v2 encryption policy was specified, but the key with
|
||||
the specified ``master_key_identifier`` has not been added, nor does
|
||||
the process have the CAP_FOWNER capability in the initial user
|
||||
namespace
|
||||
- ``ENOTDIR``: the file is unencrypted and is a regular file, not a
|
||||
directory
|
||||
- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory
|
||||
@@ -356,25 +516,79 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
|
||||
Getting an encryption policy
|
||||
----------------------------
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct
|
||||
fscrypt_policy`, if any, for a directory or regular file. See above
|
||||
for the struct definition. No additional permissions are required
|
||||
beyond the ability to open the file.
|
||||
Two ioctls are available to get a file's encryption policy:
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors:
|
||||
- `FS_IOC_GET_ENCRYPTION_POLICY_EX`_
|
||||
- `FS_IOC_GET_ENCRYPTION_POLICY`_
|
||||
|
||||
The extended (_EX) version of the ioctl is more general and is
|
||||
recommended for use when possible.  However, on older kernels only the
|
||||
original ioctl is available. Applications should try the extended
|
||||
version, and if it fails with ENOTTY fall back to the original
|
||||
version.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY_EX
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_POLICY_EX ioctl retrieves the encryption
|
||||
policy, if any, for a directory or regular file. No additional
|
||||
permissions are required beyond the ability to open the file. It
|
||||
takes in a pointer to a :c:type:`struct fscrypt_get_policy_ex_arg`,
|
||||
defined as follows::
|
||||
|
||||
struct fscrypt_get_policy_ex_arg {
|
||||
__u64 policy_size; /* input/output */
|
||||
union {
|
||||
__u8 version;
|
||||
struct fscrypt_policy_v1 v1;
|
||||
struct fscrypt_policy_v2 v2;
|
||||
} policy; /* output */
|
||||
};
|
||||
|
||||
The caller must initialize ``policy_size`` to the size available for
|
||||
the policy struct, i.e. ``sizeof(arg.policy)``.
|
||||
|
||||
On success, the policy struct is returned in ``policy``, and its
|
||||
actual size is returned in ``policy_size``. ``policy.version`` should
|
||||
be checked to determine the version of policy returned. Note that the
|
||||
version code for the "v1" policy is actually 0 (FSCRYPT_POLICY_V1).
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY_EX can fail with the following errors:
|
||||
|
||||
- ``EINVAL``: the file is encrypted, but it uses an unrecognized
|
||||
encryption context format
|
||||
encryption policy version
|
||||
- ``ENODATA``: the file is not encrypted
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption,
|
||||
or this kernel is too old to support FS_IOC_GET_ENCRYPTION_POLICY_EX
|
||||
(try FS_IOC_GET_ENCRYPTION_POLICY instead)
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
- ``EOVERFLOW``: the file is encrypted and uses a recognized
|
||||
encryption policy version, but the policy struct does not fit into
|
||||
the provided buffer
|
||||
|
||||
Note: if you only need to know whether a file is encrypted or not, on
|
||||
most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl
|
||||
and check for FS_ENCRYPT_FL, or to use the statx() system call and
|
||||
check for STATX_ATTR_ENCRYPTED in stx_attributes.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_POLICY ioctl can also retrieve the
|
||||
encryption policy, if any, for a directory or regular file. However,
|
||||
unlike `FS_IOC_GET_ENCRYPTION_POLICY_EX`_,
|
||||
FS_IOC_GET_ENCRYPTION_POLICY only supports the original policy
|
||||
version. It takes in a pointer directly to a :c:type:`struct
|
||||
fscrypt_policy_v1` rather than a :c:type:`struct
|
||||
fscrypt_get_policy_ex_arg`.
|
||||
|
||||
The error codes for FS_IOC_GET_ENCRYPTION_POLICY are the same as those
|
||||
for FS_IOC_GET_ENCRYPTION_POLICY_EX, except that
|
||||
FS_IOC_GET_ENCRYPTION_POLICY also returns ``EINVAL`` if the file is
|
||||
encrypted using a newer encryption policy version.
|
||||
|
||||
Getting the per-filesystem salt
|
||||
-------------------------------
|
||||
|
||||
@@ -390,8 +604,115 @@ generate and manage any needed salt(s) in userspace.
|
||||
Adding keys
|
||||
-----------
|
||||
|
||||
To provide a master key, userspace must add it to an appropriate
|
||||
keyring using the add_key() system call (see:
|
||||
FS_IOC_ADD_ENCRYPTION_KEY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_ADD_ENCRYPTION_KEY ioctl adds a master encryption key to
|
||||
the filesystem, making all files on the filesystem which were
|
||||
encrypted using that key appear "unlocked", i.e. in plaintext form.
|
||||
It can be executed on any file or directory on the target filesystem,
|
||||
but using the filesystem's root directory is recommended. It takes in
|
||||
a pointer to a :c:type:`struct fscrypt_add_key_arg`, defined as
|
||||
follows::
|
||||
|
||||
struct fscrypt_add_key_arg {
|
||||
struct fscrypt_key_specifier key_spec;
|
||||
__u32 raw_size;
|
||||
__u32 __reserved[9];
|
||||
__u8 raw[];
|
||||
};
|
||||
|
||||
#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR 1
|
||||
#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER 2
|
||||
|
||||
struct fscrypt_key_specifier {
|
||||
__u32 type; /* one of FSCRYPT_KEY_SPEC_TYPE_* */
|
||||
__u32 __reserved;
|
||||
union {
|
||||
__u8 __reserved[32]; /* reserve some extra space */
|
||||
__u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
|
||||
__u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
|
||||
} u;
|
||||
};
|
||||
|
||||
:c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized
|
||||
as follows:
|
||||
|
||||
- If the key is being added for use by v1 encryption policies, then
|
||||
``key_spec.type`` must contain FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR, and
|
||||
``key_spec.u.descriptor`` must contain the descriptor of the key
|
||||
being added, corresponding to the value in the
|
||||
``master_key_descriptor`` field of :c:type:`struct
|
||||
fscrypt_policy_v1`. To add this type of key, the calling process
|
||||
must have the CAP_SYS_ADMIN capability in the initial user
|
||||
namespace.
|
||||
|
||||
Alternatively, if the key is being added for use by v2 encryption
|
||||
policies, then ``key_spec.type`` must contain
|
||||
FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, and ``key_spec.u.identifier`` is
|
||||
an *output* field which the kernel fills in with a cryptographic
|
||||
hash of the key. To add this type of key, the calling process does
|
||||
not need any privileges. However, the number of keys that can be
|
||||
added is limited by the user's quota for the keyrings service (see
|
||||
``Documentation/security/keys/core.rst``).
|
||||
|
||||
- ``raw_size`` must be the size of the ``raw`` key provided, in bytes.
|
||||
|
||||
- ``raw`` is a variable-length field which must contain the actual
|
||||
key, ``raw_size`` bytes long.
|
||||
|
||||
For v2 policy keys, the kernel keeps track of which user (identified
|
||||
by effective user ID) added the key, and only allows the key to be
|
||||
removed by that user --- or by "root", if they use
|
||||
`FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS`_.
|
||||
|
||||
However, if another user has added the key, it may be desirable to
|
||||
prevent that other user from unexpectedly removing it. Therefore,
|
||||
FS_IOC_ADD_ENCRYPTION_KEY may also be used to add a v2 policy key
|
||||
*again*, even if it's already added by other user(s). In this case,
|
||||
FS_IOC_ADD_ENCRYPTION_KEY will just install a claim to the key for the
|
||||
current user, rather than actually add the key again (but the raw key
|
||||
must still be provided, as a proof of knowledge).
|
||||
|
||||
FS_IOC_ADD_ENCRYPTION_KEY returns 0 if the key or a claim to
|
||||
the key was either added or already exists.
|
||||
|
||||
FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors:
|
||||
|
||||
- ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the
|
||||
caller does not have the CAP_SYS_ADMIN capability in the initial
|
||||
user namespace
|
||||
- ``EDQUOT``: the key quota for this user would be exceeded by adding
|
||||
the key
|
||||
- ``EINVAL``: invalid key size or key specifier type, or reserved bits
|
||||
were set
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
|
||||
Legacy method
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
For v1 encryption policies, a master encryption key can also be
|
||||
provided by adding it to a process-subscribed keyring, e.g. to a
|
||||
session keyring, or to a user keyring if the user keyring is linked
|
||||
into the session keyring.
|
||||
|
||||
This method is deprecated (and not supported for v2 encryption
|
||||
policies) for several reasons. First, it cannot be used in
|
||||
combination with FS_IOC_REMOVE_ENCRYPTION_KEY (see `Removing keys`_),
|
||||
so for removing a key a workaround such as keyctl_unlink() in
|
||||
combination with ``sync; echo 2 > /proc/sys/vm/drop_caches`` would
|
||||
have to be used. Second, it doesn't match the fact that the
|
||||
locked/unlocked status of encrypted files (i.e. whether they appear to
|
||||
be in plaintext form or in ciphertext form) is global. This mismatch
|
||||
has caused much confusion as well as real problems when processes
|
||||
running under different UIDs, such as a ``sudo`` command, need to
|
||||
access encrypted files.
|
||||
|
||||
Nevertheless, to add a key to one of the process-subscribed keyrings,
|
||||
the add_key() system call can be used (see:
|
||||
``Documentation/security/keys/core.rst``). The key type must be
|
||||
"logon"; keys of this type are kept in kernel memory and cannot be
|
||||
read back by userspace. The key description must be "fscrypt:"
|
||||
@@ -399,12 +720,12 @@ followed by the 16-character lower case hex representation of the
|
||||
``master_key_descriptor`` that was set in the encryption policy. The
|
||||
key payload must conform to the following structure::
|
||||
|
||||
#define FS_MAX_KEY_SIZE 64
|
||||
#define FSCRYPT_MAX_KEY_SIZE 64
|
||||
|
||||
struct fscrypt_key {
|
||||
u32 mode;
|
||||
u8 raw[FS_MAX_KEY_SIZE];
|
||||
u32 size;
|
||||
__u32 mode;
|
||||
__u8 raw[FSCRYPT_MAX_KEY_SIZE];
|
||||
__u32 size;
|
||||
};
|
||||
|
||||
``mode`` is ignored; just set it to 0. The actual key is provided in
|
||||
@@ -416,26 +737,194 @@ with a filesystem-specific prefix such as "ext4:". However, the
|
||||
filesystem-specific prefixes are deprecated and should not be used in
|
||||
new programs.
|
||||
|
||||
There are several different types of keyrings in which encryption keys
|
||||
may be placed, such as a session keyring, a user session keyring, or a
|
||||
user keyring. Each key must be placed in a keyring that is "attached"
|
||||
to all processes that might need to access files encrypted with it, in
|
||||
the sense that request_key() will find the key. Generally, if only
|
||||
processes belonging to a specific user need to access a given
|
||||
encrypted directory and no session keyring has been installed, then
|
||||
that directory's key should be placed in that user's user session
|
||||
keyring or user keyring. Otherwise, a session keyring should be
|
||||
installed if needed, and the key should be linked into that session
|
||||
keyring, or in a keyring linked into that session keyring.
|
||||
Removing keys
|
||||
-------------
|
||||
|
||||
Note: introducing the complex visibility semantics of keyrings here
|
||||
was arguably a mistake --- especially given that by design, after any
|
||||
process successfully opens an encrypted file (thereby setting up the
|
||||
per-file key), possessing the keyring key is not actually required for
|
||||
any process to read/write the file until its in-memory inode is
|
||||
evicted. In the future there probably should be a way to provide keys
|
||||
directly to the filesystem instead, which would make the intended
|
||||
semantics clearer.
|
||||
Two ioctls are available for removing a key that was added by
|
||||
`FS_IOC_ADD_ENCRYPTION_KEY`_:
|
||||
|
||||
- `FS_IOC_REMOVE_ENCRYPTION_KEY`_
|
||||
- `FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS`_
|
||||
|
||||
These two ioctls differ only in cases where v2 policy keys are added
|
||||
or removed by non-root users.
|
||||
|
||||
These ioctls don't work on keys that were added via the legacy
|
||||
process-subscribed keyrings mechanism.
|
||||
|
||||
Before using these ioctls, read the `Kernel memory compromise`_
|
||||
section for a discussion of the security goals and limitations of
|
||||
these ioctls.
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_REMOVE_ENCRYPTION_KEY ioctl removes a claim to a master
|
||||
encryption key from the filesystem, and possibly removes the key
|
||||
itself. It can be executed on any file or directory on the target
|
||||
filesystem, but using the filesystem's root directory is recommended.
|
||||
It takes in a pointer to a :c:type:`struct fscrypt_remove_key_arg`,
|
||||
defined as follows::
|
||||
|
||||
struct fscrypt_remove_key_arg {
|
||||
struct fscrypt_key_specifier key_spec;
|
||||
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY 0x00000001
|
||||
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS 0x00000002
|
||||
__u32 removal_status_flags; /* output */
|
||||
__u32 __reserved[5];
|
||||
};
|
||||
|
||||
This structure must be zeroed, then initialized as follows:
|
||||
|
||||
- The key to remove is specified by ``key_spec``:
|
||||
|
||||
- To remove a key used by v1 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR and fill
|
||||
in ``key_spec.u.descriptor``. To remove this type of key, the
|
||||
calling process must have the CAP_SYS_ADMIN capability in the
|
||||
initial user namespace.
|
||||
|
||||
- To remove a key used by v2 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER and fill
|
||||
in ``key_spec.u.identifier``.
|
||||
|
||||
For v2 policy keys, this ioctl is usable by non-root users. However,
|
||||
to make this possible, it actually just removes the current user's
|
||||
claim to the key, undoing a single call to FS_IOC_ADD_ENCRYPTION_KEY.
|
||||
Only after all claims are removed is the key really removed.
|
||||
|
||||
For example, if FS_IOC_ADD_ENCRYPTION_KEY was called with uid 1000,
|
||||
then the key will be "claimed" by uid 1000, and
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY will only succeed as uid 1000. Or, if
|
||||
both uids 1000 and 2000 added the key, then for each uid
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY will only remove their own claim. Only
|
||||
once *both* are removed is the key really removed. (Think of it like
|
||||
unlinking a file that may have hard links.)
|
||||
|
||||
If FS_IOC_REMOVE_ENCRYPTION_KEY really removes the key, it will also
|
||||
try to "lock" all files that had been unlocked with the key. It won't
|
||||
lock files that are still in-use, so this ioctl is expected to be used
|
||||
in cooperation with userspace ensuring that none of the files are
|
||||
still open. However, if necessary, this ioctl can be executed again
|
||||
later to retry locking any remaining files.
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY returns 0 if either the key was removed
|
||||
(but may still have files remaining to be locked), the user's claim to
|
||||
the key was removed, or the key was already removed but had files
|
||||
remaining to be locked so the ioctl retried locking them. In any
|
||||
of these cases, ``removal_status_flags`` is filled in with the
|
||||
following informational status flags:
|
||||
|
||||
- ``FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY``: set if some file(s)
|
||||
are still in-use. Not guaranteed to be set in the case where only
|
||||
the user's claim to the key was removed.
|
||||
- ``FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS``: set if only the
|
||||
user's claim to the key was removed, not the key itself
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY can fail with the following errors:
|
||||
|
||||
- ``EACCES``: The FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR key specifier type
|
||||
was specified, but the caller does not have the CAP_SYS_ADMIN
|
||||
capability in the initial user namespace
|
||||
- ``EINVAL``: invalid key specifier type, or reserved bits were set
|
||||
- ``ENOKEY``: the key object was not found at all, i.e. it was never
|
||||
added in the first place or was already fully removed including all
|
||||
files locked; or, the user does not have a claim to the key (but
|
||||
someone else does).
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS is exactly the same as
|
||||
`FS_IOC_REMOVE_ENCRYPTION_KEY`_, except that for v2 policy keys, the
|
||||
ALL_USERS version of the ioctl will remove all users' claims to the
|
||||
key, not just the current user's. I.e., the key itself will always be
|
||||
removed, no matter how many users have added it. This difference is
|
||||
only meaningful if non-root users are adding and removing keys.
|
||||
|
||||
Because of this, FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS also requires
|
||||
"root", namely the CAP_SYS_ADMIN capability in the initial user
|
||||
namespace. Otherwise it will fail with EACCES.
|
||||
|
||||
Getting key status
|
||||
------------------
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_KEY_STATUS
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl retrieves the status of a
|
||||
master encryption key. It can be executed on any file or directory on
|
||||
the target filesystem, but using the filesystem's root directory is
|
||||
recommended. It takes in a pointer to a :c:type:`struct
|
||||
fscrypt_get_key_status_arg`, defined as follows::
|
||||
|
||||
struct fscrypt_get_key_status_arg {
|
||||
/* input */
|
||||
struct fscrypt_key_specifier key_spec;
|
||||
__u32 __reserved[6];
|
||||
|
||||
/* output */
|
||||
#define FSCRYPT_KEY_STATUS_ABSENT 1
|
||||
#define FSCRYPT_KEY_STATUS_PRESENT 2
|
||||
#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED 3
|
||||
__u32 status;
|
||||
#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF 0x00000001
|
||||
__u32 status_flags;
|
||||
__u32 user_count;
|
||||
__u32 __out_reserved[13];
|
||||
};
|
||||
|
||||
The caller must zero all input fields, then fill in ``key_spec``:
|
||||
|
||||
- To get the status of a key for v1 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR and fill
|
||||
in ``key_spec.u.descriptor``.
|
||||
|
||||
- To get the status of a key for v2 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER and fill
|
||||
in ``key_spec.u.identifier``.
|
||||
|
||||
On success, 0 is returned and the kernel fills in the output fields:
|
||||
|
||||
- ``status`` indicates whether the key is absent, present, or
|
||||
incompletely removed. Incompletely removed means that the master
|
||||
secret has been removed, but some files are still in use; i.e.,
|
||||
`FS_IOC_REMOVE_ENCRYPTION_KEY`_ returned 0 but set the informational
|
||||
status flag FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY.
|
||||
|
||||
- ``status_flags`` can contain the following flags:
|
||||
|
||||
- ``FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF`` indicates that the key
|
||||
has been added by the current user. This is only set for keys
|
||||
identified by ``identifier`` rather than by ``descriptor``.
|
||||
|
||||
- ``user_count`` specifies the number of users who have added the key.
|
||||
This is only set for keys identified by ``identifier`` rather than
|
||||
by ``descriptor``.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_KEY_STATUS can fail with the following errors:
|
||||
|
||||
- ``EINVAL``: invalid key specifier type, or reserved bits were set
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
|
||||
Among other use cases, FS_IOC_GET_ENCRYPTION_KEY_STATUS can be useful
|
||||
for determining whether the key for a given encrypted directory needs
|
||||
to be added before prompting the user for the passphrase needed to
|
||||
derive the key.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_KEY_STATUS can only get the status of keys in
|
||||
the filesystem-level keyring, i.e. the keyring managed by
|
||||
`FS_IOC_ADD_ENCRYPTION_KEY`_ and `FS_IOC_REMOVE_ENCRYPTION_KEY`_. It
|
||||
cannot get the status of a key that has only been added for use by v1
|
||||
encryption policies using the legacy mechanism involving
|
||||
process-subscribed keyrings.
|
||||
|
||||
Access semantics
|
||||
================
|
||||
@@ -498,7 +987,7 @@ Without the key
|
||||
|
||||
Some filesystem operations may be performed on encrypted regular
|
||||
files, directories, and symlinks even before their encryption key has
|
||||
been provided:
|
||||
been added, or after their encryption key has been removed:
|
||||
|
||||
- File metadata may be read, e.g. using stat().
|
||||
|
||||
@@ -563,33 +1052,44 @@ Encryption context
|
||||
------------------
|
||||
|
||||
An encryption policy is represented on-disk by a :c:type:`struct
|
||||
fscrypt_context`. It is up to individual filesystems to decide where
|
||||
to store it, but normally it would be stored in a hidden extended
|
||||
attribute. It should *not* be exposed by the xattr-related system
|
||||
calls such as getxattr() and setxattr() because of the special
|
||||
semantics of the encryption xattr. (In particular, there would be
|
||||
much confusion if an encryption policy were to be added to or removed
|
||||
from anything other than an empty directory.) The struct is defined
|
||||
as follows::
|
||||
fscrypt_context_v1` or a :c:type:`struct fscrypt_context_v2`. It is
|
||||
up to individual filesystems to decide where to store it, but normally
|
||||
it would be stored in a hidden extended attribute. It should *not* be
|
||||
exposed by the xattr-related system calls such as getxattr() and
|
||||
setxattr() because of the special semantics of the encryption xattr.
|
||||
(In particular, there would be much confusion if an encryption policy
|
||||
were to be added to or removed from anything other than an empty
|
||||
directory.) These structs are defined as follows::
|
||||
|
||||
#define FS_KEY_DESCRIPTOR_SIZE 8
|
||||
#define FS_KEY_DERIVATION_NONCE_SIZE 16
|
||||
|
||||
struct fscrypt_context {
|
||||
u8 format;
|
||||
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
|
||||
struct fscrypt_context_v1 {
|
||||
u8 version;
|
||||
u8 contents_encryption_mode;
|
||||
u8 filenames_encryption_mode;
|
||||
u8 flags;
|
||||
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
|
||||
u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
|
||||
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
|
||||
};
|
||||
|
||||
Note that :c:type:`struct fscrypt_context` contains the same
|
||||
information as :c:type:`struct fscrypt_policy` (see `Setting an
|
||||
encryption policy`_), except that :c:type:`struct fscrypt_context`
|
||||
also contains a nonce. The nonce is randomly generated by the kernel
|
||||
and is used to derive the inode's encryption key as described in
|
||||
`Per-file keys`_.
|
||||
#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
|
||||
struct fscrypt_context_v2 {
|
||||
u8 version;
|
||||
u8 contents_encryption_mode;
|
||||
u8 filenames_encryption_mode;
|
||||
u8 flags;
|
||||
u8 __reserved[4];
|
||||
u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
|
||||
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
|
||||
};
|
||||
|
||||
The context structs contain the same information as the corresponding
|
||||
policy structs (see `Setting an encryption policy`_), except that the
|
||||
context structs also contain a nonce. The nonce is randomly generated
|
||||
by the kernel and is used as KDF input or as a tweak to cause
|
||||
different files to be encrypted differently; see `Per-file keys`_ and
|
||||
`DIRECT_KEY and per-mode keys`_.
|
||||
|
||||
Data path changes
|
||||
-----------------
|
||||
@@ -647,3 +1147,42 @@ Note that the precise way that filenames are presented to userspace
|
||||
without the key is subject to change in the future. It is only meant
|
||||
as a way to temporarily present valid filenames so that commands like
|
||||
``rm -r`` work as expected on encrypted directories.
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
To test fscrypt, use xfstests, which is Linux's de facto standard
|
||||
filesystem test suite. First, run all the tests in the "encrypt"
|
||||
group on the relevant filesystem(s). For example, to test ext4 and
|
||||
f2fs encryption using `kvm-xfstests
|
||||
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
|
||||
|
||||
kvm-xfstests -c ext4,f2fs -g encrypt
|
||||
|
||||
UBIFS encryption can also be tested this way, but it should be done in
|
||||
a separate command, and it takes some time for kvm-xfstests to set up
|
||||
emulated UBI volumes::
|
||||
|
||||
kvm-xfstests -c ubifs -g encrypt
|
||||
|
||||
No tests should fail. However, tests that use non-default encryption
|
||||
modes (e.g. generic/549 and generic/550) will be skipped if the needed
|
||||
algorithms were not built into the kernel's crypto API. Also, tests
|
||||
that access the raw block device (e.g. generic/399, generic/548,
|
||||
generic/549, generic/550) will be skipped on UBIFS.
|
||||
|
||||
Besides running the "encrypt" group tests, for ext4 and f2fs it's also
|
||||
possible to run most xfstests with the "test_dummy_encryption" mount
|
||||
option. This option causes all new files to be automatically
|
||||
encrypted with a dummy key, without having to make any API calls.
|
||||
This tests the encrypted I/O paths more thoroughly. To do this with
|
||||
kvm-xfstests, use the "encrypt" filesystem configuration::
|
||||
|
||||
kvm-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
|
||||
|
||||
Because this runs many more tests than "-g encrypt" does, it takes
|
||||
much longer to run; so also consider using `gce-xfstests
|
||||
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/gce-xfstests.md>`_
|
||||
instead of kvm-xfstests::
|
||||
|
||||
gce-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
|
||||
|
||||
726
Documentation/filesystems/fsverity.rst
Normal file
726
Documentation/filesystems/fsverity.rst
Normal file
@@ -0,0 +1,726 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
.. _fsverity:
|
||||
|
||||
=======================================================
|
||||
fs-verity: read-only file-based authenticity protection
|
||||
=======================================================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
fs-verity (``fs/verity/``) is a support layer that filesystems can
|
||||
hook into to support transparent integrity and authenticity protection
|
||||
of read-only files. Currently, it is supported by the ext4 and f2fs
|
||||
filesystems. Like fscrypt, not too much filesystem-specific code is
|
||||
needed to support fs-verity.
|
||||
|
||||
fs-verity is similar to `dm-verity
|
||||
<https://www.kernel.org/doc/Documentation/device-mapper/verity.txt>`_
|
||||
but works on files rather than block devices. On regular files on
|
||||
filesystems supporting fs-verity, userspace can execute an ioctl that
|
||||
causes the filesystem to build a Merkle tree for the file and persist
|
||||
it to a filesystem-specific location associated with the file.
|
||||
|
||||
After this, the file is made readonly, and all reads from the file are
|
||||
automatically verified against the file's Merkle tree. Reads of any
|
||||
corrupted data, including mmap reads, will fail.
|
||||
|
||||
Userspace can use another ioctl to retrieve the root hash (actually
|
||||
the "file measurement", which is a hash that includes the root hash)
|
||||
that fs-verity is enforcing for the file. This ioctl executes in
|
||||
constant time, regardless of the file size.
|
||||
|
||||
fs-verity is essentially a way to hash a file in constant time,
|
||||
subject to the caveat that reads which would violate the hash will
|
||||
fail at runtime.
|
||||
|
||||
Use cases
|
||||
=========
|
||||
|
||||
By itself, the base fs-verity feature only provides integrity
|
||||
protection, i.e. detection of accidental (non-malicious) corruption.
|
||||
|
||||
However, because fs-verity makes retrieving the file hash extremely
|
||||
efficient, it's primarily meant to be used as a tool to support
|
||||
authentication (detection of malicious modifications) or auditing
|
||||
(logging file hashes before use).
|
||||
|
||||
Trusted userspace code (e.g. operating system code running on a
|
||||
read-only partition that is itself authenticated by dm-verity) can
|
||||
authenticate the contents of an fs-verity file by using the
|
||||
`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
|
||||
digital signature of it.
|
||||
|
||||
A standard file hash could be used instead of fs-verity. However,
|
||||
this is inefficient if the file is large and only a small portion may
|
||||
be accessed. This is often the case for Android application package
|
||||
(APK) files, for example. These typically contain many translations,
|
||||
classes, and other resources that are infrequently or even never
|
||||
accessed on a particular device. It would be slow and wasteful to
|
||||
read and hash the entire file before starting the application.
|
||||
|
||||
Unlike an ahead-of-time hash, fs-verity also re-verifies data each
|
||||
time it's paged in. This ensures that malicious disk firmware can't
|
||||
undetectably change the contents of the file at runtime.
|
||||
|
||||
fs-verity does not replace or obsolete dm-verity. dm-verity should
|
||||
still be used on read-only filesystems. fs-verity is for files that
|
||||
must live on a read-write filesystem because they are independently
|
||||
updated and potentially user-installed, so dm-verity cannot be used.
|
||||
|
||||
The base fs-verity feature is a hashing mechanism only; actually
|
||||
authenticating the files is up to userspace. However, to meet some
|
||||
users' needs, fs-verity optionally supports a simple signature
|
||||
verification mechanism where users can configure the kernel to require
|
||||
that all fs-verity files be signed by a key loaded into a keyring; see
|
||||
`Built-in signature verification`_. Support for fs-verity file hashes
|
||||
in IMA (Integrity Measurement Architecture) policies is also planned.
|
||||
|
||||
User API
|
||||
========
|
||||
|
||||
FS_IOC_ENABLE_VERITY
|
||||
--------------------
|
||||
|
||||
The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file. It takes
|
||||
in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as
|
||||
follows::
|
||||
|
||||
struct fsverity_enable_arg {
|
||||
__u32 version;
|
||||
__u32 hash_algorithm;
|
||||
__u32 block_size;
|
||||
__u32 salt_size;
|
||||
__u64 salt_ptr;
|
||||
__u32 sig_size;
|
||||
__u32 __reserved1;
|
||||
__u64 sig_ptr;
|
||||
__u64 __reserved2[11];
|
||||
};
|
||||
|
||||
This structure contains the parameters of the Merkle tree to build for
|
||||
the file, and optionally contains a signature. It must be initialized
|
||||
as follows:
|
||||
|
||||
- ``version`` must be 1.
|
||||
- ``hash_algorithm`` must be the identifier for the hash algorithm to
|
||||
use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256. See
|
||||
``include/uapi/linux/fsverity.h`` for the list of possible values.
|
||||
- ``block_size`` must be the Merkle tree block size. Currently, this
|
||||
must be equal to the system page size, which is usually 4096 bytes.
|
||||
Other sizes may be supported in the future. This value is not
|
||||
necessarily the same as the filesystem block size.
|
||||
- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
|
||||
provided. The salt is a value that is prepended to every hashed
|
||||
block; it can be used to personalize the hashing for a particular
|
||||
file or device. Currently the maximum salt size is 32 bytes.
|
||||
- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
|
||||
provided.
|
||||
- ``sig_size`` is the size of the signature in bytes, or 0 if no
|
||||
signature is provided. Currently the signature is (somewhat
|
||||
arbitrarily) limited to 16128 bytes. See `Built-in signature
|
||||
verification`_ for more information.
|
||||
- ``sig_ptr`` is the pointer to the signature, or NULL if no
|
||||
signature is provided.
|
||||
- All reserved fields must be zeroed.
|
||||
|
||||
FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
|
||||
the file and persist it to a filesystem-specific location associated
|
||||
with the file, then mark the file as a verity file. This ioctl may
|
||||
take a long time to execute on large files, and it is interruptible by
|
||||
fatal signals.
|
||||
|
||||
FS_IOC_ENABLE_VERITY checks for write access to the inode. However,
|
||||
it must be executed on an O_RDONLY file descriptor and no processes
|
||||
can have the file open for writing. Attempts to open the file for
|
||||
writing while this ioctl is executing will fail with ETXTBSY. (This
|
||||
is necessary to guarantee that no writable file descriptors will exist
|
||||
after verity is enabled, and to guarantee that the file's contents are
|
||||
stable while the Merkle tree is being built over it.)
|
||||
|
||||
On success, FS_IOC_ENABLE_VERITY returns 0, and the file becomes a
|
||||
verity file. On failure (including the case of interruption by a
|
||||
fatal signal), no changes are made to the file.
|
||||
|
||||
FS_IOC_ENABLE_VERITY can fail with the following errors:
|
||||
|
||||
- ``EACCES``: the process does not have write access to the file
|
||||
- ``EBADMSG``: the signature is malformed
|
||||
- ``EBUSY``: this ioctl is already running on the file
|
||||
- ``EEXIST``: the file already has verity enabled
|
||||
- ``EFAULT``: the caller provided inaccessible memory
|
||||
- ``EINTR``: the operation was interrupted by a fatal signal
|
||||
- ``EINVAL``: unsupported version, hash algorithm, or block size; or
|
||||
reserved bits are set; or the file descriptor refers to neither a
|
||||
regular file nor a directory.
|
||||
- ``EISDIR``: the file descriptor refers to a directory
|
||||
- ``EKEYREJECTED``: the signature doesn't match the file
|
||||
- ``EMSGSIZE``: the salt or signature is too long
|
||||
- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate
|
||||
needed to verify the signature
|
||||
- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not
|
||||
available in the kernel's crypto API as currently configured (e.g.
|
||||
for SHA-512, missing CONFIG_CRYPTO_SHA512).
|
||||
- ``ENOTTY``: this type of filesystem does not implement fs-verity
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
|
||||
support; or the filesystem superblock has not had the 'verity'
|
||||
feature enabled on it; or the filesystem does not support fs-verity
|
||||
on this file. (See `Filesystem support`_.)
|
||||
- ``EPERM``: the file is append-only; or, a signature is required and
|
||||
one was not provided.
|
||||
- ``EROFS``: the filesystem is read-only
|
||||
- ``ETXTBSY``: someone has the file open for writing. This can be the
|
||||
caller's file descriptor, another open file descriptor, or the file
|
||||
reference held by a writable memory map.
|
||||
|
||||
FS_IOC_MEASURE_VERITY
|
||||
---------------------
|
||||
|
||||
The FS_IOC_MEASURE_VERITY ioctl retrieves the measurement of a verity
|
||||
file. The file measurement is a digest that cryptographically
|
||||
identifies the file contents that are being enforced on reads.
|
||||
|
||||
This ioctl takes in a pointer to a variable-length structure::
|
||||
|
||||
struct fsverity_digest {
|
||||
__u16 digest_algorithm;
|
||||
__u16 digest_size; /* input/output */
|
||||
__u8 digest[];
|
||||
};
|
||||
|
||||
``digest_size`` is an input/output field. On input, it must be
|
||||
initialized to the number of bytes allocated for the variable-length
|
||||
``digest`` field.
|
||||
|
||||
On success, 0 is returned and the kernel fills in the structure as
|
||||
follows:
|
||||
|
||||
- ``digest_algorithm`` will be the hash algorithm used for the file
|
||||
measurement. It will match ``fsverity_enable_arg::hash_algorithm``.
|
||||
- ``digest_size`` will be the size of the digest in bytes, e.g. 32
|
||||
for SHA-256. (This can be redundant with ``digest_algorithm``.)
|
||||
- ``digest`` will be the actual bytes of the digest.
|
||||
|
||||
FS_IOC_MEASURE_VERITY is guaranteed to execute in constant time,
|
||||
regardless of the size of the file.
|
||||
|
||||
FS_IOC_MEASURE_VERITY can fail with the following errors:
|
||||
|
||||
- ``EFAULT``: the caller provided inaccessible memory
|
||||
- ``ENODATA``: the file is not a verity file
|
||||
- ``ENOTTY``: this type of filesystem does not implement fs-verity
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
|
||||
support, or the filesystem superblock has not had the 'verity'
|
||||
feature enabled on it. (See `Filesystem support`_.)
|
||||
- ``EOVERFLOW``: the digest is longer than the specified
|
||||
``digest_size`` bytes. Try providing a larger buffer.
|
||||
|
||||
FS_IOC_GETFLAGS
|
||||
---------------
|
||||
|
||||
The existing ioctl FS_IOC_GETFLAGS (which isn't specific to fs-verity)
|
||||
can also be used to check whether a file has fs-verity enabled or not.
|
||||
To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
|
||||
|
||||
The verity flag is not settable via FS_IOC_SETFLAGS. You must use
|
||||
FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
|
||||
|
||||
Accessing verity files
|
||||
======================
|
||||
|
||||
Applications can transparently access a verity file just like a
|
||||
non-verity one, with the following exceptions:
|
||||
|
||||
- Verity files are readonly. They cannot be opened for writing or
|
||||
truncate()d, even if the file mode bits allow it. Attempts to do
|
||||
one of these things will fail with EPERM. However, changes to
|
||||
metadata such as owner, mode, timestamps, and xattrs are still
|
||||
allowed, since these are not measured by fs-verity. Verity files
|
||||
can also still be renamed, deleted, and linked to.
|
||||
|
||||
- Direct I/O is not supported on verity files. Attempts to use direct
|
||||
I/O on such files will fall back to buffered I/O.
|
||||
|
||||
- DAX (Direct Access) is not supported on verity files, because this
|
||||
would circumvent the data verification.
|
||||
|
||||
- Reads of data that doesn't match the verity Merkle tree will fail
|
||||
with EIO (for read()) or SIGBUS (for mmap() reads).
|
||||
|
||||
- If the sysctl "fs.verity.require_signatures" is set to 1 and the
|
||||
file's verity measurement is not signed by a key in the fs-verity
|
||||
keyring, then opening the file will fail. See `Built-in signature
|
||||
verification`_.
|
||||
|
||||
Direct access to the Merkle tree is not supported. Therefore, if a
|
||||
verity file is copied, or is backed up and restored, then it will lose
|
||||
its "verity"-ness. fs-verity is primarily meant for files like
|
||||
executables that are managed by a package manager.
|
||||
|
||||
File measurement computation
|
||||
============================
|
||||
|
||||
This section describes how fs-verity hashes the file contents using a
|
||||
Merkle tree to produce the "file measurement" which cryptographically
|
||||
identifies the file contents. This algorithm is the same for all
|
||||
filesystems that support fs-verity.
|
||||
|
||||
Userspace only needs to be aware of this algorithm if it needs to
|
||||
compute the file measurement itself, e.g. in order to sign the file.
|
||||
|
||||
.. _fsverity_merkle_tree:
|
||||
|
||||
Merkle tree
|
||||
-----------
|
||||
|
||||
The file contents are divided into blocks, where the block size is
|
||||
configurable but is usually 4096 bytes. The end of the last block is
|
||||
zero-padded if needed. Each block is then hashed, producing the first
|
||||
level of hashes. Then, the hashes in this first level are grouped
|
||||
into 'blocksize'-byte blocks (zero-padding the ends as needed) and
|
||||
these blocks are hashed, producing the second level of hashes. This
|
||||
proceeds up the tree until only a single block remains. The hash of
|
||||
this block is the "Merkle tree root hash".
|
||||
|
||||
If the file fits in one block and is nonempty, then the "Merkle tree
|
||||
root hash" is simply the hash of the single data block. If the file
|
||||
is empty, then the "Merkle tree root hash" is all zeroes.
|
||||
|
||||
The "blocks" here are not necessarily the same as "filesystem blocks".
|
||||
|
||||
If a salt was specified, then it's zero-padded to the closest multiple
|
||||
of the input size of the hash algorithm's compression function, e.g.
|
||||
64 bytes for SHA-256 or 128 bytes for SHA-512. The padded salt is
|
||||
prepended to every data or Merkle tree block that is hashed.
|
||||
|
||||
The purpose of the block padding is to cause every hash to be taken
|
||||
over the same amount of data, which simplifies the implementation and
|
||||
keeps open more possibilities for hardware acceleration. The purpose
|
||||
of the salt padding is to make the salting "free" when the salted hash
|
||||
state is precomputed, then imported for each hash.
|
||||
|
||||
Example: in the recommended configuration of SHA-256 and 4K blocks,
|
||||
128 hash values fit in each block. Thus, each level of the Merkle
|
||||
tree is approximately 128 times smaller than the previous, and for
|
||||
large files the Merkle tree's size converges to approximately 1/127 of
|
||||
the original file size. However, for small files, the padding is
|
||||
significant, making the space overhead proportionally more.
|
||||
|
||||
.. _fsverity_descriptor:
|
||||
|
||||
fs-verity descriptor
|
||||
--------------------
|
||||
|
||||
By itself, the Merkle tree root hash is ambiguous. For example, it
|
||||
can't distinguish a large file from a small second file whose data
|
||||
is exactly the top-level hash block of the first file. Ambiguities
|
||||
also arise from the convention of padding to the next block boundary.
|
||||
|
||||
To solve this problem, the verity file measurement is actually
|
||||
computed as a hash of the following structure, which contains the
|
||||
Merkle tree root hash as well as other fields such as the file size::
|
||||
|
||||
struct fsverity_descriptor {
|
||||
__u8 version; /* must be 1 */
|
||||
__u8 hash_algorithm; /* Merkle tree hash algorithm */
|
||||
__u8 log_blocksize; /* log2 of size of data and tree blocks */
|
||||
__u8 salt_size; /* size of salt in bytes; 0 if none */
|
||||
__le32 sig_size; /* must be 0 */
|
||||
__le64 data_size; /* size of file the Merkle tree is built over */
|
||||
__u8 root_hash[64]; /* Merkle tree root hash */
|
||||
__u8 salt[32]; /* salt prepended to each hashed block */
|
||||
__u8 __reserved[144]; /* must be 0's */
|
||||
};
|
||||
|
||||
Note that the ``sig_size`` field must be set to 0 for the purpose of
|
||||
computing the file measurement, even if a signature was provided (or
|
||||
will be provided) to `FS_IOC_ENABLE_VERITY`_.
|
||||
|
||||
Built-in signature verification
|
||||
===============================
|
||||
|
||||
With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
|
||||
a portion of an authentication policy (see `Use cases`_) in the
|
||||
kernel. Specifically, it adds support for:
|
||||
|
||||
1. At fs-verity module initialization time, a keyring ".fs-verity" is
|
||||
created. The root user can add trusted X.509 certificates to this
|
||||
keyring using the add_key() system call, then (when done)
|
||||
optionally use keyctl_restrict_keyring() to prevent additional
|
||||
certificates from being added.
|
||||
|
||||
2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
|
||||
detached signature in DER format of the file measurement. On
|
||||
success, this signature is persisted alongside the Merkle tree.
|
||||
Then, any time the file is opened, the kernel will verify the
|
||||
file's actual measurement against this signature, using the
|
||||
certificates in the ".fs-verity" keyring.
|
||||
|
||||
3. A new sysctl "fs.verity.require_signatures" is made available.
|
||||
When set to 1, the kernel requires that all verity files have a
|
||||
correctly signed file measurement as described in (2).
|
||||
|
||||
File measurements must be signed in the following format, which is
|
||||
similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
|
||||
|
||||
struct fsverity_signed_digest {
|
||||
char magic[8]; /* must be "FSVerity" */
|
||||
__le16 digest_algorithm;
|
||||
__le16 digest_size;
|
||||
__u8 digest[];
|
||||
};
|
||||
|
||||
fs-verity's built-in signature verification support is meant as a
|
||||
relatively simple mechanism that can be used to provide some level of
|
||||
authenticity protection for verity files, as an alternative to doing
|
||||
the signature verification in userspace or using IMA-appraisal.
|
||||
However, with this mechanism, userspace programs still need to check
|
||||
that the verity bit is set, and there is no protection against verity
|
||||
files being swapped around.
|
||||
|
||||
Filesystem support
|
||||
==================
|
||||
|
||||
fs-verity is currently supported by the ext4 and f2fs filesystems.
|
||||
The CONFIG_FS_VERITY kconfig option must be enabled to use fs-verity
|
||||
on either filesystem.
|
||||
|
||||
``include/linux/fsverity.h`` declares the interface between the
|
||||
``fs/verity/`` support layer and filesystems. Briefly, filesystems
|
||||
must provide an ``fsverity_operations`` structure that provides
|
||||
methods to read and write the verity metadata to a filesystem-specific
|
||||
location, including the Merkle tree blocks and
|
||||
``fsverity_descriptor``. Filesystems must also call functions in
|
||||
``fs/verity/`` at certain times, such as when a file is opened or when
|
||||
pages have been read into the pagecache. (See `Verifying data`_.)
|
||||
|
||||
ext4
|
||||
----
|
||||
|
||||
ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2.
|
||||
|
||||
To create verity files on an ext4 filesystem, the filesystem must have
|
||||
been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
|
||||
it. "verity" is an RO_COMPAT filesystem feature, so once set, old
|
||||
kernels will only be able to mount the filesystem readonly, and old
|
||||
versions of e2fsck will be unable to check the filesystem. Moreover,
|
||||
currently ext4 only supports mounting a filesystem with the "verity"
|
||||
feature when its block size is equal to PAGE_SIZE (often 4096 bytes).
|
||||
|
||||
ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files. It
|
||||
can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared.
|
||||
|
||||
ext4 also supports encryption, which can be used simultaneously with
|
||||
fs-verity. In this case, the plaintext data is verified rather than
|
||||
the ciphertext. This is necessary in order to make the file
|
||||
measurement meaningful, since every file is encrypted differently.
|
||||
|
||||
ext4 stores the verity metadata (Merkle tree and fsverity_descriptor)
|
||||
past the end of the file, starting at the first 64K boundary beyond
|
||||
i_size. This approach works because (a) verity files are readonly,
|
||||
and (b) pages fully beyond i_size aren't visible to userspace but can
|
||||
be read/written internally by ext4 with only some relatively small
|
||||
changes to ext4. This approach avoids having to depend on the
|
||||
EA_INODE feature and on rearchitecturing ext4's xattr support to
|
||||
support paging multi-gigabyte xattrs into memory, and to support
|
||||
encrypting xattrs. Note that the verity metadata *must* be encrypted
|
||||
when the file is, since it contains hashes of the plaintext data.
|
||||
|
||||
Currently, ext4 verity only supports the case where the Merkle tree
|
||||
block size, filesystem block size, and page size are all the same. It
|
||||
also only supports extent-based files.
|
||||
|
||||
f2fs
|
||||
----
|
||||
|
||||
f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0.
|
||||
|
||||
To create verity files on an f2fs filesystem, the filesystem must have
|
||||
been formatted with ``-O verity``.
|
||||
|
||||
f2fs sets the FADVISE_VERITY_BIT on-disk inode flag on verity files.
|
||||
It can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be
|
||||
cleared.
|
||||
|
||||
Like ext4, f2fs stores the verity metadata (Merkle tree and
|
||||
fsverity_descriptor) past the end of the file, starting at the first
|
||||
64K boundary beyond i_size. See explanation for ext4 above.
|
||||
Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
|
||||
which wouldn't be enough for even a single Merkle tree block.
|
||||
|
||||
Currently, f2fs verity only supports a Merkle tree block size of 4096.
|
||||
Also, f2fs doesn't support enabling verity on files that currently
|
||||
have atomic or volatile writes pending.
|
||||
|
||||
Implementation details
|
||||
======================
|
||||
|
||||
Verifying data
|
||||
--------------
|
||||
|
||||
fs-verity ensures that all reads of a verity file's data are verified,
|
||||
regardless of which syscall is used to do the read (e.g. mmap(),
|
||||
read(), pread()) and regardless of whether it's the first read or a
|
||||
later read (unless the later read can return cached data that was
|
||||
already verified). Below, we describe how filesystems implement this.
|
||||
|
||||
Pagecache
|
||||
~~~~~~~~~
|
||||
|
||||
For filesystems using Linux's pagecache, the ``->readpage()`` and
|
||||
``->readpages()`` methods must be modified to verify pages before they
|
||||
are marked Uptodate. Merely hooking ``->read_iter()`` would be
|
||||
insufficient, since ``->read_iter()`` is not used for memory maps.
|
||||
|
||||
Therefore, fs/verity/ provides a function fsverity_verify_page() which
|
||||
verifies a page that has been read into the pagecache of a verity
|
||||
inode, but is still locked and not Uptodate, so it's not yet readable
|
||||
by userspace. As needed to do the verification,
|
||||
fsverity_verify_page() will call back into the filesystem to read
|
||||
Merkle tree pages via fsverity_operations::read_merkle_tree_page().
|
||||
|
||||
fsverity_verify_page() returns false if verification failed; in this
|
||||
case, the filesystem must not set the page Uptodate. Following this,
|
||||
as per the usual Linux pagecache behavior, attempts by userspace to
|
||||
read() from the part of the file containing the page will fail with
|
||||
EIO, and accesses to the page within a memory map will raise SIGBUS.
|
||||
|
||||
fsverity_verify_page() currently only supports the case where the
|
||||
Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes).
|
||||
|
||||
In principle, fsverity_verify_page() verifies the entire path in the
|
||||
Merkle tree from the data page to the root hash. However, for
|
||||
efficiency the filesystem may cache the hash pages. Therefore,
|
||||
fsverity_verify_page() only ascends the tree reading hash pages until
|
||||
an already-verified hash page is seen, as indicated by the PageChecked
|
||||
bit being set. It then verifies the path to that page.
|
||||
|
||||
This optimization, which is also used by dm-verity, results in
|
||||
excellent sequential read performance. This is because usually (e.g.
|
||||
127 in 128 times for 4K blocks and SHA-256) the hash page from the
|
||||
bottom level of the tree will already be cached and checked from
|
||||
reading a previous data page. However, random reads perform worse.
|
||||
|
||||
Block device based filesystems
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Block device based filesystems (e.g. ext4 and f2fs) in Linux also use
|
||||
the pagecache, so the above subsection applies too. However, they
|
||||
also usually read many pages from a file at once, grouped into a
|
||||
structure called a "bio". To make it easier for these types of
|
||||
filesystems to support fs-verity, fs/verity/ also provides a function
|
||||
fsverity_verify_bio() which verifies all pages in a bio.
|
||||
|
||||
ext4 and f2fs also support encryption. If a verity file is also
|
||||
encrypted, the pages must be decrypted before being verified. To
|
||||
support this, these filesystems allocate a "post-read context" for
|
||||
each bio and store it in ``->bi_private``::
|
||||
|
||||
struct bio_post_read_ctx {
|
||||
struct bio *bio;
|
||||
struct work_struct work;
|
||||
unsigned int cur_step;
|
||||
unsigned int enabled_steps;
|
||||
};
|
||||
|
||||
``enabled_steps`` is a bitmask that specifies whether decryption,
|
||||
verity, or both is enabled. After the bio completes, for each needed
|
||||
postprocessing step the filesystem enqueues the bio_post_read_ctx on a
|
||||
workqueue, and then the workqueue work does the decryption or
|
||||
verification. Finally, pages where no decryption or verity error
|
||||
occurred are marked Uptodate, and the pages are unlocked.
|
||||
|
||||
Files on ext4 and f2fs may contain holes. Normally, ``->readpages()``
|
||||
simply zeroes holes and sets the corresponding pages Uptodate; no bios
|
||||
are issued. To prevent this case from bypassing fs-verity, these
|
||||
filesystems use fsverity_verify_page() to verify hole pages.
|
||||
|
||||
ext4 and f2fs disable direct I/O on verity files, since otherwise
|
||||
direct I/O would bypass fs-verity. (They also do the same for
|
||||
encrypted files.)
|
||||
|
||||
Userspace utility
|
||||
=================
|
||||
|
||||
This document focuses on the kernel, but a userspace utility for
|
||||
fs-verity can be found at:
|
||||
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git
|
||||
|
||||
See the README.md file in the fsverity-utils source tree for details,
|
||||
including examples of setting up fs-verity protected files.
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
To test fs-verity, use xfstests. For example, using `kvm-xfstests
|
||||
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
|
||||
|
||||
kvm-xfstests -c ext4,f2fs -g verity
|
||||
|
||||
FAQ
|
||||
===
|
||||
|
||||
This section answers frequently asked questions about fs-verity that
|
||||
weren't already directly answered in other parts of this document.
|
||||
|
||||
:Q: Why isn't fs-verity part of IMA?
|
||||
:A: fs-verity and IMA (Integrity Measurement Architecture) have
|
||||
different focuses. fs-verity is a filesystem-level mechanism for
|
||||
hashing individual files using a Merkle tree. In contrast, IMA
|
||||
specifies a system-wide policy that specifies which files are
|
||||
hashed and what to do with those hashes, such as log them,
|
||||
authenticate them, or add them to a measurement list.
|
||||
|
||||
IMA is planned to support the fs-verity hashing mechanism as an
|
||||
alternative to doing full file hashes, for people who want the
|
||||
performance and security benefits of the Merkle tree based hash.
|
||||
But it doesn't make sense to force all uses of fs-verity to be
|
||||
through IMA. As a standalone filesystem feature, fs-verity
|
||||
already meets many users' needs, and it's testable like other
|
||||
filesystem features e.g. with xfstests.
|
||||
|
||||
:Q: Isn't fs-verity useless because the attacker can just modify the
|
||||
hashes in the Merkle tree, which is stored on-disk?
|
||||
:A: To verify the authenticity of an fs-verity file you must verify
|
||||
the authenticity of the "file measurement", which is basically the
|
||||
root hash of the Merkle tree. See `Use cases`_.
|
||||
|
||||
:Q: Isn't fs-verity useless because the attacker can just replace a
|
||||
verity file with a non-verity one?
|
||||
:A: See `Use cases`_. In the initial use case, it's really trusted
|
||||
userspace code that authenticates the files; fs-verity is just a
|
||||
tool to do this job efficiently and securely. The trusted
|
||||
userspace code will consider non-verity files to be inauthentic.
|
||||
|
||||
:Q: Why does the Merkle tree need to be stored on-disk? Couldn't you
|
||||
store just the root hash?
|
||||
:A: If the Merkle tree wasn't stored on-disk, then you'd have to
|
||||
compute the entire tree when the file is first accessed, even if
|
||||
just one byte is being read. This is a fundamental consequence of
|
||||
how Merkle tree hashing works. To verify a leaf node, you need to
|
||||
verify the whole path to the root hash, including the root node
|
||||
(the thing which the root hash is a hash of). But if the root
|
||||
node isn't stored on-disk, you have to compute it by hashing its
|
||||
children, and so on until you've actually hashed the entire file.
|
||||
|
||||
That defeats most of the point of doing a Merkle tree-based hash,
|
||||
since if you have to hash the whole file ahead of time anyway,
|
||||
then you could simply do sha256(file) instead. That would be much
|
||||
simpler, and a bit faster too.
|
||||
|
||||
It's true that an in-memory Merkle tree could still provide the
|
||||
advantage of verification on every read rather than just on the
|
||||
first read. However, it would be inefficient because every time a
|
||||
hash page gets evicted (you can't pin the entire Merkle tree into
|
||||
memory, since it may be very large), in order to restore it you
|
||||
again need to hash everything below it in the tree. This again
|
||||
defeats most of the point of doing a Merkle tree-based hash, since
|
||||
a single block read could trigger re-hashing gigabytes of data.
|
||||
|
||||
:Q: But couldn't you store just the leaf nodes and compute the rest?
|
||||
:A: See previous answer; this really just moves up one level, since
|
||||
one could alternatively interpret the data blocks as being the
|
||||
leaf nodes of the Merkle tree. It's true that the tree can be
|
||||
computed much faster if the leaf level is stored rather than just
|
||||
the data, but that's only because each level is less than 1% the
|
||||
size of the level below (assuming the recommended settings of
|
||||
SHA-256 and 4K blocks). For the exact same reason, by storing
|
||||
"just the leaf nodes" you'd already be storing over 99% of the
|
||||
tree, so you might as well simply store the whole tree.
|
||||
|
||||
:Q: Can the Merkle tree be built ahead of time, e.g. distributed as
|
||||
part of a package that is installed to many computers?
|
||||
:A: This isn't currently supported. It was part of the original
|
||||
design, but was removed to simplify the kernel UAPI and because it
|
||||
wasn't a critical use case. Files are usually installed once and
|
||||
used many times, and cryptographic hashing is somewhat fast on
|
||||
most modern processors.
|
||||
|
||||
:Q: Why doesn't fs-verity support writes?
|
||||
:A: Write support would be very difficult and would require a
|
||||
completely different design, so it's well outside the scope of
|
||||
fs-verity. Write support would require:
|
||||
|
||||
- A way to maintain consistency between the data and hashes,
|
||||
including all levels of hashes, since corruption after a crash
|
||||
(especially of potentially the entire file!) is unacceptable.
|
||||
The main options for solving this are data journalling,
|
||||
copy-on-write, and log-structured volume. But it's very hard to
|
||||
retrofit existing filesystems with new consistency mechanisms.
|
||||
Data journalling is available on ext4, but is very slow.
|
||||
|
||||
- Rebuilding the Merkle tree after every write, which would be
|
||||
extremely inefficient. Alternatively, a different authenticated
|
||||
dictionary structure such as an "authenticated skiplist" could
|
||||
be used. However, this would be far more complex.
|
||||
|
||||
Compare it to dm-verity vs. dm-integrity. dm-verity is very
|
||||
simple: the kernel just verifies read-only data against a
|
||||
read-only Merkle tree. In contrast, dm-integrity supports writes
|
||||
but is slow, is much more complex, and doesn't actually support
|
||||
full-device authentication since it authenticates each sector
|
||||
independently, i.e. there is no "root hash". It doesn't really
|
||||
make sense for the same device-mapper target to support these two
|
||||
very different cases; the same applies to fs-verity.
|
||||
|
||||
:Q: Since verity files are immutable, why isn't the immutable bit set?
|
||||
:A: The existing "immutable" bit (FS_IMMUTABLE_FL) already has a
|
||||
specific set of semantics which not only make the file contents
|
||||
read-only, but also prevent the file from being deleted, renamed,
|
||||
linked to, or having its owner or mode changed. These extra
|
||||
properties are unwanted for fs-verity, so reusing the immutable
|
||||
bit isn't appropriate.
|
||||
|
||||
:Q: Why does the API use ioctls instead of setxattr() and getxattr()?
|
||||
:A: Abusing the xattr interface for basically arbitrary syscalls is
|
||||
heavily frowned upon by most of the Linux filesystem developers.
|
||||
An xattr should really just be an xattr on-disk, not an API to
|
||||
e.g. magically trigger construction of a Merkle tree.
|
||||
|
||||
:Q: Does fs-verity support remote filesystems?
|
||||
:A: Only ext4 and f2fs support is implemented currently, but in
|
||||
principle any filesystem that can store per-file verity metadata
|
||||
can support fs-verity, regardless of whether it's local or remote.
|
||||
Some filesystems may have fewer options of where to store the
|
||||
verity metadata; one possibility is to store it past the end of
|
||||
the file and "hide" it from userspace by manipulating i_size. The
|
||||
data verification functions provided by ``fs/verity/`` also assume
|
||||
that the filesystem uses the Linux pagecache, but both local and
|
||||
remote filesystems normally do so.
|
||||
|
||||
:Q: Why is anything filesystem-specific at all? Shouldn't fs-verity
|
||||
be implemented entirely at the VFS level?
|
||||
:A: There are many reasons why this is not possible or would be very
|
||||
difficult, including the following:
|
||||
|
||||
- To prevent bypassing verification, pages must not be marked
|
||||
Uptodate until they've been verified. Currently, each
|
||||
filesystem is responsible for marking pages Uptodate via
|
||||
``->readpages()``. Therefore, currently it's not possible for
|
||||
the VFS to do the verification on its own. Changing this would
|
||||
require significant changes to the VFS and all filesystems.
|
||||
|
||||
- It would require defining a filesystem-independent way to store
|
||||
the verity metadata. Extended attributes don't work for this
|
||||
because (a) the Merkle tree may be gigabytes, but many
|
||||
filesystems assume that all xattrs fit into a single 4K
|
||||
filesystem block, and (b) ext4 and f2fs encryption doesn't
|
||||
encrypt xattrs, yet the Merkle tree *must* be encrypted when the
|
||||
file contents are, because it stores hashes of the plaintext
|
||||
file contents.
|
||||
|
||||
So the verity metadata would have to be stored in an actual
|
||||
file. Using a separate file would be very ugly, since the
|
||||
metadata is fundamentally part of the file to be protected, and
|
||||
it could cause problems where users could delete the real file
|
||||
but not the metadata file or vice versa. On the other hand,
|
||||
having it be in the same file would break applications unless
|
||||
filesystems' notion of i_size were divorced from the VFS's,
|
||||
which would be complex and require changes to all filesystems.
|
||||
|
||||
- It's desirable that FS_IOC_ENABLE_VERITY uses the filesystem's
|
||||
transaction mechanism so that either the file ends up with
|
||||
verity enabled, or no changes were made. Allowing intermediate
|
||||
states to occur after a crash may cause problems.
|
||||
@@ -359,3 +359,4 @@ encryption of files and directories.
|
||||
:maxdepth: 2
|
||||
|
||||
fscrypt
|
||||
fsverity
|
||||
|
||||
@@ -325,7 +325,7 @@ beneath or above the path of another overlay lower layer path.
|
||||
|
||||
Using an upper layer path and/or a workdir path that are already used by
|
||||
another overlay mount is not allowed and may fail with EBUSY. Using
|
||||
partially overlapping paths is not allowed but will not fail with EBUSY.
|
||||
partially overlapping paths is not allowed and may fail with EBUSY.
|
||||
If files are accessed from two overlayfs mounts which share or overlap the
|
||||
upper layer and/or workdir path the behavior of the overlay is undefined,
|
||||
though it will not result in a crash or deadlock.
|
||||
|
||||
@@ -218,5 +218,4 @@ All other architectures should build just fine too - but they won't have
|
||||
the new syscalls yet.
|
||||
|
||||
Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
|
||||
inline function before writing up the syscalls (that function returns
|
||||
-ENOSYS right now).
|
||||
inline function before writing up the syscalls.
|
||||
|
||||
@@ -20,7 +20,8 @@ void calc_runnable_avg_yN_inv(void)
|
||||
int i;
|
||||
unsigned int x;
|
||||
|
||||
printf("static const u32 runnable_avg_yN_inv[] = {");
|
||||
/* To silence -Wunused-but-set-variable warnings. */
|
||||
printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {");
|
||||
for (i = 0; i < HALFLIFE; i++) {
|
||||
x = ((1UL<<32)-1)*pow(y, i);
|
||||
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
Copyright (C) 1999, 2000 Bruce Tenison
|
||||
Portions Copyright (C) 1999, 2000 David Nelson
|
||||
Thanks to David Nelson for guidance and the usage of the scanner.txt
|
||||
and scanner.c files to model our driver and this informative file.
|
||||
|
||||
Mar. 2, 2000
|
||||
|
||||
CHANGES
|
||||
|
||||
- Initial Revision
|
||||
|
||||
|
||||
OVERVIEW
|
||||
|
||||
This README will address issues regarding how to configure the kernel
|
||||
to access a RIO 500 mp3 player.
|
||||
Before I explain how to use this to access the Rio500 please be warned:
|
||||
|
||||
W A R N I N G:
|
||||
--------------
|
||||
|
||||
Please note that this software is still under development. The authors
|
||||
are in no way responsible for any damage that may occur, no matter how
|
||||
inconsequential.
|
||||
|
||||
It seems that the Rio has a problem when sending .mp3 with low batteries.
|
||||
I suggest when the batteries are low and you want to transfer stuff that you
|
||||
replace it with a fresh one. In my case, what happened is I lost two 16kb
|
||||
blocks (they are no longer usable to store information to it). But I don't
|
||||
know if that's normal or not; it could simply be a problem with the flash
|
||||
memory.
|
||||
|
||||
In an extreme case, I left my Rio playing overnight and the batteries wore
|
||||
down to nothing and appear to have corrupted the flash memory. My RIO
|
||||
needed to be replaced as a result. Diamond tech support is aware of the
|
||||
problem. Do NOT allow your batteries to wear down to nothing before
|
||||
changing them. It appears RIO 500 firmware does not handle low battery
|
||||
power well at all.
|
||||
|
||||
On systems with OHCI controllers, the kernel OHCI code appears to have
|
||||
power on problems with some chipsets. If you are having problems
|
||||
connecting to your RIO 500, try turning it on first and then plugging it
|
||||
into the USB cable.
|
||||
|
||||
Contact information:
|
||||
--------------------
|
||||
|
||||
The main page for the project is hosted at sourceforge.net in the following
|
||||
URL: <http://rio500.sourceforge.net>. You can also go to the project's
|
||||
sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
|
||||
There is also a mailing list: rio500-users@lists.sourceforge.net
|
||||
|
||||
Authors:
|
||||
-------
|
||||
|
||||
Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
|
||||
Clayton <kclayton@jps.net> is in charge of the PPC port and making sure
|
||||
things work there. Bruce Tenison <btenison@dibbs.net> is adding support
|
||||
for .fon files and also does testing. The program will most surely be
|
||||
re-written and Pete Ikusz along with the rest will re-design it. I would
|
||||
also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided us
|
||||
with some important information regarding the communication with the Rio.
|
||||
|
||||
ADDITIONAL INFORMATION and Userspace tools
|
||||
|
||||
http://rio500.sourceforge.net/
|
||||
|
||||
|
||||
REQUIREMENTS
|
||||
|
||||
A host with a USB port. Ideally, either a UHCI (Intel) or OHCI
|
||||
(Compaq and others) hardware port should work.
|
||||
|
||||
A Linux development kernel (2.3.x) with USB support enabled or a
|
||||
backported version to linux-2.2.x. See http://www.linux-usb.org for
|
||||
more information on accomplishing this.
|
||||
|
||||
A Linux kernel with RIO 500 support enabled.
|
||||
|
||||
'lspci' which is only needed to determine the type of USB hardware
|
||||
available in your machine.
|
||||
|
||||
CONFIGURATION
|
||||
|
||||
Using `lspci -v`, determine the type of USB hardware available.
|
||||
|
||||
If you see something like:
|
||||
|
||||
USB Controller: ......
|
||||
Flags: .....
|
||||
I/O ports at ....
|
||||
|
||||
Then you have a UHCI based controller.
|
||||
|
||||
If you see something like:
|
||||
|
||||
USB Controller: .....
|
||||
Flags: ....
|
||||
Memory at .....
|
||||
|
||||
Then you have an OHCI based controller.
|
||||
|
||||
Using `make menuconfig` or your preferred method for configuring the
|
||||
kernel, select 'Support for USB', 'OHCI/UHCI' depending on your
|
||||
hardware (determined from the steps above), 'USB Diamond Rio500 support', and
|
||||
'Preliminary USB device filesystem'. Compile and install the modules
|
||||
(you may need to execute `depmod -a` to update the module
|
||||
dependencies).
|
||||
|
||||
Add a device for the USB rio500:
|
||||
`mknod /dev/usb/rio500 c 180 64`
|
||||
|
||||
Set appropriate permissions for /dev/usb/rio500 (don't forget about
|
||||
group and world permissions). Both read and write permissions are
|
||||
required for proper operation.
|
||||
|
||||
Load the appropriate modules (if compiled as modules):
|
||||
|
||||
OHCI:
|
||||
modprobe usbcore
|
||||
modprobe usb-ohci
|
||||
modprobe rio500
|
||||
|
||||
UHCI:
|
||||
modprobe usbcore
|
||||
modprobe usb-uhci (or uhci)
|
||||
modprobe rio500
|
||||
|
||||
That's it. The Rio500 Utils at: http://rio500.sourceforge.net should
|
||||
be able to access the rio500.
|
||||
|
||||
BUGS
|
||||
|
||||
If you encounter any problems feel free to drop me an email.
|
||||
|
||||
Bruce Tenison
|
||||
btenison@dibbs.net
|
||||
|
||||
@@ -47,6 +47,8 @@ If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
|
||||
available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
|
||||
misfeature will fail.
|
||||
|
||||
.. _set_spec_ctrl:
|
||||
|
||||
PR_SET_SPECULATION_CTRL
|
||||
-----------------------
|
||||
|
||||
|
||||
20
MAINTAINERS
20
MAINTAINERS
@@ -5999,6 +5999,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt.git
|
||||
S: Supported
|
||||
F: fs/crypto/
|
||||
F: include/linux/fscrypt*.h
|
||||
F: include/uapi/linux/fscrypt.h
|
||||
F: Documentation/filesystems/fscrypt.rst
|
||||
|
||||
FSI-ATTACHED I2C DRIVER
|
||||
@@ -6017,6 +6018,18 @@ S: Maintained
|
||||
F: fs/notify/
|
||||
F: include/linux/fsnotify*.h
|
||||
|
||||
FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION
|
||||
M: Eric Biggers <ebiggers@kernel.org>
|
||||
M: Theodore Y. Ts'o <tytso@mit.edu>
|
||||
L: linux-fscrypt@vger.kernel.org
|
||||
Q: https://patchwork.kernel.org/project/linux-fscrypt/list/
|
||||
T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity
|
||||
S: Supported
|
||||
F: fs/verity/
|
||||
F: include/linux/fsverity.h
|
||||
F: include/uapi/linux/fsverity.h
|
||||
F: Documentation/filesystems/fsverity.rst
|
||||
|
||||
FUJITSU LAPTOP EXTRAS
|
||||
M: Jonathan Woithe <jwoithe@just42.net>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
@@ -15117,13 +15130,6 @@ W: http://www.linux-usb.org/usbnet
|
||||
S: Maintained
|
||||
F: drivers/net/usb/dm9601.c
|
||||
|
||||
USB DIAMOND RIO500 DRIVER
|
||||
M: Cesar Miquel <miquel@df.uba.ar>
|
||||
L: rio500-users@lists.sourceforge.net
|
||||
W: http://rio500.sourceforge.net
|
||||
S: Maintained
|
||||
F: drivers/usb/misc/rio500*
|
||||
|
||||
USB EHCI DRIVER
|
||||
M: Alan Stern <stern@rowland.harvard.edu>
|
||||
L: linux-usb@vger.kernel.org
|
||||
|
||||
12
Makefile
12
Makefile
@@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
VERSION = 4
|
||||
PATCHLEVEL = 19
|
||||
SUBLEVEL = 53
|
||||
SUBLEVEL = 80
|
||||
EXTRAVERSION =
|
||||
NAME = "People's Front"
|
||||
|
||||
@@ -454,6 +454,7 @@ KBUILD_CFLAGS_MODULE := -DMODULE
|
||||
KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
|
||||
KBUILD_LDFLAGS :=
|
||||
GCC_PLUGINS_CFLAGS :=
|
||||
CLANG_FLAGS :=
|
||||
|
||||
export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
|
||||
export CPP AR NM STRIP OBJCOPY OBJDUMP KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
|
||||
@@ -507,7 +508,7 @@ endif
|
||||
ifeq ($(cc-name),clang)
|
||||
ifneq ($(CROSS_COMPILE),)
|
||||
CLANG_TRIPLE ?= $(CROSS_COMPILE)
|
||||
CLANG_FLAGS := --target=$(notdir $(CLANG_TRIPLE:%-=%))
|
||||
CLANG_FLAGS += --target=$(notdir $(CLANG_TRIPLE:%-=%))
|
||||
ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_FLAGS)), y)
|
||||
$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?")
|
||||
endif
|
||||
@@ -519,6 +520,7 @@ ifneq ($(GCC_TOOLCHAIN),)
|
||||
CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN)
|
||||
endif
|
||||
CLANG_FLAGS += -no-integrated-as
|
||||
CLANG_FLAGS += -Werror=unknown-warning-option
|
||||
KBUILD_CFLAGS += $(CLANG_FLAGS)
|
||||
KBUILD_AFLAGS += $(CLANG_FLAGS)
|
||||
export CLANG_FLAGS
|
||||
@@ -690,6 +692,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
|
||||
|
||||
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,)
|
||||
@@ -740,7 +743,6 @@ ifeq ($(cc-name),clang)
|
||||
KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
|
||||
# Quiet clang warning: comparison of unsigned expression < 0 is always false
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
|
||||
# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
|
||||
@@ -946,6 +948,10 @@ ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
|
||||
LDFLAGS_vmlinux += $(call ld-option, -X,)
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_RELR),y)
|
||||
LDFLAGS_vmlinux += --pack-dyn-relocs=relr
|
||||
endif
|
||||
|
||||
# insure the checker run with the right endianness
|
||||
CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
|
||||
|
||||
|
||||
175918
abi_gki_aarch64.xml
Normal file
175918
abi_gki_aarch64.xml
Normal file
File diff suppressed because it is too large
Load Diff
14
arch/Kconfig
14
arch/Kconfig
@@ -930,6 +930,20 @@ config HAVE_ARCH_PREL32_RELOCATIONS
|
||||
architectures, and don't require runtime relocation on relocatable
|
||||
kernels.
|
||||
|
||||
# Select if the architecture has support for applying RELR relocations.
|
||||
config ARCH_HAS_RELR
|
||||
bool
|
||||
|
||||
config RELR
|
||||
bool "Use RELR relocation packing"
|
||||
depends on ARCH_HAS_RELR && TOOLS_SUPPORT_RELR
|
||||
default y
|
||||
help
|
||||
Store the kernel's dynamic relocations in the RELR relocation packing
|
||||
format. Requires a compatible linker (LLD supports this feature), as
|
||||
well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy
|
||||
are compatible).
|
||||
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
||||
source "scripts/gcc-plugins/Kconfig"
|
||||
|
||||
@@ -199,7 +199,6 @@ config NR_CPUS
|
||||
|
||||
config ARC_SMP_HALT_ON_RESET
|
||||
bool "Enable Halt-on-reset boot mode"
|
||||
default y if ARC_UBOOT_SUPPORT
|
||||
help
|
||||
In SMP configuration cores can be configured as Halt-on-reset
|
||||
or they could all start at same time. For Halt-on-reset, non
|
||||
@@ -539,18 +538,6 @@ config ARC_DBG_TLB_PARANOIA
|
||||
|
||||
endif
|
||||
|
||||
config ARC_UBOOT_SUPPORT
|
||||
bool "Support uboot arg Handling"
|
||||
default n
|
||||
help
|
||||
ARC Linux by default checks for uboot provided args as pointers to
|
||||
external cmdline or DTB. This however breaks in absence of uboot,
|
||||
when booting from Metaware debugger directly, as the registers are
|
||||
not zeroed out on reset by mdb and/or ARCv2 based cores. The bogus
|
||||
registers look like uboot args to kernel which then chokes.
|
||||
So only enable the uboot arg checking/processing if users are sure
|
||||
of uboot being in play.
|
||||
|
||||
config ARC_BUILTIN_DTB_NAME
|
||||
string "Built in DTB"
|
||||
help
|
||||
|
||||
@@ -175,6 +175,7 @@
|
||||
interrupt-names = "macirq";
|
||||
phy-mode = "rgmii";
|
||||
snps,pbl = <32>;
|
||||
snps,multicast-filter-bins = <256>;
|
||||
clocks = <&gmacclk>;
|
||||
clock-names = "stmmaceth";
|
||||
phy-handle = <&phy0>;
|
||||
@@ -183,6 +184,9 @@
|
||||
mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */
|
||||
dma-coherent;
|
||||
|
||||
tx-fifo-depth = <4096>;
|
||||
rx-fifo-depth = <4096>;
|
||||
|
||||
mdio {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
@@ -31,7 +31,6 @@ CONFIG_ARC_CACHE_LINE_SHIFT=5
|
||||
# CONFIG_ARC_HAS_LLSC is not set
|
||||
CONFIG_ARC_KVADDR_SIZE=402
|
||||
CONFIG_ARC_EMUL_UNALIGNED=y
|
||||
CONFIG_ARC_UBOOT_SUPPORT=y
|
||||
CONFIG_PREEMPT=y
|
||||
CONFIG_NET=y
|
||||
CONFIG_UNIX=y
|
||||
|
||||
@@ -13,7 +13,6 @@ CONFIG_PARTITION_ADVANCED=y
|
||||
CONFIG_ARC_PLAT_AXS10X=y
|
||||
CONFIG_AXS103=y
|
||||
CONFIG_ISA_ARCV2=y
|
||||
CONFIG_ARC_UBOOT_SUPPORT=y
|
||||
CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38"
|
||||
CONFIG_PREEMPT=y
|
||||
CONFIG_NET=y
|
||||
|
||||
@@ -15,8 +15,6 @@ CONFIG_AXS103=y
|
||||
CONFIG_ISA_ARCV2=y
|
||||
CONFIG_SMP=y
|
||||
# CONFIG_ARC_TIMERS_64BIT is not set
|
||||
# CONFIG_ARC_SMP_HALT_ON_RESET is not set
|
||||
CONFIG_ARC_UBOOT_SUPPORT=y
|
||||
CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
|
||||
CONFIG_PREEMPT=y
|
||||
CONFIG_NET=y
|
||||
|
||||
@@ -92,8 +92,11 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
|
||||
|
||||
#endif /* CONFIG_ARC_HAS_LLSC */
|
||||
|
||||
#define cmpxchg(ptr, o, n) ((typeof(*(ptr)))__cmpxchg((ptr), \
|
||||
(unsigned long)(o), (unsigned long)(n)))
|
||||
#define cmpxchg(ptr, o, n) ({ \
|
||||
(typeof(*(ptr)))__cmpxchg((ptr), \
|
||||
(unsigned long)(o), \
|
||||
(unsigned long)(n)); \
|
||||
})
|
||||
|
||||
/*
|
||||
* atomic_cmpxchg is same as cmpxchg
|
||||
@@ -198,8 +201,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
|
||||
return __xchg_bad_pointer();
|
||||
}
|
||||
|
||||
#define xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
|
||||
sizeof(*(ptr))))
|
||||
#define xchg(ptr, with) ({ \
|
||||
(typeof(*(ptr)))__xchg((unsigned long)(with), \
|
||||
(ptr), \
|
||||
sizeof(*(ptr))); \
|
||||
})
|
||||
|
||||
#endif /* CONFIG_ARC_PLAT_EZNPS */
|
||||
|
||||
|
||||
@@ -100,7 +100,6 @@ ENTRY(stext)
|
||||
st.ab 0, [r5, 4]
|
||||
1:
|
||||
|
||||
#ifdef CONFIG_ARC_UBOOT_SUPPORT
|
||||
; Uboot - kernel ABI
|
||||
; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
|
||||
; r1 = magic number (always zero as of now)
|
||||
@@ -109,7 +108,6 @@ ENTRY(stext)
|
||||
st r0, [@uboot_tag]
|
||||
st r1, [@uboot_magic]
|
||||
st r2, [@uboot_arg]
|
||||
#endif
|
||||
|
||||
; setup "current" tsk and optionally cache it in dedicated r25
|
||||
mov r9, @init_task
|
||||
|
||||
@@ -493,7 +493,6 @@ void __init handle_uboot_args(void)
|
||||
bool use_embedded_dtb = true;
|
||||
bool append_cmdline = false;
|
||||
|
||||
#ifdef CONFIG_ARC_UBOOT_SUPPORT
|
||||
/* check that we know this tag */
|
||||
if (uboot_tag != UBOOT_TAG_NONE &&
|
||||
uboot_tag != UBOOT_TAG_CMDLINE &&
|
||||
@@ -525,7 +524,6 @@ void __init handle_uboot_args(void)
|
||||
append_cmdline = true;
|
||||
|
||||
ignore_uboot_args:
|
||||
#endif
|
||||
|
||||
if (use_embedded_dtb) {
|
||||
machine_desc = setup_machine_fdt(__dtb_start);
|
||||
|
||||
@@ -179,6 +179,12 @@ void show_regs(struct pt_regs *regs)
|
||||
struct task_struct *tsk = current;
|
||||
struct callee_regs *cregs;
|
||||
|
||||
/*
|
||||
* generic code calls us with preemption disabled, but some calls
|
||||
* here could sleep, so re-enable to avoid lockdep splat
|
||||
*/
|
||||
preempt_enable();
|
||||
|
||||
print_task_path_n_nm(tsk);
|
||||
show_regs_print_info(KERN_INFO);
|
||||
|
||||
@@ -221,6 +227,8 @@ void show_regs(struct pt_regs *regs)
|
||||
cregs = (struct callee_regs *)current->thread.callee_reg;
|
||||
if (cregs)
|
||||
show_callee_regs(cregs);
|
||||
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
|
||||
|
||||
@@ -185,11 +185,6 @@ static void *__init unw_hdr_alloc_early(unsigned long sz)
|
||||
MAX_DMA_ADDRESS);
|
||||
}
|
||||
|
||||
static void *unw_hdr_alloc(unsigned long sz)
|
||||
{
|
||||
return kmalloc(sz, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void init_unwind_table(struct unwind_table *table, const char *name,
|
||||
const void *core_start, unsigned long core_size,
|
||||
const void *init_start, unsigned long init_size,
|
||||
@@ -370,6 +365,10 @@ ret_err:
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
static void *unw_hdr_alloc(unsigned long sz)
|
||||
{
|
||||
return kmalloc(sz, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static struct unwind_table *last_table;
|
||||
|
||||
|
||||
@@ -66,14 +66,12 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
|
||||
struct vm_area_struct *vma = NULL;
|
||||
struct task_struct *tsk = current;
|
||||
struct mm_struct *mm = tsk->mm;
|
||||
siginfo_t info;
|
||||
int si_code = SEGV_MAPERR;
|
||||
int ret;
|
||||
vm_fault_t fault;
|
||||
int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
|
||||
clear_siginfo(&info);
|
||||
|
||||
/*
|
||||
* We fault-in kernel-space virtual memory on-demand. The
|
||||
* 'reference' page table is init_mm.pgd.
|
||||
@@ -83,16 +81,14 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
|
||||
* only copy the information from the master page table,
|
||||
* nothing more.
|
||||
*/
|
||||
if (address >= VMALLOC_START) {
|
||||
if (address >= VMALLOC_START && !user_mode(regs)) {
|
||||
ret = handle_kernel_vaddr_fault(address);
|
||||
if (unlikely(ret))
|
||||
goto bad_area_nosemaphore;
|
||||
goto no_context;
|
||||
else
|
||||
return;
|
||||
}
|
||||
|
||||
info.si_code = SEGV_MAPERR;
|
||||
|
||||
/*
|
||||
* If we're in an interrupt or have no user
|
||||
* context, we must not take the fault..
|
||||
@@ -119,7 +115,7 @@ retry:
|
||||
* we can handle it..
|
||||
*/
|
||||
good_area:
|
||||
info.si_code = SEGV_ACCERR;
|
||||
si_code = SEGV_ACCERR;
|
||||
|
||||
/* Handle protection violation, execute on heap or stack */
|
||||
|
||||
@@ -143,12 +139,17 @@ good_area:
|
||||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
|
||||
if (unlikely(fatal_signal_pending(current))) {
|
||||
if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
|
||||
up_read(&mm->mmap_sem);
|
||||
if (user_mode(regs))
|
||||
|
||||
/*
|
||||
* if fault retry, mmap_sem already relinquished by core mm
|
||||
* so OK to return to user mode (with signal handled first)
|
||||
*/
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
@@ -195,15 +196,10 @@ good_area:
|
||||
bad_area:
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
bad_area_nosemaphore:
|
||||
/* User mode accesses just cause a SIGSEGV */
|
||||
if (user_mode(regs)) {
|
||||
tsk->thread.fault_address = address;
|
||||
info.si_signo = SIGSEGV;
|
||||
info.si_errno = 0;
|
||||
/* info.si_code has been set above */
|
||||
info.si_addr = (void __user *)address;
|
||||
force_sig_info(SIGSEGV, &info, tsk);
|
||||
force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -238,9 +234,5 @@ do_sigbus:
|
||||
goto no_context;
|
||||
|
||||
tsk->thread.fault_address = address;
|
||||
info.si_signo = SIGBUS;
|
||||
info.si_errno = 0;
|
||||
info.si_code = BUS_ADRERR;
|
||||
info.si_addr = (void __user *)address;
|
||||
force_sig_info(SIGBUS, &info, tsk);
|
||||
force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
|
||||
}
|
||||
|
||||
@@ -911,9 +911,11 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
|
||||
unsigned int pd0[mmu->ways];
|
||||
unsigned long flags;
|
||||
int set;
|
||||
int set, n_ways = mmu->ways;
|
||||
|
||||
n_ways = min(n_ways, 4);
|
||||
BUG_ON(mmu->ways > 4);
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
@@ -921,9 +923,10 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
|
||||
for (set = 0; set < mmu->sets; set++) {
|
||||
|
||||
int is_valid, way;
|
||||
unsigned int pd0[4];
|
||||
|
||||
/* read out all the ways of current set */
|
||||
for (way = 0, is_valid = 0; way < mmu->ways; way++) {
|
||||
for (way = 0, is_valid = 0; way < n_ways; way++) {
|
||||
write_aux_reg(ARC_REG_TLBINDEX,
|
||||
SET_WAY_TO_IDX(mmu, set, way));
|
||||
write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
|
||||
@@ -937,14 +940,14 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
|
||||
continue;
|
||||
|
||||
/* Scan the set for duplicate ways: needs a nested loop */
|
||||
for (way = 0; way < mmu->ways - 1; way++) {
|
||||
for (way = 0; way < n_ways - 1; way++) {
|
||||
|
||||
int n;
|
||||
|
||||
if (!pd0[way])
|
||||
continue;
|
||||
|
||||
for (n = way + 1; n < mmu->ways; n++) {
|
||||
for (n = way + 1; n < n_ways; n++) {
|
||||
if (pd0[way] != pd0[n])
|
||||
continue;
|
||||
|
||||
|
||||
@@ -1586,8 +1586,9 @@ config ARM_PATCH_IDIV
|
||||
code to do integer division.
|
||||
|
||||
config AEABI
|
||||
bool "Use the ARM EABI to compile the kernel" if !CPU_V7 && !CPU_V7M && !CPU_V6 && !CPU_V6K
|
||||
default CPU_V7 || CPU_V7M || CPU_V6 || CPU_V6K
|
||||
bool "Use the ARM EABI to compile the kernel" if !CPU_V7 && \
|
||||
!CPU_V7M && !CPU_V6 && !CPU_V6K && !CC_IS_CLANG
|
||||
default CPU_V7 || CPU_V7M || CPU_V6 || CPU_V6K || CC_IS_CLANG
|
||||
help
|
||||
This option allows for the kernel to be compiled using the latest
|
||||
ARM ABI (aka EABI). This is only useful if you are using a user
|
||||
|
||||
@@ -197,7 +197,7 @@
|
||||
bus-width = <4>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&mmc1_pins>;
|
||||
cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
|
||||
cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
|
||||
@@ -157,7 +157,7 @@
|
||||
bus-width = <4>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&mmc1_pins>;
|
||||
cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
|
||||
cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
|
||||
@@ -98,14 +98,9 @@
|
||||
};
|
||||
|
||||
&mmc1 {
|
||||
pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104";
|
||||
pinctrl-names = "default", "hs";
|
||||
pinctrl-0 = <&mmc1_pins_default_no_clk_pu>;
|
||||
pinctrl-1 = <&mmc1_pins_hs>;
|
||||
pinctrl-2 = <&mmc1_pins_sdr12>;
|
||||
pinctrl-3 = <&mmc1_pins_sdr25>;
|
||||
pinctrl-4 = <&mmc1_pins_sdr50>;
|
||||
pinctrl-5 = <&mmc1_pins_ddr50_rev20 &mmc1_iodelay_ddr50_conf>;
|
||||
pinctrl-6 = <&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev20_conf>;
|
||||
};
|
||||
|
||||
&mmc2 {
|
||||
|
||||
@@ -20,14 +20,9 @@
|
||||
};
|
||||
|
||||
&mmc1 {
|
||||
pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104";
|
||||
pinctrl-names = "default", "hs";
|
||||
pinctrl-0 = <&mmc1_pins_default_no_clk_pu>;
|
||||
pinctrl-1 = <&mmc1_pins_hs>;
|
||||
pinctrl-2 = <&mmc1_pins_sdr12>;
|
||||
pinctrl-3 = <&mmc1_pins_sdr25>;
|
||||
pinctrl-4 = <&mmc1_pins_sdr50>;
|
||||
pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_rev20_conf>;
|
||||
pinctrl-6 = <&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev20_conf>;
|
||||
};
|
||||
|
||||
&mmc2 {
|
||||
|
||||
@@ -24,14 +24,9 @@
|
||||
};
|
||||
|
||||
&mmc1 {
|
||||
pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104";
|
||||
pinctrl-names = "default", "hs";
|
||||
pinctrl-0 = <&mmc1_pins_default_no_clk_pu>;
|
||||
pinctrl-1 = <&mmc1_pins_hs>;
|
||||
pinctrl-2 = <&mmc1_pins_default>;
|
||||
pinctrl-3 = <&mmc1_pins_hs>;
|
||||
pinctrl-4 = <&mmc1_pins_sdr50>;
|
||||
pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_conf>;
|
||||
pinctrl-6 = <&mmc1_pins_ddr50 &mmc1_iodelay_sdr104_conf>;
|
||||
};
|
||||
|
||||
&mmc2 {
|
||||
|
||||
@@ -433,6 +433,7 @@
|
||||
|
||||
bus-width = <4>;
|
||||
cd-gpios = <&gpio6 27 GPIO_ACTIVE_LOW>; /* gpio 219 */
|
||||
no-1-8-v;
|
||||
};
|
||||
|
||||
&mmc2 {
|
||||
|
||||
@@ -19,14 +19,9 @@
|
||||
};
|
||||
|
||||
&mmc1 {
|
||||
pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104";
|
||||
pinctrl-names = "default", "hs";
|
||||
pinctrl-0 = <&mmc1_pins_default>;
|
||||
pinctrl-1 = <&mmc1_pins_hs>;
|
||||
pinctrl-2 = <&mmc1_pins_sdr12>;
|
||||
pinctrl-3 = <&mmc1_pins_sdr25>;
|
||||
pinctrl-4 = <&mmc1_pins_sdr50>;
|
||||
pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_rev11_conf>;
|
||||
pinctrl-6 = <&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev11_conf>;
|
||||
vmmc-supply = <&vdd_3v3>;
|
||||
vqmmc-supply = <&ldo1_reg>;
|
||||
};
|
||||
|
||||
@@ -19,14 +19,9 @@
|
||||
};
|
||||
|
||||
&mmc1 {
|
||||
pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104";
|
||||
pinctrl-names = "default", "hs";
|
||||
pinctrl-0 = <&mmc1_pins_default>;
|
||||
pinctrl-1 = <&mmc1_pins_hs>;
|
||||
pinctrl-2 = <&mmc1_pins_sdr12>;
|
||||
pinctrl-3 = <&mmc1_pins_sdr25>;
|
||||
pinctrl-4 = <&mmc1_pins_sdr50>;
|
||||
pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_rev20_conf>;
|
||||
pinctrl-6 = <&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev20_conf>;
|
||||
vmmc-supply = <&vdd_3v3>;
|
||||
vqmmc-supply = <&ldo1_reg>;
|
||||
};
|
||||
|
||||
@@ -410,6 +410,7 @@
|
||||
vqmmc-supply = <&ldo1_reg>;
|
||||
bus-width = <4>;
|
||||
cd-gpios = <&gpio6 27 GPIO_ACTIVE_LOW>; /* gpio 219 */
|
||||
no-1-8-v;
|
||||
};
|
||||
|
||||
&mmc2 {
|
||||
|
||||
@@ -336,3 +336,11 @@
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
&uart0 {
|
||||
compatible = "marvell,armada-38x-uart";
|
||||
};
|
||||
|
||||
&uart1 {
|
||||
compatible = "marvell,armada-38x-uart";
|
||||
};
|
||||
|
||||
|
||||
@@ -125,6 +125,9 @@
|
||||
};
|
||||
|
||||
mdio-bus-mux {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
/* BIT(9) = 1 => external mdio */
|
||||
mdio_ext: mdio@200 {
|
||||
reg = <0x200>;
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
*
|
||||
* Datamanual Revisions:
|
||||
*
|
||||
* AM572x Silicon Revision 2.0: SPRS953B, Revised November 2016
|
||||
* AM572x Silicon Revision 2.0: SPRS953F, Revised May 2019
|
||||
* AM572x Silicon Revision 1.1: SPRS915R, Revised November 2016
|
||||
*
|
||||
*/
|
||||
@@ -229,45 +229,45 @@
|
||||
|
||||
mmc3_pins_default: mmc3_pins_default {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
mmc3_pins_hs: mmc3_pins_hs {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
mmc3_pins_sdr12: mmc3_pins_sdr12 {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
mmc3_pins_sdr25: mmc3_pins_sdr25 {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
*
|
||||
* Datamanual Revisions:
|
||||
*
|
||||
* DRA76x Silicon Revision 1.0: SPRS993A, Revised July 2017
|
||||
* DRA76x Silicon Revision 1.0: SPRS993E, Revised December 2018
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -169,25 +169,25 @@
|
||||
/* Corresponds to MMC2_HS200_MANUAL1 in datamanual */
|
||||
mmc2_iodelay_hs200_conf: mmc2_iodelay_hs200_conf {
|
||||
pinctrl-pin-array = <
|
||||
0x190 A_DELAY_PS(384) G_DELAY_PS(0) /* CFG_GPMC_A19_OEN */
|
||||
0x194 A_DELAY_PS(0) G_DELAY_PS(174) /* CFG_GPMC_A19_OUT */
|
||||
0x1a8 A_DELAY_PS(410) G_DELAY_PS(0) /* CFG_GPMC_A20_OEN */
|
||||
0x1ac A_DELAY_PS(85) G_DELAY_PS(0) /* CFG_GPMC_A20_OUT */
|
||||
0x1b4 A_DELAY_PS(468) G_DELAY_PS(0) /* CFG_GPMC_A21_OEN */
|
||||
0x1b8 A_DELAY_PS(139) G_DELAY_PS(0) /* CFG_GPMC_A21_OUT */
|
||||
0x1c0 A_DELAY_PS(676) G_DELAY_PS(0) /* CFG_GPMC_A22_OEN */
|
||||
0x1c4 A_DELAY_PS(69) G_DELAY_PS(0) /* CFG_GPMC_A22_OUT */
|
||||
0x1d0 A_DELAY_PS(1062) G_DELAY_PS(154) /* CFG_GPMC_A23_OUT */
|
||||
0x1d8 A_DELAY_PS(640) G_DELAY_PS(0) /* CFG_GPMC_A24_OEN */
|
||||
0x1dc A_DELAY_PS(0) G_DELAY_PS(0) /* CFG_GPMC_A24_OUT */
|
||||
0x1e4 A_DELAY_PS(356) G_DELAY_PS(0) /* CFG_GPMC_A25_OEN */
|
||||
0x1e8 A_DELAY_PS(0) G_DELAY_PS(0) /* CFG_GPMC_A25_OUT */
|
||||
0x1f0 A_DELAY_PS(579) G_DELAY_PS(0) /* CFG_GPMC_A26_OEN */
|
||||
0x1f4 A_DELAY_PS(0) G_DELAY_PS(0) /* CFG_GPMC_A26_OUT */
|
||||
0x1fc A_DELAY_PS(435) G_DELAY_PS(0) /* CFG_GPMC_A27_OEN */
|
||||
0x200 A_DELAY_PS(36) G_DELAY_PS(0) /* CFG_GPMC_A27_OUT */
|
||||
0x364 A_DELAY_PS(759) G_DELAY_PS(0) /* CFG_GPMC_CS1_OEN */
|
||||
0x368 A_DELAY_PS(72) G_DELAY_PS(0) /* CFG_GPMC_CS1_OUT */
|
||||
0x190 A_DELAY_PS(384) G_DELAY_PS(0) /* CFG_GPMC_A19_OEN */
|
||||
0x194 A_DELAY_PS(350) G_DELAY_PS(174) /* CFG_GPMC_A19_OUT */
|
||||
0x1a8 A_DELAY_PS(410) G_DELAY_PS(0) /* CFG_GPMC_A20_OEN */
|
||||
0x1ac A_DELAY_PS(335) G_DELAY_PS(0) /* CFG_GPMC_A20_OUT */
|
||||
0x1b4 A_DELAY_PS(468) G_DELAY_PS(0) /* CFG_GPMC_A21_OEN */
|
||||
0x1b8 A_DELAY_PS(339) G_DELAY_PS(0) /* CFG_GPMC_A21_OUT */
|
||||
0x1c0 A_DELAY_PS(676) G_DELAY_PS(0) /* CFG_GPMC_A22_OEN */
|
||||
0x1c4 A_DELAY_PS(219) G_DELAY_PS(0) /* CFG_GPMC_A22_OUT */
|
||||
0x1d0 A_DELAY_PS(1062) G_DELAY_PS(154) /* CFG_GPMC_A23_OUT */
|
||||
0x1d8 A_DELAY_PS(640) G_DELAY_PS(0) /* CFG_GPMC_A24_OEN */
|
||||
0x1dc A_DELAY_PS(150) G_DELAY_PS(0) /* CFG_GPMC_A24_OUT */
|
||||
0x1e4 A_DELAY_PS(356) G_DELAY_PS(0) /* CFG_GPMC_A25_OEN */
|
||||
0x1e8 A_DELAY_PS(150) G_DELAY_PS(0) /* CFG_GPMC_A25_OUT */
|
||||
0x1f0 A_DELAY_PS(579) G_DELAY_PS(0) /* CFG_GPMC_A26_OEN */
|
||||
0x1f4 A_DELAY_PS(200) G_DELAY_PS(0) /* CFG_GPMC_A26_OUT */
|
||||
0x1fc A_DELAY_PS(435) G_DELAY_PS(0) /* CFG_GPMC_A27_OEN */
|
||||
0x200 A_DELAY_PS(236) G_DELAY_PS(0) /* CFG_GPMC_A27_OUT */
|
||||
0x364 A_DELAY_PS(759) G_DELAY_PS(0) /* CFG_GPMC_CS1_OEN */
|
||||
0x368 A_DELAY_PS(372) G_DELAY_PS(0) /* CFG_GPMC_CS1_OUT */
|
||||
>;
|
||||
};
|
||||
|
||||
|
||||
@@ -437,6 +437,7 @@
|
||||
regulator-name = "vdd_ldo10";
|
||||
regulator-min-microvolt = <1800000>;
|
||||
regulator-max-microvolt = <1800000>;
|
||||
regulator-always-on;
|
||||
regulator-state-mem {
|
||||
regulator-off-in-suspend;
|
||||
};
|
||||
|
||||
@@ -437,6 +437,7 @@
|
||||
regulator-name = "vdd_ldo10";
|
||||
regulator-min-microvolt = <1800000>;
|
||||
regulator-max-microvolt = <1800000>;
|
||||
regulator-always-on;
|
||||
regulator-state-mem {
|
||||
regulator-off-in-suspend;
|
||||
};
|
||||
|
||||
@@ -65,7 +65,7 @@
|
||||
gpio-miso = <&gpio1 8 GPIO_ACTIVE_HIGH>;
|
||||
gpio-mosi = <&gpio1 7 GPIO_ACTIVE_HIGH>;
|
||||
/* Collides with pflash CE1, not so cool */
|
||||
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
|
||||
cs-gpios = <&gpio0 20 GPIO_ACTIVE_LOW>;
|
||||
num-chipselects = <1>;
|
||||
|
||||
panel: display@0 {
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
/ {
|
||||
model = "D-Link DNS-313 1-Bay Network Storage Enclosure";
|
||||
compatible = "dlink,dir-313", "cortina,gemini";
|
||||
compatible = "dlink,dns-313", "cortina,gemini";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
||||
|
||||
@@ -359,7 +359,7 @@
|
||||
pwm1: pwm@2080000 {
|
||||
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
|
||||
reg = <0x02080000 0x4000>;
|
||||
interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&clks IMX6UL_CLK_PWM1>,
|
||||
<&clks IMX6UL_CLK_PWM1>;
|
||||
clock-names = "ipg", "per";
|
||||
@@ -370,7 +370,7 @@
|
||||
pwm2: pwm@2084000 {
|
||||
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
|
||||
reg = <0x02084000 0x4000>;
|
||||
interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&clks IMX6UL_CLK_PWM2>,
|
||||
<&clks IMX6UL_CLK_PWM2>;
|
||||
clock-names = "ipg", "per";
|
||||
@@ -381,7 +381,7 @@
|
||||
pwm3: pwm@2088000 {
|
||||
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
|
||||
reg = <0x02088000 0x4000>;
|
||||
interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&clks IMX6UL_CLK_PWM3>,
|
||||
<&clks IMX6UL_CLK_PWM3>;
|
||||
clock-names = "ipg", "per";
|
||||
@@ -392,7 +392,7 @@
|
||||
pwm4: pwm@208c000 {
|
||||
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
|
||||
reg = <0x0208c000 0x4000>;
|
||||
interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&clks IMX6UL_CLK_PWM4>,
|
||||
<&clks IMX6UL_CLK_PWM4>;
|
||||
clock-names = "ipg", "per";
|
||||
|
||||
@@ -323,6 +323,7 @@
|
||||
vmmc-supply = <&reg_module_3v3>;
|
||||
vqmmc-supply = <&reg_DCDC3>;
|
||||
non-removable;
|
||||
sdhci-caps-mask = <0x80000000 0x0>;
|
||||
};
|
||||
|
||||
&iomuxc {
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
<&clks IMX7D_ENET1_TIME_ROOT_CLK>;
|
||||
assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;
|
||||
assigned-clock-rates = <0>, <100000000>;
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
phy-handle = <&ethphy0>;
|
||||
fsl,magic-packet;
|
||||
status = "okay";
|
||||
@@ -69,7 +69,7 @@
|
||||
<&clks IMX7D_ENET2_TIME_ROOT_CLK>;
|
||||
assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;
|
||||
assigned-clock-rates = <0>, <100000000>;
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
phy-handle = <&ethphy1>;
|
||||
fsl,magic-packet;
|
||||
status = "okay";
|
||||
|
||||
@@ -386,10 +386,10 @@
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
|
||||
ranges = <0x81000000 0 0x40200000 0x40200000 0 0x00100000
|
||||
0x82000000 0 0x48000000 0x48000000 0 0x10000000>;
|
||||
ranges = <0x81000000 0 0x40200000 0x40200000 0 0x00100000>,
|
||||
<0x82000000 0 0x40300000 0x40300000 0 0x00d00000>;
|
||||
|
||||
interrupts = <GIC_SPI 141 IRQ_TYPE_EDGE_RISING>;
|
||||
interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-names = "msi";
|
||||
#interrupt-cells = <1>;
|
||||
interrupt-map-mask = <0 0 0 0x7>;
|
||||
|
||||
@@ -124,10 +124,6 @@
|
||||
};
|
||||
};
|
||||
|
||||
&emmc {
|
||||
/delete-property/mmc-hs200-1_8v;
|
||||
};
|
||||
|
||||
&i2c2 {
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
@@ -90,10 +90,6 @@
|
||||
pwm-off-delay-ms = <200>;
|
||||
};
|
||||
|
||||
&emmc {
|
||||
/delete-property/mmc-hs200-1_8v;
|
||||
};
|
||||
|
||||
&gpio_keys {
|
||||
pinctrl-0 = <&pwr_key_l &ap_lid_int_l &volum_down_l &volum_up_l>;
|
||||
|
||||
|
||||
@@ -228,6 +228,7 @@
|
||||
<GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
|
||||
<GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
|
||||
clock-frequency = <24000000>;
|
||||
arm,no-tick-in-suspend;
|
||||
};
|
||||
|
||||
timer: timer@ff810000 {
|
||||
|
||||
@@ -91,7 +91,6 @@ CONFIG_USB_SERIAL_PL2303=m
|
||||
CONFIG_USB_SERIAL_CYBERJACK=m
|
||||
CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_EXT2_FS=m
|
||||
CONFIG_EXT3_FS=m
|
||||
CONFIG_MSDOS_FS=y
|
||||
|
||||
@@ -197,7 +197,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYTHERM=m
|
||||
|
||||
@@ -588,7 +588,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYTHERM=m
|
||||
|
||||
@@ -334,7 +334,6 @@ CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_ADUTUX=m
|
||||
CONFIG_USB_SEVSEG=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYPRESS_CY7C63=m
|
||||
|
||||
@@ -191,7 +191,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYTHERM=m
|
||||
|
||||
@@ -22,7 +22,6 @@ config KVM
|
||||
bool "Kernel-based Virtual Machine (KVM) support"
|
||||
depends on MMU && OF
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select ARM_GIC
|
||||
select ARM_GIC_V3
|
||||
select ARM_GIC_V3_ITS
|
||||
|
||||
@@ -658,13 +658,22 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
}
|
||||
|
||||
static void reset_coproc_regs(struct kvm_vcpu *vcpu,
|
||||
const struct coproc_reg *table, size_t num)
|
||||
const struct coproc_reg *table, size_t num,
|
||||
unsigned long *bmap)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < num; i++)
|
||||
if (table[i].reset)
|
||||
if (table[i].reset) {
|
||||
int reg = table[i].reg;
|
||||
|
||||
table[i].reset(vcpu, &table[i]);
|
||||
if (reg > 0 && reg < NR_CP15_REGS) {
|
||||
set_bit(reg, bmap);
|
||||
if (table[i].is_64bit)
|
||||
set_bit(reg + 1, bmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu)
|
||||
@@ -1439,17 +1448,15 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
size_t num;
|
||||
const struct coproc_reg *table;
|
||||
|
||||
/* Catch someone adding a register without putting in reset entry. */
|
||||
memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15));
|
||||
DECLARE_BITMAP(bmap, NR_CP15_REGS) = { 0, };
|
||||
|
||||
/* Generic chip reset first (so target could override). */
|
||||
reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
|
||||
reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs), bmap);
|
||||
|
||||
table = get_target_table(vcpu->arch.target, &num);
|
||||
reset_coproc_regs(vcpu, table, num);
|
||||
reset_coproc_regs(vcpu, table, num, bmap);
|
||||
|
||||
for (num = 1; num < NR_CP15_REGS; num++)
|
||||
WARN(vcpu_cp15(vcpu, num) == 0x42424242,
|
||||
WARN(!test_bit(num, bmap),
|
||||
"Didn't reset vcpu_cp15(vcpu, %zi)", num);
|
||||
}
|
||||
|
||||
@@ -1472,6 +1472,8 @@ static __init void da850_evm_init(void)
|
||||
if (ret)
|
||||
pr_warn("%s: dsp/rproc registration failed: %d\n",
|
||||
__func__, ret);
|
||||
|
||||
regulator_has_full_constraints();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SERIAL_8250_CONSOLE
|
||||
|
||||
@@ -685,6 +685,9 @@ static struct platform_device da8xx_lcdc_device = {
|
||||
.id = 0,
|
||||
.num_resources = ARRAY_SIZE(da8xx_lcdc_resources),
|
||||
.resource = da8xx_lcdc_resources,
|
||||
.dev = {
|
||||
.coherent_dma_mask = DMA_BIT_MASK(32),
|
||||
}
|
||||
};
|
||||
|
||||
int __init da8xx_register_lcdc(struct da8xx_lcdc_platform_data *pdata)
|
||||
@@ -701,6 +704,46 @@ static struct resource da8xx_gpio_resources[] = {
|
||||
},
|
||||
{ /* interrupt */
|
||||
.start = IRQ_DA8XX_GPIO0,
|
||||
.end = IRQ_DA8XX_GPIO0,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO1,
|
||||
.end = IRQ_DA8XX_GPIO1,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO2,
|
||||
.end = IRQ_DA8XX_GPIO2,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO3,
|
||||
.end = IRQ_DA8XX_GPIO3,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO4,
|
||||
.end = IRQ_DA8XX_GPIO4,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO5,
|
||||
.end = IRQ_DA8XX_GPIO5,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO6,
|
||||
.end = IRQ_DA8XX_GPIO6,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO7,
|
||||
.end = IRQ_DA8XX_GPIO7,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DA8XX_GPIO8,
|
||||
.end = IRQ_DA8XX_GPIO8,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
|
||||
@@ -548,6 +548,36 @@ static struct resource dm355_gpio_resources[] = {
|
||||
},
|
||||
{ /* interrupt */
|
||||
.start = IRQ_DM355_GPIOBNK0,
|
||||
.end = IRQ_DM355_GPIOBNK0,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM355_GPIOBNK1,
|
||||
.end = IRQ_DM355_GPIOBNK1,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM355_GPIOBNK2,
|
||||
.end = IRQ_DM355_GPIOBNK2,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM355_GPIOBNK3,
|
||||
.end = IRQ_DM355_GPIOBNK3,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM355_GPIOBNK4,
|
||||
.end = IRQ_DM355_GPIOBNK4,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM355_GPIOBNK5,
|
||||
.end = IRQ_DM355_GPIOBNK5,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM355_GPIOBNK6,
|
||||
.end = IRQ_DM355_GPIOBNK6,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
|
||||
@@ -267,6 +267,41 @@ static struct resource dm365_gpio_resources[] = {
|
||||
},
|
||||
{ /* interrupt */
|
||||
.start = IRQ_DM365_GPIO0,
|
||||
.end = IRQ_DM365_GPIO0,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM365_GPIO1,
|
||||
.end = IRQ_DM365_GPIO1,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM365_GPIO2,
|
||||
.end = IRQ_DM365_GPIO2,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM365_GPIO3,
|
||||
.end = IRQ_DM365_GPIO3,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM365_GPIO4,
|
||||
.end = IRQ_DM365_GPIO4,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM365_GPIO5,
|
||||
.end = IRQ_DM365_GPIO5,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM365_GPIO6,
|
||||
.end = IRQ_DM365_GPIO6,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM365_GPIO7,
|
||||
.end = IRQ_DM365_GPIO7,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
|
||||
@@ -492,6 +492,26 @@ static struct resource dm644_gpio_resources[] = {
|
||||
},
|
||||
{ /* interrupt */
|
||||
.start = IRQ_GPIOBNK0,
|
||||
.end = IRQ_GPIOBNK0,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_GPIOBNK1,
|
||||
.end = IRQ_GPIOBNK1,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_GPIOBNK2,
|
||||
.end = IRQ_GPIOBNK2,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_GPIOBNK3,
|
||||
.end = IRQ_GPIOBNK3,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_GPIOBNK4,
|
||||
.end = IRQ_GPIOBNK4,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
|
||||
@@ -442,6 +442,16 @@ static struct resource dm646x_gpio_resources[] = {
|
||||
},
|
||||
{ /* interrupt */
|
||||
.start = IRQ_DM646X_GPIOBNK0,
|
||||
.end = IRQ_DM646X_GPIOBNK0,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM646X_GPIOBNK1,
|
||||
.end = IRQ_DM646X_GPIOBNK1,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
{
|
||||
.start = IRQ_DM646X_GPIOBNK2,
|
||||
.end = IRQ_DM646X_GPIOBNK2,
|
||||
.flags = IORESOURCE_IRQ,
|
||||
},
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#define DEEPSLEEP_SLEEPENABLE_BIT BIT(31)
|
||||
|
||||
.text
|
||||
.arch armv5te
|
||||
/*
|
||||
* Move DaVinci into deep sleep state
|
||||
*
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
#include "common.h"
|
||||
#include "cpuidle.h"
|
||||
#include "hardware.h"
|
||||
|
||||
static int imx6sx_idle_finish(unsigned long val)
|
||||
{
|
||||
@@ -110,7 +111,7 @@ int __init imx6sx_cpuidle_init(void)
|
||||
* except for power up sw2iso which need to be
|
||||
* larger than LDO ramp up time.
|
||||
*/
|
||||
imx_gpc_set_arm_power_up_timing(0xf, 1);
|
||||
imx_gpc_set_arm_power_up_timing(cpu_is_imx6sx() ? 0xf : 0x2, 1);
|
||||
imx_gpc_set_arm_power_down_timing(1, 1);
|
||||
|
||||
return cpuidle_register(&imx6sx_cpuidle_driver, NULL);
|
||||
|
||||
@@ -135,6 +135,8 @@ restart:
|
||||
orr r11, r11, r13 @ mask all requested interrupts
|
||||
str r11, [r12, #OMAP1510_GPIO_INT_MASK]
|
||||
|
||||
str r13, [r12, #OMAP1510_GPIO_INT_STATUS] @ ack all requested interrupts
|
||||
|
||||
ands r10, r13, #KEYBRD_CLK_MASK @ extract keyboard status - set?
|
||||
beq hksw @ no - try next source
|
||||
|
||||
@@ -142,7 +144,6 @@ restart:
|
||||
@@@@@@@@@@@@@@@@@@@@@@
|
||||
@ Keyboard clock FIQ mode interrupt handler
|
||||
@ r10 now contains KEYBRD_CLK_MASK, use it
|
||||
str r10, [r12, #OMAP1510_GPIO_INT_STATUS] @ ack the interrupt
|
||||
bic r11, r11, r10 @ unmask it
|
||||
str r11, [r12, #OMAP1510_GPIO_INT_MASK]
|
||||
|
||||
|
||||
@@ -73,9 +73,7 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id)
|
||||
* interrupts default to since commit 80ac93c27441
|
||||
* requires interrupt already acked and unmasked.
|
||||
*/
|
||||
if (irq_chip->irq_ack)
|
||||
irq_chip->irq_ack(d);
|
||||
if (irq_chip->irq_unmask)
|
||||
if (!WARN_ON_ONCE(!irq_chip->irq_unmask))
|
||||
irq_chip->irq_unmask(d);
|
||||
}
|
||||
for (; irq_counter[gpio] < fiq_count; irq_counter[gpio]++)
|
||||
|
||||
@@ -131,6 +131,9 @@ static int __init omap4_sram_init(void)
|
||||
struct device_node *np;
|
||||
struct gen_pool *sram_pool;
|
||||
|
||||
if (!soc_is_omap44xx() && !soc_is_omap54xx())
|
||||
return 0;
|
||||
|
||||
np = of_find_compatible_node(NULL, NULL, "ti,omap4-mpu");
|
||||
if (!np)
|
||||
pr_warn("%s:Unable to allocate sram needed to handle errata I688\n",
|
||||
|
||||
@@ -385,7 +385,8 @@ static struct omap_hwmod dra7xx_dcan2_hwmod = {
|
||||
static struct omap_hwmod_class_sysconfig dra7xx_epwmss_sysc = {
|
||||
.rev_offs = 0x0,
|
||||
.sysc_offs = 0x4,
|
||||
.sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET,
|
||||
.sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
|
||||
SYSC_HAS_RESET_STATUS,
|
||||
.idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
|
||||
.sysc_fields = &omap_hwmod_sysc_type2,
|
||||
};
|
||||
|
||||
@@ -433,7 +433,7 @@ static void omap3_prm_reconfigure_io_chain(void)
|
||||
* registers, and omap3xxx_prm_reconfigure_io_chain() must be called.
|
||||
* No return value.
|
||||
*/
|
||||
static void __init omap3xxx_prm_enable_io_wakeup(void)
|
||||
static void omap3xxx_prm_enable_io_wakeup(void)
|
||||
{
|
||||
if (prm_features & PRM_HAS_IO_WAKEUP)
|
||||
omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD,
|
||||
|
||||
@@ -131,7 +131,7 @@ static irqreturn_t iomd_dma_handle(int irq, void *dev_id)
|
||||
} while (1);
|
||||
|
||||
idma->state = ~DMA_ST_AB;
|
||||
disable_irq(irq);
|
||||
disable_irq_nosync(irq);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
@@ -174,6 +174,9 @@ static void iomd_enable_dma(unsigned int chan, dma_t *dma)
|
||||
DMA_FROM_DEVICE : DMA_TO_DEVICE);
|
||||
}
|
||||
|
||||
idma->dma_addr = idma->dma.sg->dma_address;
|
||||
idma->dma_len = idma->dma.sg->length;
|
||||
|
||||
iomd_writeb(DMA_CR_C, dma_base + CR);
|
||||
idma->state = DMA_ST_AB;
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ int zynq_cpun_start(u32 address, int cpu)
|
||||
* 0x4: Jump by mov instruction
|
||||
* 0x8: Jumping address
|
||||
*/
|
||||
memcpy((__force void *)zero, &zynq_secondary_trampoline,
|
||||
memcpy_toio(zero, &zynq_secondary_trampoline,
|
||||
trampoline_size);
|
||||
writel(address, zero + trampoline_size);
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
|
||||
{
|
||||
unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
|
||||
|
||||
if (fsr & FSR_WRITE)
|
||||
if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
|
||||
mask = VM_WRITE;
|
||||
if (fsr & FSR_LNX_PF)
|
||||
mask = VM_EXEC;
|
||||
@@ -287,7 +287,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
||||
|
||||
if (user_mode(regs))
|
||||
flags |= FAULT_FLAG_USER;
|
||||
if (fsr & FSR_WRITE)
|
||||
if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
|
||||
flags |= FAULT_FLAG_WRITE;
|
||||
|
||||
/*
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* Fault status register encodings. We steal bit 31 for our own purposes.
|
||||
*/
|
||||
#define FSR_LNX_PF (1 << 31)
|
||||
#define FSR_CM (1 << 13)
|
||||
#define FSR_WRITE (1 << 11)
|
||||
#define FSR_FS4 (1 << 10)
|
||||
#define FSR_FS3_0 (15)
|
||||
|
||||
@@ -196,6 +196,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low,
|
||||
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
|
||||
int pfn_valid(unsigned long pfn)
|
||||
{
|
||||
phys_addr_t addr = __pfn_to_phys(pfn);
|
||||
|
||||
if (__phys_to_pfn(addr) != pfn)
|
||||
return 0;
|
||||
|
||||
return memblock_is_map_memory(__pfn_to_phys(pfn));
|
||||
}
|
||||
EXPORT_SYMBOL(pfn_valid);
|
||||
@@ -713,7 +718,8 @@ static void update_sections_early(struct section_perm perms[], int n)
|
||||
if (t->flags & PF_KTHREAD)
|
||||
continue;
|
||||
for_each_thread(t, s)
|
||||
set_section_perms(perms, n, true, s->mm);
|
||||
if (s->mm)
|
||||
set_section_perms(perms, n, true, s->mm);
|
||||
}
|
||||
set_section_perms(perms, n, true, current->active_mm);
|
||||
set_section_perms(perms, n, true, &init_mm);
|
||||
|
||||
@@ -18,8 +18,9 @@
|
||||
(((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
|
||||
|
||||
/* gap between mmap and stack */
|
||||
#define MIN_GAP (128*1024*1024UL)
|
||||
#define MAX_GAP ((TASK_SIZE)/6*5)
|
||||
#define MIN_GAP (128*1024*1024UL)
|
||||
#define MAX_GAP ((STACK_TOP)/6*5)
|
||||
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))
|
||||
|
||||
static int mmap_is_legacy(struct rlimit *rlim_stack)
|
||||
{
|
||||
@@ -35,13 +36,22 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
|
||||
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
|
||||
{
|
||||
unsigned long gap = rlim_stack->rlim_cur;
|
||||
unsigned long pad = stack_guard_gap;
|
||||
|
||||
/* Account for stack randomization if necessary */
|
||||
if (current->flags & PF_RANDOMIZE)
|
||||
pad += (STACK_RND_MASK << PAGE_SHIFT);
|
||||
|
||||
/* Values close to RLIM_INFINITY can overflow. */
|
||||
if (gap + pad > gap)
|
||||
gap += pad;
|
||||
|
||||
if (gap < MIN_GAP)
|
||||
gap = MIN_GAP;
|
||||
else if (gap > MAX_GAP)
|
||||
gap = MAX_GAP;
|
||||
|
||||
return PAGE_ALIGN(TASK_SIZE - gap - rnd);
|
||||
return PAGE_ALIGN(STACK_TOP - gap - rnd);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -1175,6 +1175,22 @@ void __init adjust_lowmem_bounds(void)
|
||||
*/
|
||||
vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
|
||||
|
||||
/*
|
||||
* The first usable region must be PMD aligned. Mark its start
|
||||
* as MEMBLOCK_NOMAP if it isn't
|
||||
*/
|
||||
for_each_memblock(memory, reg) {
|
||||
if (!memblock_is_nomap(reg)) {
|
||||
if (!IS_ALIGNED(reg->base, PMD_SIZE)) {
|
||||
phys_addr_t len;
|
||||
|
||||
len = round_up(reg->base, PMD_SIZE) - reg->base;
|
||||
memblock_mark_nomap(reg->base, len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_memblock(memory, reg) {
|
||||
phys_addr_t block_start = reg->base;
|
||||
phys_addr_t block_end = reg->base + reg->size;
|
||||
|
||||
@@ -62,6 +62,7 @@ void samsung_wdt_reset(void)
|
||||
#ifdef CONFIG_OF
|
||||
static const struct of_device_id s3c2410_wdt_match[] = {
|
||||
{ .compatible = "samsung,s3c2410-wdt" },
|
||||
{ .compatible = "samsung,s3c6410-wdt" },
|
||||
{},
|
||||
};
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user