@@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
 	return BIT_ULL(gpa_width - 1);
 }
 
+/*
+ * The TDX module spec states that #VE may be injected for a limited set of
+ * reasons:
+ *
+ *  - Emulation of the architectural #VE injection on EPT violation;
+ *
+ *  - As a result of guest TD execution of a disallowed instruction,
+ *    a disallowed MSR access, or CPUID virtualization;
+ *
+ *  - A notification to the guest TD about anomalous behavior;
+ *
+ * The last one is opt-in and is not used by the kernel.
+ *
+ * The Intel Software Developer's Manual describes cases when instruction
+ * length field can be used in section "Information for VM Exits Due to
+ * Instruction Execution".
+ *
+ * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
+ * information if #VE occurred due to instruction execution, but not for EPT
+ * violations.
+ */
+static int ve_instr_len(struct ve_info *ve)
+{
+	switch (ve->exit_reason) {
+	case EXIT_REASON_HLT:
+	case EXIT_REASON_MSR_READ:
+	case EXIT_REASON_MSR_WRITE:
+	case EXIT_REASON_CPUID:
+	case EXIT_REASON_IO_INSTRUCTION:
+		/* It is safe to use ve->instr_len for #VE due instructions */
+		return ve->instr_len;
+	case EXIT_REASON_EPT_VIOLATION:
+		/*
+		 * For EPT violations, ve->insn_len is not defined. For those,
+		 * the kernel must decode instructions manually and should not
+		 * be using this function.
+		 */
+		WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
+		return 0;
+	default:
+		WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
+		return ve->instr_len;
+	}
+}
+
 static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
 {
 	struct tdx_hypercall_args args = {
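For orientation while reading the new helper: the fields it consults live in
struct ve_info, which this file takes from <asm/tdx.h>. At the time of this
series the layout is roughly the following (quoted from memory; treat as
illustrative rather than authoritative):

	struct ve_info {
		u64 exit_reason;
		u64 exit_qual;
		/* Guest Linear (virtual) Address */
		u64 gla;
		/* Guest Physical Address */
		u64 gpa;
		u32 instr_len;		/* only valid per ve_instr_len() rules */
		u32 instr_info;
	};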
@@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
 	return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
 }
 
-static bool handle_halt(void)
+static int handle_halt(struct ve_info *ve)
 {
 	/*
 	 * Since non safe halt is mainly used in CPU offlining
@@ -158,9 +203,9 @@ static bool handle_halt(void)
 	const bool do_sti = false;
 
 	if (__halt(irq_disabled, do_sti))
-		return false;
+		return -EIO;
 
-	return true;
+	return ve_instr_len(ve);
 }
 
 void __cpuidle tdx_safe_halt(void)
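Every handler below follows the same conversion pattern: a bool success flag
becomes "instruction length on success, -errno on failure". A minimal sketch
of what a caller now does with the result (hypothetical caller, not part of
the patch):

	int ret = handle_halt(ve);	/* was: bool ok = handle_halt(); */

	if (ret < 0)
		return false;		/* emulation failed, #VE is fatal */

	regs->ip += ret;		/* skip past the emulated HLT */
	return true;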
@@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
 	WARN_ONCE(1, "HLT instruction emulation failed\n");
 }
 
-static bool read_msr(struct pt_regs *regs)
+static int read_msr(struct pt_regs *regs, struct ve_info *ve)
 {
 	struct tdx_hypercall_args args = {
 		.r10 = TDX_HYPERCALL_STANDARD,
@@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
 	 * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
 	 */
 	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
-		return false;
+		return -EIO;
 
 	regs->ax = lower_32_bits(args.r11);
 	regs->dx = upper_32_bits(args.r11);
-	return true;
+	return ve_instr_len(ve);
 }
 
-static bool write_msr(struct pt_regs *regs)
+static int write_msr(struct pt_regs *regs, struct ve_info *ve)
 {
 	struct tdx_hypercall_args args = {
 		.r10 = TDX_HYPERCALL_STANDARD,
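To make the r11 handling above concrete: the GHCI returns the full 64-bit MSR
value in R11, and read_msr() splits it back into the architectural RDMSR
destination pair. A worked example (the value is made up):

	/* Suppose the hypercall produced args.r11 == 0x0000001200000034. */
	regs->ax = lower_32_bits(args.r11);	/* EAX = 0x00000034 */
	regs->dx = upper_32_bits(args.r11);	/* EDX = 0x00000012 */
	/* The guest observes exactly what a native RDMSR would return. */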
@@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
 	 * can be found in TDX Guest-Host-Communication Interface
 	 * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
 	 */
-	return !__tdx_hypercall(&args, 0);
+	if (__tdx_hypercall(&args, 0))
+		return -EIO;
+
+	return ve_instr_len(ve);
 }
 
-static bool handle_cpuid(struct pt_regs *regs)
+static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
 {
 	struct tdx_hypercall_args args = {
 		.r10 = TDX_HYPERCALL_STANDARD,
@@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
 	 */
 	if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
 		regs->ax = regs->bx = regs->cx = regs->dx = 0;
-		return true;
+		return ve_instr_len(ve);
 	}
 
 	/*
@@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
 	 * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
 	 */
 	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
-		return false;
+		return -EIO;
 
 	/*
 	 * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
@@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
 	regs->cx = args.r14;
 	regs->dx = args.r15;
 
-	return true;
+	return ve_instr_len(ve);
 }
 
 static bool mmio_read(int size, unsigned long addr, unsigned long *val)
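The 0x40000000-0x4FFFFFFF window tested above is the CPUID leaf range reserved
for hypervisors; only those leaves are forwarded to the VMM, everything else
is zeroed locally. An illustrative trace (leaf values arbitrary):

	regs->ax = 0x80000000;		/* outside the hypervisor range */
	ret = handle_cpuid(regs, ve);	/* ax/bx/cx/dx zeroed, no hypercall,
					   returns ve_instr_len(ve) */

	regs->ax = 0x40000000;		/* hypervisor leaf */
	ret = handle_cpuid(regs, ve);	/* forwarded via TDVMCALL, output
					   taken from r12-r15 */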
@@ -283,10 +331,10 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
 			       EPT_WRITE, addr, val);
 }
 
-static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 {
+	unsigned long *reg, val, vaddr;
 	char buffer[MAX_INSN_SIZE];
-	unsigned long *reg, val;
 	struct insn insn = {};
 	enum mmio_type mmio;
 	int size, extend_size;
@@ -294,34 +342,49 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 
 	/* Only in-kernel MMIO is supported */
 	if (WARN_ON_ONCE(user_mode(regs)))
-		return false;
+		return -EFAULT;
 
 	if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
-		return false;
+		return -EFAULT;
 
 	if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
-		return false;
+		return -EINVAL;
 
 	mmio = insn_decode_mmio(&insn, &size);
 	if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
-		return false;
+		return -EINVAL;
 
 	if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
 		reg = insn_get_modrm_reg_ptr(&insn, regs);
 		if (!reg)
-			return false;
+			return -EINVAL;
 	}
 
-	ve->instr_len = insn.length;
+	/*
+	 * Reject EPT violation #VEs that split pages.
+	 *
+	 * MMIO accesses are supposed to be naturally aligned and therefore
+	 * never cross page boundaries. Seeing split page accesses indicates
+	 * a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
+	 *
+	 * load_unaligned_zeropad() will recover using exception fixups.
+	 */
+	vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
+	if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
+		return -EFAULT;
 
 	/* Handle writes first */
 	switch (mmio) {
 	case MMIO_WRITE:
 		memcpy(&val, reg, size);
-		return mmio_write(size, ve->gpa, val);
+		if (!mmio_write(size, ve->gpa, val))
+			return -EIO;
+		return insn.length;
 	case MMIO_WRITE_IMM:
 		val = insn.immediate.value;
-		return mmio_write(size, ve->gpa, val);
+		if (!mmio_write(size, ve->gpa, val))
+			return -EIO;
+		return insn.length;
 	case MMIO_READ:
 	case MMIO_READ_ZERO_EXTEND:
 	case MMIO_READ_SIGN_EXTEND:
@@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 		 * decoded or handled properly. It was likely not using io.h
 		 * helpers or accessed MMIO accidentally.
 		 */
-		return false;
+		return -EINVAL;
 	default:
 		WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
-		return false;
+		return -EINVAL;
 	}
 
 	/* Handle reads */
 	if (!mmio_read(size, ve->gpa, &val))
-		return false;
+		return -EIO;
 
 	switch (mmio) {
 	case MMIO_READ:
@@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 	default:
 		/* All other cases has to be covered with the first switch() */
 		WARN_ON_ONCE(1);
-		return false;
+		return -EINVAL;
 	}
 
 	if (extend_size)
 		memset(reg, extend_val, extend_size);
 	memcpy(reg, &val, size);
-	return true;
+	return insn.length;
 }
 
 static bool handle_in(struct pt_regs *regs, int size, int port)
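A worked instance of the split-page rejection added to handle_mmio() above,
assuming PAGE_SIZE == 4096 (addresses are illustrative):

	/*
	 * A 4-byte access at vaddr == 0x1000ffd gives
	 *   vaddr / PAGE_SIZE           == 0x1000, but
	 *   (vaddr + 4 - 1) / PAGE_SIZE == 0x1001,
	 * so the access spans two pages and is rejected with -EFAULT
	 * (load_unaligned_zeropad() then recovers via exception fixup).
	 * The naturally aligned access at 0x1000ffc keeps both quotients
	 * at 0x1000 and is emulated normally.
	 */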
@@ -421,13 +484,14 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
  *
  * Return True on success or False on failure.
  */
-static bool handle_io(struct pt_regs *regs, u32 exit_qual)
+static int handle_io(struct pt_regs *regs, struct ve_info *ve)
 {
+	u32 exit_qual = ve->exit_qual;
 	int size, port;
-	bool in;
+	bool in, ret;
 
 	if (VE_IS_IO_STRING(exit_qual))
-		return false;
+		return -EIO;
 
 	in   = VE_IS_IO_IN(exit_qual);
 	size = VE_GET_IO_SIZE(exit_qual);
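For reference, the VE_* helpers used here unpack the I/O-instruction exit
qualification defined by the Intel SDM. Earlier in this file they are defined
along these lines (paraphrased from memory; bit positions follow the SDM):

	#define VE_IS_IO_IN(e)		((e) & BIT(3))		/* 1 = IN, 0 = OUT */
	#define VE_GET_IO_SIZE(e)	(((e) & GENMASK(2, 0)) + 1)	/* bytes */
	#define VE_GET_PORT_NUM(e)	((e) >> 16)		/* bits 31:16 */
	#define VE_IS_IO_STRING(e)	((e) & BIT(4))		/* INS/OUTS */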
@@ -435,9 +499,13 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
 
 
 	if (in)
-		return handle_in(regs, size, port);
+		ret = handle_in(regs, size, port);
 	else
-		return handle_out(regs, size, port);
+		ret = handle_out(regs, size, port);
+	if (!ret)
+		return -EIO;
+
+	return ve_instr_len(ve);
 }
 
 /*
@@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
 __init bool tdx_early_handle_ve(struct pt_regs *regs)
 {
 	struct ve_info ve;
+	int insn_len;
 
 	tdx_get_ve_info(&ve);
 
 	if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
 		return false;
 
-	return handle_io(regs, ve.exit_qual);
+	insn_len = handle_io(regs, &ve);
+	if (insn_len < 0)
+		return false;
+
+	regs->ip += insn_len;
+	return true;
 }
 
 void tdx_get_ve_info(struct ve_info *ve)
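tdx_early_handle_ve() runs before the normal #VE handler is installed; the
early-boot consumer in arch/x86/kernel/head64.c looks approximately like this
(quoted from memory, not part of this patch):

	/* In do_early_exception(): */
	if (IS_ENABLED(CONFIG_INTEL_TDX_GUEST) &&
	    trapnr == X86_TRAP_VE && tdx_early_handle_ve(regs))
		return;		/* handled: RIP already advanced */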
@@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve)
 	ve->instr_info = upper_32_bits(out.r10);
 }
 
-/* Handle the user initiated #VE */
-static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
+/*
+ * Handle the user initiated #VE.
+ *
+ * On success, returns the number of bytes RIP should be incremented (>=0)
+ * or -errno on error.
+ */
+static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
 {
 	switch (ve->exit_reason) {
 	case EXIT_REASON_CPUID:
-		return handle_cpuid(regs);
+		return handle_cpuid(regs, ve);
 	default:
 		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
-		return false;
+		return -EIO;
 	}
 }
 
-/* Handle the kernel #VE */
-static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+/*
+ * Handle the kernel #VE.
+ *
+ * On success, returns the number of bytes RIP should be incremented (>=0)
+ * or -errno on error.
+ */
+static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
 {
 	switch (ve->exit_reason) {
 	case EXIT_REASON_HLT:
-		return handle_halt();
+		return handle_halt(ve);
 	case EXIT_REASON_MSR_READ:
-		return read_msr(regs);
+		return read_msr(regs, ve);
 	case EXIT_REASON_MSR_WRITE:
-		return write_msr(regs);
+		return write_msr(regs, ve);
 	case EXIT_REASON_CPUID:
-		return handle_cpuid(regs);
+		return handle_cpuid(regs, ve);
 	case EXIT_REASON_EPT_VIOLATION:
 		return handle_mmio(regs, ve);
 	case EXIT_REASON_IO_INSTRUCTION:
-		return handle_io(regs, ve->exit_qual);
+		return handle_io(regs, ve);
 	default:
 		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
-		return false;
+		return -EIO;
 	}
 }
 
 bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
 {
-	bool ret;
+	int insn_len;
 
 	if (user_mode(regs))
-		ret = virt_exception_user(regs, ve);
+		insn_len = virt_exception_user(regs, ve);
 	else
-		ret = virt_exception_kernel(regs, ve);
+		insn_len = virt_exception_kernel(regs, ve);
+	if (insn_len < 0)
+		return false;
 
 	/* After successful #VE handling, move the IP */
-	if (ret)
-		regs->ip += ve->instr_len;
+	regs->ip += insn_len;
 
-	return ret;
+	return true;
 }
 
 static bool tdx_tlb_flush_required(bool private)
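End to end, the bool that tdx_handle_virt_exception() still returns is what
the #VE IDT entry acts on; the call site in arch/x86/kernel/traps.c is
approximately (quoted from memory, abbreviated):

	/* In exc_virtualization_exception(): */
	tdx_get_ve_info(&ve);

	if (!tdx_handle_virt_exception(regs, &ve))
		ve_raise_fault(regs, 0);	/* unhandled: promote to fault */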