Merge branch 'x86/entry' into ras/core

to fixup conflicts in arch/x86/kernel/cpu/mce/core.c so MCE specific follow
up patches can be applied without creating a horrible merge conflict
afterwards.
This commit is contained in:
Thomas Gleixner
2020-06-11 15:17:57 +02:00
14668 changed files with 838508 additions and 301022 deletions

View File

@@ -10,10 +10,10 @@
*/
#include <linux/interrupt.h>
#include <asm/acrn.h>
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/hypervisor.h>
#include <asm/idtentry.h>
#include <asm/irq_regs.h>
static uint32_t __init acrn_detect(void)
@@ -24,7 +24,7 @@ static uint32_t __init acrn_detect(void)
static void __init acrn_init_platform(void)
{
/* Setup the IDT for ACRN hypervisor callback */
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector);
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_acrn_hv_callback);
}
static bool acrn_x2apic_available(void)
@@ -39,7 +39,7 @@ static bool acrn_x2apic_available(void)
static void (*acrn_intr_handler)(void);
__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -50,13 +50,12 @@ __visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
* will block the interrupt whose vector is lower than
* HYPERVISOR_CALLBACK_VECTOR.
*/
entering_ack_irq();
ack_APIC_irq();
inc_irq_stat(irq_hv_callback_count);
if (acrn_intr_handler)
acrn_intr_handler();
exiting_irq();
set_irq_regs(old_regs);
}

View File

@@ -18,6 +18,7 @@
#include <asm/pci-direct.h>
#include <asm/delay.h>
#include <asm/debugreg.h>
#include <asm/resctrl.h>
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
}
}
resctrl_cpu_detect(c);
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -1142,8 +1145,7 @@ static const int amd_erratum_383[] =
/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
static const int amd_erratum_1054[] =
AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{

View File

@@ -15,6 +15,7 @@
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched/smt.h>
#include <linux/pgtable.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -26,7 +27,6 @@
#include <asm/vmx.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/intel-family.h>
#include <asm/e820/api.h>
@@ -41,6 +41,7 @@ static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
static void __init mds_print_mitigation(void);
static void __init taa_select_mitigation(void);
static void __init srbds_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@@ -108,6 +109,7 @@ void __init check_bugs(void)
l1tf_select_mitigation();
mds_select_mitigation();
taa_select_mitigation();
srbds_select_mitigation();
/*
* As MDS and TAA mitigations are inter-related, print MDS
@@ -397,6 +399,97 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
}
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "SRBDS: " fmt
enum srbds_mitigations {
SRBDS_MITIGATION_OFF,
SRBDS_MITIGATION_UCODE_NEEDED,
SRBDS_MITIGATION_FULL,
SRBDS_MITIGATION_TSX_OFF,
SRBDS_MITIGATION_HYPERVISOR,
};
static enum srbds_mitigations srbds_mitigation __ro_after_init = SRBDS_MITIGATION_FULL;
static const char * const srbds_strings[] = {
[SRBDS_MITIGATION_OFF] = "Vulnerable",
[SRBDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
[SRBDS_MITIGATION_FULL] = "Mitigation: Microcode",
[SRBDS_MITIGATION_TSX_OFF] = "Mitigation: TSX disabled",
[SRBDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status",
};
static bool srbds_off;
void update_srbds_msr(void)
{
u64 mcu_ctrl;
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
return;
rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
switch (srbds_mitigation) {
case SRBDS_MITIGATION_OFF:
case SRBDS_MITIGATION_TSX_OFF:
mcu_ctrl |= RNGDS_MITG_DIS;
break;
case SRBDS_MITIGATION_FULL:
mcu_ctrl &= ~RNGDS_MITG_DIS;
break;
default:
break;
}
wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
}
static void __init srbds_select_mitigation(void)
{
u64 ia32_cap;
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
/*
* Check to see if this is one of the MDS_NO systems supporting
* TSX that are only exposed to SRBDS when TSX is enabled.
*/
ia32_cap = x86_read_arch_cap_msr();
if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
else if (cpu_mitigations_off() || srbds_off)
srbds_mitigation = SRBDS_MITIGATION_OFF;
update_srbds_msr();
pr_info("%s\n", srbds_strings[srbds_mitigation]);
}
static int __init srbds_parse_cmdline(char *str)
{
if (!str)
return -EINVAL;
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return 0;
srbds_off = !strcmp(str, "off");
return 0;
}
early_param("srbds", srbds_parse_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
@@ -1528,6 +1621,11 @@ static char *ibpb_state(void)
return "";
}
static ssize_t srbds_show_state(char *buf)
{
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -1572,6 +1670,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_ITLB_MULTIHIT:
return itlb_multihit_show_state(buf);
case X86_BUG_SRBDS:
return srbds_show_state(buf);
default:
break;
}
@@ -1618,4 +1719,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr
{
return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
}
ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
}
#endif

View File

@@ -21,6 +21,7 @@
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <linux/pgtable.h>
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
@@ -35,7 +36,6 @@
#include <asm/vsyscall.h>
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <asm/pgtable.h>
#include <linux/atomic.h>
#include <asm/proto.h>
#include <asm/setup.h>
@@ -387,7 +387,30 @@ set_register:
bits_missing);
}
}
EXPORT_SYMBOL(native_write_cr4);
#if IS_MODULE(CONFIG_LKDTM)
EXPORT_SYMBOL_GPL(native_write_cr4);
#endif
void cr4_update_irqsoff(unsigned long set, unsigned long clear)
{
unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
lockdep_assert_irqs_disabled();
newval = (cr4 & ~clear) | set;
if (newval != cr4) {
this_cpu_write(cpu_tlbstate.cr4, newval);
__write_cr4(newval);
}
}
EXPORT_SYMBOL(cr4_update_irqsoff);
/* Read the CR4 shadow. */
unsigned long cr4_read_shadow(void)
{
return this_cpu_read(cpu_tlbstate.cr4);
}
EXPORT_SYMBOL_GPL(cr4_read_shadow);
void cr4_init(void)
{
@@ -854,30 +877,6 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
}
}
static void init_cqm(struct cpuinfo_x86 *c)
{
if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
c->x86_cache_max_rmid = -1;
c->x86_cache_occ_scale = -1;
return;
}
/* will be overridden if occupancy monitoring exists */
c->x86_cache_max_rmid = cpuid_ebx(0xf);
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
u32 eax, ebx, ecx, edx;
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
}
}
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -945,7 +944,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
init_speculation_control(c);
init_cqm(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -1075,9 +1073,30 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
{}
};
static bool __init cpu_matches(unsigned long which)
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
INTEL_FAM6_##model, steppings, \
X86_FEATURE_ANY, issues)
#define SRBDS BIT(0)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),
VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),
{}
};
static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which)
{
const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
const struct x86_cpu_id *m = x86_match_cpu(table);
return m && !!(m->driver_data & which);
}
@@ -1097,31 +1116,34 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
u64 ia32_cap = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
!(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(NO_SPECULATION))
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
return;
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
if (!cpu_matches(NO_SPECTRE_V2))
if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2))
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
!(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (ia32_cap & ARCH_CAP_IBRS_ALL)
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) {
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
!(ia32_cap & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(MSBDS_ONLY))
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
}
if (!cpu_matches(NO_SWAPGS))
if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS))
setup_force_cpu_bug(X86_BUG_SWAPGS);
/*
@@ -1139,7 +1161,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
if (cpu_matches(NO_MELTDOWN))
/*
* SRBDS affects CPUs which support RDRAND or RDSEED and are listed
* in the vulnerability blacklist.
*/
if ((cpu_has(c, X86_FEATURE_RDRAND) ||
cpu_has(c, X86_FEATURE_RDSEED)) &&
cpu_matches(cpu_vuln_blacklist, SRBDS))
setup_force_cpu_bug(X86_BUG_SRBDS);
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
@@ -1148,7 +1179,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
if (cpu_matches(NO_L1TF))
if (cpu_matches(cpu_vuln_whitelist, NO_L1TF))
return;
setup_force_cpu_bug(X86_BUG_L1TF);
@@ -1377,20 +1408,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
#endif
}
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
{
/*
* The heavy lifting of max_rmid and cache_occ_scale are handled
* in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
* in case CQM bits really aren't there in this CPU.
*/
if (c != &boot_cpu_data) {
boot_cpu_data.x86_cache_max_rmid =
min(boot_cpu_data.x86_cache_max_rmid,
c->x86_cache_max_rmid);
}
}
/*
* Validate that ACPI/mptables have the same information about the
* effective APIC id and update the package map.
@@ -1503,7 +1520,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#endif
x86_init_rdrand(c);
x86_init_cache_qos(c);
setup_pku(c);
/*
@@ -1591,6 +1607,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
mtrr_ap_init();
validate_apic_and_package_id(c);
x86_spec_ctrl_setup_ap();
update_srbds_msr();
}
static __init int setup_noclflush(char *arg)
@@ -1689,25 +1706,6 @@ void syscall_init(void)
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
}
DEFINE_PER_CPU(int, debug_stack_usage);
DEFINE_PER_CPU(u32, debug_idt_ctr);
void debug_stack_set_zero(void)
{
this_cpu_inc(debug_idt_ctr);
load_current_idt();
}
NOKPROBE_SYMBOL(debug_stack_set_zero);
void debug_stack_reset(void)
{
if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
return;
if (this_cpu_dec_return(debug_idt_ctr) == 0)
load_current_idt();
}
NOKPROBE_SYMBOL(debug_stack_reset);
#else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;

View File

@@ -77,6 +77,7 @@ extern void detect_ht(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
extern void x86_spec_ctrl_setup_ap(void);
extern void update_srbds_msr(void);
extern u64 x86_read_arch_cap_msr(void);

View File

@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/pgtable.h>
#include <linux/string.h>
#include <linux/bitops.h>
@@ -11,7 +12,6 @@
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
@@ -22,6 +22,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
#include <asm/resctrl.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
detect_ht_early(c);
}
static void bsp_init_intel(struct cpuinfo_x86 *c)
{
resctrl_cpu_detect(c);
}
#ifdef CONFIG_X86_32
/*
* Early probe support logic for ppro memory erratum #50
@@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = {
#endif
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
.c_bsp_init = bsp_init_intel,
.c_init = init_intel,
.c_x86_vendor = X86_VENDOR_INTEL,
};
@@ -1119,35 +1126,53 @@ void switch_to_sld(unsigned long tifn)
sld_update_msr(!(tifn & _TIF_SLD));
}
#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}
/*
* The following processors have the split lock detection feature. But
* since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot
* be enumerated. Enable it by family and model matching on these
* processors.
* Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
* only be trusted if it is confirmed that a CPU model implements a
* specific feature at a particular bit position.
*
* The possible driver data field values:
*
* - 0: CPU models that are known to have the per-core split-lock detection
* feature even though they do not enumerate IA32_CORE_CAPABILITIES.
*
* - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use
* bit 5 to enumerate the per-core split-lock detection feature.
*/
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X),
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1),
{}
};
void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
{
u64 ia32_core_caps = 0;
const struct x86_cpu_id *m;
u64 ia32_core_caps;
if (c->x86_vendor != X86_VENDOR_INTEL)
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) {
/* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */
m = x86_match_cpu(split_lock_cpu_ids);
if (!m)
return;
switch (m->driver_data) {
case 0:
break;
case 1:
if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
return;
rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
} else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
/* Enumerate split lock detection by family and model. */
if (x86_match_cpu(split_lock_cpu_ids))
ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT;
if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT))
return;
break;
default:
return;
}
if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
split_lock_setup();
split_lock_setup();
}

View File

@@ -39,13 +39,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
const struct x86_cpu_id *m;
struct cpuinfo_x86 *c = &boot_cpu_data;
for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
for (m = match;
m->vendor | m->family | m->model | m->steppings | m->feature;
m++) {
if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
continue;
if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
continue;
if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
continue;
if (m->steppings != X86_STEPPING_ANY &&
!(BIT(c->x86_stepping) & m->steppings))
continue;
if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
continue;
return m;

View File

@@ -921,14 +921,13 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
mce_log(&m);
}
asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
entering_irq();
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
inc_irq_stat(irq_deferred_error_count);
deferred_error_int_vector();
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
exiting_ack_irq();
ack_APIC_irq();
}
/*

View File

@@ -42,6 +42,8 @@
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/task_work.h>
#include <linux/hardirq.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
@@ -128,7 +130,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
noinstr void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
@@ -138,12 +140,12 @@ void mce_setup(struct mce *m)
m->cpuid = cpuid_eax(1);
m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
m->ppin = __rdmsr(MSR_PPIN);
else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
rdmsrl(MSR_AMD_PPIN, m->ppin);
m->ppin = __rdmsr(MSR_AMD_PPIN);
m->microcode = boot_cpu_data.microcode;
}
@@ -1057,23 +1059,6 @@ static void mce_clear_state(unsigned long *toclear)
}
}
static int do_memory_failure(struct mce *m)
{
int flags = MF_ACTION_REQUIRED;
int ret;
pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
if (!(m->mcgstatus & MCG_STATUS_RIPV))
flags |= MF_MUST_KILL;
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
else
set_mce_nospec(m->addr >> PAGE_SHIFT);
return ret;
}
/*
* Cases where we avoid rendezvous handler timeout:
* 1) If this CPU is offline.
@@ -1086,13 +1071,15 @@ static int do_memory_failure(struct mce *m)
* kdump kernel establishing a new #MC handler where a broadcasted MCE
* might not get handled properly.
*/
static bool __mc_check_crashing_cpu(int cpu)
static noinstr bool mce_check_crashing_cpu(void)
{
unsigned int cpu = smp_processor_id();
if (cpu_is_offline(cpu) ||
(crashing_cpu != -1 && crashing_cpu != cpu)) {
u64 mcgstatus;
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS);
if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
if (mcgstatus & MCG_STATUS_LMCES)
@@ -1100,7 +1087,7 @@ static bool __mc_check_crashing_cpu(int cpu)
}
if (mcgstatus & MCG_STATUS_RIPV) {
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
__wrmsr(MSR_IA32_MCG_STATUS, 0, 0);
return true;
}
}
@@ -1175,6 +1162,29 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
*m = *final;
}
static void kill_me_now(struct callback_head *ch)
{
force_sig(SIGBUS);
}
static void kill_me_maybe(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
int flags = MF_ACTION_REQUIRED;
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
if (!(p->mce_status & MCG_STATUS_RIPV))
flags |= MF_MUST_KILL;
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
return;
}
pr_err("Memory error not recovered");
kill_me_now(cb);
}
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1193,12 +1203,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
* backing the user stack, tracing that reads the user stack will cause
* potentially infinite recursion.
*/
void notrace do_machine_check(struct pt_regs *regs, long error_code)
void noinstr do_machine_check(struct pt_regs *regs)
{
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
struct mca_config *cfg = &mca_cfg;
int cpu = smp_processor_id();
struct mce m, *final;
char *msg = NULL;
int worst = 0;
@@ -1227,11 +1236,6 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
*/
int lmce = 1;
if (__mc_check_crashing_cpu(cpu))
return;
ist_enter(regs);
this_cpu_inc(mce_exception_count);
mce_gather_info(&m, regs);
@@ -1319,17 +1323,19 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
sync_core();
if (worst != MCE_AR_SEVERITY && !kill_it)
goto out_ist;
return;
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
ist_begin_non_atomic(regs);
local_irq_enable();
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
if (kill_it || do_memory_failure(&m))
force_sig(SIGBUS);
local_irq_disable();
ist_end_non_atomic();
current->mce_addr = m.addr;
current->mce_status = m.mcgstatus;
current->mce_kill_me.func = kill_me_maybe;
if (kill_it)
current->mce_kill_me.func = kill_me_now;
task_work_add(current, &current->mce_kill_me, true);
} else {
/*
* Handle an MCE which has happened in kernel space but from
@@ -1341,16 +1347,12 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
* proper one.
*/
if (m.kflags & MCE_IN_KERNEL_RECOV) {
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
}
out_ist:
ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);
NOKPROBE_SYMBOL(do_machine_check);
#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int flags)
@@ -1876,21 +1878,84 @@ bool filter_mce(struct mce *m)
}
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
static noinstr void unexpected_machine_check(struct pt_regs *regs)
{
instrumentation_begin();
pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
smp_processor_id());
instrumentation_end();
}
/* Call the installed machine check handler for this CPU setup. */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
dotraplinkage notrace void do_mce(struct pt_regs *regs, long error_code)
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
{
machine_check_vector(regs, error_code);
/*
* Only required when from kernel mode. See
* mce_check_crashing_cpu() for details.
*/
if (machine_check_vector == do_machine_check &&
mce_check_crashing_cpu())
return;
nmi_enter();
/*
* The call targets are marked noinstr, but objtool can't figure
* that out because it's an indirect call. Annotate it.
*/
instrumentation_begin();
trace_hardirqs_off_finish();
machine_check_vector(regs);
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
nmi_exit();
}
NOKPROBE_SYMBOL(do_mce);
static __always_inline void exc_machine_check_user(struct pt_regs *regs)
{
idtentry_enter_user(regs);
instrumentation_begin();
machine_check_vector(regs);
instrumentation_end();
idtentry_exit_user(regs);
}
#ifdef CONFIG_X86_64
/* MCE hit kernel mode */
DEFINE_IDTENTRY_MCE(exc_machine_check)
{
unsigned long dr7;
dr7 = local_db_save();
exc_machine_check_kernel(regs);
local_db_restore(dr7);
}
/* The user mode variant. */
DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
{
unsigned long dr7;
dr7 = local_db_save();
exc_machine_check_user(regs);
local_db_restore(dr7);
}
#else
/* 32bit unified entry point */
DEFINE_IDTENTRY_MCE(exc_machine_check)
{
unsigned long dr7;
dr7 = local_db_save();
if (user_mode(regs))
exc_machine_check_user(regs);
else
exc_machine_check_kernel(regs);
local_db_restore(dr7);
}
#endif
/*
* Called for each booted CPU to set up machine checks.

View File

@@ -146,9 +146,9 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
regs.cs = m->cs;
pregs = &regs;
}
/* in mcheck exeception handler, irq will be disabled */
/* do_machine_check() expects interrupts disabled -- at least */
local_irq_save(flags);
do_machine_check(pregs, 0);
do_machine_check(pregs);
local_irq_restore(flags);
m->finished = 0;
}

View File

@@ -9,7 +9,7 @@
#include <asm/mce.h>
/* Pointer to the installed machine check handler for this CPU setup. */
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
extern void (*machine_check_vector)(struct pt_regs *);
enum severity_level {
MCE_NO_SEVERITY,

View File

@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/hardirq.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -20,12 +21,11 @@
int mce_p5_enabled __read_mostly;
/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
static noinstr void pentium_machine_check(struct pt_regs *regs)
{
u32 loaddr, hi, lotype;
ist_enter(regs);
instrumentation_begin();
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -38,8 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
}
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
ist_exit(regs);
instrumentation_end();
}
/* Set up machine check reporting for processors with Intel style MCE: */

View File

@@ -614,14 +614,13 @@ static void unexpected_thermal_interrupt(void)
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
{
entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
exiting_ack_irq();
ack_APIC_irq();
}
/* Thermal monitoring depends on APIC, ACPI and clock modulation */

View File

@@ -21,12 +21,11 @@ static void default_threshold_interrupt(void)
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
asmlinkage __visible void __irq_entry smp_threshold_interrupt(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
{
entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
exiting_ack_irq();
ack_APIC_irq();
}

View File

@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/hardirq.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -16,14 +17,12 @@
#include "internal.h"
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
static noinstr void winchip_machine_check(struct pt_regs *regs)
{
ist_enter(regs);
instrumentation_begin();
pr_emerg("CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
ist_exit(regs);
instrumentation_end();
}
/* Set up machine check reporting on the Winchip C6 series */

View File

@@ -545,8 +545,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
/*
* Returns:
* < 0 - on error
* 0 - no update done
* 1 - microcode was updated
* 0 - success (no update done or microcode was updated)
*/
static int __reload_late(void *info)
{
@@ -573,11 +572,11 @@ static int __reload_late(void *info)
else
goto wait_for_siblings;
if (err > UCODE_NFOUND) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
if (err >= UCODE_NFOUND) {
if (err == UCODE_ERROR)
pr_warn("Error reloading microcode on CPU %d\n", cpu);
ret = -1;
} else if (err == UCODE_UPDATED || err == UCODE_OK) {
ret = 1;
}
wait_for_siblings:
@@ -608,7 +607,7 @@ static int microcode_reload_late(void)
atomic_set(&late_cpus_out, 0);
ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
if (ret > 0)
if (ret == 0)
microcode_check();
pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
@@ -649,7 +648,7 @@ static ssize_t reload_store(struct device *dev,
put:
put_online_cpus();
if (ret >= 0)
if (ret == 0)
ret = size;
return ret;

View File

@@ -23,6 +23,7 @@
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/desc.h>
#include <asm/idtentry.h>
#include <asm/irq_regs.h>
#include <asm/i8259.h>
#include <asm/apic.h>
@@ -40,11 +41,10 @@ static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
__visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
{
struct pt_regs *old_regs = set_irq_regs(regs);
entering_irq();
inc_irq_stat(irq_hv_callback_count);
if (vmbus_handler)
vmbus_handler();
@@ -52,7 +52,6 @@ __visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
ack_APIC_irq();
exiting_irq();
set_irq_regs(old_regs);
}
@@ -73,19 +72,16 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
* Routines to do per-architecture handling of stimer0
* interrupts when in Direct Mode
*/
__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
{
struct pt_regs *old_regs = set_irq_regs(regs);
entering_irq();
inc_irq_stat(hyperv_stimer0_count);
if (hv_stimer0_handler)
hv_stimer0_handler();
add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
ack_APIC_irq();
exiting_irq();
set_irq_regs(old_regs);
}
@@ -227,8 +223,8 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n",
ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features);
ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
@@ -263,6 +259,16 @@ static void __init ms_hyperv_init_platform(void)
cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
}
/*
* Hyper-V expects to get crash register data or kmsg when
* crash enlightment is available and system crashes. Set
* crash_kexec_post_notifiers to be true to make sure that
* calling crash enlightment interface before running kdump
* kernel.
*/
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
crash_kexec_post_notifiers = true;
#ifdef CONFIG_X86_LOCAL_APIC
if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
@@ -321,17 +327,19 @@ static void __init ms_hyperv_init_platform(void)
x86_platform.apic_post_init = hyperv_init;
hyperv_setup_mmu_ops();
/* Setup the IDT for hypervisor callback */
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback);
/* Setup the IDT for reenlightenment notifications */
if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT)
if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) {
alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
hyperv_reenlightenment_vector);
asm_sysvec_hyperv_reenlightenment);
}
/* Setup the IDT for stimer0 */
if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE)
if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) {
alloc_intr_gate(HYPERV_STIMER0_VECTOR,
hv_stimer0_callback_vector);
asm_sysvec_hyperv_stimer0);
}
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;

View File

@@ -761,7 +761,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
flush_tlb_local();
/* Save MTRR state */
rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
@@ -778,7 +778,7 @@ static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
flush_tlb_local();
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);

View File

@@ -63,6 +63,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
fallthrough;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_PERFCTR0;
}
return 0;
}
@@ -92,6 +96,10 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
fallthrough;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_EVENTSEL0;
}
return 0;

View File

@@ -22,7 +22,7 @@
#include <linux/cpuhotplug.h>
#include <asm/intel-family.h>
#include <asm/resctrl_sched.h>
#include <asm/resctrl.h>
#include "internal.h"
/* Mutex to protect rdtgroup access. */
@@ -578,6 +578,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
d->id = id;
cpumask_set_cpu(cpu, &d->cpu_mask);
rdt_domain_reconfigure_cdp(r);
if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
kfree(d);
return;
@@ -956,6 +958,36 @@ static __init void rdt_init_res_defs(void)
static enum cpuhp_state rdt_online;
/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
c->x86_cache_max_rmid = -1;
c->x86_cache_occ_scale = -1;
c->x86_cache_mbm_width_offset = -1;
return;
}
/* will be overridden if occupancy monitoring exists */
c->x86_cache_max_rmid = cpuid_ebx(0xf);
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
u32 eax, ebx, ecx, edx;
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
if (c->x86_vendor == X86_VENDOR_INTEL)
c->x86_cache_mbm_width_offset = eax & 0xff;
else
c->x86_cache_mbm_width_offset = -1;
}
}
static int __init resctrl_late_init(void)
{
struct rdt_resource *r;

View File

@@ -495,14 +495,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
return ret;
}
void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
struct rdtgroup *rdtgrp, int evtid, int first)
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_domain *d, struct rdtgroup *rdtgrp,
int evtid, int first)
{
/*
* setup the parameters to send to the IPI to read the data.
*/
rr->rgrp = rdtgrp;
rr->evtid = evtid;
rr->r = r;
rr->d = d;
rr->val = 0;
rr->first = first;
@@ -539,7 +541,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
goto out;
}
mon_event_read(&rr, d, rdtgrp, evtid, false);
mon_event_read(&rr, r, d, rdtgrp, evtid, false);
if (rr.val & RMID_VAL_ERROR)
seq_puts(m, "Error\n");

View File

@@ -31,7 +31,7 @@
#define CQM_LIMBOCHECK_INTERVAL 1000
#define MBM_CNTR_WIDTH 24
#define MBM_CNTR_WIDTH_BASE 24
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
@@ -40,6 +40,12 @@
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
/*
* With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
* data to be returned. The counter width is discovered from the hardware
* as an offset from MBM_CNTR_WIDTH_BASE.
*/
#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
struct rdt_fs_context {
@@ -87,6 +93,7 @@ union mon_data_bits {
struct rmid_read {
struct rdtgroup *rgrp;
struct rdt_resource *r;
struct rdt_domain *d;
int evtid;
bool first;
@@ -460,6 +467,7 @@ struct rdt_resource {
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
unsigned int mbm_width;
unsigned long fflags;
};
@@ -587,8 +595,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
unsigned int dom_id);
void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
struct rdt_domain *d);
void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
struct rdtgroup *rdtgrp, int evtid, int first);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_domain *d, struct rdtgroup *rdtgrp,
int evtid, int first);
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
@@ -601,5 +610,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */

View File

@@ -214,9 +214,9 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
{
u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
u64 shift = 64 - width, chunks;
chunks = (cur_msr << shift) - (prev_msr << shift);
return chunks >>= shift;
@@ -256,7 +256,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
return 0;
}
chunks = mbm_overflow_count(m->prev_msr, tval);
chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
m->chunks += chunks;
m->prev_msr = tval;
@@ -278,7 +278,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
return;
chunks = mbm_overflow_count(m->prev_bw_msr, tval);
chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
m->chunks_bw += chunks;
m->chunks = m->chunks_bw;
cur_bw = (chunks * r->mon_scale) >> 20;
@@ -433,11 +433,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
}
}
static void mbm_update(struct rdt_domain *d, int rmid)
static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
{
struct rmid_read rr;
rr.first = false;
rr.r = r;
rr.d = d;
/*
@@ -510,6 +511,7 @@ void mbm_handle_overflow(struct work_struct *work)
struct rdtgroup *prgrp, *crgrp;
int cpu = smp_processor_id();
struct list_head *head;
struct rdt_resource *r;
struct rdt_domain *d;
mutex_lock(&rdtgroup_mutex);
@@ -517,16 +519,18 @@ void mbm_handle_overflow(struct work_struct *work)
if (!static_branch_likely(&rdt_mon_enable_key))
goto out_unlock;
d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
r = &rdt_resources_all[RDT_RESOURCE_L3];
d = get_domain_from_cpu(cpu, r);
if (!d)
goto out_unlock;
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
mbm_update(d, prgrp->mon.rmid);
mbm_update(r, d, prgrp->mon.rmid);
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
mbm_update(d, crgrp->mon.rmid);
mbm_update(r, d, crgrp->mon.rmid);
if (is_mba_sc(NULL))
update_mba_bw(prgrp, d);
@@ -614,11 +618,18 @@ static void l3_mon_evt_init(struct rdt_resource *r)
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
unsigned int cl_size = boot_cpu_data.x86_cache_size;
int ret;
r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
r->mbm_width = MBM_CNTR_WIDTH_BASE;
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
r->mbm_width += mbm_offset;
else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
pr_warn("Ignoring impossible MBM counter offset\n");
/*
* A reasonable upper limit on the max threshold is the number

View File

@@ -24,7 +24,7 @@
#include <asm/cacheflush.h>
#include <asm/intel-family.h>
#include <asm/resctrl_sched.h>
#include <asm/resctrl.h>
#include <asm/perf_event.h>
#include "../../events/perf_event.h" /* For X86_CONFIG() */
@@ -1326,9 +1326,9 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
* pseudo-locked region will still be here on return.
*
* The mutex has to be released temporarily to avoid a potential
* deadlock with the mm->mmap_sem semaphore which is obtained in
* the device_create() and debugfs_create_dir() callpath below
* as well as before the mmap() callback is called.
* deadlock with the mm->mmap_lock which is obtained in the
* device_create() and debugfs_create_dir() callpath below as well as
* before the mmap() callback is called.
*/
mutex_unlock(&rdtgroup_mutex);

View File

@@ -29,7 +29,7 @@
#include <uapi/linux/magic.h>
#include <asm/resctrl_sched.h>
#include <asm/resctrl.h>
#include "internal.h"
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
@@ -1859,6 +1859,19 @@ static int set_cache_qos_cfg(int level, bool enable)
return 0;
}
/* Restore the qos cfg state when a domain comes online */
void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
{
if (!r->alloc_capable)
return;
if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA])
l2_qos_cfg_update(&r->alloc_enabled);
if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA])
l3_qos_cfg_update(&r->alloc_enabled);
}
/*
* Enable or disable the MBA software controller
* which helps user specify bandwidth in MBps.
@@ -2459,7 +2472,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
goto out_destroy;
if (is_mbm_event(mevt->evtid))
mon_event_read(&rr, d, prgrp, mevt->evtid, true);
mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
}
kernfs_activate(kn);
return 0;
@@ -3072,7 +3085,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
* If the rdtgroup is a mon group and parent directory
* is a valid "mon_groups" directory, remove the mon group.
*/
if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
rdtgrp != &rdtgroup_default) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
ret = rdtgroup_ctrl_remove(kn, rdtgrp);
@@ -3185,10 +3199,10 @@ int __init rdtgroup_init(void)
* during the debugfs directory creation also &sb->s_type->i_mutex_key
* (the lockdep class of inode->i_rwsem). Other filesystem
* interactions (eg. SyS_getdents) have the lock ordering:
* &sb->s_type->i_mutex_key --> &mm->mmap_sem
* During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
* &sb->s_type->i_mutex_key --> &mm->mmap_lock
* During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
* is taken, thus creating dependency:
* &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause
* &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
* issues considering the other two lock dependencies.
* By creating the debugfs directory here we avoid a dependency
* that may cause deadlock (even though file operations cannot