powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings
On POWER10 systems, the "ibm,thread-groups" property value "2" indicates that the CPUs in a thread-group share both the L2 and the L3 cache. Hence, use cache_property = 2 itself to find both the L2 and L3 cache siblings: create a new thread_group_l3_cache_map to keep the list of L3 siblings, but fill that mask using the same property "2" array.

Signed-off-by: Parth Shah <parth@linux.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210728175607.591679-4-parth@linux.ibm.com
Commit e9ef81e107 (parent 69aa8e0785), committed by Michael Ellerman.
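For orientation, the "ibm,thread-groups" device-tree property that this code parses is a flat integer array: a property id, the number of thread groups, the number of threads per group, and then the thread lists themselves. Below is a minimal sketch of one such entry as a C array; the values are hypothetical, chosen only to illustrate the shape the parser expects.

/*
 * Hypothetical "ibm,thread-groups" entry for one core (illustrative
 * values, not taken from this commit). Property 2 means the threads
 * in each group share an L2 and, per this patch, also an L3.
 */
static const unsigned int example_ibm_thread_groups[] = {
	2,		/* property: shared L2 (and L3 on POWER10) */
	2,		/* nr_groups: two thread groups in this core */
	4,		/* threads_per_group */
	0, 2, 4, 6,	/* group 0: these threads share one L2+L3 */
	1, 3, 5, 7,	/* group 1: these threads share another L2+L3 */
};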
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -35,6 +35,7 @@ extern int *chip_id_lookup_table;
 
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
 
 #ifdef CONFIG_SMP
 
@@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu);
 
 extern bool has_big_cores;
 extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
 
 #define cpu_smt_mask cpu_smt_mask
 #ifdef CONFIG_SCHED_SMT
@@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu);
 #define hard_smp_processor_id()	get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
 #define thread_group_shares_l2	0
+#define thread_group_shares_l3	0
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level)
 	else if (thread_group_shares_l2 && level == 2)
 		return cpumask_first(per_cpu(thread_group_l2_cache_map,
 					     cpu_id));
+	else if (thread_group_shares_l3 && level == 3)
+		return cpumask_first(per_cpu(thread_group_l3_cache_map,
+					     cpu_id));
 	return -1;
 }
 
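Read together with the declarations added to asm/smp.h above, the lookup this hunk extends maps (cpu, cache level) to the first CPU of the thread-group sharing that cache, or -1 when "ibm,thread-groups" does not describe the level. A simplified sketch, not the verbatim function; the pre-existing L1 branch is omitted:

/*
 * Sketch of the post-patch lookup in arch/powerpc/kernel/cacheinfo.c.
 * thread_group_l2_cache_map and thread_group_l3_cache_map are the
 * per-CPU masks declared in asm/smp.h; the L1 branch is elided here.
 */
static int get_group_id_sketch(unsigned int cpu_id, int level)
{
	if (thread_group_shares_l2 && level == 2)
		return cpumask_first(per_cpu(thread_group_l2_cache_map, cpu_id));
	if (thread_group_shares_l3 && level == 3)
		return cpumask_first(per_cpu(thread_group_l3_cache_map, cpu_id));
	return -1;
}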
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -78,6 +78,7 @@ struct task_struct *secondary_current;
 bool has_big_cores;
 bool coregroup_enabled;
 bool thread_group_shares_l2;
+bool thread_group_shares_l3;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -101,7 +102,7 @@ enum {
 
 #define MAX_THREAD_LIST_SIZE	8
 #define THREAD_GROUP_SHARE_L1	1
-#define THREAD_GROUP_SHARE_L2	2
+#define THREAD_GROUP_SHARE_L2_L3 2
 struct thread_groups {
 	unsigned int property;
 	unsigned int nr_groups;
@@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
  */
 DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
 
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
+
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
 
@@ -889,33 +896,10 @@ out:
 	return tg;
 }
 
-static int __init init_thread_group_cache_map(int cpu, int cache_property)
-
+static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
 {
 	int first_thread = cpu_first_thread_sibling(cpu);
-	int i, cpu_group_start = -1, err = 0;
-	struct thread_groups *tg = NULL;
-	cpumask_var_t *mask = NULL;
-
-	if (cache_property != THREAD_GROUP_SHARE_L1 &&
-	    cache_property != THREAD_GROUP_SHARE_L2)
-		return -EINVAL;
-
-	tg = get_thread_groups(cpu, cache_property, &err);
-	if (!tg)
-		return err;
-
-	cpu_group_start = get_cpu_thread_group_start(cpu, tg);
-
-	if (unlikely(cpu_group_start == -1)) {
-		WARN_ON_ONCE(1);
-		return -ENODATA;
-	}
-
-	if (cache_property == THREAD_GROUP_SHARE_L1)
-		mask = &per_cpu(thread_group_l1_cache_map, cpu);
-	else if (cache_property == THREAD_GROUP_SHARE_L2)
-		mask = &per_cpu(thread_group_l2_cache_map, cpu);
+	int i;
 
 	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
 
@@ -934,6 +918,44 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
 	return 0;
 }
 
+static int __init init_thread_group_cache_map(int cpu, int cache_property)
+
+{
+	int cpu_group_start = -1, err = 0;
+	struct thread_groups *tg = NULL;
+	cpumask_var_t *mask = NULL;
+
+	if (cache_property != THREAD_GROUP_SHARE_L1 &&
+	    cache_property != THREAD_GROUP_SHARE_L2_L3)
+		return -EINVAL;
+
+	tg = get_thread_groups(cpu, cache_property, &err);
+
+	if (!tg)
+		return err;
+
+	cpu_group_start = get_cpu_thread_group_start(cpu, tg);
+
+	if (unlikely(cpu_group_start == -1)) {
+		WARN_ON_ONCE(1);
+		return -ENODATA;
+	}
+
+	if (cache_property == THREAD_GROUP_SHARE_L1) {
+		mask = &per_cpu(thread_group_l1_cache_map, cpu);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+	}
+	else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
+		mask = &per_cpu(thread_group_l2_cache_map, cpu);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+		mask = &per_cpu(thread_group_l3_cache_map, cpu);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+	}
+
+
+	return 0;
+}
+
 static bool shared_caches;
 
 #ifdef CONFIG_SCHED_SMT
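The mask-filling loop between zalloc_cpumask_var_node() and the final return is unchanged by this commit, so the hunk context hides it. Assuming it moved verbatim from the old init_thread_group_cache_map() (a sketch reconstructed under that assumption, not part of the diff), the complete helper reads:

static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
					int cpu, int cpu_group_start)
{
	int first_thread = cpu_first_thread_sibling(cpu);
	int i;

	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));

	for (i = first_thread; i < first_thread + threads_per_core; i++) {
		int i_group_start = get_cpu_thread_group_start(i, tg);

		if (unlikely(i_group_start == -1)) {
			WARN_ON_ONCE(1);
			return -ENODATA;
		}

		/* Thread i is in the same group as @cpu: mark it a sibling. */
		if (i_group_start == cpu_group_start)
			cpumask_set_cpu(i, *mask);
	}

	return 0;
}

Factoring the allocate-and-fill step out this way is what lets init_thread_group_cache_map() call it twice for property "2", once per cache level, without re-parsing the device tree.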
@@ -1020,14 +1042,16 @@ static int __init init_big_cores(void)
 	has_big_cores = true;
 
 	for_each_possible_cpu(cpu) {
-		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
 
 		if (err)
 			return err;
 	}
 
 	thread_group_shares_l2 = true;
-	pr_debug("L2 cache only shared by the threads in the small core\n");
+	thread_group_shares_l3 = true;
+	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
 	return 0;
 }
 
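Net effect: init_big_cores() still makes a single pass over the property-"2" array, but that one pass now populates both the L2 and the L3 sibling masks, and thread_group_shares_l2 and thread_group_shares_l3 are enabled together on systems that expose the property.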