aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
authorGravatar Srikar Dronamraju <srikar@linux.vnet.ibm.com> 2023-12-14 23:37:11 +0530
committerGravatar Michael Ellerman <mpe@ellerman.id.au> 2023-12-15 13:51:34 +1100
commitaa80c6343fcf53cbc29f84ba9f89ca87d4e41350 (patch)
tree6e9f6dcbde0f272d3dcbb9e0e71337283e8146ae /arch/powerpc
parentLinux 6.7-rc2 (diff)
downloadlinux-aa80c6343fcf53cbc29f84ba9f89ca87d4e41350.tar.gz
linux-aa80c6343fcf53cbc29f84ba9f89ca87d4e41350.tar.bz2
linux-aa80c6343fcf53cbc29f84ba9f89ca87d4e41350.zip
powerpc/smp: Enable Asym packing for cores on shared processor
If there are shared processor LPARs, the underlying hypervisor can have more virtual cores to handle than actual physical cores. Starting with Power 9, a big core (aka SMT8 core) has 2 nearly independent thread groups. On shared processor LPARs, it helps to pack threads onto fewer cores so that the overall system performance and utilization improve. PowerVM schedules at a big core level, hence packing to fewer cores helps. Since each thread group is independent, running threads on both thread groups of an SMT8 core should have a minimal adverse impact in non-over-provisioned scenarios. The changes in this patchset will not affect the over-provisioned scenario: if there are more threads than SMT domains, then asym_packing will not kick in. For example: let's say there are two 8-core shared LPARs that are actually sharing an 8-core shared physical pool, each running 8 threads. Then consolidating the 8 threads onto 4 cores on each LPAR would help them perform better. This is because each LPAR will get 100% of the time to run applications and there will be no switching required by the hypervisor. To achieve this, enable the SD_ASYM_PACKING flag at the CACHE, MC and DIE (named PKG in the diff below) levels when the system is running in shared processor mode and has big cores. Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20231214180720.310852-2-srikar@linux.vnet.ibm.com
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/kernel/smp.c25
1 files changed, 23 insertions, 2 deletions
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ab691c89d787..3fc8ad9646a4 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1004,6 +1004,13 @@ static int powerpc_smt_flags(void)
#endif
/*
+ * On shared processor LPARs scheduled on a big core (which has two or more
+ * independent thread groups per core), prefer lower numbered CPUs, so
+ * that workload consolidates to lesser number of cores.
+ */
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
+
+/*
* P9 has a slightly odd architecture where pairs of cores share an L2 cache.
* This topology makes it *much* cheaper to migrate tasks between adjacent cores
* since the migrated task remains cache hot. We want to take advantage of this
@@ -1011,9 +1018,20 @@ static int powerpc_smt_flags(void)
*/
static int powerpc_shared_cache_flags(void)
{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
+
return SD_SHARE_PKG_RESOURCES;
}
+static int powerpc_shared_proc_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_ASYM_PACKING;
+
+ return 0;
+}
+
/*
* We can't just pass cpu_l2_cache_mask() directly because
* returns a non-const pointer and the compiler barfs on that.
@@ -1050,8 +1068,8 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_mc_mask, SD_INIT_NAME(MC) },
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
+ { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
+ { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG) },
{ NULL, },
};
@@ -1686,6 +1704,9 @@ static void __init fixup_topology(void)
{
int i;
+ if (is_shared_processor() && has_big_cores)
+ static_branch_enable(&splpar_asym_pack);
+
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");