From 7980ad7e4ca80f6c255f4473fba82a475342035a Mon Sep 17 00:00:00 2001
From: Andrea Righi
Date: Tue, 1 Jul 2025 08:08:45 +0200
Subject: [PATCH 1/3] selftests/sched_ext: Fix exit selftest hang on UP

On single-CPU systems, ops.select_cpu() is never called, causing the
EXIT_SELECT_CPU test case to wait indefinitely. Avoid the stall by
skipping this specific sub-test when only one CPU is available.

Reported-by: Phil Auld
Fixes: a5db7817af780 ("sched_ext: Add selftests")
Signed-off-by: Andrea Righi
Reviewed-by: Phil Auld
Tested-by: Phil Auld
Signed-off-by: Tejun Heo
---
 tools/testing/selftests/sched_ext/exit.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
index 9451782689de..ee25824b1cbe 100644
--- a/tools/testing/selftests/sched_ext/exit.c
+++ b/tools/testing/selftests/sched_ext/exit.c
@@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx)
 		struct bpf_link *link;
 		char buf[16];
 
+		/*
+		 * On single-CPU systems, ops.select_cpu() is never
+		 * invoked, so skip this test to avoid getting stuck
+		 * indefinitely.
+		 */
+		if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1)
+			continue;
+
 		skel = exit__open();
 		SCX_ENUM_INIT(skel);
 		skel->rodata->exit_point = tc;

From e14fd98c6d66cb76694b12c05768e4f9e8c95664 Mon Sep 17 00:00:00 2001
From: Breno Leitao
Date: Wed, 16 Jul 2025 10:38:48 -0700
Subject: [PATCH 2/3] sched/ext: Prevent update_locked_rq() calls with NULL rq

Avoid invoking update_locked_rq() when the runqueue (rq) pointer is NULL
in the SCX_CALL_OP and SCX_CALL_OP_RET macros.

Previously, calling update_locked_rq(NULL) with preemption enabled could
trigger the following warning:

  BUG: using __this_cpu_write() in preemptible [00000000]

This happens because __this_cpu_write() is unsafe to use in preemptible
context.

rq is NULL when an ops callback is invoked from an unlocked context. In
such cases there is nothing to store, since the tracked value should
already be NULL (unlocked). Ensure that update_locked_rq() is only
called when rq is non-NULL, preventing __this_cpu_write() from being
called in preemptible context.

Suggested-by: Peter Zijlstra
Fixes: 18853ba782bef ("sched_ext: Track currently locked rq")
Signed-off-by: Breno Leitao
Acked-by: Andrea Righi
Signed-off-by: Tejun Heo
Cc: stable@vger.kernel.org # v6.15
---
 kernel/sched/ext.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index b498d867ba21..7dd5cbcb7a06 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1272,7 +1272,8 @@ static inline struct rq *scx_locked_rq(void)
 
 #define SCX_CALL_OP(sch, mask, op, rq, args...)					\
 do {										\
-	update_locked_rq(rq);							\
+	if (rq)									\
+		update_locked_rq(rq);						\
 	if (mask) {								\
 		scx_kf_allow(mask);						\
 		(sch)->ops.op(args);						\
@@ -1280,14 +1281,16 @@ do {										\
 	} else {								\
 		(sch)->ops.op(args);						\
 	}									\
-	update_locked_rq(NULL);							\
+	if (rq)									\
+		update_locked_rq(NULL);						\
 } while (0)
 
 #define SCX_CALL_OP_RET(sch, mask, op, rq, args...)				\
 ({										\
 	__typeof__((sch)->ops.op(args)) __ret;					\
 										\
-	update_locked_rq(rq);							\
+	if (rq)									\
+		update_locked_rq(rq);						\
 	if (mask) {								\
 		scx_kf_allow(mask);						\
 		__ret = (sch)->ops.op(args);					\
@@ -1295,7 +1298,8 @@ do {										\
 	} else {								\
 		__ret = (sch)->ops.op(args);					\
 	}									\
-	update_locked_rq(NULL);							\
+	if (rq)									\
+		update_locked_rq(NULL);						\
 	__ret;									\
 })
 

From 06efc9fe0b8deeb83b47fd7c5451fe1a60c8a761 Mon Sep 17 00:00:00 2001
From: Andrea Righi
Date: Sat, 5 Jul 2025 07:43:51 +0200
Subject: [PATCH 3/3] sched_ext: idle: Handle migration-disabled tasks in idle
 selection

When SCX_OPS_ENQ_MIGRATION_DISABLED is enabled, migration-disabled tasks
are also routed to ops.enqueue(). A scheduler may attempt to dispatch
such tasks directly to an idle CPU using the default idle selection
policy via scx_bpf_select_cpu_and() or scx_bpf_select_cpu_dfl().

This scenario must be properly handled by the built-in idle policy to
avoid returning an idle CPU where the target task isn't allowed to run.
Otherwise, it can lead to errors such as:

  EXIT: runtime error (SCX_DSQ_LOCAL[_ON] cannot move migration disabled Chrome_ChildIOT[291646] from CPU 3 to 14)

Prevent this by explicitly handling migration-disabled tasks in the
built-in idle selection logic, maintaining their CPU affinity.

Fixes: a730e3f7a48bc ("sched_ext: idle: Consolidate default idle CPU selection kfuncs")
Signed-off-by: Andrea Righi
Signed-off-by: Tejun Heo
---
 kernel/sched/ext_idle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 6d29d3cbc670..001fb88a8481 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -903,7 +903,7 @@ s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 	 * selection optimizations and simply check whether the previously
 	 * used CPU is idle and within the allowed cpumask.
 	 */
-	if (p->nr_cpus_allowed == 1) {
+	if (p->nr_cpus_allowed == 1 || is_migration_disabled(p)) {
 		if (cpumask_test_cpu(prev_cpu, allowed ?: p->cpus_ptr) &&
 		    scx_idle_test_and_clear_cpu(prev_cpu))
 			cpu = prev_cpu;
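
For reference on patch 2: the warning comes from the per-CPU tracking
inside update_locked_rq(). A minimal sketch of that helper, assuming it
matches the shape introduced by the Fixes commit ("sched_ext: Track
currently locked rq"); simplified for illustration, not the literal
upstream code:

/* Sketch: per-CPU pointer to the rq currently locked, NULL when unlocked. */
static DEFINE_PER_CPU(struct rq *, locked_rq);

static void update_locked_rq(struct rq *rq)
{
	/*
	 * With an rq lock held, preemption is disabled, so
	 * __this_cpu_write() is safe. From an unlocked (preemptible)
	 * context there is no such guarantee, which is why the macros
	 * above now call this helper only when rq is non-NULL.
	 */
	if (rq)
		lockdep_assert_rq_held(rq);
	__this_cpu_write(locked_rq, rq);
}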
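
Patch 3's failure mode concerns schedulers that set
SCX_OPS_ENQ_MIGRATION_DISABLED and use the built-in idle policy from
ops.enqueue(). A hypothetical minimal enqueue path under those
assumptions (scheduler name, fallback dispatch, and ops wiring are
illustrative, not taken from the series):

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

void BPF_STRUCT_OPS(sketch_enqueue, struct task_struct *p, u64 enq_flags)
{
	bool is_idle = false;
	s32 cpu;

	/*
	 * @p may be migration-disabled here. Before the fix, the kfunc
	 * could pick an idle CPU other than the task's current one, and
	 * the SCX_DSQ_LOCAL_ON insertion below would then trigger the
	 * "cannot move migration disabled" runtime error quoted above.
	 */
	cpu = scx_bpf_select_cpu_dfl(p, scx_bpf_task_cpu(p), 0, &is_idle);
	if (is_idle)
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_DFL, enq_flags);
	else
		scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}

SCX_OPS_DEFINE(sketch_ops,
	       .enqueue			= (void *)sketch_enqueue,
	       .flags			= SCX_OPS_ENQ_MIGRATION_DISABLED,
	       .name			= "sketch");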