mirror of
				https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux
				synced 2025-10-31 17:04:14 +10:00 
			
		
		
		
	Dimitri Sivanich noticed that xtime_lock is held write locked across calc_load() which iterates over all online CPUs. That can cause long latencies for xtime_lock readers on large SMP systems. The load average calculation is a rough estimate anyway so there is no real need to protect the readers vs. the update. It's not a problem when the avenrun array is updated while a reader copies the values. Instead of iterating over all online CPUs let the scheduler_tick code update the number of active tasks shortly before the avenrun update happens. The avenrun update itself is handled by the CPU which calls do_timer(). [ Impact: reduce xtime_lock write locked section ] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Peter Zijlstra <peterz@infradead.org>
		
			
				
	
	
		
			130 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			130 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * idle-task scheduling class.
 *
 * (NOTE: these are not related to SCHED_IDLE tasks which are
 *  handled in sched_fair.c)
 */
 | |
| 
 | |
#ifdef CONFIG_SMP
/*
 * Runqueue selection for an idle task.
 *
 * Idle tasks are never migrated, so the result is simply whatever
 * task_cpu() reports for @p.  @sync is not used.
 */
static int select_task_rq_idle(struct task_struct *p, int sync)
{
	return task_cpu(p);
}
#endif /* CONFIG_SMP */
 | |
| /*
 | |
|  * Idle tasks are unconditionally rescheduled:
 | |
|  */
 | |
| static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
 | |
| {
 | |
| 	resched_task(rq->idle);
 | |
| }
 | |
| 
 | |
| static struct task_struct *pick_next_task_idle(struct rq *rq)
 | |
| {
 | |
| 	schedstat_inc(rq, sched_goidle);
 | |
| 	/* adjust the active tasks as we might go into a long sleep */
 | |
| 	calc_load_account_active(rq);
 | |
| 	return rq->idle;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * It is not legal to sleep in the idle task - print a warning
 | |
|  * message if some code attempts to do it:
 | |
|  */
 | |
| static void
 | |
| dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
 | |
| {
 | |
| 	spin_unlock_irq(&rq->lock);
 | |
| 	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
 | |
| 	dump_stack();
 | |
| 	spin_lock_irq(&rq->lock);
 | |
| }
 | |
| 
 | |
/*
 * Intentionally empty: the idle class does no work in its put_prev_task
 * hook.  Both @rq and @prev are unused.
 */
static void
put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
 | |
| 
 | |
#ifdef CONFIG_SMP
/*
 * load_balance hook of the idle class.
 *
 * Every argument is ignored and 0 is returned unconditionally.
 */
static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
		  unsigned long max_load_move, struct sched_domain *sd,
		  enum cpu_idle_type idle, int *all_pinned,
		  int *this_best_prio)
{
	return 0;
}

/*
 * move_one_task hook of the idle class.
 *
 * Every argument is ignored and 0 is returned unconditionally.
 */
static int
move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
		   struct sched_domain *sd, enum cpu_idle_type idle)
{
	return 0;
}
#endif /* CONFIG_SMP */
 | |
| 
 | |
/*
 * Intentionally empty: the idle class does no work in its task_tick
 * hook.  @rq, @curr and @queued are unused.
 */
static void
task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
{
}
 | |
| 
 | |
/*
 * Intentionally empty: the idle class does no work in its set_curr_task
 * hook.  @rq is unused.
 */
static void
set_curr_task_idle(struct rq *rq)
{
}
 | |
| 
 | |
| static void switched_to_idle(struct rq *rq, struct task_struct *p,
 | |
| 			     int running)
 | |
| {
 | |
| 	/* Can this actually happen?? */
 | |
| 	if (running)
 | |
| 		resched_task(rq->curr);
 | |
| 	else
 | |
| 		check_preempt_curr(rq, p, 0);
 | |
| }
 | |
| 
 | |
| static void prio_changed_idle(struct rq *rq, struct task_struct *p,
 | |
| 			      int oldprio, int running)
 | |
| {
 | |
| 	/* This can happen for hot plug CPUS */
 | |
| 
 | |
| 	/*
 | |
| 	 * Reschedule if we are currently running on this runqueue and
 | |
| 	 * our priority decreased, or if we are not currently running on
 | |
| 	 * this runqueue and our priority is higher than the current's
 | |
| 	 */
 | |
| 	if (running) {
 | |
| 		if (p->prio > oldprio)
 | |
| 			resched_task(rq->curr);
 | |
| 	} else
 | |
| 		check_preempt_curr(rq, p, 0);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Simple, special scheduling class for the per-CPU idle tasks:
 | |
|  */
 | |
| static const struct sched_class idle_sched_class = {
 | |
| 	/* .next is NULL */
 | |
| 	/* no enqueue/yield_task for idle tasks */
 | |
| 
 | |
| 	/* dequeue is not valid, we print a debug message there: */
 | |
| 	.dequeue_task		= dequeue_task_idle,
 | |
| 
 | |
| 	.check_preempt_curr	= check_preempt_curr_idle,
 | |
| 
 | |
| 	.pick_next_task		= pick_next_task_idle,
 | |
| 	.put_prev_task		= put_prev_task_idle,
 | |
| 
 | |
| #ifdef CONFIG_SMP
 | |
| 	.select_task_rq		= select_task_rq_idle,
 | |
| 
 | |
| 	.load_balance		= load_balance_idle,
 | |
| 	.move_one_task		= move_one_task_idle,
 | |
| #endif
 | |
| 
 | |
| 	.set_curr_task          = set_curr_task_idle,
 | |
| 	.task_tick		= task_tick_idle,
 | |
| 
 | |
| 	.prio_changed		= prio_changed_idle,
 | |
| 	.switched_to		= switched_to_idle,
 | |
| 
 | |
| 	/* no .task_new for idle tasks */
 | |
| };
 |