下面进行 schedule()函数的简单分析!

此时我们仍然在进程 0 中,由进程 0 调用 schedule() 进行进程 0 和进程 1 间的切换!

asmlinkage void __sched schedule(void)
{
    long *switch_count;

    task_t *prev, *next;

    runqueue_t *rq;

    prio_array_t *array;

    struct list_head *queue;

    unsigned long long now;

    unsigned long run_time;

    int cpu, idx;

    /*
     * Test if we are atomic.  Since do_exit() needs to call into
     * schedule() atomically, we ignore that path for now.
     * Otherwise, whine if we are scheduling when we should not be.
     */

    if (likely(!current->exit_state)) {
        if (unlikely(in_atomic())) {
            printk(KERN_ERR "scheduling while atomic: "
                "%s/0x%08x/%d\n",
                current->comm, preempt_count(), current->pid);

            dump_stack();
        }
    }

    profile_hit(SCHED_PROFILING, __builtin_return_address(0));

need_resched:

    preempt_disable();
    禁止内核抢占

    prev = current;
    将进程 0 的进程描述符指针赋给 prev

    release_kernel_lock(prev);
    释放大内核锁

need_resched_nonpreemptible:

    rq = this_rq();
    获得当前cpu运行队列

    /*
     * The idle thread is not allowed to schedule!
     * Remove this check after it has been exercised a bit.
     */
    if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) {
    如果当前运行进程为进程 0 并且运行状态不是 TASK_RUNNING

        printk(KERN_ERR "bad: scheduling from the idle thread!\n");

        dump_stack();
    }

    schedstat_inc(rq, sched_cnt);

    rq->sched_cnt++

    now = sched_clock();
    返回当前时间 ns 级

    if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
    若当前系统时间减去进程最近插入运行队列时间小于小于平均睡眠时间

        run_time = now - prev->timestamp;   

        if (unlikely((long long)(now - prev->timestamp) < 0))
            run_time = 0;
    } else

        run_time = NS_MAX_SLEEP_AVG; 

    /*
     * Tasks charged proportionately less run_time at high sleep_avg to
     * delay them losing their interactive status
     */

    run_time /= (CURRENT_BONUS(prev) ? : 1);
    CURRENT_BONUS 返回 0-10 之间的值,它与进程的平均睡眠时间是成比例的!
    run_time 用于限制进程对CPU的使用!

    spin_lock_irq(&rq->lock); 

    if (unlikely(prev->flags & PF_DEAD))
        prev->state = EXIT_DEAD;
    判断进程是否是正在被终止的进程

    switch_count = &prev->nivcsw;
    上下文切换数

    if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
    如果当前进程运行状态不是TASK_RUNNING 并且没有在内核态被抢占

        switch_count = &prev->nvcsw;

        if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
                unlikely(signal_pending(prev))))
       若进程为非阻塞挂起信号,而且状态为TASK_INTERRUPTIBLE

            prev->state = TASK_RUNNING;

        else {

            否则若当前进程运行状态为 TASK_UNINTERRUPTIBLE 从cpu运行队列中删除当前进程,当前进程编程睡眠状态,等待由某个进程调用 wake_up 将其唤醒并插入到cpu运行队列中!

            if (prev->state == TASK_UNINTERRUPTIBLE)
                rq->nr_uninterruptible++;

            deactivate_task(prev, rq);

        }

    }

    cpu = smp_processor_id();

    if (unlikely(!rq->nr_running)) {
    若cpu运行队列中包含可运行进程数为 0

go_idle:

        idle_balance(cpu, rq);

        if (!rq->nr_running) {
            next = rq->idle;

            rq->expired_timestamp = 0;

            wake_sleeping_dependent(cpu, rq);

            /*
             * wake_sleeping_dependent() might have released
             * the runqueue, so break out if we got new
             * tasks meanwhile:
             */

            if (!rq->nr_running)
                goto switch_tasks;

        }

    } else {

        if (dependent_sleeper(cpu, rq)) {
        由于没有选中内核选项 CONFIG_SCHED_SMT 所以此函数直接返回0

            next = rq->idle;

            goto switch_tasks;
        }

        /*
         * dependent_sleeper() releases and reacquires the runqueue
         * lock, hence go into the idle loop if the rq went
         * empty meanwhile:
         */

        if (unlikely(!rq->nr_running))
            goto go_idle;
    }

    array = rq->active;
    指向cpu运行队列活动优先级数组

    if (unlikely(!array->nr_active)) {
    若活动运行数组中可运行进程数位0,在创建进程 1 的过程中已经 ++

        /*
         * Switch the active and expired arrays.
         */

        schedstat_inc(rq, sched_switch);

        rq->active = rq->expired;

        rq->expired = array;

        array = rq->active;

        rq->expired_timestamp = 0;

        rq->best_expired_prio = MAX_PRIO;

    }

 

    idx = sched_find_first_bit(array->bitmap);
    从活动优先级数组的优先级位图中找到第一个不为 0 的bit 位序号

    queue = array->queue + idx;
    指向对应优先级相同的进程链表

    next = list_entry(queue->next, task_t, run_list);
    算出要被切换到的进程的进程描述符地址

    if (!rt_task(next) && next->activated > 0) {
        unsigned long long delta = now - next->timestamp;

        if (unlikely((long long)(now - next->timestamp) < 0))
            delta = 0;

        if (next->activated == 1)
            delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;

        array = next->array;

        dequeue_task(next, array);

        recalc_task_prio(next, next->timestamp + delta);

        enqueue_task(next, array);

    }

    next->activated = 0;
    进程被唤醒时所使用的条件代码

switch_tasks:

    if (next == rq->idle)
        schedstat_inc(rq, sched_goidle);

    prefetch(next);
    提示cpu控制单元把next的进程描述符的第一部分内容装入到硬件高速缓存

    clear_tsk_need_resched(prev);
    清除当前进程(进程 0)的 TIF_NEED_RESCHED 标志

    rcu_qsctr_inc(task_cpu(prev)); 

    update_cpu_clock(prev, rq, now);
    更新当前进程 sched_time 字段

    prev->sleep_avg -= run_time;
    减少平均睡眠时间

    if ((long)prev->sleep_avg <= 0)
        prev->sleep_avg = 0;

    prev->timestamp = prev->last_ran = now;
    更新时间戳

    sched_info_switch(prev, next);
    更新cpu运行队列的字段

    if (likely(prev != next)) {
        next->timestamp = now;

        rq->nr_switches++;
        更新当前处理器上进行进程切换的数目

        rq->curr = next;
        更新运行队列当前运行进程为进程 1

        ++*switch_count;
        更新当前进程nivcsw字段

        prepare_arch_switch(rq, next);

        prev = context_switch(rq, prev, next);
        进程上下文切换

        barrier();

        finish_task_switch(prev);      

    } else

      此时表示当前 cpu 运行队列中没有优先级较高或相等的其他活动进程

        spin_unlock_irq(&rq->lock);

    prev = current;

    if (unlikely(reacquire_kernel_lock(prev) < 0))
        goto need_resched_nonpreemptible;

    preempt_enable_no_resched();

    if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
        goto need_resched;
     检查是否一些其他进程设置了当前进程的 TIF_NEED_RESCHED 标志。若设置了,则重新调度

}
#define NS_MAX_SLEEP_AVG        (JIFFIES_TO_NS(MAX_SLEEP_AVG))</pre>
<p></p>


schedule() 分析完毕!其中几个重要的函数(context_switch())以后会分析。