话说上一回合已经完成了Global事件设备(hpet)的注册,现在来看下Local时钟事件的注册,就以 CPU0 为例.

在 kernel 初始化的末期,内核线程 1 中(还没有 up 其他 cpu),在 log 中出现了类似“CPU0: Intel(R) Xeon(TM) CPU 3.20GHz stepping 04”,发生了这样一个故事……

局部时钟事件设备的注册

```
kernel_init()
  -> smp_prepare_cpus(setup_max_cpus)
     native_smp_prepare_cpus(64)
       -> x86_init.timers.setup_percpu_clockev()
          setup_boot_APIC_clock()
            -> setup_APIC_timer()
                 -> clockevents_register_device(levt)
```

 

/*
 * Register the calling CPU's local APIC timer as a clock event device.
 *
 * The per-CPU lapic_events slot is filled with a copy of the
 * lapic_clockevent template, bound to the current CPU through its cpumask,
 * and handed to clockevents_register_device().
 */
static void __cpuinit setup_APIC_timer(void)
{
        /* This CPU's private clock_event_device instance. */
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);

        /*
         * With X86_FEATURE_ARAT the template loses CLOCK_EVT_FEAT_C3STOP and
         * its rating is raised to 150.  Note that this modifies the shared
         * template, not only this CPU's copy.
         */
        if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
                /* Make LAPIC timer preferrable over percpu HPET */
                lapic_clockevent.rating = 150;
        }

        /* Copy the template, then restrict the copy to the current CPU. */
        memcpy(levt, &lapic_clockevent, sizeof(*levt));
        levt->cpumask = cpumask_of(smp_processor_id());

        clockevents_register_device(levt);
        // Registers the local clock event device; in this walkthrough the caller is CPU0
}

 

```
 +----------------------------------------------------------------------------------+
 |      /*                                                                          |
 |       * The local apic timer can be used for any function which is CPU local.    |
 |       */                                                                         |
 |      static struct clock_event_device lapic_clockevent = {                       |
 |              .name           = "lapic",                                          |
 |              .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT  |
 |                              | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,     |
 |              .shift          = 32,                                               |
 |              .set_mode       = lapic_timer_setup,                                |
 |              .set_next_event = lapic_next_event,                                 |
 |              .broadcast      = lapic_timer_broadcast,                            |
 |              .rating         = 100,                                              |
 |              .irq            = -1,                                               |
 |      };                                                                          |
 |                                                                                  |
 +----------------------------------------------------------------------------------+
```

 

故事从这里就又开始了

/*
 * Register a clock event device.
 *
 * Under clockevents_lock (interrupts saved/disabled) the device is put on
 * the clockevent_devices list, a CLOCK_EVT_NOTIFY_ADD notification is sent
 * for it, and any devices queued on the released list are then processed by
 * clockevents_notify_released().
 *
 * @dev: device to register; it must be in CLOCK_EVT_MODE_UNUSED and have a
 *       non-NULL cpumask, otherwise BUG_ON() fires.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
        // In this walkthrough dev is levt from setup_APIC_timer(), i.e. CPU0's
        // per-CPU copy of lapic_clockevent (not the template itself)
 
        unsigned long flags;
 
        BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
        BUG_ON(!dev->cpumask);
        // The device must already be bound to some CPU; at this point that is still cpu0
 
        spin_lock_irqsave(&clockevents_lock, flags);
 
        list_add(&dev->list, &clockevent_devices);
        // Put on the clockevent_devices list, much like clocksource registration;
        // just pay attention to how the entry is inserted into the list
 
        clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
 
        clockevents_notify_released();
        // This call is reached both when the global clock event device (hpet) was
        // registered and now, when cpu0 registers its lapic
 
        spin_unlock_irqrestore(&clockevents_lock, flags);
}

        ......

/*
 * Excerpt ("......" marks code the article leaves out).
 *
 * Decide whether newdev should become the tick device of the current CPU.
 * If newdev does not serve this CPU, or loses against the device already
 * installed, control goes to out_bc, where newdev is only offered as a
 * broadcast device.  Otherwise the current device is exchanged for newdev
 * and the tick device is set up with it.
 *
 * First pass of the walkthrough: newdev is CPU0's lapic device and the
 * CPU's tick_device still holds hpet.
 */
static int tick_check_new_device(struct clock_event_device *newdev)
{
        ......

        cpu = smp_processor_id();
        // Still CPU0
        if (!cpumask_test_cpu(cpu, newdev->cpumask))
                goto out_bc;
        // cpumask: cpumask to indicate for which CPUs this device works

        td = &per_cpu(tick_cpu_device, cpu);
        // Still CPU0's struct tick_device instance

        curdev = td->evtdev;
        // Note: in this walkthrough curdev is not NULL here, it points to hpet_clockevent

        ......

        if (curdev) {
                // curdev = &hpet_clockevent
                // newdev = &lapic_clockevent

                /*
                 * Prefer one shot capable devices !
                 */
                if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
                    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                        goto out_bc;
                /*
                 * Check the rating
                 */
                if (curdev->rating >= newdev->rating)
                        goto out_bc;

                // lapic rating = 100, hpet rating = 50

                // Explanation: a CPU's tick_device can hold only one clock event
                // device.  CPU0's tick_device already holds hpet, which was serving
                // as the global event device, so a choice has to be made here based
                // on whether each clock event device supports one-shot mode and on
                // the rating values.  In this pass neither check jumps to out_bc,
                // so execution simply continues below.
        }

        /*
         * Replace the eventually existing device by the new
         * device. If the current device is the broadcast device, do
         * not give it back to the clockevents layer !
         */
        if (tick_is_broadcast_device(curdev)) {
                clockevents_shutdown(curdev);
                curdev = NULL;
        }

        clockevents_exchange_device(curdev, newdev);

        tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
        // Author's note: this sets lapic_clockevent's event_handler to tick_handle_periodic

        ......        

        return NOTIFY_STOP;

out_bc:
        /*
         * Can the new device be used as a broadcast device ?
         */
        if (tick_check_broadcast_device(newdev))
                ret = NOTIFY_STOP;

        spin_unlock_irqrestore(&tick_device_lock, flags);

        return ret;
}

 

/*
 * Excerpt ("......" marks code the article leaves out).
 *
 * Only the branch taken when the tick device already had an event device is
 * shown: the handler and next_event of the device being replaced are saved,
 * and that device's handler is then pointed at clockevents_handle_noop.
 */
static void tick_setup_device(struct tick_device *td,
                              struct clock_event_device *newdev, int cpu,
                              const struct cpumask *cpumask)
{
        ......
        
        // Back to the else branch that an earlier part of this series left open
        else {
                handler = td->evtdev->event_handler;
                // Author's note: handler should be tick_handle_periodic here

                next_event = td->evtdev->next_event;
                td->evtdev->event_handler = clockevents_handle_noop;
                // Sets the event_handler of the old device (hpet_clockevent in this
                // walkthrough) to clockevents_handle_noop.
        }

        ......
}

 

/*
 * Release the old clock event device and prepare the new one.
 *
 * With local interrupts disabled: if @old is non-NULL it is switched to
 * CLOCK_EVT_MODE_UNUSED and moved from its current list to
 * clockevents_released; if @new is non-NULL it must be in
 * CLOCK_EVT_MODE_UNUSED (BUG_ON otherwise) and is shut down.
 *
 * @old: device being given up, may be NULL
 * @new: device being taken over, may be NULL
 */
void clockevents_exchange_device(struct clock_event_device *old,
                                 struct clock_event_device *new)
{
        // In this walkthrough: old = hpet
        //                      new = lapic
 
        unsigned long flags;
 
        local_irq_save(flags);
        /*
         * Caller releases a clock event device. We queue it into the
         * released list and do a notify add later.
         */
        if (old) {
                clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
                // Switch the old device to UNUSED mode
                list_del(&old->list);
                // Remove it from the list it was on (clockevent_devices)
                list_add(&old->list, &clockevents_released);
                // Queue it on the clockevents_released list
        }
 
        if (new) {
                BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
                clockevents_shutdown(new);
        }
        local_irq_restore(flags);
}

 

好了,现在回到了 clockevents_register_device() 中的 clockevents_notify_released() 函数。

/*
 * Drain the clockevents_released list.
 *
 * Each queued device is taken off the released list, put back on
 * clockevent_devices, and announced again with CLOCK_EVT_NOTIFY_ADD.
 */
static void clockevents_notify_released(void)
{
        struct clock_event_device *dev;

        while (!list_empty(&clockevents_released)) {
                // list_empty() returns 1 for an empty list, so the loop body runs
                // only while clockevents_released still has entries
                // In this walkthrough the list currently contains hpet

                dev = list_entry(clockevents_released.next,
                                 struct clock_event_device, list);
                // dev points to hpet_clockevent

                list_del(&dev->list);
                list_add(&dev->list, &clockevent_devices);
                // Put it back on the clockevent_devices list
                clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
                // Send the ADD notification for it again; the walkthrough follows
                // this into tick_check_new_device() a second time
        }
        // When the global clock_event device was registered, clockevents_released
        // was empty and the loop body never ran; this time it does.
}

 

故事从这又 TMD 的开始了……

现在 CPU0 的 tick_device 上的 clock_event device 已经是 local 的 lapic_clockevent,接下来要处理的就是刚被换下来的全局设备 hpet_clockevent。

/*
 * Excerpt ("......" marks code the article leaves out).
 *
 * Second pass of the walkthrough through the same function: newdev is now
 * hpet (re-announced from the released list) and CPU0's tick_device already
 * holds the lapic device.  hpet fails the rating check, so control goes to
 * out_bc, where it is offered as a broadcast device.
 */
static int tick_check_new_device(struct clock_event_device *newdev)
{
        ......

        cpu = smp_processor_id();
        // Still CPU0
        if (!cpumask_test_cpu(cpu, newdev->cpumask))
                goto out_bc;
        // cpumask: cpumask to indicate for which CPUs this device works

        td = &per_cpu(tick_cpu_device, cpu);
        // Still CPU0's struct tick_device instance

        curdev = td->evtdev;
        // Note: curdev is not NULL here, it now points to lapic_clockevent

        ......

        if (curdev) {
                // curdev = &lapic_clockevent
                // newdev = &hpet_clockevent

                /*
                 * Prefer one shot capable devices !
                 */
                if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
                    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                        goto out_bc;
                /*
                 * Check the rating
                 */
                if (curdev->rating >= newdev->rating)
                        goto out_bc;
                        // Taken in this pass: hpet_clockevent's rating is lower than
                        // that of the already installed lapic_clockevent, so all that
                        // is left is to check whether hpet can serve as the broadcast
                        // device

                // lapic rating = 100, hpet rating = 50
                
        }

        ......

out_bc:
        /*
         * Can the new device be used as a broadcast device ?
         */
        if (tick_check_broadcast_device(newdev))
                ret = NOTIFY_STOP;

        spin_unlock_irqrestore(&tick_device_lock, flags);

        return ret;
}

 

/*
 * Try to install @dev as the tick broadcast device.
 *
 * @dev is rejected (return 0) when a broadcast device with an equal or
 * higher rating is already installed, or when @dev has
 * CLOCK_EVT_FEAT_C3STOP set.  Otherwise @dev is taken over via
 * clockevents_exchange_device(NULL, dev), stored in
 * tick_broadcast_device.evtdev, periodic broadcasting is started if the
 * broadcast mask is not empty, and 1 is returned.
 */
int tick_check_broadcast_device(struct clock_event_device *dev)
{
        if ((tick_broadcast_device.evtdev &&
             tick_broadcast_device.evtdev->rating >= dev->rating) ||
             (dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;

        clockevents_exchange_device(NULL, dev);

        tick_broadcast_device.evtdev = dev;
        // tick_broadcast_device finally shows up; in this walkthrough it is set to hpet

        if (!cpumask_empty(tick_get_broadcast_mask()))
                tick_broadcast_start_periodic(dev);
                // (About the call above) it would switch the device to periodic
                // mode, but according to the author it is not executed in the
                // current context, which means hpet_clockevent's event_handler
                // should still be "clockevents_handle_noop"
        return 1;
}

 

好,现在说说当前所处的情况:Global 设备以及 CPU0 的 LAPIC 都已经注册完毕。假设系统是 4-core,现在处于 kernel 初始化末期的内核线程 1 中,马上要进行的是启动其余 CPU(1-3),并注册它们各自的 LAPIC clock_event device。

kernel_init()
  -> smp_init()

/*
 * Excerpt ("......" marks code the article leaves out).
 *
 * Bring up the remaining present CPUs: every present CPU that is not yet
 * online is passed to cpu_up(), stopping once the number of online CPUs
 * reaches setup_max_cpus.  Afterwards the online count is logged and
 * smp_cpus_done() is called.
 */
static void __init smp_init(void)
{       
        unsigned int cpu;
        
        /* FIXME: This should be done in userspace --RR */
        for_each_present_cpu(cpu) {
                if (num_online_cpus() >= setup_max_cpus)
                        break;
                if (!cpu_online(cpu))
                        cpu_up(cpu);
        }
        // Brings up the other CPUs; per the call chain in this article each of
        // them ends up running start_secondary()
                
        /* Any cleanup work */
        printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); 
        smp_cpus_done(setup_max_cpus);
                
        ......
}

 

局部时钟事件设备的注册

```
cpu_up()
  ...
  -> _cpu_up()
     smp_ops.cpu_up(cpu)
       -> native_cpu_up()
            -> do_boot_cpu()
                 -> wakeup_secondary_cpu_via_init()
                      -> startup_ipi_hook()
                         ......
                         (start_secondary 并不是被直接调用的:do_boot_cpu() 事先把 initial_code
                          设为 start_secondary,AP 被 INIT/STARTUP IPI 唤醒后,从 trampoline
                          进入启动汇编代码,最后跳转到 initial_code)
                         start_secondary()
                           -> setup_secondary_APIC_clock()
                                -> setup_APIC_timer()
                                     -> clockevents_register_device(levt)
```

 

其余的 CPU Lapic 注册过程和 CPU0 的一样也不复杂,最终都把相关 event_handler 初始化为 “tick_handle_periodic”

可是……但是……还是那句话,故事远没有结束,这才是一个开始……