本地时钟事件设备的注册
话说上一回合已经完成了Global事件设备(hpet)的注册,现在来看下Local时钟事件的注册,就以 CPU0 为例.
在 kernel 初始化的末期,内核线程 1 中(还没有 up 其他 cpu),在 log 中出现了类似“CPU0: Intel(R) Xeon(TM) CPU 3.20GHz stepping 04”,发生了这样一个故事……
局部时钟事件设备的注册
kernel_init() -> smp_prepare_cpus(setup_max_cpus) -> native_smp_prepare_cpus(64) -> x86_init.timers.setup_percpu_clockev() -> setup_boot_APIC_clock() -> setup_APIC_timer() -> clockevents_register_device(levt)
static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) { lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; /* Make LAPIC timer preferrable over percpu HPET */ lapic_clockevent.rating = 150; } memcpy(levt, &lapic_clockevent, sizeof(*levt)); levt->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(levt); // 注册 CPU0 Local Clock Event Device }
/*
 * The local apic timer can be used for any function which is CPU local.
 */
static struct clock_event_device lapic_clockevent = {
    .name           = "lapic",
    .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
                      | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
    .shift          = 32,
    .set_mode       = lapic_timer_setup,
    .set_next_event = lapic_next_event,
    .broadcast      = lapic_timer_broadcast,
    .rating         = 100,
    .irq            = -1,
};
故事从这里就又开始了
void clockevents_register_device(struct clock_event_device *dev) { // dev=&lapic_clockevent unsigned long flags; BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(!dev->cpumask); // 此时设备必须绑定到某个 CPU 上,此时还是 cpu0 spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); // 加入到 clockevent_devices 链表中,跟时钟源的注册很像,只要注意一下插入链表的方式就好 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); clockevents_notify_released(); // 当 Global Clock Event Device 注册完 hpet 和 cpu0 注册 lapic 之后会被调用. spin_unlock_irqrestore(&clockevents_lock, flags); } ...... static int tick_check_new_device(struct clock_event_device *newdev) { ...... cpu = smp_processor_id(); // 还是 CPU0 if (!cpumask_test_cpu(cpu, newdev->cpumask)) goto out_bc; // cpumask: cpumask to indicate for which CPUs this device works td = &per_cpu(tick_cpu_device, cpu); // 还是 CPU0 的那个 struct tick_device 实例 curdev = td->evtdev; // 注意此时 curdev 并不是 NULL 而是指向 hpet_clockevent ...... if (curdev) { // curdev = &hpet_clockevent // newdev = &lapic_clockevent /* * Prefer one shot capable devices ! */ if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) goto out_bc; /* * Check the rating */ if (curdev->rating >= newdev->rating) goto out_bc; // lapic rating = 100,hpet rating = 50 // 此处需要说明一下:一个 CPU 的一个 tick_device 只能对应一个 clock event device,但是此时 CPU0 的 tick_device 上已经有了用作 global event device 的 hpet,所以需要根据 clock event device 是否支持 one-shot mode 以及 rating 值来确定选用哪一个;这里两项检查都没有跳转到 out_bc,于是顺序往下执行。 } /* * Replace the eventually existing device by the new * device. If the current device is the broadcast device, do * not give it back to the clockevents layer ! */ if (tick_is_broadcast_device(curdev)) { clockevents_shutdown(curdev); curdev = NULL; } clockevents_exchange_device(curdev, newdev); tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); // 设置 lapic_clockevent 的 event_handler 为 tick_handle_periodic ...... return NOTIFY_STOP; out_bc: /* * Can the new device be used as a broadcast device ? 
*/ if (tick_check_broadcast_device(newdev)) ret = NOTIFY_STOP; spin_unlock_irqrestore(&tick_device_lock, flags); return ret; }
static void tick_setup_device(struct tick_device *td, struct clock_event_device *newdev, int cpu, const struct cpumask *cpumask) { ...... // 回到前文留下伏笔的 else else { handler = td->evtdev->event_handler; // handler 应该为 tick_handle_periodic next_event = td->evtdev->next_event; td->evtdev->event_handler = clockevents_handle_noop; // 设置 hpet_clockevent 的 event_handler 为 clockevents_handle_noop. } ...... }
void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new) { // old = hpet // new = lapic unsigned long flags; local_irq_save(flags); /* * Caller releases a clock event device. We queue it into the * released list and do a notify add later. */ if (old) { clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); // 设置为 UNUSED mode list_del(&old->list); // 从原来的 clockevent_devices 中删除 list_add(&old->list, &clockevents_released); // 加到 clockevents_released 链表中 } if (new) { BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); clockevents_shutdown(new); } local_irq_restore(flags); }
好了,现在回到了 clockevents_register_device() 中的 clockevents_notify_released() 函数。
static void clockevents_notify_released(void) { struct clock_event_device *dev; while (!list_empty(&clockevents_released)) { // 为空 list_empty 返回 1,如果 clockevents_released 链表不为空则进入循环 // 现在链表中的设备为 hpet dev = list_entry(clockevents_released.next, struct clock_event_device, list); // dev 指向 hpet_clockevent list_del(&dev->list); list_add(&dev->list, &clockevent_devices); // 再次加入到 clockevent_devices 链表 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); // 再次设置 event_handler } // 在注册 global clock_event device 时因为 clockevents_released 链表为空,根本就没有进入循环体,但是现在不一样了。 }
故事从这又 TMD 的开始了……
现在 CPU0 的 tick_device 上的 clock_event device 是 local 的 (lapic_clockevent),现在就是要处理全局的~
static int tick_check_new_device(struct clock_event_device *newdev) { ...... cpu = smp_processor_id(); // 还是 CPU0 if (!cpumask_test_cpu(cpu, newdev->cpumask)) goto out_bc; // cpumask: cpumask to indicate for which CPUs this device works td = &per_cpu(tick_cpu_device, cpu); // 还是 CPU0 的那个 struct tick_device 实例 curdev = td->evtdev; // 注意此时 curdev 并不是 NULL 而是指向 lapic_clockevent ...... if (curdev) { // curdev = &lapic_clockevent // newdev = &hpet_clockevent /* * Prefer one shot capable devices ! */ if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) goto out_bc; /* * Check the rating */ if (curdev->rating >= newdev->rating) goto out_bc; // 没办法只能去检查 hpet_clockevent 能否用作 broadcast device,因为它的 rating 值小于已经注册上的 lapic_clockevent // lapic rating = 100,hpet rating = 50 } ...... out_bc: /* * Can the new device be used as a broadcast device ? */ if (tick_check_broadcast_device(newdev)) ret = NOTIFY_STOP; spin_unlock_irqrestore(&tick_device_lock, flags); return ret; }
int tick_check_broadcast_device(struct clock_event_device *dev) { if ((tick_broadcast_device.evtdev && tick_broadcast_device.evtdev->rating >= dev->rating) || (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; clockevents_exchange_device(NULL, dev); tick_broadcast_device.evtdev = dev; // tick_broadcast_device 终于出现了,设置为 hpet if (!cpumask_empty(tick_get_broadcast_mask())) tick_broadcast_start_periodic(dev); // 设置周期模式,此函数在当前 context 中并没有被执行,也就是说 hpet_clockevent 的 event_handler 应该是 "clockevents_handle_noop" return 1; }
好,现在说说当前所处的 situation:Global 时钟事件设备以及 CPU0 的 LAPIC 都已经注册完毕。假设系统是 4-core,现在处于 kernel 初始化末期的内核线程 1 中,马上要进行的是启动其余的 CPU(1-3),并且注册相应的 LAPIC clock_event device。
kernel_init() -> smp_init() static void __init smp_init(void) { unsigned int cpu; /* FIXME: This should be done in userspace --RR */ for_each_present_cpu(cpu) { if (num_online_cpus() >= setup_max_cpus) break; if (!cpu_online(cpu)) cpu_up(cpu); } // 启动其他 CPU 调用 start_secondary() /* Any cleanup work */ printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); smp_cpus_done(setup_max_cpus); ...... }
局部时钟事件设备的注册
cpu_up() ... -> _cpu_up() -> smp_ops.cpu_up(cpu) -> native_cpu_up() -> do_boot_cpu() -> wakeup_secondary_cpu_via_init() -> startup_ipi_hook() ...... maybe (这块没太看懂是怎么 invoke 到 start_secondary 的;补充:do_boot_cpu() 中会把 initial_code 设置为 start_secondary,AP 被 INIT/SIPI 唤醒后,应该是经由启动汇编代码跳转到 initial_code,待对照源码确认) -> start_secondary() -> setup_secondary_APIC_clock() -> setup_APIC_timer() -> clockevents_register_device(levt)
其余的 CPU Lapic 注册过程和 CPU0 的一样也不复杂,最终都把相关 event_handler 初始化为 “tick_handle_periodic”
可是……但是……还是那句话,故事远没有结束,这才是一个开始……