全局时钟事件设备的注册
全局时钟事件设备(Global Clock Event Device):HPET/PIT
-
主要负责提供周期时钟,更新 jiffies
-
全局时钟的角色由一个明确选定的局部时钟承担:每个 cpu 都有 local apic,而 global clock 由某一个特定的 cpu 承担。全局的时钟事件设备虽然附属于某一个特定的 CPU,但是完成的是系统相关的工作,例如完成系统的 tick 更新,说白了就是某个 cpu 一个人接俩活~
-
结构 struct clock_event_device
局部时钟事件设备(Local Clock Event Device):lapic
-
每个 CPU 都有一个局部时钟,用作进程统计,最主要的是实现了高分辨率定时器(只能工作在提供了 lapic 的系统上)
-
主要完成对运行在当前 CPU 上的进程的统计,以及设置 Local CPU 下一次中断
-
结构 struct clock_event_device
时钟设备(tick device)
```c
struct tick_device {
	struct clock_event_device *evtdev;
	enum tick_device_mode mode;
};

enum tick_device_mode {
	TICKDEV_MODE_PERIODIC,
	TICKDEV_MODE_ONESHOT,
};
```
tick device 只是 clock_event_device 的一个包装器,增加了额外的字段用于指定设备的运行模式(周期或者单触发)。
全局 tick device
```c
static struct tick_device tick_broadcast_device;
```
tick_broadcast_device 很重要,后面会说到~
查看当前系统的 Global Clock Event Device 以及 Local Clock Event Device
```
cat /proc/timer_list
```
查看 tick_device
```
cat /proc/timer_list | grep "Clock Event Device"
Clock Event Device: hpet
Clock Event Device: lapic
Clock Event Device: lapic
Clock Event Device: lapic
Clock Event Device: lapic
```
查看 event_handler
```
cat /proc/timer_list | grep "event_handler"
event_handler: tick_handle_oneshot_broadcast
event_handler: hrtimer_interrupt
event_handler: hrtimer_interrupt
event_handler: hrtimer_interrupt
event_handler: hrtimer_interrupt
```
从以上信息可以得出 Global Clock Event Device 是 hpet,event_handler 为 tick_handle_oneshot_broadcast;Local Clock Event Device 是 lapic,event_handler 为 hrtimer_interrupt,当前系统使用的是高分辨率的 Timer.
event_handler 是什么呢?就是中断到来时所要执行的函数(在中断处理程序中被调用)。
大概是这样的:
Global
```c
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
	......
	global_clock_event->event_handler(global_clock_event); // tick_handle_oneshot_broadcast
	......
}
```
Local
```c
void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
{
	......
	local_apic_timer_interrupt();
	......
}

static void local_apic_timer_interrupt(void)
{
	int cpu = smp_processor_id();
	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
	......
	evt->event_handler(evt); // hrtimer_interrupt
}
```
那 Global 与 Local 的 event_handler 是经过怎样的过程而最终得到的呢?下面就以此为主线来分析~有关一些基本的概念大家可以参考《Professional Linux Kernel Architecture》Chapter 15
我觉得要彻底弄懂一个东西就得看源代码,不看是怎么实现的,就是看再多书也没用,这是亲身感受~ ok,Let us Go.
全局时钟事件设备的注册
```
start_kernel() -> if (late_time_init) late_time_init()
x86_late_time_init() -> hpet_time_init() -> hpet_enable() -> hpet_legacy_clockevent_register()
```
/*
 * Register the HPET as a clock event device.
 *
 * Enables HPET legacy interrupts, fills in the mult factor and the min/max
 * deltas of hpet_clockevent, binds the device to the CPU executing this
 * function, hands it to clockevents_register_device() and finally records
 * it in global_clock_event.
 */
static void hpet_legacy_clockevent_register(void)
{
	/* Start HPET legacy interrupts */
	hpet_enable_legacy_int();

	hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
				      hpet_period, hpet_clockevent.shift);
	/* Calculate the min / max delta */
	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
							   &hpet_clockevent);
	/* 5 usec minimum reprogramming delta. */
	hpet_clockevent.min_delta_ns = 5000;

	/*
	 * Start hpet with the boot cpu mask and make it
	 * global after the IO_APIC has been initialized.
	 */
	hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); // author's note: the current cpu is certainly cpu0 here
	clockevents_register_device(&hpet_clockevent); // registration starts here
	global_clock_event = &hpet_clockevent; // Global clock event
	printk(KERN_DEBUG "hpet clockevent registered\n");
}
```c
static struct clock_event_device hpet_clockevent = {
	.name		= "hpet",
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode	= hpet_legacy_set_mode,
	.set_next_event	= hpet_legacy_next_event,
	.shift		= 32,
	.irq		= 0,
	.rating		= 50,
};
```
故事从这就开始了 ……
/*
 * Register a clock event device: link it into clockevent_devices and send
 * CLOCK_EVT_NOTIFY_ADD down clockevents_chain, all while holding
 * clockevents_lock with interrupts disabled.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
	// In this walkthrough dev == &hpet_clockevent
	unsigned long flags;

	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
	BUG_ON(!dev->cpumask); // the device must already be bound to a CPU; at this point that is cpu0

	spin_lock_irqsave(&clockevents_lock, flags);

	// Add the device to the clockevent_devices list. This is much like
	// clocksource registration; just note how the entry is inserted.
	list_add(&dev->list, &clockevent_devices);
	clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
	// Author's note: called after hpet has been registered as the Global
	// Clock Event Device and after cpu0 has registered its lapic.
	clockevents_notify_released();

	spin_unlock_irqrestore(&clockevents_lock, flags);
}

/* Forward a clockevents notification to every callback on clockevents_chain. */
static void clockevents_do_notify(unsigned long reason, void *dev)
{
	// dev is the clock event device (hpet in this walkthrough)
	raw_notifier_call_chain(&clockevents_chain, reason, dev);
}

// Author's note: clockevents_chain has already been initialised in tick_init():
// clockevents_chain->head = &tick_notifier

/* Head of a raw notifier chain: a single pointer to the first notifier_block. */
struct raw_notifier_head {
	struct notifier_block *head;
};

/* Call the chain with nr_to_call = -1 and without counting the calls. */
int raw_notifier_call_chain(struct raw_notifier_head *nh,
		unsigned long val, void *v)
{
	return __raw_notifier_call_chain(nh, val, v, -1, NULL);
}

/* Thin wrapper that passes the address of the chain head to notifier_call_chain(). */
int __raw_notifier_call_chain(struct raw_notifier_head *nh,
		unsigned long val, void *v,
		int nr_to_call, int *nr_calls)
{
	return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
}

/*
 * Walk the notifier list starting at *nl and invoke each notifier_call with
 * (nb, val, v). The walk stops when the list ends, when nr_to_call reaches
 * zero, or when a callback returns a value with NOTIFY_STOP_MASK set.
 * If nr_calls is non-NULL it is incremented once per invoked callback.
 * Returns the value of the last callback invoked (NOTIFY_DONE if none ran).
 */
static int __kprobes notifier_call_chain(struct notifier_block **nl,
		unsigned long val, void *v,
		int nr_to_call, int *nr_calls)
{
	int ret = NOTIFY_DONE;
	struct notifier_block *nb, *next_nb;

	nb = rcu_dereference(*nl); // nb points to the notifier_block

	while (nb && nr_to_call) { // nr_to_call = -1 in this walkthrough
		next_nb = rcu_dereference(nb->next);

#ifdef CONFIG_DEBUG_NOTIFIERS
		if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
			WARN(1, "Invalid notifier called!");
			nb = next_nb;
			continue;
		}
#endif
		// Calls tick_notify() with val = CLOCK_EVT_NOTIFY_ADD;
		// in this walkthrough it returns ret = NOTIFY_STOP.
		ret = nb->notifier_call(nb, val, v);

		if (nr_calls)
			(*nr_calls)++; // nr_calls = NULL in this walkthrough

		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
			break;
		nb = next_nb;
		nr_to_call--;
	}
	return ret;
}

#define NOTIFY_STOP_MASK 0x8000 /* Don't call further */
#define NOTIFY_DONE 0x0000 /* Don't care */
#define NOTIFY_OK 0x0001 /* Suits me */
#define NOTIFY_STOP (NOTIFY_OK|NOTIFY_STOP_MASK)

/*
 * Notifier callback of the tick layer: dispatch on the clockevents
 * notification reason. CLOCK_EVT_NOTIFY_ADD returns whatever
 * tick_check_new_device() returns; every other reason returns NOTIFY_OK.
 */
static int tick_notify(struct notifier_block *nb, unsigned long reason,
		void *dev)
{
	switch (reason) {

	case CLOCK_EVT_NOTIFY_ADD:
		return tick_check_new_device(dev); // the path followed in this walkthrough

	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
		tick_broadcast_on_off(reason, dev);
		break;

	case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
	case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
		tick_broadcast_oneshot_control(reason);
		break;

	case CLOCK_EVT_NOTIFY_CPU_DYING:
		tick_handover_do_timer(dev);
		break;

	case CLOCK_EVT_NOTIFY_CPU_DEAD:
		tick_shutdown_broadcast_oneshot(dev);
		tick_shutdown_broadcast(dev);
		tick_shutdown(dev);
		break;

	case CLOCK_EVT_NOTIFY_SUSPEND:
		tick_suspend();
		tick_suspend_broadcast();
		break;

	case CLOCK_EVT_NOTIFY_RESUME:
		tick_resume();
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}
接下来的两个函数很重要,当时看的时候丈二和尚摸不着头脑,但是经过各种 debug kernel,各种 brew,终于看出了点门道~
/*
 * Decide whether newdev should become the tick device of the CPU that is
 * executing this function.
 *
 * Returns NOTIFY_STOP when newdev was installed as this CPU's tick device or
 * when tick_check_broadcast_device() accepted it; otherwise NOTIFY_OK.
 * The whole check runs under tick_device_lock with interrupts disabled.
 */
static int tick_check_new_device(struct clock_event_device *newdev)
{
	// In this walkthrough newdev == &hpet_clockevent
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu, ret = NOTIFY_OK;
	unsigned long flags;

	spin_lock_irqsave(&tick_device_lock, flags);

	cpu = smp_processor_id();
	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		goto out_bc; // cpumask: cpumask to indicate for which CPUs this device works

	// td is the tick device. tick_cpu_device is per-CPU data holding the
	// struct tick_device instance of each CPU in the system. Per-CPU
	// variables are not explained here; that takes more than a sentence or two.
	td = &per_cpu(tick_cpu_device, cpu);
	// On this first registration the tick device has no clock event device
	// attached yet, so curdev is NULL. Later registrations are different:
	// curdev is non-NULL then (discussed when we get there).
	curdev = td->evtdev;

	/* cpu local device ? */
	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {

		/*
		 * If the cpu affinity of the device interrupt can not
		 * be set, ignore it.
		 */
		if (!irq_can_set_affinity(newdev->irq))
			goto out_bc;

		/*
		 * If we have a cpu local device already, do not replace it
		 * by a non cpu local device
		 */
		if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
			goto out_bc;
	}
	// Author's note: I did not fully understand the lines above; they do
	// not seem to be executed at all?

	/*
	 * If we have an active device, then check the rating and the oneshot
	 * feature.
	 */
	if (curdev) {
		// 2.curdev = hpet
		// 2.newdev = lapic
		// 3.curdev = lapic
		// 3.newdev = hpet
		/*
		 * Prefer one shot capable devices !
		 */
		if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
		    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
			goto out_bc;
		/*
		 * Check the rating
		 */
		if (curdev->rating >= newdev->rating)
			goto out_bc; // lapic rating = 100, hpet rating = 50
	}
	// With curdev == NULL the "if" above is skipped; this is a key point.

	/*
	 * Replace the eventually existing device by the new
	 * device. If the current device is the broadcast device, do
	 * not give it back to the clockevents layer !
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); // set up the clock event device
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify(); // taken in this walkthrough

	spin_unlock_irqrestore(&tick_device_lock, flags);
	return NOTIFY_STOP;

out_bc:
	/*
	 * Can the new device be used as a broadcast device ?
	 */
	if (tick_check_broadcast_device(newdev))
		ret = NOTIFY_STOP;

	spin_unlock_irqrestore(&tick_device_lock, flags);

	return ret;
}
/* Return non-zero when dev is non-NULL and is the device held by tick_broadcast_device. */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

/*
 * Release the old clock event device (if any) and prepare the new one (if any).
 *
 * A non-NULL old device is set to CLOCK_EVT_MODE_UNUSED and moved onto the
 * clockevents_released list. A non-NULL new device must be in
 * CLOCK_EVT_MODE_UNUSED and is passed to clockevents_shutdown().
 * Local interrupts are disabled for the duration.
 */
void clockevents_exchange_device(struct clock_event_device *old,
				 struct clock_event_device *new)
{
	// <condition 1>
	// old = NULL
	// new = &hpet_clockevent
	// <condition 2>
	// old = hpet
	// new = lapic
	unsigned long flags;

	local_irq_save(flags);
	/*
	 * Caller releases a clock event device. We queue it into the
	 * released list and do a notify add later.
	 */
	if (old) {
		clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
		list_del(&old->list);
		list_add(&old->list, &clockevents_released); // moved onto the clockevents_released list
	}

	if (new) {
		BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
		clockevents_shutdown(new);
	}
	local_irq_restore(flags);
	// For now we only consider condition 1
}
static void tick_setup_device(struct tick_device *td, struct clock_event_device *newdev, int cpu, const struct cpumask *cpumask) { ktime_t next_event; void (*handler)(struct clock_vent_device *) = NULL; /* * First device setup ? */ if (!td->evtdev) { // 此时钟设备没有相关的时钟事件设备 /* * If no cpu took the do_timer update, assign it to * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { // 如果没有选定时钟设备来承担全局时钟设备的角色,那么将选择当前设备来承担此职责 tick_do_timer_cpu = cpu; // 设置为当前设备所属处理器编号 tick_next_period = ktime_get(); tick_period = ktime_set(0, NSEC_PER_SEC / HZ); // 时钟周期,纳秒 //HZ = 1000 } /* * Startup in periodic mode first. */ td->mode = TICKDEV_MODE_PERIODIC; // 设备运行模式 --> 周期模式 } else { // 关于这个 else ,我们某天会来到这里,现在 ignore it. handler = td->evtdev->event_handler; next_event = td->evtdev->next_event; td->evtdev->event_handler = clockevents_handle_noop; } td->evtdev = newdev; //为时钟设备指定事件设备 /* * When the device is not per cpu, pin the interrupt to the * current cpu: */ if (!cpumask_equal(newdev->cpumask, cpumask)) irq_set_affinity(newdev->irq, cpumask); /* * When global broadcasting is active, check if the current * device is registered as a placeholder for broadcast mode. * This allows us to handle this x86 misfeature in a generic * way. */ // check whether enable the broadcast mode,如果系统处于省电模式,而局部时钟停止工作,则会使用广播机制 if (tick_device_uses_broadcast(newdev, cpu)) return; if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(newdev, 0); // 周期模式 invoke this ...... else tick_setup_oneshot(newdev, handler, next_event); // 单触发模式 }
/*
 * Install the periodic tick handler on dev and start the device.
 *
 * Returns early when the device is a dummy (not functional). If the device
 * has CLOCK_EVT_FEAT_PERIODIC and tick_broadcast_oneshot_active() is false,
 * it is switched to CLOCK_EVT_MODE_PERIODIC. Otherwise it is put into
 * CLOCK_EVT_MODE_ONESHOT and programmed for tick_next_period, advancing by
 * tick_period until clockevents_program_event() returns 0.
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
	tick_set_periodic_handler(dev, broadcast); // broadcast = 0 in this walkthrough

	/* Broadcast setup ? */
	if (!tick_device_is_functional(dev))
		return;

	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
	    !tick_broadcast_oneshot_active()) {
		// Path taken in this walkthrough: switch to periodic mode
		clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
	} else {
		unsigned long seq;
		ktime_t next;

		/* Read tick_next_period consistently under the xtime_lock seqlock. */
		do {
			seq = read_seqbegin(&xtime_lock);
			next = tick_next_period;
		} while (read_seqretry(&xtime_lock, seq));

		clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);

		for (;;) {
			if (!clockevents_program_event(dev, next, ktime_get()))
				return;
			next = ktime_add(next, tick_period);
		}
	}
}

/* Select the periodic event handler: the broadcast variant when broadcast is non-zero. */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic; // taken in this walkthrough
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

/* A device is functional unless it carries the CLOCK_EVT_FEAT_DUMMY feature flag. */
static inline int tick_device_is_functional(struct clock_event_device *dev)
{
	return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
此时 Global event_handler 的注册就接近尾声了,event_handler = tick_handle_periodic,不对啊应该是 “tick_handle_oneshot_broadcast”啊,是啊,莫急,故事远没有结束这才是一个开始 ……