kernel:3.6
硬件:
一般soc会有多个sp804外部timer,假设现在timer0作为全局时钟事件设备(clock_event_device),timer1作为clocksource。
arm smp local timer。
核心数据结构对象:
1. struct clock_event_device 时钟设备抽象类型,其中set_next_event可以设置下次中断时间,
2. event_handler是中断处理回调一般是tick_handle_periodic或hrtimer_interrupt核心方法。
sp804 timer0, smp local timer都会抽象成clock_event_device并且注册到系统中。
3. struct clocksource 时钟源抽象类型,通过read方法可以读取当前时间。系统中sp804 timer1,
纯软件的jiffies抽象成clocksource注册到系统中。
4. struct timekeeper timekeeper全局变量,管理clocksource,配合保存的xtime值提供读取
当前时间功能。
5. struct timespec xtime全局变量,保存了当前时间,在tick中断时更新。
整体流程:
1.start_kernel->init_timers()/hrtimers_init() 初始化wheel timer和hrtimer。
2.start_kernel->timekeeping_init() 初始化timekeeper结构,并且clock=jiffies clocksource。
3.start_kernel->time_init() 板级代码注册sp804 timer0 clock_event_device。
初始化sp804 timer,注册中断设置每cpu的tick_cpu_device->evtdev为sp804 timer。之后中断就生效了。
4.start_kernel->time_init() 板级代码注册sp804 timer1 clocksource。
5.start_kernel->rest_init()…kernel_init()->smp_prepare_cpus() 注册cpu0 local timer clock_event_device。
其中会close 之前的sp804timer。之后localtimer中断就生效了(timer0中断不会有了)。
设置每cpu的tick_cpu_device->evtdev为local timer。
6.secondary_start_kernel()->percpu_timer_setup() 注册cpuX local timer clock_event_device。同cpu0。
7.do_initcalls()->init_jiffies_clocksource() 注册jiffies clocksource。
触发timekeeper的clock设置为优先级更高的sp804 timer1 clocksource。
从而后面timer软中断就会从periodic模式切换为oneshot模式。
8.cpu0,cpuX的TIMER_SOFTIRQ软中断,会将每cpu的tick_cpu_device->evtdev模式设置为oneshot模式,
event_handler方法也变成了hrtimer_interrupt。
clock_event_device注册
static struct clock_event_device sp804_clockevent = { .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = sp804_set_mode, .set_next_event = sp804_set_next_event, .rating = 300, .cpumask = cpu_all_mask, }; static struct irqaction sp804_timer_irq = { .name = "timer", .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, .handler = sp804_timer_interrupt, .dev_id = &sp804_clockevent, }; void __init sp804_clockevents_initvoid __iomem *base, unsigned int irq, const char *name) { struct clock_event_device *evt = &sp804_clockevent; long rate = sp804_get_clock_ratename); if rate < 0) return; clkevt_base = base; clkevt_reload = DIV_ROUND_CLOSESTrate, HZ); evt->name = name; evt->irq = irq; setup_irqirq, &sp804_timer_irq); clockevents_config_and_registerevt, rate, 0xf, 0xffffffff); }
在kernel提供好的sp804_clockevents_init方法中,实际上主要是注册了一个sp804_clockevent。
参数base是板级相关的timer基地址,irq是中断号。
然后关键的是通过setup_irq注册irqaction结构sp804_timer_irq(其handler为sp804_timer_interrupt),
以及clockevents_config_and_register注册clock_event_device。
中断处理函数sp804_timer_interrupt主要是调用了关联的sp804_clockevent的event_handler方法。
当然这个event_handler方法是在clockevents_register_device过程中初始化的。
static irqreturn_t sp804_timer_interruptint irq, void *dev_id) { struct clock_event_device *evt = dev_id; /* clear the interrupt */ writel1, clkevt_base + TIMER_INTCLR); //这个是在clockevents_config_and_register中初始化的方法。 evt->event_handlerevt); return IRQ_HANDLED; } void clockevents_config_and_registerstruct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta) { dev->min_delta_ticks = min_delta; dev->max_delta_ticks = max_delta; clockevents_configdev, freq); clockevents_register_devicedev); } void clockevents_register_devicestruct clock_event_device *dev) { unsigned long flags; BUG_ONdev->mode != CLOCK_EVT_MODE_UNUSED); if !dev->cpumask) { WARN_ONnum_possible_cpus) > 1); dev->cpumask = cpumask_ofsmp_processor_id)); } raw_spin_lock_irqsave&clockevents_lock, flags); list_add&dev->list, &clockevent_devices); clockevents_do_notifyCLOCK_EVT_NOTIFY_ADD, dev); clockevents_notify_released); raw_spin_unlock_irqrestore&clockevents_lock, flags); }
通过clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev)会通知调用tick_notifier来处理。
static struct notifier_block tick_notifier = { .notifier_call = tick_notify, }; static int tick_notifystruct notifier_block *nb, unsigned long reason, void *dev) { switch reason) { case CLOCK_EVT_NOTIFY_ADD: return tick_check_new_devicedev); 。。。 }
新的clock_event_device加入的时候通过这个方法来初始化。
当然其实这是我们系统中的第一个clock_event_device。这个时候cpu1还处于wfi状态。
此时这个newdev不是cpu local device。每cpu的tick_cpu_device->evtdev也还是NULL(尚未指向任何clock_event_device)。
关键的通过tick_setup_device设置这个newdev。
static int tick_check_new_devicestruct clock_event_device *newdev) { struct clock_event_device *curdev; struct tick_device *td; int cpu, ret = NOTIFY_OK; unsigned long flags; raw_spin_lock_irqsave&tick_device_lock, flags); cpu = smp_processor_id); if !cpumask_test_cpucpu, newdev->cpumask)) goto out_bc; td = &per_cputick_cpu_device, cpu); curdev = td->evtdev; /* cpu local device ? */ if !cpumask_equalnewdev->cpumask, cpumask_ofcpu))) { /* * If the cpu affinity of the device interrupt can not * be set, ignore it. */ if !irq_can_set_affinitynewdev->irq)) goto out_bc; /* * If we have a cpu local device already, do not replace it * by a non cpu local device */ if curdev && cpumask_equalcurdev->cpumask, cpumask_ofcpu))) goto out_bc; } /* * If we have an active device, then check the rating and the oneshot * feature. */ if curdev) { /* * Prefer one shot capable devices ! */ if curdev->features & CLOCK_EVT_FEAT_ONESHOT) && !newdev->features & CLOCK_EVT_FEAT_ONESHOT)) goto out_bc; /* * Check the rating */ if curdev->rating >= newdev->rating) goto out_bc; } /* * Replace the eventually existing device by the new * device. If the current device is the broadcast device, do * not give it back to the clockevents layer ! */ if tick_is_broadcast_devicecurdev)) { clockevents_shutdowncurdev); curdev = NULL; } clockevents_exchange_devicecurdev, newdev); tick_setup_devicetd, newdev, cpu, cpumask_ofcpu)); if newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify); raw_spin_unlock_irqrestore&tick_device_lock, flags); return NOTIFY_STOP; out_bc: /* * Can the new device be used as a broadcast device ? */ if tick_check_broadcast_devicenewdev)) ret = NOTIFY_STOP; raw_spin_unlock_irqrestore&tick_device_lock, flags); return ret; }
tick_setup_device中会将我们的sp804 timer作为每cpu的tick_cpu_device->evtdev.
由于原来的每cpu的tick_cpu_device->evtdev是空的,所以会初始化tick周期,设置下次tick时间,
并且设置当前cpu(cpu0)作为do_timer处理时间工作的cpu。
之后会通过tick_setup_periodic来设置clock_event_device的event_handler方法,
中断处理函数实际上就是调用的这个event_handler方法,到这里才设置的。
static void tick_setup_devicestruct tick_device *td, struct clock_event_device *newdev, int cpu, const struct cpumask *cpumask) { ktime_t next_event; void *handler)struct clock_event_device *) = NULL; /* * First device setup ? */ if !td->evtdev) { /* * If no cpu took the do_timer update, assign it to * this cpu: */ if tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { tick_do_timer_cpu = cpu; tick_next_period = ktime_get); tick_period = ktime_set0, NSEC_PER_SEC / HZ); } /* * Startup in periodic mode first. */ td->mode = TICKDEV_MODE_PERIODIC; } else { handler = td->evtdev->event_handler; next_event = td->evtdev->next_event; td->evtdev->event_handler = clockevents_handle_noop; } td->evtdev = newdev; /* * When the device is not per cpu, pin the interrupt to the * current cpu: */ if !cpumask_equalnewdev->cpumask, cpumask)) irq_set_affinitynewdev->irq, cpumask); /* * When global broadcasting is active, check if the current * device is registered as a placeholder for broadcast mode. * This allows us to handle this x86 misfeature in a generic * way. */ if tick_device_uses_broadcastnewdev, cpu)) return; if td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodicnewdev, 0); else tick_setup_oneshotnewdev, handler, next_event); }
tick_set_periodic_handler将event_handler设置为tick_handle_periodic。
然后设置clock_event_device模式为CLOCK_EVT_MODE_PERIODIC。
void tick_setup_periodicstruct clock_event_device *dev, int broadcast) { tick_set_periodic_handlerdev, broadcast); /* Broadcast setup ? */ if !tick_device_is_functionaldev)) return; if dev->features & CLOCK_EVT_FEAT_PERIODIC) && !tick_broadcast_oneshot_active)) { clockevents_set_modedev, CLOCK_EVT_MODE_PERIODIC); } else { unsigned long seq; ktime_t next; do { seq = read_seqbegin&xtime_lock); next = tick_next_period; } while read_seqretry&xtime_lock, seq)); clockevents_set_modedev, CLOCK_EVT_MODE_ONESHOT); for ; { if !clockevents_program_eventdev, next, false)) return; next = ktime_addnext, tick_period); } } } void tick_set_periodic_handlerstruct clock_event_device *dev, int broadcast) { if !broadcast) dev->event_handler = tick_handle_periodic; else dev->event_handler = tick_handle_periodic_broadcast; }
event_handler periodic中断处理函数。kernel的一个core方法。
tick_periodic是具体的处理。
void tick_handle_periodicstruct clock_event_device *dev) { int cpu = smp_processor_id); ktime_t next; tick_periodiccpu); if dev->mode != CLOCK_EVT_MODE_ONESHOT) return; /* * Setup the next period for devices, which do not have * periodic mode: */ next = ktime_adddev->next_event, tick_period); for ; { if !clockevents_program_eventdev, next, false)) return; /* * Have to be careful here. If we're in oneshot mode, * before we call tick_periodic) in a loop, we need * to be sure we're using a real hardware clocksource. * Otherwise we could get trapped in an infinite * loop, as the tick_periodic) increments jiffies, * when then will increment time, posibly causing * the loop to trigger again and again. */ if timekeeping_valid_for_hres)) tick_periodiccpu); next = ktime_addnext, tick_period); } }
如果是cpu0的话,调用do_timer做更新时间等操作。
不管哪个cpu都要调用update_process_times更新cpu使用信息,以及处理timer wheel软中断。
static void tick_periodicint cpu) { if tick_do_timer_cpu == cpu) { write_seqlock&xtime_lock); /* Keep track of the next tick event */ tick_next_period = ktime_addtick_next_period, tick_period); do_timer1); write_sequnlock&xtime_lock); } update_process_timesuser_modeget_irq_regs))); profile_tickCPU_PROFILING); } void do_timerunsigned long ticks) { jiffies_64 += ticks; update_wall_time); calc_global_loadticks); }
如果当前cpu负责更新时间,则通过do_timer进行以下操作:
更新jiffies_64变量;
更新墙上时钟;
通过calc_global_load更新系统全局负载(load average)信息(并非每个tick都计算,而是每隔LOAD_FREQ约5秒计算一次);
调用update_process_times,完成以下事情:
更新进程的时间统计信息;account_process_tick更新cpu统计信息。
触发TIMER_SOFTIRQ软件中断,以便系统处理传统的低分辨率定时器;
检查rcu的callback;
通过scheduler_tick触发调度系统进行进程统计和调度工作;
void update_process_timesint user_tick) { struct task_struct *p = current; int cpu = smp_processor_id); /* Note: this timer irq context must be accounted for as well. */ account_process_tickp, user_tick); run_local_timers); rcu_check_callbackscpu, user_tick); printk_tick); #ifdef CONFIG_IRQ_WORK if in_irq)) irq_work_run); #endif scheduler_tick); run_posix_cpu_timersp); }
到这里为止,cpu0的timer中断已经是准备好并且开始工作了。jiffies_64也在不断的增加了。
大概只需要10来个jiffies(HZ=100)之后,arm的local timer也参与到kernel中来了。
sched_clock_register
void __init sched_clock_registeru64 *read)void), int bits, unsigned long rate) { ... struct clock_read_data rd; rd.read_sched_clock = read; ... update_clock_read_data&rd); }
在 sched_clock_register 中,填充一个 struct clock_read_data 结构 rd,将传入的读取 system counter 的回调函数赋值给 rd.read_sched_clock,并更新到系统。
而在调度器频繁使用的 sched_clock() 中,正是调用了该回调函数以获取 system counter 的时间:
unsigned long long notrace sched_clockvoid) { ... cyc = rd->read_sched_clock) - rd->epoch_cyc) & rd->sched_clock_mask; ... }jiffies &
clocksource
static void __inithi3536_clocksource_initvoid __iomem *base, const char *name)
{
long rate = sp804_get_clock_ratename); //获取定时器时钟62.5MHz
struct clocksource *clksrc = &hi3536_clocksource.clksrc;
if rate < 0)
return;
clksrc->name = name; //name=timer0
clksrc->rating = 200; //时钟源精度值
clksrc->read =hi3536_clocksource_read; //获取计数值,系统主要调用该接口转化为系统时间
clksrc->mask =CLOCKSOURCE_MASK32), //计数值32位
clksrc->flags = CLOCK_SOURCE_IS_CONTINUOUS, //持续的时钟源
clksrc->resume = hi3536_clocksource_resume,
hi3536_clocksource.base = base;
hi3536_clocksource_startbase); //初始化寄存器
clocksource_register_hzclksrc, rate); //计算出mult和shift,为系统选择更好的时钟源
setup_sched_clockhi3536_sched_clock_read, 32, rate); //通用sched_clock模块,这个模块主要是提供一个sched_clock的接口函数,获取当前时间点和系统启动之间的纳秒值。
}
read函数注册
clocksource_mmio_initclkevt->value, name, rate, 200, 32, clocksource_mmio_readl_down);
int __init clocksource_mmio_initvoid __iomem *base, const char *name, unsigned long hz, int rating, unsigned bits, u64 *read)struct clocksource *)) { struct clocksource_mmio *cs; if bits > 64 || bits < 16) return -EINVAL; cs = kzallocsizeofstruct clocksource_mmio), GFP_KERNEL); if !cs) return -ENOMEM; cs->reg = base; cs->clksrc.name = name; cs->clksrc.rating = rating; cs->clksrc.read = read; cs->clksrc.mask = CLOCKSOURCE_MASKbits); cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; return clocksource_register_hz&cs->clksrc, hz); }