ARM Linux 的启动代码分析


本次所分析的linux版本为linux-4.9.291

为了分析ARM Linux的启动代码,可以先从Linux的链接脚本入手,找到程序的入口。在arch/arm/kernel/vmlinux.lds中,我们可以找到以下代码:

ENTRY(stext)指明了Linux内核入口为stext,而stext在arch/arm/kernel/head.S中定义,因此我们首先分析arch/arm/kernel/head.S这个文件。

一、内核引导阶段

该阶段的主要代码位于arch/arm/kernel/head.S和arch/arm/kernel/head-common.S中。主要完成以下工作:

在arch/arm/kernel/head.S中,分析ENTRY(stext)的代码:

  • 调用函数 safe_svcmode_maskall 确保 CPU 处于 SVC 模式,并且关闭了所有的中断。safe_svcmode_maskall 定义在文件 arch/arm/include/asm/assembler.h 中。
/*
 * stext: the kernel entry point (opening part of the routine).
 *
 * This excerpt optionally selects BE8 mode, switches to Thumb when the
 * kernel is built as Thumb-2, installs the hyp stub when
 * CONFIG_ARM_VIRT_EXT is enabled, and finally forces SVC mode with all
 * interrupts masked. r9 is used as a scratch register throughout.
 */
ENTRY(stext)
ARM_BE8(setend be ) @ ensure we are in BE8 mode

/* The THUMB() lines only take effect in a Thumb-2 build: branch to local label 1 and continue in Thumb state. */
THUMB( badr r9, 1f ) @ Kernel is always entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
THUMB( .thumb ) @ switch to Thumb now.
THUMB(1: )

#ifdef CONFIG_ARM_VIRT_EXT
bl __hyp_stub_install
#endif
@ ensure svc mode and all interrupts masked
safe_svcmode_maskall r9
  • 读处理器 ID,ID 值保存在 r9 寄存器中。
/* Read coprocessor 15 register c0 into r9; later steps treat r9 as the CPU ID. */
mrc	p15, 0, r9, c0, c0		@ get processor id
  • 调用函数 __lookup_processor_type 检查当前系统是否支持此 CPU,如果支持就获取 procinfo 信息。procinfo 是 proc_info_list 类型的结构体,proc_info_list 在文件 arch/arm/include/asm/procinfo.h 中定义。
/*
 * Look up the procinfo entry for the CPU ID held in r9. The result comes
 * back in r5 and is copied to r10; a zero result means the CPU is not
 * supported and control goes to __error_p.
 */
bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
movs r10, r5 @ invalid processor (r5=0)?
THUMB( it eq ) @ force fixup-able long branch encoding
beq __error_p @ yes, error 'p'
  • 调用函数 __create_page_tables 创建页表。
/* Build the initial page tables; called with link so execution resumes on the next line. */
bl	__create_page_tables
  • 将函数 __mmap_switched 的地址保存到 r13 寄存器中。__mmap_switched 定义在文件 arch/arm/kernel/head-common.S 中,__mmap_switched 最终会调用 start_kernel 函数。
/*
 * Prepare the two continuation addresses: r13 receives the address of
 * __mmap_switched (used once the MMU is on), and lr receives the address
 * of the local label 1 that follows.
 */
ldr	r13, =__mmap_switched		@ address to jump to after
@ mmu has been enabled
badr lr, 1f @ return (PIC) address
  • 调用 __enable_mmu 函数使能 MMU。__enable_mmu 最终会通过调用 __turn_mmu_on 来打开 MMU,__turn_mmu_on 最后会执行 r13 里面保存的 __mmap_switched 函数。
/* Plain branch (b, not bl) to __enable_mmu, so lr is not modified here; this ends stext. */
1:	b	__enable_mmu
ENDPROC(stext)

__mmap_switched 函数定义在文件 arch/arm/kernel/head-common.S 中,函数代码如下:

/*
 * __mmap_switched: last assembly step before entering C code.
 *
 * Register inputs, as implied by the stores near the end:
 *   r0 = control register values
 *   r1 = machine type
 *   r2 = atags pointer
 *   r9 = processor ID
 *
 * Reads a table of words at __mmap_switched_data, copies the data segment
 * when source and destination differ, zeroes BSS, saves the boot values
 * through pointers taken from the table, loads sp, and branches to
 * start_kernel (a plain branch, so there is no return).
 */
__mmap_switched:
adr r3, __mmap_switched_data

/* First four table words go to r4..r7; the "!" advances r3 past them. */
ldmia r3!, {r4, r5, r6, r7}
cmp r4, r5 @ Copy data segment if needed
/* Word-by-word copy from [r4] to [r5] until r5 reaches r6; skipped entirely when r4 == r5. */
1: cmpne r5, r6
ldrne fp, [r4], #4
strne fp, [r5], #4
bne 1b

/* Store zero words from r6 upward while r6 is (unsigned) below r7. */
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7
strcc fp, [r6],#4
bcc 1b

/*
 * Next five table words: four destination pointers (r4..r7) and the
 * initial sp. The Thumb build loads the same words but fetches sp with a
 * separate ldr from offset 16.
 */
ARM( ldmia r3, {r4, r5, r6, r7, sp})
THUMB( ldmia r3, {r4, r5, r6, r7} )
THUMB( ldr sp, [r3, #16] )
str r9, [r4] @ Save processor ID
str r1, [r5] @ Save machine type
str r2, [r6] @ Save atags pointer
/* The control register value is stored only when the table supplies a non-zero pointer in r7. */
cmp r7, #0
strne r0, [r7] @ Save control register values
b start_kernel
ENDPROC(__mmap_switched)

__mmap_switched 函数最终调用 start_kernel 来启动 Linux 内核

二、内核初始化阶段

该阶段的主要代码位于init/main.c中,主要是三个函数:start_kernel、rest_init、kernel_init。主要任务如下图所示:

start_kernel 通过调用众多的子函数来完成 Linux 启动之前的一些初始化工作,核心代码如下:

/*
 * start_kernel() - first C function of the kernel, entered by a branch
 * from the assembly boot code.
 *
 * Takes no arguments and calls a long, order-sensitive sequence of
 * subsystem initialisation routines. Local interrupts are disabled near
 * the top (local_irq_disable()) and re-enabled only after the scheduler,
 * IRQ, timer and timekeeping setup has run (local_irq_enable()). The
 * final step hands over to rest_init().
 */
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
char *after_dashes;

set_task_stack_end_magic(&init_task);
smp_setup_processor_id();
debug_objects_early_init();

/*
* Set up the initial canary ASAP:
*/
boot_init_stack_canary();

cgroup_init_early();

local_irq_disable();
early_boot_irqs_disabled = true;

/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
boot_cpu_init();
page_address_init();
pr_notice("%s", linux_banner);
/* Architecture-specific setup; it hands the command line back through command_line. */
setup_arch(&command_line);
mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
boot_cpu_hotplug_init();

build_all_zonelists(NULL, NULL, false);
page_alloc_init();

pr_notice("Kernel command line: %s\n", boot_command_line);
/* parameters may set static keys */
jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, NULL, &unknown_bootoption);
/* If parse_args() handed back a valid remainder, parse it separately as init arguments. */
if (!IS_ERR_OR_NULL(after_dashes))
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);

/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0);
pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
mm_init();

/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
rcu_init();

/* trace_printk() and trace points may be used after this */
trace_init();

context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
tick_init();
rcu_init_nohz();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
sched_clock_postinit();
printk_nmi_init();
perf_event_init();
profile_init();
call_function_init();
/* End of the interrupts-off phase: warn if something enabled them early, then enable them for real. */
WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable();

kmem_cache_init_late();

/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init();
/* A panic recorded earlier in panic_later is raised only now that console_init() has run. */
if (panic_later)
panic("Too many boot %s vars at `%s'", panic_later,
panic_param);

lockdep_info();

/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
/* Drop the initrd if its first page lies below min_low_pfn, unless initrd_below_start_ok is set. */
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_ext_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
/* late_time_init is an optional hook; it is called only when set. */
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
acpi_early_init();
/* The next two sections are compiled only for the x86 configurations named in the #ifdefs. */
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
#endif
thread_stack_cache_init();
cred_init();
fork_init();
proc_caches_init();
buffer_init();
key_init();
security_init();
dbg_late_init();
vfs_caches_init();
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
proc_root_init();
nsfs_init();
cpuset_init();
cgroup_init();
taskstats_init_early();
delayacct_init();

check_bugs();

acpi_subsystem_init();
sfi_init_late();

if (efi_enabled(EFI_RUNTIME_SERVICES)) {
efi_late_init();
efi_free_boot_services();
}

ftrace_init();

/* Do the rest non-__init'ed, we're now alive */
rest_init();

/* NOTE(review): presumably keeps rest_init() from being emitted as a tail call; confirm against the helper's definition. */
prevent_tail_call_optimization();
}

rest_init函数代码如下:

  • 调用函数 rcu_scheduler_starting,启动 RCU 锁调度器
  • 调用函数 kernel_thread 创建 kernel_init 进程,也就是 init 内核进程。init 进程的 PID 为 1。
  • 调用函数 kernel_thread 创建 kthreadd 内核进程,此内核进程的 PID 为 2。kthreadd进程负责所有内核进程的调度和管理。
  • 最后调用函数 cpu_startup_entry 来进入 idle 进程,cpu_startup_entry 会调用 cpu_idle_loop,cpu_idle_loop 是个 while 循环,也就是 idle 进程代码。idle 进程的 PID 为 0,idle 进程叫做空闲进程。
/*
 * rest_init() - final stage of boot on the initial thread.
 *
 * Creates the kernel_init thread first (so that it obtains pid 1, per the
 * comment below), then the kthreadd thread, records kthreadd's task in
 * kthreadd_task and signals kthreadd_done. The calling thread then
 * schedules once and enters cpu_startup_entry().
 */
static noinline void __ref rest_init(void)
{
int pid;

rcu_scheduler_starting();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
/* Translate kthreadd's pid into its task pointer; the lookup runs inside an RCU read-side section. */
rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
/* kthreadd_task is now set; signal the kthreadd_done completion. */
complete(&kthreadd_done);

/*
* The boot idle thread must execute schedule()
* at least once to get things moving:
*/
init_idle_bootup_task(current);
schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
cpu_startup_entry(CPUHP_ONLINE);
}

kernel_init函数定义了 init 进程具体做的工作,函数代码如下:

  • kernel_init_freeable 函数用于完成 init 进程的一些其他初始化工作;
  • ramdisk_execute_command 是一个全局的 char 指针变量,此变量值为“/init”,也就是根目录下的 init 程序。ramdisk_execute_command 也可以通过 uboot 传递,在 bootargs 中使用 “rdinit=xxx” 即可,xxx 为具体的 init 程序名字。
  • 如果存在 “/init” 程序的话就通过函数 run_init_process 来运行此程序。
  • 如果 ramdisk_execute_command 为空(或者该程序执行失败,此时只打印一条错误信息),就接着看 execute_command 是否为空,总之必须在根文件系统中找到一个可运行的 init 程序。execute_command 的值是通过 uboot 传递,在 bootargs 中使用“init=xxxx”就可以了,比如“init=/linuxrc”表示根文件系统中的 linuxrc 就是要执行的用户空间 init 程序。注意:如果指定了 execute_command 但执行失败,内核会直接 panic,不会再去尝试后面的备用 init 程序。
  • 如果 execute_command 为空,并且也没有通过 ramdisk_execute_command 成功启动 init,那么就依次尝试“/sbin/init”、“/etc/init”、“/bin/init”和“/bin/sh”,这四个相当于备用 init 程序,如果这四个也都无法运行,那么内核 panic,Linux 启动失败。
/*
 * kernel_init() - thread function of the init thread created by rest_init().
 *
 * @unused: not referenced in the body.
 *
 * Finishes the remaining initialisation, frees init memory, marks the
 * system as running, and then tries to start an init program in this
 * order:
 *   1. ramdisk_execute_command - on failure an error is logged and the
 *      search continues;
 *   2. execute_command - on failure the kernel panics, with no fallback;
 *   3. "/sbin/init", "/etc/init", "/bin/init", "/bin/sh", in that order.
 *
 * Returns 0 as soon as one program has been started successfully; panics
 * if none could be.
 */
static int __ref kernel_init(void *unused)
{
int ret;

kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
mark_readonly();
system_state = SYSTEM_RUNNING;
numa_default_policy();

rcu_end_inkernel_boot();

/* First choice: the ramdisk init program. A failure here is not fatal. */
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d)\n",
ramdisk_execute_command, ret);
}

/*
* We try each of these until one succeeds.
*
* The Bourne shell can be used instead of init if we are
* trying to recover a really broken machine.
*/
/* An explicitly requested init program must work: failing to run it panics right away. */
if (execute_command) {
ret = run_init_process(execute_command);
if (!ret)
return 0;
panic("Requested init %s failed (error %d).",
execute_command, ret);
}
/* Built-in fallbacks; the || chain stops at the first call that returns 0. */
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
!try_to_run_init_process("/bin/init") ||
!try_to_run_init_process("/bin/sh"))
return 0;

panic("No working init found. Try passing init= option to kernel. "
"See Linux Documentation/init.txt for guidance.");
}

三、总结

我总结了 ARM Linux 内核代码的启动流程,如下图所示: