二. 进程管理
这部分我们将理解内核提供的创建/删除进程的功能.
2.1 创建进程
在linux中,创建进程有如下两个目的.
- 将同一个进程分成多个进程进行处理
- 创建另外一个程序
为了达成这两个目的,linux分别提供了fork()
和execve()
函数.
对应系统调用的请求名称则分别为clone()和execve()
2.2 fork函数
要想将同一个程序分成多个进程进行处理,需要用到fork函数.
我们使用 Python 的 multiprocessing.Process 时(采用 fork 启动方式),底层调用的就是fork()函数.调用该函数后,会基于发出调用的进程创建一个新的进程:发出请求的进程称为父进程,被创建的进程称为子进程.
创建进程的流程图如下
- 为子进程申请内存空间,并复制父进程的内存到子进程的内存空间.
- 父进程与子进程分裂成两个进程,以执行不同的代码.这一点的实现依赖于
fork()
函数分别返回不同的值给父进程与子进程.
为了更清楚上述步骤,下面使用C语言举例
-
创建一个新进程
-
父进程输出自身与子进程的进程ID.而子进程只输出自身的进程ID.
(base) [root@ecs0003 linux_pro]# vim fork.c
输入以下代码后,使用 :wq 保存
#include <unistd.h> #include <stdio.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <err.h> static void child() { printf("I'm child! my pid is %d.\n",getpid()); exit(EXIT_SUCCESS); } static void parent(pid_t pid_c) { printf("I'm parnet! my pid is %d and the pid of my child is %d.\n",getpid(),pid_c); exit(EXIT_SUCCESS); } int main(void) { pid_t ret; ret = fork(); if (ret == -1) err(EXIT_FAILURE,"fork() failed"); if (ret == 0) { //fork()会返回0给子进程,因此这里调用child() child(); } else { //fork()会返回新创建的子进程的进程ID(大于1)给父进程,因此这里调用parent() parent(ret); } // 当程序异常时,做异常处理 err(EXIT_FAILURE,"shouldn't reach here!"); }
编译
(base) [root@ecs0003 linux_pro]# cc -o fork fork.c (base) [root@ecs0003 linux_pro]# ./fork I'm parnet! my pid is 187646 and the pid of my child is 187647. I'm child! my pid is 187647.
使用
strace
命令(base) [root@ecs0003 linux_pro]# strace ./fork execve("./fork", ["./fork"], [/* 32 vars */]) = 0 brk(NULL) = 0x21cb000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f862132a000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=45852, ...}) = 0 mmap(NULL, 45852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f862131e000 close(3) = 0 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P%\2\0\0\0\0\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=2173512, ...}) = 0 mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8620d3d000 mprotect(0x7f8620f00000, 2093056, PROT_NONE) = 0 mmap(0x7f86210ff000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7f86210ff000 mmap(0x7f8621105000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8621105000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f862131d000 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f862131b000 arch_prctl(ARCH_SET_FS, 0x7f862131b740) = 0 mprotect(0x7f86210ff000, 16384, PROT_READ) = 0 mprotect(0x600000, 4096, PROT_READ) = 0 mprotect(0x7f862132b000, 4096, PROT_READ) = 0 munmap(0x7f862131e000, 45852) = 0 # 系统包装函数为CLONE clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f862131ba10) = 189703 getpid() = 189702 I'm child! my pid is 189703. fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8621329000 --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=189703, si_uid=0, si_status=0, si_utime=0, si_stime=0} --- write(1, "I'm parnet! my pid is 189702 and"..., 64I'm parnet! my pid is 189702 and the pid of my child is 189703. 
) = 64 exit_group(0) = ? +++ exited with 0 +++
2.3 execve() 函数
在启动另外一个程序的时候,需要调用execve()函数.
内核运行的流程如下
- 读取可执行文件,并读取创建进程的内存映像所需的信息.
- 用新进程的数据覆盖当前进程的内存.
- 从最初的命令开始运行新的进程.
也就是说.启动另外一个程序的时候,并非新增了一个进程,而是替换了当前进程.
下面详解一下这个过程.
-
首先读取可执行文件,以及创建进程的内存映像所需的信息.
可执行文件中不仅包含进程在运行过程中使用的代码与数据,还包含开始运行程序时所需的以下信息:
- 包含代码的代码段在文件中的偏移量,大小,以及内存映像的起始地址
- 包含代码以外的变量等数据的数据段在文件中的偏移量,大小,以及内存映像的起始地址.
- 程序执行的第一条指令的内存地址(入口点)
我们可以使用
readelf -S
命令查看可执行文件,可以得到程序头,节头和符号表等.-S
表示节头信息(base) [root@ecs0003 linux_pro]# readelf -S fork There are 31 section headers, starting at offset 0x1a40: Section Headers: [Nr] Name Type Address Offset Size EntSize Flags Link Info Align [ 0] NULL 0000000000000000 00000000 0000000000000000 0000000000000000 0 0 0 [ 1] .interp PROGBITS 0000000000400238 00000238 000000000000001c 0000000000000000 A 0 0 1 [ 2] .note.ABI-tag NOTE 0000000000400254 00000254 0000000000000020 0000000000000000 A 0 0 4 [ 3] .note.gnu.build-i NOTE 0000000000400274 00000274 0000000000000024 0000000000000000 A 0 0 4 [ 4] .gnu.hash GNU_HASH 0000000000400298 00000298 000000000000001c 0000000000000000 A 5 0 8 .................................................................... 0000000000000221 0000000000000000 0 0 1 [30] .shstrtab STRTAB 0000000000000000 00001931 000000000000010c 0000000000000000 0 0 1 Key to Flags: W (write), A (alloc), X (execute), M (merge), S (strings), I (info), L (link order), O (extra OS processing required), G (group), T (TLS), C (compressed), x (unknown), o (OS specific), E (exclude), l (large), p (processor specific) # 获取全部信息 (base) [root@ecs0003 linux_pro]# readelf -a hello ELF Header: Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 Class: ELF64 Data: 2's complement, little endian ................................................ Section Headers: [Nr] Name Type Address Offset Size EntSize Flags Link Info Align [ 0] NULL 0000000000000000 00000000 0000000000000000 0000000000000000 0 0 0 [ 1] .interp PROGBITS 0000000000400238 00000238 000000000000001c 0000000000000000 A 0 0 1 .................................................. [28] .symtab SYMTAB 0000000000000000 00001060 0000000000000600 0000000000000018 29 47 8 [29] .strtab STRTAB 0000000000000000 00001660 00000000000001ca 0000000000000000 0 0 1 [30] .shstrtab STRTAB 0000000000000000 0000182a 000000000000010c 0000000000000000 0 0 1 ................. 
Dynamic section at offset 0xe28 contains 24 entries: Tag Type Name/Value 0x0000000000000001 (NEEDED) Shared library: [libc.so.6] 0x000000000000000c (INIT) 0x4003c8 0x000000000000000d (FINI) 0x4005b4 0x0000000000000019 (INIT_ARRAY) 0x600e10 .................. Symbol table '.symtab' contains 64 entries: Num: Value Size Type Bind Vis Ndx Name 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND 1: 0000000000400238 0 SECTION LOCAL DEFAULT 1 2: 0000000000400254 0 SECTION LOCAL DEFAULT 2 3: 0000000000400274 0 SECTION LOCAL DEFAULT 3 4: 0000000000400298 0 SECTION LOCAL DEFAULT 4 5: 00000000004002b8 0 SECTION LOCAL DEFAULT 5 6: 0000000000400318 0 SECTION LOCAL DEFAULT 6 7: 0000000000400356 0 SECTION LOCAL DEFAULT 7 ........................... 59: 0000000000400430 0 FUNC GLOBAL DEFAULT 14 _start 60: 000000000060102c 0 NOTYPE GLOBAL DEFAULT 26 __bss_start 61: 000000000040051d 21 FUNC GLOBAL DEFAULT 14 main 62: 0000000000601030 0 OBJECT GLOBAL HIDDEN 25 __TMC_END__ 63: 00000000004003c8 0 FUNC GLOBAL DEFAULT 11 _init ....................... Displaying notes found at file offset 0x00000274 with length 0x00000024: Owner Data size Description GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring) Build ID: 32226bc83daf861fb356e6625d326eb888c3050f
也可以使用
objdump -d -M intel -S hello
命令来查看(base) [root@ecs0003 linux_pro]# objdump -d -M intel -S hello hello: file format elf64-x86-64 Disassembly of section .init: 00000000004003c8 <_init>: 4003c8: 48 83 ec 08 sub rsp,0x8 4003cc: 48 8b 05 25 0c 20 00 mov rax,QWORD PTR [rip+0x200c25] # 600ff8 <__gmon_start__> 4003d3: 48 85 c0 test rax,rax 4003d6: 74 05 je 4003dd <_init+0x15> 4003d8: e8 43 00 00 00 call 400420 <.plt.got> 4003dd: 48 83 c4 08 add rsp,0x8 4003e1: c3 ret Disassembly of section .plt: 00000000004003f0 <.plt>: 4003f0: ff 35 12 0c 20 00 push QWORD PTR [rip+0x200c12] # 601008 <_GLOBAL_OFFSET_TABLE_+0x8> 4003f6: ff 25 14 0c 20 00 jmp QWORD PTR [rip+0x200c14] # 601010 <_GLOBAL_OFFSET_TABLE_+0x10> 4003fc: 0f 1f 40 00 nop DWORD PTR [rax+0x0] 0000000000400400 <puts@plt>: 400400: ff 25 12 0c 20 00 jmp QWORD PTR [rip+0x200c12] # 601018 <puts@GLIBC_2.2.5> 400406: 68 00 00 00 00 push 0x0 40040b: e9 e0 ff ff ff jmp 4003f0 <.plt> 0000000000400410 <__libc_start_main@plt>: ........................................................................................................................ 400583: 4c 89 f6 mov rsi,r14 400586: 44 89 ff mov edi,r15d 400589: 41 ff 14 dc call QWORD PTR [r12+rbx*8] 40058d: 48 83 c3 01 add rbx,0x1 400591: 48 39 eb cmp rbx,rbp 400594: 75 ea jne 400580 <__libc_csu_init+0x40> 400596: 48 83 c4 08 add rsp,0x8 40059a: 5b pop rbx 40059b: 5d pop rbp 40059c: 41 5c pop r12 40059e: 41 5d pop r13 4005a0: 41 5e pop r14 4005a2: 41 5f pop r15 4005a4: c3 ret 4005a5: 90 nop 4005a6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0] 4005ad: 00 00 00 00000000004005b0 <__libc_csu_fini>: 4005b0: f3 c3 repz ret Disassembly of section .fini: 00000000004005b4 <_fini>: 4005b4: 48 83 ec 08 sub rsp,0x8 4005b8: 48 83 c4 08 add rsp,0x8 4005bc: c3 ret
- 通过objdump,我们可以看到对应代码段编译出的汇编语言命令。
假设将要运行的程序的可执行文件结构如下:
与使用高级语言编写的源代码不同,在CPU上执行机器语言指令时,必须提供操作的内存地址,因此在代码段和数据段中必须包含内存映像的起始地址.
在将程序映射到内存之后,从入口点开始运行程序
Linux的可执行文件结构遵循名称为ELF(Executable and Linkable Format,可执行与可链接格式)的格式.
上面已经使用过的readelf
命令,配合-h
参数就可以看到入口了
(base) [root@ecs0003 linux_pro]# readelf -h hello
ELF Header:
Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
Class: ELF64
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: Advanced Micro Devices X86-64
Version: 0x1
# 此处就是入口地址
Entry point address: 0x400430
Start of program headers: 64 (bytes into file)
Start of section headers: 6456 (bytes into file)
Flags: 0x0
Size of this header: 64 (bytes)
Size of program headers: 56 (bytes)
Number of program headers: 9
Size of section headers: 64 (bytes)
Number of section headers: 31
Section header string table index: 30
参数-S
则提供了节头(section header)的信息
Section header string table index: 30
(base) [root@ecs0003 linux_pro]# readelf -S hello
There are 31 section headers, starting at offset 0x1938:
Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .interp PROGBITS 0000000000400238 00000238
000000000000001c 0000000000000000 A 0 0 1
[ 2] .note.ABI-tag NOTE 0000000000400254 00000254
0000000000000020 0000000000000000 A 0 0 4
[ 3] .note.gnu.build-i NOTE 0000000000400274 00000274
0000000000000024 0000000000000000 A 0 0 4
[ 4] .gnu.hash GNU_HASH 0000000000400298 00000298
000000000000001c 0000000000000000 A 5 0 8
[ 5] .dynsym DYNSYM 00000000004002b8 000002b8
0000000000000060 0000000000000018 A 6 1 8
[ 6] .dynstr STRTAB 0000000000400318 00000318
..................................................................................
[28] .symtab SYMTAB 0000000000000000 00001060
0000000000000600 0000000000000018 29 47 8
[29] .strtab STRTAB 0000000000000000 00001660
00000000000001ca 0000000000000000 0 0 1
[30] .shstrtab STRTAB 0000000000000000 0000182a
000000000000010c 0000000000000000 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
L (link order), O (extra OS processing required), G (group), T (TLS),
C (compressed), x (unknown), o (OS specific), E (exclude),
l (large), p (processor specific)
- 输出的数据每两行为一组.
- 全部数值均为16进制数据
- 在每组的第一行第二个字段中,
.text
对应的是代码段信息,而.data
对应的则是数据段的信息
- 我们只需要关注每组的第一行第四个字段(内存映像的起始地址),第一行第五个字段(文件中的偏移量),以及第二行第一个字段(大小).
在程序运行期间,进程的内存映像信息,可以从/proc/{pid}/maps
这个文件中找到,比如
(base) [root@ecs0003 linux_pro]# /bin/sleep 10000 &
[1] 42182
(base) [root@ecs0003 linux_pro]# cat /proc/42182/maps
# 代码段
00400000-00406000 r-xp 00000000 fd:02 23461 /usr/bin/sleep
00606000-00607000 r--p 00006000 fd:02 23461 /usr/bin/sleep
# 数据段
00607000-00608000 rw-p 00007000 fd:02 23461 /usr/bin/sleep
007d5000-007f6000 rw-p 00000000 00:00 0 [heap]
7f8dcf17d000-7f8dd56a6000 r--p 00000000 fd:02 154897 /usr/lib/locale/locale-archive
7f8dd56a6000-7f8dd5869000 r-xp 00000000 fd:02 16784485 /usr/lib64/libc-2.17.so
7f8dd5869000-7f8dd5a68000 ---p 001c3000 fd:02 16784485 /usr/lib64/libc-2.17.so
7f8dd5a68000-7f8dd5a6c000 r--p 001c2000 fd:02 16784485 /usr/lib64/libc-2.17.so
7f8dd5a6c000-7f8dd5a6e000 rw-p 001c6000 fd:02 16784485 /usr/lib64/libc-2.17.so
7f8dd5a6e000-7f8dd5a73000 rw-p 00000000 00:00 0
7f8dd5a73000-7f8dd5a95000 r-xp 00000000 fd:02 16784478 /usr/lib64/ld-2.17.so
7f8dd5c84000-7f8dd5c87000 rw-p 00000000 00:00 0
7f8dd5c93000-7f8dd5c94000 rw-p 00000000 00:00 0
7f8dd5c94000-7f8dd5c95000 r--p 00021000 fd:02 16784478 /usr/lib64/ld-2.17.so
7f8dd5c95000-7f8dd5c96000 rw-p 00022000 fd:02 16784478 /usr/lib64/ld-2.17.so
7f8dd5c96000-7f8dd5c97000 rw-p 00000000 00:00 0
7ffe30beb000-7ffe30c0c000 rw-p 00000000 00:00 0 [stack]
7ffe30c0f000-7ffe30c11000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
如上,r-xp
代表代码段的映射地址,而rw-p
则代表数据段
在打算新建一个其他进程时,通常采用被称为fork and exec
的方式,即由父进程调用fork()创建子进程,再由子进程调用exec().
下面以echo "helloworld"
为例查看echo的调用
(base) [root@ecs0003 linux_pro]# strace echo "helloworld"
execve("/bin/echo", ["echo", "helloworld"], [/* 32 vars */]) = 0
brk(NULL) = 0x1f3e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2326000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=45852, ...}) = 0
mmap(NULL, 45852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f9ca231a000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P%\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2173512, ...}) = 0
mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f9ca1d39000
mprotect(0x7f9ca1efc000, 2093056, PROT_NONE) = 0
mmap(0x7f9ca20fb000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7f9ca20fb000
mmap(0x7f9ca2101000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2101000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2319000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2317000
arch_prctl(ARCH_SET_FS, 0x7f9ca2317740) = 0
mprotect(0x7f9ca20fb000, 16384, PROT_READ) = 0
mprotect(0x606000, 4096, PROT_READ) = 0
mprotect(0x7f9ca2327000, 4096, PROT_READ) = 0
munmap(0x7f9ca231a000, 45852) = 0
brk(NULL) = 0x1f3e000
brk(0x1f5f000) = 0x1f5f000
2.4 结束进程
可以通过调用_exit()
函数(底层发起exit_group()
系统调用)来结束进程.
在进程结束后,所有分配给进程的内存将被回收.
一般很少直接调用_exit()函数,而是通过调用C标准库中的exit()函数来结束进程的运行.这种情况下,C标准库会在调用完自身的终止处理后调用_exit()
函数.从main()函数返回时也是同样的处理方式.