AFL forkserver源码阅读

前阵子出于好奇读了部分源码,由于工作原因还没看完,后续有时间把后面的补上

AFL forkserver部分源码阅读

afl-gcc.c(替换as)

主要分为两部分

find_as和edit_params

find_as负责找到afl-as插桩,从三个地方触发,第一个是直接从环境变量中读取u8 *afl_path = getenv("AFL_PATH");

第二个是程序运行时目录寻找dir = ck_strdup(argv0);

如上两种情况都找不到就会从编译时的AFL_PATH中寻找access(AFL_PATH "/as", X_OK)

edit_params则是负责解析命令行参数并进行修改和替换

第一步是确定编译器,目录中存在afl-clang, afl-g++等文件,其实都是个软链接指向了afl-gcc,afl-gcc会根据命令行文件判断使用哪个编译器,如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
name = strrchr(argv[0], '/');
if (!name) name = argv[0]; else name++;

if (!strncmp(name, "afl-clang", 9)) {

clang_mode = 1;

setenv(CLANG_ENV_VAR, "1", 1);

if (!strcmp(name, "afl-clang++")) {
u8* alt_cxx = getenv("AFL_CXX");
cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++";
} else {
u8* alt_cc = getenv("AFL_CC");
cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
}
...
...

确定完编译器后会进行后续参数的解析和替换,这里会忽略一些参数如-B,这个是默认带的,afl-gcc会使用-B指定afl-as为汇编器对代码进行汇编和插桩,用于统计代码覆盖率。这是整个函数中最关键的一步,其余的基本就是对参数的解析等

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
while (--argc) {
u8* cur = *(++argv);

if (!strncmp(cur, "-B", 2)) {

if (!be_quiet) WARNF("-B is already set, overriding");

if (!cur[2] && argc > 1) { argc--; argv++; }
continue;

}

if (!strcmp(cur, "-integrated-as")) continue;

if (!strcmp(cur, "-pipe")) continue;

#if defined(__FreeBSD__) && defined(__x86_64__)
if (!strcmp(cur, "-m32")) m32_set = 1;
#endif

if (!strcmp(cur, "-fsanitize=address") ||
!strcmp(cur, "-fsanitize=memory")) asan_set = 1;

if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;

cc_params[cc_par_cnt++] = cur;

}

cc_params[cc_par_cnt++] = "-B";
cc_params[cc_par_cnt++] = as_path;
...
...

afl-as.c(代码插桩)

首先也是edit_params,在afl-gcc中,程序指定了afl-as作为连接器,edit_params用于对afl-gcc调用传入的参数进行解析和修改,后续会调用add_instrumentation来对汇编文件进行插桩。这里比较重要的是设置了修改后的.s文件保存路径

modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),(u32)time(NULL));

最初始的inputfile由afl-gcc传入。参数解析完后会对原始的汇编文件进行插桩,在每个分支都会有如下插桩代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
static const u8* trampoline_fmt_64 =

"\n"
"/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
"leaq -(128+24)(%%rsp), %%rsp\n"
"movq %%rdx, 0(%%rsp)\n"
"movq %%rcx, 8(%%rsp)\n"
"movq %%rax, 16(%%rsp)\n"
"movq $0x%08x, %%rcx\n"
"call __afl_maybe_log\n"
"movq 16(%%rsp), %%rax\n"
"movq 8(%%rsp), %%rcx\n"
"movq 0(%%rsp), %%rdx\n"
"leaq (128+24)(%%rsp), %%rsp\n"
"\n"
"/* --- END --- */\n"
"\n";

64位程序如上

其中主要实现位于__afl_maybe_log,代码类似如下

1
2
3
cur_location = <COMPILE_TIME_RANDOM>;
shared_mem[cur_location ^ prev_location]++;
prev_location = cur_location >> 1;

设计时存在一个右移操作是想维持覆盖率的方向性

cur_location为编译时的随机数,在afl-as.c中体现如下

1
2
3
gettimeofday(&tv, &tz);
rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
srandom(rand_seed);

上述进行了随机数的初始化

每遇到一个分支就会进行代码插入,并把随机数填充

1
#  define R(x) (random() % (x))

add_instrumentation函数中最主要的插桩代码如下

1
2
3
4
5
6
if (line[0] == '\t') {
if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
ins_lines++;
}

判断力当前程序架构并插入对应的汇编指令,填充对应的cur_location值。

所有的汇编解析完后,在最后会插入很长的main_payload。里面就包括了上述统计覆盖率的afl_maybe_log 函数和初始化内存的afl_setup函数

afl-as.h(fork server,覆盖率统计实现)

主要讲解main_payload_64

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
  "  seto  %al\n"
"\n"
" /* Check if SHM region is already mapped. */\n"
"\n"
" movq __afl_area_ptr(%rip), %rdx\n"
" testq %rdx, %rdx\n"
" je __afl_setup\n"
"\n"
"__afl_store:\n"
"\n"
" /* Calculate and store hit for the code location specified in rcx. */\n"
"\n"
#ifndef COVERAGE_ONLY
" xorq __afl_prev_loc(%rip), %rcx\n"
" xorq %rcx, __afl_prev_loc(%rip)\n"
" shrq $1, __afl_prev_loc(%rip)\n"
#endif /* ^!COVERAGE_ONLY */
"\n"
#ifdef SKIP_COUNTS
" orb $1, (%rdx, %rcx, 1)\n"
#else
" incb (%rdx, %rcx, 1)\n"
#endif /* ^SKIP_COUNTS */
"\n"
"__afl_return:\n"
"\n"
" addb $127, %al\n"
#if defined(__OpenBSD__) || (defined(__FreeBSD__) && (__FreeBSD__ < 9))
" .byte 0x9e /* sahf */\n"
#else
" sahf\n"
#endif /* ^__OpenBSD__, etc */
" ret\n"

首先判断共享内存区域是否被映射,如果没有被映射则进入__afl_setup函数,否则进入store函数,这里负责使得对应loc的map值加一统计覆盖率。

下面看setup函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  "__afl_setup:\n"
"\n"
" /* Do not retry setup if we had previous failures. */\n"
"\n"
" cmpb $0, __afl_setup_failure(%rip)\n"
" jne __afl_return\n"
"\n"
" /* Check out if we have a global pointer on file. */\n"
"\n"
#ifndef __APPLE__
" movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
" movq (%rdx), %rdx\n"
#else
" movq __afl_global_area_ptr(%rip), %rdx\n"
#endif /* !^__APPLE__ */
" testq %rdx, %rdx\n"
" je __afl_setup_first\n"
"\n"
" movq %rdx, __afl_area_ptr(%rip)\n"
" jmp __afl_store\n"

判断之前是否初始化失败,失败直接return,接着判断全局变量值是否为空,非空则直接进行赋值然后跳转至afl_store函数,如果都不满足则进入setup_first函数如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  "__afl_setup_first:\n"
"\n"
" /* Save everything that is not yet saved and that may be touched by\n"
" getenv() and several other libcalls we'll be relying on. */\n"
"\n"
" leaq -352(%rsp), %rsp\n"
"\n"
" movq %rax, 0(%rsp)\n"
" movq %rcx, 8(%rsp)\n"
" movq %rdi, 16(%rsp)\n"
" movq %rsi, 32(%rsp)\n"
" movq %r8, 40(%rsp)\n"
" movq %r9, 48(%rsp)\n"
" movq %r10, 56(%rsp)\n"
" movq %r11, 64(%rsp)\n"
"\n"
" movq %xmm0, 96(%rsp)\n"
" movq %xmm1, 112(%rsp)\n"
" movq %xmm2, 128(%rsp)\n"
" movq %xmm3, 144(%rsp)\n"
" movq %xmm4, 160(%rsp)\n"
" movq %xmm5, 176(%rsp)\n"
" movq %xmm6, 192(%rsp)\n"
" movq %xmm7, 208(%rsp)\n"
" movq %xmm8, 224(%rsp)\n"
" movq %xmm9, 240(%rsp)\n"
" movq %xmm10, 256(%rsp)\n"
" movq %xmm11, 272(%rsp)\n"
" movq %xmm12, 288(%rsp)\n"
" movq %xmm13, 304(%rsp)\n"
" movq %xmm14, 320(%rsp)\n"
" movq %xmm15, 336(%rsp)\n"
"\n"
" /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */\n"
"\n"
" /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the\n"
" original stack ptr in the callee-saved r12. */\n"
"\n"
" pushq %r12\n"
" movq %rsp, %r12\n"
" subq $16, %rsp\n"
" andq $0xfffffffffffffff0, %rsp\n"
"\n"
" leaq .AFL_SHM_ENV(%rip), %rdi\n"
CALL_L64("getenv")
"\n"
" testq %rax, %rax\n"
" je __afl_setup_abort\n"
"\n"
" movq %rax, %rdi\n"
CALL_L64("atoi")
"\n"
" xorq %rdx, %rdx /* shmat flags */\n"
" xorq %rsi, %rsi /* requested addr */\n"
" movq %rax, %rdi /* SHM ID */\n"
CALL_L64("shmat")
"\n"
" cmpq $-1, %rax\n"
" je __afl_setup_abort\n"
"\n"
" /* Store the address of the SHM region. */\n"
"\n"
" movq %rax, %rdx\n"
" movq %rax, __afl_area_ptr(%rip)\n"
"\n"
#ifdef __APPLE__
" movq %rax, __afl_global_area_ptr(%rip)\n"
#else
" movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
" movq %rax, (%rdx)\n"
#endif /* ^__APPLE__ */
" movq %rax, %rdx\n"

首先保存环境,push一堆寄存器,随后对rsp进行了16字节对齐操作,作者注释中也写到了,andq $0xfffffffffffffff0, %rsp\n保证了rsp为16字节对齐,开头的sub rsp也是为了防止覆盖先前压入的寄存器,最原始的rsp被保存在了r12寄存器中。后续就是一些初始化,首先获取环境变量AFL_SHM_ENV然后转为整数后传入shmat将内存映射至当前进程的内存空间然后为afl_area_ptr和afl_global_area_ptr进行赋值。如果中间有失败就会进入afl_setup_abort函数中,可以看到代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"__afl_setup_abort:\n"
"\n"
" /* Record setup failure so that we don't keep calling\n"
" shmget() / shmat() over and over again. */\n"
"\n"
" incb __afl_setup_failure(%rip)\n"
"\n"
" movq %r12, %rsp\n"
" popq %r12\n"
"\n"
" movq 0(%rsp), %rax\n"
" movq 8(%rsp), %rcx\n"
" movq 16(%rsp), %rdi\n"
" movq 32(%rsp), %rsi\n"
" movq 40(%rsp), %r8\n"
" movq 48(%rsp), %r9\n"
" movq 56(%rsp), %r10\n"
" movq 64(%rsp), %r11\n"
"\n"
" movq 96(%rsp), %xmm0\n"
" movq 112(%rsp), %xmm1\n"
" movq 128(%rsp), %xmm2\n"
" movq 144(%rsp), %xmm3\n"
" movq 160(%rsp), %xmm4\n"
" movq 176(%rsp), %xmm5\n"
" movq 192(%rsp), %xmm6\n"
" movq 208(%rsp), %xmm7\n"
" movq 224(%rsp), %xmm8\n"
" movq 240(%rsp), %xmm9\n"
" movq 256(%rsp), %xmm10\n"
" movq 272(%rsp), %xmm11\n"
" movq 288(%rsp), %xmm12\n"
" movq 304(%rsp), %xmm13\n"
" movq 320(%rsp), %xmm14\n"
" movq 336(%rsp), %xmm15\n"
"\n"
" leaq 352(%rsp), %rsp\n"
"\n"
" jmp __afl_return\n"

对afl_setup_failure值加1然后恢复现场,这也保证了编译出来的程序可以正常运行而不是必须以fuzzer的形式。

最后是forkserver部分如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"__afl_forkserver:\n"
"\n"
" /* Enter the fork server mode to avoid the overhead of execve() calls. We\n"
" push rdx (area ptr) twice to keep stack alignment neat. */\n"
"\n"
" pushq %rdx\n"
" pushq %rdx\n"
"\n"
" /* Phone home and tell the parent that we're OK. (Note that signals with\n"
" no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
" closed because we were execve()d from an instrumented binary, or because\n"
" the parent doesn't want to use the fork server. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" cmpq $4, %rax\n"
" jne __afl_fork_resume\n"
"\n"
"__afl_fork_wait_loop:\n"
"\n"
" /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY(FORKSRV_FD) ", %rdi /* file desc */\n"
CALL_L64("read")
" cmpq $4, %rax\n"
" jne __afl_die\n"
"\n"
" /* Once woken up, create a clone of our process. This is an excellent use\n"
" case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
" caches getpid() results and offers no way to update the value, breaking\n"
" abort(), raise(), and a bunch of other things :-( */\n"
"\n"
CALL_L64("fork")
" cmpq $0, %rax\n"
" jl __afl_die\n"
" je __afl_fork_resume\n"
"\n"
" /* In parent process: write PID to pipe, then wait for child. */\n"
"\n"
" movl %eax, __afl_fork_pid(%rip)\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_fork_pid(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" movq $0, %rdx /* no flags */\n"
" leaq __afl_temp(%rip), %rsi /* status */\n"
" movq __afl_fork_pid(%rip), %rdi /* PID */\n"
CALL_L64("waitpid")
" cmpq $0, %rax\n"
" jle __afl_die\n"
"\n"
" /* Relay wait status to pipe, then loop back. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" jmp __afl_fork_wait_loop\n"
"\n"
"__afl_fork_resume:\n"
"\n"
" /* In child process: close fds, resume execution. */\n"
"\n"
" movq $" STRINGIFY(FORKSRV_FD) ", %rdi\n"
CALL_L64("close")
"\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi\n"
CALL_L64("close")
"\n"
" popq %rdx\n"
" popq %rdx\n"
"\n"
" movq %r12, %rsp\n"
" popq %r12\n"
"\n"
" movq 0(%rsp), %rax\n"
" movq 8(%rsp), %rcx\n"
" movq 16(%rsp), %rdi\n"
" movq 32(%rsp), %rsi\n"
" movq 40(%rsp), %r8\n"
" movq 48(%rsp), %r9\n"
" movq 56(%rsp), %r10\n"
" movq 64(%rsp), %r11\n"
"\n"
" movq 96(%rsp), %xmm0\n"
" movq 112(%rsp), %xmm1\n"
" movq 128(%rsp), %xmm2\n"
" movq 144(%rsp), %xmm3\n"
" movq 160(%rsp), %xmm4\n"
" movq 176(%rsp), %xmm5\n"
" movq 192(%rsp), %xmm6\n"
" movq 208(%rsp), %xmm7\n"
" movq 224(%rsp), %xmm8\n"
" movq 240(%rsp), %xmm9\n"
" movq 256(%rsp), %xmm10\n"
" movq 272(%rsp), %xmm11\n"
" movq 288(%rsp), %xmm12\n"
" movq 304(%rsp), %xmm13\n"
" movq 320(%rsp), %xmm14\n"
" movq 336(%rsp), %xmm15\n"
"\n"
" leaq 352(%rsp), %rsp\n"
"\n"
" jmp __afl_store\n"
"\n"
"__afl_die:\n"
"\n"
" xorq %rax, %rax\n"
CALL_L64("_exit")
"\n"

首先压栈两次保证16字节对齐,接着向指定的管道FORKSRV_FD + 1写入四字节magicnumber表示子进程已准备就绪,接着比较返回值是否为4判断是否写入成功,失败则进入afl_fork_resume函数,否则进入afl_fork_wait_loop,通过调用read函数来从管道接收消息判断父进程是否返回响应,如果读取失败或者读取字节不足4则进入afl_die函数。如果成功响应则开始fork。如果fork失败则会进入afl_die,如果为0则代表为子进程进入fork_resume,成功的话会保存子进程pid并写入给父进程,接着调用watpid等待子进程结束。关于afl_resume可以看到是关闭了两个管道并恢复了现场环境然后进入覆盖率统计函数,afl_die则是直接exit。

至此可以大致看到整体的afl框架

1
2
3
4
5
6
7
8
父进程(fuzzer)
负责管理整个afl fuzzer的初始进程,主要负责生成和修改输入的数据并监控子进程(fork server)的状态。他不会执行测试用例而是通过和fork server进行通信,让fork server来启动新的测试用例

子进程(fork server)
即mainpayload代码。主要功能是负责和父进程通信接收信号,每接收到一次信号就通过fork创建一个新的测试用例然后通过waitpid等待测试用例执行并判断其状态,收集结果后继续等待来自父进程的下一个信号。这种设计避免了反复调用execve从而提升了效率

子子进程(测试用例)
fork server创建的进程,每个进程都独立执行一次,完成测试后将执行结果返回给fork server然后退出。

这也可以看出来afl比libfuzzer的一个优势,测试用例程序的崩溃并不会影响父进程,都是隔离的,而libfuzzer一旦崩溃就直接退出。


AFL forkserver源码阅读
http://www.psbazx.com/2024/03/22/AFL-forkserver源码阅读/
Beitragsautor
皮三宝
Veröffentlicht am
March 22, 2024
Urheberrechtshinweis