MIT6.828 Lab3

Lec 7

一些虚拟内存实现的技巧,如延迟分配,写时复制等
https://pdos.csail.mit.edu/6.828/2018/lec/l-usingvm.pdf

Lab3 User Environments

PartA: User Environments and Exception Handling

Creating and Running Environments

创建并初始化envs,env_setup_vm函数为环境e建立一个专属的页目录表,由于在UTOP上的内核空间映射对每个环境都是相同的,所以可以直接拷贝kern_pgdir过来.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Mark every entry of 'envs' as free, give it env_id 0, and thread all of
// them onto env_free_list so that the list is in array order (envs[0] is
// the list head and the last entry's env_link is null).
// Finishes with the per-CPU part of the initialization.
void
env_init(void)
{
	int idx;

	// Push entries from the back of the array so the finished list
	// starts at envs[0] and runs forward through the array.
	env_free_list = 0;
	for (idx = NENV - 1; idx >= 0; idx--) {
		envs[idx].env_id = 0;
		envs[idx].env_status = ENV_FREE;
		envs[idx].env_link = env_free_list;
		env_free_list = &envs[idx];
	}

	env_init_percpu();
}

// Build the page directory for environment e.
//
// A fresh page is allocated, filled with a copy of kern_pgdir, and then
// the UVPT slot is pointed back at the new directory itself.
//
// Returns 0 on success, -E_NO_MEM if no page could be allocated.
static int
env_setup_vm(struct Env *e)
{
	struct PageInfo *dir_page = page_alloc(ALLOC_ZERO);

	if (!dir_page)
		return -E_NO_MEM;

	// The directory page is referenced by the environment from now on.
	dir_page->pp_ref++;

	// Start from a full copy of the kernel's page directory.
	e->env_pgdir = page2kva(dir_page);
	memcpy(e->env_pgdir, kern_pgdir, PGSIZE);

	// UVPT maps the env's own page table read-only.
	// Permissions: kernel R, user R
	e->env_pgdir[PDX(UVPT)] = PADDR(e->env_pgdir) | PTE_P | PTE_U;

	return 0;
}

由于还没有文件系统,JOS采用将ELF文件直接链接到内核的方式来提供用户程序,load_icode函数按照binary文件中的ELF头和程序头,将binary映像加载到用户进程(以JOS的概念,进程应该改成环境)空间中,并建立用户页表的相应映射.

实现这一函数主要是熟悉ELF头和程序头各项的意义,其实在MBR加载内核映像的时候我们已经完成过相同的操作.注意页目录表的切换.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

// Load the ELF image at 'binary' into the address space of environment e,
// set the environment's entry point, and map one page of initial stack.
//
// For every ELF_PROG_LOAD program header, p_memsz bytes are reserved at
// p_va, the first p_filesz bytes are copied from the image, and the rest
// is cleared. Other program header types are ignored.
//
// Panics if the ELF magic is wrong or if a loadable segment claims more
// file bytes than memory bytes. Allocation failures panic inside
// region_alloc.
static void
load_icode(struct Env *e, uint8_t *binary)
{
	struct Elf *elfhdr = (struct Elf *) binary;
	struct Proghdr *ph, *eph;

	if (elfhdr->e_magic != ELF_MAGIC)
		panic("invalid ELF format,magic is not correct\n");

	ph = (struct Proghdr *) (binary + elfhdr->e_phoff);
	eph = ph + elfhdr->e_phnum;

	// Switch to the environment's page directory so the segment
	// addresses can be written with plain memcpy/memset.
	lcr3(PADDR(e->env_pgdir));

	for (; ph < eph; ++ph) {
		void *seg_va;

		if (ph->p_type != ELF_PROG_LOAD)
			continue;
		if (ph->p_filesz > ph->p_memsz)
			panic("file size is great than memmory size\n");

		// region_alloc takes the environment, not its page directory,
		// and the destination must be passed as a pointer.
		seg_va = (void *) ph->p_va;
		region_alloc(e, seg_va, ph->p_memsz);
		memcpy(seg_va, binary + ph->p_offset, ph->p_filesz);
		memset((uint8_t *) seg_va + ph->p_filesz, 0,
		       ph->p_memsz - ph->p_filesz);
	}

	e->env_tf.tf_eip = elfhdr->e_entry;

	// Now map one page for the program's initial stack
	// at virtual address USTACKTOP - PGSIZE.
	region_alloc(e, (void *) (USTACKTOP - PGSIZE), PGSIZE);

	// Back to the kernel's page directory.
	lcr3(PADDR(kern_pgdir));
}

//
// Allocate len bytes of physical memory for environment env,
// and map it at virtual address va in the environment's address space.
// 'va' is rounded down and 'va + len' is rounded up to a page boundary,
// so neither needs to be page-aligned.
// Pages are requested from page_alloc with ALLOC_ZERO and mapped
// writable by user and kernel (PTE_U | PTE_W | PTE_P).
// Panic if any allocation attempt fails, including a failure reported
// by page_insert.
//
static void
region_alloc(struct Env *e, void *va, size_t len)
{
	uintptr_t cur = ROUNDDOWN((uintptr_t) va, PGSIZE);
	uintptr_t end = ROUNDUP((uintptr_t) va + len, PGSIZE);
	struct PageInfo *pp;
	int r;

	for (; cur < end; cur += PGSIZE) {
		pp = page_alloc(ALLOC_ZERO);
		if (pp == NULL)
			panic("region_alloc:out of memory");

		// page_insert can fail too; ignoring that would leave a hole
		// in the region that the caller is about to write to.
		r = page_insert(e->env_pgdir, pp, (void *) cur,
				PTE_U | PTE_W | PTE_P);
		if (r < 0)
			panic("region_alloc:%e", r);
	}
}

之后便可以创建用户环境并运行.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Allocate a new environment with env_alloc (parent id 0), record its
// type, and load the ELF image 'binary' into it with load_icode.
// Panics if env_alloc reports an error.
void
env_create(uint8_t *binary, enum EnvType type)
{
	struct Env *new_env;
	int err;

	err = env_alloc(&new_env, 0);
	if (err < 0)
		panic("env_alloc:%e", err);

	new_env->env_type = type;
	load_icode(new_env, binary);
}

// Context switch from curenv to environment e.
//
// If there is a current environment that is still marked ENV_RUNNING it
// is demoted to ENV_RUNNABLE. e then becomes curenv, is marked
// ENV_RUNNING, has its run counter bumped, its page directory is
// loaded, and its saved trap frame is handed to env_pop_tf.
void
env_run(struct Env *e)
{
	struct Env *prev = curenv;

	if (prev != NULL && prev->env_status == ENV_RUNNING)
		prev->env_status = ENV_RUNNABLE;

	e->env_runs++;
	e->env_status = ENV_RUNNING;
	curenv = e;

	// Switch address space, then restore the environment's registers.
	lcr3(PADDR(e->env_pgdir));
	env_pop_tf(&e->env_tf);
}



env相关流程分析
创建与初始化

i386_init函数在mem_init函数中完成envs数组的内存分配并在kern_pgdir中完成映射.
调用env_init,初始化envs数组中的各env,并链入env_free_list等待分配,load新的带有用户段的GDT,设置LDT.

运行准备与运行

env_create函数创建一个针对特定二进制(ELF)文件的运行环境.先调用env_alloc函数分配一个env结构,设置env环境(包括各段选择子,这是切换到用户级权限的关键之一),并为env建立一个单独的页目录,映射内核及该页目录本身.
调用load_icode函数按ELF头和程序头记录的信息为ELF格式的可执行文件创建内存映像,设置Trapframe->tf_eip为该程序的入口点,为环境映射一个初始栈.
env_run函数设置相关运行信息,调用env_pop_tf函数用env->env_tf设置程序状态,开始以用户级权限运行程序.

Handling Interrupts and Exceptions

trapentry.S中以宏的形式定义了各中断处理例程的入口,以及所有trap共用的例程_alltraps.
该函数按照Trapframe的结构为trap函数压栈准备参数,设置ds和es寄存器.然后调用trap函数且不再返回.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/* TRAPHANDLER defines a globally-visible function for handling a trap.
* It pushes a trap number onto the stack, then jumps to _alltraps.
* Use TRAPHANDLER for traps where the CPU automatically pushes an error code.
*
* Arguments:
*   name - symbol that becomes the entry point of the handler
*   num  - trap number, pushed as an immediate operand
*
* You shouldn't call a TRAPHANDLER function from C, but you may
* need to _declare_ one in C (for instance, to get a function pointer
* during IDT setup). You can declare the function with
* void NAME();
* where NAME is the argument passed to TRAPHANDLER.
*/
#define TRAPHANDLER(name, num) \
.globl name; /* define global symbol for 'name' */ \
.type name, @function; /* symbol type is function */ \
.align 2; /* align function definition */ \
name: /* function starts here */ \
pushl $(num); \
jmp _alltraps

/* Use TRAPHANDLER_NOEC for traps where the CPU doesn't push an error code.
* It pushes a 0 in place of the error code, so the trap frame has the same
* format in either case.
*
* Arguments:
*   name - symbol that becomes the entry point of the handler
*   num  - trap number, pushed after the placeholder 0
*/
#define TRAPHANDLER_NOEC(name, num) \
.globl name; \
.type name, @function; \
.align 2; \
name: \
pushl $0; \
pushl $(num); \
jmp _alltraps

.text

// Entry stub for T_DIVIDE, declared with the variant that pushes a
// placeholder 0 in the error-code slot.
TRAPHANDLER_NOEC(divide_handler, T_DIVIDE);
//......
// The remaining handlers are declared the same way and are omitted here.

/*
 * _alltraps: common tail shared by every TRAPHANDLER / TRAPHANDLER_NOEC stub.
 *
 * On entry the stack already holds the trap number (pushed by the stub) on
 * top of the error-code slot. This routine pushes %ds, %es and the
 * general-purpose registers so the stack contents line up with
 * struct Trapframe, loads the kernel data selector GD_KD into %ds and %es,
 * and calls trap() with a pointer to the frame (the current %esp).
 */
.globl _alltraps
_alltraps:
	pushl %ds
	pushl %es
	pushal

	/* AT&T operand order is source, destination: load GD_KD via %ax
	 * because a segment register cannot take an immediate. */
	movw $GD_KD, %ax
	movw %ax, %ds
	movw %ax, %es

	/* Argument for trap(): address of the frame just built. */
	pushl %esp
	call trap

下面是Trapframe结构的定义,由注释分为三个部分,
最下面的一部分仅在发生特权级切换的时候才压入.
下图中的Trapframe部分在发生异常时由处理器硬件压入.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*
 * General-purpose registers in the order they end up in memory after a
 * pusha instruction, lowest address first. Embedded as the first member
 * of struct Trapframe. Packed so that no padding is inserted.
 */
struct PushRegs {
/* registers as pushed by pusha */
uint32_t reg_edi;
uint32_t reg_esi;
uint32_t reg_ebp;
uint32_t reg_oesp; /* Useless: stack pointer slot stored by pusha; presumably never read back -- confirm against popa semantics */
uint32_t reg_ebx;
uint32_t reg_edx;
uint32_t reg_ecx;
uint32_t reg_eax;
} __attribute__((packed));

/*
 * Saved state for one trap, lowest address first. The members up to
 * tf_trapno are pushed by software (the TRAPHANDLER stubs and _alltraps);
 * the rest is marked by the comments below as coming from the hardware.
 * Each 16-bit selector is followed by a 16-bit padding member so that it
 * occupies a full 32-bit stack slot. Packed so that no padding is inserted.
 */
struct Trapframe {
struct PushRegs tf_regs; /* general registers, saved by pushal in _alltraps */
uint16_t tf_es; /* saved %es, pushed by _alltraps */
uint16_t tf_padding1;
uint16_t tf_ds; /* saved %ds, pushed by _alltraps */
uint16_t tf_padding2;
uint32_t tf_trapno; /* trap number pushed by the TRAPHANDLER / TRAPHANDLER_NOEC stub */
/* below here defined by x86 hardware */
uint32_t tf_err; /* error code; TRAPHANDLER_NOEC stubs push a 0 into this slot instead */
uintptr_t tf_eip; /* instruction pointer saved at the trap */
uint16_t tf_cs; /* code segment selector; trap() tests its low two bits for user mode */
uint16_t tf_padding3;
uint32_t tf_eflags; /* saved flags register */
/* below here only when crossing rings, such as from user to kernel */
uintptr_t tf_esp; /* stack pointer of the interrupted code */
uint16_t tf_ss; /* stack segment selector of the interrupted code */
uint16_t tf_padding4;
} __attribute__((packed));
trap流程分析

i386_init中调用trap_init完成trap的初始化:将各中断处理例程(在本实现中是中断处理例程的入口点)挂接到IDT中,设置TSS段(存储了内核栈的地址信息)并挂接到GDT中,设置IDT和TSS.

当中断或异常发生,处理器根据中断向量号在IDT中寻找对应的中断处理例程,在进行特权级检查(中断处理例程DPL≤CPL≤中断门描述符DPL)后,若发生特权级转换,CPU从当前TSS段中取出内核栈的地址信息(ss,esp)并加载到ss,esp寄存器中(加载前临时保存原值).在内核栈中压入原栈地址(ss,esp),再压入eflags,CS和eip;对于部分异常,处理器还会自动压入错误码.然后跳转到中断处理例程.接下来由操作系统(而不是处理器)为没有错误码的异常补压一个0作为占位,再压入中断号及其他寄存器信息,形成Trapframe结构.跳转到trap函数.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// C-level entry point for every trap; called with a pointer to the trap
// frame that the assembly entry code built on the stack.
//
// Saves the frame into curenv when the trap came from user mode, hands
// the frame to trap_dispatch, and finally resumes curenv via env_run.
void
trap(struct Trapframe *tf)
{
	int from_user;

	// Clear DF first: the interrupted environment may have set it,
	// and some versions of GCC rely on DF being clear.
	asm volatile("cld" ::: "cc");

	// Interrupts must be off here. If this assertion fails, DO NOT
	// be tempted to fix it by inserting a "cli" in the interrupt path.
	assert(!(read_eflags() & FL_IF));

	cprintf("Incoming TRAP frame at %p\n", tf);

	// The low two bits of the saved %cs are 3 for user-mode code.
	from_user = (tf->tf_cs & 3) == 3;
	if (from_user) {
		assert(curenv);

		// Keep a copy of the on-stack frame in the environment so
		// that running it again restarts at the trap point, and
		// work on that copy from here on.
		curenv->env_tf = *tf;
		tf = &curenv->env_tf;
	}

	// Remember the last real trapframe so print_trapframe can
	// print some additional information.
	last_tf = tf;

	// Hand the trap to the handler for its type.
	trap_dispatch(tf);

	// Resume the current environment, which should be running.
	assert(curenv && curenv->env_status == ENV_RUNNING);
	env_run(curenv);
}

trap函数进一步根据中断向量号分发(trap_dispatch)到各真正的中断处理例程(本实现中IDT中保存的只是一个中断处理例程的entry).
完成异常处理后,调用env_run函数恢复原环境(进程).

Part B: Page Faults, Breakpoints Exceptions, and System Calls

The Breakpoint Exception

调试器设置断点的原理:临时将断点处指令的第1个字节替换为1字节的int3软件中断指令,执行到此处时触发断点异常(The Breakpoint Exception).在这里的实现中,该异常会调用内核监视器monitor,并为monitor添加两个命令nextstep和continue:前者会设置eflags的FL_TF位使处理器开始单步执行,每执行完一条指令就产生一个中断向量号为1的debug exception,在本实现中该异常的处理例程同样会启动monitor;后者清除FL_TF位并退出monitor,让程序继续正常运行.

The breakpoint exception, interrupt vector 3 (T_BRKPT), is normally used to allow debuggers to insert breakpoints in a program’s code by temporarily replacing the relevant program instruction with the special 1-byte int3 software interrupt instruction.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
// Monitor command: execute one instruction of the trapped code and stop.
//
// argc/argv are unused. tf is the trap frame the monitor was entered
// with, or NULL if there is none.
//
// Returns -1 (the value this file uses to leave the monitor and resume
// the trapped code) when single-stepping is armed:
//   - after a breakpoint (T_BRKPT), FL_TF is set in the saved eflags;
//   - after a debug exception (T_DEBUG) with FL_TF already set, stepping
//     simply continues.
// In every other case, including tf == NULL, a message is printed and 0
// is returned so the monitor keeps running instead of panicking the kernel.
int
mon_nextstep(int argc, char **argv, struct Trapframe *tf)
{
	if (tf == NULL) {
		cprintf("nextstep(ni) can only called via int 3(breakpoint exception)\n");
		return 0;
	}

	cprintf("$rip: %p\n", tf->tf_eip);
	switch (tf->tf_trapno) {
	case T_BRKPT:
		tf->tf_eflags |= FL_TF;
		return -1;
	case T_DEBUG:
		if (tf->tf_eflags & FL_TF)
			return -1;
		/* fallthrough: a debug trap without FL_TF is not a step */
	default:
		cprintf("nextstep(ni) can only called via int 3(breakpoint exception)\n");
		break;
	}
	return 0;
}

// Monitor command: resume the trapped code at full speed.
//
// argc/argv are unused. tf is the trap frame the monitor was entered
// with, or NULL if there is none.
//
// When the monitor was entered through a breakpoint (T_BRKPT) or debug
// (T_DEBUG) exception, FL_TF is cleared in the saved eflags and -1 is
// returned, the value this file uses to leave the monitor. This also
// covers a plain breakpoint where single-stepping was never armed.
// Otherwise, including tf == NULL, a message is printed and 0 is
// returned so the monitor keeps running instead of panicking the kernel.
int
mon_continue(int argc, char **argv, struct Trapframe *tf)
{
	if (tf == NULL ||
	    (tf->tf_trapno != T_DEBUG && tf->tf_trapno != T_BRKPT)) {
		cprintf("continue can only called via breakpoint or debug exception!\n");
		return 0;
	}

	// Clearing an already-clear FL_TF is harmless.
	tf->tf_eflags &= ~FL_TF;
	return -1;
}

System calls 系统调用

注意权限位的检验采用(perm&(*pte))==perm的形式,想象一下交集便于理解这一操作.(一些类型转换使程序看起来很乱,ye..i know…)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Check that environment env may access the range [va, va + len) with
// permissions 'perm | PTE_P'.
//
// Every page touched by the range must lie below ULIM and have a page
// table entry containing all of those permission bits.
//
// Returns 0 if the whole range is accessible. Otherwise stores the first
// offending virtual address in user_mem_check_addr and returns -E_FAULT.
// A range that extends past ULIM, or wraps around the end of the address
// space, is rejected rather than silently truncated.
int
user_mem_check(struct Env *env, const void *va, size_t len, int perm)
{
	uintptr_t begin = (uintptr_t) va;
	uintptr_t end = begin + len;	// one past the last byte; may wrap
	uintptr_t page = ROUNDDOWN(begin, PGSIZE);
	uintptr_t stop;
	int past_ulim;
	pte_t *pte;

	perm |= PTE_P;

	// 'end < begin' means begin + len overflowed. In that case, and when
	// the range ends above ULIM, only the part below ULIM is scanned and
	// the rest is reported as a fault afterwards. Clamping here also
	// keeps ROUNDUP from wrapping to 0 near the top of the address space.
	// Assumes ULIM is page-aligned -- TODO confirm against memlayout.h.
	past_ulim = end < begin || end > ULIM;
	stop = past_ulim ? ULIM : ROUNDUP(end, PGSIZE);

	for (; page < stop; page += PGSIZE) {
		pte = pgdir_walk(env->env_pgdir, (void *) page, false);
		if (pte == NULL || (*pte & perm) != perm) {
			// The first page may start before va; never report an
			// address below the start of the range.
			user_mem_check_addr = page > begin ? page : begin;
			return -E_FAULT;
		}
	}

	if (past_ulim) {
		user_mem_check_addr = begin > ULIM ? begin : ULIM;
		return -E_FAULT;
	}
	return 0;
}
syscall流程分析

用户进程调用sys_xxx函数(其实用户并不直接调用这样的函数,而是由更上一层的函数如cprintf之类的调用.这里的用户指的是使用JOS而不是开发JOS的开发者hh),向操作系统申请xxx的操作,sys_xxx函数调用syscall函数,syscall函数使用int 0x30触发中断.
(上述sys_xxx,syscall函数均为lib/syscall.c中定义的由用户进程使用的函数,而非kern/syscall.c中的内核使用的实现)

中断处理过程参考上面的trap流程分析.
最终被分发到内核的syscall函数,该函数通过调用相应的内核函数完成功能,并设置系统调用返回值,之后正常从中断返回到用户进程.

  • 版权声明: 本博客所有文章除特别声明外,著作权归作者所有。转载请注明出处!
  • Copyrights © 2022-2024 翰青HanQi

请我喝杯咖啡吧~

支付宝
微信