Linux kernel Lab学习笔记

只是做lab时的记录, 笔者并不认为文中的代码部分有什么阅读的价值

内核模块

概述

执行上下文

我们可以根据内核执行的情况,把上下文分为两种:进程上下文和中断上下文。如果我们是因为系统调用而在内核中运行代码,或者是在内核线程中运行,那么我们就在进程上下文中。如果我们是在响应中断或执行延迟操作的函数时运行,那么我们就在中断上下文中。

锁定

首先对于线程上下文,获取了自旋锁,使用普通的spin_lock是没有禁止本地中断的,完全可能出现,获取锁后,发生中断,中断上下文又访问临界区,这样肯定不行。所以在线程上下文中一定会使用 spin_lock_irqsave来保护临界代码区域。
而中断上下文中,访问临界代码区域,就复杂些。首先需要对中断有简单了解,如果是在中断处理程序的上半部分中访问临界代码区域,考虑到linux中断处理上半部分的中断屏蔽机制,会暂时屏蔽同优先级和低优先级的中断, 所以除非有更高优先级的中断中有访问临界代码区域(外部中断其实都是一个优先级),通常使用spin_lock保护即可。这个保护是一定需要的,有可能线程上下文中先获取了该锁,那么此时中断上下文就要先等一等才能获取到该锁(同步), 再进入临界代码区域。在多核系统中,线程上下文和中断上下文可能在两个CPU核心上执行。线程上下文屏蔽本地的中断,是完全有可能在其他cpu核心上产生中断并执行中断处理程序的,这样就会出现并行的情况,两边的锁都是必须要加的。
中断上下文还有一种情况,就是只在中断的下半部分访问临界代码,这时候,其实不需要屏蔽本地CPU的外部中断,仅禁用软中断即可,也就是一个更细粒度的自旋锁。使用spin_lock_bh来保护线程上下文中的临界代码区域即可, 可以优化系统性能,更合适,当然,使用上面的spin_lock_irqsave来保护也肯定没有问题的,只是有点“过保护”了,无法响应外部中断,性能会有一点损失。

可抢占性

Linux 使用的是可抢占内核。这里,我们需要明确区分可抢占多任务(preemptive multitasking)和可抢占内核两个概念。可抢占多任务是指,当一个进程在用户空间运行时,一旦其分配的时间片(时间片段)到期,操作系统会强制中断该进程,转而运行另一个进程。而如果一个在内核模式下运行的进程(通常是作为系统调用的结果),可以被中断以便运行另一个进程,那么我们就说这个内核具有可抢占性。

练习

list_proc

遍历进程的task采用for_each_process.

1
2
/*
 * Iterate over every task reachable from init_task via next_task().
 * The cursor starts at &init_task and is advanced before each test, so the
 * loop body runs for each following task and stops once the walk returns to
 * init_task; init_task itself is never handed to the body.
 */
#define for_each_process(p) \
for (p = &init_task ; (p = next_task(p)) != &init_task ; )
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
/* TODO: add missing headers */
#include <linux/sched.h>
#include <linux/sched/signal.h>

MODULE_DESCRIPTION("List current processes");
MODULE_AUTHOR("Kernel Hacker");
MODULE_LICENSE("GPL");

static int my_proc_init(void)
{
struct task_struct *p;

/* TODO: print current process pid and its name */
p = current;
pr_info("current: %s -- %d\n",p->comm,p->pid);

/* TODO: print the pid and name of all processes */
pr_info("others: \n");

struct task_struct *cur;

for_each_process(cur)
{
pr_info("%s -- %d\n",cur->comm,cur->pid);
}

//这种遍历方式会跳过p, 因为把p当作头结点了.
// list_for_each_entry(cur,&p->tasks,tasks)
// {
// pr_info("%s -- %d\n",cur->comm,cur->pid);
// }

return 0;
}

/* Module exit hook: log the name and pid of the process unloading the module. */
static void my_proc_exit(void)
{
	pr_info("current: %s -- %d\n", current->comm, current->pid);
}

module_init(my_proc_init);
module_exit(my_proc_exit);

Memory Info

顺着VMA链表遍历就行, 注意VMA链表不是环状的.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
static int my_hello_init(void)
{
struct task_struct* p = current;
struct vm_area_struct* cur = p->mm->mmap;


while(cur)
{
if(cur->vm_file)
printk(KERN_INFO "0x%lx -- 0x%lx -- [%s]\n",cur->vm_start,cur->vm_end,cur->vm_file->f_path.dentry->d_name.name);
else
printk(KERN_INFO "0x%lx -- 0x%lx -- [Anonymous]\n",cur->vm_start,cur->vm_end);

cur = cur->vm_next;
}

// if(cur)
// {
// do{
// printk(KERN_INFO "%lx -- %lx -- ",cur->vm_start,cur->vm_end);
// if(cur->vm_file)
// pr_info("[%s]",cur->vm_file->f_path.dentry->d_name.name);
// else
// pr_info("[Anonymous]");

// cur = cur->vm_next;
// }while(cur != p->mm->mmap);
// }

return 0;
}

内核api

练习

memory

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Allocate a task_info for @pid and stamp it with the current jiffies.
 * Returns the new entry, or NULL when kmalloc() fails.
 */
static struct task_info *task_info_alloc(int pid)
{
	struct task_info *info = kmalloc(sizeof(*info), GFP_KERNEL);

	if (info) {
		info->pid = pid;
		info->timestamp = jiffies;
	}

	return info;
}

static int memory_init(void)
{
/* TODO 2: call task_info_alloc for current pid */
ti1 = task_info_alloc(current->pid);
/* TODO 2: call task_info_alloc for parent PID */
ti2 = task_info_alloc(current->parent->pid);
/* TODO 2: call task_info alloc for next process PID */
ti3 = task_info_alloc(next_task(current)->pid);
/* TODO 2: call task_info_alloc for next process of the next process */
ti4 = task_info_alloc(next_task(next_task(current))->pid);
return 0;
}

/*
 * Module exit hook: print the pid and timestamp of each recorded task_info,
 * then free all four entries. Entries that were never allocated are skipped
 * when printing instead of being dereferenced.
 */
static void memory_exit(void)
{
	if (ti1)
		printk("[task_info] Current:\n\tPID:%d\n\ttimestamp:%lu\n\n", ti1->pid, ti1->timestamp);
	if (ti2)
		printk("[task_info] Parent:\n\tPID:%d\n\ttimestamp:%lu\n\n", ti2->pid, ti2->timestamp);
	if (ti3)
		printk("[task_info] Next:\n\tPID:%d\n\ttimestamp:%lu\n\n", ti3->pid, ti3->timestamp);
	if (ti4)
		printk("[task_info] Next(Next):\n\tPID:%d\n\ttimestamp:%lu\n", ti4->pid, ti4->timestamp);

	/* kfree() accepts NULL, so no guards are needed here. */
	kfree(ti1);
	kfree(ti2);
	kfree(ti3);
	kfree(ti4);
}

list_full

有两点:

  1. 遍历过程中若需要改变链表, 使用list_for_each_safe.
  2. 对于侵入式链表来说, 必须先list_del再kfree.
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    /*
    * Kernel API lab
    *
    * list-full.c: Working with lists (advanced)
    */

    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/list.h>
    #include <linux/sched/signal.h>

    MODULE_DESCRIPTION("Full list processing");
    MODULE_AUTHOR("SO2");
    MODULE_LICENSE("GPL");

    /* One tracked process: its pid, when it was last recorded, and a hit count. */
    struct task_info {
    pid_t pid;
    /* jiffies value stored when the entry was created or last refreshed */
    unsigned long timestamp;
    /* incremented each time the pid is added again while already listed */
    atomic_t count;
    /* linkage into the module-wide list rooted at "head" */
    struct list_head list;
    };

    /* Head of the list of task_info entries; initialised in list_full_init(). */
    static struct list_head head;

    /*
     * Allocate a task_info for @pid, stamped with the current jiffies and a
     * zeroed hit counter. The list linkage is left for the caller to set up.
     * Returns the new entry, or NULL when kmalloc() fails.
     */
    static struct task_info *task_info_alloc(int pid)
    {
        struct task_info *info = kmalloc(sizeof(*info), GFP_KERNEL);

        if (info) {
            info->pid = pid;
            info->timestamp = jiffies;
            atomic_set(&info->count, 0);
        }

        return info;
    }

    /*
     * Search the global list for an entry whose pid equals @pid.
     * Returns the matching entry, or NULL if the pid is not listed.
     */
    static struct task_info *task_info_find_pid(int pid)
    {
        struct task_info *ti;

        list_for_each_entry(ti, &head, list) {
            if (ti->pid == pid)
                return ti;
        }

        return NULL;
    }

    /*
     * Record @pid in the global list. If the pid is already listed, refresh
     * its timestamp and bump its hit counter; otherwise allocate a new entry
     * and insert it at the front of the list. An allocation failure is logged
     * and the pid is simply not recorded.
     */
    static void task_info_add_to_list(int pid)
    {
        struct task_info *ti;

        ti = task_info_find_pid(pid);
        if (ti != NULL) {
            ti->timestamp = jiffies;
            atomic_inc(&ti->count);
            return;
        }

        ti = task_info_alloc(pid);
        if (ti == NULL) {
            /* Without this check list_add() would dereference NULL. */
            pr_err("task_info_alloc failed for pid %d\n", pid);
            return;
        }
        list_add(&ti->list, &head);
    }

    static void task_info_add_for_current(void)
    {
    task_info_add_to_list(current->pid);
    task_info_add_to_list(current->parent->pid);
    task_info_add_to_list(next_task(current)->pid);
    task_info_add_to_list(next_task(next_task(current))->pid);
    }

    static void task_info_print_list(const char *msg)
    {
    struct list_head *p;
    struct task_info *ti;

    pr_info("%s: [ ", msg);
    list_for_each(p, &head) {
    ti = list_entry(p, struct task_info, list);
    pr_info("(%d, %lu) ", ti->pid, ti->timestamp);
    }
    pr_info("]\n");
    }

    static void task_info_remove_expired(void)
    {
    struct list_head *p, *q;
    struct task_info *ti;

    list_for_each_safe(p, q, &head) {
    ti = list_entry(p, struct task_info, list);
    if (jiffies - ti->timestamp > 3 * HZ && atomic_read(&ti->count) < 5) {
    list_del(p);
    kfree(ti);
    }
    }
    }

    static void task_info_purge_list(void)
    {
    struct list_head *p, *q;
    struct task_info *ti;

    list_for_each_safe(p, q, &head) {
    ti = list_entry(p, struct task_info, list);
    list_del(p);
    kfree(ti);
    }
    }

    /*
     * Module entry point: set up the list, record the current set of pids,
     * print them, then sleep interruptibly for up to five seconds before
     * returning 0.
     */
    static int list_full_init(void)
    {
        const long nap = 5 * HZ;

        INIT_LIST_HEAD(&head);

        task_info_add_for_current();
        task_info_print_list("after first add");

        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(nap);

        return 0;
    }

    /*
     * Module exit hook: protect the first entry from expiry by raising its
     * hit count to 5, remove expired entries, print what is left, then free
     * the remainder of the list.
     */
    static void list_full_exit(void)
    {
        struct task_info *ti;

        /*
         * list_first_entry() never yields NULL; on an empty list it returns
         * a pointer computed from the head itself. The _or_null variant
         * makes the emptiness check meaningful.
         */
        ti = list_first_entry_or_null(&head, struct task_info, list);
        if (ti)
            atomic_set(&ti->count, 5);

        task_info_remove_expired();
        task_info_print_list("after removing expired");
        task_info_purge_list();
    }

    module_init(list_full_init);
    module_exit(list_full_exit);

字符设备驱动

概述

主设备号和次设备号

在 UNIX 中,设备通常有一个唯一的、固定的标识符与之关联。这种传统在 Linux 中得以保留,尽管标识符可以动态分配(出于兼容性的原因,大多数驱动程序仍然使用静态标识符)。这个标识符由两部分组成:主设备号(major)和次设备号(minor)。第一部分用于标识设备类型(如 IDE 硬盘、SCSI 硬盘、串口等),而第二部分用于标识设备本身(如第一个硬盘、第二个串口等)。大多数情况下,主设备号用于标识驱动程序,而次设备号用于标识驱动程序所服务的某个物理设备。通常情况下,一个驱动程序会有一个关联的主设备号,并负责处理与该主设备号关联的所有次设备号。

字符设备的数据结构

inode 代表文件系统视角中的文件。inode 的属性包括文件大小、权限和相关时间。单个 inode 在文件系统中唯一标识一个文件。
file 结构仍然代表单个文件,但更接近用户的视角。file 结构的属性中,有 inode、文件名、文件打开属性和文件位置等。在给定时间内,所有打开的文件都有一个关联的 file 结构。
为了更好地理解 inode 和 file 之间的区别,我们可以使用面向对象编程的类比:如果我们将 inode 视为一个类,那么 file 就是对象,即 inode 类的实例。inode 表示文件的静态映像(inode 没有状态),而 file 表示文件的动态映像(file 具有状态)。

延迟工作

概述

延迟工作是一类内核功能,允许我们安排代码在稍后的时间执行。这些安排的代码可以在进程上下文或中断上下文中运行,具体取决于延迟工作的类型。延迟工作用于补充中断处理程序的功能,因为中断具有重要的要求和限制:

  • 中断处理程序的执行时间必须尽可能短
  • 在中断上下文中,我们不能使用阻塞调用

使用延迟工作,我们可以在中断处理程序中执行最小所需的工作,并安排一个异步操作在稍后的时间运行,以执行其余的操作。

在中断上下文中运行的延迟工作也称为下半部(bottom-half),因为其目的是执行中断处理程序(top-half)之外所剩余的操作。

练习

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*
* SO2 - Lab 6 - Deferred Work
*
* Exercises #3, #4, #5: deferred work
*
* Code skeleton.
*/

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/sched/task.h>
#include "../include/deferred.h"

#define MY_MAJOR 42
#define MY_MINOR 0
#define MODULE_NAME "deferred"

#define TIMER_TYPE_NONE -1
#define TIMER_TYPE_SET 0
#define TIMER_TYPE_ALLOC 1
#define TIMER_TYPE_MON 2

MODULE_DESCRIPTION("Deferred work character device");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

/*
 * List node for one monitored process. get_proc() fills in "task" after
 * taking a reference on it; "list" links the node into the device's list.
 */
struct mon_proc {
struct task_struct *task;
struct list_head list;
};

/*
 * State of the single "deferred" character device. The one instance, "dev",
 * is initialised in deferred_init().
 */
static struct my_device_data {
struct cdev cdev;
/* TODO 1: add timer */
/* fires timer_handler(); armed from the ioctl handler */
struct timer_list timer;
/* TODO 2: add flag */
/* one of the TIMER_TYPE_* values; selects what timer_handler() does */
int flag;
/* TODO 3: add work */
/* runs work_handler() when scheduled from the timer */
struct work_struct work;
/* TODO 4: add list for monitored processes */
/* only procs.list is used, as the head of the monitored-process list */
struct mon_proc procs;
/* TODO 4: add spinlock to protect list */
spinlock_t lock;
} dev;

/*
 * Stand-in for a blocking operation: sleep interruptibly for up to five
 * seconds, then log a message.
 */
static void alloc_io(void)
{
	const long nap = 5 * HZ;

	set_current_state(TASK_INTERRUPTIBLE);
	schedule_timeout(nap);

	pr_info("Yawn! I've been sleeping for 5 seconds.\n");
}

/*
 * Build a mon_proc for the process with the given pid.
 *
 * On success the returned node holds a reference on the task (taken with
 * get_task_struct()); whoever removes the node must drop it with
 * put_task_struct() and kfree() the node.
 *
 * Returns ERR_PTR(-ESRCH) if no such process exists and ERR_PTR(-ENOMEM) if
 * the node cannot be allocated.
 */
static struct mon_proc *get_proc(pid_t pid)
{
	struct task_struct *task;
	struct mon_proc *p;

	/*
	 * The task pointer is only guaranteed to stay valid inside the RCU
	 * read section, so the reference must be taken before unlocking.
	 */
	rcu_read_lock();
	task = pid_task(find_vpid(pid), PIDTYPE_PID);
	if (task)
		get_task_struct(task);
	rcu_read_unlock();

	if (!task)
		return ERR_PTR(-ESRCH);

	p = kmalloc(sizeof(*p), GFP_ATOMIC);
	if (!p) {
		put_task_struct(task);
		return ERR_PTR(-ENOMEM);
	}

	p->task = task;

	return p;
}


/* TODO 3: define work handler */
/*
 * Work item callback: log which task is running the handler, then perform
 * the blocking alloc_io() call.
 *
 * @work is the work_struct embedded in the device data; the handler does
 * not need the device itself.
 */
static void work_handler(struct work_struct *work)
{
	pr_info("[%s -- %d]: work_handler called\n", current->comm, current->pid);
	alloc_io();
}

// #define ALLOC_IO_DIRECT
/* TODO 3: undef ALLOC_IO_DIRECT*/

/*
 * Timer callback. What it does depends on the device flag:
 *  - TIMER_TYPE_SET:   log that the timer expired;
 *  - TIMER_TYPE_ALLOC: schedule the work item;
 *  - TIMER_TYPE_MON:   under the list lock, report and drop every monitored
 *                      process whose state is TASK_DEAD, then re-arm the
 *                      timer one second ahead.
 * Any other flag value does nothing.
 */
static void timer_handler(struct timer_list *tl)
{
	struct my_device_data *data = container_of(tl, struct my_device_data, timer);
	struct mon_proc *entry, *tmp;

	switch (data->flag) {
	case TIMER_TYPE_SET:
		pr_info("[%s -- %d]: timer expired\n", current->comm, current->pid);
		break;

	case TIMER_TYPE_ALLOC:
		schedule_work(&data->work);
		break;

	case TIMER_TYPE_MON:
		spin_lock(&data->lock);
		list_for_each_entry_safe(entry, tmp, &data->procs.list, list) {
			if (entry->task->state != TASK_DEAD)
				continue;

			pr_info("[%s -- %d] DEAD\n", entry->task->comm, entry->task->pid);
			/* Drop the reference taken in get_proc(), then discard the node. */
			put_task_struct(entry->task);
			list_del(&entry->list);
			kfree(entry);
		}
		spin_unlock(&data->lock);
		mod_timer(tl, jiffies + 1 * HZ);
		break;

	default:
		break;
	}
}

/*
 * open() handler: recover the device data from the inode's cdev and stash
 * it in file->private_data for later calls. Always returns 0.
 */
static int deferred_open(struct inode *inode, struct file *file)
{
	file->private_data = container_of(inode->i_cdev,
					  struct my_device_data, cdev);

	pr_info("[deferred_open] Device opened\n");

	return 0;
}

/* release() handler: only logs the event; there is nothing to tear down. */
static int deferred_release(struct inode *inode, struct file *file)
{
	pr_info( "[deferred_release] Device released\n" );

	return 0;
}

/*
 * ioctl() handler.
 *
 *  MY_IOCTL_TIMER_SET    - set flag to TIMER_TYPE_SET and arm the timer
 *                          @arg seconds ahead.
 *  MY_IOCTL_TIMER_CANCEL - delete the timer and reset the flag.
 *  MY_IOCTL_TIMER_ALLOC  - set flag to TIMER_TYPE_ALLOC and arm the timer
 *                          @arg seconds ahead.
 *  MY_IOCTL_TIMER_MON    - start monitoring the process with pid @arg and
 *                          arm the timer one second ahead.
 *
 * Returns 0 on success, -ENOTTY for an unknown command, or the error from
 * get_proc() (-ESRCH / -ENOMEM) for MY_IOCTL_TIMER_MON.
 */
static long deferred_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct my_device_data *my_data = (struct my_device_data *) file->private_data;

	pr_info("[deferred_ioctl] Command: %s\n", ioctl_command_to_string(cmd));

	switch (cmd) {
	case MY_IOCTL_TIMER_SET:
		my_data->flag = TIMER_TYPE_SET;
		mod_timer(&my_data->timer, jiffies + arg * HZ);
		break;
	case MY_IOCTL_TIMER_CANCEL:
		del_timer(&my_data->timer);
		my_data->flag = TIMER_TYPE_NONE;
		break;
	case MY_IOCTL_TIMER_ALLOC:
		my_data->flag = TIMER_TYPE_ALLOC;
		mod_timer(&my_data->timer, jiffies + arg * HZ);
		break;
	case MY_IOCTL_TIMER_MON:
	{
		struct mon_proc *proc = get_proc(arg);

		/*
		 * get_proc() can fail with more than one error code; every
		 * error pointer must be rejected before it reaches list_add().
		 */
		if (IS_ERR(proc))
			return PTR_ERR(proc);

		/* _bh: the timer handler takes the same lock from softirq context. */
		spin_lock_bh(&my_data->lock);
		list_add(&proc->list, &my_data->procs.list);
		spin_unlock_bh(&my_data->lock);

		my_data->flag = TIMER_TYPE_MON;
		mod_timer(&my_data->timer, jiffies + 1 * HZ);
		break;
	}
	default:
		return -ENOTTY;
	}
	return 0;
}

/* File operations exposed by the "deferred" character device. */
struct file_operations my_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= deferred_ioctl,
	.release	= deferred_release,
	.open		= deferred_open,
};

/*
 * Module entry point: reserve the device number, initialise all device
 * state (flag, work item, lock, list, timer) and only then publish the
 * character device.
 *
 * Returns 0 on success or the negative error from register_chrdev_region()
 * or cdev_add(); on a cdev_add() failure the device number is released.
 */
static int deferred_init(void)
{
	int err;

	pr_info("[deferred_init] Init module\n");
	err = register_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1, MODULE_NAME);
	if (err) {
		pr_info("[deffered_init] register_chrdev_region: %d\n", err);
		return err;
	}

	dev.flag = TIMER_TYPE_NONE;
	INIT_WORK(&dev.work, work_handler);
	spin_lock_init(&dev.lock);
	INIT_LIST_HEAD(&dev.procs.list);

	/*
	 * The timer must be ready before cdev_add(): once the device is
	 * live, an ioctl may call mod_timer() on it at any moment.
	 */
	timer_setup(&dev.timer, timer_handler, 0);

	cdev_init(&dev.cdev, &my_fops);
	err = cdev_add(&dev.cdev, MKDEV(MY_MAJOR, MY_MINOR), 1);
	if (err) {
		pr_info("[deferred_init] cdev_add: %d\n", err);
		unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1);
		return err;
	}

	return 0;
}

/*
 * Module exit hook: withdraw the character device, release its device
 * number, stop the timer and any pending work, then release every node
 * still on the monitored-process list.
 */
static void deferred_exit(void)
{
	struct mon_proc *entry, *next;

	pr_info("[deferred_exit] Exit module\n" );

	cdev_del(&dev.cdev);
	unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1);

	/* Wait for a running timer callback, then for a running work item. */
	del_timer_sync(&dev.timer);
	cancel_work_sync(&dev.work);

	list_for_each_entry_safe(entry, next, &dev.procs.list, list) {
		/* Drop the task reference held by the node before discarding it. */
		put_task_struct(entry->task);
		list_del(&entry->list);
		kfree(entry);
	}
}

module_init(deferred_init);
module_exit(deferred_exit);

块设备驱动

概述

数据结构有点复杂, 建议看原资料.
说一下request, bio, bio_vec.

一个bio是上层给块层的一次IO请求, 这些IO请求对应的内存不一定物理连续, 所以又细分为物理连续的内存向量bio_vec(又称片段segment).

I/O调度算法可将连续的bio合并成一个请求,请求是bio经由I/O调度进行调整后的结果,因此一个request可以包含多个bio。当bio被提交给I/O调度器时,I/O调度器可能会将这个bio插入现存的请求中,也可能生成新的请求。

对应三个遍历api.
__rq_for_each_bio()遍历一个request的所有bio。

1
2
3
/*
 * Walk the bio chain of a request: start at (rq)->bio and follow bi_next
 * until it is NULL. The leading "if" skips the loop entirely when the
 * request has no bio attached.
 */
#define __rq_for_each_bio(_bio,rq)  \
if((rq->bio)) \
for(_bio = (rq)->bio; _bio ; _bio = _bio->bi_next)

bio_for_each_segment()遍历一个bio的所有bio_vec。

1
2
3
4
5
6
7
8

/*
 * Walk the segments of one bio starting from iterator position "start".
 * On each pass "bvl" receives the bio_vec for the current position
 * (bio_iter_iovec) and the iterator is advanced by that segment's length;
 * the loop ends when the iterator's remaining size (bi_size) reaches zero.
 */
#define __bio_for_each_segment(bvl, bio, iter, start) \
for (iter = (start); \
(iter).bi_size && \
((bvl = bio_iter_iovec((bio), (iter))), 1); \
bio_advance_iter((bio), &(iter), (bvl).bv_len))
/* Same walk, starting from the bio's own current iterator (bi_iter). */
#define bio_for_each_segment(bvl, bio, iter) \
__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)

rq_for_each_segment() 迭代遍历一个request所有bio中的所有segment

1
2
3
4

/*
 * Walk every segment of every bio in a request by nesting the two loops
 * above: _iter.bio tracks the current bio and _iter.iter the position
 * inside it.
 */
#define rq_for_each_segment(bvl, _rq, _iter) \
__rq_for_each_bio(_iter.bio, _rq) \
bio_for_each_segment(bvl, _iter.bio, _iter.iter)

练习

ram-disk

主要的关系在概述部分说了, 再提一个page的访问问题.

在访问bvec.bv_page时, 需要先建立映射.

1
2
3
4
5
6
/* Map the segment's page to get a kernel virtual address for it. */
char* buffer = kmap_atomic(bvec.bv_page);
if(buffer)
{
// read from / write to buffer
}
/* Release the temporary mapping once the access is finished. */
kunmap_atomic(buffer);

在不同的架构中, page与virt的对应关系不同. 比如在ARM64架构中, 不存在高端内存的说法, 线性映射区完整映射了所有的物理内存, 所以每个page对应的虚拟内存都可以直接访问.而ARM架构中, 高端内存需要动态映射再进行访问.
而kmap中有对非高端内存的特化, 所以即使在ARM64架构中也不会建立多余的映射.

以及blk_mq_ops->queue_rq是在原子上下文中调用的, 不能阻塞.

1
2
3
4
5
6
7
8
9
/*
 * Return a kernel virtual address for @page. Preemption and page faults are
 * disabled on entry in every case. A page that is not in high memory is
 * resolved directly with page_address(); only a highmem page goes through
 * kmap_atomic_high_prot() to get a mapping created with protection @prot.
 */
static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
return kmap_atomic_high_prot(page, prot);
}
/* kmap_atomic() is kmap_atomic_prot() with the default protection kmap_prot. */
#define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
/*
* SO2 - Block device drivers lab (#7)
* Linux - Exercise #1, #2, #3, #6 (RAM Disk)
*/

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <linux/genhd.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blk_types.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
#include <linux/vmalloc.h>

MODULE_DESCRIPTION("Simple RAM Disk");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");


#define KERN_LOG_LEVEL KERN_ALERT

#define MY_BLOCK_MAJOR 240
#define MY_BLKDEV_NAME "mybdev"
#define MY_BLOCK_MINORS 1
#define NR_SECTORS 128

#define KERNEL_SECTOR_SIZE 512

/* TODO 6: use bios for read/write requests */
#define USE_BIO_TRANSFER 1


/*
 * State of the RAM disk. The single instance, g_dev, is filled in by
 * create_block_device() and torn down by delete_block_device().
 */
static struct my_block_dev {
struct blk_mq_tag_set tag_set;
struct request_queue *queue;
struct gendisk *gd;
/* backing store allocated with vmalloc(); "size" bytes long */
u8 *data;
/* capacity in bytes (NR_SECTORS * KERNEL_SECTOR_SIZE) */
size_t size;
} g_dev;

/* open() hook for the block device: nothing to set up, always returns 0. */
static int my_block_open(struct block_device *bdev, fmode_t mode)
{
return 0;
}

/* release() hook for the block device: intentionally empty. */
static void my_block_release(struct gendisk *gd, fmode_t mode)
{
}

/* Block device operations attached to the gendisk in create_block_device(). */
static const struct block_device_operations my_block_ops = {
	.release	= my_block_release,
	.open		= my_block_open,
	.owner		= THIS_MODULE,
};

/*
 * Copy @len bytes between @buffer and the RAM disk, starting at @sector.
 *
 * @dir non-zero means write (buffer -> device); zero means read
 * (device -> buffer). A transfer that would run past the end of the device
 * is silently ignored.
 */
static void my_block_transfer(struct my_block_dev *dev, sector_t sector,
		unsigned long len, char *buffer, int dir)
{
	size_t offset;

	/*
	 * Validate without forming "offset + len" (or an unchecked
	 * sector * size product), either of which could wrap around and let
	 * an out-of-range request through.
	 */
	if (sector > dev->size / KERNEL_SECTOR_SIZE)
		return;
	offset = sector * KERNEL_SECTOR_SIZE;
	if (len > dev->size - offset)
		return;

	if (dir)
		memcpy(dev->data + offset, buffer, len);
	else
		memcpy(buffer, dev->data + offset, len);
}

/* to transfer data using bio structures enable USE_BIO_TRANFER */
#if USE_BIO_TRANSFER == 1
/*
 * Service a request segment by segment: each segment's page is mapped, the
 * data is copied to or from the RAM disk at the iterator's current sector,
 * and the mapping is dropped again.
 */
static void my_xfer_request(struct my_block_dev *dev, struct request *req)
{
	struct bio_vec bvec;
	struct req_iterator iter;

	rq_for_each_segment(bvec, req, iter) {
		char *buffer = kmap_atomic(bvec.bv_page);

		if (buffer) {
			/* The segment's data starts bv_offset bytes into the page. */
			my_block_transfer(dev, iter.iter.bi_sector, bvec.bv_len,
					  buffer + bvec.bv_offset,
					  rq_data_dir(req) == WRITE);
			kunmap_atomic(buffer);
		}
	}
}
#endif

/*
 * blk-mq queue_rq handler: start the request, reject passthrough (non-fs)
 * requests, log the request parameters, perform the transfer, and complete
 * the request.
 *
 * The request is always completed here with blk_mq_end_request(), so the
 * function always returns BLK_STS_OK: returning an error status as well
 * would make the block layer complete the same request a second time.
 */
static blk_status_t my_block_request(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct my_block_dev *dev = hctx->queue->queuedata;

	blk_mq_start_request(rq);

	if (blk_rq_is_passthrough(rq)) {
		pr_info("Skip non-fs request\n");
		blk_mq_end_request(rq, BLK_STS_IOERR);
		/* Already completed (with an error) above. */
		return BLK_STS_OK;
	}

	pr_info("request received\n");
	pr_info("start_sector: %llu, total_size: %u, data_size:%u, direction: %d\n",
		blk_rq_pos(rq), blk_rq_bytes(rq), blk_rq_cur_bytes(rq),
		rq_data_dir(rq));

#if USE_BIO_TRANSFER == 1
	my_xfer_request(dev, rq);
#else
	my_block_transfer(dev, blk_rq_pos(rq), blk_rq_cur_bytes(rq),
			  bio_data(rq->bio), rq_data_dir(rq));
#endif

	blk_mq_end_request(rq, BLK_STS_OK);
	return BLK_STS_OK;
}

/* blk-mq operations for the RAM disk; only the request handler is provided. */
static struct blk_mq_ops my_queue_ops = {
.queue_rq = my_block_request,
};

/*
 * Bring up the RAM disk described by @dev: allocate the backing store, set
 * up the blk-mq tag set and request queue, then allocate, configure and
 * register the gendisk.
 *
 * Returns 0 on success or a negative errno. On failure, the labels at the
 * bottom undo the steps that had already succeeded, in reverse order.
 */
static int create_block_device(struct my_block_dev *dev)
{
int err;

/* Backing store: NR_SECTORS sectors of KERNEL_SECTOR_SIZE bytes each. */
dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE;
dev->data = vmalloc(dev->size);
if (dev->data == NULL) {
printk(KERN_ERR "vmalloc: out of memory\n");
err = -ENOMEM;
goto out_vmalloc;
}

/* Initialize tag set. */
dev->tag_set.ops = &my_queue_ops;
dev->tag_set.nr_hw_queues = 1;
dev->tag_set.queue_depth = 128;
dev->tag_set.numa_node = NUMA_NO_NODE;
dev->tag_set.cmd_size = 0;
dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
err = blk_mq_alloc_tag_set(&dev->tag_set);
if (err) {
printk(KERN_ERR "blk_mq_alloc_tag_set: can't allocate tag set\n");
goto out_alloc_tag_set;
}

/* Allocate queue. */
dev->queue = blk_mq_init_queue(&dev->tag_set);
if (IS_ERR(dev->queue)) {
printk(KERN_ERR "blk_mq_init_queue: out of memory\n");
/* NOTE(review): reports -ENOMEM rather than PTR_ERR(dev->queue). */
err = -ENOMEM;
goto out_blk_init;
}
blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE);
/* Lets my_block_request() find the device through hctx->queue->queuedata. */
dev->queue->queuedata = dev;

/* initialize the gendisk structure */
dev->gd = alloc_disk(MY_BLOCK_MINORS);
if (!dev->gd) {
printk(KERN_ERR "alloc_disk: failure\n");
err = -ENOMEM;
goto out_alloc_disk;
}

dev->gd->major = MY_BLOCK_MAJOR;
dev->gd->first_minor = 0;
dev->gd->fops = &my_block_ops;
dev->gd->queue = dev->queue;
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock");
set_capacity(dev->gd, NR_SECTORS);

add_disk(dev->gd);

return 0;

/* Error unwinding: each label releases one earlier step, then falls through. */
out_alloc_disk:
blk_cleanup_queue(dev->queue);
out_blk_init:
blk_mq_free_tag_set(&dev->tag_set);
out_alloc_tag_set:
vfree(dev->data);
out_vmalloc:
return err;
}

/*
 * Module entry point: register the block major number, then create the RAM
 * disk. If the disk cannot be created the major number is released again.
 *
 * Returns 0 on success or the negative error of the step that failed.
 */
static int __init my_block_init(void)
{
	int err;

	err = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
	if (err < 0) {
		pr_err("register_blkdev failed\n");
		return err;
	}

	err = create_block_device(&g_dev);
	if (err < 0) {
		/* Name the step that actually failed, not the previous one. */
		pr_err("create_block_device failed\n");
		goto out;
	}

	return 0;

out:
	unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
	return err;
}

static void delete_block_device(struct my_block_dev *dev)
{
if (dev->gd) {
del_gendisk(dev->gd);
put_disk(dev->gd);
}

if (dev->queue)
blk_cleanup_queue(dev->queue);
if (dev->tag_set.tags)
blk_mq_free_tag_set(&dev->tag_set);
if (dev->data)
vfree(dev->data);
}

/* Module exit hook: destroy the RAM disk, then give back the major number. */
static void __exit my_block_exit(void)
{
	struct my_block_dev *disk = &g_dev;

	delete_block_device(disk);

	unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
}

module_init(my_block_init);
module_exit(my_block_exit);

relay-disk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
* SO2 Lab - Block device drivers (#7)
* Linux - Exercise #4, #5 (Relay disk - bio)
*/

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

MODULE_AUTHOR("SO2");
MODULE_DESCRIPTION("Relay disk");
MODULE_LICENSE("GPL");

#define KERN_LOG_LEVEL KERN_ALERT

#define PHYSICAL_DISK_NAME "/dev/vdb"
#define KERNEL_SECTOR_SIZE 512

#define BIO_WRITE_MESSAGE "def"


/* pointer to physical device structure */
static struct block_device *phys_bdev;

/*
 * Submit a one-sector test bio for sector 0 of @bdev and wait for it.
 *
 * @dir is REQ_OP_WRITE or REQ_OP_READ. For a write, BIO_WRITE_MESSAGE is
 * placed at the start of the page before submission. For a read, the first
 * three bytes of the page are logged after a successful completion.
 *
 * Failures (page allocation, I/O error) are logged; nothing is returned.
 */
static void send_test_bio(struct block_device *bdev, int dir)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
	struct page *page;
	char *buf;
	int err;

	if (!bio) {
		pr_err("[send_test_bio] bio_alloc failed\n");
		return;
	}

	bio_set_dev(bio, bdev);
	bio->bi_opf = dir | REQ_PREFLUSH;
	bio->bi_iter.bi_sector = 0;

	page = alloc_page(GFP_NOIO);
	if (!page) {
		pr_err("[send_test_bio] alloc_page failed\n");
		bio_put(bio);
		return;
	}
	bio_add_page(bio, page, KERNEL_SECTOR_SIZE, 0);

	if (dir == REQ_OP_WRITE) {
		buf = kmap_atomic(page);
		if (buf) {
			snprintf(buf, PAGE_SIZE, "%s", BIO_WRITE_MESSAGE);
			kunmap_atomic(buf);
		}
	}

	err = submit_bio_wait(bio);
	if (err) {
		pr_err("[send_test_bio] submit_bio_wait: %d\n", err);
	} else if (dir != REQ_OP_WRITE) {
		buf = kmap_atomic(page);
		if (buf) {
			/* Cast so bytes >= 0x80 are not sign-extended when printed. */
			pr_info("%02x, %02x, %02x\n",
				(unsigned char)buf[0],
				(unsigned char)buf[1],
				(unsigned char)buf[2]);
			kunmap_atomic(buf);
		}
	}

	bio_put(bio);
	__free_page(page);
}

static struct block_device *open_disk(char *name)
{
struct block_device *bdev;

/* TODO 4: get block device in exclusive mode */
bdev = blkdev_get_by_path(name,FMODE_WRITE|FMODE_READ|FMODE_EXCL,THIS_MODULE);

return bdev;
}

/*
 * Module entry point: open the physical disk and send one test read bio.
 *
 * Returns 0 on success, -EINVAL if the disk cannot be opened.
 */
static int __init relay_init(void)
{
	phys_bdev = open_disk(PHYSICAL_DISK_NAME);
	/*
	 * A failed blkdev_get_by_path() comes back as an ERR_PTR() value, so
	 * a plain NULL test would let the error pointer reach the bio code.
	 */
	if (IS_ERR_OR_NULL(phys_bdev)) {
		printk(KERN_ERR "[relay_init] No such device\n");
		return -EINVAL;
	}

	send_test_bio(phys_bdev, REQ_OP_READ);

	return 0;
}

static void close_disk(struct block_device *bdev)
{
/* TODO 4: put block device */
blkdev_put(bdev,FMODE_WRITE|FMODE_READ|FMODE_EXCL);
}

/* Module exit hook: send one test write bio, then release the physical disk. */
static void __exit relay_exit(void)
{
	struct block_device *disk = phys_bdev;

	send_test_bio(disk, REQ_OP_WRITE);
	close_disk(disk);
}

module_init(relay_init);
module_exit(relay_exit);

文件系统驱动

练习

无设备的文件系统

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
/*
* SO2 Lab - Filesystem drivers
* Exercise #1 (no-dev filesystem)
*/

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>

MODULE_DESCRIPTION("Simple no-dev filesystem");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

#define MYFS_BLOCKSIZE 4096
#define MYFS_BLOCKSIZE_BITS 12
#define MYFS_MAGIC 0xbeefcafe
#define LOG_LEVEL KERN_ALERT

/* declarations of functions that are part of operation structures */

static int myfs_mknod(struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev);
static int myfs_create(struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl);
static int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);

/* TODO 2: define super_operations structure */
/* Superblock operations; both entries are generic helpers. */
static const struct super_operations myfs_super_operations = {
	.statfs		= simple_statfs,
	.drop_inode	= generic_delete_inode,
};

/*
 * Inode operations for directories. Object creation goes through the myfs_*
 * functions defined in this file; everything else uses the simple_* helpers.
 */
static const struct inode_operations myfs_dir_inode_operations = {
	/* creation: implemented in this file */
	.create		= myfs_create,
	.mkdir		= myfs_mkdir,
	.mknod		= myfs_mknod,
	/* everything else: generic helpers */
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.rmdir		= simple_rmdir,
	.rename		= simple_rename,
};


/* File operations for regular files; every entry is a generic helper. */
static const struct file_operations myfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
	.fsync		= noop_fsync,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
};

/* Inode operations for regular files: attribute get/set via generic helpers. */
static const struct inode_operations myfs_file_inode_operations = {
	.getattr	= simple_getattr,
	.setattr	= simple_setattr,
};

/* Address space operations, all taken from the simple_* helpers. */
static const struct address_space_operations myfs_aops = {
	.write_begin	= simple_write_begin,
	.write_end	= simple_write_end,
	.readpage	= simple_readpage,
};

/*
 * Allocate and initialise a new inode on @sb.
 *
 * @dir:  parent directory passed to inode_init_owner(); NULL for the root.
 * @mode: file type and permission bits for the new inode.
 *
 * Directories get myfs_dir_inode_operations / simple_dir_operations and one
 * extra link; regular files get the myfs file operation tables. Other file
 * types receive only the common initialisation.
 *
 * Returns the inode, or NULL if new_inode() fails.
 */
struct inode *myfs_get_inode(struct super_block *sb, const struct inode *dir,
		int mode)
{
	struct inode *inode = new_inode(sb);

	if (!inode)
		return NULL;

	inode->i_mode = mode;
	inode_init_owner(inode, dir, mode);

	/* One clock read, so the three timestamps are guaranteed to agree. */
	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);

	inode->i_ino = get_next_ino();
	inode->i_mapping->a_ops = &myfs_aops;

	if (S_ISDIR(mode)) {
		inode->i_op = &myfs_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* Bump the link count so the directory's "." entry is counted. */
		inc_nlink(inode);
	} else if (S_ISREG(mode)) {
		inode->i_op = &myfs_file_inode_operations;
		inode->i_fop = &myfs_file_operations;
	}

	return inode;
}

/* TODO 5: Implement myfs_mknod, myfs_create, myfs_mkdir. */
/*
 * Create a new inode of type/permissions @mode in directory @dir and bind
 * it to @dentry. An extra dentry reference is taken with dget(), and the
 * directory's mtime and ctime are updated. @dev is not used.
 *
 * Returns 0 on success, -ENOSPC if no inode could be obtained.
 */
int myfs_mknod(struct inode *dir,
		struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode *node;

	pr_info("myfs_mknod called\n");

	node = myfs_get_inode(dir->i_sb, dir, mode);
	if (node == NULL)
		return -ENOSPC;

	d_instantiate(dentry, node);
	dget(dentry);

	dir->i_ctime = current_time(dir);
	dir->i_mtime = dir->i_ctime;

	return 0;
}


/*
 * Create a regular file: forwards to myfs_mknod() with S_IFREG added to
 * @mode. @excl is not used. Returns myfs_mknod()'s result.
 */
int myfs_create(struct inode *dir, struct dentry *dentry,
		umode_t mode, bool excl)
{
	return myfs_mknod(dir, dentry, mode | S_IFREG, 0);
}

/*
 * Create a directory named by @dentry inside @dir.
 *
 * The caller's permission bits in @mode are combined with S_IFDIR;
 * passing S_IFDIR alone would create the directory with mode 000.
 *
 * Returns 0 on success or the error from myfs_mknod().
 */
int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int error;

	error = myfs_mknod(dir, dentry, mode | S_IFDIR, 0);
	if (error)
		return error;

	/* the new sub-directory adds one link to its parent */
	inc_nlink(dir);

	return 0;
}


/*
 * Fill in the superblock for a myfs mount and create its root directory.
 *
 * @sb:     superblock to initialise
 * @data:   mount data (unused)
 * @silent: unused
 *
 * Returns 0 on success or -ENOMEM when the root inode or root dentry
 * cannot be allocated.
 */
static int myfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *root_inode;
	struct dentry *root_dentry;

	sb->s_blocksize = MYFS_BLOCKSIZE;
	sb->s_blocksize_bits = MYFS_BLOCKSIZE_BITS;
	sb->s_magic = MYFS_MAGIC;
	sb->s_op = &myfs_super_operations;
	sb->s_maxbytes = MAX_LFS_FILESIZE;

	/* mode = directory & access rights (755) */
	root_inode = myfs_get_inode(sb, NULL,
			S_IFDIR | S_IRWXU | S_IRGRP |
			S_IXGRP | S_IROTH | S_IXOTH);
	/* check before the first dereference below */
	if (!root_inode)
		return -ENOMEM;

	printk(LOG_LEVEL "root inode has %u link(s)\n", root_inode->i_nlink);

	/*
	 * d_make_root() drops the inode reference itself when it fails,
	 * so no iput() here: a second one would be a double put.
	 */
	root_dentry = d_make_root(root_inode);
	if (!root_dentry)
		return -ENOMEM;
	sb->s_root = root_dentry;

	return 0;
}

/*
 * Mount callback: myfs has no backing device, so mount_nodev() is used
 * with myfs_fill_super() as the superblock initialiser. @dev_name is
 * unused.
 */
static struct dentry *myfs_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	struct dentry *root;

	root = mount_nodev(fs_type, flags, data, myfs_fill_super);

	return root;
}

/* TODO 1: define file_system_type structure */
/* Filesystem type descriptor registered by myfs_init(). */
struct file_system_type my_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "myfs",
	.mount		= myfs_mount,
	.kill_sb	= kill_litter_super,
};

/* Module entry point: register the myfs filesystem type. */
static int __init myfs_init(void)
{
	int rc = register_filesystem(&my_fs_type);

	if (rc == 0)
		return 0;

	printk(LOG_LEVEL "register_filesystem failed\n");
	return rc;
}

/* Module exit point: unregister the filesystem type added by myfs_init(). */
static void __exit myfs_exit(void)
{
	unregister_filesystem(&my_fs_type);
}

module_init(myfs_init);
module_exit(myfs_exit);

基于块设备的文件系统

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
/*
* SO2 Lab - Filesystem drivers
* Exercise #2 (dev filesystem)
*/

#include <linux/buffer_head.h>
#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include "minfs.h"

MODULE_DESCRIPTION("Simple filesystem");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

#define LOG_LEVEL KERN_ALERT


/* In-memory per-superblock state, stored in super_block->s_fs_info. */
struct minfs_sb_info {
/* copied from the on-disk superblock's version field in minfs_fill_super() */
__u8 version;
/* inode allocation bitmap, copied from the on-disk superblock */
unsigned long imap;
/* buffer head holding the on-disk superblock; released in minfs_put_super() */
struct buffer_head *sbh;
};

/* minfs-private inode wrapper; recovered from a VFS inode via container_of(). */
struct minfs_inode_info {
/* block number passed to sb_bread() when reading this inode's data */
__u16 data_block;
/* embedded VFS inode handed out by minfs_alloc_inode() */
struct inode vfs_inode;
};

/* declarations of functions that are part of operation structures */

static int minfs_readdir(struct file *filp, struct dir_context *ctx);
static struct dentry *minfs_lookup(struct inode *dir,
struct dentry *dentry, unsigned int flags);
static int minfs_create(struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl);

/* dir and inode operation structures */

/* File operations for minfs directories. */
static const struct file_operations minfs_dir_operations = {
	.iterate	= minfs_readdir,
	.read		= generic_read_dir,
};

/* Inode operations for minfs directories: name lookup and file creation. */
static const struct inode_operations minfs_dir_inode_operations = {
	.create		= minfs_create,
	.lookup		= minfs_lookup,
};

/* Address-space operations for minfs inodes, wired to the simple_* helpers. */
static const struct address_space_operations minfs_aops = {
	.write_begin	= simple_write_begin,
	.write_end	= simple_write_end,
	.readpage	= simple_readpage,
};

/* File operations for regular minfs files; all entries are generic helpers. */
static const struct file_operations minfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
};

/* Inode operations for regular minfs files. */
static const struct inode_operations minfs_file_inode_operations = {
	.getattr	= simple_getattr,
};

/*
 * Get the VFS inode with number @ino on superblock @s, reading it from
 * the on-disk inode block when it is not already cached.
 *
 * Returns the inode on success, ERR_PTR(-ENOMEM) when iget_locked()
 * fails, or NULL when the inode block cannot be read (the half-built
 * inode is discarded with iget_failed() in that case).
 */
static struct inode *minfs_iget(struct super_block *s, unsigned long ino)
{
	struct minfs_inode *mi;
	struct buffer_head *bh;
	struct inode *inode;
	struct minfs_inode_info *mii;

	/* Allocate VFS inode. */
	inode = iget_locked(s, ino);
	if (inode == NULL) {
		printk(LOG_LEVEL "error aquiring inode\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Return inode from cache */
	if (!(inode->i_state & I_NEW))
		return inode;

	/* The inode table is read from block index 1. */
	bh = sb_bread(s, 1);
	if (bh == NULL) {
		/* without this check the b_data access below would oops */
		printk(LOG_LEVEL "could not read block\n");
		goto out_bad_sb;
	}

	/* The block is treated as an array of struct minfs_inode. */
	mi = (struct minfs_inode *)bh->b_data + ino;

	/* Fill the VFS inode from the disk inode. */
	inode->i_mode = mi->mode;
	inode->i_size = mi->size;
	inode->i_atime = current_time(inode);
	inode->i_ctime = current_time(inode);
	inode->i_mtime = current_time(inode);
	i_uid_write(inode, mi->uid);
	i_gid_write(inode, mi->gid);

	inode->i_mapping->a_ops = &minfs_aops;

	if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &minfs_dir_inode_operations;
		inode->i_fop = &minfs_dir_operations;

		/* directories carry one extra link */
		inc_nlink(inode);
	}

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &minfs_file_inode_operations;
		inode->i_fop = &minfs_file_operations;
	}

	/* Remember where this inode's data lives. */
	mii = container_of(inode, struct minfs_inode_info, vfs_inode);
	mii->data_block = mi->data_block;

	/* Free resources. */
	brelse(bh);
	unlock_new_inode(inode);

	return inode;

out_bad_sb:
	iget_failed(inode);
	return NULL;
}

static int minfs_readdir(struct file *filp, struct dir_context *ctx)
{
struct buffer_head *bh;
struct minfs_dir_entry *de;
struct minfs_inode_info *mii;
struct inode *inode;
struct super_block *sb;
int over;
int err = 0;

/* TODO 5: Get inode of directory and container inode. */
inode = filp->f_inode;
mii = container_of(inode,struct minfs_inode_info,vfs_inode);
/* TODO 5: Get superblock from inode (i_sb). */
sb = inode->i_sb;
/* TODO 5: Read data block for directory inode. */
bh = sb_bread(sb,mii->data_block);
for (; ctx->pos < MINFS_NUM_ENTRIES; ctx->pos++) {
/* TODO 5: Data block contains an array of
* "struct minfs_dir_entry". Use `de' for storing.
*/
de = (struct minfs_dir_entry*)bh->b_data + ctx->pos;
/* TODO 5: Step over empty entries (de->ino == 0). */
if(de->ino == 0)
continue;
/*
* Use `over` to store return value of dir_emit and exit
* if required.
*/
over = dir_emit(ctx, de->name, MINFS_NAME_LEN, de->ino,
DT_UNKNOWN);
if (over) {
printk(KERN_INFO "Read %s from folder %s, ctx->pos: %lld\n",
de->name,
filp->f_path.dentry->d_name.name,
ctx->pos);
ctx->pos++;
goto done;
}
}

done:
brelse(bh);
out_bad_sb:
return err;
}

/*
* Find dentry in parent folder. Return parent folder's data buffer_head.
*/

static struct minfs_dir_entry *minfs_find_entry(struct dentry *dentry,
struct buffer_head **bhp)
{
struct buffer_head *bh;
struct inode *dir = dentry->d_parent->d_inode;
struct minfs_inode_info *mii = container_of(dir,
struct minfs_inode_info, vfs_inode);
struct super_block *sb = dir->i_sb;
const char *name = dentry->d_name.name;
struct minfs_dir_entry *final_de = NULL;
struct minfs_dir_entry *de;
int i;

/* TODO 6: Read parent folder data block (contains dentries).
* Fill bhp with return value.
*/
bh = sb_bread(sb,mii->data_block);
for (i = 0; i < MINFS_NUM_ENTRIES; i++) {
/* TODO 6: Traverse all entries, find entry by name
* Use `de' to traverse. Use `final_de' to store dentry
* found, if existing.
*/
de = (struct minfs_dir_entry*)bh->b_data + i;
if(de->ino == 0)
continue;

if(!strcmp(name,de->name))
{
final_de = de;
break;
}
}

/* bh needs to be released by caller. */
return final_de;
}

/*
 * Directory lookup: resolve @dentry's name inside @dir.
 *
 * When a matching entry exists its inode is loaded with minfs_iget() and
 * attached to @dentry; otherwise @dentry is added with a NULL inode.
 * @flags is unused.
 *
 * Returns NULL on success, or the error pointer from minfs_iget().
 */
static struct dentry *minfs_lookup(struct inode *dir,
		struct dentry *dentry, unsigned int flags)
{
	struct super_block *sb = dir->i_sb;
	struct minfs_dir_entry *de;
	struct buffer_head *bh = NULL;
	struct inode *inode = NULL;

	dentry->d_op = sb->s_root->d_op;

	de = minfs_find_entry(dentry, &bh);
	if (de != NULL) {
		printk(KERN_DEBUG "getting entry: name: %s, ino: %d\n",
			de->name, de->ino);
		inode = minfs_iget(sb, de->ino);
		if (IS_ERR(inode)) {
			/* release the directory block on the error path too */
			brelse(bh);
			return ERR_CAST(inode);
		}
	}

	d_add(dentry, inode);
	brelse(bh);

	printk(KERN_DEBUG "looked up dentry %s\n", dentry->d_name.name);

	return NULL;
}

/*
 * alloc_inode hook: allocate a zeroed minfs_inode_info and return the VFS
 * inode embedded in it, or NULL when the allocation fails. @s is unused.
 */
static struct inode *minfs_alloc_inode(struct super_block *s)
{
	struct minfs_inode_info *info;

	info = kzalloc(sizeof(struct minfs_inode_info), GFP_KERNEL);
	if (info == NULL)
		return NULL;

	/* one-time initialisation of the embedded VFS inode */
	inode_init_once(&info->vfs_inode);

	return &info->vfs_inode;
}

/*
 * destroy_inode hook: free the minfs_inode_info that embeds @inode
 * (the counterpart of minfs_alloc_inode()).
 */
static void minfs_destroy_inode(struct inode *inode)
{
	kfree(container_of(inode, struct minfs_inode_info, vfs_inode));
}

/*
* Create a new VFS inode. Do basic initialization and fill imap.
*/

static struct inode *minfs_new_inode(struct inode *dir)
{
struct super_block *sb = dir->i_sb;
struct minfs_sb_info *sbi = sb->s_fs_info;
struct inode *inode;
int idx;

/* TODO 7: Find first available inode. */
idx = find_first_zero_bit(&sbi->imap,sizeof(sbi->imap));
/* TODO 7: Mark the inode as used in the bitmap and mark
* the superblock buffer head as dirty.
*/
set_bit(1,&sbi->imap); // ? 并发?
mark_buffer_dirty(sbi->sbh);
/* TODO 7: Call new_inode(), fill inode fields
* and insert inode into inode hash table.
*/
inode = new_inode(sb);
/* Actual writing to the disk will be done in minfs_write_inode,
* which will be called at a later time.
*/
if (!inode)
return NULL;

insert_inode_hash(inode);

inode->i_mode = 0;
inode_init_owner(inode,dir,0);
inode->i_atime = current_time(inode);
inode->i_ctime = current_time(inode);
inode->i_mtime = current_time(inode);

inode->i_ino = idx;
inode->i_mapping->a_ops = &minfs_aops;

return inode;
}

/*
* Add dentry link on parent inode disk structure.
*/

static int minfs_add_link(struct dentry *dentry, struct inode *inode)
{
struct buffer_head *bh;
struct inode *dir;
struct super_block *sb;
struct minfs_inode_info *mii;
struct minfs_dir_entry *de;
int i;
int err = 0;

/* TODO 7: Get: directory inode (in inode); containing inode (in mii); superblock (in sb). */
dir = dentry->d_parent->d_inode;
mii = container_of(dir,struct minfs_inode_info,vfs_inode);
sb = dir->i_sb;

/* TODO 7: Read dir data block (use sb_bread). */
bh = sb_bread(sb,mii->data_block);
/* TODO 7: Find first free dentry (de->ino == 0). */
for(i = 0; i < MINFS_NUM_ENTRIES;++i)
{
de = (struct minfs_dir_entry*)bh->b_data + i;
if(de->ino == 0)
break;
}
if(i == MINFS_NUM_ENTRIES)
return -ENOSPC;
/* TODO 7: Place new entry in the available slot. Mark buffer_head
* as dirty. */
de->ino = inode->i_ino;
strncpy(de->name,dentry->d_name.name,MINFS_NAME_LEN);
pr_info("debug:%s",de->name);
mark_buffer_dirty(bh);

out:
brelse(bh);

return err;
}

/*
* Create a VFS file inode. Use minfs_file_... operations.
*/

/*
 * create hook: make a new regular-file inode in @dir, link it under
 * @dentry's name and attach it to @dentry. @excl is unused.
 *
 * Returns 0 on success, -ENOMEM when no inode could be allocated, or the
 * error returned by minfs_add_link().
 */
static int minfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool excl)
{
	struct minfs_inode_info *info;
	struct inode *node;
	int rc;

	node = minfs_new_inode(dir);
	if (!node) {
		printk(LOG_LEVEL "error allocating new inode\n");
		return -ENOMEM;
	}

	node->i_mode = mode;
	node->i_op = &minfs_file_inode_operations;
	node->i_fop = &minfs_file_operations;

	/* the data block number is derived from the inode number */
	info = container_of(node, struct minfs_inode_info, vfs_inode);
	info->data_block = MINFS_FIRST_DATA_BLOCK + node->i_ino;

	rc = minfs_add_link(dentry, node);
	if (rc != 0) {
		/* undo: drop the link count and the reference */
		inode_dec_link_count(node);
		iput(node);
		return rc;
	}

	d_instantiate(dentry, node);
	mark_inode_dirty(node);

	printk(KERN_DEBUG "new file inode created (ino = %lu)\n",
		node->i_ino);

	return 0;
}

/*
* Write VFS inode contents to disk inode.
*/

/*
 * write_inode hook: copy the fields of @inode into its slot of the
 * on-disk inode block and mark that block dirty. @wbc is unused.
 *
 * Returns 0 on success or -ENOMEM when the inode block cannot be read.
 */
static int minfs_write_inode(struct inode *inode,
		struct writeback_control *wbc)
{
	struct minfs_inode_info *info = container_of(inode,
			struct minfs_inode_info, vfs_inode);
	struct minfs_inode *disk;
	struct buffer_head *blk;

	blk = sb_bread(inode->i_sb, MINFS_INODE_BLOCK);
	if (!blk) {
		printk(LOG_LEVEL "could not read block\n");
		return -ENOMEM;
	}

	/* the block is indexed as an array of disk inodes */
	disk = (struct minfs_inode *)blk->b_data + inode->i_ino;

	disk->mode = inode->i_mode;
	disk->uid = i_uid_read(inode);
	disk->gid = i_gid_read(inode);
	disk->size = inode->i_size;
	disk->data_block = info->data_block;

	printk(KERN_DEBUG "mode is %05o; data_block is %d\n", disk->mode,
		info->data_block);

	mark_buffer_dirty(blk);
	brelse(blk);

	printk(KERN_DEBUG "wrote inode %lu\n", inode->i_ino);

	return 0;
}

static void minfs_put_super(struct super_block *sb)
{
struct minfs_sb_info *sbi = sb->s_fs_info;

/* Free superblock buffer head. */
mark_buffer_dirty(sbi->sbh);
brelse(sbi->sbh);

printk(KERN_DEBUG "released superblock resources\n");
}

/* Superblock operations for minfs. */
static const struct super_operations minfs_ops = {
	.alloc_inode	= minfs_alloc_inode,
	.destroy_inode	= minfs_destroy_inode,
	.write_inode	= minfs_write_inode,
	.put_super	= minfs_put_super,
	.statfs		= simple_statfs,
};

/*
 * Allocate and initialise a fresh in-memory inode on @sb (carried over
 * from the myfs exercise; the only reference visible nearby is a
 * commented-out call in minfs_fill_super()).
 *
 * @sb:   superblock the inode belongs to
 * @dir:  parent directory inode handed to inode_init_owner() (may be NULL)
 * @mode: file type and permission bits
 *
 * Returns the new inode, or NULL when new_inode() fails.
 */
struct inode *myfs_get_inode(struct super_block *sb, const struct inode *dir,
		int mode)
{
	struct inode *node;

	node = new_inode(sb);
	if (node == NULL)
		return NULL;

	/* mode and ownership */
	node->i_mode = mode;
	inode_init_owner(node, dir, mode);

	/* timestamps */
	node->i_atime = current_time(node);
	node->i_ctime = current_time(node);
	node->i_mtime = current_time(node);

	/* inode number and page-cache operations */
	node->i_ino = get_next_ino();
	node->i_mapping->a_ops = &minfs_aops;

	if (!S_ISDIR(mode))
		return node;

	/* directories use the libfs helpers and carry one extra link */
	node->i_op = &simple_dir_inode_operations;
	node->i_fop = &simple_dir_operations;
	inc_nlink(node);

	return node;
}

static int minfs_fill_super(struct super_block *s, void *data, int silent)
{
struct minfs_sb_info *sbi;
struct minfs_super_block *ms;
struct inode *root_inode;
struct dentry *root_dentry;
struct buffer_head *bh;
int ret = -EINVAL;

sbi = kzalloc(sizeof(struct minfs_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
s->s_fs_info = sbi;

/* Set block size for superblock. */
if (!sb_set_blocksize(s, MINFS_BLOCK_SIZE))
goto out_bad_blocksize;

/* TODO 2: Read block with superblock. It's the first block on
* the device, i.e. the block with the index 0. This is the index
* to be passed to sb_bread().
*/
bh = sb_bread(s,0);

/* TODO 2: interpret read data as minfs_super_block */
ms = (struct minfs_super_block*)bh->b_data;

/* TODO 2: check magic number with value defined in minfs.h. jump to out_bad_magic if not suitable */
if( ms->magic != MINFS_MAGIC)
goto out_bad_magic;

/* TODO 2: fill super_block with magic_number, super_operations */
s->s_magic = ms->magic;
s->s_op = &minfs_ops;

/* TODO 2: Fill sbi with rest of information from disk superblock
* (i.e. version).
*/
sbi->imap = ms->imap;
sbi->version = ms->version;
sbi->sbh = bh;

/* allocate root inode and root dentry */
/* TODO 2: use myfs_get_inode instead of minfs_iget */
root_inode = minfs_iget(s, MINFS_ROOT_INODE);
// root_inode = myfs_get_inode(s,NULL,
// S_IFDIR | S_IRWXU | S_IRGRP |
// S_IXGRP | S_IROTH | S_IXOTH);
if (!root_inode)
goto out_bad_inode;

root_dentry = d_make_root(root_inode);
if (!root_dentry)
goto out_iput;
s->s_root = root_dentry;

/* Store superblock buffer_head for further use. */
sbi->sbh = bh;

return 0;

out_iput:
iput(root_inode);
out_bad_inode:
printk(LOG_LEVEL "bad inode\n");
out_bad_magic:
printk(LOG_LEVEL "bad magic number\n");
brelse(bh);
out_bad_sb:
printk(LOG_LEVEL "error reading buffer_head\n");
out_bad_blocksize:
printk(LOG_LEVEL "bad block size\n");
s->s_fs_info = NULL;
kfree(sbi);
return ret;
}

/*
 * Mount callback: minfs lives on a block device, so mount_bdev() is used
 * with minfs_fill_super() as the superblock initialiser.
 */
static struct dentry *minfs_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	struct dentry *root;

	root = mount_bdev(fs_type, flags, dev_name, data, minfs_fill_super);

	return root;
}

static struct file_system_type minfs_fs_type = {
.owner = THIS_MODULE,
.name = "minfs",
/* TODO 1: add mount, kill_sb and fs_flags */
.mount = minfs_mount,
.kill_sb = kill_block_super,
};

/* Module entry point: register the minfs filesystem type. */
static int __init minfs_init(void)
{
	int rc = register_filesystem(&minfs_fs_type);

	if (rc == 0)
		return 0;

	printk(LOG_LEVEL "register_filesystem failed\n");
	return rc;
}

/* Module exit point: unregister the filesystem type added by minfs_init(). */
static void __exit minfs_exit(void)
{
	unregister_filesystem(&minfs_fs_type);
}

module_init(minfs_init);
module_exit(minfs_exit);

网络

TCP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
* SO2 - Networking Lab (#10)
*
* Exercise #3, #4: simple kernel TCP socket
*
* Code skeleton.
*/

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <net/sock.h>

MODULE_DESCRIPTION("Simple kernel TCP socket");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

#define LOG_LEVEL KERN_ALERT
#define MY_TCP_PORT 60000
#define LISTEN_BACKLOG 5

#define ON 1
#define OFF 0
#define DEBUG ON

#if DEBUG == ON
#define LOG(s) \
do { \
printk(KERN_DEBUG s "\n"); \
} while (0)
#else
#define LOG(s) \
do {} while (0)
#endif

#define print_sock_address(addr) \
do { \
printk(LOG_LEVEL "connection established to " \
"%pI4:%d\n", \
&addr.sin_addr.s_addr, \
ntohs(addr.sin_port)); \
} while (0)

static struct socket *sock; /* listening (server) socket */
static struct socket *new_sock; /* communication socket */

/*
 * Module entry point: create a TCP socket bound to loopback:MY_TCP_PORT,
 * listen on it, accept one connection and print the peer's address.
 *
 * Returns 0 on success or the negative error of the first failing step;
 * every socket created so far is released on failure.
 */
int __init my_tcp_sock_init(void)
{
	int err;
	/* address to bind on */
	struct sockaddr_in addr = {
		.sin_family	= AF_INET,
		.sin_port	= htons(MY_TCP_PORT),
		.sin_addr	= { htonl(INADDR_LOOPBACK) }
	};
	int addrlen = sizeof(addr);
	/* address of peer */
	struct sockaddr_in raddr;

	/* create listening socket */
	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
			&sock);
	if (err < 0)
		goto out;

	/* bind socket to loopback on port MY_TCP_PORT */
	err = kernel_bind(sock, (struct sockaddr *)&addr, addrlen);
	if (err < 0)
		goto out_release;

	/* start listening */
	err = kernel_listen(sock, LISTEN_BACKLOG);
	if (err < 0)
		goto out_release;

	/*
	 * Accept a connection. When kernel_accept() fails it leaves new_sock
	 * NULL, so only the listening socket may be released here; calling
	 * sock_release(NULL) would oops.
	 */
	err = kernel_accept(sock, &new_sock, 0);
	if (err < 0)
		goto out_release;

	/* get the address of the peer (third argument 1 = peer) and print it */
	err = new_sock->ops->getname(new_sock, (struct sockaddr *)&raddr, 1);
	if (err < 0)
		goto out_release_new_sock;

	print_sock_address(raddr);
	return 0;

out_release_new_sock:
	/* cleanup socket for accepted connection */
	sock_release(new_sock);
	new_sock = NULL;
out_release:
	/* cleanup listening socket */
	sock_release(sock);
	sock = NULL;
out:
	return err;
}

/*
 * Module exit point: release the accepted connection first, then the
 * listening socket.
 */
void __exit my_tcp_sock_exit(void)
{
	sock_release(new_sock);		/* communication socket */
	sock_release(sock);		/* listening socket */
}

module_init(my_tcp_sock_init);
module_exit(my_tcp_sock_exit);

UDP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
* SO2 - Networking Lab (#10)
*
* Bonus: simple kernel UDP socket
*
* Code skeleton.
*/

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/net.h>
#include <linux/in.h>
#include <net/sock.h>

MODULE_DESCRIPTION("Simple kernel UDP socket");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

#define LOG_LEVEL KERN_ALERT
#define MY_UDP_LOCAL_PORT 60000
#define MY_UDP_REMOTE_PORT 60001
#define MY_TEST_MESSAGE "kernelsocket\n"

#define ON 1
#define OFF 0
#define DEBUG ON

#if DEBUG == ON
#define LOG(s) \
do { \
printk(KERN_DEBUG s "\n"); \
} while (0)
#else
#define LOG(s) \
do {} while (0)
#endif

/*
 * Print the IPv4 address and port stored in a struct sockaddr_in.
 * Uses the %pI4 printk extension, like the TCP example, instead of the
 * NIPQUAD helpers that current kernels no longer provide.
 */
#define print_sock_address(addr)				\
	do {							\
		printk(LOG_LEVEL "connection established to "	\
			"%pI4:%d\n",				\
			&(addr).sin_addr.s_addr,		\
			ntohs((addr).sin_port));		\
	} while (0)

static struct socket *sock; /* UDP server */

/* send datagram */
static int my_udp_msgsend(struct socket *s)
{
/* address to send to */
struct sockaddr_in raddr = {
.sin_family = AF_INET,
.sin_port = htons(MY_UDP_REMOTE_PORT),
.sin_addr = { htonl(INADDR_LOOPBACK) }
};
int raddrlen = sizeof(raddr);
/* message */
struct msghdr msg;
struct iovec iov;
char *buffer = MY_TEST_MESSAGE;
int len = strlen(buffer) + 1;

/* TODO 1: build message */
msg.msg_name = &raddr;
msg.msg_namelen = raddrlen;
msg.msg_flags = 0;
msg.msg_control = NULL;
msg.msg_controllen = 0;

iov.iov_base = buffer;
iov.iov_len = len;
/* TODO 1: send the message down the socket and return the
* error code.
*/
kernel_sendmsg(s,&msg,(struct kvec*)&iov,1,len);

return 0;
}

/*
 * Module entry point: create a UDP socket, bind it to
 * loopback:MY_UDP_LOCAL_PORT and send the test datagram.
 *
 * Returns 0 on success or the negative error of the failing step; the
 * socket is released when bind or send fails.
 */
int __init my_udp_sock_init(void)
{
	/* address to bind on */
	struct sockaddr_in local = {
		.sin_family	= AF_INET,
		.sin_port	= htons(MY_UDP_LOCAL_PORT),
		.sin_addr	= { htonl(INADDR_LOOPBACK) }
	};
	int local_len = sizeof(local);
	int err;

	err = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP,
			&sock);
	if (err < 0)
		return err;

	err = kernel_bind(sock, (struct sockaddr *)&local, local_len);
	if (err >= 0) {
		/* send message */
		err = my_udp_msgsend(sock);
		if (err >= 0)
			return 0;
		printk(LOG_LEVEL "can't send message\n");
	}

	/* bind or send failed: drop the socket */
	sock_release(sock);
	return err;
}

/* Module exit point: release the UDP socket created in my_udp_sock_init(). */
void __exit my_udp_sock_exit(void)
{
	sock_release(sock);
}

module_init(my_udp_sock_init);
module_exit(my_udp_sock_exit);

内存映射

kmmap

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/*
* PSO - Memory Mapping Lab(#11)
*
* Exercise #1: memory mapping using kmalloc'd kernel areas
*/

#include <linux/version.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/pgtable.h>
#include <linux/sched/mm.h>
#include <linux/sched.h>
#include <asm/io.h>
#include <asm/highmem.h>
#include <linux/rmap.h>
#include <asm/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include "../test/mmap-test.h"

MODULE_DESCRIPTION("simple mmap driver");
MODULE_AUTHOR("PSO");
MODULE_LICENSE("Dual BSD/GPL");

#define MY_MAJOR 42
/* how many pages do we actually kmalloc */
#define NPAGES 16

/* character device basic structure */
static struct cdev mmap_cdev;

/* pointer to kmalloc'd area */
static void *kmalloc_ptr;

/* pointer to the kmalloc'd area, rounded up to a page boundary */
static char *kmalloc_area;

/* open() handler: only logs the call; always succeeds. */
static int my_open(struct inode *inode, struct file *filp)
{
	pr_info("open\n");

	return 0;
}

/* release() handler: nothing to clean up; always succeeds. */
static int my_release(struct inode *inode, struct file *filp)
{
	return 0;
}

static int my_read(struct file *file, char __user *user_buffer,
size_t size, loff_t *offset)
{
if(!kmalloc_area)
return -EFAULT;
/* TODO 2: check size doesn't exceed our mapped area size */
if(size > (NPAGES)*PAGE_SIZE)
return -EFAULT;
/* TODO 2: copy from mapped area to user buffer */
if(copy_to_user(user_buffer,kmalloc_area,size))
return -EFAULT;

return size;
}

static int my_write(struct file *file, const char __user *user_buffer,
size_t size, loff_t *offset)
{
if(!kmalloc_area)
return -EFAULT;
/* TODO 2: check size doesn't exceed our mapped area size */
if(size > (NPAGES)*PAGE_SIZE)
return -EFAULT;
/* TODO 2: copy from user buffer to mapped area */
if(copy_from_user(kmalloc_area,user_buffer,size))
return -EFAULT;
return size;
}

/*
 * mmap() handler: map the kmalloc'd area into the caller's VMA with a
 * single remap_pfn_range() call.
 *
 * Returns 0 on success, -EIO when the request is larger than the area or
 * the remap fails, and -EFAULT when the area is not allocated.
 */
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long length = vma->vm_end - vma->vm_start;
	/* a page frame number does not fit in an int in general */
	unsigned long pfn;
	int ret;

	/* do not map more than we can */
	if (length > NPAGES * PAGE_SIZE)
		return -EIO;

	if (!kmalloc_area)
		return -EFAULT;

	/* page frame number of the start of the area */
	pfn = virt_to_phys(kmalloc_area) >> PAGE_SHIFT;

	/* map the whole area in one piece */
	ret = remap_pfn_range(vma, vma->vm_start, pfn, length,
			vma->vm_page_prot);
	if (ret < 0) {
		pr_err("map address area failed\n");
		return -EIO;
	}

	return 0;
}

/* File operations of the mmap character device. */
static const struct file_operations mmap_fops = {
	.owner		= THIS_MODULE,
	.open		= my_open,
	.release	= my_release,
	.read		= my_read,
	.write		= my_write,
	.mmap		= my_mmap,
};

/*
 * seq_file show callback: log every memory mapping of the current process
 * and write the total mapped size (in bytes) to @seq. @v is unused.
 *
 * Always returns 0; a task without an mm reports a total of 0.
 */
static int my_seq_show(struct seq_file *seq, void *v)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma_iterator;
	unsigned long total = 0;

	/* Get current process' mm_struct (takes a reference). */
	mm = get_task_mm(current);
	if (!mm)
		goto out;

	/*
	 * Hold the mmap lock for reading while walking the VMA list, so a
	 * concurrent mmap()/munmap() in another thread cannot change the
	 * list under us.
	 */
	mmap_read_lock(mm);
	for (vma_iterator = mm->mmap; vma_iterator;
	     vma_iterator = vma_iterator->vm_next) {
		pr_info("0x%lx -- 0x%lx\n", vma_iterator->vm_start,
			vma_iterator->vm_end);

		total += vma_iterator->vm_end - vma_iterator->vm_start;
	}
	mmap_read_unlock(mm);

	/* Release mm_struct. */
	mmput(mm);

out:
	/* write the total count to file */
	seq_printf(seq, "%lu", total);

	return 0;
}

/*
 * procfs open handler: attach my_seq_show() as the single-record display
 * function. Returns the result of single_open().
 */
static int my_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, my_seq_show, NULL);
}

/* procfs operations of the entry created in my_init(). */
static const struct proc_ops my_proc_ops = {
	.proc_open	= my_seq_open,
	.proc_release	= single_release,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
};

/*
 * Module entry point: create the procfs entry, register the character
 * device region, allocate and reserve the kmalloc'd area, and add the cdev.
 *
 * Returns 0 on success or a negative error code; every step completed
 * before a failure is undone in reverse order.
 */
static int __init my_init(void)
{
	int ret = 0;
	int i;

	/* create a new entry in procfs */
	if (!proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_ops)) {
		/* report the failure instead of returning 0 */
		ret = -ENOMEM;
		goto out;
	}

	ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap");
	if (ret < 0) {
		pr_err("could not register region\n");
		goto out_no_chrdev;
	}

	/* allocate NPAGES+2 pages using kmalloc */
	kmalloc_ptr = kmalloc((NPAGES + 2) * PAGE_SIZE, GFP_KERNEL);
	if (!kmalloc_ptr) {
		ret = -ENOMEM;
		/* undo the region and the proc entry as well */
		goto out_unreg;
	}

	/* round kmalloc_ptr up to the next page boundary */
	kmalloc_area = (char *)round_up((unsigned long)kmalloc_ptr, PAGE_SIZE);

	/* mark pages as reserved (same range my_exit() clears) */
	for (i = 0; i < NPAGES + 2; ++i)
		SetPageReserved(virt_to_page(kmalloc_ptr + i * PAGE_SIZE));

	/*
	 * Write the marker at the start of every page of the page-aligned
	 * area, i.e. the memory that read/write/mmap actually expose.
	 */
	for (i = 0; i < NPAGES; ++i)
		memcpy(kmalloc_area + i * PAGE_SIZE, "\xaa\xbb\xcc\xdd", 4);

	/* Init device. */
	cdev_init(&mmap_cdev, &mmap_fops);
	ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1);
	if (ret < 0) {
		pr_err("could not add device\n");
		goto out_kfree;
	}

	return 0;

out_kfree:
	/* clear the reservation before handing the pages back */
	for (i = 0; i < NPAGES + 2; ++i)
		ClearPageReserved(virt_to_page(kmalloc_ptr + i * PAGE_SIZE));
	kfree(kmalloc_ptr);
	kmalloc_ptr = NULL;
	kmalloc_area = NULL;
out_unreg:
	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
out_no_chrdev:
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
out:
	return ret;
}

/*
 * Module exit point: remove the cdev, clear the page reservations, free
 * the kmalloc'd area, then drop the device region and the procfs entry.
 */
static void __exit my_exit(void)
{
	int page;

	cdev_del(&mmap_cdev);

	/* clear reservation on pages and free mem. */
	for (page = 0; page < NPAGES + 2; page++)
		ClearPageReserved(virt_to_page(kmalloc_ptr + page * PAGE_SIZE));

	kfree(kmalloc_ptr);
	kmalloc_ptr = NULL;
	kmalloc_area = NULL;

	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);

	/* remove proc entry */
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
}

module_init(my_init);
module_exit(my_exit);

vmmap

注意: remap_vmalloc_range 只能映射带有 VM_USERMAP 标志的 vmalloc 区域(即内核中的 vm_struct, 而不是用户态的 vma), 这样的区域需要通过 vmalloc_user 分配获得.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
/*
* PSO - Memory Mapping Lab(#11)
*
* Exercise #2: memory mapping using vmalloc'd kernel areas
*/

#include <linux/version.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
#include <asm/io.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include "../test/mmap-test.h"


MODULE_DESCRIPTION("simple mmap driver");
MODULE_AUTHOR("PSO");
MODULE_LICENSE("Dual BSD/GPL");

/* Major number used with MKDEV(MY_MAJOR, 0) for this driver's device region. */
#define MY_MAJOR 42

/*
 * Number of pages allocated with vmalloc; the read, write and mmap
 * handlers bound their sizes by NPAGES * PAGE_SIZE.
 */
#define NPAGES 16

/* Character device object initialised with mmap_fops and added in my_init(). */
static struct cdev mmap_cdev;

/*
 * Start of the vmalloc'd buffer. Assigned directly from vmalloc() in
 * my_init() and reset to NULL in my_exit() after vfree().
 */
static char *vmalloc_area;

/* open() handler: nothing to set up per file, always reports success. */
static int my_open(struct inode *inode, struct file *filp)
{
	return 0;
}

/* release() handler: no per-file state to tear down, always returns 0. */
static int my_release(struct inode *inode, struct file *filp)
{
	return 0;
}

/*
 * read() handler: copy the beginning of the vmalloc'd buffer to user space.
 *
 * The file offset is not consulted; every call copies from the start of
 * the buffer. A request larger than the buffer is clamped to
 * NPAGES * PAGE_SIZE and reported as a short read instead of being
 * rejected, which is the conventional read() behaviour.
 *
 * Returns the number of bytes copied, or -EFAULT if the buffer is not
 * allocated or the user buffer cannot be written.
 */
static ssize_t my_read(struct file *file, char __user *user_buffer,
		size_t size, loff_t *offset)
{
	if (!vmalloc_area)
		return -EFAULT;

	/* Never copy past the end of the allocated area. */
	if (size > NPAGES * PAGE_SIZE)
		size = NPAGES * PAGE_SIZE;

	if (copy_to_user(user_buffer, vmalloc_area, size))
		return -EFAULT;

	return size;
}

/*
 * write() handler: copy user data to the beginning of the vmalloc'd buffer.
 *
 * The file offset is not consulted; every call writes from the start of
 * the buffer. A request larger than the buffer is clamped to
 * NPAGES * PAGE_SIZE and reported as a short write instead of being
 * rejected.
 *
 * Returns the number of bytes stored, or -EFAULT if the buffer is not
 * allocated or the user buffer cannot be read.
 */
static ssize_t my_write(struct file *file, const char __user *user_buffer,
		size_t size, loff_t *offset)
{
	if (!vmalloc_area)
		return -EFAULT;

	/* Never write past the end of the allocated area. */
	if (size > NPAGES * PAGE_SIZE)
		size = NPAGES * PAGE_SIZE;

	if (copy_from_user(vmalloc_area, user_buffer, size))
		return -EFAULT;

	return size;
}
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
int ret;
long length = vma->vm_end - vma->vm_start;
unsigned long start = vma->vm_start;
char *vmalloc_area_ptr = vmalloc_area;
unsigned long pfn;
int i;

if (length > NPAGES * PAGE_SIZE)
return -EIO;

if(!vmalloc_area)
return -EFAULT;

/* TODO 1: map pages individually */
for(i = 0; i < (length >> PAGE_SHIFT); ++i)
{
pfn = vmalloc_to_pfn(vmalloc_area_ptr);
ret = remap_pfn_range(vma,start,pfn,PAGE_SIZE,vma->vm_page_prot);
if(ret)
{
pr_err("remap_pfn_range failed");
return ret;
}
vmalloc_area_ptr += PAGE_SIZE;
start += PAGE_SIZE;

}

return ret;
}

/* File operations of the mmap character device. */
static const struct file_operations mmap_fops = {
	.owner   = THIS_MODULE,
	.open    = my_open,
	.release = my_release,
	.read    = my_read,
	.write   = my_write,
	.mmap    = my_mmap,
};

/*
 * seq_file show callback: report the total size of the calling process'
 * memory mappings.
 *
 * Every mapping's [vm_start, vm_end) range is logged with pr_info() and
 * its length added to a running total, which is then written to @seq as
 * an unsigned decimal number. A task without an mm reports 0.
 *
 * The VMA list is walked with the mmap lock held for reading so that it
 * cannot change underneath the loop.
 *
 * Always returns 0.
 */
static int my_seq_show(struct seq_file *seq, void *v)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long total = 0;

	/* get_task_mm() returns NULL for tasks that have no user address space. */
	mm = get_task_mm(current);
	if (mm) {
		mmap_read_lock(mm);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			pr_info("0x%lx -- 0x%lx\n", vma->vm_start, vma->vm_end);
			total += vma->vm_end - vma->vm_start;
		}
		mmap_read_unlock(mm);

		/* Drop the reference taken by get_task_mm(). */
		mmput(mm);
	}

	seq_printf(seq, "%lu", total);

	return 0;
}

/*
 * procfs open handler: bind my_seq_show() to the file through
 * single_open() and hand back its result.
 */
static int my_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, my_seq_show, NULL);
}


/* procfs operations: single-record seq_file backed by my_seq_open(). */
static const struct proc_ops my_proc_ops = {
	.proc_open    = my_seq_open,
	.proc_release = single_release,
	.proc_read    = seq_read,
	.proc_lseek   = seq_lseek,
};

/*
 * Module initialisation.
 *
 * Creates the procfs entry, reserves the character device number,
 * allocates NPAGES pages with vmalloc(), marks each page reserved and
 * writes the 4-byte pattern aa bb cc dd at the start of each one, then
 * registers the character device.
 *
 * Returns 0 on success or a negative errno. On failure everything set up
 * so far is undone, including clearing the reserved flag before the
 * buffer is handed back to vfree().
 */
static int __init my_init(void)
{
	int ret;
	int i;

	if (!proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_ops)) {
		pr_err("could not create proc entry\n");
		return -ENOMEM;
	}

	ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap");
	if (ret < 0) {
		pr_err("could not register region\n");
		goto out_no_chrdev;
	}

	vmalloc_area = vmalloc(NPAGES * PAGE_SIZE);
	if (!vmalloc_area) {
		pr_err("could not allocate memory\n");
		/* ret still holds 0 from the call above; report the failure. */
		ret = -ENOMEM;
		goto out_unreg;
	}

	for (i = 0; i < NPAGES; ++i) {
		char *page_addr = vmalloc_area + i * PAGE_SIZE;

		SetPageReserved(vmalloc_to_page(page_addr));
		memcpy(page_addr, "\xaa\xbb\xcc\xdd", 4);
	}

	cdev_init(&mmap_cdev, &mmap_fops);
	ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1);
	if (ret < 0) {
		pr_err("could not add device\n");
		goto out_vfree;
	}

	return 0;

out_vfree:
	/* Pages must not still be marked reserved when they are freed. */
	for (i = 0; i < NPAGES; ++i)
		ClearPageReserved(vmalloc_to_page(vmalloc_area + i * PAGE_SIZE));
	vfree(vmalloc_area);
	vmalloc_area = NULL;
out_unreg:
	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
out_no_chrdev:
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
	return ret;
}

/*
 * Module teardown: delete the character device, clear the reserved flag
 * on each of the NPAGES pages, free the vmalloc'd buffer, then release
 * the device number region and the procfs entry.
 */
static void __exit my_exit(void)
{
	int idx = 0;

	cdev_del(&mmap_cdev);

	/* The reserved flag is cleared page by page before the buffer is freed. */
	while (idx < NPAGES) {
		ClearPageReserved(vmalloc_to_page(vmalloc_area + idx * PAGE_SIZE));
		++idx;
	}
	vfree(vmalloc_area);
	vmalloc_area = NULL;

	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
}

module_init(my_init);
module_exit(my_exit);

设备模型

最复杂的一集.

总结一下, 本练习主要做了这样的事:

注册一个叫做bex的总线(bus), 以及总线上的一个初始设备(dev)root. 该总线上的设备类型是bex_device. 该总线上的设备都有两个提供给用户的只读属性, version和type. 这两个属性是在向总线添加设备时创建的. 该总线本身也有两个提供给用户的只写属性, add和del, 用户可以通过访问这两个属性来向总线上添加和删除设备. 看到这里, 发现其实把属性理解为用户态接口更加易懂.

设备应该有对应的驱动程序(driver), 所以有对应的bex_driver驱动类型. 当设备被添加到总线上, 总线会遍历已注册的驱动程序, 对该设备调用总线对应的match函数. 如果匹配成功, 则将该设备与该驱动相关联, 然后调用总线上的probe函数. bex总线的probe函数会直接转给驱动的probe函数处理, 该函数创建一个bex_misc_device类型的设备, 该设备本质上是一个miscdevice.
访问miscdevice的dev属性可以查看对应的设备号, mknod之后便可对该设备进行读写.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#ifndef _BEX_H
#define _BEX_H

#include <linux/device.h>

/* A device on the bex bus: a generic struct device plus bex-specific data. */
struct bex_device {
/* Device type string exposed through the "type" attribute. */
const char *type;
/* Device version number exposed through the "version" attribute. */
int version;
/* Embedded generic device; to_bex_device() maps it back to this struct. */
struct device dev;
};

/*
 * Convert a pointer to the embedded struct device into its enclosing
 * struct bex_device. The parameter is deliberately not called "dev":
 * that token is also the member name handed to container_of() and must
 * not be replaced by the argument.
 */
#define to_bex_device(_dev) container_of(_dev, struct bex_device, dev)

/* A driver for bex devices: bex-specific callbacks around a device_driver. */
struct bex_driver {
/* Device type string this driver declares. */
const char *type;

/* Called for a bex device bound to this driver; returns 0 or an error code. */
int (*probe)(struct bex_device *dev);
/* Called when a bex device is detached from this driver. */
void (*remove)(struct bex_device *dev);

/* Embedded generic driver; to_bex_driver() maps it back to this struct. */
struct device_driver driver;
};

/* Map a pointer to the embedded device_driver back to its struct bex_driver. */
#define to_bex_driver(d) \
	container_of(d, struct bex_driver, driver)

/* Register @drv on the bex bus; returns 0 on success or an error code. */
int bex_register_driver(struct bex_driver *drv);
/* Unregister a driver previously registered with bex_register_driver(). */
void bex_unregister_driver(struct bex_driver *drv);

#endif

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "bex.h"

MODULE_AUTHOR ("Kernel Hacker");
MODULE_LICENSE ("GPL");
MODULE_DESCRIPTION ("BEX bus module");

static int bex_match(struct device *dev, struct device_driver *driver)
{
/* TODO 5: implement the bus match function */
return !strcmp(dev_name(dev),driver->name);
}

/*
 * Bus probe callback: forward to the probe() method of the bex driver
 * bound to @dev and return its result.
 */
static int bex_probe(struct device *dev)
{
	struct bex_driver *drv = to_bex_driver(dev->driver);

	return drv->probe(to_bex_device(dev));
}

/*
 * Bus remove callback: forward to the remove() method of the bex driver
 * bound to @dev. Always returns 0.
 */
static int bex_remove(struct device *dev)
{
	struct bex_driver *drv = to_bex_driver(dev->driver);

	drv->remove(to_bex_device(dev));

	return 0;
}

static int bex_add_dev(const char *name, const char *type, int version);

/*
 * Store handler of the write-only "add" bus attribute.
 *
 * Expects "<name> <type> <version>" in @buf (name and type at most 31
 * characters each) and adds a matching device to the bus.
 *
 * Returns @count on success, -EINVAL if the input cannot be parsed, or
 * the error reported by bex_add_dev(). A failure is never reported as 0:
 * a zero-byte result would hide the error from the writer.
 */
static ssize_t add_store(struct bus_type *bt, const char *buf, size_t count)
{
	char name[32];
	char type[32];
	int version;
	int ret;

	if (sscanf(buf, "%31s %31s %d", name, type, &version) != 3)
		return -EINVAL;

	ret = bex_add_dev(name, type, version);
	if (ret < 0)
		return ret;

	return count;
}
BUS_ATTR_WO(add);

static int bex_del_dev(const char *name);

/*
 * Store handler of the write-only "del" bus attribute.
 *
 * Expects a device name (at most 31 characters) in @buf and removes the
 * device with that name from the bus.
 *
 * Returns @count on success, -EINVAL if the input cannot be parsed, or
 * the error reported by bex_del_dev(). A failure is never reported as 0:
 * a zero-byte result would hide the error from the writer.
 */
static ssize_t del_store(struct bus_type *bt, const char *buf, size_t count)
{
	char name[32];
	int ret;

	if (sscanf(buf, "%31s", name) != 1)
		return -EINVAL;

	ret = bex_del_dev(name);
	if (ret < 0)
		return ret;

	return count;
}
BUS_ATTR_WO(del);


static struct attribute *bex_bus_attrs[] = {
/* TODO 3: add del and add attributes */
&bus_attr_add.attr,
&bus_attr_del.attr,
};
ATTRIBUTE_GROUPS(bex_bus);

/* The "bex" bus: callbacks plus the add/del attribute group. */
struct bus_type bex_bus_type = {
	.name       = "bex",
	.bus_groups = bex_bus_groups,
	.match      = bex_match,
	.probe      = bex_probe,
	.remove     = bex_remove,
};

static ssize_t
type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct bex_device *bex_dev = to_bex_device(dev);

return sprintf(buf, "%s\n", bex_dev->type);
}

static ssize_t
version_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct bex_device *bex_dev = to_bex_device(dev);

return sprintf(buf, "%d\n", bex_dev->version);
}


/*TODO 2: add read-only device attribute to show the type */
DEVICE_ATTR(type,S_IRUSR,type_show,NULL);

/*TODO 2: add read-only device attribute to show the version */
DEVICE_ATTR(version,S_IRUSR,version_show,NULL);

static struct attribute *bex_dev_attrs[] = {
/* TODO 2: add type and version attributes */
&dev_attr_type.attr,
&dev_attr_version.attr
};
ATTRIBUTE_GROUPS(bex_dev);

/*
 * uevent callback: add a "MODALIAS=bex:<device name>" variable to the
 * event environment and return the result of add_uevent_var().
 */
static int bex_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	const char *devname = dev_name(dev);

	return add_uevent_var(env, "MODALIAS=bex:%s", devname);
}

/*
 * Release callback: free the type string and then the bex_device that
 * embeds @dev.
 */
static void bex_dev_release(struct device *dev)
{
	struct bex_device *owner = to_bex_device(dev);

	kfree(owner->type);
	kfree(owner);
}

/* Device type shared by all bex devices: attribute groups and callbacks. */
struct device_type bex_device_type = {
	.groups  = bex_dev_groups,
	.uevent  = bex_dev_uevent,
	.release = bex_dev_release,
};

/*
 * Allocate a bex device and register it on the bex bus.
 *
 * @name:    device name
 * @type:    device type string, duplicated; may be NULL
 * @version: device version
 *
 * The "type" and "version" attribute files are supplied through the
 * groups of bex_device_type, so they are not created again by hand here.
 *
 * Returns 0 on success or a negative errno. Once device_register() has
 * been called the object is owned by the driver core and is only dropped
 * with put_device(), which frees it through the release callback.
 */
static int bex_add_dev(const char *name, const char *type, int version)
{
	struct bex_device *bex_dev;
	int ret;

	bex_dev = kzalloc(sizeof(*bex_dev), GFP_KERNEL);
	if (!bex_dev)
		return -ENOMEM;

	/* A NULL result is only an error when there was a string to copy. */
	bex_dev->type = kstrdup(type, GFP_KERNEL);
	if (type && !bex_dev->type) {
		ret = -ENOMEM;
		goto err_free;
	}
	bex_dev->version = version;

	bex_dev->dev.bus = &bex_bus_type;
	bex_dev->dev.type = &bex_device_type;
	bex_dev->dev.parent = NULL;

	ret = dev_set_name(&bex_dev->dev, "%s", name);
	if (ret)
		goto err_free;

	ret = device_register(&bex_dev->dev);
	if (ret < 0)
		put_device(&bex_dev->dev);

	return ret;

err_free:
	/* The device was never registered, so free it directly. */
	kfree(bex_dev->type);
	kfree(bex_dev);
	return ret;
}

/*
 * Look up the device called @name on the bex bus, remove its "version"
 * and "type" attribute files and unregister it.
 *
 * Returns 0 on success or -EINVAL if no such device exists.
 */
static int bex_del_dev(const char *name)
{
	struct device *found = bus_find_device_by_name(&bex_bus_type, NULL, name);

	if (found == NULL)
		return -EINVAL;

	device_remove_file(found, &dev_attr_version);
	device_remove_file(found, &dev_attr_type);
	device_unregister(found);

	/* Second reference drop, following the lookup above. */
	put_device(found);

	return 0;
}

/*
 * Attach @drv to the bex bus and register its embedded device_driver.
 * Returns the result of driver_register(): 0 on success, an error code
 * otherwise.
 */
int bex_register_driver(struct bex_driver *drv)
{
	drv->driver.bus = &bex_bus_type;

	return driver_register(&drv->driver);
}
EXPORT_SYMBOL(bex_register_driver);

/* Unregister the device_driver embedded in @drv. */
void bex_unregister_driver(struct bex_driver *drv)
{
	struct device_driver *generic = &drv->driver;

	driver_unregister(generic);
}
EXPORT_SYMBOL(bex_unregister_driver);

/*
 * Module initialisation: register the bex bus and add the initial
 * "root" device (no type, version 1).
 *
 * The bus "add" and "del" attribute files are supplied through
 * bex_bus_type.bus_groups, so they are not created again by hand here.
 *
 * Returns 0 on success or a negative errno; if the root device cannot be
 * added the bus is unregistered again.
 */
static int __init my_bus_init (void)
{
	int ret;

	ret = bus_register(&bex_bus_type);
	if (ret < 0)
		return ret;

	ret = bex_add_dev("root", NULL, 1);
	if (ret < 0)
		bus_unregister(&bex_bus_type);

	return ret;
}

static void my_bus_exit (void)
{
/* TODO 1: unregister the bus driver */

bus_remove_file(&bex_bus_type,&bus_attr_add);
bus_remove_file(&bex_bus_type,&bus_attr_del);
bex_del_dev("root");
bus_unregister(&bex_bus_type);
}

module_init (my_bus_init);
module_exit (my_bus_exit);


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include "bex.h"

MODULE_DESCRIPTION("BEX misc driver");
MODULE_AUTHOR("Kernel Hacker");
MODULE_LICENSE("GPL");

/* Size in bytes of each misc device's data buffer. */
#define BUF_SIZE 1024

/* Per-device state of the bex misc driver. */
struct bex_misc_device {
/* Misc device registered for this bex device. */
struct miscdevice misc;
/* The bex device this state belongs to. */
struct bex_device *dev;
/* Storage accessed by the read and write handlers. */
char buf[BUF_SIZE];
};

/* open() handler: nothing to set up per file, always reports success. */
static int my_open(struct inode *inode, struct file *file)
{
	return 0;
}

/* release() handler: no per-file state to tear down, always returns 0. */
static int my_release(struct inode *inode, struct file *file)
{
	return 0;
}

static int my_read(struct file *file, char __user *user_buffer,
size_t size, loff_t *offset)
{
struct bex_misc_device *bmd = (struct bex_misc_device *)file->private_data;
ssize_t len = min(sizeof(bmd->buf) - (ssize_t)*offset, size);

if (len <= 0)
return 0;

if (copy_to_user(user_buffer, bmd->buf + *offset, len))
return -EFAULT;

*offset += len;
return len;
}

static int my_write(struct file *file, const char __user *user_buffer,
size_t size, loff_t *offset)
{
struct bex_misc_device *bmd = (struct bex_misc_device *)file->private_data;
ssize_t len = min(sizeof(bmd->buf) - (ssize_t)*offset, size);

if (len <= 0)
return 0;

if (copy_from_user(bmd->buf + *offset, user_buffer, len))
return -EFAULT;

*offset += len;
return len;
}

/* File operations of the misc device registered for each bex device. */
struct file_operations bex_misc_fops = {
	.owner   = THIS_MODULE,
	.open    = my_open,
	.release = my_release,
	.read    = my_read,
	.write   = my_write,
};

/* Counter used to give each registered misc device a unique name. */
static int bex_misc_count;

/*
 * Probe callback: create and register a misc device for @dev.
 *
 * Devices with a version greater than 1 are refused. Otherwise a
 * bex_misc_device is allocated, named "bex-misc-<n>", registered with a
 * dynamic minor and stored as the driver data of @dev.
 *
 * Returns 0 on success, -EINVAL for an unsupported version, -ENOMEM on
 * allocation failure, or the error reported by misc_register(). Nothing
 * stays allocated on failure.
 */
int bex_misc_probe(struct bex_device *dev)
{
	struct bex_misc_device *bmd;
	char buf[32];
	int ret;

	dev_info(&dev->dev, "%s: %s %d\n", __func__, dev->type, dev->version);

	if (dev->version > 1)
		return -EINVAL;

	bmd = kzalloc(sizeof(*bmd), GFP_KERNEL);
	if (!bmd)
		return -ENOMEM;

	snprintf(buf, sizeof(buf), "bex-misc-%d", bex_misc_count++);
	bmd->misc.name = kstrdup(buf, GFP_KERNEL);
	if (!bmd->misc.name) {
		ret = -ENOMEM;
		goto err_free_bmd;
	}

	bmd->misc.minor = MISC_DYNAMIC_MINOR;
	bmd->misc.parent = &dev->dev;
	bmd->misc.fops = &bex_misc_fops;
	bmd->dev = dev;

	ret = misc_register(&bmd->misc);
	if (ret) {
		dev_err(&dev->dev, "could not register misc device\n");
		goto err_free_name;
	}

	/* Publish the state only once the device really exists. */
	dev_set_drvdata(&dev->dev, bmd);

	return 0;

err_free_name:
	kfree(bmd->misc.name);
err_free_bmd:
	kfree(bmd);
	return ret;
}

/*
 * Remove callback: deregister the misc device stored as driver data of
 * @dev and free everything the probe callback allocated for it,
 * including the duplicated device name.
 */
void bex_misc_remove(struct bex_device *dev)
{
	struct bex_misc_device *bmd;

	bmd = (struct bex_misc_device *)dev_get_drvdata(&dev->dev);

	misc_deregister(&bmd->misc);

	/* The name was allocated with kstrdup() and is owned by this driver. */
	kfree(bmd->misc.name);
	kfree(bmd);
}

/* The bex driver for devices of type "misc". */
struct bex_driver bex_misc_driver = {
	.type   = "misc",
	.driver = {
		.name  = "bex_misc",
		.owner = THIS_MODULE,
	},
	.probe  = bex_misc_probe,
	.remove = bex_misc_remove,
};

/*
 * Module initialisation: register the bex misc driver and return the
 * result of bex_register_driver().
 */
static int my_init(void)
{
	return bex_register_driver(&bex_misc_driver);
}

static void my_exit(void)
{
/* TODO 4: unregister the driver */
bex_unregister_driver(&bex_misc_driver);
}

module_init(my_init);
module_exit(my_exit);

  • 版权声明: 本博客所有文章除特别声明外,著作权归作者所有。转载请注明出处!
  • Copyrights © 2022-2024 翰青HanQi

请我喝杯咖啡吧~

支付宝
微信