Linux kernel Lab学习笔记

2024-10-24

Linux内核

字数统计: 11.2k | 阅读时长≈ 63 分钟

只是做lab时的记录, 笔者并不认为文中的代码部分有什么阅读的价值

内核模块

概述

执行上下文

我们可以根据内核执行的情况，把上下文分为两种：进程上下文和中断上下文。如果我们是因为系统调用而在内核中运行代码，或者是在内核线程中运行，那么我们就在进程上下文中。如果我们是在响应中断或执行延迟操作的函数时运行，那么我们就在中断上下文中。

锁定

首先对于线程上下文，获取了自旋锁，使用普通的spin_lock是没有禁止本地中断的，完全可能出现，获取锁后，发生中断，中断上下文又访问临界区，这样肯定不行。所以在线程上下文中一定会使用 spin_lock_irqsave来保护临界代码区域。
而中断上下文中，访问临界代码区域，就复杂些。首先需要对中断有简单了解，如果是在中断处理程序的上半部分中访问临界代码区域，考虑到linux中断处理上半部分的中断屏蔽机制，会暂时屏蔽同优先级和低优先级的中断，所以除非有更高优先级的中断中有访问临界代码区域（外部中断其实都是一个优先级），通常使用spin_lock保护即可。这个保护是一定需要的，有可能线程上下文中先获取了该锁，那么此时中断上下文就要先等一等才能获取到该锁（同步），再进入临界代码区域。在多核系统中，线程上下文和中断上下文可能在两个CPU核心上执行。线程上下文屏蔽本地的中断，是完全有可能在其他cpu核心上产生中断并执行中断处理程序的，这样就会出现并行的情况，两边的锁都是必须要加的。
中断上下文还有一种情况，就是只在中断的下半部分访问临界代码，这时候，其实不需要屏蔽本地CPU的外部中断，仅禁用软中断即可，也就是一个更细粒度的自旋锁。使用spin_lock_bh来保护线程上下文中的临界代码区域即可，可以优化系统性能，更合适，当然，使用上面的spin_lock_irqsave来保护也肯定没有问题的，只是有点“过保护”了，无法响应外部中断，性能会有一点损失。

可抢占性

Linux 使用的是可抢占内核。这里，我们需要明确区分可抢占多任务（preemptive multitasking）和可抢占内核两个概念。可抢占多任务是指，当一个进程在用户空间运行时，一旦其分配的时间片（时间片段）到期，操作系统会强制中断该进程，转而运行另一个进程。而如果一个在内核模式下运行的进程（通常是作为系统调用的结果），可以被中断以便运行另一个进程，那么我们就说这个内核具有可抢占性。

练习

list_proc

遍历进程的task采用for_each_process.

#define for_each_process(p) \
	for (p = &init_task ; (p = next_task(p)) != &init_task ; )

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
/* TODO: add missing headers */
#include <linux/sched.h>
#include <linux/sched/signal.h>

MODULE_DESCRIPTION("List current processes");
MODULE_AUTHOR("Kernel Hacker");
MODULE_LICENSE("GPL");

static int my_proc_init(void)
{
	struct task_struct *p;

	/* TODO: print current process pid and its name */
	p = current;
	pr_info("current: %s -- %d\n",p->comm,p->pid);

	/* TODO: print the pid and name of all processes */
	pr_info("others: \n");

	struct task_struct *cur;

	for_each_process(cur)
	{
    	pr_info("%s -- %d\n",cur->comm,cur->pid);
	}

    //这种遍历方式会跳过p, 因为把p当作头结点了.
	// list_for_each_entry(cur,&p->tasks,tasks)
	// {
    // 	  pr_info("%s -- %d\n",cur->comm,cur->pid);
	// }

	return 0;
}

/* Module exit point: log the name and PID of the task unloading the module. */
static void my_proc_exit(void)
{
	struct task_struct *self = current;

	pr_info("current: %s -- %d\n", self->comm, self->pid);
}

/* Register my_proc_init/my_proc_exit as the module's load and unload hooks. */
module_init(my_proc_init);
module_exit(my_proc_exit);

Memory Info

顺着VMA链表遍历就行, 注意VMA链表不是环状的.

static int my_hello_init(void)
{
	struct task_struct* p = current;
	struct vm_area_struct* cur = p->mm->mmap;


	while(cur)
	{
		if(cur->vm_file)
			printk(KERN_INFO "0x%lx -- 0x%lx -- [%s]\n",cur->vm_start,cur->vm_end,cur->vm_file->f_path.dentry->d_name.name);
		else
			printk(KERN_INFO "0x%lx -- 0x%lx -- [Anonymous]\n",cur->vm_start,cur->vm_end);

		cur = cur->vm_next;
	}

	// if(cur)
	// {
	// 	do{
	// 		printk(KERN_INFO "%lx -- %lx -- ",cur->vm_start,cur->vm_end);
	// 		if(cur->vm_file)
	// 			pr_info("[%s]",cur->vm_file->f_path.dentry->d_name.name);
	// 		else
	// 			pr_info("[Anonymous]");

	// 		cur = cur->vm_next;
	// 	}while(cur != p->mm->mmap);
	// }
	
	return 0;
}

内核api

练习

memory

/*
 * Allocate a task_info record for @pid and stamp it with the current jiffies.
 *
 * Returns the new record, or NULL when the allocation fails.
 */
static struct task_info *task_info_alloc(int pid)
{
	struct task_info *info = kmalloc(sizeof(*info), GFP_KERNEL);

	if (info != NULL) {
		info->pid = pid;
		info->timestamp = jiffies;
	}

	return info;
}

static int memory_init(void)
{
	/* TODO 2: call task_info_alloc for current pid */
	ti1 = task_info_alloc(current->pid);
	/* TODO 2: call task_info_alloc for parent PID */
	ti2 = task_info_alloc(current->parent->pid);
	/* TODO 2: call task_info alloc for next process PID */
	ti3 = task_info_alloc(next_task(current)->pid);
	/* TODO 2: call task_info_alloc for next process of the next process */
	ti4 = task_info_alloc(next_task(next_task(current))->pid);
	return 0;
}

/*
 * Module exit point: print the PID and timestamp stored in each of ti1..ti4
 * and free the records.
 *
 * A record is printed only when it was actually allocated, so a failed
 * allocation at init time cannot turn into a NULL dereference here.
 */
static void memory_exit(void)
{
	if (ti1)
		printk("[task_info] Current:\n\tPID:%d\n\ttimestamp:%lu\n\n", ti1->pid, ti1->timestamp);
	if (ti2)
		printk("[task_info] Parent:\n\tPID:%d\n\ttimestamp:%lu\n\n", ti2->pid, ti2->timestamp);
	if (ti3)
		printk("[task_info] Next:\n\tPID:%d\n\ttimestamp:%lu\n\n", ti3->pid, ti3->timestamp);
	if (ti4)
		printk("[task_info] Next(Next):\n\tPID:%d\n\ttimestamp:%lu\n", ti4->pid, ti4->timestamp);

	/* kfree(NULL) is a no-op, so the pointers need no guard. */
	kfree(ti1);
	kfree(ti2);
	kfree(ti3);
	kfree(ti4);
}

list_full

有两点:

遍历过程中若需要改变链表, 使用list_for_each_safe.

对于侵入式链表来说, 必须先list_del再kfree.

/*
 * Kernel API lab
 * 
 * list-full.c: Working with lists (advanced)
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/sched/signal.h>

MODULE_DESCRIPTION("Full list processing");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

/*
 * One tracked process.
 *
 * pid:       PID the record describes.
 * timestamp: jiffies value stored when the record was created or last
 *            re-added.
 * count:     number of times the PID was re-added after creation.
 * list:      linkage into the module-wide list rooted at "head".
 */
struct task_info {
	pid_t pid;
	unsigned long timestamp;
	atomic_t count;
	struct list_head list;
};

/* Head of the list of task_info records; initialised in list_full_init(). */
static struct list_head head;

/*
 * Allocate a task_info record for @pid with the current jiffies as timestamp
 * and a re-add count of zero. The record is not linked into any list.
 *
 * Returns the new record, or NULL when the allocation fails.
 */
static struct task_info *task_info_alloc(int pid)
{
	struct task_info *info = kmalloc(sizeof(*info), GFP_KERNEL);

	if (info != NULL) {
		info->pid = pid;
		info->timestamp = jiffies;
		atomic_set(&info->count, 0);
	}

	return info;
}

/*
 * Look up the record for @pid in the global list.
 *
 * Returns the matching task_info, or NULL when no record has that PID.
 */
static struct task_info *task_info_find_pid(int pid)
{
	struct task_info *ti;

	list_for_each_entry(ti, &head, list) {
		if (ti->pid == pid)
			return ti;
	}

	return NULL;
}

/*
 * Track @pid in the global list.
 *
 * If a record for @pid already exists its timestamp is refreshed and its
 * count incremented; otherwise a new record is allocated and inserted at the
 * front of the list. When the allocation fails the PID is simply not tracked.
 */
static void task_info_add_to_list(int pid)
{
	struct task_info *ti;

	ti = task_info_find_pid(pid);
	if (ti != NULL) {
		ti->timestamp = jiffies;
		atomic_inc(&ti->count);
		return;
	}

	ti = task_info_alloc(pid);
	if (ti == NULL)
		return;	/* never link a NULL record into the list */

	list_add(&ti->list, &head);
}

static void task_info_add_for_current(void)
{
	task_info_add_to_list(current->pid);
	task_info_add_to_list(current->parent->pid);
	task_info_add_to_list(next_task(current)->pid);
	task_info_add_to_list(next_task(next_task(current))->pid);
}

static void task_info_print_list(const char *msg)
{
	struct list_head *p;
	struct task_info *ti;

	pr_info("%s: [ ", msg);
	list_for_each(p, &head) {
		ti = list_entry(p, struct task_info, list);
		pr_info("(%d, %lu) ", ti->pid, ti->timestamp);
	}
	pr_info("]\n");
}

static void task_info_remove_expired(void)
{
	struct list_head *p, *q;
	struct task_info *ti;

	list_for_each_safe(p, q, &head) {
		ti = list_entry(p, struct task_info, list);
		if (jiffies - ti->timestamp > 3 * HZ && atomic_read(&ti->count) < 5) {
			list_del(p);
			kfree(ti);
		}
	}
}

static void task_info_purge_list(void)
{
	struct list_head *p, *q;
	struct task_info *ti;

	list_for_each_safe(p, q, &head) {
		ti = list_entry(p, struct task_info, list);
		list_del(p);
		kfree(ti);
	}
}

/*
 * Module entry point: initialise the list, track the current task and its
 * neighbours, print the list, then sleep interruptibly for up to five seconds
 * so the records have aged by the time the module is removed.
 *
 * Returns 0 unconditionally.
 */
static int list_full_init(void)
{
	const long nap = 5 * HZ;

	INIT_LIST_HEAD(&head);

	task_info_add_for_current();
	task_info_print_list("after first add");

	set_current_state(TASK_INTERRUPTIBLE);
	schedule_timeout(nap);

	return 0;
}

/*
 * Module exit point: make sure one record survives expiry, remove the expired
 * records, print what is left and finally free everything.
 */
static void list_full_exit(void)
{
	struct task_info *ti;

	/*
	 * list_first_entry() never yields NULL: on an empty list it returns a
	 * pointer computed from the list head itself. The _or_null variant
	 * makes the emptiness check real, so the count is only written into
	 * an actual record.
	 */
	ti = list_first_entry_or_null(&head, struct task_info, list);
	if (ti)
		atomic_set(&ti->count, 5);

	task_info_remove_expired();
	task_info_print_list("after removing expired");
	task_info_purge_list();
}

/* Register list_full_init/list_full_exit as the module's load/unload hooks. */
module_init(list_full_init);
module_exit(list_full_exit);

字符设备驱动

概述

主设备号和次设备号

在 UNIX 中，设备通常有一个唯一的、固定的标识符与之关联。这种传统在 Linux 中得以保留，尽管标识符可以动态分配（出于兼容性的原因，大多数驱动程序仍然使用静态标识符）。这个标识符由两部分组成：主设备号（major）和次设备号（minor）。第一部分用于标识设备类型（如 IDE 硬盘、SCSI 硬盘、串口等），而第二部分用于标识设备本身（如第一个硬盘、第二个串口等）。大多数情况下，主设备号用于标识驱动程序，而次设备号用于标识驱动程序所服务的某个物理设备。通常情况下，一个驱动程序会有一个关联的主设备号，并负责处理与该主设备号关联的所有次设备号。

字符设备的数据结构

inode 代表文件系统视角中的文件。inode 的属性包括文件大小、权限和相关时间。单个 inode 在文件系统中唯一标识一个文件。
file 结构仍然代表单个文件，但更接近用户的视角。file 结构的属性中，有 inode、文件名、文件打开属性和文件位置等。在给定时间内，所有打开的文件都有一个关联的 file 结构。
为了更好地理解 inode 和 file 之间的区别，我们可以使用面向对象编程的类比：如果我们将 inode 视为一个类，那么 file 就是对象，即 inode 类的实例。inode 表示文件的静态映像（inode 没有状态），而 file 表示文件的动态映像（file 具有状态）。

延迟工作

概述

延迟工作是一类内核功能，允许我们安排代码在稍后的时间执行。这些安排的代码可以在进程上下文或中断上下文中运行，具体取决于延迟工作的类型。延迟工作用于补充中断处理程序的功能，因为中断具有重要的要求和限制：

中断处理程序的执行时间必须尽可能短
在中断上下文中，我们不能使用阻塞调用

使用延迟工作，我们可以在中断处理程序中执行最小所需的工作，并安排一个异步操作在稍后的时间运行，以执行其余的操作。

在中断上下文中运行的延迟工作也称为下半部（bottom-half），因为其目的是执行中断处理程序（top-half）之外所剩余的操作。

练习

/*
 * SO2 - Lab 6 - Deferred Work
 *
 * Exercises #3, #4, #5: deferred work
 *
 * Code skeleton.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/sched/task.h>
#include "../include/deferred.h"

/* Device number and name under which the character device is registered. */
#define MY_MAJOR		42
#define MY_MINOR		0
#define MODULE_NAME		"deferred"

/* Values of my_device_data.flag; they select what timer_handler() does. */
#define TIMER_TYPE_NONE		-1
#define TIMER_TYPE_SET		0
#define TIMER_TYPE_ALLOC	1
#define TIMER_TYPE_MON		2

MODULE_DESCRIPTION("Deferred work character device");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

/*
 * One monitored process: the task being watched (a reference is held on it
 * while the entry exists) and the linkage into the device's monitor list.
 */
struct mon_proc {
	struct task_struct *task;
	struct list_head list;
};

/*
 * State of the single "deferred" device instance (dev).
 *
 * cdev:  character device registered in deferred_init().
 * timer: timer armed by the ioctl commands; fires timer_handler().
 * flag:  one of the TIMER_TYPE_* values, set by the last ioctl.
 * work:  work item scheduled from the timer for TIMER_TYPE_ALLOC.
 * procs: only procs.list is used, as the head of the monitored-process list.
 * lock:  protects the monitored-process list.
 */
static struct my_device_data {
	struct cdev cdev;
	/* TODO 1: add timer */
	struct timer_list timer;
	/* TODO 2: add flag */
	int flag;
	/* TODO 3: add work */
	struct work_struct work;
	/* TODO 4: add list for monitored processes */
	struct mon_proc procs;
	/* TODO 4: add spinlock to protect list */
	spinlock_t lock;
} dev;

/*
 * Simulate blocking I/O: sleep interruptibly for up to five seconds, then log
 * a message. Must be called from a context that is allowed to sleep.
 */
static void alloc_io(void)
{
	const long nap = 5 * HZ;

	set_current_state(TASK_INTERRUPTIBLE);
	schedule_timeout(nap);
	pr_info("Yawn! I've been sleeping for 5 seconds.\n");
}

/*
 * Create a monitor entry for the process with PID @pid.
 *
 * On success the returned mon_proc holds a reference on the task; the owner
 * must drop it with put_task_struct() before freeing the entry.
 *
 * Returns the new entry, ERR_PTR(-ESRCH) if no such process exists, or
 * ERR_PTR(-ENOMEM) if the entry cannot be allocated.
 */
static struct mon_proc *get_proc(pid_t pid)
{
	struct task_struct *task;
	struct mon_proc *p;

	/*
	 * The task returned by pid_task() is only guaranteed to stay around
	 * while the RCU read lock is held, so the reference has to be taken
	 * before the lock is dropped.
	 */
	rcu_read_lock();
	task = pid_task(find_vpid(pid), PIDTYPE_PID);
	if (task)
		get_task_struct(task);
	rcu_read_unlock();
	if (!task)
		return ERR_PTR(-ESRCH);

	p = kmalloc(sizeof(*p), GFP_ATOMIC);
	if (!p) {
		/* Do not leak the reference taken above. */
		put_task_struct(task);
		return ERR_PTR(-ENOMEM);
	}

	p->task = task;

	return p;
}


/* TODO 3: define work handler */
/*
 * Work item callback: log which task is executing the work, then perform the
 * simulated blocking I/O. @work is not needed because nothing from the
 * enclosing device structure is used.
 */
static void work_handler(struct work_struct* work)
{
	pr_info("[%s -- %d]: work_handler called\n",current->comm,current->pid);
	alloc_io();
}

// #define ALLOC_IO_DIRECT
/* TODO 3: undef ALLOC_IO_DIRECT*/

/*
 * Timer callback. Behaviour depends on the device flag:
 *   TIMER_TYPE_SET   - log the task the timer interrupted;
 *   TIMER_TYPE_ALLOC - schedule the work item;
 *   TIMER_TYPE_MON   - drop every monitored task whose state is TASK_DEAD
 *                      and re-arm the timer one second ahead.
 */
static void timer_handler(struct timer_list *tl)
{
	struct my_device_data *data = container_of(tl, struct my_device_data, timer);
	struct mon_proc *entry, *next;

	if (data->flag == TIMER_TYPE_SET)
		pr_info("[%s -- %d]: timer expired\n", current->comm, current->pid);

	if (data->flag == TIMER_TYPE_ALLOC)
		schedule_work(&data->work);

	if (data->flag != TIMER_TYPE_MON)
		return;

	spin_lock(&data->lock);
	list_for_each_entry_safe(entry, next, &data->procs.list, list) {
		if (entry->task->state != TASK_DEAD)
			continue;

		pr_info("[%s -- %d] DEAD\n", entry->task->comm, entry->task->pid);
		/* Release the task reference held by the entry, then the entry. */
		put_task_struct(entry->task);
		list_del(&entry->list);
		kfree(entry);
	}
	spin_unlock(&data->lock);

	mod_timer(tl, jiffies + 1 * HZ);
}

/*
 * open() handler: stash the device structure that embeds the inode's cdev in
 * file->private_data for the other file operations. Always returns 0.
 */
static int deferred_open(struct inode *inode, struct file *file)
{
	file->private_data = container_of(inode->i_cdev,
					  struct my_device_data, cdev);

	pr_info("[deferred_open] Device opened\n");

	return 0;
}

/* release() handler: only logs that the device was closed. Always returns 0. */
static int deferred_release(struct inode *inode, struct file *file)
{
	pr_info("[deferred_release] Device released\n");

	return 0;
}

/*
 * ioctl() handler.
 *
 *   MY_IOCTL_TIMER_SET    - arm the timer @arg seconds ahead (log on expiry).
 *   MY_IOCTL_TIMER_CANCEL - delete the timer and clear the flag.
 *   MY_IOCTL_TIMER_ALLOC  - arm the timer @arg seconds ahead (schedule work).
 *   MY_IOCTL_TIMER_MON    - start monitoring the process with PID @arg and
 *                           arm the timer one second ahead.
 *
 * Returns 0 on success, -ENOTTY for an unknown command, or the error reported
 * by get_proc() (-ESRCH or -ENOMEM) for MY_IOCTL_TIMER_MON.
 */
static long deferred_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct my_device_data *my_data = (struct my_device_data*) file->private_data;

	pr_info("[deferred_ioctl] Command: %s\n", ioctl_command_to_string(cmd));

	switch (cmd) {
		case MY_IOCTL_TIMER_SET:
			my_data->flag = TIMER_TYPE_SET;
			mod_timer(&my_data->timer,jiffies+arg*HZ);
			break;
		case MY_IOCTL_TIMER_CANCEL:
			del_timer(&my_data->timer);
			my_data->flag = TIMER_TYPE_NONE;
			break;
		case MY_IOCTL_TIMER_ALLOC:
			my_data->flag = TIMER_TYPE_ALLOC;
			mod_timer(&my_data->timer,jiffies+arg*HZ);
			break;
		case MY_IOCTL_TIMER_MON:
		{
			struct mon_proc* proc = get_proc(arg);

			/*
			 * get_proc() reports every failure as an ERR_PTR;
			 * none of them may be linked into the list.
			 */
			if(IS_ERR(proc))
				return PTR_ERR(proc);

			/* _bh: the timer handler takes the same lock. */
			spin_lock_bh(&my_data->lock);
			list_add(&proc->list,&my_data->procs.list);
			spin_unlock_bh(&my_data->lock);

			my_data->flag = TIMER_TYPE_MON;
			mod_timer(&my_data->timer,jiffies+1*HZ);
			break;
		}
		default:
			return -ENOTTY;
	}
	return 0;
}

/* File operations of the "deferred" character device. */
struct file_operations my_fops = {
	.owner = THIS_MODULE,
	.open = deferred_open,
	.release = deferred_release,
	.unlocked_ioctl = deferred_ioctl,
};

/*
 * Module entry point: reserve the device number, initialise all device state
 * and only then make the character device visible.
 *
 * Returns 0 on success or the error from register_chrdev_region()/cdev_add();
 * on a cdev_add() failure the device number is released again.
 */
static int deferred_init(void)
{
	int err;

	pr_info("[deferred_init] Init module\n");
	err = register_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1, MODULE_NAME);
	if (err) {
		pr_info("[deffered_init] register_chrdev_region: %d\n", err);
		return err;
	}

	/*
	 * Everything the file operations touch (flag, work, lock, list and
	 * timer) is set up before cdev_add(): the device can be opened as soon
	 * as cdev_add() returns.
	 */
	dev.flag = TIMER_TYPE_NONE;
	INIT_WORK(&dev.work,work_handler);
	spin_lock_init(&dev.lock);
	INIT_LIST_HEAD(&dev.procs.list);
	timer_setup(&dev.timer,timer_handler,0);

	cdev_init(&dev.cdev, &my_fops);
	err = cdev_add(&dev.cdev, MKDEV(MY_MAJOR, MY_MINOR), 1);
	if (err) {
		pr_info("[deferred_init] cdev_add: %d\n", err);
		unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1);
		return err;
	}

	return 0;
}

/*
 * Module exit point: remove the character device, stop the timer and the
 * work item, then release every entry left on the monitored-process list.
 */
static void deferred_exit(void)
{
	struct mon_proc *entry, *next;

	pr_info("[deferred_exit] Exit module\n" );

	cdev_del(&dev.cdev);
	unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), 1);

	/* Wait for a running timer handler, then for a running work handler. */
	del_timer_sync(&dev.timer);
	cancel_work_sync(&dev.work);

	list_for_each_entry_safe(entry, next, &dev.procs.list, list) {
		list_del(&entry->list);
		put_task_struct(entry->task);
		kfree(entry);
	}
}

/* Register deferred_init/deferred_exit as the module's load/unload hooks. */
module_init(deferred_init);
module_exit(deferred_exit);

块设备驱动

概述

数据结构有点复杂, 建议看原资料.
说一下request, bio, bio_vec.

一个bio是上层给块层的一次IO请求, 这些IO请求对应的内存不一定物理连续, 所以又细分为物理连续的内存向量bio_vec(又称片段segment).

I/O调度算法可将连续的bio合并成一个请求，请求是bio经由I/O调度进行调整后的结果，因此一个request可以包含多个bio。当bio被提交给I/O调度器时，I/O调度器可能会将这个bio插入现存的请求中，也可能生成新的请求。

对应三个遍历api.
rq_for_each_bio()遍历一个request的所有bio。

1
2
3

/*
 * Iterate over the bios chained on request rq through bi_next, assigning each
 * one to _bio in turn. The loop is skipped when the request has no bio.
 */
#define __rq_for_each_bio(_bio,rq)  \
	if((rq->bio))                \
		for(_bio = (rq)->bio; _bio ; _bio = _bio->bi_next)

bio_for_each_segment()遍历一个bio的所有bio_vec。


/*
 * Iterate over the segments of bio starting from iterator value start: on
 * each pass bvl is loaded with the bio_vec at iter, and iter is then advanced
 * by that segment's length. The loop ends when iter has no bytes left.
 */
#define __bio_for_each_segment(bvl, bio, iter, start) \
	for (iter = (start); \
			(iter).bi_size && \
				((bvl = bio_iter_iovec((bio), (iter))), 1); \
			bio_advance_iter((bio), &(iter), (bvl).bv_len))
/* Same iteration, starting at the bio's own current iterator (bi_iter). */
#define bio_for_each_segment(bvl, bio, iter) \
			__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)

rq_for_each_segment() 迭代遍历一个request所有bio中的所有segment


/*
 * Iterate over every segment of every bio of request _rq: the outer loop
 * walks the bios (_iter.bio), the inner loop the segments of each bio.
 */
#define rq_for_each_segment(bvl, _rq, _iter) \
		__rq_for_each_bio(_iter.bio, _rq) \
			bio_for_each_segment(bvl, _iter.bio, _iter.iter)

练习

ram-disk

主要的关系在概述部分说了, 再提一个page的访问问题.

在访问bvec.bv_page时, 需要先建立映射.

/* Map the segment's page to obtain an address the CPU can use. */
char* buffer = kmap_atomic(bvec.bv_page);
if(buffer)
{
	/* read from / write to buffer */
}
/* Undo the mapping once the access is finished. */
kunmap_atomic(buffer);

在不同的架构中, page与virt的对应关系不同. 比如在ARM64架构中, 不存在高端内存的说法, 线性映射区完整映射了所有的物理内存, 所以每个page对应的虚拟内存都可以直接访问.而ARM架构中, 高端内存需要动态映射再进行访问.
而kmap中有对非高端内存的特化, 所以即使在ARM64架构中也不会建立多余的映射.

以及blk_mq_ops->queue_rq是在原子上下文中调用的, 不能阻塞.

/*
 * Map @page for short-term access with preemption and page faults disabled.
 *
 * A page that is not in high memory needs no new mapping: its existing
 * address is returned directly. Only high-memory pages go through
 * kmap_atomic_high_prot().
 */
static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
	preempt_disable();
	pagefault_disable();
	if (!PageHighMem(page))
		return page_address(page);
	return kmap_atomic_high_prot(page, prot);
}
/* kmap_atomic() is kmap_atomic_prot() with the default protection kmap_prot. */
#define kmap_atomic(page)	kmap_atomic_prot(page, kmap_prot)

/*
 * SO2 - Block device drivers lab (#7)
 * Linux - Exercise #1, #2, #3, #6 (RAM Disk)
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <linux/genhd.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blk_types.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
#include <linux/vmalloc.h>

MODULE_DESCRIPTION("Simple RAM Disk");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");


#define KERN_LOG_LEVEL		KERN_ALERT

/* Major number and name used with register_blkdev()/unregister_blkdev(). */
#define MY_BLOCK_MAJOR		240
#define MY_BLKDEV_NAME		"mybdev"
/* Number of minors passed to alloc_disk(). */
#define MY_BLOCK_MINORS		1
/* Disk capacity in sectors; the backing buffer is NR_SECTORS * 512 bytes. */
#define NR_SECTORS		128

#define KERNEL_SECTOR_SIZE	512

/* TODO 6: use bios for read/write requests */
/* 1: walk every segment of the request; otherwise copy only bio_data(). */
#define USE_BIO_TRANSFER	1


/*
 * State of the RAM disk (single instance g_dev).
 *
 * tag_set: blk-mq tag set backing the request queue.
 * queue:   request queue created from tag_set.
 * gd:      gendisk registered for the device.
 * data:    vmalloc'ed buffer holding the disk contents.
 * size:    size of data in bytes.
 */
static struct my_block_dev {
	struct blk_mq_tag_set tag_set;
	struct request_queue *queue;
	struct gendisk *gd;
	u8 *data;
	size_t size;
} g_dev;

/* open handler of the block device: nothing to set up, always succeeds. */
static int my_block_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

/* release handler of the block device: nothing to tear down. */
static void my_block_release(struct gendisk *gd, fmode_t mode)
{
}

/* Block device operations installed on the gendisk. */
static const struct block_device_operations my_block_ops = {
	.owner = THIS_MODULE,
	.open = my_block_open,
	.release = my_block_release
};

/*
 * Copy @len bytes between @buffer and the RAM disk, starting at @sector.
 *
 * @dir non-zero copies from @buffer into the disk (write); zero copies from
 * the disk into @buffer (read). A transfer that would reach past the end of
 * the disk is silently ignored.
 */
static void my_block_transfer(struct my_block_dev *dev, sector_t sector,
		unsigned long len, char *buffer, int dir)
{
	unsigned long offset;

	/*
	 * Bounds are checked in sector/remaining-bytes terms so that neither
	 * "sector * KERNEL_SECTOR_SIZE" nor "offset + len" can wrap around
	 * and slip past the check.
	 */
	if (sector > dev->size / KERNEL_SECTOR_SIZE)
		return;
	offset = sector * KERNEL_SECTOR_SIZE;
	if (len > dev->size - offset)
		return;

	if (dir)
		memcpy(dev->data + offset, buffer, len);
	else
		memcpy(buffer, dev->data + offset, len);
}

/* to transfer data using bio structures enable USE_BIO_TRANFER */
#if USE_BIO_TRANSFER == 1
/*
 * Service @req segment by segment: each segment's page is mapped and its
 * bv_len bytes at bv_offset are copied to or from the RAM disk at the sector
 * the request iterator currently points to.
 */
static void my_xfer_request(struct my_block_dev *dev, struct request *req)
{
	struct bio_vec bvec;
	struct req_iterator iter;

	rq_for_each_segment(bvec, req, iter) {
		/* kmap_atomic() always yields a usable address; no NULL check. */
		char *buffer = kmap_atomic(bvec.bv_page);

		my_block_transfer(dev, iter.iter.bi_sector, bvec.bv_len,
				  buffer + bvec.bv_offset,
				  rq_data_dir(req) == WRITE);
		kunmap_atomic(buffer);
	}
}
#endif

/*
 * blk-mq queue_rq handler: start the request, reject passthrough requests,
 * log the request parameters, perform the transfer and complete the request.
 *
 * The request is always completed here with blk_mq_end_request(), so the
 * function always returns BLK_STS_OK. Returning an error status after having
 * ended the request would make the block layer complete it a second time.
 */
static blk_status_t my_block_request(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct my_block_dev *dev = hctx->queue->queuedata;

	blk_mq_start_request(rq);

	if (blk_rq_is_passthrough(rq)) {
		pr_info("Skip non-fs request\n");
		/* The failure is reported through the request's own status. */
		blk_mq_end_request(rq, BLK_STS_IOERR);
		return BLK_STS_OK;
	}

	pr_info("request received\n");
	pr_info("start_sector: %llu, total_size: %u, data_size:%d, direction: %d\n",
		(unsigned long long)blk_rq_pos(rq), blk_rq_bytes(rq),
		blk_rq_cur_bytes(rq), rq_data_dir(rq));

#if USE_BIO_TRANSFER == 1
	my_xfer_request(dev, rq);
#else
	my_block_transfer(dev, blk_rq_pos(rq), blk_rq_cur_bytes(rq),
			  bio_data(rq->bio), rq_data_dir(rq));
#endif

	blk_mq_end_request(rq, BLK_STS_OK);
	return BLK_STS_OK;
}

/* blk-mq operations of the RAM disk: only request dispatch is provided. */
static struct blk_mq_ops my_queue_ops = {
	.queue_rq = my_block_request,
};

static int create_block_device(struct my_block_dev *dev)
{
	int err;

	dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE;
	dev->data = vmalloc(dev->size);
	if (dev->data == NULL) {
		printk(KERN_ERR "vmalloc: out of memory\n");
		err = -ENOMEM;
		goto out_vmalloc;
	}

	/* Initialize tag set. */
	dev->tag_set.ops = &my_queue_ops;
	dev->tag_set.nr_hw_queues = 1;
	dev->tag_set.queue_depth = 128;
	dev->tag_set.numa_node = NUMA_NO_NODE;
	dev->tag_set.cmd_size = 0;
	dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	err = blk_mq_alloc_tag_set(&dev->tag_set);
	if (err) {
	    printk(KERN_ERR "blk_mq_alloc_tag_set: can't allocate tag set\n");
	    goto out_alloc_tag_set;
	}

	/* Allocate queue. */
	dev->queue = blk_mq_init_queue(&dev->tag_set);
	if (IS_ERR(dev->queue)) {
		printk(KERN_ERR "blk_mq_init_queue: out of memory\n");
		err = -ENOMEM;
		goto out_blk_init;
	}
	blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE);
	dev->queue->queuedata = dev;

	/* initialize the gendisk structure */
	dev->gd = alloc_disk(MY_BLOCK_MINORS);
	if (!dev->gd) {
		printk(KERN_ERR "alloc_disk: failure\n");
		err = -ENOMEM;
		goto out_alloc_disk;
	}

	dev->gd->major = MY_BLOCK_MAJOR;
	dev->gd->first_minor = 0;
	dev->gd->fops = &my_block_ops;
	dev->gd->queue = dev->queue;
	dev->gd->private_data = dev;
	snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock");
	set_capacity(dev->gd, NR_SECTORS);

	add_disk(dev->gd);

	return 0;

out_alloc_disk:
	blk_cleanup_queue(dev->queue);
out_blk_init:
	blk_mq_free_tag_set(&dev->tag_set);
out_alloc_tag_set:
	vfree(dev->data);
out_vmalloc:
	return err;
}

/*
 * Module entry point: register the block major number, then create the RAM
 * disk. If the disk cannot be created the major number is released again.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init my_block_init(void)
{
	int err;

	err = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
	if (err < 0) {
		pr_err("register_blkdev failed\n");
		return err;
	}

	err = create_block_device(&g_dev);
	if (err < 0) {
		/* Name the step that actually failed. */
		pr_err("create_block_device failed\n");
		unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
		return err;
	}

	return 0;
}

static void delete_block_device(struct my_block_dev *dev)
{
	if (dev->gd) {
		del_gendisk(dev->gd);
		put_disk(dev->gd);
	}

	if (dev->queue)
		blk_cleanup_queue(dev->queue);
	if (dev->tag_set.tags)
		blk_mq_free_tag_set(&dev->tag_set);
	if (dev->data)
		vfree(dev->data);
}

/* Module exit point: destroy the RAM disk, then give back the major number. */
static void __exit my_block_exit(void)
{
	struct my_block_dev *disk = &g_dev;

	delete_block_device(disk);
	unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
}

/* Register my_block_init/my_block_exit as the module's load/unload hooks. */
module_init(my_block_init);
module_exit(my_block_exit);

relay-disk

/*
 * SO2 Lab - Block device drivers (#7)
 * Linux - Exercise #4, #5 (Relay disk - bio)
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

MODULE_AUTHOR("SO2");
MODULE_DESCRIPTION("Relay disk");
MODULE_LICENSE("GPL");

#define KERN_LOG_LEVEL		KERN_ALERT

/* Path of the block device that the test bios are sent to. */
#define PHYSICAL_DISK_NAME	"/dev/vdb"
#define KERNEL_SECTOR_SIZE	512

/* Text placed at the start of the sector by the test write. */
#define BIO_WRITE_MESSAGE	"def"


/* Block device opened in relay_init() and released in relay_exit(). */
static struct block_device *phys_bdev;

static void send_test_bio(struct block_device *bdev, int dir)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
	struct page *page;
	char *buf;

	/* TODO 4: fill bio (bdev, sector, direction) */
	bio_set_dev(bio,bdev);
	bio->bi_opf = dir | REQ_PREFLUSH;
	bio->bi_iter.bi_sector = 0;

	page = alloc_page(GFP_NOIO);
	bio_add_page(bio, page, KERNEL_SECTOR_SIZE, 0);

	/* TODO 5: write message to bio buffer if direction is write */
	/* TODO 4: submit bio and wait for completion */
	/* TODO 4: read data (first 3 bytes) from bio buffer and print it */
	if(dir==REQ_OP_WRITE)
	{
		buf = kmap_atomic(page);
		if(buf)
		{
			snprintf(buf,PAGE_SIZE,BIO_WRITE_MESSAGE);
			kunmap_atomic(buf);
		}
		submit_bio_wait(bio);
	}
	else
	{
		submit_bio_wait(bio);
		buf = kmap_atomic(page);
		if(buf)
		{
			pr_info("% 02x, % 02x, %02x\n",buf[0],buf[1],buf[2]);
			kunmap_atomic(buf);
		}
	}


	bio_put(bio);
	__free_page(page);
}

static struct block_device *open_disk(char *name)
{
	struct block_device *bdev;

	/* TODO 4: get block device in exclusive mode */
	bdev = blkdev_get_by_path(name,FMODE_WRITE|FMODE_READ|FMODE_EXCL,THIS_MODULE);

	return bdev;
}

/*
 * Module entry point: open the physical disk and send one test read bio.
 *
 * Returns 0 on success. If the disk cannot be opened, returns the error
 * encoded in the pointer (or -EINVAL for a NULL pointer) and leaves phys_bdev
 * set to NULL.
 */
static int __init relay_init(void)
{
	phys_bdev = open_disk(PHYSICAL_DISK_NAME);

	/*
	 * blkdev_get_by_path() reports failure with an ERR_PTR value, not
	 * NULL, so a plain NULL test would let an error pointer through to
	 * send_test_bio(). Both forms are handled here.
	 */
	if (IS_ERR_OR_NULL(phys_bdev)) {
		int err = phys_bdev ? PTR_ERR(phys_bdev) : -EINVAL;

		printk(KERN_ERR "[relay_init] No such device\n");
		phys_bdev = NULL;
		return err;
	}

	send_test_bio(phys_bdev, REQ_OP_READ);

	return 0;
}

static void close_disk(struct block_device *bdev)
{
	/* TODO 4: put block device */
	blkdev_put(bdev,FMODE_WRITE|FMODE_READ|FMODE_EXCL);
}

/* Module exit point: send one test write bio, then release the disk. */
static void __exit relay_exit(void)
{
	struct block_device *disk = phys_bdev;

	send_test_bio(disk, REQ_OP_WRITE);
	close_disk(disk);
}

/* Register relay_init/relay_exit as the module's load/unload hooks. */
module_init(relay_init);
module_exit(relay_exit);

文件系统驱动

练习

无设备的文件系统

/*
 * SO2 Lab - Filesystem drivers
 * Exercise #1 (no-dev filesystem)
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>

MODULE_DESCRIPTION("Simple no-dev filesystem");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

/* Superblock parameters: block size (and its log2) and the magic number. */
#define MYFS_BLOCKSIZE		4096
#define MYFS_BLOCKSIZE_BITS	12
#define MYFS_MAGIC		0xbeefcafe
/* Log level prefix used by this file's printk() calls. */
#define LOG_LEVEL		KERN_ALERT

/* declarations of functions that are part of operation structures */

static int myfs_mknod(struct inode *dir,
		struct dentry *dentry, umode_t mode, dev_t dev);
static int myfs_create(struct inode *dir, struct dentry *dentry,
		umode_t mode, bool excl);
static int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);

/* TODO 2: define super_operations structure */
/* Superblock operations: generic helpers for inode dropping and statfs. */
static const struct super_operations myfs_super_operations = {
	.drop_inode = generic_delete_inode,
	.statfs = simple_statfs
};

/*
 * Inode operations for directories: object creation goes through the myfs_*
 * functions in this file, everything else uses the simple_* library helpers.
 */
static const struct inode_operations myfs_dir_inode_operations = {
	/* TODO 5: Fill dir inode operations structure. */
	.mknod = myfs_mknod,
	.create = myfs_create,
	.mkdir = myfs_mkdir,
	.lookup = simple_lookup,
	.rename = simple_rename,
	.link = simple_link,
	.rmdir = simple_rmdir,
	.unlink = simple_unlink,
};


/* File operations for regular files: all generic, page-cache based helpers. */
static const struct file_operations myfs_file_operations = {
	/* TODO 6: Fill file operations structure. */
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
	.fsync		= noop_fsync,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.llseek		= generic_file_llseek,
};

/* Inode operations for regular files: attribute get/set via simple_* helpers. */
static const struct inode_operations myfs_file_inode_operations = {
	/* TODO 6: Fill file inode operations structure. */
	.setattr	= simple_setattr,
	.getattr	= simple_getattr,
};

/* Address space operations installed on every inode by myfs_get_inode(). */
static const struct address_space_operations myfs_aops = {
	/* TODO 6: Fill address space operations structure. */
	.readpage	= simple_readpage,
	.write_begin	= simple_write_begin,
	.write_end	= simple_write_end,
};

/*
 * Allocate and initialise a new inode on @sb.
 *
 * @dir:  parent directory inode, or NULL for the root inode.
 * @mode: file type and permission bits.
 *
 * Directories get the myfs directory inode operations and one extra link;
 * regular files get the myfs file and file-inode operations. Other file types
 * receive only the common initialisation.
 *
 * Returns the new inode, or NULL if new_inode() fails.
 */
struct inode *myfs_get_inode(struct super_block *sb, const struct inode *dir,
		int mode)
{
	struct inode *inode = new_inode(sb);

	if (!inode)
		return NULL;

	/* inode_init_owner() also stores i_mode; no separate assignment. */
	inode_init_owner(inode, dir, mode);
	/* One clock read so that all three timestamps are identical. */
	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
	inode->i_ino = get_next_ino();
	inode->i_mapping->a_ops = &myfs_aops;

	if (S_ISDIR(mode)) {
		inode->i_op = &myfs_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* A directory starts with two links; add one for "." */
		inc_nlink(inode);
	} else if (S_ISREG(mode)) {
		inode->i_op = &myfs_file_inode_operations;
		inode->i_fop = &myfs_file_operations;
	}

	return inode;
}

/* TODO 5: Implement myfs_mknod, myfs_create, myfs_mkdir. */
/*
 * Create a new inode of type/permissions @mode in directory @dir and bind it
 * to @dentry.
 *
 * For anything that is neither a directory nor a regular file (device nodes,
 * FIFOs, sockets) the inode is completed with init_special_inode(), so that
 * @dev takes effect and the node gets usable file operations.
 *
 * Returns 0 on success or -ENOSPC if no inode could be allocated.
 */
int myfs_mknod(struct inode *dir,
		struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode *inode;

	pr_info("myfs_mknod called\n");

	inode = myfs_get_inode(dir->i_sb, dir, mode);
	if (!inode)
		return -ENOSPC;

	if (!S_ISDIR(mode) && !S_ISREG(mode))
		init_special_inode(inode, mode, dev);

	d_instantiate(dentry, inode);
	/* Extra reference pins the dentry; there is no backing store. */
	dget(dentry);
	dir->i_mtime = dir->i_ctime = current_time(dir);

	return 0;
}


/*
 * create() handler: make a regular file by delegating to myfs_mknod() with
 * S_IFREG added to @mode. @excl is not used. Returns myfs_mknod()'s result.
 */
int myfs_create(struct inode *dir, struct dentry *dentry,
		umode_t mode, bool excl)
{
	return myfs_mknod(dir, dentry, mode | S_IFREG, 0);
}

/*
 * mkdir() handler: create a directory through myfs_mknod() and, on success,
 * account for the new directory's ".." entry in the parent's link count.
 *
 * The requested permission bits in @mode are kept; only the file type is
 * added. Passing S_IFDIR alone would create a directory with mode 000.
 *
 * Returns 0 on success or the error from myfs_mknod().
 */
int myfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int error = myfs_mknod(dir, dentry, mode | S_IFDIR, 0);

	if (!error)
		inc_nlink(dir);

	return error;
}


/*
 * Fill in superblock @sb: block size, magic, operations and maximum file
 * size, then create the root directory inode (mode 755) and its dentry.
 *
 * Returns 0 on success or -ENOMEM if the root inode or dentry cannot be
 * created.
 */
static int myfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *root_inode;
	struct dentry *root_dentry;

	sb->s_blocksize = MYFS_BLOCKSIZE;
	sb->s_blocksize_bits = MYFS_BLOCKSIZE_BITS;
	sb->s_magic = MYFS_MAGIC;
	sb->s_op = &myfs_super_operations;
	sb->s_maxbytes = MAX_LFS_FILESIZE;

	/* mode = directory & access rights (755) */
	root_inode = myfs_get_inode(sb, NULL,
			S_IFDIR | S_IRWXU | S_IRGRP |
			S_IXGRP | S_IROTH | S_IXOTH);
	if (!root_inode)
		return -ENOMEM;

	/* Only dereference the inode once it is known to exist. */
	printk(LOG_LEVEL "root inode has %u link(s)\n", root_inode->i_nlink);

	/*
	 * d_make_root() consumes the inode reference even when it fails, so
	 * the error path must not call iput() again.
	 */
	root_dentry = d_make_root(root_inode);
	if (!root_dentry)
		return -ENOMEM;
	sb->s_root = root_dentry;

	return 0;
}

/*
 * mount() handler for a filesystem without a backing device.
 *
 * mount_nodev() allocates the superblock and calls myfs_fill_super() to
 * initialise it, which also sets up the dentry of the root directory.
 * @dev_name is not used.
 */
static struct dentry *myfs_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	struct dentry *root = mount_nodev(fs_type, flags, data, myfs_fill_super);

	return root;
}

/* TODO 1: define file_system_type structure */
/* Filesystem type registered under the name "myfs". */
struct file_system_type my_fs_type = {
	.name = "myfs",
	.mount = myfs_mount,
	.kill_sb = kill_litter_super,
	.owner = THIS_MODULE,
};

/*
 * Module entry point: register the "myfs" filesystem type.
 *
 * Returns 0 on success or the error from register_filesystem(), which is
 * also logged.
 */
static int __init myfs_init(void)
{
	int err = register_filesystem(&my_fs_type);

	if (err)
		printk(LOG_LEVEL "register_filesystem failed\n");

	return err;
}

/* Module exit point: remove the "myfs" filesystem type from the kernel. */
static void __exit myfs_exit(void)
{
	struct file_system_type *fs = &my_fs_type;

	unregister_filesystem(fs);
}

/* Register myfs_init/myfs_exit as the module's load/unload hooks. */
module_init(myfs_init);
module_exit(myfs_exit);

基于块设备的文件系统

/*
 * SO2 Lab - Filesystem drivers
 * Exercise #2 (dev filesystem)
 */

#include <linux/buffer_head.h>
#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include "minfs.h"

MODULE_DESCRIPTION("Simple filesystem");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");

#define LOG_LEVEL	KERN_ALERT


/*
 * In-memory superblock information for minfs.
 * NOTE(review): field meanings are inferred from their names only -- version
 * number, inode bitmap and the buffer_head of the on-disk superblock; confirm
 * against the code that fills this structure.
 */
struct minfs_sb_info {
	__u8 version;
	unsigned long imap;
	struct buffer_head *sbh;
};

/*
 * minfs-private inode data wrapped around the VFS inode. The container is
 * recovered from a struct inode with container_of(..., vfs_inode).
 *
 * data_block: copied from the on-disk inode's data_block field.
 */
struct minfs_inode_info {
	__u16 data_block;
	struct inode vfs_inode;
};

/* declarations of functions that are part of operation structures */

static int minfs_readdir(struct file *filp, struct dir_context *ctx);
static struct dentry *minfs_lookup(struct inode *dir,
		struct dentry *dentry, unsigned int flags);
static int minfs_create(struct inode *dir, struct dentry *dentry,
		umode_t mode, bool excl);

/* dir and inode operation structures */

/* File operations for minfs directories; listing is done by minfs_readdir(). */
static const struct file_operations minfs_dir_operations = {
	.read		= generic_read_dir,
	.iterate	= minfs_readdir,
};

/* Inode operations for minfs directories: name lookup and file creation. */
static const struct inode_operations minfs_dir_inode_operations = {
	.lookup		= minfs_lookup,
	/* TODO 7: Use minfs_create as the create function. */
	.create    = minfs_create
};

/* Address space operations for minfs inodes, all simple_* library helpers. */
static const struct address_space_operations minfs_aops = {
	.readpage       = simple_readpage,
	.write_begin    = simple_write_begin,
	.write_end      = simple_write_end,
};

/* File operations for minfs regular files: generic page-cache based helpers. */
static const struct file_operations minfs_file_operations = {
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
	.llseek		= generic_file_llseek,
};

/* Inode operations for minfs regular files: attribute query only. */
static const struct inode_operations minfs_file_inode_operations = {
	.getattr	= simple_getattr,
};

/*
 * Return the VFS inode for inode number @ino on superblock @s, filling it
 * from the on-disk inode block when it is not already cached.
 *
 * Returns the inode on success, ERR_PTR(-ENOMEM) when iget_locked() fails,
 * or NULL when the inode block cannot be read.
 */
static struct inode *minfs_iget(struct super_block *s, unsigned long ino)
{
	struct minfs_inode *mi;
	struct buffer_head *bh;
	struct inode *inode;
	struct minfs_inode_info *mii;

	/* Allocate VFS inode. */
	inode = iget_locked(s, ino);
	if (inode == NULL) {
		printk(LOG_LEVEL "error aquiring inode\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Return inode from cache */
	if (!(inode->i_state & I_NEW))
		return inode;

	/* The inode table is the second block on the device (index 1). */
	bh = sb_bread(s, 1);
	if (bh == NULL) {
		printk(LOG_LEVEL "could not read block\n");
		goto out_bad_sb;
	}

	/* The block is an array of struct minfs_inode indexed by ino. */
	mi = (struct minfs_inode *)bh->b_data + ino;

	/* Fill the VFS inode from the on-disk inode. */
	inode->i_mode = mi->mode;
	inode->i_size = mi->size;
	inode->i_atime = current_time(inode);
	inode->i_ctime = current_time(inode);
	inode->i_mtime = current_time(inode);
	i_uid_write(inode, mi->uid);
	i_gid_write(inode, mi->gid);

	inode->i_mapping->a_ops = &minfs_aops;

	if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &minfs_dir_inode_operations;
		inode->i_fop = &minfs_dir_operations;

		/* Directories carry one extra link on top of the initial count. */
		inc_nlink(inode);
	}

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &minfs_file_inode_operations;
		inode->i_fop = &minfs_file_operations;
	}

	/* Remember where this inode's data lives. */
	mii = container_of(inode, struct minfs_inode_info, vfs_inode);
	mii->data_block = mi->data_block;

	/* Free resources. */
	brelse(bh);
	unlock_new_inode(inode);

	return inode;

out_bad_sb:
	iget_failed(inode);
	return NULL;
}

static int minfs_readdir(struct file *filp, struct dir_context *ctx)
{
	struct buffer_head *bh;
	struct minfs_dir_entry *de;
	struct minfs_inode_info *mii;
	struct inode *inode;
	struct super_block *sb;
	int over;
	int err = 0;

	/* TODO 5: Get inode of directory and container inode. */
	inode = filp->f_inode;
	mii = container_of(inode,struct minfs_inode_info,vfs_inode);
	/* TODO 5: Get superblock from inode (i_sb). */
	sb = inode->i_sb;
	/* TODO 5: Read data block for directory inode. */
	bh = sb_bread(sb,mii->data_block);
	for (; ctx->pos < MINFS_NUM_ENTRIES; ctx->pos++) {
		/* TODO 5: Data block contains an array of
		 * "struct minfs_dir_entry". Use `de' for storing.
		 */
		de = (struct minfs_dir_entry*)bh->b_data + ctx->pos;
		/* TODO 5: Step over empty entries (de->ino == 0). */
		if(de->ino == 0)
			continue;
		/*
		 * Use `over` to store return value of dir_emit and exit
		 * if required.
		 */
		over = dir_emit(ctx, de->name, MINFS_NAME_LEN, de->ino,
				DT_UNKNOWN);
		if (over) {
			printk(KERN_INFO "Read %s from folder %s, ctx->pos: %lld\n",
				de->name,
				filp->f_path.dentry->d_name.name,
				ctx->pos);
			ctx->pos++;
			goto done;
		}
	}

done:
	brelse(bh);
out_bad_sb:
	return err;
}

/*
 * Find dentry in parent folder. Return parent folder's data buffer_head.
 */

static struct minfs_dir_entry *minfs_find_entry(struct dentry *dentry,
		struct buffer_head **bhp)
{
	struct buffer_head *bh;
	struct inode *dir = dentry->d_parent->d_inode;
	struct minfs_inode_info *mii = container_of(dir,
			struct minfs_inode_info, vfs_inode);
	struct super_block *sb = dir->i_sb;
	const char *name = dentry->d_name.name;
	struct minfs_dir_entry *final_de = NULL;
	struct minfs_dir_entry *de;
	int i;

	/* TODO 6: Read parent folder data block (contains dentries).
	 * Fill bhp with return value.
	 */
	bh = sb_bread(sb,mii->data_block);
	for (i = 0; i < MINFS_NUM_ENTRIES; i++) {
		/* TODO 6: Traverse all entries, find entry by name
		 * Use `de' to traverse. Use `final_de' to store dentry
		 * found, if existing.
		 */
		de = (struct minfs_dir_entry*)bh->b_data + i;
		if(de->ino == 0)
			continue;
		
		if(!strcmp(name,de->name))
		{
			final_de = de;
			break;
		}
	}

	/* bh needs to be released by caller. */
	return final_de;
}

/*
 * Directory lookup: resolve @dentry inside @dir. When a matching directory
 * entry exists its inode is attached to the dentry, otherwise the dentry is
 * added with a NULL inode.
 *
 * Returns NULL on completion, or an ERR_PTR when minfs_iget() reports one.
 */
static struct dentry *minfs_lookup(struct inode *dir,
		struct dentry *dentry, unsigned int flags)
{
	struct super_block *sb = dir->i_sb;
	struct minfs_dir_entry *de;
	struct buffer_head *bh = NULL;
	struct inode *inode = NULL;

	dentry->d_op = sb->s_root->d_op;

	de = minfs_find_entry(dentry, &bh);
	if (de != NULL) {
		printk(KERN_DEBUG "getting entry: name: %s, ino: %d\n",
			de->name, de->ino);
		inode = minfs_iget(sb, de->ino);
		if (IS_ERR(inode)) {
			/* Do not leak the directory buffer on the error path. */
			brelse(bh);
			return ERR_CAST(inode);
		}
	}

	d_add(dentry, inode);
	brelse(bh);

	printk(KERN_DEBUG "looked up dentry %s\n", dentry->d_name.name);

	return NULL;
}

/*
 * super_operations.alloc_inode: allocate a zeroed minfs_inode_info and hand
 * back its embedded, once-initialised VFS inode. Returns NULL when the
 * allocation fails.
 */
static struct inode *minfs_alloc_inode(struct super_block *s)
{
	struct minfs_inode_info *info;

	info = kzalloc(sizeof(struct minfs_inode_info), GFP_KERNEL);
	if (info == NULL)
		return NULL;

	inode_init_once(&info->vfs_inode);

	return &info->vfs_inode;
}

/*
 * super_operations.destroy_inode: free the minfs_inode_info that embeds
 * @inode.
 */
static void minfs_destroy_inode(struct inode *inode)
{
	kfree(container_of(inode, struct minfs_inode_info, vfs_inode));
}

/*
 * Create a new VFS inode. Do basic initialization and fill imap.
 */

static struct inode *minfs_new_inode(struct inode *dir)
{
	struct super_block *sb = dir->i_sb;
	struct minfs_sb_info *sbi = sb->s_fs_info;
	struct inode *inode;
	int idx;

	/* TODO 7: Find first available inode. */
	idx = find_first_zero_bit(&sbi->imap,sizeof(sbi->imap));
	/* TODO 7: Mark the inode as used in the bitmap and mark
	 * the superblock buffer head as dirty.
	 */
	set_bit(1,&sbi->imap); // ? 并发?
	mark_buffer_dirty(sbi->sbh);
	/* TODO 7: Call new_inode(), fill inode fields
	 * and insert inode into inode hash table.
	 */
	inode = new_inode(sb);
	/* Actual writing to the disk will be done in minfs_write_inode,
	 * which will be called at a later time.
	 */
	if (!inode)
		return NULL;

	insert_inode_hash(inode);

	inode->i_mode = 0;
	inode_init_owner(inode,dir,0);
	inode->i_atime = current_time(inode);
	inode->i_ctime = current_time(inode);
	inode->i_mtime = current_time(inode);

	inode->i_ino = idx;
	inode->i_mapping->a_ops = &minfs_aops;

	return inode;
}

/*
 * Add dentry link on parent inode disk structure.
 */

static int minfs_add_link(struct dentry *dentry, struct inode *inode)
{
	struct buffer_head *bh;
	struct inode *dir;
	struct super_block *sb;
	struct minfs_inode_info *mii;
	struct minfs_dir_entry *de;
	int i;
	int err = 0;

	/* TODO 7: Get: directory inode (in inode); containing inode (in mii); superblock (in sb). */
	dir = dentry->d_parent->d_inode;
	mii = container_of(dir,struct minfs_inode_info,vfs_inode);
	sb = dir->i_sb;

	/* TODO 7: Read dir data block (use sb_bread). */
	bh = sb_bread(sb,mii->data_block);
	/* TODO 7: Find first free dentry (de->ino == 0). */
	for(i = 0; i < MINFS_NUM_ENTRIES;++i)
	{
		de = (struct minfs_dir_entry*)bh->b_data + i;
		if(de->ino == 0)
			break;
	}
	if(i == MINFS_NUM_ENTRIES)
		return -ENOSPC;
	/* TODO 7: Place new entry in the available slot. Mark buffer_head
	 * as dirty. */
	de->ino = inode->i_ino;
	strncpy(de->name,dentry->d_name.name,MINFS_NAME_LEN);
	pr_info("debug:%s",de->name);
	mark_buffer_dirty(bh);

out:
	brelse(bh);

	return err;
}

/*
 * inode_operations.create: make a new regular-file inode in @dir and link
 * it under @dentry. The inode uses the minfs_file_... operation tables.
 *
 * Returns 0 on success, -ENOMEM when no inode could be created, or the
 * error reported by minfs_add_link().
 */

static int minfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool excl)
{
	struct minfs_inode_info *info;
	struct inode *child;
	int rc;

	child = minfs_new_inode(dir);
	if (!child) {
		printk(LOG_LEVEL "error allocating new inode\n");
		return -ENOMEM;
	}

	child->i_mode = mode;
	child->i_op = &minfs_file_inode_operations;
	child->i_fop = &minfs_file_operations;

	/* The data block is derived from the inode number. */
	info = container_of(child, struct minfs_inode_info, vfs_inode);
	info->data_block = MINFS_FIRST_DATA_BLOCK + child->i_ino;

	rc = minfs_add_link(dentry, child);
	if (rc != 0) {
		/* Undo: drop the link count and the reference we hold. */
		inode_dec_link_count(child);
		iput(child);
		return rc;
	}

	d_instantiate(dentry, child);
	mark_inode_dirty(child);

	printk(KERN_DEBUG "new file inode created (ino = %lu)\n",
		child->i_ino);

	return 0;
}

/*
 * super_operations.write_inode: copy the VFS inode fields into the matching
 * slot of the on-disk inode block and mark that block dirty.
 *
 * Returns 0 on success or -ENOMEM when the inode block cannot be read.
 */

static int minfs_write_inode(struct inode *inode,
		struct writeback_control *wbc)
{
	struct minfs_inode_info *info = container_of(inode,
			struct minfs_inode_info, vfs_inode);
	struct minfs_inode *disk;
	struct buffer_head *blk;

	blk = sb_bread(inode->i_sb, MINFS_INODE_BLOCK);
	if (!blk) {
		printk(LOG_LEVEL "could not read block\n");
		return -ENOMEM;
	}

	/* The block is an array of struct minfs_inode indexed by i_ino. */
	disk = (struct minfs_inode *)blk->b_data + inode->i_ino;

	/* fill disk inode */
	disk->mode = inode->i_mode;
	disk->uid = i_uid_read(inode);
	disk->gid = i_gid_read(inode);
	disk->size = inode->i_size;
	disk->data_block = info->data_block;

	printk(KERN_DEBUG "mode is %05o; data_block is %d\n", disk->mode,
		info->data_block);

	mark_buffer_dirty(blk);
	brelse(blk);

	printk(KERN_DEBUG "wrote inode %lu\n", inode->i_ino);

	return 0;
}

static void minfs_put_super(struct super_block *sb)
{
	struct minfs_sb_info *sbi = sb->s_fs_info;

	/* Free superblock buffer head. */
	mark_buffer_dirty(sbi->sbh);
	brelse(sbi->sbh);

	printk(KERN_DEBUG "released superblock resources\n");
}

/* Superblock operations: inode lifetime, inode write-back and unmount. */
static const struct super_operations minfs_ops = {
	.alloc_inode	= minfs_alloc_inode,
	.destroy_inode	= minfs_destroy_inode,
	.write_inode	= minfs_write_inode,
	.put_super	= minfs_put_super,
	.statfs		= simple_statfs,
};

/*
 * Create a fresh in-memory inode on @sb with the given @mode, owned as
 * inode_init_owner() decides from @dir. Directories additionally get the
 * simple_dir_* operation tables and one extra link.
 *
 * Returns the inode, or NULL when new_inode() fails.
 */
struct inode *myfs_get_inode(struct super_block *sb, const struct inode *dir,
		int mode)
{
	struct inode *node;

	node = new_inode(sb);
	if (node == NULL)
		return NULL;

	/* Mode, ownership and timestamps. */
	node->i_mode = mode;
	inode_init_owner(node, dir, mode);
	node->i_atime = current_time(node);
	node->i_ctime = current_time(node);
	node->i_mtime = current_time(node);

	/* Inode number and address space operations. */
	node->i_ino = get_next_ino();
	node->i_mapping->a_ops = &minfs_aops;

	/* Everything below applies to directories only. */
	if (!S_ISDIR(mode))
		return node;

	node->i_op = &simple_dir_inode_operations;
	node->i_fop = &simple_dir_operations;

	/* Directory link count is incremented once on top of the initial value. */
	inc_nlink(node);

	return node;
}

/*
 * Fill superblock @s from the device: read and validate the on-disk
 * superblock, set up the private minfs_sb_info and create the root dentry.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL for any
 * other failure. Each failure logs only the message that applies to it.
 */
static int minfs_fill_super(struct super_block *s, void *data, int silent)
{
	struct minfs_sb_info *sbi;
	struct minfs_super_block *ms;
	struct inode *root_inode;
	struct dentry *root_dentry;
	struct buffer_head *bh;
	int ret = -EINVAL;

	sbi = kzalloc(sizeof(struct minfs_sb_info), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	s->s_fs_info = sbi;

	/* Set block size for superblock. */
	if (!sb_set_blocksize(s, MINFS_BLOCK_SIZE)) {
		printk(LOG_LEVEL "bad block size\n");
		goto out_free_sbi;
	}

	/* The superblock is the first block on the device (index 0). */
	bh = sb_bread(s, 0);
	if (bh == NULL) {
		printk(LOG_LEVEL "error reading buffer_head\n");
		goto out_free_sbi;
	}

	/* Interpret the block as a minfs_super_block and check its magic. */
	ms = (struct minfs_super_block *)bh->b_data;
	if (ms->magic != MINFS_MAGIC) {
		printk(LOG_LEVEL "bad magic number\n");
		goto out_brelse;
	}

	s->s_magic = ms->magic;
	s->s_op = &minfs_ops;

	/* Copy the rest of the on-disk superblock and keep its buffer. */
	sbi->imap = ms->imap;
	sbi->version = ms->version;
	sbi->sbh = bh;

	/* minfs_iget() may return NULL or an ERR_PTR; reject both. */
	root_inode = minfs_iget(s, MINFS_ROOT_INODE);
	if (IS_ERR_OR_NULL(root_inode)) {
		printk(LOG_LEVEL "bad inode\n");
		goto out_brelse;
	}

	/*
	 * d_make_root() drops the inode reference itself when it fails, so
	 * there must be no iput() on this error path.
	 */
	root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		printk(LOG_LEVEL "could not allocate root dentry\n");
		ret = -ENOMEM;
		goto out_brelse;
	}
	s->s_root = root_dentry;

	return 0;

out_brelse:
	brelse(bh);
out_free_sbi:
	s->s_fs_info = NULL;
	kfree(sbi);
	return ret;
}

/*
 * file_system_type.mount: mount from a block device, with
 * minfs_fill_super() filling in the superblock.
 */
static struct dentry *minfs_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, minfs_fill_super);
}

static struct file_system_type minfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "minfs",
	/* TODO 1: add mount, kill_sb and fs_flags */
	.mount = minfs_mount,
	.kill_sb =  kill_block_super,
};

/*
 * Module load hook: register the minfs filesystem type. Returns 0 or the
 * error from register_filesystem().
 */
static int __init minfs_init(void)
{
	int rc = register_filesystem(&minfs_fs_type);

	if (!rc)
		return 0;

	printk(LOG_LEVEL "register_filesystem failed\n");
	return rc;
}

/* Module unload hook: unregister the minfs filesystem type. */
static void __exit minfs_exit(void)
{
	unregister_filesystem(&minfs_fs_type);
}

/* Entry and exit points of the module. */
module_init(minfs_init);
module_exit(minfs_exit);

网络

TCP

/*
 * SO2 - Networking Lab (#10)
 *
 * Exercise #3, #4: simple kernel TCP socket
 *
 * Code skeleton.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <net/sock.h>

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("SO2");
MODULE_DESCRIPTION("Simple kernel TCP socket");

/* printk level for messages, listening port and accept backlog. */
#define LOG_LEVEL		KERN_ALERT
#define MY_TCP_PORT		60000
#define LISTEN_BACKLOG		5

/* Switch for the LOG() debugging macro. */
#define ON			1
#define OFF			0
#define DEBUG			ON

/*
 * LOG(s): print the string literal s at KERN_DEBUG level, followed by a
 * newline, when DEBUG is ON; expands to an empty statement otherwise.
 * s must be a string literal because it is pasted between other literals.
 */
#if DEBUG == ON
#define LOG(s)					\
	do {					\
		printk(KERN_DEBUG s "\n");	\
	} while (0)
#else
#define LOG(s)					\
	do {} while (0)
#endif

/*
 * Print the IPv4 address and port stored in addr, a struct sockaddr_in
 * lvalue. The argument is parenthesized so that expressions such as *p
 * expand with the intended precedence.
 */
#define print_sock_address(addr)		\
	do {					\
		printk(LOG_LEVEL "connection established to "	\
				"%pI4:%d\n",	 		\
				&(addr).sin_addr.s_addr,	\
				ntohs((addr).sin_port));	\
	} while (0)

/* Both sockets are created in my_tcp_sock_init() and released in my_tcp_sock_exit(). */
static struct socket *sock;	/* listening (server) socket */
static struct socket *new_sock;	/* communication socket */

/*
 * Module load hook: create a TCP socket bound to loopback:MY_TCP_PORT,
 * listen on it, accept one connection and print the peer address.
 *
 * Returns 0 on success or the negative error of the step that failed; on
 * failure every socket created so far is released.
 */
int __init my_tcp_sock_init(void)
{
	int err;
	/* address to bind on */
	struct sockaddr_in addr = {
		.sin_family	= AF_INET,
		.sin_port	= htons(MY_TCP_PORT),
		.sin_addr	= { htonl(INADDR_LOOPBACK) }
	};
	int addrlen = sizeof(addr);
	/* address of peer */
	struct sockaddr_in raddr;

	/* Create listening socket. */
	err = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0)
		goto out;

	/* Bind socket to loopback on port MY_TCP_PORT. */
	err = kernel_bind(sock, (struct sockaddr *)&addr, addrlen);
	if (err < 0)
		goto out_release;

	/* Start listening. */
	err = kernel_listen(sock, LISTEN_BACKLOG);
	if (err < 0)
		goto out_release;

	/*
	 * Accept a connection. When this fails there is no connection socket
	 * to release, so only the listening socket is cleaned up.
	 */
	err = kernel_accept(sock, &new_sock, 0);
	if (err < 0)
		goto out_release;

	/* Get the address of the peer (last argument 1 = peer) and print it. */
	err = new_sock->ops->getname(new_sock, (struct sockaddr *)&raddr, 1);
	if (err < 0)
		goto out_release_new_sock;

	print_sock_address(raddr);
	return 0;

out_release_new_sock:
	/* Cleanup socket for accepted connection. */
	sock_release(new_sock);
	new_sock = NULL;
out_release:
	/* Cleanup listening socket. */
	sock_release(sock);
	sock = NULL;
out:
	return err;
}

/*
 * Module unload hook: release the connection socket first, then the
 * listening socket.
 */
void __exit my_tcp_sock_exit(void)
{
	sock_release(new_sock);
	sock_release(sock);
}

/* Entry and exit points of the module. */
module_init(my_tcp_sock_init);
module_exit(my_tcp_sock_exit);

UDP

/*
 * SO2 - Networking Lab (#10)
 *
 * Bonus: simple kernel UDP socket
 *
 * Code skeleton.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/net.h>
#include <linux/in.h>
#include <net/sock.h>

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("SO2");
MODULE_DESCRIPTION("Simple kernel UDP socket");

/* printk level for messages, local/remote ports and the datagram payload. */
#define LOG_LEVEL		KERN_ALERT
#define MY_UDP_LOCAL_PORT	60000
#define MY_UDP_REMOTE_PORT	60001
#define MY_TEST_MESSAGE		"kernelsocket\n"

/* Switch for the LOG() debugging macro. */
#define ON			1
#define OFF			0
#define DEBUG			ON

/*
 * LOG(s): print the string literal s at KERN_DEBUG level, followed by a
 * newline, when DEBUG is ON; expands to an empty statement otherwise.
 * s must be a string literal because it is pasted between other literals.
 */
#if DEBUG == ON
#define LOG(s)					\
	do {					\
		printk(KERN_DEBUG s "\n");	\
	} while (0)
#else
#define LOG(s)					\
	do {} while (0)
#endif

/*
 * Print the IPv4 address and port stored in addr, a struct sockaddr_in
 * lvalue. Uses the %pI4 printk format, the same way the TCP module's copy
 * of this macro does, instead of the NIPQUAD helpers.
 */
#define print_sock_address(addr)		\
	do {					\
		printk(LOG_LEVEL "connection established to "	\
				"%pI4:%d\n",	 		\
				&(addr).sin_addr.s_addr,	\
				ntohs((addr).sin_port));	\
	} while (0)

/* Created in my_udp_sock_init(), released in my_udp_sock_exit(). */
static struct socket *sock;	/* UDP server */

/* send datagram */
static int my_udp_msgsend(struct socket *s)
{
	/* address to send to */
	struct sockaddr_in raddr = {
		.sin_family	= AF_INET,
		.sin_port	= htons(MY_UDP_REMOTE_PORT),
		.sin_addr	= { htonl(INADDR_LOOPBACK) }
	};
	int raddrlen = sizeof(raddr);
	/* message */
	struct msghdr msg;
	struct iovec iov;
	char *buffer = MY_TEST_MESSAGE;
	int len = strlen(buffer) + 1;

	/* TODO 1: build message */
	msg.msg_name = &raddr;
	msg.msg_namelen = raddrlen;
	msg.msg_flags = 0;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;

	iov.iov_base = buffer;
	iov.iov_len =  len;
	/* TODO 1: send the message down the socket and return the
	 * error code.
	 */
	kernel_sendmsg(s,&msg,(struct kvec*)&iov,1,len);

	return 0;
}

/*
 * Module load hook: create a UDP socket, bind it to
 * loopback:MY_UDP_LOCAL_PORT and send the test datagram.
 *
 * Returns 0 on success or the negative error of the step that failed; the
 * socket is released when binding or sending fails.
 */
int __init my_udp_sock_init(void)
{
	/* address to bind on */
	struct sockaddr_in local = {
		.sin_family	= AF_INET,
		.sin_port	= htons(MY_UDP_LOCAL_PORT),
		.sin_addr	= { htonl(INADDR_LOOPBACK) }
	};
	int local_len = sizeof(local);
	int rc;

	rc = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (rc < 0)
		return rc;

	rc = kernel_bind(sock, (struct sockaddr *)&local, local_len);
	if (rc < 0)
		goto fail_release;

	/* send message */
	rc = my_udp_msgsend(sock);
	if (rc < 0) {
		printk(LOG_LEVEL "can't send message\n");
		goto fail_release;
	}

	return 0;

fail_release:
	sock_release(sock);
	return rc;
}

/* Module unload hook: release the UDP socket. */
void __exit my_udp_sock_exit(void)
{
	sock_release(sock);
}

/* Entry and exit points of the module. */
module_init(my_udp_sock_init);
module_exit(my_udp_sock_exit);

内存映射

kmmap

/*
 * PSO - Memory Mapping Lab(#11)
 *
 * Exercise #1: memory mapping using kmalloc'd kernel areas
 */

#include <linux/version.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/pgtable.h>
#include <linux/sched/mm.h>
#include <linux/sched.h>
#include <asm/io.h>
#include <asm/highmem.h>
#include <linux/rmap.h>
#include <asm/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include "../test/mmap-test.h"

/* Module metadata. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("PSO");
MODULE_DESCRIPTION("simple mmap driver");

/* Major number of the character device. */
#define MY_MAJOR	42
/* Number of pages exposed through mmap/read/write. */
#define NPAGES		16

/* Character device registered in my_init() and removed in my_exit(). */
static struct cdev mmap_cdev;

/* Raw pointer returned by kmalloc(); this is the pointer given to kfree(). */
static void *kmalloc_ptr;

/*
 * kmalloc_ptr rounded up to a page boundary; this is the address used by
 * my_read(), my_write() and my_mmap().
 */
static char *kmalloc_area;

/* file_operations.open: only logs the call; always succeeds. */
static int my_open(struct inode *inode, struct file *filp)
{
	pr_info("open\n");

	return 0;
}

/* file_operations.release: nothing to tear down; always succeeds. */
static int my_release(struct inode *inode, struct file *filp)
{
	return 0;
}

static int my_read(struct file *file, char __user *user_buffer,
		size_t size, loff_t *offset)
{
	if(!kmalloc_area)
		return -EFAULT;
	/* TODO 2: check size doesn't exceed our mapped area size */
	if(size > (NPAGES)*PAGE_SIZE)
		return -EFAULT;
	/* TODO 2: copy from mapped area to user buffer */
	if(copy_to_user(user_buffer,kmalloc_area,size))
		return -EFAULT;

	return size;
}

static int my_write(struct file *file, const char __user *user_buffer,
		size_t size, loff_t *offset)
{
	if(!kmalloc_area)
		return -EFAULT;
	/* TODO 2: check size doesn't exceed our mapped area size */
	if(size > (NPAGES)*PAGE_SIZE)
		return -EFAULT;
	/* TODO 2: copy from user buffer to mapped area */
	if(copy_from_user(kmalloc_area,user_buffer,size))
		return -EFAULT;
	return size;
}

/*
 * file_operations.mmap: map the kmalloc'd area into @vma in one piece.
 *
 * Returns 0 on success, -EIO when the request is larger than NPAGES pages
 * or remap_pfn_range() fails, and -EFAULT when the area is missing.
 */
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long length = vma->vm_end - vma->vm_start;
	/* A page frame number needs the full width of unsigned long. */
	unsigned long pfn;
	int ret;

	/* do not map more than we can */
	if (length > NPAGES * PAGE_SIZE)
		return -EIO;

	if (!kmalloc_area)
		return -EFAULT;

	/* The area is physically contiguous, so one remap covers it all. */
	pfn = virt_to_phys(kmalloc_area) >> PAGE_SHIFT;
	ret = remap_pfn_range(vma, vma->vm_start, pfn, length, vma->vm_page_prot);
	if (ret < 0) {
		pr_err("map address area failed\n");
		return -EIO;
	}

	return 0;
}

/* File operations of the character device. */
static const struct file_operations mmap_fops = {
	.owner		= THIS_MODULE,
	.open		= my_open,
	.release	= my_release,
	.read		= my_read,
	.write		= my_write,
	.mmap		= my_mmap,
};

/*
 * seq_file show callback: log every memory mapping of the current process
 * and write the summed size of all mappings, in bytes, to @seq.
 * Writes 0 when the current task has no mm. Always returns 0.
 */
static int my_seq_show(struct seq_file *seq, void *v)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma_iterator;
	unsigned long total = 0;

	/* Get current process' mm_struct; this can be NULL. */
	mm = get_task_mm(current);
	if (mm) {
		/*
		 * Iterate through all memory mappings.
		 * NOTE(review): the list is walked without the mm's mmap
		 * lock; confirm whether it should be taken here.
		 */
		for (vma_iterator = mm->mmap; vma_iterator;
				vma_iterator = vma_iterator->vm_next) {
			pr_info("0x%lx -- 0x%lx\n", vma_iterator->vm_start,
				vma_iterator->vm_end);
			total += vma_iterator->vm_end - vma_iterator->vm_start;
		}

		/* Release mm_struct. */
		mmput(mm);
	}

	/* Write the total count to file. */
	seq_printf(seq, "%lu", total);

	return 0;
}

/*
 * proc open callback: bind my_seq_show() as the single show function of
 * this file. Returns whatever single_open() returns.
 */
static int my_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, my_seq_show, NULL);
}

/* Operations of the procfs entry: a single-record seq_file. */
static const struct proc_ops my_proc_ops = {
	.proc_open	= my_seq_open,
	.proc_release	= single_release,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
};

/*
 * Module load hook: create the procfs entry, reserve the device number,
 * allocate and prepare the kmalloc'd area and register the character
 * device.
 *
 * Returns 0 on success or a negative error; on failure everything set up
 * before the failing step is undone.
 */
static int __init my_init(void)
{
	int ret = 0;
	int i;

	/* Create a new entry in procfs; a failure must not return 0. */
	if (!proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_ops)) {
		ret = -ENOMEM;
		goto out;
	}

	ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap");
	if (ret < 0) {
		pr_err("could not register region\n");
		goto out_no_chrdev;
	}

	/* Allocate NPAGES+2 pages so a page-aligned window of NPAGES fits. */
	kmalloc_ptr = kmalloc((NPAGES + 2) * PAGE_SIZE, GFP_KERNEL);
	if (!kmalloc_ptr) {
		ret = -ENOMEM;
		/* The region and the proc entry already exist: undo both. */
		goto out_unreg;
	}

	/* Round kmalloc_ptr up to the nearest page start address. */
	kmalloc_area = (char *)round_up((unsigned long)kmalloc_ptr, PAGE_SIZE);

	/* Mark pages as reserved and write a marker at the start of each. */
	for (i = 0; i < (NPAGES + 2); ++i) {
		struct page *pg = virt_to_page(kmalloc_ptr + i * PAGE_SIZE);

		SetPageReserved(pg);
		memcpy(kmalloc_ptr + i * PAGE_SIZE, "\xaa\xbb\xcc\xdd", 4);
	}

	/* Init device. */
	cdev_init(&mmap_cdev, &mmap_fops);
	ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1);
	if (ret < 0) {
		pr_err("could not add device\n");
		goto out_kfree;
	}

	return 0;

out_kfree:
	/* Pages must not go back to the allocator still marked reserved. */
	for (i = 0; i < (NPAGES + 2); ++i)
		ClearPageReserved(virt_to_page(kmalloc_ptr + i * PAGE_SIZE));
	kfree(kmalloc_ptr);
	kmalloc_ptr = NULL;
	kmalloc_area = NULL;
out_unreg:
	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
out_no_chrdev:
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
out:
	return ret;
}

/*
 * Module unload hook: remove the character device, un-reserve and free the
 * kmalloc'd pages, then give back the device number and the procfs entry.
 */
static void __exit my_exit(void)
{
	int page;

	cdev_del(&mmap_cdev);

	/* Clear the reservation on every page before freeing the memory. */
	for (page = 0; page < (NPAGES + 2); page++)
		ClearPageReserved(virt_to_page(kmalloc_ptr + page * PAGE_SIZE));

	kfree(kmalloc_ptr);
	kmalloc_ptr = NULL;
	kmalloc_area = NULL;

	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
}

/* Entry and exit points of the module. */
module_init(my_init);
module_exit(my_exit);

vmmap

注意: remap_vmalloc_range 只能映射带有 VM_USERMAP 标志的 vmalloc 区域 (即内核中的 vm_struct, 而不是用户态的 vma), 这样的区域需要通过 vmalloc_user 分配获得.

/*
 * PSO - Memory Mapping Lab(#11)
 *
 * Exercise #2: memory mapping using vmalloc'd kernel areas
 */

#include <linux/version.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
#include <asm/io.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include "../test/mmap-test.h"


/* Module metadata. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("PSO");
MODULE_DESCRIPTION("simple mmap driver");

/* Major number of the character device. */
#define MY_MAJOR	42

/* Number of pages allocated with vmalloc and exposed to user space. */
#define NPAGES		16

/* Character device registered in my_init() and removed in my_exit(). */
static struct cdev mmap_cdev;

/*
 * Start of the vmalloc'd area (NPAGES pages), used by my_read(),
 * my_write() and my_mmap(); NULL when no area is allocated.
 */
static char *vmalloc_area;

/* file_operations.open: nothing to set up; always succeeds. */
static int my_open(struct inode *inode, struct file *filp)
{
	return 0;
}

/* file_operations.release: nothing to tear down; always succeeds. */
static int my_release(struct inode *inode, struct file *filp)
{
	return 0;
}

/*
 * file_operations.read: copy @size bytes from the start of the vmalloc'd
 * area to @user_buffer. @offset is not used.
 *
 * Returns @size on success, or -EFAULT when the area is missing, @size
 * exceeds NPAGES pages, or the copy to user space fails.
 */
static ssize_t my_read(struct file *file, char __user *user_buffer,
		size_t size, loff_t *offset)
{
	if (vmalloc_area == NULL || size > (NPAGES) * PAGE_SIZE)
		return -EFAULT;

	if (copy_to_user(user_buffer, vmalloc_area, size) != 0)
		return -EFAULT;

	return size;
}

/*
 * file_operations.write: copy @size bytes from @user_buffer to the start
 * of the vmalloc'd area. @offset is not used.
 *
 * Returns @size on success, or -EFAULT when the area is missing, @size
 * exceeds NPAGES pages, or the copy from user space fails.
 */
static ssize_t my_write(struct file *file, const char __user *user_buffer,
		size_t size, loff_t *offset)
{
	if (vmalloc_area == NULL || size > (NPAGES) * PAGE_SIZE)
		return -EFAULT;

	if (copy_from_user(vmalloc_area, user_buffer, size) != 0)
		return -EFAULT;

	return size;
}
/*
 * file_operations.mmap: map the vmalloc'd area into @vma one page at a
 * time, looking up the page frame of each page separately.
 *
 * Returns 0 on success, -EIO when the request is larger than NPAGES pages,
 * -EFAULT when the area is missing, or the error from remap_pfn_range().
 */
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	/* Initialised so the final return is defined even with no iterations. */
	int ret = 0;
	long length = vma->vm_end - vma->vm_start;
	unsigned long start = vma->vm_start;
	char *vmalloc_area_ptr = vmalloc_area;
	unsigned long pfn;
	int i;

	if (length > NPAGES * PAGE_SIZE)
		return -EIO;

	if (!vmalloc_area)
		return -EFAULT;

	/* Map pages individually. */
	for (i = 0; i < (length >> PAGE_SHIFT); ++i) {
		pfn = vmalloc_to_pfn(vmalloc_area_ptr);
		ret = remap_pfn_range(vma, start, pfn, PAGE_SIZE,
				vma->vm_page_prot);
		if (ret) {
			pr_err("remap_pfn_range failed");
			return ret;
		}
		vmalloc_area_ptr += PAGE_SIZE;
		start += PAGE_SIZE;
	}

	return ret;
}

/* File operations of the character device. */
static const struct file_operations mmap_fops = {
	.owner		= THIS_MODULE,
	.open		= my_open,
	.release	= my_release,
	.read		= my_read,
	.write		= my_write,
	.mmap		= my_mmap,
};

/*
 * seq_file show callback: log every memory mapping of the current process
 * and write the summed size of all mappings, in bytes, to @seq.
 * Writes 0 when the current task has no mm. Always returns 0.
 */
static int my_seq_show(struct seq_file *seq, void *v)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma_iterator;
	unsigned long total = 0;

	/* Get current process' mm_struct; this can be NULL. */
	mm = get_task_mm(current);
	if (mm) {
		/*
		 * Iterate through all memory mappings.
		 * NOTE(review): the list is walked without the mm's mmap
		 * lock; confirm whether it should be taken here.
		 */
		for (vma_iterator = mm->mmap; vma_iterator;
				vma_iterator = vma_iterator->vm_next) {
			pr_info("0x%lx -- 0x%lx\n", vma_iterator->vm_start,
				vma_iterator->vm_end);
			total += vma_iterator->vm_end - vma_iterator->vm_start;
		}

		/* Release mm_struct. */
		mmput(mm);
	}

	/* Write the total count to file. */
	seq_printf(seq, "%lu", total);

	return 0;
}

/*
 * proc open callback: bind my_seq_show() as the single show function of
 * this file. Returns whatever single_open() returns.
 */
static int my_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, my_seq_show, NULL);
}


/* Operations of the procfs entry: a single-record seq_file. */
static const struct proc_ops my_proc_ops = {
	.proc_open	= my_seq_open,
	.proc_release	= single_release,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
};

/*
 * Module load hook: create the procfs entry, reserve the device number,
 * allocate and prepare the vmalloc'd area and register the character
 * device.
 *
 * Returns 0 on success or a negative error; on failure everything set up
 * before the failing step is undone.
 */
static int __init my_init(void)
{
	int ret = 0;
	int i;

	/* Create a new entry in procfs. */
	if (!proc_create(PROC_ENTRY_NAME, 0, NULL, &my_proc_ops)) {
		ret = -ENOMEM;
		goto out;
	}

	ret = register_chrdev_region(MKDEV(MY_MAJOR, 0), 1, "mymap");
	if (ret < 0) {
		pr_err("could not register region\n");
		goto out_no_chrdev;
	}

	/* Allocate NPAGES using vmalloc; a failure must not return 0. */
	vmalloc_area = vmalloc(NPAGES * PAGE_SIZE);
	if (vmalloc_area == NULL) {
		ret = -ENOMEM;
		goto out_unreg;
	}

	/* Mark pages as reserved and write a marker at the start of each. */
	for (i = 0; i < NPAGES; ++i) {
		struct page *pg = vmalloc_to_page(vmalloc_area + i * PAGE_SIZE);

		SetPageReserved(pg);
		memcpy(vmalloc_area + i * PAGE_SIZE, "\xaa\xbb\xcc\xdd", 4);
	}

	cdev_init(&mmap_cdev, &mmap_fops);
	ret = cdev_add(&mmap_cdev, MKDEV(MY_MAJOR, 0), 1);
	if (ret < 0) {
		pr_err("could not add device\n");
		goto out_vfree;
	}

	return 0;

out_vfree:
	/* Pages must not be freed while still marked reserved. */
	for (i = 0; i < NPAGES; ++i)
		ClearPageReserved(vmalloc_to_page(vmalloc_area + i * PAGE_SIZE));
	vfree(vmalloc_area);
	vmalloc_area = NULL;
out_unreg:
	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
out_no_chrdev:
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
out:
	return ret;
}

/*
 * Module unload hook: remove the character device, un-reserve and free the
 * vmalloc'd pages, then give back the device number and the procfs entry.
 */
static void __exit my_exit(void)
{
	int page;

	cdev_del(&mmap_cdev);

	/* Clear the reservation on every page before freeing the memory. */
	for (page = 0; page < NPAGES; page++)
		ClearPageReserved(vmalloc_to_page(vmalloc_area + page * PAGE_SIZE));

	vfree(vmalloc_area);
	vmalloc_area = NULL;

	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), 1);
	remove_proc_entry(PROC_ENTRY_NAME, NULL);
}

/* Entry and exit points of the module. */
module_init(my_init);
module_exit(my_exit);

设备模型

最复杂的一集.

总结一下, 本练习主要做了这样的事:

注册一个叫做bex的总线(bus), 以及总线上的一个初始设备(dev)root. 该总线上的设备类型是bex_device. 该总线上的设备都有两个提供给用户的只读属性, version和type. 这两个属性是在向总线添加设备时创建的. 该总线本身也有两个提供给用户的只写属性, add和del, 用户可以通过写入这两个属性来向总线上添加和删除设备. 看到这里, 发现其实把属性理解为用户态接口更加易懂.

设备应该有对应的驱动程序(driver), 所以有对应的bex_driver驱动类型. 当设备被添加到总线上, 总线会遍历已注册的驱动程序, 对该设备调用总线对应的match函数. 如果匹配成功, 则将该设备与该驱动相关联, 然后调用总线上的probe函数. bex总线的probe函数会直接转给驱动的probe函数处理, 该函数创建一个bex_misc_device类型的设备, 该设备本质上是一个miscdevice.
访问miscdevice的dev属性可以查看对应的设备号, mknod之后便可对该设备进行读写.

#ifndef _BEX_H
#define _BEX_H

#include <linux/device.h>

/*
 * A device on the bex bus. The generic struct device is embedded so that
 * to_bex_device() can recover this structure from it.
 */
struct bex_device {
	/* device type string; freed with kfree() in bex_dev_release() */
	const char *type;
	/* value exposed through the "version" attribute */
	int version;
	/* embedded generic device */
	struct device dev;
};

/*
 * Convert a pointer to the embedded struct device into its enclosing
 * struct bex_device. The parameter must not be called "dev": that token is
 * also the member name passed to container_of() and would be replaced by
 * the argument.
 */
#define to_bex_device(d) container_of(d, struct bex_device, dev)

/*
 * A driver for devices on the bex bus. The generic struct device_driver is
 * embedded so that to_bex_driver() can recover this structure from it.
 */
struct bex_driver {
	/* driver type string */
	const char *type;

	/* called by the bus probe function with the device being bound */
	int (*probe)(struct bex_device *dev);
	/* called by the bus remove function with the device being unbound */
	void (*remove)(struct bex_device *dev);

	/* embedded generic driver */
	struct device_driver driver;
};

/*
 * Convert a pointer to the embedded struct device_driver into its
 * enclosing struct bex_driver.
 */
#define to_bex_driver(drv) container_of(drv, struct bex_driver, driver)

/*
 * Driver registration entry points of the bex bus. Their definitions are
 * not part of this header.
 */
int bex_register_driver(struct bex_driver *drv);
void bex_unregister_driver(struct bex_driver *drv);

#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "bex.h"

/* Module metadata. */
MODULE_DESCRIPTION("BEX bus module");
MODULE_AUTHOR("Kernel Hacker");
MODULE_LICENSE("GPL");

static int bex_match(struct device *dev, struct device_driver *driver)
{
	/* TODO 5: implement the bus match function */
	return !strcmp(dev_name(dev),driver->name);
}

/*
 * Bus probe callback: forward to the probe function of the bex driver
 * bound to @dev and return its result.
 */
static int bex_probe(struct device *dev)
{
	struct bex_driver *drv = to_bex_driver(dev->driver);

	return drv->probe(to_bex_device(dev));
}

/*
 * Bus remove callback: forward to the remove function of the bex driver
 * bound to @dev. Always returns 0.
 */
static int bex_remove(struct device *dev)
{
	struct bex_driver *drv = to_bex_driver(dev->driver);

	drv->remove(to_bex_device(dev));

	return 0;
}

/* Forward declaration: add_store() below calls it before its definition. */
static int bex_add_dev(const char *name, const char *type, int version);

/*
 * Write-only bus attribute "add". Expects "<name> <type> <version>" and
 * adds a device with those properties through bex_add_dev().
 *
 * Returns @count on success, -EINVAL when the input does not contain all
 * three fields, or the negative error from bex_add_dev(). A failure must
 * not be reported as 0: that hides the error from the writer.
 */
static ssize_t add_store(struct bus_type *bt, const char *buf, size_t count)
{
	char name[32];
	char type[32];
	int version;
	int ret;

	/* %31s leaves room for the terminating NUL in the 32-byte buffers. */
	ret = sscanf(buf, "%31s %31s %d", name, type, &version);
	if (ret != 3)
		return -EINVAL;

	ret = bex_add_dev(name, type, version);
	if (ret < 0)
		return ret;

	return count;
}
BUS_ATTR_WO(add);

/* Forward declaration: del_store() below calls it before its definition. */
static int bex_del_dev(const char *name);

/*
 * Write-only bus attribute "del". Expects "<name>" and deletes the device
 * with that name through bex_del_dev().
 *
 * Returns @count on success, -EINVAL when no name can be parsed, or the
 * negative error from bex_del_dev(). A failure must not be reported as 0:
 * that hides the error from the writer.
 */
static ssize_t del_store(struct bus_type *bt, const char *buf, size_t count)
{
	char name[32];
	int ret;

	/* %31s leaves room for the terminating NUL in the 32-byte buffer. */
	ret = sscanf(buf, "%31s", name);
	if (ret != 1)
		return -EINVAL;

	ret = bex_del_dev(name);
	if (ret < 0)
		return ret;

	return count;
}
BUS_ATTR_WO(del);


/*
 * Bus-level attributes ("add" and "del"), exposed through bex_bus_groups.
 * The array must be NULL-terminated: the attribute-group code walks it until
 * it finds a NULL entry.
 */
static struct attribute *bex_bus_attrs[] = {
	&bus_attr_add.attr,
	&bus_attr_del.attr,
	NULL,
};
ATTRIBUTE_GROUPS(bex_bus);

/*
 * The BEX bus itself: its name, the match/probe/remove callbacks, and the
 * attribute groups (add/del) attached to the bus.
 */
struct bus_type bex_bus_type = {
	.name		= "bex",
	.bus_groups	= bex_bus_groups,
	.match		= bex_match,
	.probe		= bex_probe,
	.remove		= bex_remove,
};

static ssize_t
type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
  struct bex_device *bex_dev = to_bex_device(dev);

  return sprintf(buf, "%s\n", bex_dev->type);
}

static ssize_t
version_show(struct device *dev, struct device_attribute *attr, char *buf)
{
  struct bex_device *bex_dev = to_bex_device(dev);

  return sprintf(buf, "%d\n", bex_dev->version);
}


/*
 * Device attribute "type": owner-readable (S_IRUSR), served by type_show(),
 * no store handler.
 */
DEVICE_ATTR(type,S_IRUSR,type_show,NULL);

/*
 * Device attribute "version": owner-readable (S_IRUSR), served by
 * version_show(), no store handler.
 */
DEVICE_ATTR(version,S_IRUSR,version_show,NULL);

/*
 * Per-device attributes ("type" and "version"), exposed through
 * bex_dev_groups. The array must be NULL-terminated: the attribute-group
 * code walks it until it finds a NULL entry.
 */
static struct attribute *bex_dev_attrs[] = {
	&dev_attr_type.attr,
	&dev_attr_version.attr,
	NULL,
};
ATTRIBUTE_GROUPS(bex_dev);

/*
 * uevent callback: add a MODALIAS variable of the form "bex:<device name>"
 * to the event environment and return add_uevent_var()'s result.
 */
static int bex_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	const char *name = dev_name(dev);

	return add_uevent_var(env, "MODALIAS=bex:%s", name);
}

/*
 * Release callback for BEX devices: free the duplicated type string and then
 * the bex_device that embeds @dev.
 */
static void bex_dev_release(struct device *dev)
{
	struct bex_device *owner = to_bex_device(dev);
	const char *type = owner->type;

	kfree(type);
	kfree(owner);
}

/*
 * Device type shared by all BEX devices: attribute groups (type/version),
 * the uevent hook and the release function.
 */
struct device_type bex_device_type = {
	.groups		= bex_dev_groups,
	.uevent		= bex_dev_uevent,
	.release	= bex_dev_release,
};

/*
 * Allocate a BEX device and register it on the bus.
 *
 * @name:    device name on the bus
 * @type:    type string, copied; may be NULL (the device then has no type)
 * @version: version number stored in the device
 *
 * The "type" and "version" attributes are not created here: they belong to
 * bex_device_type.groups, so creating them again with device_create_file()
 * would only attempt to add duplicate sysfs entries.
 *
 * Returns 0 on success or a negative errno.
 */
static int bex_add_dev(const char *name, const char *type, int version)
{
	struct bex_device *bex_dev;
	int ret;

	bex_dev = kzalloc(sizeof(*bex_dev), GFP_KERNEL);
	if (!bex_dev)
		return -ENOMEM;

	if (type) {
		bex_dev->type = kstrdup(type, GFP_KERNEL);
		if (!bex_dev->type) {
			ret = -ENOMEM;
			goto err_free_dev;
		}
	}
	bex_dev->version = version;

	bex_dev->dev.bus = &bex_bus_type;
	bex_dev->dev.type = &bex_device_type;
	bex_dev->dev.parent = NULL;

	ret = dev_set_name(&bex_dev->dev, "%s", name);
	if (ret)
		goto err_free_type;

	ret = device_register(&bex_dev->dev);
	if (ret) {
		/*
		 * After device_register() the structure is reference counted;
		 * dropping the reference frees it through bex_dev_release().
		 */
		put_device(&bex_dev->dev);
		return ret;
	}

	return 0;

err_free_type:
	kfree(bex_dev->type);
err_free_dev:
	kfree(bex_dev);
	return ret;
}

/*
 * Look up the device called @name on the BEX bus and unregister it.
 *
 * The "type" and "version" attributes are part of bex_device_type.groups and
 * are removed together with the device, so they are not removed by hand.
 *
 * Returns 0 on success or -EINVAL if no such device exists.
 */
static int bex_del_dev(const char *name)
{
	struct device *dev;

	dev = bus_find_device_by_name(&bex_bus_type, NULL, name);
	if (!dev)
		return -EINVAL;

	device_unregister(dev);
	/* Drop the reference obtained from bus_find_device_by_name(). */
	put_device(dev);

	return 0;
}

int bex_register_driver(struct bex_driver *drv)
{
	int ret;

	drv->driver.bus = &bex_bus_type;
	ret = driver_register(&drv->driver);
	if (ret)
		return ret;

	return 0;
}
EXPORT_SYMBOL(bex_register_driver);

/* Remove @drv from the driver core (counterpart of bex_register_driver()). */
void bex_unregister_driver(struct bex_driver *drv)
{
	struct device_driver *core = &drv->driver;

	driver_unregister(core);
}
EXPORT_SYMBOL(bex_unregister_driver);

/*
 * Module init: register the BEX bus and create the initial "root" device.
 *
 * The "add" and "del" bus attributes come from bex_bus_type.bus_groups, so
 * they are not created again with bus_create_file().
 *
 * Returns 0 on success or a negative errno; on failure the bus is left
 * unregistered.
 */
static int __init my_bus_init (void)
{
	int ret;

	ret = bus_register(&bex_bus_type);
	if (ret < 0)
		return ret;

	ret = bex_add_dev("root", NULL, 1);
	if (ret < 0) {
		bus_unregister(&bex_bus_type);
		return ret;
	}

	return 0;
}

static void my_bus_exit (void)
{
	/* TODO 1: unregister the bus driver */

	bus_remove_file(&bex_bus_type,&bus_attr_add);
	bus_remove_file(&bex_bus_type,&bus_attr_del);
	bex_del_dev("root");
	bus_unregister(&bex_bus_type);
}

module_init (my_bus_init);
module_exit (my_bus_exit);

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include "bex.h"

MODULE_DESCRIPTION("BEX misc driver");
MODULE_AUTHOR("Kernel Hacker");
MODULE_LICENSE("GPL");

/* Size in bytes of the per-device data buffer served by read/write. */
#define BUF_SIZE 1024

/*
 * Per-device state of the BEX misc driver, allocated in bex_misc_probe()
 * and freed in bex_misc_remove().
 */
struct bex_misc_device {
	struct miscdevice misc;	/* misc character device registered for this BEX device */
	struct bex_device *dev;	/* the BEX device this state belongs to */
	char buf[BUF_SIZE];	/* backing storage for my_read()/my_write() */
};

/*
 * open() handler: nothing to set up, always succeeds.
 *
 * NOTE(review): my_read()/my_write() rely on file->private_data, which is
 * not assigned here - presumably the misc core sets it before calling this
 * handler; confirm.
 */
static int my_open(struct inode *inode, struct file *file)
{
	return 0;
}

/* release() handler: nothing to tear down, always succeeds. */
static int my_release(struct inode *inode, struct file *file)
{
	return 0;
}

/*
 * read() handler: copy up to @size bytes of the device buffer, starting at
 * *@offset, to user space.
 *
 * The offset is validated before any arithmetic. The previous
 * "sizeof(buf) - offset" was evaluated as an unsigned value and wrapped
 * around for offsets beyond the buffer, allowing reads past its end.
 *
 * Returns the number of bytes copied, 0 at or past the end of the buffer,
 * or -EFAULT if the user buffer is not accessible. The return type is
 * ssize_t to match the file_operations read prototype.
 */
static ssize_t my_read(struct file *file, char __user *user_buffer,
		       size_t size, loff_t *offset)
{
	/* The misc core stores the registered miscdevice in private_data. */
	struct miscdevice *misc = file->private_data;
	struct bex_misc_device *bmd =
		container_of(misc, struct bex_misc_device, misc);
	size_t len;

	if (*offset < 0 || *offset >= (loff_t)sizeof(bmd->buf))
		return 0;

	len = min_t(size_t, sizeof(bmd->buf) - *offset, size);
	if (!len)
		return 0;

	if (copy_to_user(user_buffer, bmd->buf + *offset, len))
		return -EFAULT;

	*offset += len;
	return len;
}

/*
 * write() handler: copy up to @size bytes from user space into the device
 * buffer, starting at *@offset.
 *
 * The offset is validated before any arithmetic. The previous
 * "sizeof(buf) - offset" was evaluated as an unsigned value and wrapped
 * around for offsets beyond the buffer, allowing writes past its end.
 *
 * Returns the number of bytes stored (possibly fewer than @size), 0 for an
 * empty write, -ENOSPC when the offset leaves no room in the buffer, or
 * -EFAULT if the user buffer is not accessible. The return type is ssize_t
 * to match the file_operations write prototype.
 */
static ssize_t my_write(struct file *file, const char __user *user_buffer,
			size_t size, loff_t *offset)
{
	/* The misc core stores the registered miscdevice in private_data. */
	struct miscdevice *misc = file->private_data;
	struct bex_misc_device *bmd =
		container_of(misc, struct bex_misc_device, misc);
	size_t len;

	if (!size)
		return 0;

	/* Report "no room" as an error: a 0 return would look like no progress. */
	if (*offset < 0 || *offset >= (loff_t)sizeof(bmd->buf))
		return -ENOSPC;

	len = min_t(size_t, sizeof(bmd->buf) - *offset, size);

	if (copy_from_user(bmd->buf + *offset, user_buffer, len))
		return -EFAULT;

	*offset += len;
	return len;
}

/* File operations of the misc character device created for each BEX device. */
struct file_operations bex_misc_fops = {
	.owner   = THIS_MODULE,
	.open    = my_open,
	.release = my_release,
	.read    = my_read,
	.write   = my_write,
};

/* Number of probe attempts so far; used to build unique "bex-misc-N" names. */
static int bex_misc_count;

/*
 * Probe callback of the BEX misc driver.
 *
 * Refuses devices whose version is greater than 1. Otherwise allocates the
 * per-device state, registers a misc character device named "bex-misc-N"
 * and stores the state as the device's driver data.
 *
 * Returns 0 on success, -EINVAL for an unsupported version, -ENOMEM on
 * allocation failure, or the error reported by misc_register(). Nothing
 * stays allocated on failure.
 */
int bex_misc_probe(struct bex_device *dev)
{
	struct bex_misc_device *bmd;
	char buf[32];
	int ret;

	dev_info(&dev->dev, "%s: %s %d\n", __func__, dev->type, dev->version);

	if (dev->version > 1)
		return -EINVAL;

	bmd = kzalloc(sizeof(*bmd), GFP_KERNEL);
	if (!bmd)
		return -ENOMEM;

	snprintf(buf, sizeof(buf), "bex-misc-%d", bex_misc_count++);
	bmd->misc.name = kstrdup(buf, GFP_KERNEL);
	if (!bmd->misc.name) {
		ret = -ENOMEM;
		goto err_free_bmd;
	}

	bmd->misc.minor = MISC_DYNAMIC_MINOR;
	bmd->misc.parent = &dev->dev;
	bmd->misc.fops = &bex_misc_fops;
	bmd->dev = dev;

	ret = misc_register(&bmd->misc);
	if (ret)
		goto err_free_name;

	/* Publish the state only once the misc device really exists. */
	dev_set_drvdata(&dev->dev, bmd);

	return 0;

err_free_name:
	kfree(bmd->misc.name);
err_free_bmd:
	kfree(bmd);
	return ret;
}

/*
 * Remove callback of the BEX misc driver: unregister the misc device created
 * in bex_misc_probe() and free the per-device state, including the name
 * string duplicated with kstrdup() during probe (previously leaked).
 */
void bex_misc_remove(struct bex_device *dev)
{
	struct bex_misc_device *bmd;

	bmd = (struct bex_misc_device *)dev_get_drvdata(&dev->dev);

	misc_deregister(&bmd->misc);
	kfree(bmd->misc.name);
	kfree(bmd);
}

/* BEX driver description: handles devices of type "misc". */
struct bex_driver bex_misc_driver = {
	.driver = {
		.name  = "bex_misc",
		.owner = THIS_MODULE,
	},
	.type   = "misc",
	.probe  = bex_misc_probe,
	.remove = bex_misc_remove,
};

/*
 * Module init: register the BEX misc driver with the BEX bus and return the
 * result of that registration.
 */
static int my_init(void)
{
	return bex_register_driver(&bex_misc_driver);
}

/* Module exit: unregister the BEX misc driver from the BEX bus. */
static void my_exit(void)
{
	/* Counterpart of the bex_register_driver() call in my_init(). */
	bex_unregister_driver(&bex_misc_driver);
}

/* Hook my_init()/my_exit() up as the module's load and unload entry points. */
module_init(my_init);
module_exit(my_exit);

版权声明： 本博客所有文章除特别声明外，著作权归作者所有。转载请注明出处！