2008年10月22日星期三

[LDD] C3. Character Device driver

代码阅读 <Linux Device Driver> v3
Chapter 3. Character Device Driver

一、scull介绍 Simple Character Utility for Loading Localities
scull是一个操作内存区域的字符设备。

scull源码实现的设备有:
scull0 ~ scull3
这四个设备分别由一个全局且持久的内存区域组成。
scullpipe0 ~ scullpipe3
这四个FIFO与管道类似。一个进程读取由另一个进程写入的数据。如果多个进程读
取同一个设备,它们就会为数据发生竞争。scullpipe的内部实现将说明在不借助
于中断的情况下如何实现阻塞式和非阻塞式读写操作。
scullsingle
scullpriv
sculluid
scullwuid
这些设备与scull0相似,但在何时允许open操作方面有一些限制。
scullsingle,一次只允许一个进程使用该驱动程序。
scullpriv,它对每个虚拟控制台是私有的,因为每个控制台上的进程会获取不同的内存区。
sculluid和scullwuid可以被多次打开,但是每次只能由一个用户打开;如果另一
个用户锁定该设备,sculluid则返回“Device Busy”错误,而scullwuid则实现了阻
塞式open。

二、设备编号
对字符设备的访问是通过文件系统内的设备文件进行的,这些设备文件通常位于/dev。而这
些文件包含两个重要数据,主设备号和次设备号。

主设备号表示对应的驱动程序,现代的Linux允许多个驱动程序共享主设备号,但一般仍旧
遵循“一个主设备号对应一个驱动程序”的原则。

次设备号由内核使用,用于正确确定设备文件指定的设备。

MAJOR(dev_t dev); /* 返回主设备号 */
MINOR(dev_t dev); /* 返回次设备号 */
MKDEV(int major, int minor); /* 由主次设备得到dev_t类型的设备编号 */


关于分配和释放设备编号,系统在linux/fs.h中声明了以下函数:

/* 获得一个或者多个设备编号 */
int register_chrdev_region(dev_t first, unsigned int count, char *name);

/* 运行过程中,使用以下函数会为我们恰当分配所需要的设备号 */
int alloc_chrdev_region(dev_t *dev, unsigned int firstminor,
unsigned int count, char *name);

/* 应该用以下函数释放这些设备编号 */
void unregister_chrdev_region(dev_t first, unsigned int count);


三、重要数据结构

#include <linux/fs.h>
/*
 * Excerpt of the kernel's struct inode as quoted in this article; the
 * remaining fields after the anonymous union are elided.  The kernel uses
 * this structure to represent a file internally.  The scull driver
 * discussed here only uses i_rdev and i_cdev.
 */
struct inode {
struct hlist_node i_hash;
struct list_head i_list;
struct list_head i_sb_list;
struct list_head i_dentry;
unsigned long i_ino;
atomic_t i_count;
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
dev_t i_rdev; /* for inodes that represent devices: the actual device number */
u64 i_version;
loff_t i_size;
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
unsigned int i_blkbits;
blkcnt_t i_blocks;
unsigned short i_bytes;
umode_t i_mode;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
struct mutex i_mutex;
struct rw_semaphore i_alloc_sem;
const struct inode_operations *i_op;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct super_block *i_sb;
struct file_lock *i_flock;
struct address_space *i_mapping;
struct address_space i_data;
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
#endif
struct list_head i_devices;
/* Only one member is meaningful, depending on what the inode represents. */
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev; /* for char-device inodes: pointer to the struct cdev */
};
/* ... */
};

/*
 * Excerpt of the kernel's struct file as quoted in this article; trailing
 * fields are elided.  One struct file represents one open file: per the
 * article, the kernel creates it at open time, hands it to every function
 * operating on the file, and releases it after the last close.
 */
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
* fu_rcuhead for RCU freeing
*/
union {
struct list_head fu_list;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
/* Shorthand accessors into f_path (directory entry and mount). */
#define f_dentry f_path.dentry
#define f_vfsmnt f_path.mnt
/* Operations table associated with this open file. */
const struct file_operations *f_op;
atomic_t f_count;
/* File flags; the article uses them to tell blocking from non-blocking I/O. */
unsigned int f_flags;
/* File mode; the article uses it for permission checks. */
mode_t f_mode;
/* Current read/write position. */
loff_t f_pos;
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
struct file_ra_state f_ra;

u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
/* scull_read/scull_write fetch their struct scull_dev from this pointer. */
void *private_data;

/* ... */
};

/*
 * The kernel's struct file_operations as quoted in this article: a table of
 * function pointers that connects a driver's operations to its device
 * numbers.  A driver points the entries it supports at its own functions
 * and leaves unsupported entries NULL.  The scull driver in this article
 * fills in owner, llseek, read, write, ioctl, open and release.
 */
struct file_operations {
struct module *owner; /* scull_fops sets this to THIS_MODULE */
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, struct dentry *, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*dir_notify)(struct file *filp, unsigned long arg);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
};

/* 自定义结构 */
/*
 * One node of a scull device's data list.
 */
struct scull_qset {
/*
 * Array of pointers to quanta.  scull_write allocates it lazily with
 * room for dev->qset pointers and allocates each quantum
 * (dev->quantum bytes) on first use.
 */
void **data;
/* Link to another node; presumably the next one in the list, walked by scull_follow -- TODO confirm. */
struct scull_qset *next;
};

/*
 * Per-device state of one scull device.
 */
struct scull_dev {
struct scull_qset *data; /* head of the scull_qset list holding the data */
int quantum; /* size in bytes of each quantum (used as the kmalloc size in scull_write) */
int qset; /* number of quantum pointers in each scull_qset's data array */
unsigned long size; /* amount of data stored: reads are bounded by it, writes extend it */
unsigned int access_key; /* not used by the code shown here; presumably for the access-controlled variants -- TODO confirm */
struct semaphore sem; /* taken with down_interruptible()/up() around read and write */
struct cdev cdev; /* char device structure; set up via scull_setup_cdev, removed with cdev_del */
};


struct inode ————
内核使用inode结构在内部表示文件,因此它和file结构不同,后者表示打开的文件描述符。
对单个文件,可能会有许多个表示打开的文件描述符的file结构,但他们都指向单个inode
结构。

inode包含大量节点文件信息。而此驱动只是用到了以下两个字段:
dev_t i_rdev; // 对于表示设备的inode结构,该字段包含真正的设备编号
struct cdev *i_cdev; // 对于表示字符设备的inode结构,此字段包含struct cdev指针

另外,开发者可以通过两个宏来获取主次设备编号:
unsigned int iminor(struct inode *inode);
unsigned int imajor(struct inode *inode);

struct file ————
file结构代表一个打开的文件,系统中任何一个打开的文件都有一个对应的file结构,它由
内核在open时候创建,并传递给在文件上操作的所有函数,直到最后的close函数。在文件实例
关闭之后,内核会释放这个结构。用到的字段有:

struct path f_path;
#define f_dentry f_path.dentry // 目录项结构
const struct file_operations *f_op;
unsigned int f_flags; // 标志,是否阻塞
mode_t f_mode; // 权限检查
loff_t f_pos; // 当前读写位置
/* needed for tty driver, and maybe others */
void *private_data; // 跨系统调用时候保存状态信息


struct file_operations ————
file_operations用来建立驱动程序操作和驱动设备编号之间的连接,他包含了一组函数指
针,这些函数和某些系统调用相关联。程序员可以视此结构为面向对象的C编程的一个例证。

file_operations的函数指针必须指向驱动程序实现特定操作的函数,对于不支持的操作,
对应的字段被赋值NULL,这种情况下,内核的处理行为是不尽相同的。
在本例中,我们实现了以下操作:

/*
 * File operations implemented by the scull driver.  Entries that are not
 * listed in this designated initializer are left NULL.
 */
struct file_operations scull_fops = {
.owner = THIS_MODULE,
.llseek = scull_llseek,
.read = scull_read, /* copies data from the device memory to user space */
.write = scull_write, /* copies data from user space into the device memory */
.ioctl = scull_ioctl,
.open = scull_open,
.release = scull_release,
};


四、scull内存使用
两个函数 ————
void *kmalloc(size_t size, gfp_t flags);
void kfree(void *ptr);
这两个函数声明在linux/slab.h中,是内核中非常重要的两个内存操作函数。用kmalloc
函数分配,然后用kfree释放,同时也注意不应该把非kmalloc返回的指针传递给kfree。
(但是将NULL传递给kfree是合法的)


scull驱动中,每一个scull设备(scull_dev)都是一个链表头,它的data指针指向scull_qset
结构的链表。而每一个scull_qset则是典型的链表节点,包含一个有1000个指针的数组,数组中
的每个指针指向一块4000字节的内存区域,这里的每一块区域就被称为一个量子(quantum)。
4000和1000的尺寸,由宏SCULL_QUANTUM和SCULL_QSET定义。当然,你也可以设置为模块参
数,也可以在运行时使用ioctl来更改。

由于scull设备是虚拟的内存区域,所以我们的read/write操作也将是对内存的操作,顶多
的麻烦是跨越内核空间和用户空间的拷贝而已。这让实现变的非常容易。

五、实现
初始化和清理函数

int scull_init_module(void)
{
int result, i;
dev_t dev = 0;

if (scull_major) {
dev = MKDEV(scull_major, scull_minor);
result = register_chrdev_region(dev, scull_nr_devs, "scull");
} else {
result = alloc_chrdev_region(&dev, scull_minor, scull_nr_devs,
"scull");
scull_major = MAJOR(dev);
}

if (result < 0) {
printk(KERN_WARNING "scull: can't get major %d\n", scull_major);
return result;
}

/* allocate the devices -- we can't have them static, as the number
* can be specified at load time.
*/
scull_devices = kmalloc(scull_nr_devs * sizeof(struct scull_dev), GFP_KERNEL);
if (!scull_devices) {
result = -ENOMEM;
goto fail;
}
memset(scull_devices, 0, scull_nr_devs * sizeof(struct scull_dev));

/* initialize each device */
for (i = 0; i < scull_nr_devs; i++) {
scull_devices[i].quantum = scull_quantum;
scull_devices[i].qset = scull_qset;
init_MUTEX(&scull_devices[i].sem);
scull_setup_cdev(&scull_devices[i], i);
}

/* At this point call the init function for any friend device */
dev = MKDEV(scull_major, scull_minor + scull_nr_devs);
// dev += scull_p_init(dev);
// dev += scull_access_init(dev);

#ifdef SCULL_DEBUG /* only when debugging */
// scull_create_proc();
#endif
printk(KERN_INFO "init scull successful\n");
return 0; /* succeed */

fail:
scull_cleanup_module();
return result;
}

/*
 * Module teardown; also used by scull_init_module to unwind after a failed
 * allocation.
 *
 * Trims and unregisters every scull device, frees the device array (if it
 * was allocated) and releases the device-number range.
 */
void scull_cleanup_module(void)
{
	int i;
	dev_t devno = MKDEV(scull_major, scull_minor);

	/* Get rid of our char dev entries */
	if (scull_devices) {
		for (i = 0; i < scull_nr_devs; i++) {
			scull_trim(scull_devices + i);
			cdev_del(&scull_devices[i].cdev);
		}
		kfree(scull_devices);
	}

#ifdef SCULL_DEBUG /* use proc only if debugging */
	/*
	 * scull_remove_proc() is intentionally not called: scull_init_module
	 * has its scull_create_proc() call disabled, so there are no /proc
	 * entries to remove.  Re-enable both calls together.
	 */
	/* scull_remove_proc(); */
#endif

	/* cleanup_module is never called if registering failed */
	unregister_chrdev_region(devno, scull_nr_devs);

	/*
	 * The cleanup functions of the friend devices are disabled together
	 * with their init calls:
	 *   scull_p_cleanup();
	 *   scull_access_cleanup();
	 */
	printk(KERN_INFO "cleanup scull successful\n");
}

init_module过程包括 ————
1) 注册设备
2) 初始化scull_dev设备
3) 初始化其他设备
为了简化设计,此章节没有引入pipe等设备,所以注释掉了某些函数调用。

cleanup_module过程 ————
1)释放scull_dev结构,注销字符设备
2) 注销设备号


然后是在file_operations指明的函数,这里重点讲解read和write的实现 ————

/*
 * read() implementation for scull.
 *
 * Copies at most `count` bytes, starting at *f_pos, from the device memory
 * to the user buffer `buf`.  A single call never crosses a quantum boundary,
 * so it may return fewer bytes than requested.
 *
 * Returns the number of bytes copied (0 at or past the end of the data, or
 * when the addressed quantum has not been allocated), -ERESTARTSYS if
 * waiting for the device semaphore was interrupted, or -EFAULT if the copy
 * to user space failed.  *f_pos is advanced by the number of bytes copied.
 */
ssize_t scull_read(struct file *filp, char __user *buf,
		   size_t count, loff_t *f_pos)
{
	struct scull_dev *dev = filp->private_data;
	struct scull_qset *node;
	int quantum = dev->quantum;
	int qset = dev->qset;
	int node_bytes = quantum * qset;	/* bytes covered by one list node */
	int node_idx, slot, offset, within;
	ssize_t nread = 0;

	if (down_interruptible(&dev->sem))
		return -ERESTARTSYS;

	/* Nothing to read at or beyond the end of the stored data. */
	if (*f_pos >= dev->size)
		goto unlock;
	/* Clamp the request to the data that actually exists. */
	if (*f_pos + count > dev->size)
		count = dev->size - *f_pos;

	/* Translate the file position into list node, quantum slot and byte offset. */
	node_idx = (long)*f_pos / node_bytes;
	within = (long)*f_pos % node_bytes;
	slot = within / quantum;
	offset = within % quantum;

	node = scull_follow(dev, node_idx);

	/* Unallocated node, pointer array or quantum: report 0 bytes. */
	if (!(node != NULL && node->data && node->data[slot]))
		goto unlock;

	/* Read only up to the end of this quantum. */
	if (count > quantum - offset)
		count = quantum - offset;

	if (copy_to_user(buf, node->data[slot] + offset, count)) {
		nread = -EFAULT;
		goto unlock;
	}

	*f_pos += count;
	nread = count;

unlock:
	up(&dev->sem);
	return nread;
}


/*
 * write() implementation for scull.
 *
 * Copies at most `count` bytes from the user buffer `buf` into the device
 * memory at *f_pos, allocating the pointer array and the target quantum on
 * demand.  A single call never crosses a quantum boundary, so it may write
 * fewer bytes than requested; callers are expected to retry with the rest.
 *
 * Returns the number of bytes written, -ERESTARTSYS if waiting for the
 * device semaphore was interrupted, -ENOMEM if memory for the data could not
 * be obtained, or -EFAULT if the copy from user space failed.  On success
 * *f_pos is advanced and dev->size grows if the write extended the data.
 */
ssize_t scull_write(struct file *filp, const char __user *buf,
		    size_t count, loff_t *f_pos)
{
	struct scull_dev *dev = filp->private_data;
	struct scull_qset *dptr;
	int quantum = dev->quantum, qset = dev->qset;
	int itemsize = quantum * qset;
	int item, s_pos, q_pos, rest;
	/*
	 * Default result for the "goto out" paths below, all of which are
	 * allocation failures.  Returning 0 there would make a caller that
	 * retries short writes loop forever.
	 */
	ssize_t retval = -ENOMEM;

	if (down_interruptible(&dev->sem))
		return -ERESTARTSYS;

	/*
	 * Unlike read, a write must NOT be clamped to dev->size: writing at
	 * or past the current end is how the device grows (size is updated
	 * below).  Clamping here would make every write to an empty device
	 * return 0.
	 */

	/* Translate the file position into list item, quantum slot and byte offset. */
	item = (long)*f_pos / itemsize;
	rest = (long)*f_pos % itemsize;
	s_pos = rest / quantum;
	q_pos = rest % quantum;

	dptr = scull_follow(dev, item);

	/* NOTE(review): assumes scull_follow returns NULL only on allocation failure -- confirm. */
	if (dptr == NULL)
		goto out;
	if (!dptr->data) {
		/* First write into this item: allocate the zeroed pointer array. */
		dptr->data = kmalloc(qset * sizeof(char *), GFP_KERNEL);
		if (!dptr->data)
			goto out;
		memset(dptr->data, 0, qset * sizeof(char *));
	}
	if (!dptr->data[s_pos]) {
		/* First write into this quantum: allocate it. */
		dptr->data[s_pos] = kmalloc(quantum, GFP_KERNEL);
		if (!dptr->data[s_pos])
			goto out;
	}

	/* Write only up to the end of this quantum. */
	if (count > quantum - q_pos)
		count = quantum - q_pos;

	if (copy_from_user(dptr->data[s_pos] + q_pos, buf, count)) {
		retval = -EFAULT;
		goto out;
	}
	*f_pos += count;
	retval = count;

	/* Update the device size if the write extended the data. */
	if (dev->size < *f_pos)
		dev->size = *f_pos;
out:
	up(&dev->sem);
	return retval;
}


scull_read从filp的f_pos处读取count个字节,写到buf中。
首先要做的是根据f_pos确定读的位置:它位于链表中第item个scull_qset、该结构指针数组
的第s_pos个量子、量子内第q_pos个字节处。然后根据item通过scull_follow得到我们要的
scull_qset结构体,最后使用copy_to_user完成读操作。一次调用最多只读到当前量子的末尾。

写操作和读操作很相似,使用了逆向的copy_from_user函数。

这两个重要的系统调用实现,都使用了跨越内核空间和用户空间的拷贝操作:
copy_to_user(void *to, void *from, int count);
copy_from_user(void *to, void *from, int count);

六、总结
scull_init_module
* alloc_chrdev_region
* scull_setup_cdev
** cdev_init, cdev_add
** scull_fops(llseek, read, write, ioctl, open, release)
* scull_create_proc
* scull_p_init
* scull_access_init

_

没有评论:

发表评论