From dacea75f37a15e5c0e5bc6a32e042c75e73d0872 Mon Sep 17 00:00:00 2001 From: via8 Date: Mon, 21 Mar 2022 20:52:31 +0300 Subject: impl sbdd --- Kbuild | 38 +++++++ Makefile | 41 ++++++++ readme.md | 19 ++++ sbdd.c | 349 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 447 insertions(+) create mode 100644 Kbuild create mode 100644 Makefile create mode 100644 readme.md create mode 100644 sbdd.c diff --git a/Kbuild b/Kbuild new file mode 100644 index 0000000..65143ef --- /dev/null +++ b/Kbuild @@ -0,0 +1,38 @@ +######## Documentation + +# In newer versions of the kernel, kbuild will first look for a file named +# "Kbuild," and only if that is not found, will it then look for a makefile. + +# The kbuild system will build <module_name>.o from <module_name>.c, +# and, after linking, will result in the kernel module <module_name>.ko: +# obj-m := sbdd.o + +# When the module is built from multiple sources, an additional line +# with '<module_name>-y := <src1>.o <src2>.o ...' is needed: +# Or you can do something like this: +# sbdd-y := src1.o +# sbdd-y += src2.o +# ... + +# kbuild supports building multiple modules with a single build file. For example, +# if you wanted to build two modules, foo.ko and bar.ko, the kbuild lines would be: +# obj-m := foo.o bar.o +# foo-y := <foo_srcs> +# bar-y := <bar_srcs> + +# You can also add compile flags here, e.g.: +# ccflags-y := -I$(src)/include +# ccflags-y += -I$(src)/include +# About $(src): when kbuild executes, the current directory is always the root of +# the kernel tree (the argument to "-C") and therefore an absolute path is needed. +# $(src) provides the absolute path by pointing to the directory where the +# currently executing kbuild file is located. 
+ + +######## Kbuild + +ccflags-y := -Wall +# ccflags-y += -DBLK_MQ_MODE +# CFLAGS_sbdd.o := -DDEBUG + +obj-m := sbdd.o diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0216fb0 --- /dev/null +++ b/Makefile @@ -0,0 +1,41 @@ +######## Documentation (1/2) + +# Kernel build system requires makefiles that do not look like traditional ones. +# The said system handles all this stuff. See for more: +# https://www.kernel.org/doc/Documentation/kbuild/ +# (makefiles.txt and modules.txt should be the main ones) + +# This line states that there is one module to be built from obj file <module_name>.o. +# The resulting module will be named <module_name>.ko after being built: +# obj-m := sbdd.o + +# The command to build a module is the following: +# $ make -C <path_to_kernel_src> M=`pwd` modules +# In the '<path_to_kernel_src>' it finds kernel's top-level makefile. +# 'M=...' option sets the path to module's files. +# 'modules' is the target of make. It refers to the list of modules found +# in the obj-m variable. + + +######## Documentation (2/2) + +# There is an idiom on creating makefiles for kernel developers. +# If KERNELRELEASE is defined, we've been invoked from the kernel build system +# (we get here the 2nd time when 'modules' target is processed): +# ifneq ($(KERNELRELEASE),) +# # It is actually a Kbuild part of makefile (should be placed in different file) +# # and will only be processed by kbuild system, not make. +# obj-m := sbdd.o +# # Otherwise we were called directly from the command line and should invoke kbuild. +# else +# default: +# $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules +# endif + + +######## Makefile + +default: + $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules +clean: + $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..c3bec58 --- /dev/null +++ b/readme.md @@ -0,0 +1,19 @@ +# Simple Block Device Driver +Implementation of Linux Kernel 5.4.X simple block device. 
+
+## Build
+- regular:
+`$ make`
+- with blk_mq support:
+uncomment `ccflags-y += -DBLK_MQ_MODE` in `Kbuild`
+- with requests debug info:
+uncomment `CFLAGS_sbdd.o := -DDEBUG` in `Kbuild`
+
+## Clean
+`$ make clean`
+
+## References
+- [Linux Device Drivers](https://lwn.net/Kernel/LDD3/)
+- [Linux Kernel Development](https://rlove.org)
+- [Linux Kernel Teaching](https://linux-kernel-labs.github.io/refs/heads/master/labs/block_device_drivers.html)
+- [Linux Kernel Sources](https://github.com/torvalds/linux)
diff --git a/sbdd.c b/sbdd.c
new file mode 100644
index 0000000..f06bcb0
--- /dev/null
+++ b/sbdd.c
@@ -0,0 +1,349 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+/*
+ * NOTE(review): the <...> operands of the #include lines were lost when this
+ * patch was exported. The list below is rebuilt from the symbols this file
+ * uses -- confirm it against the original commit.
+ */
+#include <linux/atomic.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/bvec.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/numa.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#ifdef BLK_MQ_MODE
+#include <linux/blk-mq.h>
+#endif
+
+#define SBDD_SECTOR_SHIFT      9
+#define SBDD_SECTOR_SIZE       (1 << SBDD_SECTOR_SHIFT)
+#define SBDD_MIB_SECTORS       (1 << (20 - SBDD_SECTOR_SHIFT))
+#define SBDD_NAME              "sbdd"
+
+/* Driver state. A single static instance (__sbdd) backs the one disk. */
+struct sbdd {
+	wait_queue_head_t       exitwait;   /* woken when refs_cnt drops to zero */
+	spinlock_t              datalock;   /* serializes copies to/from data */
+	atomic_t                deleting;   /* set by sbdd_delete(); handlers then reject I/O */
+	atomic_t                refs_cnt;   /* handlers currently inside the driver */
+	sector_t                capacity;   /* device size in SBDD_SECTOR_SIZE sectors */
+	u8                      *data;      /* backing store, capacity sectors long */
+	struct gendisk          *gd;
+	struct request_queue    *q;
+#ifdef BLK_MQ_MODE
+	struct blk_mq_tag_set   *tag_set;
+#endif
+};
+
+static struct sbdd      __sbdd;
+static int              __sbdd_major = 0;
+static unsigned long    __sbdd_capacity_mib = 100;
+
+/*
+ * Copy one bio_vec segment between the caller's page and the backing store.
+ *
+ * @bvec: segment to transfer
+ * @pos:  first device sector the segment maps to
+ * @dir:  non-zero copies into the device (write), zero copies out of it (read)
+ *
+ * The transfer is clamped to the end of the device. Returns the number of
+ * whole sectors copied, which is 0 when @pos is at or past the end.
+ */
+static sector_t sbdd_xfer(struct bio_vec *bvec, sector_t pos, int dir)
+{
+	void *buff = page_address(bvec->bv_page) + bvec->bv_offset;
+	sector_t len = bvec->bv_len >> SBDD_SECTOR_SHIFT;
+	size_t offset;
+	size_t nbytes;
+
+	/*
+	 * sector_t is unsigned: with pos past the end, "capacity - pos" would
+	 * wrap to a huge length and the memcpy below would run off the buffer.
+	 */
+	if (pos >= __sbdd.capacity)
+		return 0;
+
+	if (len > __sbdd.capacity - pos)
+		len = __sbdd.capacity - pos;
+
+	offset = pos << SBDD_SECTOR_SHIFT;
+	nbytes = len << SBDD_SECTOR_SHIFT;
+
+	spin_lock(&__sbdd.datalock);
+
+	if (dir)
+		memcpy(__sbdd.data + offset, buff, nbytes);
+	else
+		memcpy(buff, __sbdd.data + offset, nbytes);
+
+	spin_unlock(&__sbdd.datalock);
+
+	pr_debug("pos=%6llu len=%4llu %s\n", pos, len, dir ? "written" : "read");
+
+	return len;
+}
+
+#ifdef BLK_MQ_MODE
+
+/* Transfer every segment of @rq, advancing the device position as it goes. */
+static void sbdd_xfer_rq(struct request *rq)
+{
+	struct req_iterator iter;
+	struct bio_vec bvec;
+	int dir = rq_data_dir(rq);
+	sector_t pos = blk_rq_pos(rq);
+
+	rq_for_each_segment(bvec, rq, iter)
+		pos += sbdd_xfer(&bvec, pos, dir);
+}
+
+/*
+ * blk-mq .queue_rq handler: services the request synchronously.
+ *
+ * Returns BLK_STS_OK once the request has been completed, or BLK_STS_IOERR
+ * (without starting the request) when the device is being deleted.
+ */
+static blk_status_t sbdd_queue_rq(struct blk_mq_hw_ctx *hctx,
+                                  struct blk_mq_queue_data const *bd)
+{
+	blk_status_t status = BLK_STS_OK;
+
+	/*
+	 * Take the reference before looking at the flag. Checking first and
+	 * counting afterwards leaves a window in which sbdd_delete() sets
+	 * 'deleting', sees refs_cnt == 0 and proceeds while this handler is
+	 * already past the check.
+	 */
+	atomic_inc(&__sbdd.refs_cnt);
+	smp_mb__after_atomic();
+
+	if (atomic_read(&__sbdd.deleting)) {
+		status = BLK_STS_IOERR;
+	} else {
+		blk_mq_start_request(bd->rq);
+		sbdd_xfer_rq(bd->rq);
+		blk_mq_end_request(bd->rq, BLK_STS_OK);
+	}
+
+	if (atomic_dec_and_test(&__sbdd.refs_cnt))
+		wake_up(&__sbdd.exitwait);
+
+	return status;
+}
+
+static struct blk_mq_ops const __sbdd_blk_mq_ops = {
+	/*
+	The function receives requests for the device as arguments
+	and can use various functions to process them. The functions
+	used to process requests in the handler are described below:
+
+	blk_mq_start_request()   - must be called before processing a request
+	blk_mq_requeue_request() - to re-send the request in the queue
+	blk_mq_end_request()     - to end request processing and notify upper layers
+	*/
+	.queue_rq = sbdd_queue_rq,
+};
+
+#else
+
+/* Transfer every segment of @bio, advancing the device position as it goes. */
+static void sbdd_xfer_bio(struct bio *bio)
+{
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	int dir = bio_data_dir(bio);
+	sector_t pos = bio->bi_iter.bi_sector;
+
+	bio_for_each_segment(bvec, bio, iter)
+		pos += sbdd_xfer(&bvec, pos, dir);
+}
+
+/*
+ * make_request handler for the bio-based (non blk-mq) build.
+ *
+ * The bio is always completed before returning: with an I/O error when the
+ * device is being deleted, normally otherwise. The return value is a polling
+ * cookie, not a status; this driver has nothing to poll, hence BLK_QC_T_NONE.
+ */
+static blk_qc_t sbdd_make_request(struct request_queue *q, struct bio *bio)
+{
+	/* Reference first, flag second -- see sbdd_delete() for the other side */
+	atomic_inc(&__sbdd.refs_cnt);
+	smp_mb__after_atomic();
+
+	if (atomic_read(&__sbdd.deleting)) {
+		/* The submitter waits for completion, so the bio must be ended */
+		bio_io_error(bio);
+	} else {
+		sbdd_xfer_bio(bio);
+		bio_endio(bio);
+	}
+
+	if (atomic_dec_and_test(&__sbdd.refs_cnt))
+		wake_up(&__sbdd.exitwait);
+
+	return BLK_QC_T_NONE;
+}
+
+#endif /* BLK_MQ_MODE */
+
+/*
+There are no read or write operations.
These operations are performed by
+the request() function associated with the request queue of the disk.
+*/
+static struct block_device_operations const __sbdd_bdev_ops = {
+	.owner = THIS_MODULE,
+};
+
+/*
+ * Bring the device up: register a major, allocate the zeroed backing store,
+ * the request queue and the gendisk, then publish the disk with add_disk().
+ *
+ * Returns 0 on success or a negative errno. On failure nothing is rolled
+ * back here; whatever was set up stays recorded in __sbdd / __sbdd_major so
+ * that sbdd_delete() can release it.
+ */
+static int sbdd_create(void)
+{
+	int ret = 0;
+
+	/*
+	 * Reset the state and set up the primitives sbdd_delete() touches before
+	 * the first point that can fail.
+	 */
+	memset(&__sbdd, 0, sizeof(struct sbdd));
+	spin_lock_init(&__sbdd.datalock);
+	init_waitqueue_head(&__sbdd.exitwait);
+
+	/*
+	This call is somewhat redundant, but used anyways by tradition.
+	The number is to be displayed in /proc/devices (0 for auto).
+	*/
+	pr_info("registering blkdev\n");
+	__sbdd_major = register_blkdev(0, SBDD_NAME);
+	if (__sbdd_major < 0) {
+		pr_err("call register_blkdev() failed with %d\n", __sbdd_major);
+		/* Report the real error and do not keep a negative "major" around */
+		ret = __sbdd_major;
+		__sbdd_major = 0;
+		return ret;
+	}
+
+	/*
+	 * Zero would request an empty allocation; anything above ULONG_MAX >> 20
+	 * overflows the byte count handed to the allocator.
+	 */
+	if (!__sbdd_capacity_mib || __sbdd_capacity_mib > (ULONG_MAX >> 20)) {
+		pr_err("invalid capacity_mib %lu\n", __sbdd_capacity_mib);
+		return -EINVAL;
+	}
+	__sbdd.capacity = (sector_t)__sbdd_capacity_mib * SBDD_MIB_SECTORS;
+
+	pr_info("allocating data\n");
+	/* Zeroed, so reads of a fresh disk never return stale kernel memory */
+	__sbdd.data = vzalloc(__sbdd.capacity << SBDD_SECTOR_SHIFT);
+	if (!__sbdd.data) {
+		pr_err("unable to alloc data\n");
+		return -ENOMEM;
+	}
+
+#ifdef BLK_MQ_MODE
+	pr_info("allocating tag_set\n");
+	__sbdd.tag_set = kzalloc(sizeof(*__sbdd.tag_set), GFP_KERNEL);
+	if (!__sbdd.tag_set) {
+		pr_err("unable to alloc tag_set\n");
+		return -ENOMEM;
+	}
+
+	/* Number of hardware dispatch queues */
+	__sbdd.tag_set->nr_hw_queues = 1;
+	/* Depth of hardware dispatch queues */
+	__sbdd.tag_set->queue_depth = 128;
+	__sbdd.tag_set->numa_node = NUMA_NO_NODE;
+	__sbdd.tag_set->ops = &__sbdd_blk_mq_ops;
+
+	ret = blk_mq_alloc_tag_set(__sbdd.tag_set);
+	if (ret) {
+		pr_err("call blk_mq_alloc_tag_set() failed with %d\n", ret);
+		return ret;
+	}
+
+	/* Creates both the hardware and the software queues and initializes structs */
+	pr_info("initing queue\n");
+	__sbdd.q = blk_mq_init_queue(__sbdd.tag_set);
+	if (IS_ERR(__sbdd.q)) {
+		ret = (int)PTR_ERR(__sbdd.q);
+		pr_err("call blk_mq_init_queue() failed with %d\n", ret);
+		/* Keep sbdd_delete() from treating the error pointer as a queue */
+		__sbdd.q = NULL;
+		return ret;
+	}
+#else
+	pr_info("allocating queue\n");
+	__sbdd.q = blk_alloc_queue(GFP_KERNEL);
+	if (!__sbdd.q) {
+		pr_err("call blk_alloc_queue() failed\n");
+		return -ENOMEM;
+	}
+	blk_queue_make_request(__sbdd.q, sbdd_make_request);
+#endif /* BLK_MQ_MODE */
+
+	/* Configure queue */
+	blk_queue_logical_block_size(__sbdd.q, SBDD_SECTOR_SIZE);
+
+	/* A disk must have at least one minor */
+	pr_info("allocating disk\n");
+	__sbdd.gd = alloc_disk(1);
+	if (!__sbdd.gd) {
+		pr_err("call alloc_disk() failed\n");
+		return -ENOMEM;
+	}
+
+	/* Configure gendisk */
+	__sbdd.gd->queue = __sbdd.q;
+	__sbdd.gd->major = __sbdd_major;
+	__sbdd.gd->first_minor = 0;
+	__sbdd.gd->fops = &__sbdd_bdev_ops;
+	/* Represents name in /proc/partitions and /sys/block */
+	scnprintf(__sbdd.gd->disk_name, DISK_NAME_LEN, SBDD_NAME);
+	set_capacity(__sbdd.gd, __sbdd.capacity);
+
+	/*
+	Allocating gd does not make it available, add_disk() required.
+	After this call, gd methods can be called at any time. Should not be
+	called before the driver is fully initialized and ready to process reqs.
+	*/
+	pr_info("adding disk\n");
+	add_disk(__sbdd.gd);
+
+	return ret;
+}
+
+/*
+ * Tear the device down and release everything sbdd_create() set up.
+ *
+ * Every step is guarded by a check of the corresponding pointer or major, so
+ * this is also the cleanup path after a partially failed sbdd_create().
+ */
+static void sbdd_delete(void)
+{
+	atomic_set(&__sbdd.deleting, 1);
+	/* Publish the flag before refs_cnt is sampled by the wait below */
+	smp_mb();
+
+	/*
+	 * Give running handlers up to a second to leave. A timeout is only
+	 * reported: returning here would leave the disk, queue and major
+	 * registered while the module text they point at goes away.
+	 * NOTE(review): relies on blk_cleanup_queue() below draining the queue
+	 * before the backing store is freed -- confirm for the target kernel.
+	 */
+	if (!wait_event_timeout(__sbdd.exitwait,
+	                        !atomic_read(&__sbdd.refs_cnt),
+	                        msecs_to_jiffies(1000)))
+		pr_warn("timed out waiting for in-flight requests, tearing down anyway\n");
+
+	/* gd will be removed only after the last reference put */
+	if (__sbdd.gd) {
+		pr_info("deleting disk\n");
+		del_gendisk(__sbdd.gd);
+	}
+
+	if (__sbdd.q) {
+		pr_info("cleaning up queue\n");
+		blk_cleanup_queue(__sbdd.q);
+	}
+
+	if (__sbdd.gd)
+		put_disk(__sbdd.gd);
+
+#ifdef BLK_MQ_MODE
+	/* tags is only set once blk_mq_alloc_tag_set() has succeeded */
+	if (__sbdd.tag_set && __sbdd.tag_set->tags) {
+		pr_info("freeing tag_set\n");
+		blk_mq_free_tag_set(__sbdd.tag_set);
+	}
+
+	/* kfree(NULL) is a no-op, no guard needed */
+	kfree(__sbdd.tag_set);
+#endif
+
+	if (__sbdd.data) {
+		pr_info("freeing data\n");
+		vfree(__sbdd.data);
+	}
+
+	memset(&__sbdd, 0, sizeof(struct sbdd));
+
+	if (__sbdd_major > 0) {
+		pr_info("unregistering blkdev\n");
+		unregister_blkdev(__sbdd_major, SBDD_NAME);
+		__sbdd_major = 0;
+	}
+}
+
+/*
+Note __init is for the kernel to drop this function after
+initialization complete making its memory available for other uses.
+There is also __initdata note, same but used for variables.
+*/
+static int __init sbdd_init(void)
+{
+	int err;
+
+	pr_info("starting initialization...\n");
+
+	err = sbdd_create();
+	if (!err) {
+		pr_info("initialization complete\n");
+		return 0;
+	}
+
+	/* Creation stopped part-way: release whatever it managed to set up */
+	pr_warn("initialization failed\n");
+	sbdd_delete();
+
+	return err;
+}
+
+/*
+__exit makes the compiler put this function into a dedicated ELF section.
+Such functions may be discarded altogether (e.g. when the module is built
+directly into the kernel). __exitdata is the counterpart for variables.
+*/
+static void __exit sbdd_exit(void)
+{
+	pr_info("exiting...\n");
+	sbdd_delete();
+	pr_info("exiting complete\n");
+}
+
+/* Entry point run when the module is loaded. Mandatory. */
+module_init(sbdd_init);
+
+/* Entry point run when the module is unloaded. Without it unloading is not allowed. */
+module_exit(sbdd_exit);
+
+/* Desired capacity in MiB, settable at insmod time */
+module_param_named(capacity_mib, __sbdd_capacity_mib, ulong, S_IRUGO);
+
+/* Declares a free license to the kernel; a warning is printed without it. */
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Simple Block Device Driver");
-- cgit v1.2.3-18-g5258