aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Kbuild38
-rw-r--r--Makefile41
-rw-r--r--readme.md19
-rw-r--r--sbdd.c349
4 files changed, 447 insertions, 0 deletions
diff --git a/Kbuild b/Kbuild
new file mode 100644
index 0000000..65143ef
--- /dev/null
+++ b/Kbuild
@@ -0,0 +1,38 @@
+######## Documentation
+
+# In newer versions of the kernel, kbuild will first look for a file named
+# "Kbuild," and only if that is not found, will it then look for a makefile.
+
+# The kbuild system will build <module_name>.o from <module_name>.c,
+# and, after linking, will result in the kernel module <module_name>.ko:
+# obj-m := sbdd.o
+
+# When the module is built from multiple sources, an additional line
+# with '<module_name>-y := <src1>.o <src2>.o ...' is needed:
+# Or you can do smth like this:
+# sbdd-y := src1.o
+# sbdd-y += src2.o
+# ...
+
+# kbuild supports building multiple modules with a single build file. For example,
+# if you wanted to build two modules, foo.ko and bar.ko, the kbuild lines would be:
+# obj-m := foo.o bar.o
+# foo-y := <foo_srcs>
+# bar-y := <bar_srcs>
+
+# You can also add compile flags here, e.g.:
+# ccflags-y := -I$(src)/include
+# ccflags-y += -I$(src)/include
+# About $(src): when kbuild executes, the current directory is always the root of
+# the kernel tree (the argument to "-C") and therefore an absolute path is needed.
+# $(src) provides the absolute path by pointing to the directory where the
+# currently executing kbuild file is located.
+
+
+######## Kbuild
+
+ccflags-y := -Wall
+# ccflags-y += -DBLK_MQ_MODE
+# CFLAGS_sbdd.o := -DDEBUG
+
+obj-m := sbdd.o
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0216fb0
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,41 @@
+######## Documentation (1/2)
+
+# Kernel build system requires makefiles that do not look like traditional ones.
+# The said system hadles all this stuff. See for more:
+# https://www.kernel.org/doc/Documentation/kbuild/
+# (makefiles.txt and modules.txt should be the main ones)
+
+# This line states that there is one module to be built from obj file <name>.o.
+# The resulting module will be named <name>.ko after being built:
+# obj-m := sbdd.o
+
+# The command to build a module is the following:
+# $ make -C <path_to_kernel_source_tree> M=`pwd` modules
+# In the '<path_...>' it finds kernel's top-level makefile.
+# 'M=...' option sets the path to module's files.
+# 'modules' is the target of make. It refers to the list of modules found
+# in the obj-m variable.
+
+
+######## Documentation (2/2)
+
+# There is an idiom on creating makefiles for kernel developers.
+# If KERNELRELEASE is defined, we've been invoked from the kernel build system
+# (we get here the 2nd time when 'modules' target is processed):
+# ifneq ($(KERNELRELEASE),)
+# # It is actually a Kbuild part of makefile (should be placed in different file)
+# # and will only be processed by kbuild system, not make.
+# obj-m := sbdd.o
+# # Otherwise we were called directly from the command line and should invoke kbuild.
+# else
+# default:
+# $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules
+# endif
+
+
+######## Makefile
+
+default:
+ $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules
+clean:
+ $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..c3bec58
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,19 @@
+# Simple Block Device Driver
+Implementation of Linux Kernel 5.4.X simple block device.
+
+## Build
+- regular:
+`$ make`
+- with blk_mq support:
+uncomment `ccflags-y += -DBLK_MQ_MODE` in `Kbuild`
+- with requests debug info:
+uncomment `CFLAGS_sbdd.o := -DDEBUG` in `Kbuild`
+
+## Clean
+`$ make clean`
+
+## References
+- [Linux Device Drivers](https://lwn.net/Kernel/LDD3/)
+- [Linux Kernel Development](https://rlove.org)
+- [Linux Kernel Teaching](https://linux-kernel-labs.github.io/refs/heads/master/labs/block_device_drivers.html)
+- [Linux Kernel Sources](https://github.com/torvalds/linux)
diff --git a/sbdd.c b/sbdd.c
new file mode 100644
index 0000000..f06bcb0
--- /dev/null
+++ b/sbdd.c
@@ -0,0 +1,349 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <linux/bvec.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+#include <linux/numa.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/genhd.h>
+#include <linux/blkdev.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/jiffies.h>
+#include <linux/moduleparam.h>
+#include <linux/spinlock_types.h>
+#ifdef BLK_MQ_MODE
+#include <linux/blk-mq.h>
+#endif
+
+#define SBDD_SECTOR_SHIFT 9
+#define SBDD_SECTOR_SIZE (1 << SBDD_SECTOR_SHIFT)
+#define SBDD_MIB_SECTORS (1 << (20 - SBDD_SECTOR_SHIFT))
+#define SBDD_NAME "sbdd"
+
+struct sbdd {
+ wait_queue_head_t exitwait;
+ spinlock_t datalock;
+ atomic_t deleting;
+ atomic_t refs_cnt;
+ sector_t capacity;
+ u8 *data;
+ struct gendisk *gd;
+ struct request_queue *q;
+#ifdef BLK_MQ_MODE
+ struct blk_mq_tag_set *tag_set;
+#endif
+};
+
+static struct sbdd __sbdd;
+static int __sbdd_major = 0;
+static unsigned long __sbdd_capacity_mib = 100;
+
+static sector_t sbdd_xfer(struct bio_vec* bvec, sector_t pos, int dir)
+{
+ void *buff = page_address(bvec->bv_page) + bvec->bv_offset;
+ sector_t len = bvec->bv_len >> SBDD_SECTOR_SHIFT;
+ size_t offset;
+ size_t nbytes;
+
+ if (pos + len > __sbdd.capacity)
+ len = __sbdd.capacity - pos;
+
+ offset = pos << SBDD_SECTOR_SHIFT;
+ nbytes = len << SBDD_SECTOR_SHIFT;
+
+ spin_lock(&__sbdd.datalock);
+
+ if (dir)
+ memcpy(__sbdd.data + offset, buff, nbytes);
+ else
+ memcpy(buff, __sbdd.data + offset, nbytes);
+
+ spin_unlock(&__sbdd.datalock);
+
+ pr_debug("pos=%6llu len=%4llu %s\n", pos, len, dir ? "written" : "read");
+
+ return len;
+}
+
+#ifdef BLK_MQ_MODE
+
+static void sbdd_xfer_rq(struct request *rq)
+{
+ struct req_iterator iter;
+ struct bio_vec bvec;
+ int dir = rq_data_dir(rq);
+ sector_t pos = blk_rq_pos(rq);
+
+ rq_for_each_segment(bvec, rq, iter)
+ pos += sbdd_xfer(&bvec, pos, dir);
+}
+
+static blk_status_t sbdd_queue_rq(struct blk_mq_hw_ctx *hctx,
+ struct blk_mq_queue_data const *bd)
+{
+ if (atomic_read(&__sbdd.deleting))
+ return BLK_STS_IOERR;
+
+ atomic_inc(&__sbdd.refs_cnt);
+
+ blk_mq_start_request(bd->rq);
+ sbdd_xfer_rq(bd->rq);
+ blk_mq_end_request(bd->rq, BLK_STS_OK);
+
+ if (atomic_dec_and_test(&__sbdd.refs_cnt))
+ wake_up(&__sbdd.exitwait);
+
+ return BLK_STS_OK;
+}
+
+static struct blk_mq_ops const __sbdd_blk_mq_ops = {
+ /*
+ The function receives requests for the device as arguments
+ and can use various functions to process them. The functions
+ used to process requests in the handler are described below:
+
+ blk_mq_start_request() - must be called before processing a request
+ blk_mq_requeue_request() - to re-send the request in the queue
+ blk_mq_end_request() - to end request processing and notify upper layers
+ */
+ .queue_rq = sbdd_queue_rq,
+};
+
+#else
+
+static void sbdd_xfer_bio(struct bio *bio)
+{
+ struct bvec_iter iter;
+ struct bio_vec bvec;
+ int dir = bio_data_dir(bio);
+ sector_t pos = bio->bi_iter.bi_sector;
+
+ bio_for_each_segment(bvec, bio, iter)
+ pos += sbdd_xfer(&bvec, pos, dir);
+}
+
+static blk_qc_t sbdd_make_request(struct request_queue *q, struct bio *bio)
+{
+ if (atomic_read(&__sbdd.deleting))
+ return BLK_STS_IOERR;
+
+ atomic_inc(&__sbdd.refs_cnt);
+
+ sbdd_xfer_bio(bio);
+ bio_endio(bio);
+
+ if (atomic_dec_and_test(&__sbdd.refs_cnt))
+ wake_up(&__sbdd.exitwait);
+
+ return BLK_STS_OK;
+}
+
+#endif /* BLK_MQ_MODE */
+
+/*
+There are no read or write operations. These operations are performed by
+the request() function associated with the request queue of the disk.
+*/
+static struct block_device_operations const __sbdd_bdev_ops = {
+ .owner = THIS_MODULE,
+};
+
+static int sbdd_create(void)
+{
+ int ret = 0;
+
+ /*
+ This call is somewhat redundant, but used anyways by tradition.
+ The number is to be displayed in /proc/devices (0 for auto).
+ */
+ pr_info("registering blkdev\n");
+ __sbdd_major = register_blkdev(0, SBDD_NAME);
+ if (__sbdd_major < 0) {
+ pr_err("call register_blkdev() failed with %d\n", __sbdd_major);
+ return -EBUSY;
+ }
+
+ memset(&__sbdd, 0, sizeof(struct sbdd));
+ __sbdd.capacity = (sector_t)__sbdd_capacity_mib * SBDD_MIB_SECTORS;
+
+ pr_info("allocating data\n");
+ __sbdd.data = vmalloc(__sbdd.capacity << SBDD_SECTOR_SHIFT);
+ if (!__sbdd.data) {
+ pr_err("unable to alloc data\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&__sbdd.datalock);
+ init_waitqueue_head(&__sbdd.exitwait);
+
+#ifdef BLK_MQ_MODE
+ pr_info("allocating tag_set\n");
+ __sbdd.tag_set = kzalloc(sizeof(struct blk_mq_tag_set), GFP_KERNEL);
+ if (!__sbdd.tag_set) {
+ pr_err("unable to alloc tag_set\n");
+ return -ENOMEM;
+ }
+
+ /* Number of hardware dispatch queues */
+ __sbdd.tag_set->nr_hw_queues = 1;
+ /* Depth of hardware dispatch queues */
+ __sbdd.tag_set->queue_depth = 128;
+ __sbdd.tag_set->numa_node = NUMA_NO_NODE;
+ __sbdd.tag_set->ops = &__sbdd_blk_mq_ops;
+
+ ret = blk_mq_alloc_tag_set(__sbdd.tag_set);
+ if (ret) {
+ pr_err("call blk_mq_alloc_tag_set() failed with %d\n", ret);
+ return ret;
+ }
+
+ /* Creates both the hardware and the software queues and initializes structs */
+ pr_info("initing queue\n");
+ __sbdd.q = blk_mq_init_queue(__sbdd.tag_set);
+ if (IS_ERR(__sbdd.q)) {
+ ret = (int)PTR_ERR(__sbdd.q);
+ pr_err("call blk_mq_init_queue() failed witn %d\n", ret);
+ __sbdd.q = NULL;
+ return ret;
+ }
+#else
+ pr_info("allocating queue\n");
+ __sbdd.q = blk_alloc_queue(GFP_KERNEL);
+ if (!__sbdd.q) {
+ pr_err("call blk_alloc_queue() failed\n");
+ return -EINVAL;
+ }
+ blk_queue_make_request(__sbdd.q, sbdd_make_request);
+#endif /* BLK_MQ_MODE */
+
+ /* Configure queue */
+ blk_queue_logical_block_size(__sbdd.q, SBDD_SECTOR_SIZE);
+
+ /* A disk must have at least one minor */
+ pr_info("allocating disk\n");
+ __sbdd.gd = alloc_disk(1);
+
+ /* Configure gendisk */
+ __sbdd.gd->queue = __sbdd.q;
+ __sbdd.gd->major = __sbdd_major;
+ __sbdd.gd->first_minor = 0;
+ __sbdd.gd->fops = &__sbdd_bdev_ops;
+ /* Represents name in /proc/partitions and /sys/block */
+ scnprintf(__sbdd.gd->disk_name, DISK_NAME_LEN, SBDD_NAME);
+ set_capacity(__sbdd.gd, __sbdd.capacity);
+
+ /*
+ Allocating gd does not make it available, add_disk() required.
+ After this call, gd methods can be called at any time. Should not be
+ called before the driver is fully initialized and ready to process reqs.
+ */
+ pr_info("adding disk\n");
+ add_disk(__sbdd.gd);
+
+ return ret;
+}
+
+static void sbdd_delete(void)
+{
+ atomic_set(&__sbdd.deleting, 1);
+
+ if (!wait_event_timeout(__sbdd.exitwait,
+ !atomic_read(&__sbdd.refs_cnt),
+ msecs_to_jiffies(1000))) {
+ pr_err("call sbdd_delete() failed, timed out\n");
+ return;
+ }
+
+ /* gd will be removed only after the last reference put */
+ if (__sbdd.gd) {
+ pr_info("deleting disk\n");
+ del_gendisk(__sbdd.gd);
+ }
+
+ if (__sbdd.q) {
+ pr_info("cleaning up queue\n");
+ blk_cleanup_queue(__sbdd.q);
+ }
+
+ if (__sbdd.gd)
+ put_disk(__sbdd.gd);
+
+#ifdef BLK_MQ_MODE
+ if (__sbdd.tag_set && __sbdd.tag_set->tags) {
+ pr_info("freeing tag_set\n");
+ blk_mq_free_tag_set(__sbdd.tag_set);
+ }
+
+ if (__sbdd.tag_set)
+ kfree(__sbdd.tag_set);
+#endif
+
+ if (__sbdd.data) {
+ pr_info("freeing data\n");
+ vfree(__sbdd.data);
+ }
+
+ memset(&__sbdd, 0, sizeof(struct sbdd));
+
+ if (__sbdd_major > 0) {
+ pr_info("unregistering blkdev\n");
+ unregister_blkdev(__sbdd_major, SBDD_NAME);
+ __sbdd_major = 0;
+ }
+}
+
+/*
+Note __init is for the kernel to drop this function after
+initialization complete making its memory available for other uses.
+There is also __initdata note, same but used for variables.
+*/
+static int __init sbdd_init(void)
+{
+ int ret = 0;
+
+ pr_info("starting initialization...\n");
+ ret = sbdd_create();
+
+ if (ret) {
+ pr_warn("initialization failed\n");
+ sbdd_delete();
+ } else {
+ pr_info("initialization complete\n");
+ }
+
+ return ret;
+}
+
+/*
+Note __exit is for the compiler to place this code in a special ELF section.
+Sometimes such functions are simply discarded (e.g. when module is built
+directly into the kernel). There is also __exitdata note.
+*/
+static void __exit sbdd_exit(void)
+{
+ pr_info("exiting...\n");
+ sbdd_delete();
+ pr_info("exiting complete\n");
+}
+
+/* Called on module loading. Is mandatory. */
+module_init(sbdd_init);
+
+/* Called on module unloading. Unloading module is not allowed without it. */
+module_exit(sbdd_exit);
+
+/* Set desired capacity with insmod */
+module_param_named(capacity_mib, __sbdd_capacity_mib, ulong, S_IRUGO);
+
+/* Note for the kernel: a free license module. A warning will be outputted without it. */
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Simple Block Device Driver");