From c0aa4d19cbb18d38b9cc05299e529c838aade82e Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Thu, 9 May 2013 03:23:34 +0400 Subject: [PATCH] I want to write a simple Flash Translation Layer Overlay for block devices --- Makefile | 5 ++ STL | 60 +++++++++++++++++++ sftl.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 240 insertions(+) create mode 100644 Makefile create mode 100644 STL create mode 100644 sftl.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5055ba5 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +obj-m := sftl.o +KDIR := /lib/modules/$(shell uname -r)/build +PWD := $(shell pwd) +default: + $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules diff --git a/STL b/STL new file mode 100644 index 0000000..2f03055 --- /dev/null +++ b/STL @@ -0,0 +1,60 @@ +Stupid Translation Layer: + +mapping = 16b: +4b magic +4b block number +4b version number +4b crc32 + +[block] phys block = 512 +[cluster] mapping unit = 4096? = X phys blocks, X=8 +index block = phys block +N = index block size / 16 = phys block size / 16 = 32 +[segment] sequence of N mapping units and 1 physical block +[erase unit] device erase unit (or management unit in case of FTLed flash like USB/SD) + +* maintain block mappings in RAM +* reserve at least N*(N*X+1) phys blocks for defragmentation +* scan each (N*X+1)th device block during mount +* => for the case of 512 byte sector mappings will eat 128MB of 4GB flash, plus 528KB reserved space +* => for 4096b sector AND 4096b index block mappings = 16MB/4GB, but reserved space = 256MB!!! +* => for 4096b sector and 512b index block mappings = 16MB/4GB, reserved space = 4MB +* first just write next available map unit +* commit mappings each N blocks or each 1 second +* mark blocks having old version numbers as unused (only in RAM, do not touch the flash itself!) +* N unused blocks = "free block sequence" +* When we wrap around the ring buffer end, we must find free place to continue writing. 
+ (and ideally it should be exactly after the previous end). There will always be enough + reserved space to move blocks, because each partially occupied segment has at least 1 + free block, and we have N segments reserved. We just find the first available segments that + have at least N free blocks in total, and move them to the reserved space. If there is an + offset between the first moved block and the previous end of the ring buffer, we decide between + moving or skipping blocks based on the offset cost and the moving cost. + For example, if the offset to the first partially free segment is VERY BIG, we won't move anything. + But we ALWAYS take the first available partially free segments - because the increase in offset + cost is almost always greater than the decrease in moving cost. + + Here are the cleaning costs: + The cost of skipping some segments is determined by the idea that otherwise we could have written + to them and gained more performance (totally true for FTLed devices like a USB flash drive or SD + card; but the expression differs for raw NAND). + + E = erase unit size in blocks + L = number of last written segment + O = number of first moved segment + S = N*X+1 = segment size in blocks + = (blocks occupied in sequence)*(READ + WRITE) + min(, ) + = (O-L)*S*(READ + WRITE) + = WRITE*(int(O*S/E) > int(L*S/E) ? (E-(L*S)%E) + ((O*S)%E) : S*(O-L)) + = WRITE*(int(O*S/E) > int(L*S/E) ? ((O*S)%E) : 0) + +Data structures: +* Mapping/version array: 8b * block count = 8MB for 4GB flash and 4096/512 map/phys sizes +* Next block pointer: exactly 1 integer because STL flash is a ring buffer + filled with number of first free block followed or included in an empty sequence + +USB flash read/write cost: +* Write <8Kb cost = 4 * (Random read <8Kb cost) +* Write >=16Kb cost = 2 * (Random read >=16Kb cost) +* Best speed is achieved with I/O size >=16Kb, ideally 32Kb; larger values are not much better. 
+* Erase unit is usually around 1MB diff --git a/sftl.c b/sftl.c new file mode 100644 index 0000000..93b0c37 --- /dev/null +++ b/sftl.c @@ -0,0 +1,175 @@ +/* + * A sample, extra-simple block driver. Updated for kernel 2.6.31. + * + * (C) 2003 Eklektix, Inc. + * (C) 2010 Pat Patterson + * Redistributable under the terms of the GNU GPL. + */ + +#include +#include +#include + +#include /* printk() */ +#include /* everything... */ +#include /* error codes */ +#include /* size_t */ +#include +#include +#include +#include + +MODULE_LICENSE("Dual BSD/GPL"); +static char *Version = "1.4"; + +static int major_num = 0; +module_param(major_num, int, 0); +static int logical_block_size = 512; +module_param(logical_block_size, int, 0); +static int nsectors = 1024; /* How big the drive is */ +module_param(nsectors, int, 0); + +/* + * We can tweak our hardware sector size, but the kernel talks to us + * in terms of small sectors, always. + */ +#define KERNEL_SECTOR_SIZE 512 + +/* + * Our request queue. + */ +static struct request_queue *Queue; + +/* + * The internal representation of our device. + */ +static struct sbd_device { + unsigned long size; + spinlock_t lock; + u8 *data; + struct gendisk *gd; +} Device; + +/* + * Handle an I/O request. + */ +static void sbd_transfer(struct sbd_device *dev, sector_t sector, + unsigned long nsect, char *buffer, int write) { + unsigned long offset = sector * logical_block_size; + unsigned long nbytes = nsect * logical_block_size; + + if ((offset + nbytes) > dev->size) { + printk (KERN_NOTICE "sbd: Beyond-end write (%ld %ld)\n", offset, nbytes); + return; + } + if (write) + memcpy(dev->data + offset, buffer, nbytes); + else + memcpy(buffer, dev->data + offset, nbytes); +} + +static void sbd_request(struct request_queue *q) { + struct request *req; + + req = blk_fetch_request(q); + while (req != NULL) { + // blk_fs_request() was removed in 2.6.36 - many thanks to + // Christian Paro for the heads up and fix... 
+ //if (!blk_fs_request(req)) { + if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) { + printk (KERN_NOTICE "Skip non-CMD request\n"); + __blk_end_request_all(req, -EIO); + continue; + } + sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req), + req->buffer, rq_data_dir(req)); + if ( ! __blk_end_request_cur(req, 0) ) { + req = blk_fetch_request(q); + } + } +} + +/* + * The HDIO_GETGEO ioctl is handled in blkdev_ioctl(), which + * calls this. We need to implement getgeo, since we can't + * use tools such as fdisk to partition the drive otherwise. + */ +int sbd_getgeo(struct block_device * block_device, struct hd_geometry * geo) { + long size; + + /* We have no real geometry, of course, so make something up. */ + size = Device.size * (logical_block_size / KERNEL_SECTOR_SIZE); + geo->cylinders = (size & ~0x3f) >> 6; + geo->heads = 4; + geo->sectors = 16; + geo->start = 0; + return 0; +} + +/* + * The device operations structure. + */ +static struct block_device_operations sbd_ops = { + .owner = THIS_MODULE, + .getgeo = sbd_getgeo +}; + +static int __init sbd_init(void) { + /* + * Set up our internal device. + */ + Device.size = nsectors * logical_block_size; + spin_lock_init(&Device.lock); + Device.data = vmalloc(Device.size); + if (Device.data == NULL) + return -ENOMEM; + /* + * Get a request queue. + */ + Queue = blk_init_queue(sbd_request, &Device.lock); + if (Queue == NULL) + goto out; + blk_queue_logical_block_size(Queue, logical_block_size); + /* + * Get registered. + */ + major_num = register_blkdev(major_num, "sbd"); + if (major_num < 0) { + printk(KERN_WARNING "sbd: unable to get major number\n"); + goto out; + } + /* + * And the gendisk structure. 
+ */ + Device.gd = alloc_disk(16); + if (!Device.gd) + goto out_unregister; + Device.gd->major = major_num; + Device.gd->first_minor = 0; + Device.gd->fops = &sbd_ops; + Device.gd->private_data = &Device; + strcpy(Device.gd->disk_name, "sbd0"); + set_capacity(Device.gd, nsectors); + Device.gd->queue = Queue; + add_disk(Device.gd); + + return 0; + +out_unregister: + unregister_blkdev(major_num, "sbd"); +out: + vfree(Device.data); + return -ENOMEM; +} + +static void __exit sbd_exit(void) +{ + del_gendisk(Device.gd); + put_disk(Device.gd); + unregister_blkdev(major_num, "sbd"); + blk_cleanup_queue(Queue); + vfree(Device.data); +} + +module_init(sbd_init); +module_exit(sbd_exit);