I want to write a simple Flash Translation Layer Overlay for block devices
commit
c0aa4d19cb
|
@ -0,0 +1,5 @@
|
|||
# Out-of-tree build of the sftl kernel module.
# KDIR defaults to the build tree of the running kernel; it can be
# overridden on the command line (make KDIR=/path/to/kernel/build).
obj-m := sftl.o

KDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

.PHONY: default clean

# M= is the supported way to point kbuild at an external module directory;
# the older SUBDIRS= spelling is obsolete and ignored by newer kernels.
default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
|
|
@ -0,0 +1,60 @@
|
|||
Stupid Translation Layer:
|
||||
|
||||
mapping entry = 16 bytes (four 4-byte fields):
|
||||
4b magic
|
||||
4b block number
|
||||
4b version number
|
||||
4b crc32
|
||||
|
||||
[block] phys block = 512
|
||||
[cluster] mapping unit = 4096? = X phys blocks, X=8
|
||||
index block = phys block
|
||||
N = index block size / 16 = phys block size / 16 = 32
|
||||
[segment] sequence of N mapping units and 1 physical block
|
||||
[erase unit] device erase unit (or management unit in case of FTLed flash like USB/SD)
|
||||
|
||||
* maintain block mappings in RAM
|
||||
* reserve at least N*(N*X+1) phys blocks for defragmentation
|
||||
* scan each (N*X+1)th device block during mount
|
||||
* => with 512-byte mapping units, the mappings will consume 128MB of a 4GB flash, plus 528KB of reserved space
|
||||
* => for 4096b sector AND 4096b index block mappings = 16MB/4GB, but reserved space = 256MB!!!
|
||||
* => for 4096b sector and 512b index block mappings = 16MB/4GB, reserved space = 4MB
|
||||
* first just write next available map unit
|
||||
* commit mappings every N blocks or every second, whichever comes first
|
||||
* mark blocks having old version numbers as unused (only in RAM, do not touch the flash itself!)
|
||||
* N unused blocks = "free block sequence"
|
||||
* When we wrap around the ring buffer end, we must find free place to continue writing.
|
||||
(and ideally it should be exactly after the previous end). There will always be enough
|
||||
reserved space to move blocks, because each partially occupied segment has at least 1
|
||||
free block, and we have N segments reserved. We just find first available segments that
|
||||
have at least N free blocks in total, and move them to reserved space. If there is an
|
||||
offset between first moved block and the previous end of ring buffer, we decide between
|
||||
moving or skipping blocks based on <skip cost> and <full move cost>.
|
||||
For example, if the offset to first partially free segment is VERY BIG, we won't move anything.
|
||||
But we ALWAYS take first available partially free segments - because the increasing offset
|
||||
cost is almost always greater than the decrease of moving cost.
|
||||
|
||||
Here are the cleaning costs:
|
||||
Cost of skipping some segments is determined by the idea that else we could write
|
||||
them and gain more performance (totally true for FTLed devices like USB flash drive or SD
|
||||
card; but the expression differs for raw NAND).
|
||||
|
||||
E = erase unit size in blocks
|
||||
L = number of last written segment
|
||||
O = number of first moved segment
|
||||
S = N*X+1 = segment size in blocks
|
||||
<move cost> = (blocks occupied in sequence)*(READ + WRITE) + min(<skip cost>, <full move cost>)
|
||||
<full move cost> = (O-L)*S*(READ + WRITE)
|
||||
<skip cost> = WRITE*(int(O*S/E) > int(L*S/E) ? (E-(L*S)%E) + ((O*S)%E) : S*(O-L))
|
||||
<skip cost for raw NAND> = WRITE*(int(O*S/E) > int(L*S/E) ? ((O*S)%E) : 0)
|
||||
|
||||
Data structures:
|
||||
* Mapping/version array: 8b * block count = 8MB for 4GB flash and 4096/512 map/phys sizes
|
||||
* Next block pointer: exactly 1 integer because STL flash is a ring buffer
|
||||
holding the number of the first free block that starts, or lies within, a free block sequence
|
||||
|
||||
USB flash read/write cost:
|
||||
* Write <8Kb cost = 4 * (Random read <8Kb cost)
|
||||
* Write >=16Kb cost = 2 * (Random read >=16Kb cost)
|
||||
* Best speed is achieved with I/O sizes >=16Kb, ideally 32Kb; larger sizes bring little further improvement.
|
||||
* Erase unit is usually around 1MB
|
|
@ -0,0 +1,175 @@
|
|||
/*
|
||||
* A sample, extra-simple block driver. Updated for kernel 2.6.31.
|
||||
*
|
||||
* (C) 2003 Eklektix, Inc.
|
||||
* (C) 2010 Pat Patterson <pat at superpat dot com>
|
||||
* Redistributable under the terms of the GNU GPL.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#include <linux/kernel.h> /* printk() */
|
||||
#include <linux/fs.h> /* everything... */
|
||||
#include <linux/errno.h> /* error codes */
|
||||
#include <linux/types.h> /* size_t */
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/hdreg.h>
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
static char *Version = "1.4";
|
||||
|
||||
static int major_num = 0;
|
||||
module_param(major_num, int, 0);
|
||||
static int logical_block_size = 512;
|
||||
module_param(logical_block_size, int, 0);
|
||||
static int nsectors = 1024; /* How big the drive is */
|
||||
module_param(nsectors, int, 0);
|
||||
|
||||
/*
 * The hardware sector size of the device is tunable, but the kernel
 * always addresses us in small, fixed-size sectors of this many bytes.
 */
#define KERNEL_SECTOR_SIZE 512

/*
 * The single request queue used by this driver.
 */
static struct request_queue *Queue;
|
||||
|
||||
/*
|
||||
* The internal representation of our device.
|
||||
*/
|
||||
static struct sbd_device {
|
||||
unsigned long size;
|
||||
spinlock_t lock;
|
||||
u8 *data;
|
||||
struct gendisk *gd;
|
||||
} Device;
|
||||
|
||||
/*
|
||||
* Handle an I/O request.
|
||||
*/
|
||||
static void sbd_transfer(struct sbd_device *dev, sector_t sector,
|
||||
unsigned long nsect, char *buffer, int write) {
|
||||
unsigned long offset = sector * logical_block_size;
|
||||
unsigned long nbytes = nsect * logical_block_size;
|
||||
|
||||
if ((offset + nbytes) > dev->size) {
|
||||
printk (KERN_NOTICE "sbd: Beyond-end write (%ld %ld)\n", offset, nbytes);
|
||||
return;
|
||||
}
|
||||
if (write)
|
||||
memcpy(dev->data + offset, buffer, nbytes);
|
||||
else
|
||||
memcpy(buffer, dev->data + offset, nbytes);
|
||||
}
|
||||
|
||||
static void sbd_request(struct request_queue *q) {
|
||||
struct request *req;
|
||||
|
||||
req = blk_fetch_request(q);
|
||||
while (req != NULL) {
|
||||
// blk_fs_request() was removed in 2.6.36 - many thanks to
|
||||
// Christian Paro for the heads up and fix...
|
||||
//if (!blk_fs_request(req)) {
|
||||
if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) {
|
||||
printk (KERN_NOTICE "Skip non-CMD request\n");
|
||||
__blk_end_request_all(req, -EIO);
|
||||
continue;
|
||||
}
|
||||
sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
|
||||
req->buffer, rq_data_dir(req));
|
||||
if ( ! __blk_end_request_cur(req, 0) ) {
|
||||
req = blk_fetch_request(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The HDIO_GETGEO ioctl is handled in blkdev_ioctl(), which
|
||||
* calls this. We need to implement getgeo, since we can't
|
||||
* use tools such as fdisk to partition the drive otherwise.
|
||||
*/
|
||||
int sbd_getgeo(struct block_device * block_device, struct hd_geometry * geo) {
|
||||
long size;
|
||||
|
||||
/* We have no real geometry, of course, so make something up. */
|
||||
size = Device.size * (logical_block_size / KERNEL_SECTOR_SIZE);
|
||||
geo->cylinders = (size & ~0x3f) >> 6;
|
||||
geo->heads = 4;
|
||||
geo->sectors = 16;
|
||||
geo->start = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The device operations structure.
|
||||
*/
|
||||
static struct block_device_operations sbd_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.getgeo = sbd_getgeo
|
||||
};
|
||||
|
||||
/*
 * Module entry point: allocate the backing store, create the request
 * queue, register the block major and publish the "sbd0" disk.
 *
 * Returns 0 on success or a negative errno. On failure every resource
 * acquired up to that point is released again.
 */
static int __init sbd_init(void)
{
	int ret;

	if (nsectors <= 0 || logical_block_size <= 0)
		return -EINVAL;

	/*
	 * Set up our internal device. The product is computed in
	 * unsigned long rather than int to avoid signed overflow.
	 */
	Device.size = (unsigned long)nsectors * logical_block_size;
	spin_lock_init(&Device.lock);
	Device.data = vmalloc(Device.size);
	if (Device.data == NULL)
		return -ENOMEM;

	/*
	 * Get a request queue.
	 */
	Queue = blk_init_queue(sbd_request, &Device.lock);
	if (Queue == NULL) {
		ret = -ENOMEM;
		goto out_free;
	}
	blk_queue_logical_block_size(Queue, logical_block_size);

	/*
	 * Get registered. register_blkdev() returns the allocated major
	 * only when a dynamic one (0) was requested; for an explicit major
	 * it returns 0 on success, so major_num must not be overwritten
	 * in that case.
	 */
	ret = register_blkdev(major_num, "sbd");
	if (ret < 0) {
		printk(KERN_WARNING "sbd: unable to get major number\n");
		goto out_cleanup_queue;
	}
	if (major_num == 0)
		major_num = ret;

	/*
	 * And the gendisk structure (16 minors).
	 */
	Device.gd = alloc_disk(16);
	if (!Device.gd) {
		ret = -ENOMEM;
		goto out_unregister;
	}
	Device.gd->major = major_num;
	Device.gd->first_minor = 0;
	Device.gd->fops = &sbd_ops;
	Device.gd->private_data = &Device;
	strcpy(Device.gd->disk_name, "sbd0");
	set_capacity(Device.gd, nsectors);
	Device.gd->queue = Queue;
	add_disk(Device.gd);

	return 0;

	/* Unwind in reverse order of acquisition. */
out_unregister:
	unregister_blkdev(major_num, "sbd");
out_cleanup_queue:
	blk_cleanup_queue(Queue);
out_free:
	vfree(Device.data);
	return ret;
}
|
||||
|
||||
/*
 * Module exit point: release the disk, the block major, the request
 * queue and the backing store.
 */
static void __exit sbd_exit(void)
{
	struct gendisk *disk = Device.gd;

	/* Take the disk out of service, then drop our reference to it. */
	del_gendisk(disk);
	put_disk(disk);

	unregister_blkdev(major_num, "sbd");
	blk_cleanup_queue(Queue);
	vfree(Device.data);
}

module_init(sbd_init);
module_exit(sbd_exit);
|
Loading…
Reference in New Issue