mke2fs: New bitmap and inode table allocation for FLEX_BG

Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  It calculates a new offset for
bitmaps and inode tables based on the number of groups that the user
wishes to pack together using the new "-G" option.  Creating a
filesystem with 32 block groups in a flex group can be done by:

mke2fs -j -I 256 -O flex_bg -G 32 /dev/sdX

Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
Signed-off-by: Valerie Clement <valerie.clement@bull.net>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
bitmap-optimize
Theodore Ts'o 2008-04-22 08:27:01 -04:00
parent 494a1daad3
commit 9ba400027f
5 changed files with 178 additions and 6 deletions

View File

@ -27,18 +27,80 @@
#include "ext2_fs.h" #include "ext2_fs.h"
#include "ext2fs.h" #include "ext2fs.h"
/*
 * Find a starting block for one kind of flex_bg metadata (block
 * bitmaps, inode bitmaps or inode tables) so that the items belonging
 * to the groups of one flex group can be allocated contiguously and
 * in group order.
 *
 *	fs	  - filesystem being laid out
 *	group	  - block group whose metadata is about to be allocated
 *	start_blk - block used for the same kind of metadata in the
 *		    previous group, or 0 if there is none
 *	bmap	  - block bitmap consulted for free space
 *	offset	  - number of blocks to skip past the first free block
 *		    of the flex group before looking for the run
 *	size	  - length in blocks of the free run wanted; clamped to
 *		    s_blocks_per_group / 8
 *
 * Returns the block at which the caller should start its own
 * allocation search.  If no free block can be found in the flex group
 * the value left in first_free (initialised to 0) is returned.
 */
static blk_t flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk,
			   ext2fs_block_bitmap bmap, int offset, int size)
{
	int		flexbg, flexbg_size, elem_size;
	blk_t		last_blk, first_free = 0;
	dgrp_t		last_grp;

	flexbg_size = 1 << fs->super->s_log_groups_per_flex;
	flexbg = group / flexbg_size;

	if (size > fs->super->s_blocks_per_group / 8)
		size = fs->super->s_blocks_per_group / 8;

	/*
	 * Don't do a long search if the previous block
	 * search is still valid.
	 */
	if (start_blk && group % flexbg_size) {
		/*
		 * A request larger than the flex group size is taken to
		 * be for inode tables, so step over a whole table;
		 * otherwise step over a single bitmap block.
		 */
		if (size > flexbg_size)
			elem_size = fs->inode_blocks_per_group;
		else
			elem_size = 1;
		/* NOTE(review): presumably non-zero means the range is free */
		if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size,
						   size))
			return start_blk + elem_size;
	}

	start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);

	/*
	 * The flex group may extend past the end of the filesystem.
	 * group_desc_count is a count, so the highest valid group index
	 * is group_desc_count - 1; clamp to that rather than to the
	 * count itself, which would name a non-existent group.
	 */
	last_grp = group | (flexbg_size - 1);
	if (last_grp >= fs->group_desc_count)
		last_grp = fs->group_desc_count - 1;
	last_blk = ext2fs_group_last_block(fs, last_grp);

	/* Find the first available block */
	if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap,
				   &first_free))
		return first_free;

	/*
	 * Look for the full run starting "offset" blocks further on.
	 * The status is deliberately ignored: whatever first_free holds
	 * afterwards is handed back and the caller performs the real
	 * allocation (and error reporting) from there.
	 */
	(void) ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size,
				      bmap, &first_free);
	return first_free;
}
errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group, errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
ext2fs_block_bitmap bmap) ext2fs_block_bitmap bmap)
{ {
errcode_t retval; errcode_t retval;
blk_t group_blk, start_blk, last_blk, new_blk, blk; blk_t group_blk, start_blk, last_blk, new_blk, blk;
int j; dgrp_t last_grp;
int j, rem_grps, flexbg_size = 0;
group_blk = ext2fs_group_first_block(fs, group); group_blk = ext2fs_group_first_block(fs, group);
last_blk = ext2fs_group_last_block(fs, group); last_blk = ext2fs_group_last_block(fs, group);
if (!bmap) if (!bmap)
bmap = fs->block_map; bmap = fs->block_map;
if (EXT2_HAS_INCOMPAT_FEATURE(fs->super,
EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
fs->super->s_log_groups_per_flex) {
flexbg_size = 1 << fs->super->s_log_groups_per_flex;
last_grp = group | (flexbg_size - 1);
rem_grps = last_grp - group;
if (last_grp > fs->group_desc_count)
last_grp = fs->group_desc_count;
}
/* /*
* Allocate the block and inode bitmaps, if necessary * Allocate the block and inode bitmaps, if necessary
@ -56,6 +118,15 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
} else } else
start_blk = group_blk; start_blk = group_blk;
if (flexbg_size) {
int prev_block = 0;
if (group && fs->group_desc[group-1].bg_block_bitmap)
prev_block = fs->group_desc[group-1].bg_block_bitmap;
start_blk = flexbg_offset(fs, group, prev_block, bmap,
0, rem_grps);
last_blk = ext2fs_group_last_block(fs, last_grp);
}
if (!fs->group_desc[group].bg_block_bitmap) { if (!fs->group_desc[group].bg_block_bitmap) {
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk, retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
1, bmap, &new_blk); 1, bmap, &new_blk);
@ -66,6 +137,22 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
return retval; return retval;
ext2fs_mark_block_bitmap(bmap, new_blk); ext2fs_mark_block_bitmap(bmap, new_blk);
fs->group_desc[group].bg_block_bitmap = new_blk; fs->group_desc[group].bg_block_bitmap = new_blk;
if (flexbg_size) {
dgrp_t gr = ext2fs_group_of_blk(fs, new_blk);
fs->group_desc[gr].bg_free_blocks_count--;
fs->super->s_free_blocks_count--;
fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
ext2fs_group_desc_csum_set(fs, gr);
}
}
if (flexbg_size) {
int prev_block = 0;
if (group && fs->group_desc[group-1].bg_inode_bitmap)
prev_block = fs->group_desc[group-1].bg_inode_bitmap;
start_blk = flexbg_offset(fs, group, prev_block, bmap,
flexbg_size, rem_grps);
last_blk = ext2fs_group_last_block(fs, last_grp);
} }
if (!fs->group_desc[group].bg_inode_bitmap) { if (!fs->group_desc[group].bg_inode_bitmap) {
@ -78,11 +165,29 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
return retval; return retval;
ext2fs_mark_block_bitmap(bmap, new_blk); ext2fs_mark_block_bitmap(bmap, new_blk);
fs->group_desc[group].bg_inode_bitmap = new_blk; fs->group_desc[group].bg_inode_bitmap = new_blk;
if (flexbg_size) {
dgrp_t gr = ext2fs_group_of_blk(fs, new_blk);
fs->group_desc[gr].bg_free_blocks_count--;
fs->super->s_free_blocks_count--;
fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
ext2fs_group_desc_csum_set(fs, gr);
}
} }
/* /*
* Allocate the inode table * Allocate the inode table
*/ */
if (flexbg_size) {
int prev_block = 0;
if (group && fs->group_desc[group-1].bg_inode_table)
prev_block = fs->group_desc[group-1].bg_inode_table;
group_blk = flexbg_offset(fs, group, prev_block, bmap,
flexbg_size * 2,
fs->inode_blocks_per_group *
rem_grps);
last_blk = ext2fs_group_last_block(fs, last_grp);
}
if (!fs->group_desc[group].bg_inode_table) { if (!fs->group_desc[group].bg_inode_table) {
retval = ext2fs_get_free_blocks(fs, group_blk, last_blk, retval = ext2fs_get_free_blocks(fs, group_blk, last_blk,
fs->inode_blocks_per_group, fs->inode_blocks_per_group,
@ -91,12 +196,19 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
return retval; return retval;
for (j=0, blk = new_blk; for (j=0, blk = new_blk;
j < fs->inode_blocks_per_group; j < fs->inode_blocks_per_group;
j++, blk++) j++, blk++) {
ext2fs_mark_block_bitmap(bmap, blk); ext2fs_mark_block_bitmap(bmap, blk);
if (flexbg_size) {
dgrp_t gr = ext2fs_group_of_blk(fs, blk);
fs->group_desc[gr].bg_free_blocks_count--;
fs->super->s_free_blocks_count--;
fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
ext2fs_group_desc_csum_set(fs, gr);
}
}
fs->group_desc[group].bg_inode_table = new_blk; fs->group_desc[group].bg_inode_table = new_blk;
} }
ext2fs_group_desc_csum_set(fs, group); ext2fs_group_desc_csum_set(fs, group);
return 0; return 0;
} }

View File

@ -159,6 +159,7 @@ errcode_t ext2fs_initialize(const char *name, int flags,
set_field(s_first_meta_bg, 0); set_field(s_first_meta_bg, 0);
set_field(s_raid_stride, 0); /* default stride size: 0 */ set_field(s_raid_stride, 0); /* default stride size: 0 */
set_field(s_raid_stripe_width, 0); /* default stripe width: 0 */ set_field(s_raid_stripe_width, 0); /* default stripe width: 0 */
set_field(s_log_groups_per_flex, 0);
set_field(s_flags, 0); set_field(s_flags, 0);
if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) { if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
retval = EXT2_ET_UNSUPP_FEATURE; retval = EXT2_ET_UNSUPP_FEATURE;
@ -377,6 +378,10 @@ ipg_retry:
* Note that although the block bitmap, inode bitmap, and * Note that although the block bitmap, inode bitmap, and
* inode table have not been allocated (and in fact won't be * inode table have not been allocated (and in fact won't be
* by this routine), they are accounted for nevertheless. * by this routine), they are accounted for nevertheless.
*
* If FLEX_BG meta-data grouping is used, only account for the
* superblock and group descriptors (the inode tables and
* bitmaps will be accounted for when allocated).
*/ */
super->s_free_blocks_count = 0; super->s_free_blocks_count = 0;
csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(fs->super, csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
@ -397,6 +402,8 @@ ipg_retry:
fs->group_desc[i].bg_itable_unused = numblocks; fs->group_desc[i].bg_itable_unused = numblocks;
} }
numblocks = ext2fs_reserve_super_and_bgd(fs, i, fs->block_map); numblocks = ext2fs_reserve_super_and_bgd(fs, i, fs->block_map);
if (fs->super->s_log_groups_per_flex)
numblocks += 2 + fs->inode_blocks_per_group;
super->s_free_blocks_count += numblocks; super->s_free_blocks_count += numblocks;
fs->group_desc[i].bg_free_blocks_count = numblocks; fs->group_desc[i].bg_free_blocks_count = numblocks;

View File

@ -26,6 +26,10 @@ mke2fs \- create an ext2/ext3 filesystem
.I blocks-per-group .I blocks-per-group
] ]
[ [
.B \-G
.I number-of-groups
]
[
.B \-i .B \-i
.I bytes-per-inode .I bytes-per-inode
] ]
@ -245,6 +249,13 @@ option rather than manipulating the number of blocks per group.)
This option is generally used by developers who This option is generally used by developers who
are developing test cases. are developing test cases.
.TP .TP
.BI \-G " number-of-groups"
Specify the number of block groups that will be packed together to
create one large virtual block group on an ext4 filesystem. This
improves meta-data locality and performance on meta-data heavy
workloads. The number of groups must be a power of 2 and may only be
specified if the flex_bg filesystem feature is enabled.
.TP
.BI \-i " bytes-per-inode" .BI \-i " bytes-per-inode"
Specify the bytes/inode ratio. Specify the bytes/inode ratio.
.B mke2fs .B mke2fs
@ -445,6 +456,11 @@ Use hashed b-trees to speed up lookups in large directories.
.B filetype .B filetype
Store file type information in directory entries. Store file type information in directory entries.
.TP .TP
.B flex_bg
Allow bitmaps and inode tables for a block group to be placed anywhere
on the storage media (use with -G option to group meta-data in order
to create a large virtual block group).
.TP
.B has_journal .B has_journal
Create an ext3 journal (as if using the Create an ext3 journal (as if using the
.B \-j .B \-j

View File

@ -98,8 +98,9 @@ static void usage(void)
fprintf(stderr, _("Usage: %s [-c|-l filename] [-b block-size] " fprintf(stderr, _("Usage: %s [-c|-l filename] [-b block-size] "
"[-f fragment-size]\n\t[-i bytes-per-inode] [-I inode-size] " "[-f fragment-size]\n\t[-i bytes-per-inode] [-I inode-size] "
"[-J journal-options]\n" "[-J journal-options]\n"
"\t[-N number-of-inodes] [-m reserved-blocks-percentage] " "\t[-G meta group size] [-N number-of-inodes]\n"
"[-o creator-os]\n\t[-g blocks-per-group] [-L volume-label] " "\t[-m reserved-blocks-percentage] [-o creator-os]\n"
"\t[-g blocks-per-group] [-L volume-label] "
"[-M last-mounted-directory]\n\t[-O feature[,...]] " "[-M last-mounted-directory]\n\t[-O feature[,...]] "
"[-r fs-revision] [-E extended-option[,...]]\n" "[-r fs-revision] [-E extended-option[,...]]\n"
"\t[-T fs-type] [-jnqvFSV] device [blocks-count]\n"), "\t[-T fs-type] [-jnqvFSV] device [blocks-count]\n"),
@ -1105,6 +1106,7 @@ static void PRS(int argc, char *argv[])
int blocksize = 0; int blocksize = 0;
int inode_ratio = 0; int inode_ratio = 0;
int inode_size = 0; int inode_size = 0;
unsigned long flex_bg_size = 0;
double reserved_ratio = 5.0; double reserved_ratio = 5.0;
int sector_size = 0; int sector_size = 0;
int show_version_only = 0; int show_version_only = 0;
@ -1189,7 +1191,7 @@ static void PRS(int argc, char *argv[])
} }
while ((c = getopt (argc, argv, while ((c = getopt (argc, argv,
"b:cf:g:i:jl:m:no:qr:s:t:vE:FI:J:L:M:N:O:R:ST:V")) != EOF) { "b:cf:g:G:i:jl:m:no:qr:s:t:vE:FI:J:L:M:N:O:R:ST:V")) != EOF) {
switch (c) { switch (c) {
case 'b': case 'b':
blocksize = strtol(optarg, &tmp, 0); blocksize = strtol(optarg, &tmp, 0);
@ -1239,6 +1241,20 @@ static void PRS(int argc, char *argv[])
exit(1); exit(1);
} }
break; break;
case 'G':
flex_bg_size = strtoul(optarg, &tmp, 0);
if (*tmp) {
com_err(program_name, 0,
_("Illegal number for flex_bg size"));
exit(1);
}
if (flex_bg_size < 2 ||
(flex_bg_size & (flex_bg_size-1)) != 0) {
com_err(program_name, 0,
_("flex_bg size must be a power of 2"));
exit(1);
}
break;
case 'i': case 'i':
inode_ratio = strtoul(optarg, &tmp, 0); inode_ratio = strtoul(optarg, &tmp, 0);
if (inode_ratio < EXT2_MIN_BLOCK_SIZE || if (inode_ratio < EXT2_MIN_BLOCK_SIZE ||
@ -1647,6 +1663,20 @@ static void PRS(int argc, char *argv[])
if (inode_size == 0) if (inode_size == 0)
inode_size = get_int_from_profile(fs_types, "inode_size", 0); inode_size = get_int_from_profile(fs_types, "inode_size", 0);
if (!flex_bg_size && (fs_param.s_feature_incompat &
EXT4_FEATURE_INCOMPAT_FLEX_BG))
flex_bg_size = get_int_from_profile(fs_types,
"flex_bg_size", 16);
if (flex_bg_size) {
if (!(fs_param.s_feature_incompat &
EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
com_err(program_name, 0,
_("Flex_bg feature not enabled, so "
"flex_bg size may not be specified"));
exit(1);
}
fs_param.s_log_groups_per_flex = int_log2(flex_bg_size);
}
if (inode_size && fs_param.s_rev_level >= EXT2_DYNAMIC_REV) { if (inode_size && fs_param.s_rev_level >= EXT2_DYNAMIC_REV) {
if (inode_size < EXT2_GOOD_OLD_INODE_SIZE || if (inode_size < EXT2_GOOD_OLD_INODE_SIZE ||

View File

@ -301,6 +301,13 @@ specify one on the command line.
.I inode_size .I inode_size
This relation specifies the default inode size if the user does not This relation specifies the default inode size if the user does not
specify one on the command line. specify one on the command line.
.TP
.I flex_bg_size
This relation specifies the number of block groups that will be packed
together to create one large virtual block group on an ext4 filesystem.
This improves meta-data locality and performance on meta-data heavy
workloads. The number of groups must be a power of 2 and may only be
specified if the flex_bg filesystem feature is enabled.
.SH FILES .SH FILES
.TP .TP
.I /etc/mke2fs.conf .I /etc/mke2fs.conf