mirror of https://github.com/vitalif/e2fsprogs
mke2fs: New bitmap and inode table allocation for FLEX_BG
Change the way we allocate bitmaps and inode tables if the FLEX_BG feature is used at mke2fs time. It calculates a new offset for the bitmaps and inode tables based on the number of groups that the user wishes to pack together using the new "-G" option. Creating a filesystem with 32 block groups in a flex group can be done by: mke2fs -j -I 256 -O flex_bg -G 32 /dev/sdX Signed-off-by: Jose R. Santos <jrs@us.ibm.com> Signed-off-by: Valerie Clement <valerie.clement@bull.net> Signed-off-by: Theodore Ts'o <tytso@mit.edu> bitmap-optimize
parent
494a1daad3
commit
9ba400027f
|
@ -27,18 +27,80 @@
|
||||||
#include "ext2_fs.h"
|
#include "ext2_fs.h"
|
||||||
#include "ext2fs.h"
|
#include "ext2fs.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This routine searches for free blocks that can allocate a full
|
||||||
|
* group of bitmaps or inode tables for a flexbg group. Returns the
|
||||||
|
* block number with a correct offset where the bitmaps and inode
|
||||||
|
* tables can be allocated contiguously and in order.
|
||||||
|
*/
|
||||||
|
static blk_t flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk,
|
||||||
|
ext2fs_block_bitmap bmap, int offset, int size)
|
||||||
|
{
|
||||||
|
int flexbg, flexbg_size, elem_size;
|
||||||
|
blk_t last_blk, first_free = 0;
|
||||||
|
dgrp_t last_grp;
|
||||||
|
|
||||||
|
flexbg_size = 1 << fs->super->s_log_groups_per_flex;
|
||||||
|
flexbg = group / flexbg_size;
|
||||||
|
|
||||||
|
if (size > fs->super->s_blocks_per_group / 8)
|
||||||
|
size = fs->super->s_blocks_per_group / 8;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Don't do a long search if the previous block
|
||||||
|
* search is still valid.
|
||||||
|
*/
|
||||||
|
if (start_blk && group % flexbg_size) {
|
||||||
|
if (size > flexbg_size)
|
||||||
|
elem_size = fs->inode_blocks_per_group;
|
||||||
|
else
|
||||||
|
elem_size = 1;
|
||||||
|
if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size,
|
||||||
|
size))
|
||||||
|
return start_blk + elem_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
|
||||||
|
last_grp = group | (flexbg_size - 1);
|
||||||
|
if (last_grp > fs->group_desc_count)
|
||||||
|
last_grp = fs->group_desc_count;
|
||||||
|
last_blk = ext2fs_group_last_block(fs, last_grp);
|
||||||
|
|
||||||
|
/* Find the first available block */
|
||||||
|
if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap,
|
||||||
|
&first_free))
|
||||||
|
return first_free;
|
||||||
|
|
||||||
|
if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size,
|
||||||
|
bmap, &first_free))
|
||||||
|
return first_free;
|
||||||
|
|
||||||
|
return first_free;
|
||||||
|
}
|
||||||
|
|
||||||
errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
|
errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
|
||||||
ext2fs_block_bitmap bmap)
|
ext2fs_block_bitmap bmap)
|
||||||
{
|
{
|
||||||
errcode_t retval;
|
errcode_t retval;
|
||||||
blk_t group_blk, start_blk, last_blk, new_blk, blk;
|
blk_t group_blk, start_blk, last_blk, new_blk, blk;
|
||||||
int j;
|
dgrp_t last_grp;
|
||||||
|
int j, rem_grps, flexbg_size = 0;
|
||||||
|
|
||||||
group_blk = ext2fs_group_first_block(fs, group);
|
group_blk = ext2fs_group_first_block(fs, group);
|
||||||
last_blk = ext2fs_group_last_block(fs, group);
|
last_blk = ext2fs_group_last_block(fs, group);
|
||||||
|
|
||||||
if (!bmap)
|
if (!bmap)
|
||||||
bmap = fs->block_map;
|
bmap = fs->block_map;
|
||||||
|
|
||||||
|
if (EXT2_HAS_INCOMPAT_FEATURE(fs->super,
|
||||||
|
EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
|
||||||
|
fs->super->s_log_groups_per_flex) {
|
||||||
|
flexbg_size = 1 << fs->super->s_log_groups_per_flex;
|
||||||
|
last_grp = group | (flexbg_size - 1);
|
||||||
|
rem_grps = last_grp - group;
|
||||||
|
if (last_grp > fs->group_desc_count)
|
||||||
|
last_grp = fs->group_desc_count;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate the block and inode bitmaps, if necessary
|
* Allocate the block and inode bitmaps, if necessary
|
||||||
|
@ -56,6 +118,15 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
|
||||||
} else
|
} else
|
||||||
start_blk = group_blk;
|
start_blk = group_blk;
|
||||||
|
|
||||||
|
if (flexbg_size) {
|
||||||
|
int prev_block = 0;
|
||||||
|
if (group && fs->group_desc[group-1].bg_block_bitmap)
|
||||||
|
prev_block = fs->group_desc[group-1].bg_block_bitmap;
|
||||||
|
start_blk = flexbg_offset(fs, group, prev_block, bmap,
|
||||||
|
0, rem_grps);
|
||||||
|
last_blk = ext2fs_group_last_block(fs, last_grp);
|
||||||
|
}
|
||||||
|
|
||||||
if (!fs->group_desc[group].bg_block_bitmap) {
|
if (!fs->group_desc[group].bg_block_bitmap) {
|
||||||
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
|
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
|
||||||
1, bmap, &new_blk);
|
1, bmap, &new_blk);
|
||||||
|
@ -66,6 +137,22 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
|
||||||
return retval;
|
return retval;
|
||||||
ext2fs_mark_block_bitmap(bmap, new_blk);
|
ext2fs_mark_block_bitmap(bmap, new_blk);
|
||||||
fs->group_desc[group].bg_block_bitmap = new_blk;
|
fs->group_desc[group].bg_block_bitmap = new_blk;
|
||||||
|
if (flexbg_size) {
|
||||||
|
dgrp_t gr = ext2fs_group_of_blk(fs, new_blk);
|
||||||
|
fs->group_desc[gr].bg_free_blocks_count--;
|
||||||
|
fs->super->s_free_blocks_count--;
|
||||||
|
fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
|
||||||
|
ext2fs_group_desc_csum_set(fs, gr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flexbg_size) {
|
||||||
|
int prev_block = 0;
|
||||||
|
if (group && fs->group_desc[group-1].bg_inode_bitmap)
|
||||||
|
prev_block = fs->group_desc[group-1].bg_inode_bitmap;
|
||||||
|
start_blk = flexbg_offset(fs, group, prev_block, bmap,
|
||||||
|
flexbg_size, rem_grps);
|
||||||
|
last_blk = ext2fs_group_last_block(fs, last_grp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fs->group_desc[group].bg_inode_bitmap) {
|
if (!fs->group_desc[group].bg_inode_bitmap) {
|
||||||
|
@ -78,11 +165,29 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
|
||||||
return retval;
|
return retval;
|
||||||
ext2fs_mark_block_bitmap(bmap, new_blk);
|
ext2fs_mark_block_bitmap(bmap, new_blk);
|
||||||
fs->group_desc[group].bg_inode_bitmap = new_blk;
|
fs->group_desc[group].bg_inode_bitmap = new_blk;
|
||||||
|
if (flexbg_size) {
|
||||||
|
dgrp_t gr = ext2fs_group_of_blk(fs, new_blk);
|
||||||
|
fs->group_desc[gr].bg_free_blocks_count--;
|
||||||
|
fs->super->s_free_blocks_count--;
|
||||||
|
fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
|
||||||
|
ext2fs_group_desc_csum_set(fs, gr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate the inode table
|
* Allocate the inode table
|
||||||
*/
|
*/
|
||||||
|
if (flexbg_size) {
|
||||||
|
int prev_block = 0;
|
||||||
|
if (group && fs->group_desc[group-1].bg_inode_table)
|
||||||
|
prev_block = fs->group_desc[group-1].bg_inode_table;
|
||||||
|
group_blk = flexbg_offset(fs, group, prev_block, bmap,
|
||||||
|
flexbg_size * 2,
|
||||||
|
fs->inode_blocks_per_group *
|
||||||
|
rem_grps);
|
||||||
|
last_blk = ext2fs_group_last_block(fs, last_grp);
|
||||||
|
}
|
||||||
|
|
||||||
if (!fs->group_desc[group].bg_inode_table) {
|
if (!fs->group_desc[group].bg_inode_table) {
|
||||||
retval = ext2fs_get_free_blocks(fs, group_blk, last_blk,
|
retval = ext2fs_get_free_blocks(fs, group_blk, last_blk,
|
||||||
fs->inode_blocks_per_group,
|
fs->inode_blocks_per_group,
|
||||||
|
@ -91,12 +196,19 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
|
||||||
return retval;
|
return retval;
|
||||||
for (j=0, blk = new_blk;
|
for (j=0, blk = new_blk;
|
||||||
j < fs->inode_blocks_per_group;
|
j < fs->inode_blocks_per_group;
|
||||||
j++, blk++)
|
j++, blk++) {
|
||||||
ext2fs_mark_block_bitmap(bmap, blk);
|
ext2fs_mark_block_bitmap(bmap, blk);
|
||||||
|
if (flexbg_size) {
|
||||||
|
dgrp_t gr = ext2fs_group_of_blk(fs, blk);
|
||||||
|
fs->group_desc[gr].bg_free_blocks_count--;
|
||||||
|
fs->super->s_free_blocks_count--;
|
||||||
|
fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
|
||||||
|
ext2fs_group_desc_csum_set(fs, gr);
|
||||||
|
}
|
||||||
|
}
|
||||||
fs->group_desc[group].bg_inode_table = new_blk;
|
fs->group_desc[group].bg_inode_table = new_blk;
|
||||||
}
|
}
|
||||||
ext2fs_group_desc_csum_set(fs, group);
|
ext2fs_group_desc_csum_set(fs, group);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -159,6 +159,7 @@ errcode_t ext2fs_initialize(const char *name, int flags,
|
||||||
set_field(s_first_meta_bg, 0);
|
set_field(s_first_meta_bg, 0);
|
||||||
set_field(s_raid_stride, 0); /* default stride size: 0 */
|
set_field(s_raid_stride, 0); /* default stride size: 0 */
|
||||||
set_field(s_raid_stripe_width, 0); /* default stripe width: 0 */
|
set_field(s_raid_stripe_width, 0); /* default stripe width: 0 */
|
||||||
|
set_field(s_log_groups_per_flex, 0);
|
||||||
set_field(s_flags, 0);
|
set_field(s_flags, 0);
|
||||||
if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
|
if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
|
||||||
retval = EXT2_ET_UNSUPP_FEATURE;
|
retval = EXT2_ET_UNSUPP_FEATURE;
|
||||||
|
@ -377,6 +378,10 @@ ipg_retry:
|
||||||
* Note that although the block bitmap, inode bitmap, and
|
* Note that although the block bitmap, inode bitmap, and
|
||||||
* inode table have not been allocated (and in fact won't be
|
* inode table have not been allocated (and in fact won't be
|
||||||
* by this routine), they are accounted for nevertheless.
|
* by this routine), they are accounted for nevertheless.
|
||||||
|
*
|
||||||
|
* If FLEX_BG meta-data grouping is used, only account for the
|
||||||
|
* superblock and group descriptors (the inode tables and
|
||||||
|
* bitmaps will be accounted for when allocated).
|
||||||
*/
|
*/
|
||||||
super->s_free_blocks_count = 0;
|
super->s_free_blocks_count = 0;
|
||||||
csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
|
csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
|
||||||
|
@ -397,6 +402,8 @@ ipg_retry:
|
||||||
fs->group_desc[i].bg_itable_unused = numblocks;
|
fs->group_desc[i].bg_itable_unused = numblocks;
|
||||||
}
|
}
|
||||||
numblocks = ext2fs_reserve_super_and_bgd(fs, i, fs->block_map);
|
numblocks = ext2fs_reserve_super_and_bgd(fs, i, fs->block_map);
|
||||||
|
if (fs->super->s_log_groups_per_flex)
|
||||||
|
numblocks += 2 + fs->inode_blocks_per_group;
|
||||||
|
|
||||||
super->s_free_blocks_count += numblocks;
|
super->s_free_blocks_count += numblocks;
|
||||||
fs->group_desc[i].bg_free_blocks_count = numblocks;
|
fs->group_desc[i].bg_free_blocks_count = numblocks;
|
||||||
|
|
|
@ -26,6 +26,10 @@ mke2fs \- create an ext2/ext3 filesystem
|
||||||
.I blocks-per-group
|
.I blocks-per-group
|
||||||
]
|
]
|
||||||
[
|
[
|
||||||
|
.B \-G
|
||||||
|
.I number-of-groups
|
||||||
|
]
|
||||||
|
[
|
||||||
.B \-i
|
.B \-i
|
||||||
.I bytes-per-inode
|
.I bytes-per-inode
|
||||||
]
|
]
|
||||||
|
@ -245,6 +249,13 @@ option rather than manipulating the number of blocks per group.)
|
||||||
This option is generally used by developers who
|
This option is generally used by developers who
|
||||||
are developing test cases.
|
are developing test cases.
|
||||||
.TP
|
.TP
|
||||||
|
.BI \-G " number-of-groups"
|
||||||
|
Specify the number of block groups that will be packed together to
|
||||||
|
create one large virtual block group on an ext4 filesystem. This
|
||||||
|
improves meta-data locality and performance on meta-data heavy
|
||||||
|
workloads. The number of groups must be a power of 2 and may only be
|
||||||
|
specified if the flex_bg filesystem feature is enabled.
|
||||||
|
.TP
|
||||||
.BI \-i " bytes-per-inode"
|
.BI \-i " bytes-per-inode"
|
||||||
Specify the bytes/inode ratio.
|
Specify the bytes/inode ratio.
|
||||||
.B mke2fs
|
.B mke2fs
|
||||||
|
@ -445,6 +456,11 @@ Use hashed b-trees to speed up lookups in large directories.
|
||||||
.B filetype
|
.B filetype
|
||||||
Store file type information in directory entries.
|
Store file type information in directory entries.
|
||||||
.TP
|
.TP
|
||||||
|
.B flex_bg
|
||||||
|
Allow bitmaps and inode tables for a block group to be placed anywhere
|
||||||
|
on the storage media (use with -G option to group meta-data in order
|
||||||
|
to create a large virtual block group).
|
||||||
|
.TP
|
||||||
.B has_journal
|
.B has_journal
|
||||||
Create an ext3 journal (as if using the
|
Create an ext3 journal (as if using the
|
||||||
.B \-j
|
.B \-j
|
||||||
|
|
|
@ -98,8 +98,9 @@ static void usage(void)
|
||||||
fprintf(stderr, _("Usage: %s [-c|-l filename] [-b block-size] "
|
fprintf(stderr, _("Usage: %s [-c|-l filename] [-b block-size] "
|
||||||
"[-f fragment-size]\n\t[-i bytes-per-inode] [-I inode-size] "
|
"[-f fragment-size]\n\t[-i bytes-per-inode] [-I inode-size] "
|
||||||
"[-J journal-options]\n"
|
"[-J journal-options]\n"
|
||||||
"\t[-N number-of-inodes] [-m reserved-blocks-percentage] "
|
"\t[-G meta group size] [-N number-of-inodes]\n"
|
||||||
"[-o creator-os]\n\t[-g blocks-per-group] [-L volume-label] "
|
"\t[-m reserved-blocks-percentage] [-o creator-os]\n"
|
||||||
|
"\t[-g blocks-per-group] [-L volume-label] "
|
||||||
"[-M last-mounted-directory]\n\t[-O feature[,...]] "
|
"[-M last-mounted-directory]\n\t[-O feature[,...]] "
|
||||||
"[-r fs-revision] [-E extended-option[,...]]\n"
|
"[-r fs-revision] [-E extended-option[,...]]\n"
|
||||||
"\t[-T fs-type] [-jnqvFSV] device [blocks-count]\n"),
|
"\t[-T fs-type] [-jnqvFSV] device [blocks-count]\n"),
|
||||||
|
@ -1105,6 +1106,7 @@ static void PRS(int argc, char *argv[])
|
||||||
int blocksize = 0;
|
int blocksize = 0;
|
||||||
int inode_ratio = 0;
|
int inode_ratio = 0;
|
||||||
int inode_size = 0;
|
int inode_size = 0;
|
||||||
|
unsigned long flex_bg_size = 0;
|
||||||
double reserved_ratio = 5.0;
|
double reserved_ratio = 5.0;
|
||||||
int sector_size = 0;
|
int sector_size = 0;
|
||||||
int show_version_only = 0;
|
int show_version_only = 0;
|
||||||
|
@ -1189,7 +1191,7 @@ static void PRS(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((c = getopt (argc, argv,
|
while ((c = getopt (argc, argv,
|
||||||
"b:cf:g:i:jl:m:no:qr:s:t:vE:FI:J:L:M:N:O:R:ST:V")) != EOF) {
|
"b:cf:g:G:i:jl:m:no:qr:s:t:vE:FI:J:L:M:N:O:R:ST:V")) != EOF) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'b':
|
case 'b':
|
||||||
blocksize = strtol(optarg, &tmp, 0);
|
blocksize = strtol(optarg, &tmp, 0);
|
||||||
|
@ -1239,6 +1241,20 @@ static void PRS(int argc, char *argv[])
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'G':
|
||||||
|
flex_bg_size = strtoul(optarg, &tmp, 0);
|
||||||
|
if (*tmp) {
|
||||||
|
com_err(program_name, 0,
|
||||||
|
_("Illegal number for flex_bg size"));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (flex_bg_size < 2 ||
|
||||||
|
(flex_bg_size & (flex_bg_size-1)) != 0) {
|
||||||
|
com_err(program_name, 0,
|
||||||
|
_("flex_bg size must be a power of 2"));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case 'i':
|
case 'i':
|
||||||
inode_ratio = strtoul(optarg, &tmp, 0);
|
inode_ratio = strtoul(optarg, &tmp, 0);
|
||||||
if (inode_ratio < EXT2_MIN_BLOCK_SIZE ||
|
if (inode_ratio < EXT2_MIN_BLOCK_SIZE ||
|
||||||
|
@ -1647,6 +1663,20 @@ static void PRS(int argc, char *argv[])
|
||||||
|
|
||||||
if (inode_size == 0)
|
if (inode_size == 0)
|
||||||
inode_size = get_int_from_profile(fs_types, "inode_size", 0);
|
inode_size = get_int_from_profile(fs_types, "inode_size", 0);
|
||||||
|
if (!flex_bg_size && (fs_param.s_feature_incompat &
|
||||||
|
EXT4_FEATURE_INCOMPAT_FLEX_BG))
|
||||||
|
flex_bg_size = get_int_from_profile(fs_types,
|
||||||
|
"flex_bg_size", 16);
|
||||||
|
if (flex_bg_size) {
|
||||||
|
if (!(fs_param.s_feature_incompat &
|
||||||
|
EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
|
||||||
|
com_err(program_name, 0,
|
||||||
|
_("Flex_bg feature not enabled, so "
|
||||||
|
"flex_bg size may not be specified"));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
fs_param.s_log_groups_per_flex = int_log2(flex_bg_size);
|
||||||
|
}
|
||||||
|
|
||||||
if (inode_size && fs_param.s_rev_level >= EXT2_DYNAMIC_REV) {
|
if (inode_size && fs_param.s_rev_level >= EXT2_DYNAMIC_REV) {
|
||||||
if (inode_size < EXT2_GOOD_OLD_INODE_SIZE ||
|
if (inode_size < EXT2_GOOD_OLD_INODE_SIZE ||
|
||||||
|
|
|
@ -301,6 +301,13 @@ specify one on the command line.
|
||||||
.I inode_size
|
.I inode_size
|
||||||
This relation specifies the default inode size if the user does not
|
This relation specifies the default inode size if the user does not
|
||||||
specify one on the command line.
|
specify one on the command line.
|
||||||
|
.TP
|
||||||
|
.I flex_bg_size
|
||||||
|
This relation specifies the number of block groups that will be packed
|
||||||
|
together to create one large virtual block group on an ext4 filesystem.
|
||||||
|
This improves meta-data locality and performance on meta-data heavy
|
||||||
|
workloads. The number of groups must be a power of 2 and may only be
|
||||||
|
specified if the flex_bg filesystem feature is enabled.
|
||||||
.SH FILES
|
.SH FILES
|
||||||
.TP
|
.TP
|
||||||
.I /etc/mke2fs.conf
|
.I /etc/mke2fs.conf
|
||||||
|
|
Loading…
Reference in New Issue