/*
 * alloc_tables.c --- Allocate tables for a newly initialized
 * filesystem.  Used by mke2fs when initializing a filesystem
 *
 * Copyright (C) 1996 Theodore Ts'o.
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Library
 * General Public License, version 2.
 * %End-Header%
 */

#include "config.h"
#include <stdio.h>
#include <string.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <fcntl.h>
#include <time.h>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#include "ext2_fs.h"
#include "ext2fs.h"
#include "ext2fsP.h"

/*
 * This routine searches for free blocks that can allocate a full
 * group of bitmaps or inode tables for a flexbg group.  Returns the
 * block number with a correct offset where the bitmaps and inode
 * tables can be allocated contiguously and in order.
 *
 *	fs		filesystem being laid out
 *	group		block group whose metadata is being placed
 *	start_blk	hint for where to look first (the caller passes the
 *			block just past the previous group's allocation);
 *			0 means "no hint"
 *	bmap		block bitmap that is searched for free ranges
 *	rem_grp		number of groups still to be placed in this flex_bg
 *	elem_size	number of blocks one group needs for this kind of
 *			metadata
 *
 * The return value is whatever the last search stored in first_free
 * (which starts out as 0), so callers must still validate the block
 * by performing the real allocation themselves.
 */
static blk64_t flexbg_offset(ext2_filsys fs, dgrp_t group, blk64_t start_blk,
			     ext2fs_block_bitmap bmap, int rem_grp,
			     int elem_size)
{
	/*
	 * Unsigned on purpose: a signed "1 << n" overflows for n == 31, and
	 * a signed "flexbg_size * flexbg" overflows for group numbers
	 * >= 2^31.
	 */
	unsigned int	flexbg, flexbg_size;
	int		size;
	blk64_t		last_blk, first_free = 0;
	dgrp_t		last_grp;

	flexbg_size = 1U << fs->super->s_log_groups_per_flex;
	flexbg = group / flexbg_size;
	size = rem_grp * elem_size;

	/* Never look for more than a quarter of a block group at once */
	if (size > (int) (fs->super->s_blocks_per_group / 4))
		size = (int) fs->super->s_blocks_per_group / 4;

	/*
	 * Don't do a long search if the previous block search is still valid,
	 * but skip minor obstructions such as group descriptor backups.
	 */
	if (start_blk && start_blk < ext2fs_blocks_count(fs->super) &&
	    ext2fs_get_free_blocks2(fs, start_blk, start_blk + size, elem_size,
				    bmap, &first_free) == 0)
		return first_free;

	/* Otherwise restart from the first block of this flex_bg ... */
	start_blk = ext2fs_group_first_block2(fs, flexbg_size * flexbg);

	/* ... up to the end of its last group, clamped to the filesystem */
	last_grp = group | (flexbg_size - 1);
	if (last_grp > fs->group_desc_count-1)
		last_grp = fs->group_desc_count-1;
	last_blk = ext2fs_group_last_block2(fs, last_grp);

	/* Find the first available block: room for all remaining groups */
	if (ext2fs_get_free_blocks2(fs, start_blk, last_blk, size,
				    bmap, &first_free) == 0)
		return first_free;

	/* Settle for room for just this group's element */
	if (ext2fs_get_free_blocks2(fs, start_blk, last_blk, elem_size,
				    bmap, &first_free) == 0)
		return first_free;

	/*
	 * Last resort: search from block 0.  The result is returned
	 * whether or not this search succeeds.
	 */
	(void) ext2fs_get_free_blocks2(fs, 0, last_blk, elem_size, bmap,
				       &first_free);
	return first_free;
}

/*
 * Allocate the block bitmap, inode bitmap and inode table for one block
 * group and record their locations in the group descriptor.
 *
 *	fs	filesystem being laid out
 *	group	block group to allocate metadata for
 *	bmap	block bitmap used to find (and mark) free blocks; when
 *		NULL, fs->block_map is used
 *
 * Each of the three structures is only allocated if its location in the
 * group descriptor is still zero.  When the flex_bg feature is enabled
 * (and s_log_groups_per_flex is non-zero) the structures of all groups in
 * a flex_bg are packed next to each other, and the free block counts,
 * BLOCK_UNINIT flag and checksum of the group that actually contains each
 * newly allocated block are updated as well.
 *
 * Returns 0 on success, or the error code from ext2fs_get_free_blocks2().
 */
errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
				      ext2fs_block_bitmap bmap)
{
	errcode_t	retval;
	blk64_t		group_blk, start_blk, last_blk, new_blk;
	dgrp_t		last_grp = 0;
	int		rem_grps = 0;
	/* Unsigned so that "1U << 31" is well defined */
	unsigned int	flexbg_size = 0, table_offset = 0;

	group_blk = ext2fs_group_first_block2(fs, group);
	last_blk = ext2fs_group_last_block2(fs, group);

	if (!bmap)
		bmap = fs->block_map;

	if (EXT2_HAS_INCOMPAT_FEATURE(fs->super,
				      EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
	    fs->super->s_log_groups_per_flex) {
		flexbg_size = 1U << fs->super->s_log_groups_per_flex;
		/* Last group of this flex_bg, clamped to the filesystem */
		last_grp = group | (flexbg_size - 1);
		if (last_grp > fs->group_desc_count-1)
			last_grp = fs->group_desc_count-1;
		rem_grps = last_grp - group + 1;
	}

	/*
	 * Allocate the block and inode bitmaps, if necessary
	 */
	if (fs->stride) {
		retval = ext2fs_get_free_blocks2(fs, group_blk, last_blk,
						 1, bmap, &start_blk);
		if (retval)
			return retval;
		start_blk += fs->inode_blocks_per_group;
		/*
		 * Only apply the stride offset while start_blk is still
		 * inside the group; otherwise the modulus below could be
		 * zero (division by zero) when start_blk == last_blk + 1.
		 * Anything past the group falls back to group_blk below.
		 */
		if (start_blk <= last_blk)
			start_blk += ((fs->stride * group) %
				      (last_blk - start_blk + 1));
		if (start_blk >= last_blk)
			start_blk = group_blk;
	} else
		start_blk = group_blk;

	if (flexbg_size) {
		blk64_t prev_block = 0;

		table_offset = flexbg_size;
		if (group % flexbg_size)
			prev_block = ext2fs_block_bitmap_loc(fs, group - 1) + 1;
		else if (last_grp == fs->group_desc_count-1) {
			/*
			 * If we are allocating for the last flex_bg
			 * keep the metadata tables contiguous
			 */
			table_offset = last_grp & (flexbg_size - 1);
			if (table_offset == 0)
				table_offset = flexbg_size;
			else
				table_offset++;
		}
		/* FIXME: Take backup group descriptor blocks into account
		 * if the flexbg allocations will grow to overlap them... */
		start_blk = flexbg_offset(fs, group, prev_block, bmap,
					  rem_grps, 1);
		last_blk = ext2fs_group_last_block2(fs, last_grp);
	}

	if (!ext2fs_block_bitmap_loc(fs, group)) {
		retval = ext2fs_get_free_blocks2(fs, start_blk, last_blk,
						 1, bmap, &new_blk);
		/* Preferred spot is full: retry from the group's first block */
		if (retval == EXT2_ET_BLOCK_ALLOC_FAIL)
			retval = ext2fs_get_free_blocks2(fs, group_blk,
					last_blk, 1, bmap, &new_blk);
		if (retval)
			return retval;
		ext2fs_mark_block_bitmap2(bmap, new_blk);
		ext2fs_block_bitmap_loc_set(fs, group, new_blk);
		if (flexbg_size) {
			/* Account for the block in the group that holds it */
			dgrp_t gr = ext2fs_group_of_blk2(fs, new_blk);

			ext2fs_bg_free_blocks_count_set(fs, gr,
				ext2fs_bg_free_blocks_count(fs, gr) - 1);
			ext2fs_free_blocks_count_add(fs->super, -1);
			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
			ext2fs_group_desc_csum_set(fs, gr);
		}
	}

	if (flexbg_size) {
		blk64_t prev_block = 0;

		if (group % flexbg_size)
			prev_block = ext2fs_inode_bitmap_loc(fs, group - 1) + 1;
		else
			prev_block = ext2fs_block_bitmap_loc(fs, group) +
				table_offset;
		/* FIXME: Take backup group descriptor blocks into account
		 * if the flexbg allocations will grow to overlap them... */
		start_blk = flexbg_offset(fs, group, prev_block, bmap,
					  rem_grps, 1);
		last_blk = ext2fs_group_last_block2(fs, last_grp);
	}

	if (!ext2fs_inode_bitmap_loc(fs, group)) {
		retval = ext2fs_get_free_blocks2(fs, start_blk, last_blk,
						 1, bmap, &new_blk);
		/* Preferred spot is full: retry from the group's first block */
		if (retval == EXT2_ET_BLOCK_ALLOC_FAIL)
			retval = ext2fs_get_free_blocks2(fs, group_blk,
					 last_blk, 1, bmap, &new_blk);
		if (retval)
			return retval;
		ext2fs_mark_block_bitmap2(bmap, new_blk);
		ext2fs_inode_bitmap_loc_set(fs, group, new_blk);
		if (flexbg_size) {
			/* Account for the block in the group that holds it */
			dgrp_t gr = ext2fs_group_of_blk2(fs, new_blk);

			ext2fs_bg_free_blocks_count_set(fs, gr,
				ext2fs_bg_free_blocks_count(fs, gr) - 1);
			ext2fs_free_blocks_count_add(fs->super, -1);
			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
			ext2fs_group_desc_csum_set(fs, gr);
		}
	}

	/*
	 * Allocate the inode table
	 */
	if (flexbg_size) {
		blk64_t prev_block = 0;

		if (group % flexbg_size)
			prev_block = ext2fs_inode_table_loc(fs, group - 1) +
				fs->inode_blocks_per_group;
		else
			prev_block = ext2fs_inode_bitmap_loc(fs, group) +
				table_offset;

		/* FIXME: Take backup group descriptor blocks into account
		 * if the flexbg allocations will grow to overlap them... */
		group_blk = flexbg_offset(fs, group, prev_block, bmap,
					  rem_grps, fs->inode_blocks_per_group);
		last_blk = ext2fs_group_last_block2(fs, last_grp);
	}

	if (!ext2fs_inode_table_loc(fs, group)) {
		retval = ext2fs_get_free_blocks2(fs, group_blk, last_blk,
						 fs->inode_blocks_per_group,
						 bmap, &new_blk);
		if (retval)
			return retval;
		if (flexbg_size)
			ext2fs_block_alloc_stats_range(fs, new_blk,
				       fs->inode_blocks_per_group, +1);
		else
			/*
			 * NOTE(review): this marks fs->block_map, whereas
			 * the search above and the bitmap allocations use
			 * bmap.  The two differ only when the caller passes
			 * its own bitmap -- confirm this is intended.
			 */
			ext2fs_mark_block_bitmap_range2(fs->block_map,
					new_blk, fs->inode_blocks_per_group);
		ext2fs_inode_table_loc_set(fs, group, new_blk);
	}
	ext2fs_group_desc_csum_set(fs, group);
	return 0;
}

/*
 * Allocate the bitmaps and inode table for every block group of the
 * filesystem, using fs->block_map as the allocation bitmap.
 *
 * If fs->progress_ops is set, its init, update and close hooks (each
 * optional) are called around the loop, with update invoked once per
 * group before that group is processed.
 *
 * Returns 0 on success, or the first error reported by
 * ext2fs_allocate_group_table().  On error the function returns
 * immediately, without calling the progress close hook.
 */
errcode_t ext2fs_allocate_tables(ext2_filsys fs)
{
	errcode_t	retval;
	dgrp_t		i;
	struct ext2fs_numeric_progress_struct progress;

	if (fs->progress_ops && fs->progress_ops->init)
		(fs->progress_ops->init)(fs, &progress, NULL,
					 fs->group_desc_count);

	for (i = 0; i < fs->group_desc_count; i++) {
		if (fs->progress_ops && fs->progress_ops->update)
			(fs->progress_ops->update)(fs, &progress, i);
		retval = ext2fs_allocate_group_table(fs, i, fs->block_map);
		if (retval)
			return retval;
	}

	if (fs->progress_ops && fs->progress_ops->close)
		(fs->progress_ops->close)(fs, &progress, NULL);

	return 0;
}