diff --git a/realloc-inodes.c b/realloc-inodes.c index e524b59..d71f5bc 100644 --- a/realloc-inodes.c +++ b/realloc-inodes.c @@ -21,27 +21,31 @@ */ /** - * TODO fix reallocation for the case when inode tables for flex_bg do not fit into a single block group * TODO bigalloc compatibility * TODO write some tests: for inode moving (image with many files), * for block moving, including extent blocks (one sparse file with many extents), * for block moving between different groups * * The theory isn't that hard: - * 1) If shrinking - move inodes away from the end of each block group inode table - * 1.1) move each inode to the new place, mark new place as occupied, unmark old one - * 1.2) remember the old->new inode number mapping + * 1) Determine where we want to move the inode tables: + * 1.1) Create a map of non-movable blocks - these are: superblock & group descriptors, + * block bitmaps, inode bitmaps, bad blocks, and blocks in the resize inode. + * Otherwise speaking, we can move any block that either belongs to inode table + * or belongs to any inode other than the resize or bad blocks inode. + * 1.2) Using the created map, find place for each group inode table closest to the beginning + * of its flex_bg. Save these locations in memory. + * 1.3) Free the map. + * 2) If shrinking - move inodes away from the end of each block group inode table + * 2.1) move each inode to the new place, mark new place as occupied, unmark old one + * 2.2) remember the old->new inode number mapping * 2) If growing - move data away from extra blocks needed by growing inode tables: - * 2.1) Create a map of blocks that we want to free - * 2.2) Iterate through all inodes and move remembered blocks. - * It involves overwriting the whole file extent tree or block mapping... - * If some of these blocks are in the bad block inode, abort the reallocation process. 
- * We could possibly try to move inode tables to another location in a block group - * in that case; but it may be needed to defragment it first... :-( + * 2.2) Create a map of blocks that we want to free. + * 2.3) Iterate through all inodes and move blocks. It may involve overwriting + * the whole file extent tree or block mapping. * 3) Change all inode numbers in directory entries according to mappings from (1.2), * and then using a formula: new_num = 1 + ((old_num-1)/old_i_per_g)*new_i_per_g + ((old_num-1) % old_i_per_g) - * 4) Move parts of inode tables so they are consecutive again if flex_bg feature is active - * 5) Mark/unmark extra blocks used for inode tables + * 4) Move inode tables. + * 5) Unmark old inode table blocks, mark new ones. * 6) Change block group descriptors: bg_inode_table, bg_free_inodes_count, * bg_free_blocks_count, bg_inode_bitmap_csum, bg_itable_unused * 7) Change superblock: s_inodes_count, s_free_blocks_count, @@ -89,6 +93,7 @@ typedef struct __u32 ig_old, ig_new; // old and new inodes-per-group count __u32 ibg_old, ibg_new; // old and new inode_blocks-per-group count __u32 new_inode_count; + blk64_t *new_itable_loc; // (old->new) inode number map ext2_ino_t *inode_map; __u32 inode_map_size, inode_map_alloc; @@ -161,9 +166,10 @@ ext2_ino_t realloc_search_inode_map(realloc_data *rd, ext2_ino_t old) * Move inodes from the end of each block group inode table * so the tables can be shrinked */ -int shrink_move_inodes(realloc_data *rd) +errcode_t shrink_move_inodes(realloc_data *rd) { - int retval = 0, inode_size = EXT2_INODE_SIZE(rd->fs->super); + errcode_t retval = 0; + int inode_size = EXT2_INODE_SIZE(rd->fs->super); __u32 group, i; __u32 new_group; ext2_ino_t ino, new_ino; @@ -238,18 +244,15 @@ out: * Move data blocks from after the end of each block group inode table * so the tables can be grown */ -int extend_move_blocks(realloc_data *rd) +errcode_t extend_move_blocks(realloc_data *rd) { ext2fs_block_bitmap reserve_map; - blk64_t 
it_start, blk_diff, b_per_g; - dgrp_t flex_grp, n_grp, flex_count; - int retval, flexbg_size; + dgrp_t grp; + errcode_t retval; if (rd->ibg_new == rd->ibg_old) { return 0; } - blk_diff = rd->ibg_new-rd->ibg_old; - b_per_g = EXT2_BLOCKS_PER_GROUP(rd->fs->super); retval = ext2fs_allocate_block_bitmap(rd->fs, "reserved block map", &reserve_map); if (retval) { @@ -259,39 +262,16 @@ int extend_move_blocks(realloc_data *rd) { ext2fs_read_block_bitmap(rd->fs); } - // Mark reserved blocks (those we want to free) - if (EXT2_HAS_INCOMPAT_FEATURE(rd->fs->super, EXT4_FEATURE_INCOMPAT_FLEX_BG) - && rd->fs->super->s_log_groups_per_flex) + // Mark blocks we want to free as "reserved" + // Don't care about which blocks are already used by inode tables, + // because ext2fs_move_blocks only moves blocks that belong to inodes. + for (grp = 0; grp < rd->fs->group_desc_count; grp++) { - flexbg_size = 1 << rd->fs->super->s_log_groups_per_flex; - } - else - { - flexbg_size = 1; - } - flex_count = (rd->fs->group_desc_count + flexbg_size - 1) / flexbg_size; - for (flex_grp = 0; flex_grp < flex_count; flex_grp++) - { - n_grp = flexbg_size; - if (flex_grp*flexbg_size+n_grp > rd->fs->group_desc_count) - { - n_grp = rd->fs->group_desc_count-flex_grp*flexbg_size; - } - it_start = ext2fs_inode_table_loc(rd->fs, flex_grp*flexbg_size); - // Check group boundaries (the first group in flex_bg must contain all inode tables) - if ((it_start + rd->ibg_new*n_grp - 1) / b_per_g - != (it_start + rd->ibg_old*n_grp - 1) / b_per_g) - { - retval = ENOSPC; - goto out; - } - it_start += rd->ibg_old*n_grp; - ext2fs_mark_block_bitmap_range2(reserve_map, it_start, blk_diff*n_grp); + ext2fs_mark_block_bitmap_range2(reserve_map, rd->new_itable_loc[grp], rd->ibg_new); } retval = ext2fs_move_blocks(rd->fs, reserve_map, rd->fs->block_map, 0); ext2fs_mark_bb_dirty(rd->fs); ext2fs_flush(rd->fs); -out: ext2fs_free_block_bitmap(reserve_map); return retval; } @@ -318,7 +298,7 @@ static int 
change_inode_numbers_callback(ext2_ino_t dir, int entry, /** * Change inode numbers in all directory entries */ -int change_inode_numbers(realloc_data *rd) +errcode_t change_inode_numbers(realloc_data *rd) { ext2_ino_t ino; realloc_sort_inode_map(rd); @@ -330,16 +310,16 @@ int change_inode_numbers(realloc_data *rd) } /** - * 1) Move inode tables so they are consecutive again if flex_bg is enabled - * 2) Mark/unmark extra inode table blocks + * 1) Move inode tables + * 2) Mark/unmark new/old inode table blocks * 3) Adjust superblock and block group descriptors */ -int change_super_and_bgd(realloc_data *rd) +errcode_t change_super_and_bgd(realloc_data *rd) { - blk64_t it_start, blk; - dgrp_t grp, flex_grp, flex_count; - __u32 unus, used_ibg; - int flexbg_size, n_grp, i, retval = 0; + blk64_t blk; + dgrp_t grp; + __u32 used_ibg, i, unus; + errcode_t retval = 0; int has_gdt_csum = EXT2_HAS_RO_COMPAT_FEATURE(rd->fs->super, EXT4_FEATURE_RO_COMPAT_GDT_CSUM); void *buf = NULL; ext2fs_flush(rd->fs); @@ -347,105 +327,78 @@ int change_super_and_bgd(realloc_data *rd) { ext2fs_read_block_bitmap(rd->fs); } - if (EXT2_HAS_INCOMPAT_FEATURE(rd->fs->super, EXT4_FEATURE_INCOMPAT_FLEX_BG) - && rd->fs->super->s_log_groups_per_flex) - { - flexbg_size = 1 << rd->fs->super->s_log_groups_per_flex; - } - else - { - flexbg_size = 1; - } - flex_count = (rd->fs->group_desc_count + flexbg_size - 1) / flexbg_size; - retval = ext2fs_get_mem(EXT2_BLOCK_SIZE(rd->fs->super) * rd->ibg_new * flexbg_size, &buf); + retval = ext2fs_get_mem(EXT2_BLOCK_SIZE(rd->fs->super) * rd->ibg_new, &buf); if (retval) { goto out; } - for (flex_grp = 0; flex_grp < flex_count; flex_grp++) + for (grp = 0; grp < rd->fs->group_desc_count; grp++) { - n_grp = flexbg_size; - if (flex_grp*flexbg_size+n_grp > rd->fs->group_desc_count) + for (i = 0, blk = ext2fs_inode_table_loc(rd->fs, grp); i < rd->ibg_old; i++, blk++) { - n_grp = rd->fs->group_desc_count-flex_grp*flexbg_size; + ext2fs_block_alloc_stats2(rd->fs, blk, -1); } - 
it_start = ext2fs_inode_table_loc(rd->fs, flex_grp*flexbg_size);
-        if (rd->ibg_new != rd->ibg_old)
+    }
+    for (grp = 0; grp < rd->fs->group_desc_count; grp++)
+    {
+        for (i = 0, blk = rd->new_itable_loc[grp]; i < rd->ibg_new; i++, blk++)
         {
-            memset(buf, 0, EXT2_BLOCK_SIZE(rd->fs->super) * rd->ibg_new * n_grp);
-            // Read inode table(s) while skipping unitialized inode table parts
-            for (grp = flex_grp*flexbg_size, i = 0; i < n_grp; grp++, i++)
+            ext2fs_block_alloc_stats2(rd->fs, blk, +1); // +1 marks the new inode table block as in use (-1 would free it)
+        }
+    }
+    for (grp = 0; grp < rd->fs->group_desc_count; grp++)
+    {
+        // Skip uninitialized inode table parts
+        used_ibg = rd->ibg_old;
+        if (has_gdt_csum)
+        {
+            if (ext2fs_bg_flags_test(rd->fs, grp, EXT2_BG_INODE_UNINIT))
             {
-                used_ibg = rd->ibg_old;
-                if (has_gdt_csum)
-                {
-                    if (ext2fs_bg_flags_test(rd->fs, grp, EXT2_BG_INODE_UNINIT))
-                    {
-                        used_ibg = 0;
-                    }
-                    else
-                    {
-                        used_ibg = (rd->ig_old - ext2fs_bg_itable_unused(rd->fs, grp));
-                        used_ibg = (used_ibg * EXT2_INODE_SIZE(rd->fs->super)+EXT2_BLOCK_SIZE(rd->fs->super)-1)/EXT2_BLOCK_SIZE(rd->fs->super);
-                    }
-                }
-                if (used_ibg > 0)
-                {
-                    blk = ext2fs_inode_table_loc(rd->fs, grp);
-                    retval = io_channel_read_blk64(rd->fs->io, blk,
-                        min(used_ibg, rd->ibg_new),
-                        buf + i*rd->ibg_new*EXT2_BLOCK_SIZE(rd->fs->super));
-                    if (retval)
-                    {
-                        goto out;
-                    }
-                }
-            }
-            // Write inode table(s) to the new place
-            retval = io_channel_write_blk64(rd->fs->io, it_start, rd->ibg_new * n_grp, buf);
-            if (retval)
-            {
-                // Exiting with badly corrupted filesystem :-(
-                printf("Error moving inode tables for %u groups, starting from %u\n", n_grp, flex_grp*flexbg_size);
-                goto out;
-            }
-            // Mark/unmark extra inode table blocks
-            if (rd->ibg_new < rd->ibg_old)
-            {
-                ext2fs_unmark_block_bitmap_range2(rd->fs->block_map, it_start + rd->ibg_new*n_grp,
-                    (rd->ibg_old-rd->ibg_new)*n_grp);
+                used_ibg = 0;
             }
             else
             {
-                ext2fs_mark_block_bitmap_range2(rd->fs->block_map, it_start + rd->ibg_old*n_grp,
-                    (rd->ibg_new-rd->ibg_old)*n_grp);
+                used_ibg = (rd->ig_old - 
ext2fs_bg_itable_unused(rd->fs, grp)); + used_ibg = (used_ibg * EXT2_INODE_SIZE(rd->fs->super)+EXT2_BLOCK_SIZE(rd->fs->super)-1)/EXT2_BLOCK_SIZE(rd->fs->super); } } - ext2fs_bg_free_blocks_count_set(rd->fs, flex_grp*flexbg_size, - ext2fs_bg_free_blocks_count(rd->fs, flex_grp*flexbg_size) - - (rd->ibg_new - rd->ibg_old)*n_grp); - // Change inode table locations and free inode counts - for (grp = flex_grp*flexbg_size, i = 0; i < n_grp; grp++, i++) + // Move inode table + blk = ext2fs_inode_table_loc(rd->fs, grp); + if (used_ibg > 0 && blk != rd->new_itable_loc[grp]) { - blk = it_start + rd->ibg_new*i; - ext2fs_inode_table_loc_set(rd->fs, grp, blk); - ext2fs_bg_free_inodes_count_set(rd->fs, grp, - ext2fs_bg_free_inodes_count(rd->fs, grp) + rd->ig_new - rd->ig_old); - if (has_gdt_csum) + retval = io_channel_read_blk64(rd->fs->io, blk, min(used_ibg, rd->ibg_new), buf); + if (retval) + goto out; + if (used_ibg < rd->ibg_new) { - unus = ext2fs_bg_itable_unused(rd->fs, grp); - if (rd->ig_new > rd->ig_old || unus >= rd->ig_old - rd->ig_new) - { - unus += rd->ig_new - rd->ig_old; - } - else - { - unus = 0; - } - ext2fs_bg_itable_unused_set(rd->fs, grp, unus); - ext2fs_bg_flags_clear(rd->fs, grp, EXT2_BG_BLOCK_UNINIT); - ext2fs_group_desc_csum_set(rd->fs, grp); + memset(buf + EXT2_BLOCK_SIZE(rd->fs->super) * used_ibg, 0, + EXT2_BLOCK_SIZE(rd->fs->super) * (rd->ibg_new - used_ibg)); } + retval = io_channel_write_blk64(rd->fs->io, rd->new_itable_loc[grp], rd->ibg_new, buf); + if (retval) + { + printf("Error moving inode table for block group %u\n", grp); + goto out; + } + } + // Set inode table location and free inode count + ext2fs_inode_table_loc_set(rd->fs, grp, rd->new_itable_loc[grp]); + ext2fs_bg_free_inodes_count_set(rd->fs, grp, + ext2fs_bg_free_inodes_count(rd->fs, grp) + rd->ig_new - rd->ig_old); + if (has_gdt_csum) + { + unus = ext2fs_bg_itable_unused(rd->fs, grp); + if (rd->ig_new > rd->ig_old || unus >= rd->ig_old - rd->ig_new) + { + unus += rd->ig_new - 
rd->ig_old;
+            }
+            else
+            {
+                unus = 0;
+            }
+            ext2fs_bg_itable_unused_set(rd->fs, grp, unus);
+            ext2fs_bg_flags_clear(rd->fs, grp, EXT2_BG_BLOCK_UNINIT);
+            ext2fs_group_desc_csum_set(rd->fs, grp);
         }
     }
     // Bitmaps never need to be moved because a single bitmap is always a single FS block
@@ -485,13 +438,108 @@ out:
     return retval;
 }
 
+/**
+ * Block iterator callback: mark the visited block in the bitmap passed via priv_data.
+ * Entries with a negative blockcnt (used by libext2fs for indirect/metadata blocks)
+ * are skipped. Always returns 0, i.e. never changes a block or aborts the iteration.
+ */
+int nonmovable_callback(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt, blk64_t ref_blk, int ref_offset, void *priv_data)
+{
+    if (blockcnt >= 0)
+    {
+        ext2fs_mark_block_bitmap2((ext2fs_block_bitmap)priv_data, *blocknr);
+    }
+    return 0;
+}
+
+/**
+ * Allocate new place for all groups' inode tables and remember it
+ * in rd->new_itable_loc (one starting block number per group).
+ * This is more correct, because it allows us to correctly handle situations
+ * when flex_bg is so big that inode tables for all groups in flex_bg
+ * do not fit into its first group, and also allows us to honor bad blocks.
+ * Returns 0 on success or a non-zero error code; on failure
+ * rd->new_itable_loc is released and reset to NULL.
+ */
+errcode_t alloc_itables(realloc_data *rd)
+{
+    errcode_t retval = 0;
+    ext2fs_block_bitmap nonmovable = NULL;
+    dgrp_t grp, flex_grp, flex_count;
+    int flexbg_size, n_grp, i;
+    blk64_t blk, end;
+    rd->new_itable_loc = NULL;
+    retval = ext2fs_get_mem(sizeof(blk64_t) * rd->fs->group_desc_count, &rd->new_itable_loc);
+    if (retval)
+        goto out;
+    // Create a map of blocks we can't move
+    // (libext2fs error codes are positive, so any non-zero value is a failure)
+    retval = ext2fs_allocate_block_bitmap(rd->fs, "non-movable block bitmap", &nonmovable);
+    if (retval)
+        goto out;
+    retval = ext2fs_block_iterate3(rd->fs, EXT2_BAD_INO, 0, NULL, nonmovable_callback, nonmovable);
+    if (retval)
+        goto out;
+    retval = ext2fs_block_iterate3(rd->fs, EXT2_RESIZE_INO, 0, NULL, nonmovable_callback, nonmovable);
+    if (retval)
+        goto out;
+    for (grp = 0; grp < rd->fs->group_desc_count; grp++)
+    {
+        ext2fs_reserve_super_and_bgd(rd->fs, grp, nonmovable);
+        ext2fs_mark_block_bitmap2(nonmovable, ext2fs_block_bitmap_loc(rd->fs, grp));
+        ext2fs_mark_block_bitmap2(nonmovable, ext2fs_inode_bitmap_loc(rd->fs, grp));
+    }
+    // flex_bg parameters
+    if (EXT2_HAS_INCOMPAT_FEATURE(rd->fs->super, EXT4_FEATURE_INCOMPAT_FLEX_BG)
+        && rd->fs->super->s_log_groups_per_flex)
+    {
+        flexbg_size = 1 << rd->fs->super->s_log_groups_per_flex;
+    }
+    else
+    {
+        flexbg_size = 1;
+    }
+    flex_count = (rd->fs->group_desc_count + flexbg_size - 1) / flexbg_size;
+    // Allocate inode tables
+    for (flex_grp = 0; flex_grp < flex_count; flex_grp++)
+    {
+        n_grp = flexbg_size;
+        grp = flex_grp*flexbg_size;
+        if (grp+n_grp > rd->fs->group_desc_count)
+        {
+            n_grp = rd->fs->group_desc_count - grp;
+        }
+        // TODO We could use a better algorithm that would always try to find
+        // the biggest free sequence of blocks if it can't allocate all inode
+        // tables in sequence
+        blk = ext2fs_group_first_block2(rd->fs, grp);
+        end = ext2fs_group_last_block2(rd->fs, grp+n_grp-1);
+        for (i = 0; i < n_grp; i++, grp++)
+        {
+            retval = ext2fs_get_free_blocks2(rd->fs, blk, end, rd->ibg_new, nonmovable, &blk);
+            if (retval)
+                goto out;
+            rd->new_itable_loc[grp] = blk;
+            // Reserve the range so that no later table can be placed over it
+            ext2fs_mark_block_bitmap_range2(nonmovable, blk, rd->ibg_new);
+            blk += rd->ibg_new;
+        }
+    }
+out:
+    if (nonmovable)
+        ext2fs_free_block_bitmap(nonmovable);
+    if (retval && rd->new_itable_loc)
+        ext2fs_free_mem(&rd->new_itable_loc);
+    return retval;
+}
+
 /**
  * Main function: change inode number of a filesystem!
  */
-int do_realloc(realloc_data *rd)
+errcode_t do_realloc(realloc_data *rd)
 {
     __u32 ig_round;
-    int retval;
+    errcode_t retval;
     rd->ig_old = EXT2_INODES_PER_GROUP(rd->fs->super);
     rd->ig_new = rd->new_inode_count / rd->fs->group_desc_count;
     // inodes-per-group must be a multiple of 8 so each byte of inode bitmap is filled
@@ -517,6 +565,12 @@ int do_realloc(realloc_data *rd)
             " - there will be wasted space in inode tables. Optimal inode count would be %u.\n",
             rd->new_inode_count, rd->ig_new, EXT2_BLOCK_SIZE(rd->fs->super) / EXT2_INODE_SIZE(rd->fs->super), ig_round);
     }
+    // Find where to put the new inode tables
+    retval = alloc_itables(rd);
+    if (retval)
+    {
+        return retval;
+    }
     if (rd->ig_new < rd->ig_old)
     {
         if (rd->new_inode_count < rd->fs->super->s_inodes_count - rd->fs->super->s_free_inodes_count)