e2fsck: fix e2fsck -fD directory truncation

When an extent-mapped directory is compacted by "e2fsck -fD" and
frees enough leaf blocks that it loses an extent tree index block,
the old e2fsck_rehash_dir->ext2fs_block_iterate3->write_dir_block()
code would not free the extent block, which would result in the
extent tree becoming corrupted when it is written out.

    Pass 1: Checking inodes, blocks, and sizes
    Inode 17825800, end of extent exceeds allowed value
            (logical block 710, physical block 570459684, len 1019)

This results in loss of a whole index block of directory leaf blocks
and maybe thousands or millions of files in lost+found.

Fix e2fsck_rehash_dir() to call ext2fs_punch() to free the blocks
at the end of the directory instead of trying to handle this itself
while writing out the directory.  That properly handles all of the
cases of updating the extent tree as well as accounting for blocks
that are released (both leaf blocks and index blocks).

Add a test case for compacting the directory to be smaller than the
index block that originally caused the corruption.

Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
test-maint
Andreas Dilger 2015-11-30 15:26:35 -05:00 committed by Theodore Ts'o
parent e158db5377
commit 19961cd000
8 changed files with 148 additions and 36 deletions

View File

@ -52,10 +52,13 @@
#include "e2fsck.h"
#include "problem.h"
#undef REHASH_DEBUG
struct fill_dir_struct {
char *buf;
struct ext2_inode *inode;
errcode_t err;
ext2_ino_t ino;
e2fsck_t ctx;
struct hash_entry *harray;
int max_array, num_array;
@ -625,8 +628,8 @@ static errcode_t calculate_tree(ext2_filsys fs,
struct write_dir_struct {
struct out_dir *outdir;
errcode_t err;
ext2_ino_t ino;
e2fsck_t ctx;
blk64_t cleared;
};
/*
@ -643,28 +646,35 @@ static int write_dir_block(ext2_filsys fs,
blk64_t blk;
char *dir;
if (*block_nr == 0)
#ifdef REHASH_DEBUG
printf("%u: write_dir_block %lld:%lld", wd->ino, blockcnt, *block_nr);
#endif
if (*block_nr == 0) {
#ifdef REHASH_DEBUG
printf(" - skip\n");
#endif
return 0;
if (blockcnt >= wd->outdir->num) {
e2fsck_read_bitmaps(wd->ctx);
blk = *block_nr;
/*
* In theory, we only release blocks from the end of the
* directory file, so it's fine to clobber a whole cluster at
* once.
*/
if (blk % EXT2FS_CLUSTER_RATIO(fs) == 0) {
ext2fs_block_alloc_stats2(fs, blk, -1);
wd->cleared++;
}
*block_nr = 0;
return BLOCK_CHANGED;
}
if (blockcnt < 0)
/* Don't free blocks at the end of the directory, they will be
* truncated by the caller. */
if (blockcnt >= wd->outdir->num) {
#ifdef REHASH_DEBUG
printf(" - not freed\n");
#endif
return 0;
}
if (blockcnt < 0) {
#ifdef REHASH_DEBUG
printf(" - skip\n");
#endif
return 0;
}
dir = wd->outdir->buf + (blockcnt * fs->blocksize);
wd->err = ext2fs_write_dir_block3(fs, *block_nr, dir, 0);
#ifdef REHASH_DEBUG
printf(" - write (%d)\n", wd->err);
#endif
if (wd->err)
return BLOCK_ABORT;
return 0;
@ -684,10 +694,10 @@ static errcode_t write_directory(e2fsck_t ctx, ext2_filsys fs,
wd.outdir = outdir;
wd.err = 0;
wd.ino = ino;
wd.ctx = ctx;
wd.cleared = 0;
retval = ext2fs_block_iterate3(fs, ino, 0, 0,
retval = ext2fs_block_iterate3(fs, ino, 0, NULL,
write_dir_block, &wd);
if (retval)
return retval;
@ -699,14 +709,17 @@ static errcode_t write_directory(e2fsck_t ctx, ext2_filsys fs,
inode.i_flags &= ~EXT2_INDEX_FL;
else
inode.i_flags |= EXT2_INDEX_FL;
retval = ext2fs_inode_size_set(fs, &inode,
outdir->num * fs->blocksize);
#ifdef REHASH_DEBUG
printf("%u: set inode size to %u blocks = %u bytes\n",
ino, outdir->num, outdir->num * fs->blocksize);
#endif
retval = ext2fs_inode_size_set(fs, &inode, (ext2_off64_t)outdir->num *
fs->blocksize);
if (retval)
return retval;
ext2fs_iblk_sub_blocks(fs, &inode, wd.cleared);
e2fsck_write_inode(ctx, ino, &inode, "rehash_dir");
return 0;
/* ext2fs_punch() calls ext2fs_write_inode() which writes the size */
return ext2fs_punch(fs, ino, &inode, NULL, outdir->num, ~0ULL);
}
errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino)
@ -715,32 +728,25 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino)
errcode_t retval;
struct ext2_inode inode;
char *dir_buf = 0;
struct fill_dir_struct fd;
struct out_dir outdir;
struct fill_dir_struct fd = { NULL };
struct out_dir outdir = { 0 };
outdir.max = outdir.num = 0;
outdir.buf = 0;
outdir.hashes = 0;
e2fsck_read_inode(ctx, ino, &inode, "rehash_dir");
retval = ENOMEM;
fd.harray = 0;
dir_buf = malloc(inode.i_size);
if (!dir_buf)
goto errout;
fd.max_array = inode.i_size / 32;
fd.num_array = 0;
fd.harray = malloc(fd.max_array * sizeof(struct hash_entry));
if (!fd.harray)
goto errout;
fd.ino = ino;
fd.ctx = ctx;
fd.buf = dir_buf;
fd.inode = &inode;
fd.err = 0;
fd.dir_size = 0;
fd.compress = 0;
if (!(fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_DIR_INDEX) ||
(inode.i_size / fs->blocksize) < 2)
fd.compress = 1;

View File

@ -0,0 +1,29 @@
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 3A: Optimizing directories
Pass 4: Checking reference counts
Pass 5: Checking group summary information
test_filesys: ***** FILE SYSTEM WAS MODIFIED *****
352 inodes used (41.12%, out of 856)
0 non-contiguous files (0.0%)
1 non-contiguous directory (0.3%)
# of inodes with ind/dind/tind blocks: 0/0/0
Extent depth histogram: 342/1
586 blocks used (68.94%, out of 850)
0 bad blocks
0 large files
340 regular files
3 directories
0 character device files
0 block device files
0 fifos
0 links
0 symbolic links (0 fast symbolic links)
0 sockets
------------
343 files
Exit status is 1

View File

@ -0,0 +1,7 @@
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
test_filesys: 352/856 files (0.3% non-contiguous), 586/850 blocks
Exit status is 0

Binary file not shown.

View File

@ -0,0 +1 @@
htree extent compression

View File

@ -0,0 +1,69 @@
#!/bin/bash
# Test driver: sources the shared run_e2fsck helper with FSCK_OPT="-fyvD"
# and exits with that helper's status.
FSCK_OPT="-fyvD"
. $cmd_dir/run_e2fsck
# Everything after this exit is currently unreachable; it is kept as the
# recipe for regenerating the test image (see the note below).
exit $?
# This script depends on "mke2fs -d", which is only in master and not maint,
# to populate the file directory tree poorly (namely that there are no
# contiguous blocks in the directory leaf and the extent tree is large).
# Once the "mke2fs -d" option is available on the "maint" branch, the
# above few lines should be deleted, along with the "image.gz" file.
#
# Settings for the image-generation path:
#   SRC/SUB  - temporary source tree and the subdirectory that holds the files
#   NAMELEN  - dd block size used to build the common filename prefix
#   BASE     - path prefix of every generated file: a run of 'y' characters
#              (output of "yes" with newlines stripped, one dd block)
#   BSIZE    - filesystem block size passed to "mke2fs -b"
# NOTE(review): SKIP_GUNZIP is presumably read by run_e2fsck so that the
# freshly built image is used instead of unpacking image.gz - confirm.
TMPDIR=${TMPDIR:-"/tmp"}
OUT=$test_name.log
FSCK_OPT="-fyvD"
SKIP_GUNZIP="true"
NAMELEN=250
SRC=$TMPDIR/$test_name.tmp
SUB=subdir
BASE=$SRC/$SUB/$(yes | tr -d '\n' | dd bs=$NAMELEN count=1 2> /dev/null)
TMPFILE=${TMPFILE:-"$TMPDIR/image"}
BSIZE=1024
# start with an empty log file
> $OUT
mkdir -p $SRC/$SUB
# calculate the number of files needed to create the directory extent tree
# deep enough to exceed the in-inode index and spill into an index block.
#
# dirents per block * extents per block * (index blocks > i_blocks)
NUM=$(((BSIZE / (NAMELEN + 8)) * (BSIZE / 12) * 2))
# Create source files. Unfortunately hard links will be copied as links,
# and blocks with only NULs will be turned into holes.
# (Skipped when $BASE.1 already exists, i.e. the tree was populated earlier.)
if [ ! -f $BASE.1 ]; then
for N in $(seq $NUM); do
echo "foo" > $BASE.$N
done >> $OUT
fi
# make filesystem with enough inodes and blocks to hold all the test files
# (start from an empty image file; the scaled-up NUM is passed both as the
# -N value and as the trailing size argument to mke2fs)
> $TMPFILE
NUM=$((NUM * 5 / 3))
echo "mke2fs -b $BSIZE -O dir_index,extent -d$SRC -N$NUM $TMPFILE $NUM" >> $OUT
$MKE2FS -b $BSIZE -O dir_index,extent -d$SRC -N$NUM $TMPFILE $NUM >> $OUT 2>&1
rm -r $SRC
# Run e2fsck to convert dir to htree before deleting the files, as mke2fs
# doesn't do this. Run second e2fsck to verify there is no corruption yet.
# The overrides below are set inside a subshell, so they do not affect the
# final run_e2fsck invocation at the end of the script.
# NOTE(review): EXP1/EXP2/OUT1/OUT2/DESCRIPTION are presumably consumed by
# run_e2fsck as expected-output files, log names and test label - confirm.
(
EXP1=$test_dir/expect.pre.1
EXP2=$test_dir/expect.pre.2
OUT1=$test_name.pre.1.log
OUT2=$test_name.pre.2.log
DESCRIPTION="$(cat $test_dir/name) setup"
. $cmd_dir/run_e2fsck
)
# generate a list of filenames for debugfs to delete, one from each leaf block
# (the awk program is split around '$SUB' so the shell expands it; each
# matching line yields an "rm subdir/<field 4>" command for debugfs)
DELETE_LIST=$TMPDIR/delete.$$
$DEBUGFS -c -R "htree subdir" $TMPFILE 2>> $OUT |
grep -A2 "Reading directory block" |
awk '/yyyyy/ { print "rm '$SUB'/"$4 }' > $DELETE_LIST
$DEBUGFS -w -f $DELETE_LIST $TMPFILE >> $OUT 2>&1
rm $DELETE_LIST
# keep a copy of the image as it was before the final e2fsck run
cp $TMPFILE $TMPFILE.sav
# final run with FSCK_OPT="-fyvD" against the thinned-out directory
. $cmd_dir/run_e2fsck

View File

@ -14,5 +14,5 @@ Pass 4: Checking reference counts
Pass 5: Checking group summary information
test_filesys: ***** FILE SYSTEM WAS MODIFIED *****
test_filesys: 47730/100192 files (0.0% non-contiguous), 13551/31745 blocks
test_filesys: 47730/100192 files (0.0% non-contiguous), 13550/31745 blocks
Exit status is 1

View File

@ -3,5 +3,5 @@ Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
test_filesys: 47730/100192 files (0.0% non-contiguous), 13551/31745 blocks
test_filesys: 47730/100192 files (0.0% non-contiguous), 13550/31745 blocks
Exit status is 0