From 185c4aeaf23d48375e3506218305031663203d0f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o <tytso@mit.edu> Date: Fri, 23 May 2008 01:00:19 -0400 Subject: [PATCH] e2fsck: Add support to check journal checksums Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- e2fsck/jfs_user.h | 24 ++++-- e2fsck/journal.c | 17 +++- e2fsck/problem.c | 5 ++ e2fsck/problem.h | 3 + e2fsck/recovery.c | 178 ++++++++++++++++++++++++++++++++++++---- lib/ext2fs/jfs_compat.h | 1 + lib/ext2fs/kernel-jbd.h | 40 ++++++++- 7 files changed, 244 insertions(+), 24 deletions(-) diff --git a/e2fsck/jfs_user.h b/e2fsck/jfs_user.h index 9da5a16c..09d348c8 100644 --- a/e2fsck/jfs_user.h +++ b/e2fsck/jfs_user.h @@ -70,6 +70,13 @@ typedef unsigned int __be32; extern kmem_cache_t * do_cache_create(int len); extern void do_cache_destroy(kmem_cache_t *cache); +#define __init + +/* + * Now pull in the real linux/jfs.h definitions. + */ +#include <ext2fs/kernel-jbd.h> + #if (defined(E2FSCK_INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS)) #ifdef E2FSCK_INCLUDE_INLINE_FUNCS #define _INLINE_ extern @@ -94,15 +101,20 @@ _INLINE_ void do_cache_destroy(kmem_cache_t *cache) { free(cache); } -#undef _INLINE_ -#endif - -#define __init /* - * Now pull in the real linux/jfs.h definitions. + * helper functions to deal with 32 or 64bit block numbers. 
*/ -#include <ext2fs/kernel-jbd.h> +_INLINE_ size_t journal_tag_bytes(journal_t *journal) +{ + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT)) + return JBD_TAG_SIZE64; + else + return JBD_TAG_SIZE32; +} + +#undef _INLINE_ +#endif /* * Kernel compatibility functions are defined in journal.c diff --git a/e2fsck/journal.c b/e2fsck/journal.c index ec0af4b7..b701d19d 100644 --- a/e2fsck/journal.c +++ b/e2fsck/journal.c @@ -73,8 +73,12 @@ struct buffer_head *getblk(kdev_t kdev, blk_t blocknr, int blocksize) if (!bh) return NULL; +#ifdef CONFIG_JBD_DEBUG + if (journal_enable_debug >= 3) + bh_count++; +#endif jfs_debug(4, "getblk for block %lu (%d bytes)(total %d)\n", - (unsigned long) blocknr, blocksize, ++bh_count); + (unsigned long) blocknr, blocksize, bh_count); bh->b_ctx = kdev->k_ctx; if (kdev->k_dev == K_DEV_FS) @@ -798,9 +802,12 @@ no_has_journal: static errcode_t recover_ext3_journal(e2fsck_t ctx) { + struct problem_context pctx; journal_t *journal; int retval; + clear_problem_context(&pctx); + journal_init_revoke_caches(); retval = e2fsck_get_journal(ctx, &journal); if (retval) @@ -818,6 +825,14 @@ static errcode_t recover_ext3_journal(e2fsck_t ctx) if (retval) goto errout; + if (journal->j_failed_commit) { + pctx.ino = journal->j_failed_commit; + fix_problem(ctx, PR_0_JNL_TXN_CORRUPT, &pctx); + ctx->fs->super->s_state |= EXT2_ERROR_FS; + ext2fs_mark_super_dirty(ctx->fs); + } + + if (journal->j_superblock->s_errno) { ctx->fs->super->s_state |= EXT2_ERROR_FS; ext2fs_mark_super_dirty(ctx->fs); diff --git a/e2fsck/problem.c b/e2fsck/problem.c index 81ad9b0e..0fbcd895 100644 --- a/e2fsck/problem.c +++ b/e2fsck/problem.c @@ -376,6 +376,11 @@ static struct e2fsck_problem problem_table[] = { N_("Last @g @b @B uninitialized. 
"), PROMPT_FIX, PR_PREEN_OK }, + /* Journal transaction found corrupt */ + { PR_0_JNL_TXN_CORRUPT, + N_("Journal transaction %i was corrupt, replay was aborted.\n"), + PROMPT_NONE, 0 }, + /* Pass 1 errors */ /* Pass 1: Checking inodes, blocks, and sizes */ diff --git a/e2fsck/problem.h b/e2fsck/problem.h index 5f90a180..24e7ed70 100644 --- a/e2fsck/problem.h +++ b/e2fsck/problem.h @@ -211,6 +211,9 @@ struct problem_context { /* Last group block bitmap is uninitialized. */ #define PR_0_BB_UNINIT_LAST 0x000039 +/* Journal transaction found corrupt */ +#define PR_0_JNL_TXN_CORRUPT 0x00003A + /* * Pass 1 errors */ diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c index 43bc5e5e..2edb576e 100644 --- a/e2fsck/recovery.c +++ b/e2fsck/recovery.c @@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal, * Count the number of in-use tags in a journal descriptor block. */ -static int count_tags(struct buffer_head *bh, int size) +static int count_tags(journal_t *journal, struct buffer_head *bh) { char * tagp; journal_block_tag_t * tag; - int nr = 0; + int nr = 0, size = journal->j_blocksize; + int tag_bytes = journal_tag_bytes(journal); tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { + while ((tagp - bh->b_data + tag_bytes) <= size) { tag = (journal_block_tag_t *) tagp; nr++; - tagp += sizeof(journal_block_tag_t); + tagp += tag_bytes; if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID))) tagp += 16; @@ -307,6 +308,46 @@ int journal_skip_recovery(journal_t *journal) return err; } +static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +{ + unsigned long long block = be32_to_cpu(tag->t_blocknr); + if (tag_bytes > JBD_TAG_SIZE32) + block |= (__u64)be32_to_cpu(tag->t_blocknr_high) << 32; + return block; +} + +/* + * calc_chksums calculates the checksums for the blocks described in the + * descriptor block. 
+ */ +static int calc_chksums(journal_t *journal, struct buffer_head *bh, + unsigned long *next_log_block, __u32 *crc32_sum) +{ + int i, num_blks, err; + unsigned long io_block; + struct buffer_head *obh; + + num_blks = count_tags(journal, bh); + /* Calculate checksum of the descriptor block. */ + *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); + + for (i = 0; i < num_blks; i++) { + io_block = (*next_log_block)++; + wrap(journal, *next_log_block); + err = jread(&obh, journal, io_block); + if (err) { + printk(KERN_ERR "JBD: IO error %d recovering block " + "%lu in log\n", err, io_block); + return 1; + } else { + *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, + obh->b_size); + } + brelse(obh); + } + return 0; +} + static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass) { @@ -318,11 +359,13 @@ static int do_one_pass(journal_t *journal, struct buffer_head * bh; unsigned int sequence; int blocktype; + int tag_bytes = journal_tag_bytes(journal); + __u32 crc32_sum = ~0; /* Transactional Checksums */ /* Precompute the maximum metadata descriptors in a descriptor block */ int MAX_BLOCKS_PER_DESC; MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) - / sizeof(journal_block_tag_t)); + / tag_bytes); /* * First thing is to establish what we expect to find in the log @@ -409,11 +452,24 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JFS_DESCRIPTOR_BLOCK: /* If it is a valid descriptor block, replay it - * in pass REPLAY; otherwise, just skip over the - * blocks it describes. */ + * in pass REPLAY; if journal_checksums enabled, then + * calculate checksums in PASS_SCAN, otherwise, + * just skip over the blocks it describes. 
*/ if (pass != PASS_REPLAY) { - next_log_block += - count_tags(bh, journal->j_blocksize); + if (pass == PASS_SCAN && + JFS_HAS_COMPAT_FEATURE(journal, + JFS_FEATURE_COMPAT_CHECKSUM) && + !info->end_transaction) { + if (calc_chksums(journal, bh, + &next_log_block, + &crc32_sum)) { + brelse(bh); + break; + } + brelse(bh); + continue; + } + next_log_block += count_tags(journal, bh); wrap(journal, next_log_block); brelse(bh); continue; @@ -424,7 +480,7 @@ static int do_one_pass(journal_t *journal, * getting done here! */ tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) + while ((tagp - bh->b_data + tag_bytes) <= journal->j_blocksize) { unsigned long io_block; @@ -494,7 +550,7 @@ static int do_one_pass(journal_t *journal, } skip_write: - tagp += sizeof(journal_block_tag_t); + tagp += tag_bytes; if (!(flags & JFS_FLAG_SAME_UUID)) tagp += 16; @@ -506,9 +562,98 @@ static int do_one_pass(journal_t *journal, continue; case JFS_COMMIT_BLOCK: - /* Found an expected commit block: not much to - * do other than move on to the next sequence + jbd_debug(3, "Commit block for #%u found\n", + next_commit_ID); + /* How to differentiate between interrupted commit + * and journal corruption ? + * + * {nth transaction} + * Checksum Verification Failed + * | + * ____________________ + * | | + * async_commit sync_commit + * | | + * | GO TO NEXT "Journal Corruption" + * | TRANSACTION + * | + * {(n+1)th transaction} + * | + * _______|______________ + * | | + * Commit block found Commit block not found + * | | + * "Journal Corruption" | + * _____________|_________ + * | | + * nth trans corrupt OR nth trans + * and (n+1)th interrupted interrupted + * before commit block + * could reach the disk. + * (Cannot find the difference in above + * mentioned conditions. Hence assume + * "Interrupted Commit".) 
+ */ + + /* Found an expected commit block: if checksums + * are present verify them in PASS_SCAN; else not + * much to do other than move on to the next sequence * number. */ + if (pass == PASS_SCAN && + JFS_HAS_COMPAT_FEATURE(journal, + JFS_FEATURE_COMPAT_CHECKSUM)) { + int chksum_err, chksum_seen; + struct commit_header *cbh = + (struct commit_header *)bh->b_data; + unsigned found_chksum = + be32_to_cpu(cbh->h_chksum[0]); + + chksum_err = chksum_seen = 0; + + jbd_debug(3, "Checksums %x %x\n", + crc32_sum, found_chksum); + if (info->end_transaction) { + journal->j_failed_commit = + info->end_transaction; + brelse(bh); + break; + } + + if (crc32_sum == found_chksum && + cbh->h_chksum_type == JBD2_CRC32_CHKSUM && + cbh->h_chksum_size == + JBD2_CRC32_CHKSUM_SIZE) + chksum_seen = 1; + else if (!(cbh->h_chksum_type == 0 && + cbh->h_chksum_size == 0 && + found_chksum == 0 && + !chksum_seen)) + /* + * If fs is mounted using an old kernel and then + * kernel with journal_chksum is used then we + * get a situation where the journal flag has + * checksum flag set but checksums are not + * present i.e chksum = 0, in the individual + * commit blocks. + * Hence to avoid checksum failures, in this + * situation, this extra check is added. + */ + chksum_err = 1; + + if (chksum_err) { + info->end_transaction = next_commit_ID; + jbd_debug(1, "Checksum_err\n"); + + if (!JFS_HAS_INCOMPAT_FEATURE(journal, + JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)){ + journal->j_failed_commit = + next_commit_ID; + brelse(bh); + break; + } + } + crc32_sum = ~0; + } brelse(bh); next_commit_ID++; continue; @@ -544,9 +689,10 @@ static int do_one_pass(journal_t *journal, * transaction marks the end of the valid log. */ - if (pass == PASS_SCAN) - info->end_transaction = next_commit_ID; - else { + if (pass == PASS_SCAN) { + if (!info->end_transaction) + info->end_transaction = next_commit_ID; + } else { /* It's really bad news if different passes end up at * different places (but possible due to IO errors). 
*/ if (info->end_transaction != next_commit_ID) { diff --git a/lib/ext2fs/jfs_compat.h b/lib/ext2fs/jfs_compat.h index 30ad1ef5..7b8aafd7 100644 --- a/lib/ext2fs/jfs_compat.h +++ b/lib/ext2fs/jfs_compat.h @@ -45,6 +45,7 @@ struct journal_s tid_t j_transaction_sequence; __u8 j_uuid[16]; struct jbd_revoke_table_s *j_revoke; + tid_t j_failed_commit; }; #define J_ASSERT(assert) \ diff --git a/lib/ext2fs/kernel-jbd.h b/lib/ext2fs/kernel-jbd.h index 2a099d8b..158d7641 100644 --- a/lib/ext2fs/kernel-jbd.h +++ b/lib/ext2fs/kernel-jbd.h @@ -108,6 +108,30 @@ typedef struct journal_header_s __u32 h_sequence; } journal_header_t; +/* + * Checksum types. + */ +#define JBD2_CRC32_CHKSUM 1 +#define JBD2_MD5_CHKSUM 2 +#define JBD2_SHA1_CHKSUM 3 + +#define JBD2_CRC32_CHKSUM_SIZE 4 + +#define JBD2_CHECKSUM_BYTES (32 / sizeof(__u32)) +/* + * Commit block header for storing transactional checksums: + */ +struct commit_header { + __u32 h_magic; + __u32 h_blocktype; + __u32 h_sequence; + unsigned char h_chksum_type; + unsigned char h_chksum_size; + unsigned char h_padding[2]; + __u32 h_chksum[JBD2_CHECKSUM_BYTES]; + __u64 h_commit_sec; + __u32 h_commit_nsec; +}; /* * The block tag: used to describe a single buffer in the journal @@ -116,8 +140,12 @@ typedef struct journal_block_tag_s { __u32 t_blocknr; /* The on-disk block number */ __u32 t_flags; /* See below */ + __u32 t_blocknr_high; /* most-significant high 32bits. 
*/ } journal_block_tag_t; +#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t)) +#define JBD_TAG_SIZE32 (8) + /* * The revoke descriptor: used on disk to describe a series of blocks to * be revoked from the log @@ -194,12 +222,19 @@ typedef struct journal_superblock_s ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) +#define JFS_FEATURE_COMPAT_CHECKSUM 0x00000001 + #define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001 +#define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001 +#define JFS_FEATURE_INCOMPAT_64BIT 0x00000002 +#define JFS_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 + /* Features known to this kernel version: */ #define JFS_KNOWN_COMPAT_FEATURES 0 #define JFS_KNOWN_ROCOMPAT_FEATURES 0 -#define JFS_KNOWN_INCOMPAT_FEATURES JFS_FEATURE_INCOMPAT_REVOKE +#define JFS_KNOWN_INCOMPAT_FEATURES (JFS_FEATURE_INCOMPAT_REVOKE|\ + JFS_FEATURE_INCOMPAT_ASYNC_COMMIT) #ifdef __KERNEL__ @@ -548,6 +583,9 @@ struct journal_s /* The revoke table: maintains the list of revoked blocks in the current transaction. */ struct jbd_revoke_table_s *j_revoke; + + /* Failed journal commit ID */ + unsigned int j_failed_commit; }; /*