From bbd2f78cf63ab4c635d76073605d6fb1a30c277c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 1 Sep 2016 11:37:59 -0400 Subject: [PATCH] libext2fs: allow the default journal size to go as large as a gigabyte Recent research has shown that for a metadata-heavy workload, a 128 MB journal can be a bottleneck on HDDs, and that the optimal journal size is proportional to the number of unique metadata blocks that can be modified (and written into the journal) in a 30-second window. A one-gigabyte journal, which will be used for file systems larger than 128 gigabytes, should be sufficient for most workloads. Signed-off-by: Theodore Ts'o --- lib/ext2fs/mkjournal.c | 26 +++++++++++++++++--------- tests/d_dumpe2fs_group_only/expect | 4 ++-- tests/t_uninit_bg_rm/expect | 4 ++-- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/lib/ext2fs/mkjournal.c b/lib/ext2fs/mkjournal.c index 43ffa53f..7f78291d 100644 --- a/lib/ext2fs/mkjournal.c +++ b/lib/ext2fs/mkjournal.c @@ -334,20 +334,28 @@ out2: * Find a reasonable journal file size (in blocks) given the number of blocks * in the filesystem. For very small filesystems, it is not reasonable to * have a journal that fills more than half of the filesystem. + * + * n.b. 
comments assume 4k blocks */ int ext2fs_default_journal_size(__u64 num_blocks) { if (num_blocks < 2048) return -1; - if (num_blocks < 32768) - return (1024); - if (num_blocks < 256*1024) - return (4096); - if (num_blocks < 512*1024) - return (8192); - if (num_blocks < 1024*1024) - return (16384); - return 32768; + if (num_blocks < 32768) /* 128 MB */ + return (1024); /* 4 MB */ + if (num_blocks < 256*1024) /* 1 GB */ + return (4096); /* 16 MB */ + if (num_blocks < 512*1024) /* 2 GB */ + return (8192); /* 32 MB */ + if (num_blocks < 4096*1024) /* 16 GB */ + return (16384); /* 64 MB */ + if (num_blocks < 8192*1024) /* 32 GB */ + return (32768); /* 128 MB */ + if (num_blocks < 16384*1024) /* 64 GB */ + return (65536); /* 256 MB */ + if (num_blocks < 32768*1024) /* 128 GB */ + return (131072); /* 512 MB */ + return 262144; /* 1 GB */ } int ext2fs_journal_sb_start(int blocksize) diff --git a/tests/d_dumpe2fs_group_only/expect b/tests/d_dumpe2fs_group_only/expect index 78f97a2b..52610e60 100644 --- a/tests/d_dumpe2fs_group_only/expect +++ b/tests/d_dumpe2fs_group_only/expect @@ -4,7 +4,7 @@ Superblock backups stored on blocks: Allocating group tables: done Writing inode tables: done -Creating journal (32768 blocks): done +Creating journal (16384 blocks): done Writing superblocks and filesystem accounting information: done Pass 1: Checking inodes, blocks, and sizes @@ -12,7 +12,7 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test_filesys: 11/262144 files (0.0% non-contiguous), 51278/1048576 blocks +test_filesys: 11/262144 files (0.0% non-contiguous), 34894/1048576 blocks Exit status is 0 dumpe2fs output diff --git a/tests/t_uninit_bg_rm/expect b/tests/t_uninit_bg_rm/expect index 61e9eaa8..68f8b518 100644 --- a/tests/t_uninit_bg_rm/expect +++ b/tests/t_uninit_bg_rm/expect @@ -7,7 +7,7 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity 
Pass 4: Checking reference counts Pass 5: Checking group summary information -test_filesys: 11/65536 files (0.0% non-contiguous), 52294/1048576 blocks +test_filesys: 11/65536 files (0.0% non-contiguous), 35910/1048576 blocks mke2fs -q -t ext4 -O bigalloc -F -o Linux -b 1024 -C 8192 test.img 10G tune2fs -f -O ^uninit_bg test.img @@ -18,4 +18,4 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test_filesys: 11/655360 files (0.0% non-contiguous), 199864/10485760 blocks +test_filesys: 11/655360 files (0.0% non-contiguous), 232632/10485760 blocks