From ea2d3788621cec5ed067280c7d228ec8897d2208 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 4 Oct 2012 23:30:23 -0400 Subject: [PATCH 1/6] e2freefrag: use 64-bit rbtree bitmaps Enable the use of 64-bit bitmaps, so e2freefrag will work on file systems with the 64-bit feature enabled. In addition, enable the rbtree-based bitmaps, which significantly reduces the amount of memory required (from 97 megs to 1.7 megs for an empty 3T file system) at the cost of additional CPU overhead (but we will claw back some of the additional CPU overhead in the next commit). Addresses-Google-Bug: 7269948 Signed-off-by: "Theodore Ts'o" --- misc/e2freefrag.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/misc/e2freefrag.c b/misc/e2freefrag.c index 30af43e2..58f1ff53 100644 --- a/misc/e2freefrag.c +++ b/misc/e2freefrag.c @@ -249,13 +249,14 @@ static void collect_info(ext2_filsys fs, struct chunk_info *chunk_info, FILE *f) static void open_device(char *device_name, ext2_filsys *fs) { int retval; - int flag = EXT2_FLAG_FORCE; + int flag = EXT2_FLAG_FORCE | EXT2_FLAG_64BITS; retval = ext2fs_open(device_name, flag, 0, 0, unix_io_manager, fs); if (retval) { com_err(device_name, retval, "while opening filesystem"); exit(1); } + (*fs)->default_bitmap_type = EXT2FS_BMAP64_RBTREE; } #endif From 547a59a821df1cffcd0ca2c763be9ef319cb980b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 4 Oct 2012 23:38:55 -0400 Subject: [PATCH 2/6] libext2fs: optimize rb_test_bit Optimize testing for a bit in an rbtree-based bitmap for the case where the calling application is scanning through the bitmap sequentially. Previously, we did this for a set of bits which were inside an allocated extent, but we did not optimize the case where there was a large number of bits after an allocated extent which were not in use. 
1111111111111110000000000000000000 ^ optimized ^not optimized In my tests of a roughly half-filled file system, the run time of e2freefrag was halved, and the CPU time spent in userspace during e2fsck's pass 5 was reduced by 30%. Signed-off-by: "Theodore Ts'o" Reviewed-by: Lukas Czerner --- lib/ext2fs/blkmap64_rb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c index a83f8acf..c9006f85 100644 --- a/lib/ext2fs/blkmap64_rb.c +++ b/lib/ext2fs/blkmap64_rb.c @@ -314,8 +314,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap, inline static int rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit) { - struct bmap_rb_extent *rcursor; - struct rb_node *parent = NULL; + struct bmap_rb_extent *rcursor, *next_ext; + struct rb_node *parent = NULL, *next; struct rb_node **n = &bp->root.rb_node; struct bmap_rb_extent *ext; @@ -330,6 +330,18 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit) return 1; } + next = ext2fs_rb_next(&rcursor->node); + if (next) { + next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node); + if ((bit >= rcursor->start + rcursor->count) && + (bit < next_ext->start)) { +#ifdef BMAP_STATS_OPS + bp->test_hit++; +#endif + return 0; + } + } + rcursor = *bp->wcursor; if (!rcursor) goto search_tree; From 0bcba36f3f90488d2ef7502bd3c4f4920f2c4251 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 5 Oct 2012 20:57:49 -0400 Subject: [PATCH 3/6] libext2fs: remove pointless indirection in rbtree bitmaps The code was previously allocating a single 4 or 8 byte pointer for the rcursor and wcursor fields in the ext2fs_rb_private structure; this added two extra memory allocations (which could fail), and extra indirections, for no good reason. Removing the extra indirection also makes the code more readable, so it's all upside and no downside. 
Signed-off-by: "Theodore Ts'o" Reviewed-by: Eric Sandeen --- lib/ext2fs/blkmap64_rb.c | 46 +++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c index c9006f85..900c0d3f 100644 --- a/lib/ext2fs/blkmap64_rb.c +++ b/lib/ext2fs/blkmap64_rb.c @@ -38,8 +38,8 @@ struct bmap_rb_extent { struct ext2fs_rb_private { struct rb_root root; - struct bmap_rb_extent **wcursor; - struct bmap_rb_extent **rcursor; + struct bmap_rb_extent *wcursor; + struct bmap_rb_extent *rcursor; #ifdef BMAP_STATS_OPS __u64 mark_hit; __u64 test_hit; @@ -148,10 +148,10 @@ inline static void rb_free_extent(struct ext2fs_rb_private *bp, struct bmap_rb_extent *ext) { - if (*bp->wcursor == ext) - *bp->wcursor = NULL; - if (*bp->rcursor == ext) - *bp->rcursor = NULL; + if (bp->wcursor == ext) + bp->wcursor = NULL; + if (bp->rcursor == ext) + bp->rcursor = NULL; ext2fs_free_mem(&ext); } @@ -165,14 +165,8 @@ static errcode_t rb_alloc_private_data (ext2fs_generic_bitmap bitmap) return retval; bp->root = RB_ROOT; - retval = ext2fs_get_mem(sizeof(struct bmap_rb_extent *), &bp->rcursor); - if (retval) - return retval; - retval = ext2fs_get_mem(sizeof(struct bmap_rb_extent *), &bp->wcursor); - if (retval) - return retval; - *bp->rcursor = NULL; - *bp->wcursor = NULL; + bp->rcursor = NULL; + bp->wcursor = NULL; #ifdef BMAP_STATS_OPS bp->test_hit = 0; @@ -215,8 +209,6 @@ static void rb_free_bmap(ext2fs_generic_bitmap bitmap) bp = (struct ext2fs_rb_private *) bitmap->private; rb_free_tree(&bp->root); - ext2fs_free_mem(&bp->rcursor); - ext2fs_free_mem(&bp->wcursor); ext2fs_free_mem(&bp); bp = 0; } @@ -235,8 +227,8 @@ static errcode_t rb_copy_bmap(ext2fs_generic_bitmap src, src_bp = (struct ext2fs_rb_private *) src->private; dest_bp = (struct ext2fs_rb_private *) dest->private; - *src_bp->rcursor = NULL; - *dest_bp->rcursor = NULL; + src_bp->rcursor = NULL; + dest_bp->rcursor = NULL; src_node = 
ext2fs_rb_first(&src_bp->root); while (src_node) { @@ -299,8 +291,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap, } bp = (struct ext2fs_rb_private *) bmap->private; - *bp->rcursor = NULL; - *bp->wcursor = NULL; + bp->rcursor = NULL; + bp->wcursor = NULL; /* truncate tree to new_real_end size */ rb_truncate(new_real_end, &bp->root); @@ -319,7 +311,7 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit) struct rb_node **n = &bp->root.rb_node; struct bmap_rb_extent *ext; - rcursor = *bp->rcursor; + rcursor = bp->rcursor; if (!rcursor) goto search_tree; @@ -342,7 +334,7 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit) } } - rcursor = *bp->wcursor; + rcursor = bp->wcursor; if (!rcursor) goto search_tree; @@ -359,7 +351,7 @@ search_tree: else if (bit >= (ext->start + ext->count)) n = &(*n)->rb_right; else { - *bp->rcursor = ext; + bp->rcursor = ext; return 1; } } @@ -376,7 +368,7 @@ static int rb_insert_extent(__u64 start, __u64 count, struct bmap_rb_extent *ext; int retval = 0; - ext = *bp->wcursor; + ext = bp->wcursor; if (ext) { if (start >= ext->start && start <= (ext->start + ext->count)) { @@ -419,7 +411,7 @@ got_extent: new_node = &new_ext->node; ext2fs_rb_link_node(new_node, parent, n); ext2fs_rb_insert_color(new_node, root); - *bp->wcursor = new_ext; + bp->wcursor = new_ext; node = ext2fs_rb_prev(new_node); if (node) { @@ -745,8 +737,8 @@ static void rb_clear_bmap(ext2fs_generic_bitmap bitmap) bp = (struct ext2fs_rb_private *) bitmap->private; rb_free_tree(&bp->root); - *bp->rcursor = NULL; - *bp->wcursor = NULL; + bp->rcursor = NULL; + bp->wcursor = NULL; } #ifdef BMAP_STATS From fb129bba73868a41df74fec87773abd0c6c6cd4a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 5 Oct 2012 21:59:40 -0400 Subject: [PATCH 4/6] libext2fs: further optimize rb_test_bit Profiling shows that rb_test_bit() is now calling ext2fs_rb_next() a lot, and this function is now the hot spot when running e2freefrag. 
If we cache the results of ext2fs_rb_next(), we can eliminate those extra calls, which further speeds up both e2freefrag and e2fsck by reducing the amount of CPU time spent in userspace. Signed-off-by: "Theodore Ts'o" --- lib/ext2fs/blkmap64_rb.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c index 900c0d3f..a42eda1b 100644 --- a/lib/ext2fs/blkmap64_rb.c +++ b/lib/ext2fs/blkmap64_rb.c @@ -40,6 +40,7 @@ struct ext2fs_rb_private { struct rb_root root; struct bmap_rb_extent *wcursor; struct bmap_rb_extent *rcursor; + struct bmap_rb_extent *rcursor_next; #ifdef BMAP_STATS_OPS __u64 mark_hit; __u64 test_hit; @@ -152,6 +153,8 @@ static void rb_free_extent(struct ext2fs_rb_private *bp, bp->wcursor = NULL; if (bp->rcursor == ext) bp->rcursor = NULL; + if (bp->rcursor_next == ext) + bp->rcursor_next = NULL; ext2fs_free_mem(&ext); } @@ -166,6 +169,7 @@ static errcode_t rb_alloc_private_data (ext2fs_generic_bitmap bitmap) bp->root = RB_ROOT; bp->rcursor = NULL; + bp->rcursor_next = NULL; bp->wcursor = NULL; #ifdef BMAP_STATS_OPS @@ -306,7 +310,7 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap, inline static int rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit) { - struct bmap_rb_extent *rcursor, *next_ext; + struct bmap_rb_extent *rcursor, *next_ext = NULL; struct rb_node *parent = NULL, *next; struct rb_node **n = &bp->root.rb_node; struct bmap_rb_extent *ext; @@ -322,9 +326,15 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit) return 1; } - next = ext2fs_rb_next(&rcursor->node); - if (next) { - next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node); + next_ext = bp->rcursor_next; + if (!next_ext) { + next = ext2fs_rb_next(&rcursor->node); + if (next) + next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, + node); + bp->rcursor_next = next_ext; + } + if (next_ext) { if ((bit >= rcursor->start + rcursor->count) && (bit < next_ext->start)) { #ifdef 
BMAP_STATS_OPS @@ -333,6 +343,8 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit) return 0; } } + bp->rcursor = NULL; + bp->rcursor_next = NULL; rcursor = bp->wcursor; if (!rcursor) @@ -352,6 +364,7 @@ search_tree: n = &(*n)->rb_right; else { bp->rcursor = ext; + bp->rcursor_next = NULL; return 1; } } @@ -368,6 +381,7 @@ static int rb_insert_extent(__u64 start, __u64 count, struct bmap_rb_extent *ext; int retval = 0; + bp->rcursor_next = NULL; ext = bp->wcursor; if (ext) { if (start >= ext->start && @@ -738,6 +752,7 @@ static void rb_clear_bmap(ext2fs_generic_bitmap bitmap) rb_free_tree(&bp->root); bp->rcursor = NULL; + bp->rcursor_next = NULL; bp->wcursor = NULL; } From 51fb43dd271b1413d8203d95fe40be05caef8dd1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 5 Oct 2012 22:01:31 -0400 Subject: [PATCH 5/6] Fix makefiles to compile e2freefrag with profiling Also fix a bug caused by a stray continuation backslash which caused the e2fsck/Makefile to fail when profiling is enabled. 
Signed-off-by: "Theodore Ts'o" Reviewed-by: Eric Sandeen --- e2fsck/Makefile.in | 4 ++-- misc/Makefile.in | 34 ++++++++++++++++++++-------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/e2fsck/Makefile.in b/e2fsck/Makefile.in index a52bbe16..0c638e80 100644 --- a/e2fsck/Makefile.in +++ b/e2fsck/Makefile.in @@ -28,7 +28,7 @@ STATIC_DEPLIBS= $(DEPSTATIC_LIBQUOTA) $(STATIC_LIBEXT2FS) \ PROFILED_LIBS= $(PROFILED_LIBQUOTA) $(PROFILED_LIBEXT2FS) \ $(PROFILED_LIBCOM_ERR) $(PROFILED_LIBBLKID) $(PROFILED_LIBUUID) \ - $(PROFILED_LIBE2P) $(LIBINTL) \ + $(PROFILED_LIBE2P) $(LIBINTL) PROFILED_DEPLIBS= $(DEPPROFILED_LIBQUOTA) $(PROFILED_LIBEXT2FS) \ $(DEPPROFILED_LIBCOM_ERR) $(DEPPROFILED_LIBBLKID) \ $(DEPPROFILED_LIBUUID) $(DEPPROFILED_LIBE2P) @@ -129,7 +129,7 @@ e2fsck.static: $(OBJS) $(STATIC_DEPLIBS) $(E) " LD $@" $(Q) $(LD) $(LDFLAGS_STATIC) -o e2fsck.static $(OBJS) $(STATIC_LIBS) -e2fsck.profiled: $(PROFILED_OBJS) $(PROFILED_DEPLIBS) +e2fsck.profiled: $(OBJS) $(PROFILED_DEPLIBS) $(E) " LD $@" $(Q) $(LD) $(ALL_LDFLAGS) -g -pg -o e2fsck.profiled $(PROFILED_OBJS) \ $(PROFILED_LIBS) diff --git a/misc/Makefile.in b/misc/Makefile.in index 06921264..8a54f9f0 100644 --- a/misc/Makefile.in +++ b/misc/Makefile.in @@ -72,6 +72,7 @@ PROFILED_FSCK_OBJS= profiled/fsck.o profiled/base_device.o \ profiled/ismounted.o PROFILED_BLKID_OBJS= profiled/blkid.o PROFILED_FILEFRAG_OBJS= profiled/filefrag.o +PROFILED_E2FREEFRAG_OBJS= profiled/e2freefrag.o PROFILED_E2UNDO_OBJS= profiled/e2undo.o PROFILED_E4DEFRAG_OBJS= profiled/e4defrag.o @@ -107,7 +108,7 @@ all:: profiled $(SPROGS) $(UPROGS) $(USPROGS) $(SMANPAGES) $(UMANPAGES) \ @PROFILE_CMT@all:: tune2fs.profiled blkid.profiled e2image.profiled \ e2undo.profiled mke2fs.profiled dumpe2fs.profiled fsck.profiled \ logsave.profiled filefrag.profiled uuidgen.profiled uuidd.profiled \ - e2image.profiled e4defrag.profiled + e2image.profiled e4defrag.profiled e2freefrag.profiled profiled: @PROFILE_CMT@ $(E) " MKDIR $@" @@ -164,7 
+165,7 @@ tune2fs.static: $(TUNE2FS_OBJS) $(STATIC_DEPLIBS) $(STATIC_LIBE2P) $(DEPSTATIC_L $(STATIC_LIBS) $(STATIC_LIBBLKID) $(STATIC_LIBUUID) \ $(STATIC_LIBQUOTA) $(STATIC_LIBE2P) $(LIBINTL) -tune2fs.profiled: $(PROFILED_TUNE2FS_OBJS) $(PROFILED_DEPLIBS) \ +tune2fs.profiled: $(TUNE2FS_OBJS) $(PROFILED_DEPLIBS) \ $(PROFILED_E2P) $(DEPPROFILED_LIBBLKID) $(DEPPROFILED_LIBUUID) \ $(DEPPROFILED_LIBQUOTA) $(E) " LD $@" @@ -183,7 +184,7 @@ blkid.static: $(BLKID_OBJS) $(STATIC_DEPLIBS) $(DEPSTATIC_LIBBLKID) $(Q) $(CC) $(ALL_LDFLAGS) -o blkid.static $(BLKID_OBJS) $(STATIC_LIBS) \ $(STATIC_LIBBLKID) $(LIBINTL) -blkid.profiled: $(PROFILED_BLKID_OBJS) $(DEPPROFILED_LIBBLKID) \ +blkid.profiled: $(BLKID_OBJS) $(DEPPROFILED_LIBBLKID) \ $(PROFILED_LIBEXT2FS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o blkid.profiled $(PROFILED_BLKID_OBJS) \ @@ -193,7 +194,7 @@ e2image: $(E2IMAGE_OBJS) $(DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o e2image $(E2IMAGE_OBJS) $(LIBS) $(LIBINTL) -e2image.profiled: $(PROFILED_E2IMAGE_OBJS) $(PROFILED_DEPLIBS) +e2image.profiled: $(E2IMAGE_OBJS) $(PROFILED_DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2image.profiled \ $(PROFILED_E2IMAGE_OBJS) $(PROFILED_LIBS) $(LIBINTL) @@ -202,7 +203,7 @@ e2undo: $(E2UNDO_OBJS) $(DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o e2undo $(E2UNDO_OBJS) $(LIBS) $(LIBINTL) -e2undo.profiled: $(PROFILED_E2UNDO_OBJS) $(PROFILED_DEPLIBS) +e2undo.profiled: $(E2UNDO_OBJS) $(PROFILED_DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2undo.profiled \ $(PROFILED_E2UNDO_OBJS) $(PROFILED_LIBS) $(LIBINTL) @@ -211,7 +212,7 @@ e4defrag: $(E4DEFRAG_OBJS) $(DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o e4defrag $(E4DEFRAG_OBJS) $(LIBS) -e4defrag.profiled: $(PROFILED_E4DEFRAG_OBJS) $(PROFILED_DEPLIBS) +e4defrag.profiled: $(E4DEFRAG_OBJS) $(PROFILED_DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e4defrag.profiled \ $(PROFILED_E4DEFRAG_OBJS) $(PROFILED_LIBS) @@ -242,7 +243,7 @@ 
mke2fs.static: $(MKE2FS_OBJS) $(STATIC_DEPLIBS) $(STATIC_LIBE2P) $(DEPSTATIC_LIB $(STATIC_LIBQUOTA) $(STATIC_LIBS) $(STATIC_LIBE2P) \ $(STATIC_LIBBLKID) $(STATIC_LIBUUID) $(LIBINTL) -mke2fs.profiled: $(PROFILED_MKE2FS_OBJS) $(PROFILED_DEPLIBS) \ +mke2fs.profiled: $(MKE2FS_OBJS) $(PROFILED_DEPLIBS) \ $(PROFILED_LIBE2P) $(PROFILED_DEPLIBBLKID) $(PROFILED_DEPLIBUUID) \ $(PROFILED_LIBQUOTA) $(E) " LD $@" @@ -263,7 +264,7 @@ uuidgen: $(UUIDGEN_OBJS) $(DEPLIBUUID) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o uuidgen $(UUIDGEN_OBJS) $(LIBUUID) $(LIBINTL) -uuidgen.profiled: $(PROFILED_UUIDGEN_OBJS) $(PROFILED_DEPLIBUUID) +uuidgen.profiled: $(UUIDGEN_OBJS) $(PROFILED_DEPLIBUUID) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o uuidgen.profiled \ $(PROFILED_UUIDGEN_OBJS) $(PROFILED_LIBUUID) $(LIBINTL) @@ -272,7 +273,7 @@ uuidd: $(UUIDD_OBJS) $(DEPLIBUUID) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o uuidd $(UUIDD_OBJS) $(LIBUUID) $(LIBINTL) -uuidd.profiled: $(PROFILED_UUIDD_OBJS) $(PROFILED_DEPLIBUUID) +uuidd.profiled: $(UUIDD_OBJS) $(PROFILED_DEPLIBUUID) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o uuidd.profiled $(PROFILED_UUIDD_OBJS) \ $(PROFILED_LIBUUID) $(LIBINTL) @@ -282,7 +283,7 @@ dumpe2fs: $(DUMPE2FS_OBJS) $(DEPLIBS) $(DEPLIBS_E2P) $(DEPLIBUUID) $(Q) $(CC) $(ALL_LDFLAGS) -o dumpe2fs $(DUMPE2FS_OBJS) $(LIBS) \ $(LIBS_E2P) $(LIBUUID) $(LIBINTL) -dumpe2fs.profiled: $(PROFILED_DUMPE2FS_OBJS) $(PROFILED_DEPLIBS) \ +dumpe2fs.profiled: $(DUMPE2FS_OBJS) $(PROFILED_DEPLIBS) \ $(PROFILED_LIBE2P) $(PROFILED_DEPLIBUUID) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o dumpe2fs.profiled \ @@ -293,7 +294,7 @@ fsck: $(FSCK_OBJS) $(DEPLIBBLKID) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o fsck $(FSCK_OBJS) $(LIBBLKID) $(LIBINTL) -fsck.profiled: $(PROFILED_FSCK_OBJS) $(PROFILED_DEPLIBBLKID) +fsck.profiled: $(FSCK_OBJS) $(PROFILED_DEPLIBBLKID) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o fsck.profiled $(PROFILED_FSCK_OBJS) \ $(PROFILED_LIBBLKID) $(LIBINTL) @@ -302,7 +303,7 @@ 
badblocks: $(BADBLOCKS_OBJS) $(DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o badblocks $(BADBLOCKS_OBJS) $(LIBS) $(LIBINTL) -badblocks.profiled: $(PROFILED_BADBLOCKS_OBJS) $(PROFILED_DEPLIBS) +badblocks.profiled: $(BADBLOCKS_OBJS) $(PROFILED_DEPLIBS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o badblocks.profiled \ $(PROFILED_BADBLOCKS_OBJS) $(PROFILED_LIBS) $(LIBINTL) @@ -311,7 +312,7 @@ logsave: logsave.o $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o logsave logsave.o -logsave.profiled: profiled/logsave.o +logsave.profiled: logsave.o $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o logsave.profiled profiled/logsave.o @@ -319,11 +320,16 @@ e2freefrag: $(E2FREEFRAG_OBJS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o e2freefrag $(E2FREEFRAG_OBJS) $(LIBS) +e2freefrag.profiled: $(E2FREEFRAG_OBJS) $(PROFILED_DEPLIBS) + $(E) " LD $@" + $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2freefrag.profiled \ + $(PROFILED_E2FREEFRAG_OBJS) $(PROFILED_LIBS) + filefrag: $(FILEFRAG_OBJS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -o filefrag $(FILEFRAG_OBJS) -filefrag.profiled: $(PROFILED_FILEFRAG_OBJS) +filefrag.profiled: $(FILEFRAG_OBJS) $(E) " LD $@" $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o filefrag.profiled \ $(PROFILED_FILEFRAG_OBJS) From dd0c9a3c9cfdd250bfac283da05fd8920c823977 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 9 Oct 2012 22:45:40 -0400 Subject: [PATCH 6/6] e2fsck: only consult inode_dir_map if needed in pass4 In e2fsck_pass4(), we were consulting inode_dir_map using ext2fs_test_inode_bitmap2() for every single inode in the file system. However, there were many cases where we never needed the result of the test --- most notably if the inode is not in use. 
I was a bit surprised that GCC 4.7 with CFLAGS set to "-g -O2" wasn't able to optimize this out for us, but here is the pass 4 timing for an empty 3T file system before this patch: Pass 4: Memory used: 672k/772k (422k/251k), time: 3.67/ 3.66/ 0.00 and afterwards, we see a 43% improvement: Pass 4: Memory used: 672k/772k (422k/251k), time: 2.09/ 2.08/ 0.00 Signed-off-by: "Theodore Ts'o" --- e2fsck/pass4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/e2fsck/pass4.c b/e2fsck/pass4.c index 1e632807..2d551807 100644 --- a/e2fsck/pass4.c +++ b/e2fsck/pass4.c @@ -123,7 +123,7 @@ void e2fsck_pass4(e2fsck_t ctx) /* Protect loop from wrap-around if s_inodes_count maxed */ for (i=1; i <= fs->super->s_inodes_count && i > 0; i++) { - int isdir = ext2fs_test_inode_bitmap2(ctx->inode_dir_map, i); + int isdir; if (ctx->flags & E2F_FLAG_SIGNAL_MASK) goto errout; @@ -157,6 +157,7 @@ void e2fsck_pass4(e2fsck_t ctx) ext2fs_icount_fetch(ctx->inode_count, i, &link_counted); } + isdir = ext2fs_test_inode_bitmap2(ctx->inode_dir_map, i); if (isdir && (link_counted > EXT2_LINK_MAX)) link_counted = 1; if (link_counted != link_count) {