Change e2fsck to detect and offer to delete or rename duplicate

filenames in directories when rebuilding directories using
"e2fsck -fD /dev/XXX"
bitmap-optimize
Theodore Ts'o 2003-03-14 01:43:56 -05:00
parent 220c0040fb
commit b0700a1b60
7 changed files with 277 additions and 80 deletions

View File

@ -1,3 +1,23 @@
2003-03-14 Theodore Ts'o <tytso@mit.edu>
* rehash.c (duplicate_search_and_fix): Now search for duplicates
filenames, and either prompt to remove a complete
duplicate entry, or to rename a duplicate filename.
(e2fsck_rehash_dir): Use a progress bar to report
progress, and don't print all of the directory inodes as
they are optimized.
* problem.c, problem.h (PR_2_DUPLICATE_DIRENT,
PR_2_NON_UNIQUE_FILE): New problem codes.
* unix.c (e2fsck_simple_progress), e2fsck.h: New function which
can be called to provide specialized progress bars that
are not related to the top-level pass-based completion
percentage.
* pass3.c (e2fsck_adjust_inode_count), e2fsck.h: Export previously
static function.
2003-03-06 <tytso@mit.edu>
* e2fsck.8.in: Fix minor nit in the -C option. (Addresses Debian

View File

@ -409,6 +409,8 @@ extern int e2fsck_reconnect_file(e2fsck_t ctx, ext2_ino_t inode);
extern errcode_t e2fsck_expand_directory(e2fsck_t ctx, ext2_ino_t dir,
int num, int gauranteed_size);
extern ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix);
extern errcode_t e2fsck_adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino,
int adj);
/* region.c */
@ -456,3 +458,5 @@ extern int ext2_file_type(unsigned int mode);
/* unix.c */
extern void e2fsck_clear_progbar(e2fsck_t ctx);
extern int e2fsck_simple_progress(e2fsck_t ctx, char *label,
float percent, unsigned int dpynum);

View File

@ -45,7 +45,6 @@ static void check_root(e2fsck_t ctx);
static int check_directory(e2fsck_t ctx, struct dir_info *dir,
struct problem_context *pctx);
static void fix_dotdot(e2fsck_t ctx, struct dir_info *dir, ext2_ino_t parent);
static errcode_t adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj);
static ext2fs_inode_bitmap inode_loop_detect = 0;
static ext2fs_inode_bitmap inode_done_map = 0;
@ -406,7 +405,7 @@ ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix)
dirinfo = e2fsck_get_dir_info(ctx, ino);
if (dirinfo)
dirinfo->parent = 0;
adjust_inode_count(ctx, ino, -1);
e2fsck_adjust_inode_count(ctx, ino, -1);
} else if (retval != EXT2_ET_FILE_NOT_FOUND) {
pctx.errcode = retval;
fix_problem(ctx, PR_3_ERR_FIND_LPF, &pctx);
@ -435,7 +434,7 @@ ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix)
/*
* Next find a free inode.
*/
retval = ext2fs_new_inode(fs, EXT2_ROOT_INO, 040755,
retval = ext2fs_new_inode(fs, EXT2_ROOT_INO, 040700,
ctx->inode_used_map, &ino);
if (retval) {
pctx.errcode = retval;
@ -498,7 +497,7 @@ ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix)
* Miscellaneous bookkeeping that needs to be kept straight.
*/
e2fsck_add_dir_info(ctx, ino, EXT2_ROOT_INO);
adjust_inode_count(ctx, EXT2_ROOT_INO, 1);
e2fsck_adjust_inode_count(ctx, EXT2_ROOT_INO, 1);
ext2fs_icount_store(ctx->inode_count, ino, 2);
ext2fs_icount_store(ctx->inode_link_info, ino, 2);
ctx->lost_and_found = ino;
@ -554,7 +553,7 @@ int e2fsck_reconnect_file(e2fsck_t ctx, ext2_ino_t ino)
fix_problem(ctx, PR_3_CANT_RECONNECT, &pctx);
return 1;
}
adjust_inode_count(ctx, ino, 1);
e2fsck_adjust_inode_count(ctx, ino, 1);
return 0;
}
@ -562,7 +561,7 @@ int e2fsck_reconnect_file(e2fsck_t ctx, ext2_ino_t ino)
/*
* Utility routine to adjust the inode counts on an inode.
*/
static errcode_t adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj)
errcode_t e2fsck_adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj)
{
ext2_filsys fs = ctx->fs;
errcode_t retval;
@ -628,12 +627,12 @@ static int fix_dotdot_proc(struct ext2_dir_entry *dirent,
clear_problem_context(&pctx);
retval = adjust_inode_count(fp->ctx, dirent->inode, -1);
retval = e2fsck_adjust_inode_count(fp->ctx, dirent->inode, -1);
if (retval) {
pctx.errcode = retval;
fix_problem(fp->ctx, PR_3_ADJUST_INODE, &pctx);
}
retval = adjust_inode_count(fp->ctx, fp->parent, 1);
retval = e2fsck_adjust_inode_count(fp->ctx, fp->parent, 1);
if (retval) {
pctx.errcode = retval;
fix_problem(fp->ctx, PR_3_ADJUST_INODE, &pctx);

View File

@ -1087,6 +1087,16 @@ static const struct e2fsck_problem problem_table[] = {
N_("@p @h %d: node (%B) has bad depth\n"),
PROMPT_NONE, 0 },
/* Duplicate directory entry found */
{ PR_2_DUPLICATE_DIRENT,
N_("Duplicate @E found. "),
PROMPT_CLEAR, 0 },
/* Non-unique filename found */
{ PR_2_NON_UNIQUE_FILE,
N_("@E has a non-unique filename.\nRename to %s"),
PROMPT_NULL, 0 },
/* Pass 3 errors */
/* Pass 3: Checking directory connectivity */

View File

@ -643,7 +643,13 @@ struct problem_context {
#define PR_2_HTREE_HASH_ORDER 0x02003F
/* Node in HTREE directory has bad depth */
#define PR_2_HTREE_BAD_DEPTH 0x020040
#define PR_2_HTREE_BAD_DEPTH 0x020040
/* Duplicate directory entry found */
#define PR_2_DUPLICATE_DIRENT 0x020041
/* Non-unique filename found */
#define PR_2_NON_UNIQUE_FILE 0x020042
/*
* Pass 3 errors

View File

@ -156,28 +156,6 @@ static int fill_dir_block(ext2_filsys fs,
return 0;
}
/* Used for sorting the hash entry */
static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
{
const struct hash_entry *he_a = (const struct hash_entry *) a;
const struct hash_entry *he_b = (const struct hash_entry *) b;
int ret;
if (he_a->hash > he_b->hash)
ret = 1;
else if (he_a->hash < he_b->hash)
ret = -1;
else {
if (he_a->minor_hash > he_b->minor_hash)
ret = 1;
else if (he_a->minor_hash < he_b->minor_hash)
ret = -1;
else
ret = 0;
}
return ret;
}
/* Used for sorting the hash entry */
static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
{
@ -202,6 +180,28 @@ static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
return ret;
}
/* Used for sorting the hash entry */
static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
{
const struct hash_entry *he_a = (const struct hash_entry *) a;
const struct hash_entry *he_b = (const struct hash_entry *) b;
int ret;
if (he_a->hash > he_b->hash)
ret = 1;
else if (he_a->hash < he_b->hash)
ret = -1;
else {
if (he_a->minor_hash > he_b->minor_hash)
ret = 1;
else if (he_a->minor_hash < he_b->minor_hash)
ret = -1;
else
ret = name_cmp(a, b);
}
return ret;
}
static errcode_t alloc_size_dir(ext2_filsys fs, struct out_dir *outdir,
int blocks)
{
@ -249,6 +249,127 @@ static errcode_t get_next_block(ext2_filsys fs, struct out_dir *outdir,
return 0;
}
/*
* This function is used to make a unique filename. We do this by
* appending ~0, and then incrementing the number. However, we cannot
* expand the length of the filename beyond the padding available in
* the directory entry.
*/
static void mutate_name(char *str, __u16 *len)
{
int i;
__u16 l = *len & 0xFF, h = *len & 0xff00;
/*
* First check to see if it looks the name has been mutated
* already
*/
for (i = l-1; i > 0; i--) {
if (!isdigit(str[i]))
break;
}
if ((i == l-1) || (str[i] != '~')) {
if (((l-1) & 3) < 2)
l += 2;
else
l = (l+3) & ~3;
str[l-2] = '~';
str[l-1] = '0';
*len = l | h;
return;
}
for (i = l-1; i >= 0; i--) {
if (isdigit(str[i])) {
if (str[i] == '9')
str[i] = '0';
else {
str[i]++;
return;
}
continue;
}
if (i == 1) {
if (str[0] == 'z')
str[0] = 'A';
else if (str[0] == 'Z') {
str[0] = '~';
str[1] = '0';
} else
str[0]++;
} else if (i > 0) {
str[i] = '1';
str[i-1] = '~';
} else {
if (str[0] == '~')
str[0] = 'a';
else
str[0]++;
}
break;
}
}
static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
ext2_ino_t ino,
struct fill_dir_struct *fd)
{
struct problem_context pctx;
struct hash_entry *ent, *prev;
int i, j;
int fixed = 0;
char new_name[256];
__u16 new_len;
clear_problem_context(&pctx);
pctx.ino = ino;
for (i=1; i < fd->num_array; i++) {
ent = fd->harray + i;
prev = ent - 1;
if (!ent->dir->inode ||
((ent->dir->name_len & 0xFF) !=
(prev->dir->name_len & 0xFF)) ||
(strncmp(ent->dir->name, prev->dir->name,
ent->dir->name_len & 0xFF)))
continue;
pctx.dirent = ent->dir;
if ((ent->dir->inode == prev->dir->inode) &&
fix_problem(ctx, PR_2_DUPLICATE_DIRENT, &pctx)) {
e2fsck_adjust_inode_count(ctx, ent->dir->inode, -1);
ent->dir->inode = 0;
fixed++;
continue;
}
memcpy(new_name, ent->dir->name, ent->dir->name_len & 0xFF);
new_len = ent->dir->name_len;
mutate_name(new_name, &new_len);
for (j=0; j < fd->num_array; j++) {
if ((i==j) ||
((ent->dir->name_len & 0xFF) !=
(fd->harray[j].dir->name_len & 0xFF)) ||
(strncmp(new_name, fd->harray[j].dir->name,
new_len & 0xFF)))
continue;
mutate_name(new_name, &new_len);
j = -1;
}
new_name[new_len & 0xFF] = 0;
pctx.str = new_name;
if (fix_problem(ctx, PR_2_NON_UNIQUE_FILE, &pctx)) {
memcpy(ent->dir->name, new_name, new_len & 0xFF);
ent->dir->name_len = new_len;
ext2fs_dirhash(fs->super->s_def_hash_version,
ent->dir->name,
ent->dir->name_len & 0xFF,
fs->super->s_hash_seed,
&ent->hash, &ent->minor_hash);
fixed++;
}
}
return fixed;
}
static errcode_t copy_dir_entries(ext2_filsys fs,
struct fill_dir_struct *fd,
@ -277,6 +398,8 @@ static errcode_t copy_dir_entries(ext2_filsys fs,
left = fs->blocksize;
for (i=0; i < fd->num_array; i++) {
ent = fd->harray + i;
if (ent->dir->inode == 0)
continue;
rec_len = EXT2_DIR_REC_LEN(ent->dir->name_len & 0xFF);
if (rec_len > left) {
if (left)
@ -575,6 +698,7 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino)
#endif
/* Sort the list */
resort:
if (fd.compress)
qsort(fd.harray+2, fd.num_array-2,
sizeof(struct hash_entry), name_cmp);
@ -582,6 +706,12 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino)
qsort(fd.harray, fd.num_array,
sizeof(struct hash_entry), hash_cmp);
/*
* Look for duplicates
*/
if (duplicate_search_and_fix(ctx, fs, ino, &fd))
goto resort;
/*
* Copy the directory entries. In a htree directory these
* will become the leaf nodes.
@ -623,7 +753,7 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
ext2_u32_iterate iter;
ext2_ino_t ino;
errcode_t retval;
int i, all_dirs, dir_index, first = 1;
int i, cur, max, all_dirs, dir_index, first = 1;
#ifdef RESOURCE_TRACK
init_resource_track(&rtrack);
@ -639,9 +769,11 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
clear_problem_context(&pctx);
dir_index = ctx->fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_DIR_INDEX;
if (all_dirs)
cur = 0;
if (all_dirs) {
i = 0;
else {
max = e2fsck_get_num_dirinfo(ctx);
} else {
retval = ext2fs_u32_list_iterate_begin(ctx->dirs_to_hash,
&iter);
if (retval) {
@ -649,6 +781,7 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
fix_problem(ctx, PR_3A_OPTIMIZE_ITER, &pctx);
return;
}
max = ext2fs_u32_list_count(ctx->dirs_to_hash);
}
while (1) {
if (all_dirs) {
@ -666,12 +799,16 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
fix_problem(ctx, PR_3A_PASS_HEADER, &pctx);
first = 0;
}
#if 0
fix_problem(ctx, PR_3A_OPTIMIZE_DIR, &pctx);
#endif
pctx.errcode = e2fsck_rehash_dir(ctx, ino);
if (pctx.errcode) {
end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
fix_problem(ctx, PR_3A_OPTIMIZE_DIR_ERR, &pctx);
}
e2fsck_simple_progress(ctx, "Rebuilding directory",
(float) (++cur) / (float) max, ino);
}
end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
if (!all_dirs)

View File

@ -328,6 +328,71 @@ extern void e2fsck_clear_progbar(e2fsck_t ctx)
ctx->flags &= ~E2F_FLAG_PROG_BAR;
}
int e2fsck_simple_progress(e2fsck_t ctx, char *label, float percent,
unsigned int dpynum)
{
static const char spinner[] = "\\|/-";
char buf[80];
int i;
int tick;
struct timeval tv;
int dpywidth;
if (ctx->flags & E2F_FLAG_PROG_SUPPRESS)
return 0;
/*
* Calculate the new progress position. If the
* percentage hasn't changed, then we skip out right
* away.
*/
if (ctx->progress_last_percent == (int) 10 * percent)
return 0;
ctx->progress_last_percent = (int) 10 * percent;
/*
* If we've already updated the spinner once within
* the last 1/8th of a second, no point doing it
* again.
*/
gettimeofday(&tv, NULL);
tick = (tv.tv_sec << 3) + (tv.tv_usec / (1000000 / 8));
if ((tick == ctx->progress_last_time) &&
(percent != 0.0) && (percent != 100.0))
return 0;
ctx->progress_last_time = tick;
/*
* Advance the spinner, and note that the progress bar
* will be on the screen
*/
ctx->progress_pos = (ctx->progress_pos+1) & 3;
ctx->flags |= E2F_FLAG_PROG_BAR;
dpywidth = 66 - strlen(label);
dpywidth = 8 * (dpywidth / 8);
if (dpynum)
dpywidth -= 8;
i = ((percent * dpywidth) + 50) / 100;
printf("%s: |%s%s", label, bar + (sizeof(bar) - (i+1)),
spaces + (sizeof(spaces) - (dpywidth - i + 1)));
if (percent == 100.0)
fputc('|', stdout);
else
fputc(spinner[ctx->progress_pos & 3], stdout);
if (dpynum)
printf(" %4.1f%% %u\r", percent, dpynum);
else
printf(" %4.1f%% \r", percent);
if (percent == 100.0)
e2fsck_clear_progbar(ctx);
fflush(stdout);
return 0;
}
static int e2fsck_update_progress(e2fsck_t ctx, int pass,
unsigned long cur, unsigned long max)
{
@ -346,53 +411,9 @@ static int e2fsck_update_progress(e2fsck_t ctx, int pass,
sprintf(buf, "%d %lu %lu\n", pass, cur, max);
write(ctx->progress_fd, buf, strlen(buf));
} else {
if (ctx->flags & E2F_FLAG_PROG_SUPPRESS)
return 0;
if (dpywidth == 0) {
dpywidth = 66 - strlen(ctx->device_name);
dpywidth = 8 * (dpywidth / 8);
}
/*
* Calculate the new progress position. If the
* percentage hasn't changed, then we skip out right
* away.
*/
percent = calc_percent(&e2fsck_tbl, pass, cur, max);
if (ctx->progress_last_percent == (int) 10 * percent)
return 0;
ctx->progress_last_percent = (int) 10 * percent;
/*
* If we've already updated the spinner once within
* the last 1/8th of a second, no point doing it
* again.
*/
gettimeofday(&tv, NULL);
tick = (tv.tv_sec << 3) + (tv.tv_usec / (1000000 / 8));
if ((tick == ctx->progress_last_time) &&
(cur != max) && (cur != 0))
return 0;
ctx->progress_last_time = tick;
/*
* Advance the spinner, and note that the progress bar
* will be on the screen
*/
ctx->progress_pos = (ctx->progress_pos+1) & 3;
ctx->flags |= E2F_FLAG_PROG_BAR;
i = ((percent * dpywidth) + 50) / 100;
printf("%s: |%s%s", ctx->device_name,
bar + (sizeof(bar) - (i+1)),
spaces + (sizeof(spaces) - (dpywidth - i + 1)));
if (percent == 100.0)
fputc('|', stdout);
else
fputc(spinner[ctx->progress_pos & 3], stdout);
printf(" %4.1f%% \r", percent);
if (percent == 100.0)
e2fsck_clear_progbar(ctx);
fflush(stdout);
e2fsck_simple_progress(ctx, ctx->device_name,
percent, 0);
}
return 0;
}