X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=finddup.c;h=2ef91df878ba97a202425b1795fd6bc966a69d09;hb=84317c21dc50141fff4a8f1fc1ac3c075e29923f;hp=5167a5956bbaeff068d489e11b1aaa96b25f66bf;hpb=ba0a93e5d70103f2aee5395eac74b05792aaa3a8;p=finddup.git diff --git a/finddup.c b/finddup.c index 5167a59..2ef91df 100644 --- a/finddup.c +++ b/finddup.c @@ -23,7 +23,7 @@ * */ -#define VERSION_NUMBER "0.9" +#define VERSION_NUMBER "1.1" #define _BSD_SOURCE @@ -40,9 +40,6 @@ #include #include #include -#ifdef WITH_MD5 -#include -#endif /* 1M really helps compared to 64k */ #define READ_BUFFER_SIZE (1024 * 1024) @@ -74,9 +71,8 @@ int same_inodes_are_different = 0; /* 1 means that comparison between two files with same inode will always be false */ -#ifdef WITH_MD5 -int use_md5 = 0; /* 1 means we keep an MD5 signature for each file */ -#endif +int sort_by_time = 0; /* 1 means to sort files in each group according + to the modification time */ /********************************************************************/ @@ -106,13 +102,10 @@ struct file_node { struct file_node *next; char *name; size_t size; + time_t atime, mtime, ctime; ino_t inode; int group_id; /* one per identical file content */ int dir_id; /* 1 for DIR1, and 2 for DIR2 */ -#ifdef WITH_MD5 - int md5_computed; - unsigned char md5[MD5_DIGEST_LENGTH]; -#endif }; void file_list_delete(struct file_node *head) { @@ -140,25 +133,6 @@ int same_content(struct file_node *f1, struct file_node *f2, char *buffer1, char *buffer2) { int fd1, fd2, s1, s2; -#ifdef WITH_MD5 - MD5_CTX c1, c2; - - if(use_md5) { - if(f1->md5_computed && f2->md5_computed) { - if(!memcmp(f1->md5, f2->md5, MD5_DIGEST_LENGTH)) { - return 0; - } - } else { - if(!f1->md5_computed) { - MD5_Init(&c1); - } - if(!f2->md5_computed) { - MD5_Init(&c2); - } - } - } -#endif - fd1 = open(f1->name, O_RDONLY); fd2 = open(f2->name, O_RDONLY); @@ -177,35 +151,14 @@ int same_content(struct file_node *f1, struct file_node *f2, if(s1 == 0) { close(fd1); close(fd2); -#ifdef WITH_MD5 - if(use_md5) { - if(!f1->md5_computed) { - MD5_Final(f1->md5, &c1); - f1->md5_computed = 1; - } - if(!f2->md5_computed) { - MD5_Final(f2->md5, &c2); - f2->md5_computed = 1; - } - } -#endif return 1; } else { if(memcmp(buffer1, buffer2, s1)) { + /* printf("size_to_read = %d\n", size_to_read); */ close(fd1); close(fd2); return 0; } -#ifdef WITH_MD5 - if(use_md5) { - if(!f1->md5_computed) { - MD5_Update(&c1, buffer1, s1); - } - if(!f2->md5_computed) { - MD5_Update(&c2, buffer2, s2); - } - } -#endif } } else { fprintf(stderr, @@ -278,12 +231,12 @@ struct file_node *scan_directory(struct file_node *tail, const char *name) { tmp->next = tail; tmp->name = strdup(name); tmp->size = sb.st_size; + tmp->atime = sb.st_atime; + tmp->mtime = sb.st_mtime; + tmp->ctime = sb.st_ctime; tmp->inode = sb.st_ino; tmp->group_id = -1; tmp->dir_id = -1; -#ifdef WITH_MD5 - tmp->md5_computed = 0; -#endif tail = tmp; } } @@ -328,12 +281,22 @@ int compare_nodes(const void *x1, const void *x2) { } else if((*f1)->group_id > (*f2)->group_id) { return 1; } else { - if((*f1)->dir_id < (*f2)->dir_id) { - return -1; - } else if((*f1)->dir_id > (*f2)->dir_id) { - return 1; + if(sort_by_time) { + if((*f1)->mtime < (*f2)->mtime) { + return -1; + } else if((*f1)->mtime > (*f2)->mtime) { + return 1; + } else { + return 0; + } } else { - return 0; + if((*f1)->dir_id < (*f2)->dir_id) { + return -1; + } else if((*f1)->dir_id > (*f2)->dir_id) { + return 1; + } else { + return 0; + } } } } @@ -557,9 +520,9 @@ void start(const char *dirname1, const char *dirname2) { } void usage(FILE *out) { - fprintf(out, "Usage: finddup [OPTION]... DIR1 [[and:|not:]DIR2]\n"); + fprintf(out, "Usage: finddup [OPTION]... [DIR1 [[and:|not:]DIR2]]\n"); fprintf(out, "Version %s (%s)\n", VERSION_NUMBER, UNAME); - fprintf(out, "Without DIR2, lists duplicated files found in DIR1. With DIR2, lists files common to both directories. With the not: prefix, lists files found in DIR1 which do not exist in DIR2. The and: prefix is the default and should be used only if you have a directory starting with 'not:'\n"); + fprintf(out, "Without DIR2, lists duplicated files found in DIR1, or the current directory if DIR1 is not provided. With DIR2, lists files common to both directories. With the not: prefix, lists files found in DIR1 which do not exist in DIR2. The and: prefix is the default and should be used only if you have a directory starting with 'not:'\n"); fprintf(out, "\n"); /* 01234567890123456789012345678901234567890123456789012345678901234567890123456789*/ fprintf(out, " -h, --help show this help\n"); @@ -568,13 +531,11 @@ void usage(FILE *out) { fprintf(out, " -c, --hide-matchings do not show which files in DIR2 corresponds to\n"); fprintf(out, " those in DIR1\n"); fprintf(out, " -g, --no-group-ids do not show the file groups\n"); + fprintf(out, " -t, --time-sort sort according to modification time in each group\n"); fprintf(out, " -p, --show-progress show progress\n"); fprintf(out, " -r, --real-paths show the real file paths\n"); fprintf(out, " -i, --same-inodes-are-different\n"); fprintf(out, " consider files with same inode as different\n"); -#ifdef WITH_MD5 - fprintf(out, " -m, --md5 use MD5 hashing\n"); -#endif fprintf(out, "\n"); fprintf(out, "Report bugs and comments to .\n"); } @@ -587,10 +548,10 @@ static struct option long_options[] = { { "real-paths", no_argument, 0, 'r' }, { "hide-matchings", no_argument, 0, 'c' }, { "no-group-ids", no_argument, 0, 'g' }, + { "time-sort", no_argument, 0, 't' }, { "ignore-dots", no_argument, 0, 'd' }, { "ignore-empty", no_argument, 0, '0' }, { "show-progress", no_argument, 0, 'p' }, - { "md5", no_argument, 0, 'm' }, { 0, 0, 0, 0 } }; @@ -599,7 +560,7 @@ int main(int argc, char **argv) { setlocale (LC_ALL, ""); - while ((c = getopt_long(argc, argv, "hircgd0pm", + while ((c = getopt_long(argc, argv, "hircgtd0pm", long_options, NULL)) != -1) { switch (c) { @@ -629,6 +590,10 @@ int main(int argc, char **argv) { show_groups = 0; break; + case 't': + sort_by_time = 1; + break; + case 'p': show_progress = 1; break; @@ -637,17 +602,6 @@ int main(int argc, char **argv) { show_hits = 0; break; - case 'm': -#ifdef WITH_MD5 - use_md5 = 1; -#else - fprintf(stderr, - "finddup has not been compiled with MD5 hashing.\n"); - usage(stderr); - exit(EXIT_FAILURE); -#endif - break; - default: usage(stderr); exit(EXIT_FAILURE); @@ -663,6 +617,9 @@ int main(int argc, char **argv) { } else if(optind + 1 == argc) { same_inodes_are_different = 1; start(argv[optind], 0); + } else if(optind == argc) { + same_inodes_are_different = 1; + start(".", 0); } else { usage(stderr); exit(EXIT_FAILURE);