X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=finddup.c;h=8d6e67af255088462aa88a7a9a548d8e9eba768d;hb=da07c4f78dce0021ed889ea0db818cb55d96ceed;hp=09c50d18a8f0bde786271818f2148c96dcbbbd62;hpb=00ffde6896006fe15051ef2f3036571c4d4b6404;p=finddup.git diff --git a/finddup.c b/finddup.c index 09c50d1..8d6e67a 100644 --- a/finddup.c +++ b/finddup.c @@ -1,7 +1,8 @@ /* - * finddup is a simple utility to display the files and directories - * according to their total disk occupancy. + * finddup is a simple utility find duplicated files, files common to + * several directories, or files present in one directory and not in + * another one. * * Copyright (c) 2010 Francois Fleuret * Written by Francois Fleuret @@ -22,6 +23,8 @@ * */ +#define VERSION_NUMBER "0.5" + #define _BSD_SOURCE #include @@ -46,19 +49,11 @@ typedef int64_t size_sum_t; int ignore_dotfiles = 0; /* 1 means ignore files and directories starting with a dot */ -int forced_width = 0; /* -1 means no width limit, strictly positive - means limit, 0 means not active */ - -int forced_height = 0; /* -1 means no height limit, strictly positive - means limit, 0 means not active */ - -int fancy_size_display = 0; /* 1 means to use floating values with K, M and G - as units */ - -int reverse_sorting = 0; /* 1 means to show the large ones first */ +int show_realpaths = 0; /* 1 means ignore files and directories + starting with a dot */ -int show_top = 0; /* 1 means to show the top of the sorted list - instead of the bottom */ +int show_progress = 1; /* 1 means show a progress bar when we are in a + tty */ /********************************************************************/ @@ -152,6 +147,8 @@ int same_content(struct file_with_size *f1, struct file_with_size *f2) { if(s1 == s2) { if(s1 == 0) { + close(fd1); + close(fd2); return 1; } else { if(strncmp(buffer1, buffer2, s1)) { @@ -167,6 +164,8 @@ int same_content(struct file_with_size *f1, struct file_with_size *f2) { } } } else { + if(fd1 >= 0) { close(fd1); } + if(fd2 >= 0) { close(fd2); } return 0; } } @@ -183,10 +182,10 @@ struct file_with_size *scan_directory(struct file_with_size *tail, struct dirent *dir_e; struct stat dummy; struct file_with_size *tmp; - char subname[BUFFER_SIZE]; + char subname[PATH_MAX]; if(lstat(name, &dummy) != 0) { - fprintf(stderr, "Can not stat %s: %s\n", name, strerror(errno)); + fprintf(stderr, "Can not stat \"%s\": %s\n", name, strerror(errno)); exit(EXIT_FAILURE); } @@ -199,7 +198,7 @@ struct file_with_size *scan_directory(struct file_with_size *tail, if(dir) { while((dir_e = readdir(dir))) { if(!ignore_entry(dir_e->d_name)) { - snprintf(subname, BUFFER_SIZE, "%s/%s", name, dir_e->d_name); + snprintf(subname, PATH_MAX, "%s/%s", name, dir_e->d_name); tail = scan_directory(tail, subname); } } @@ -221,13 +220,50 @@ struct file_with_size *scan_directory(struct file_with_size *tail, void start(const char *dirname1, const char *dirname2) { struct file_with_size *list1, *list2; struct file_with_size *node1, *node2; + int not_in, found; + + if(strncmp(dirname2, "not:", 4) == 0) { + not_in = 1; + dirname2 += 4; + } else { + not_in = 0; + } + list1 = scan_directory(0, dirname1); list2 = scan_directory(0, dirname2); - for(node1 = list1; node1; node1 = node1->next) { - for(node2 = list2; node2; node2 = node2->next) { - if(node1->inode != node2->inode && same_files(node1, node2)) { - printf("%s %s \n", node1->filename, node2->filename); + if(not_in) { + for(node1 = list1; node1; node1 = node1->next) { + found = 0; + + for(node2 = list2; !found && node2; node2 = node2->next) { + if(node1->inode != node2->inode && same_files(node1, node2)) { + found = 1; + } + } + + if(!found) { + if(show_realpaths) { + printf("%s\n", realpath(node1->filename, 0)); + } else { + printf("%s\n", node1->filename); + } + } + } + + } else { + + for(node1 = list1; node1; node1 = node1->next) { + for(node2 = list2; node2; node2 = node2->next) { + if(node1->inode != node2->inode && same_files(node1, node2)) { + if(show_realpaths) { + printf("%s %s\n", + realpath(node1->filename, 0), + realpath(node2->filename, 0)); + } else { + printf("%s %s\n", node1->filename, node2->filename); + } + } } } } @@ -236,6 +272,17 @@ void start(const char *dirname1, const char *dirname2) { file_list_delete(list2); } +void print_help(FILE *out) { + fprintf(out, "Usage: finddup [OPTION]... DIR1 [[not:]DIR2]\n"); + fprintf(out, "Version %s (%s)\n", VERSION_NUMBER, UNAME); + fprintf(out, "Without DIR2, lists duplicated files found in DIR1. With DIR2, lists files common to both directories. With the not: prefix, lists files found in DIR1 which do not exist in DIR2.\n"); + fprintf(out, "\n"); + fprintf(out, " -h show this help.\n"); + fprintf(out, " -r show the real file paths.\n"); + fprintf(out, "\n"); + fprintf(out, "Report bugs and comments to \n"); +} + /**********************************************************************/ int main(int argc, char **argv) { @@ -247,19 +294,22 @@ int main(int argc, char **argv) { setlocale (LC_ALL, ""); while (1) { - c = getopt(argc, argv, "h"); + c = getopt(argc, argv, "hr"); if (c == -1) break; switch (c) { case 'h': - printf("Usage: finddup [OPTION]... [FILE]...\n"); - printf("Report bugs and comments to \n"); + print_help(stdout); exit(EXIT_SUCCESS); break; + case 'r': + show_realpaths = 1; + break; + default: exit(EXIT_FAILURE); } @@ -267,8 +317,10 @@ int main(int argc, char **argv) { if(optind + 1 < argc) { start(argv[optind], argv[optind + 1]); + } else if(optind < argc) { + start(argv[optind], argv[optind]); } else { - fprintf(stderr, "%s [OPTIONS] \n", argv[0]); + print_help(stderr); exit(EXIT_FAILURE); }