From 630c461cbdb6610420863dcfe7af247d62bf004e Mon Sep 17 00:00:00 2001 From: Francois Fleuret Date: Wed, 10 Mar 2010 19:15:54 +0100 Subject: [PATCH] Added the -0 option to ignore empty files. --- finddup.1 | 3 +++ finddup.c | 57 +++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/finddup.1 b/finddup.1 index 71e0e57..e021a81 100644 --- a/finddup.1 +++ b/finddup.1 @@ -38,6 +38,9 @@ display help and exit \fB-d\fR ignore files and directories starting with a dot .TP +\fB-0\fR +ignore empty files +.TP \fB-c\fR do not show which files from DIR2 corresponds to files from DIR1 (hence, show only the files from DIR1 which have an identical twin in diff --git a/finddup.c b/finddup.c index adf138e..bca9c5c 100644 --- a/finddup.c +++ b/finddup.c @@ -42,6 +42,7 @@ #include #define BUFFER_SIZE 4096 +#define LARGE_BUFFER_SIZE 65536 typedef int64_t size_sum_t; @@ -50,6 +51,8 @@ typedef int64_t size_sum_t; int ignore_dotfiles = 0; /* 1 means ignore files and directories starting with a dot */ +int ignore_empty_files = 0; /* 1 means ignore empty files */ + int show_realpaths = 0; /* 1 means ignore files and directories starting with a dot */ @@ -135,15 +138,15 @@ int file_list_length(struct file_with_size *head) { int same_content(struct file_with_size *f1, struct file_with_size *f2) { int fd1, fd2, s1, s2; - char buffer1[BUFFER_SIZE], buffer2[BUFFER_SIZE]; + char buffer1[LARGE_BUFFER_SIZE], buffer2[LARGE_BUFFER_SIZE]; fd1 = open(f1->filename, O_RDONLY); fd2 = open(f2->filename, O_RDONLY); if(fd1 >= 0 && fd2 >= 0) { while(1) { - s1 = read(fd1, buffer1, BUFFER_SIZE); - s2 = read(fd2, buffer2, BUFFER_SIZE); + s1 = read(fd1, buffer1, LARGE_BUFFER_SIZE); + s2 = read(fd2, buffer2, LARGE_BUFFER_SIZE); if(s1 < 0 || s2 < 0) { close(fd1); @@ -226,13 +229,17 @@ struct file_with_size *scan_directory(struct file_with_size *tail, closedir(dir); } else { if(S_ISREG(sb.st_mode)) { - tmp = safe_malloc(sizeof(struct file_with_size)); - tmp->next = tail; - tmp->filename = strdup(name); - tmp->size = sb.st_size; - tmp->inode = sb.st_ino; - tmp->group_id = -1; - tail = tmp; + if(!ignore_entry(name)) { + if(!ignore_empty_files || sb.st_size > 0) { + tmp = safe_malloc(sizeof(struct file_with_size)); + tmp->next = tail; + tmp->filename = strdup(name); + tmp->size = sb.st_size; + tmp->inode = sb.st_ino; + tmp->group_id = -1; + tail = tmp; + } + } } } @@ -392,17 +399,19 @@ void start(const char *dirname1, const char *dirname2) { } for(node2 = list2; node2; node2 = node2->next) { - if(same_files(node1, node2)) { - if(node1->group_id < 0) { - if(node2->group_id >= 0) { - node1->group_id = node2->group_id; - } else { - node1->group_id = k; - k++; + if(node1->group_id < 0 || node2->group_id < 0) { + if(same_files(node1, node2)) { + if(node1->group_id < 0) { + if(node2->group_id >= 0) { + node1->group_id = node2->group_id; + } else { + node1->group_id = k; + k++; + } + } + if(node2->group_id < 0) { + node2->group_id = node1->group_id; } - } - if(node2->group_id < 0) { - node2->group_id = node1->group_id; } } } @@ -412,6 +421,7 @@ void start(const char *dirname1, const char *dirname2) { print_result(list1, list2); file_list_delete(list1); + if(dirname2) { file_list_delete(list2); } @@ -424,6 +434,7 @@ void print_help(FILE *out) { fprintf(out, "\n"); fprintf(out, " -h show this help\n"); fprintf(out, " -d ignore dot files and directories\n"); + fprintf(out, " -0 ignore empty files\n"); fprintf(out, " -c do not show which files in DIR2 corresponds to those in DIR1\n"); fprintf(out, " -g do not show the file groups\n"); fprintf(out, " -p show progress\n"); @@ -444,7 +455,7 @@ int main(int argc, char **argv) { setlocale (LC_ALL, ""); while (1) { - c = getopt(argc, argv, "hrcgdp"); + c = getopt(argc, argv, "hrcgd0p"); if (c == -1) break; @@ -460,6 +471,10 @@ int main(int argc, char **argv) { ignore_dotfiles = 1; break; + case '0': + ignore_empty_files = 1; + break; + case 'r': show_realpaths = 1; break; -- 2.20.1