X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=mymail.c;h=f9b0148dcdd2cd916605589986fe689fb9da360c;hb=bde6de220923d308cd3c645bbbcc776d7695cc3a;hp=f2397a6e25ada21942a8f46c70714f6b0cfbc9d6;hpb=f1d64e2e4f76ccc8148c58ea865fe3e0af017440;p=mymail.git diff --git a/mymail.c b/mymail.c index f2397a6..f9b0148 100644 --- a/mymail.c +++ b/mymail.c @@ -25,7 +25,7 @@ directories containing mbox files, and create a db file containing for each mail a list of fields computed from the header, or (2) read such a db file and get all the mails matching regexp-defined - conditions on the fields. + conditions on the fields, to create a resulting mbox file. It is low-tech, simple, light and fast. @@ -46,26 +46,23 @@ #include #define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file" -#define VERSION "0.9.1" +#define VERSION "0.9.2" -#define MAX_NB_SEARCH_CONDITIONS 10 +#define MAX_NB_SEARCH_CONDITIONS 32 #define BUFFER_SIZE 65536 #define TOKEN_BUFFER_SIZE 1024 +#define LEADING_FROM_LINE_REGEXP "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$" + regex_t leading_from_line_regexp; -char *db_filename; -char *db_filename_regexp_string; -char *db_root_path; -char *db_filename_list; -char output_filename[PATH_MAX + 1]; +/* Global variables! */ int paranoid; -int action_index; int quiet; - -time_t being_today; +char *default_search_field; +int ignore_dot_files; /********************************************************************/ @@ -107,6 +104,7 @@ struct search_condition { struct parsable_field { int id; + int cflags; char *regexp_string; regex_t regexp; }; @@ -114,31 +112,36 @@ struct parsable_field { static struct parsable_field fields_to_parse[] = { { ID_LEADING_LINE, + 0, "^From ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_FROM, - "^\\([Ff][Rr][Oo][Mm]:\\|[Rr][Ee][Pp][Ll][Yy]-[Tt][Oo]:\\|[Ss][Ee][Nn][Dd][Ee][Rr]:\\)", + REG_ICASE, + "^\\(from\\|reply-to\\|sender\\|return-path\\): ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_TO, - "^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): ", + REG_ICASE, + "^\\(to\\|cc\\|bcc\\): ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_SUBJECT, - "^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: ", + REG_ICASE, + "^subject: ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_DATE, - "^[Dd][Aa][Tt][Ee]: ", + REG_ICASE, + "^date: ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -162,11 +165,6 @@ char *parse_token(char *token_buffer, size_t token_buffer_size, return string; } -void remove_eof(char *c) { - while(*c && *c != '\n' && *c != '\r') c++; - *c = '\0'; -} - /********************************************************************/ /* malloc with error checking. */ @@ -213,20 +211,46 @@ void print_usage(FILE *out) { fprintf(out, " index mails\n"); fprintf(out, " -o , --output \n"); fprintf(out, " set the result file, use stdout if unset\n"); + fprintf(out, " -a , --default-search \n"); + fprintf(out, " set the default search field\n"); +} + +/*********************************************************************/ + +time_t time_for_past_day(int day) { + time_t t; + struct tm *tm; + int delta_day; + t = time(0); + tm = localtime(&t); + delta_day = (7 + tm->tm_wday - day) % 7 + 1; + return t - delta_day * 3600 * 24 + tm->tm_sec + 60 * tm->tm_min + 3600 * tm->tm_hour; } /*********************************************************************/ int ignore_entry(const char *name) { return - /* strcmp(name, ".") == 0 || */ - /* strcmp(name, "..") == 0 || */ - (name[0] == '.' && name[1] != '/'); + strcmp(name, ".") == 0 || + strcmp(name, "..") == 0 || + (ignore_dot_files && name[0] == '.' && name[1] != '/'); } -int is_a_leading_from_line(char *s) { - return strncmp(s, "From ", 5) == 0 && - regexec(&leading_from_line_regexp, s, 0, 0, 0) == 0; +int is_a_leading_from_line(char *mbox_line) { + return + + /* + + The mbox man page in qmail documentation states: + + > The reader should not attempt to take advantage of the fact + > that every From_ line (past the beginning of the file) is + > preceded by a blank line. + + */ + + strncmp(mbox_line, "From ", 5) == 0 && + regexec(&leading_from_line_regexp, mbox_line, 0, 0, 0) == 0; } int mbox_line_match_search(struct search_condition *condition, @@ -265,7 +289,9 @@ int mbox_line_match_search(struct search_condition *condition, (condition->field_id == ID_FROM && mbox_id == ID_LEADING_LINE) ) + && + regexec(&condition->regexp, mbox_value, 0, 0, 0) == 0; } } @@ -276,7 +302,6 @@ void update_body_hits(char *mail_filename, int position_in_mail, int *hits) { FILE *mail_file; int header, n; - int last_mbox_line_was_empty; char raw_mbox_line[BUFFER_SIZE]; int nb_body_hits; @@ -296,9 +321,10 @@ void update_body_hits(char *mail_filename, int position_in_mail, if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { while(nb_body_hits < nb_body_conditions) { - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); + /* last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); */ + /* if(last_mbox_line_was_empty) { header = 0; } */ - if(last_mbox_line_was_empty) { header = 0; } + if(raw_mbox_line[0] == '\n') { header = 0; } if(!header) { for(n = 0; n < nb_search_conditions; n++) { @@ -313,8 +339,7 @@ void update_body_hits(char *mail_filename, int position_in_mail, } if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || - (last_mbox_line_was_empty && - is_a_leading_from_line(raw_mbox_line))) + (is_a_leading_from_line(raw_mbox_line))) break; } } @@ -322,22 +347,53 @@ void update_body_hits(char *mail_filename, int position_in_mail, fclose(mail_file); } -void search_in_db(const char *db_filename, - int nb_search_conditions, - struct search_condition *search_conditions, - FILE *output_file) { +void extract_mail(const char *mail_filename, unsigned long int position_in_mail, + FILE *output_file) { + char raw_mbox_line[BUFFER_SIZE]; + FILE *mail_file; + + mail_file = fopen(mail_filename, "r"); + + if(!mail_file) { + fprintf(stderr, + "mymail: Cannot open mbox '%s' for mail extraction.\n", + mail_filename); + exit(EXIT_FAILURE); + } + + fseek(mail_file, position_in_mail, SEEK_SET); + + if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { + fprintf(output_file, "%s", raw_mbox_line); + while(1) { + if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || + (is_a_leading_from_line(raw_mbox_line)) + ) + break; + fprintf(output_file, "%s", raw_mbox_line); + } + } + + fclose(mail_file); +} + +int search_in_db(const char *db_filename, + int nb_search_conditions, + struct search_condition *search_conditions, + FILE *output_file) { int hits[MAX_NB_SEARCH_CONDITIONS]; char raw_db_line[BUFFER_SIZE]; - char raw_mbox_line[BUFFER_SIZE]; char current_mail_filename[PATH_MAX + 1]; unsigned long int current_position_in_mail; char mbox_name[TOKEN_BUFFER_SIZE], *mbox_value; int mbox_id; int already_written, m, n; - int last_mbox_line_was_empty; int nb_body_conditions, nb_fulfilled_body_conditions; FILE *db_file; + int nb_extracted_mails; + + nb_extracted_mails = 0; if(!quiet) { printf("Searching in '%s' ... ", db_filename); @@ -354,6 +410,8 @@ void search_in_db(const char *db_filename, exit(EXIT_FAILURE); } + /* First, check the db file leading line integrity */ + if(fgets(raw_db_line, BUFFER_SIZE, db_file)) { if(strncmp(raw_db_line, MYMAIL_DB_MAGIC_TOKEN, strlen(MYMAIL_DB_MAGIC_TOKEN))) { fprintf(stderr, @@ -368,6 +426,8 @@ void search_in_db(const char *db_filename, exit(EXIT_FAILURE); } + /* Then parse the said db file */ + current_position_in_mail = 0; already_written = 0; @@ -398,8 +458,7 @@ void search_in_db(const char *db_filename, if(n == nb_search_conditions) { - /* all conditions but the body ones are fine, check the body - ones */ + /* Now check the body ones */ if(nb_body_conditions > 0) { update_body_hits(current_mail_filename, current_position_in_mail, @@ -418,34 +477,8 @@ void search_in_db(const char *db_filename, } if(nb_body_conditions == nb_fulfilled_body_conditions) { - FILE *mail_file; - - mail_file = fopen(current_mail_filename, "r"); - - if(!mail_file) { - fprintf(stderr, - "mymail: Cannot open mbox '%s' for mail extraction.\n", - current_mail_filename); - exit(EXIT_FAILURE); - } - - fseek(mail_file, current_position_in_mail, SEEK_SET); - - if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { - last_mbox_line_was_empty = 0; - fprintf(output_file, "%s", raw_mbox_line); - while(1) { - if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || - (last_mbox_line_was_empty && - is_a_leading_from_line(raw_mbox_line)) - ) - break; - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); - fprintf(output_file, "%s", raw_mbox_line); - } - } - - fclose(mail_file); + nb_extracted_mails++; + extract_mail(current_mail_filename, current_position_in_mail, output_file); } } } @@ -453,9 +486,8 @@ void search_in_db(const char *db_filename, for(n = 0; n < nb_search_conditions; n++) { hits[n] = 0; } mbox_value = parse_token(position_in_file_string, TOKEN_BUFFER_SIZE, ' ', mbox_value); - mbox_value = parse_token(current_mail_filename, TOKEN_BUFFER_SIZE, ' ', mbox_value); + mbox_value = parse_token(current_mail_filename, TOKEN_BUFFER_SIZE, '\n', mbox_value); current_position_in_mail = atol(position_in_file_string); - remove_eof(current_mail_filename); already_written = 0; } @@ -479,16 +511,19 @@ void search_in_db(const char *db_filename, printf("done.\n"); fflush(stdout); } + + return nb_extracted_mails; } -void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, - int nb_search_conditions, - struct search_condition *search_conditions, - FILE *output_file) { +int recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, + int nb_search_conditions, + struct search_condition *search_conditions, + FILE *output_file) { DIR *dir; struct dirent *dir_e; struct stat sb; char subname[PATH_MAX + 1]; + int nb_extracted_mails = 0; if(lstat(entry_name, &sb) != 0) { fprintf(stderr, @@ -504,9 +539,9 @@ void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, while((dir_e = readdir(dir))) { if(!ignore_entry(dir_e->d_name)) { snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name); - recursive_search_in_db(subname, db_filename_regexp, - nb_search_conditions, search_conditions, - output_file); + nb_extracted_mails += recursive_search_in_db(subname, db_filename_regexp, + nb_search_conditions, search_conditions, + output_file); } } closedir(dir); @@ -517,9 +552,12 @@ void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, while(*s) { if(*s == '/') { filename = s+1; } s++; } if(regexec(db_filename_regexp, filename, 0, 0, 0) == 0) { - search_in_db(entry_name, nb_search_conditions, search_conditions, output_file); + nb_extracted_mails += + search_in_db(entry_name, nb_search_conditions, search_conditions, output_file); } } + + return nb_extracted_mails; } /*********************************************************************/ @@ -543,7 +581,7 @@ void index_mbox(const char *mbox_filename, char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE]; char *end_of_full_line; FILE *file; - int in_header, new_header, last_mbox_line_was_empty; + int in_header, new_header; unsigned long int position_in_file; file = fopen(mbox_filename, "r"); @@ -560,11 +598,9 @@ void index_mbox(const char *mbox_filename, position_in_file = 0; end_of_full_line = 0; full_line[0] = '\0'; - last_mbox_line_was_empty = 1; while(fgets(raw_mbox_line, BUFFER_SIZE, file)) { - if(last_mbox_line_was_empty && - is_a_leading_from_line(raw_mbox_line)) { + if(is_a_leading_from_line(raw_mbox_line)) { if(in_header) { fprintf(stderr, "Got a ^\"From \" in the header in %s:%lu.\n", @@ -578,8 +614,6 @@ void index_mbox(const char *mbox_filename, if(in_header) { in_header = 0; } } - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); - if(in_header) { if(new_header) { fprintf(db_file, "mail %lu %s\n", position_in_file, mbox_filename); @@ -678,16 +712,18 @@ static struct option long_options[] = { { "search", 1, 0, 's' }, { "index", 0, 0, 'i' }, { "output", 1, 0, 'o' }, + { "default-search", 1, 0, 'a' }, { 0, 0, 0, 0 } }; /*********************************************************************/ -void init_condition(struct search_condition *condition, char *string) { +void init_condition(struct search_condition *condition, char *full_string) { char full_search_field[TOKEN_BUFFER_SIZE], *search_field; int m; + char *string; - string = parse_token(full_search_field, TOKEN_BUFFER_SIZE, ' ', string); + string = parse_token(full_search_field, TOKEN_BUFFER_SIZE, ' ', full_string); search_field = full_search_field; if(search_field[0] == '!') { @@ -697,9 +733,11 @@ void init_condition(struct search_condition *condition, char *string) { condition->negation = 0; } - if(strcmp(search_field, "6h") == 0) { + /* Recently */ + + if(strcmp(search_field, "8h") == 0) { condition->field_id = ID_INTERVAL; - condition->interval_start = time(0) - 3600 * 6; + condition->interval_start = time(0) - 3600 * 8; condition->interval_stop = 0; } @@ -710,13 +748,80 @@ void init_condition(struct search_condition *condition, char *string) { condition->interval_stop = 0; } + else if(strcmp(search_field, "week") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time(0) - 3600 * 24 * 7; + condition->interval_stop = 0; + } + + else if(strcmp(search_field, "month") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time(0) - 3600 * 24 * 31; + condition->interval_stop = 0; + } + + else if(strcmp(search_field, "year") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time(0) - 3600 * 24 * 365; + condition->interval_stop = 0; + } + + /* Yesterday */ + else if(strcmp(search_field, "yesterday") == 0) { condition->field_id = ID_INTERVAL; condition->interval_start = time(0) - 2 * 3600 * 24; - condition->interval_stop = time(0) - 3600 * 24; + condition->interval_stop = condition->interval_start + 3600 * 24; + } + + /* Week days */ + + else if(strcmp(search_field, "monday") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time_for_past_day(1); + condition->interval_stop = condition->interval_start + 3600 * 24; + } + + else if(strcmp(search_field, "tuesday") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time_for_past_day(2); + condition->interval_stop = condition->interval_start + 3600 * 24; + } + + else if(strcmp(search_field, "wednesday") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time_for_past_day(3); + condition->interval_stop = condition->interval_start + 3600 * 24; + } + + else if(strcmp(search_field, "thursday") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time_for_past_day(4); + condition->interval_stop = condition->interval_start + 3600 * 24; + } + + else if(strcmp(search_field, "friday") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time_for_past_day(5); + condition->interval_stop = condition->interval_start + 3600 * 24; + } + + else if(strcmp(search_field, "saturday") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time_for_past_day(6); + condition->interval_stop = condition->interval_start + 3600 * 24; + } + + else if(strcmp(search_field, "sunday") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time_for_past_day(7); + condition->interval_stop = condition->interval_start + 3600 * 24; } else { + + /* header-related conditions */ + condition->field_id = -1; for(m = 0; (m < MAX_ID) && condition->field_id == -1; m++) { @@ -725,6 +830,18 @@ void init_condition(struct search_condition *condition, char *string) { } } + if(condition->field_id == -1) { + if(default_search_field) { + for(m = 0; (m < MAX_ID) && condition->field_id == -1; m++) { + if(strncmp(field_names[m], + default_search_field, strlen(default_search_field)) == 0) { + condition->field_id = m; + } + } + string = full_string; + } + } + if(condition->field_id == -1) { fprintf(stderr, "mymail: Syntax error in field name \"%s\".\n", @@ -755,6 +872,12 @@ void free_condition(struct search_condition *condition) { /*********************************************************************/ int main(int argc, char **argv) { + char *db_filename; + char *db_filename_regexp_string; + char *db_root_path; + char *db_filename_list; + char output_filename[PATH_MAX + 1]; + int action_index; int error = 0, show_help = 0; const int nb_fields_to_parse = sizeof(fields_to_parse) / sizeof(struct parsable_field); char c; @@ -763,9 +886,7 @@ int main(int argc, char **argv) { FILE *output_file; struct search_condition search_conditions[MAX_NB_SEARCH_CONDITIONS]; - if(regcomp(&leading_from_line_regexp, - "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$", - 0)) { + if(regcomp(&leading_from_line_regexp, LEADING_FROM_LINE_REGEXP, 0)) { fprintf(stderr, "mymail: Cannot compile leading \"from\" line regexp. That is strange.\n"); exit(EXIT_FAILURE); @@ -774,15 +895,18 @@ int main(int argc, char **argv) { paranoid = 0; action_index = 0; db_filename = 0; + db_filename_regexp_string = 0; db_root_path = 0; db_filename_list = 0; quiet = 0; + default_search_field = 0; + ignore_dot_files = 1; setlocale(LC_ALL, ""); nb_search_conditions = 0; - while ((c = getopt_long(argc, argv, "hvqip:s:d:r:l:o:", + while ((c = getopt_long(argc, argv, "hvqip:s:d:r:l:o:a:", long_options, NULL)) != -1) { switch(c) { @@ -832,6 +956,10 @@ int main(int argc, char **argv) { nb_search_conditions++; break; + case 'a': + default_search_field = optarg; + break; + default: error = 1; break; @@ -915,7 +1043,7 @@ int main(int argc, char **argv) { for(f = 0; f < nb_fields_to_parse; f++) { if(regcomp(&fields_to_parse[f].regexp, fields_to_parse[f].regexp_string, - REG_ICASE)) { + fields_to_parse[f].cflags)) { fprintf(stderr, "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n", fields_to_parse[f].regexp_string, @@ -942,6 +1070,7 @@ int main(int argc, char **argv) { } else { + int nb_extracted_mails = 0; if(nb_search_conditions > 0) { @@ -958,9 +1087,9 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } - recursive_search_in_db(db_root_path, &db_filename_regexp, - nb_search_conditions, search_conditions, - output_file); + nb_extracted_mails += recursive_search_in_db(db_root_path, &db_filename_regexp, + nb_search_conditions, search_conditions, + output_file); regfree(&db_filename_regexp); } @@ -977,7 +1106,8 @@ int main(int argc, char **argv) { s = parse_token(db_filename, PATH_MAX + 1, ';', s); if(db_filename[0]) { - search_in_db(db_filename, nb_search_conditions, search_conditions, output_file); + nb_extracted_mails += + search_in_db(db_filename, nb_search_conditions, search_conditions, output_file); } } } @@ -985,10 +1115,20 @@ int main(int argc, char **argv) { /* Search in all db files listed in the command arguments */ while(optind < argc) { - search_in_db(argv[optind], nb_search_conditions, search_conditions, output_file); + nb_extracted_mails += + search_in_db(argv[optind], nb_search_conditions, search_conditions, output_file); optind++; } } + + if(!quiet) { + if(nb_extracted_mails > 0) { + printf("Found %d matching mails.\n", nb_extracted_mails); + } else { + printf("No matching mail found.\n"); + } + } + } for(n = 0; n < nb_search_conditions; n++) {