X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=mymail.c;h=cc552613bf4e63a3be380ce18c7662bcfef59370;hb=bfdc0f26c78c8a47c7ddb23998dbc6ab8cc2384f;hp=bac2f45ebb0ad3fa1bd7b71451933c624a756fd4;hpb=cbe29e579b66cf31ec64c142286e7e2b865d84d7;p=mymail.git diff --git a/mymail.c b/mymail.c index bac2f45..cc55261 100644 --- a/mymail.c +++ b/mymail.c @@ -46,19 +46,23 @@ #include #define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file" -#define VERSION "0.9.1" +#define VERSION "0.9.2" #define MAX_NB_SEARCH_CONDITIONS 10 #define BUFFER_SIZE 65536 #define TOKEN_BUFFER_SIZE 1024 +#define LEADING_FROM_LINE_REGEXP "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$" + regex_t leading_from_line_regexp; /* Global variables! */ int paranoid; int quiet; +char *default_search_field; +int ignore_dot_files; /********************************************************************/ @@ -100,6 +104,7 @@ struct search_condition { struct parsable_field { int id; + int cflags; char *regexp_string; regex_t regexp; }; @@ -107,31 +112,36 @@ struct parsable_field { static struct parsable_field fields_to_parse[] = { { ID_LEADING_LINE, + 0, "^From ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_FROM, - "^\\([Ff][Rr][Oo][Mm]:\\|[Rr][Ee][Pp][Ll][Yy]-[Tt][Oo]:\\|[Ss][Ee][Nn][Dd][Ee][Rr]:\\)", + REG_ICASE, + "^\\(from\\|reply-to\\|sender\\|return-path\\): ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_TO, - "^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): ", + REG_ICASE, + "^\\(to\\|cc\\|bcc\\): ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_SUBJECT, - "^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: ", + REG_ICASE, + "^subject: ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { ID_DATE, - "^[Dd][Aa][Tt][Ee]: ", + REG_ICASE, + "^date: ", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -155,11 +165,6 @@ char *parse_token(char *token_buffer, size_t token_buffer_size, return string; } -void remove_eof(char *c) { - while(*c && *c != '\n' && *c != '\r') c++; - *c = '\0'; -} - /********************************************************************/ /* malloc with error checking. */ @@ -206,6 +211,8 @@ void print_usage(FILE *out) { fprintf(out, " index mails\n"); fprintf(out, " -o , --output \n"); fprintf(out, " set the result file, use stdout if unset\n"); + fprintf(out, " -a , --default-search \n"); + fprintf(out, " set the default search field\n"); } /*********************************************************************/ @@ -224,14 +231,26 @@ time_t time_for_past_day(int day) { int ignore_entry(const char *name) { return - /* strcmp(name, ".") == 0 || */ - /* strcmp(name, "..") == 0 || */ - (name[0] == '.' && name[1] != '/'); + strcmp(name, ".") == 0 || + strcmp(name, "..") == 0 || + (ignore_dot_files && name[0] == '.' && name[1] != '/'); } -int is_a_leading_from_line(char *s) { - return strncmp(s, "From ", 5) == 0 && - regexec(&leading_from_line_regexp, s, 0, 0, 0) == 0; +int is_a_leading_from_line(char *mbox_line) { + return + + /* + + The mbox man page in qmail documentation states: + + > The reader should not attempt to take advantage of the fact + > that every From_ line (past the beginning of the file) is + > preceded by a blank line. + + */ + + strncmp(mbox_line, "From ", 5) == 0 && + regexec(&leading_from_line_regexp, mbox_line, 0, 0, 0) == 0; } int mbox_line_match_search(struct search_condition *condition, @@ -270,7 +289,9 @@ int mbox_line_match_search(struct search_condition *condition, (condition->field_id == ID_FROM && mbox_id == ID_LEADING_LINE) ) + && + regexec(&condition->regexp, mbox_value, 0, 0, 0) == 0; } } @@ -281,7 +302,6 @@ void update_body_hits(char *mail_filename, int position_in_mail, int *hits) { FILE *mail_file; int header, n; - int last_mbox_line_was_empty; char raw_mbox_line[BUFFER_SIZE]; int nb_body_hits; @@ -301,9 +321,10 @@ void update_body_hits(char *mail_filename, int position_in_mail, if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { while(nb_body_hits < nb_body_conditions) { - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); + /* last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); */ + /* if(last_mbox_line_was_empty) { header = 0; } */ - if(last_mbox_line_was_empty) { header = 0; } + if(raw_mbox_line[0] == '\n') { header = 0; } if(!header) { for(n = 0; n < nb_search_conditions; n++) { @@ -318,9 +339,38 @@ void update_body_hits(char *mail_filename, int position_in_mail, } if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || - (last_mbox_line_was_empty && - is_a_leading_from_line(raw_mbox_line))) + (is_a_leading_from_line(raw_mbox_line))) + break; + } + } + + fclose(mail_file); +} + +void write_mail(const char *mail_filename, unsigned long int position_in_mail, + FILE *output_file) { + char raw_mbox_line[BUFFER_SIZE]; + FILE *mail_file; + + mail_file = fopen(mail_filename, "r"); + + if(!mail_file) { + fprintf(stderr, + "mymail: Cannot open mbox '%s' for mail extraction.\n", + mail_filename); + exit(EXIT_FAILURE); + } + + fseek(mail_file, position_in_mail, SEEK_SET); + + if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { + fprintf(output_file, "%s", raw_mbox_line); + while(1) { + if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || + (is_a_leading_from_line(raw_mbox_line)) + ) break; + fprintf(output_file, "%s", raw_mbox_line); } } @@ -334,13 +384,11 @@ void search_in_db(const char *db_filename, int hits[MAX_NB_SEARCH_CONDITIONS]; char raw_db_line[BUFFER_SIZE]; - char raw_mbox_line[BUFFER_SIZE]; char current_mail_filename[PATH_MAX + 1]; unsigned long int current_position_in_mail; char mbox_name[TOKEN_BUFFER_SIZE], *mbox_value; int mbox_id; int already_written, m, n; - int last_mbox_line_was_empty; int nb_body_conditions, nb_fulfilled_body_conditions; FILE *db_file; @@ -359,6 +407,8 @@ void search_in_db(const char *db_filename, exit(EXIT_FAILURE); } + /* First, check the db file leading line integrity */ + if(fgets(raw_db_line, BUFFER_SIZE, db_file)) { if(strncmp(raw_db_line, MYMAIL_DB_MAGIC_TOKEN, strlen(MYMAIL_DB_MAGIC_TOKEN))) { fprintf(stderr, @@ -373,6 +423,8 @@ void search_in_db(const char *db_filename, exit(EXIT_FAILURE); } + /* Then parse the said db file */ + current_position_in_mail = 0; already_written = 0; @@ -403,8 +455,7 @@ void search_in_db(const char *db_filename, if(n == nb_search_conditions) { - /* all conditions but the body ones are fine, check the body - ones */ + /* Now check the body ones */ if(nb_body_conditions > 0) { update_body_hits(current_mail_filename, current_position_in_mail, @@ -423,34 +474,7 @@ void search_in_db(const char *db_filename, } if(nb_body_conditions == nb_fulfilled_body_conditions) { - FILE *mail_file; - - mail_file = fopen(current_mail_filename, "r"); - - if(!mail_file) { - fprintf(stderr, - "mymail: Cannot open mbox '%s' for mail extraction.\n", - current_mail_filename); - exit(EXIT_FAILURE); - } - - fseek(mail_file, current_position_in_mail, SEEK_SET); - - if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { - last_mbox_line_was_empty = 0; - fprintf(output_file, "%s", raw_mbox_line); - while(1) { - if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || - (last_mbox_line_was_empty && - is_a_leading_from_line(raw_mbox_line)) - ) - break; - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); - fprintf(output_file, "%s", raw_mbox_line); - } - } - - fclose(mail_file); + write_mail(current_mail_filename, current_position_in_mail, output_file); } } } @@ -458,9 +482,8 @@ void search_in_db(const char *db_filename, for(n = 0; n < nb_search_conditions; n++) { hits[n] = 0; } mbox_value = parse_token(position_in_file_string, TOKEN_BUFFER_SIZE, ' ', mbox_value); - mbox_value = parse_token(current_mail_filename, TOKEN_BUFFER_SIZE, ' ', mbox_value); + mbox_value = parse_token(current_mail_filename, TOKEN_BUFFER_SIZE, '\n', mbox_value); current_position_in_mail = atol(position_in_file_string); - remove_eof(current_mail_filename); already_written = 0; } @@ -548,7 +571,7 @@ void index_mbox(const char *mbox_filename, char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE]; char *end_of_full_line; FILE *file; - int in_header, new_header, last_mbox_line_was_empty; + int in_header, new_header; unsigned long int position_in_file; file = fopen(mbox_filename, "r"); @@ -565,11 +588,9 @@ void index_mbox(const char *mbox_filename, position_in_file = 0; end_of_full_line = 0; full_line[0] = '\0'; - last_mbox_line_was_empty = 1; while(fgets(raw_mbox_line, BUFFER_SIZE, file)) { - if(last_mbox_line_was_empty && - is_a_leading_from_line(raw_mbox_line)) { + if(is_a_leading_from_line(raw_mbox_line)) { if(in_header) { fprintf(stderr, "Got a ^\"From \" in the header in %s:%lu.\n", @@ -583,8 +604,6 @@ void index_mbox(const char *mbox_filename, if(in_header) { in_header = 0; } } - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); - if(in_header) { if(new_header) { fprintf(db_file, "mail %lu %s\n", position_in_file, mbox_filename); @@ -683,16 +702,18 @@ static struct option long_options[] = { { "search", 1, 0, 's' }, { "index", 0, 0, 'i' }, { "output", 1, 0, 'o' }, + { "default-search", 1, 0, 'a' }, { 0, 0, 0, 0 } }; /*********************************************************************/ -void init_condition(struct search_condition *condition, char *string) { +void init_condition(struct search_condition *condition, char *full_string) { char full_search_field[TOKEN_BUFFER_SIZE], *search_field; int m; + char *string; - string = parse_token(full_search_field, TOKEN_BUFFER_SIZE, ' ', string); + string = parse_token(full_search_field, TOKEN_BUFFER_SIZE, ' ', full_string); search_field = full_search_field; if(search_field[0] == '!') { @@ -702,7 +723,7 @@ void init_condition(struct search_condition *condition, char *string) { condition->negation = 0; } - /* Last 8 hours */ + /* Recently */ if(strcmp(search_field, "8h") == 0) { condition->field_id = ID_INTERVAL; @@ -710,8 +731,6 @@ void init_condition(struct search_condition *condition, char *string) { condition->interval_stop = 0; } - /* Today and yesterday */ - else if(strcmp(search_field, "24h") == 0 || strcmp(search_field, "today") == 0) { condition->field_id = ID_INTERVAL; @@ -719,6 +738,26 @@ void init_condition(struct search_condition *condition, char *string) { condition->interval_stop = 0; } + else if(strcmp(search_field, "week") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time(0) - 3600 * 24 * 7; + condition->interval_stop = 0; + } + + else if(strcmp(search_field, "month") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time(0) - 3600 * 24 * 31; + condition->interval_stop = 0; + } + + else if(strcmp(search_field, "year") == 0) { + condition->field_id = ID_INTERVAL; + condition->interval_start = time(0) - 3600 * 24 * 365; + condition->interval_stop = 0; + } + + /* Yesterday */ + else if(strcmp(search_field, "yesterday") == 0) { condition->field_id = ID_INTERVAL; condition->interval_start = time(0) - 2 * 3600 * 24; @@ -770,6 +809,9 @@ void init_condition(struct search_condition *condition, char *string) { } else { + + /* header-related conditions */ + condition->field_id = -1; for(m = 0; (m < MAX_ID) && condition->field_id == -1; m++) { @@ -778,6 +820,18 @@ void init_condition(struct search_condition *condition, char *string) { } } + if(condition->field_id == -1) { + if(default_search_field) { + for(m = 0; (m < MAX_ID) && condition->field_id == -1; m++) { + if(strncmp(field_names[m], + default_search_field, strlen(default_search_field)) == 0) { + condition->field_id = m; + } + } + string = full_string; + } + } + if(condition->field_id == -1) { fprintf(stderr, "mymail: Syntax error in field name \"%s\".\n", @@ -814,7 +868,6 @@ int main(int argc, char **argv) { char *db_filename_list; char output_filename[PATH_MAX + 1]; int action_index; - int error = 0, show_help = 0; const int nb_fields_to_parse = sizeof(fields_to_parse) / sizeof(struct parsable_field); char c; @@ -823,9 +876,7 @@ int main(int argc, char **argv) { FILE *output_file; struct search_condition search_conditions[MAX_NB_SEARCH_CONDITIONS]; - if(regcomp(&leading_from_line_regexp, - "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$", - 0)) { + if(regcomp(&leading_from_line_regexp, LEADING_FROM_LINE_REGEXP, 0)) { fprintf(stderr, "mymail: Cannot compile leading \"from\" line regexp. That is strange.\n"); exit(EXIT_FAILURE); @@ -834,15 +885,18 @@ int main(int argc, char **argv) { paranoid = 0; action_index = 0; db_filename = 0; + db_filename_regexp_string = 0; db_root_path = 0; db_filename_list = 0; quiet = 0; + default_search_field = 0; + ignore_dot_files = 1; setlocale(LC_ALL, ""); nb_search_conditions = 0; - while ((c = getopt_long(argc, argv, "hvqip:s:d:r:l:o:", + while ((c = getopt_long(argc, argv, "hvqip:s:d:r:l:o:a:", long_options, NULL)) != -1) { switch(c) { @@ -892,6 +946,10 @@ int main(int argc, char **argv) { nb_search_conditions++; break; + case 'a': + default_search_field = optarg; + break; + default: error = 1; break; @@ -975,7 +1033,7 @@ int main(int argc, char **argv) { for(f = 0; f < nb_fields_to_parse; f++) { if(regcomp(&fields_to_parse[f].regexp, fields_to_parse[f].regexp_string, - REG_ICASE)) { + fields_to_parse[f].cflags)) { fprintf(stderr, "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n", fields_to_parse[f].regexp_string,