X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=mymail.git;a=blobdiff_plain;f=mymail.c;h=6fa18140c17872fb422026bbea7a120df621f085;hp=ff0bd45891378e80aa83439fc134b408bcc8628b;hb=406c3891838fc77131dbbbd29423b61259cd789d;hpb=376f3999a52654ea1480d9464e74225fbf7291fc diff --git a/mymail.c b/mymail.c index ff0bd45..6fa1814 100644 --- a/mymail.c +++ b/mymail.c @@ -21,11 +21,13 @@ /* - This command is a dumb mail indexer. It can either (1) scan - directories containing mbox files, and create a db file containing - for each mail a list of fields computed from the header, or (2) - read such a db file and get all the mails matching regexp-defined - conditions on the fields, to create a resulting mbox file. + mymail is a simple mail indexer. It can: + + (1) scan mbox files, and create a db file containing for each mail a + list of fields computed from its header. + + (2) read such a db file, gets all the mails matching regexp-defined + conditions on the fields, and generates a resulting mbox file. It is low-tech, simple, light and fast. @@ -46,7 +48,7 @@ #include #define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file" -#define MYMAIL_VERSION "0.9.8" +#define MYMAIL_VERSION "0.9.9" #define MYMAIL_DB_FORMAT_VERSION 1 @@ -57,12 +59,20 @@ #define LEADING_FROM_LINE_REGEXP_STRING "^From .*\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 0123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$" +/********************************************************************/ + +struct alias_node { + char *alias, *value; + struct alias_node *next; +}; + /* Global variables! */ int global_quiet; int global_use_leading_time; - +int global_nb_mails_max; regex_t global_leading_from_line_regexp; +struct alias_node *global_alias_list; /********************************************************************/ @@ -76,6 +86,8 @@ enum { ID_PARTICIPANT, ID_BODY, ID_TIME_INTERVAL, + ID_MAIL_ID, + ID_REFERENCE_ID, MAX_ID }; @@ -88,7 +100,9 @@ static char *field_keys[] = { "date", "part", "body", - "interval" + "interval", + "mailid", + "references" }; /********************************************************************/ @@ -102,15 +116,6 @@ struct search_condition { /********************************************************************/ -struct alias_node { - char *alias, *value; - struct alias_node *next; -}; - -struct alias_node *global_alias_list; - -/********************************************************************/ - struct parsable_field { int id; int cflags; @@ -154,6 +159,20 @@ static struct parsable_field fields_to_parse[] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { + ID_MAIL_ID, + REG_ICASE, + "^message-id: ", + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }, + + { + ID_REFERENCE_ID, + REG_ICASE, + "^\\(in-reply-to\\|references\\): ", + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }, + }; /********************************************************************/ @@ -201,7 +220,7 @@ void *safe_malloc(size_t n) { void *p = malloc(n); if(!p && n != 0) { fprintf(stderr, - "selector: cannot allocate memory: %s\n", strerror(errno)); + "mymail: cannot allocate memory: %s\n", strerror(errno)); exit(EXIT_FAILURE); } return p; @@ -213,8 +232,9 @@ FILE *safe_fopen(const char *path, const char *mode, const char *comment) { return result; } else { fprintf(stderr, - "mymail: Cannot open file '%s' (%s) with mode \"%s\".\n", - path, comment, mode); + "mymail: Cannot open file '%s' (%s) with mode \"%s\": %s\n", + path, comment, mode, + strerror(errno)); exit(EXIT_FAILURE); } } @@ -256,6 +276,8 @@ void print_usage(FILE *out) { fprintf(out, " index mails\n"); fprintf(out, " -o , --output \n"); fprintf(out, " set the result file, use stdout if unset\n"); + fprintf(out, " -n , --nb-mails-max \n"); + fprintf(out, " set the maximum number of mails to extract\n"); fprintf(out, " -a , --default-search \n"); fprintf(out, " set the default search field\n"); } @@ -439,6 +461,7 @@ void update_time(int db_key, const char *db_value, time_t *t) { } int search_in_db(const char *db_filename, + int nb_extracted_mails, int nb_search_conditions, struct search_condition *search_conditions, FILE *output_file) { @@ -453,13 +476,10 @@ int search_in_db(const char *db_filename, int db_key; int hits[MAX_NB_SEARCH_CONDITIONS]; int nb_body_conditions, need_time; - int nb_extracted_mails; time_t mail_time; int m, n; - nb_extracted_mails = 0; - if(!global_quiet) { printf("Searching in '%s' ... ", db_filename); fflush(stdout); @@ -504,7 +524,14 @@ int search_in_db(const char *db_filename, strcpy(current_mail_filename, ""); - while(fgets(raw_db_line, BUFFER_SIZE, db_file)) { + while(nb_extracted_mails < global_nb_mails_max && + fgets(raw_db_line, BUFFER_SIZE, db_file)) { + + /* Removes the CR */ + char *s = raw_db_line; + while(*s && *s != '\n') { s++; } + *s = '\0'; + db_value = parse_token(db_key_string, TOKEN_BUFFER_SIZE, ' ', raw_db_line); if(strcmp("mail", db_key_string) == 0) { @@ -520,7 +547,7 @@ int search_in_db(const char *db_filename, for(n = 0; n < nb_search_conditions; n++) { hits[n] = 0; } db_value = parse_token(position_in_file_string, TOKEN_BUFFER_SIZE, ' ', db_value); - db_value = parse_token(current_mail_filename, PATH_MAX+1, '\n', db_value); + strncpy(current_mail_filename, db_value, PATH_MAX + 1); current_position_in_mail = atol(position_in_file_string); } @@ -543,14 +570,14 @@ int search_in_db(const char *db_filename, } } - if(current_mail_filename[0]) { - if(check_full_mail_match(current_mail_filename, - mail_time, - nb_search_conditions, search_conditions, - nb_body_conditions, hits, current_position_in_mail)) { - extract_mail(current_mail_filename, current_position_in_mail, output_file); - nb_extracted_mails++; - } + if(nb_extracted_mails < global_nb_mails_max && + current_mail_filename[0] && + check_full_mail_match(current_mail_filename, + mail_time, + nb_search_conditions, search_conditions, + nb_body_conditions, hits, current_position_in_mail)) { + extract_mail(current_mail_filename, current_position_in_mail, output_file); + nb_extracted_mails++; } fclose(db_file); @@ -564,6 +591,7 @@ int search_in_db(const char *db_filename, } int recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, + int nb_extracted_mails, int nb_search_conditions, struct search_condition *search_conditions, FILE *output_file) { @@ -571,7 +599,6 @@ int recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, struct dirent *dir_e; struct stat sb; char subname[PATH_MAX + 1]; - int nb_extracted_mails = 0; if(lstat(entry_name, &sb) != 0) { fprintf(stderr, @@ -586,12 +613,14 @@ int recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, dir = opendir(entry_name); if(dir) { - while((dir_e = readdir(dir))) { + while((dir_e = readdir(dir)) && + nb_extracted_mails < global_nb_mails_max) { if(!ignore_entry(dir_e->d_name)) { snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name); - nb_extracted_mails += recursive_search_in_db(subname, db_filename_regexp, - nb_search_conditions, search_conditions, - output_file); + nb_extracted_mails = recursive_search_in_db(subname, db_filename_regexp, + nb_extracted_mails, + nb_search_conditions, search_conditions, + output_file); } } closedir(dir); @@ -602,8 +631,10 @@ int recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, while(*s) { if(*s == '/') { filename = s+1; } s++; } if(regexec(db_filename_regexp, filename, 0, 0, 0) == 0) { - nb_extracted_mails += - search_in_db(entry_name, nb_search_conditions, search_conditions, output_file); + nb_extracted_mails = + search_in_db(entry_name, + nb_extracted_mails, + nb_search_conditions, search_conditions, output_file); } } @@ -771,6 +802,7 @@ static struct option long_options[] = { { "index", 0, 0, 'i' }, { "output", 1, 0, 'o' }, { "default-search", 1, 0, 'a' }, + { "nb-mails-max", 1, 0, 'n' }, { 0, 0, 0, 0 } }; @@ -829,6 +861,13 @@ void init_condition(struct search_condition *condition, const char *full_string, const char *string; struct alias_node *a; + for(a = global_alias_list; a; a = a->next) { + if(strcmp(full_string, a->alias) == 0) { + full_string = a->value; + break; + } + } + string = parse_token(full_search_field, TOKEN_BUFFER_SIZE, ' ', full_string); search_field = full_search_field; @@ -839,13 +878,6 @@ void init_condition(struct search_condition *condition, const char *full_string, condition->negation = 0; } - for(a = global_alias_list; a; a = a->next) { - if(strcmp(search_field, a->alias) == 0) { - search_field = a->value; - break; - } - } - condition->db_key = -1; /* Time condition */ @@ -946,6 +978,7 @@ void read_rc_file(const char *rc_filename) { if(*s && *s != '#') { s = parse_token(command, TOKEN_BUFFER_SIZE, ' ', s); + if(strcmp(command, "alias") == 0) { struct alias_node *a = safe_malloc(sizeof(struct alias_node)); a->next = global_alias_list; @@ -1024,6 +1057,8 @@ int main(int argc, char **argv) { global_alias_list = 0; global_quiet = 0; global_use_leading_time = 0; + global_nb_mails_max = 250; + default_search_field = 0; strncpy(output_filename, "", PATH_MAX); @@ -1032,13 +1067,13 @@ int main(int argc, char **argv) { } /* - { -#warning Test code added on 2013 May 02 11:17:01 + { + #warning Test code added on 2013 May 02 11:17:01 struct alias_node *a; for(a = global_alias_list; a; a = a->next) { - printf ("ALIAS [%s] [%s]\n", a->alias, a->value); + printf ("ALIAS [%s] [%s]\n", a->alias, a->value); + } } - } */ setlocale(LC_ALL, ""); @@ -1127,12 +1162,26 @@ int main(int argc, char **argv) { default_search_field = optarg; break; + case 'n': + global_nb_mails_max = atoi(optarg); + break; + default: error = 1; break; } } + if(error) { + print_usage(stderr); + exit(EXIT_FAILURE); + } + + if(show_help) { + print_usage(stdout); + exit(EXIT_SUCCESS); + } + /* Set all the values that may defined in the arguments, through environment variables, or hard-coded */ @@ -1156,18 +1205,6 @@ int main(int argc, char **argv) { "MYMAIL_MBOX_PATTERN", 0); - /* Start the processing */ - - if(error) { - print_usage(stderr); - exit(EXIT_FAILURE); - } - - if(show_help) { - print_usage(stdout); - exit(EXIT_SUCCESS); - } - /* mbox indexing */ if(action_index) { @@ -1257,9 +1294,10 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } - nb_extracted_mails += recursive_search_in_db(db_root_path, &db_filename_regexp, - nb_search_conditions, search_conditions, - output_file); + nb_extracted_mails = recursive_search_in_db(db_root_path, &db_filename_regexp, + nb_extracted_mails, + nb_search_conditions, search_conditions, + output_file); regfree(&db_filename_regexp); } @@ -1276,8 +1314,10 @@ int main(int argc, char **argv) { s = parse_token(db_filename, PATH_MAX + 1, ';', s); if(db_filename[0]) { - nb_extracted_mails += - search_in_db(db_filename, nb_search_conditions, search_conditions, output_file); + nb_extracted_mails = + search_in_db(db_filename, + nb_extracted_mails, + nb_search_conditions, search_conditions, output_file); } } } @@ -1285,8 +1325,10 @@ int main(int argc, char **argv) { /* Search in all db files listed in the command arguments */ while(optind < argc) { - nb_extracted_mails += - search_in_db(argv[optind], nb_search_conditions, search_conditions, output_file); + nb_extracted_mails = + search_in_db(argv[optind], + nb_extracted_mails, + nb_search_conditions, search_conditions, output_file); optind++; } }