Commit 3d4474e7 3d4474e72850b6fe1ab17643a1aa7a6708349b0e by Sam Roberts

* mailbox/parse822.c,include/mailutils/parse822.h: functions

  to parse rfc822 date-time.
* examples/mbox-explode.c: example of saving all mime attachments
  for messages in a mailbox.
* examples/mbox-dates.c: example of using the date-time parser, it
  prints all the dates in a mailbox that it can't parse.
* examples/Makefile: makes the mbox-* examples.
1 parent 29e43b89
1 2001-06-02 Sam Roberts
2 * mailbox/parse822.c,include/mailutils/parse822.h: functions
3 to parse rfc822 date-time.
4 * examples/mbox-explode.c: example of saving all mime attachments
5 for messages in a mailbox.
6 * examples/mbox-dates.c: example of using the date-time parser, it
7 prints all the dates in a mailbox that it can't parse.
8 * examples/Makefile: makes the mbox-* examples.
9
1 2001-06-01 Alain Magloire 10 2001-06-01 Alain Magloire
2 11
3 * mailbox/mbx_pop.c (pop_writeline): if buffer is NULL noop. 12 * mailbox/mbx_pop.c (pop_writeline): if buffer is NULL noop.
......
...@@ -4,13 +4,27 @@ CFLAGS = -g -I../include ...@@ -4,13 +4,27 @@ CFLAGS = -g -I../include
4 LDFLAGS = -g -static 4 LDFLAGS = -g -static
5 LIBS = ../mailbox/.libs/libmailbox.a ../lib/libmailutils.a 5 LIBS = ../mailbox/.libs/libmailbox.a ../lib/libmailutils.a
6 6
7 default: addr 7 default: addr mbox-explode mbox-dates
8 8
9 # showmail 9 # showmail
10 10
11 showmail: showmail.c $(LIBS) 11 showmail: showmail.c $(LIBS)
12 $(CC) $(CFLAGS) -o $@ $< $(LIBS) 12 $(CC) $(CFLAGS) -o $@ $< $(LIBS)
13 13
14 # example of saving MIME parts to a file
15
16 mbox-explode: mbox-explode.c $(LIBS)
17 $(CC) $(CFLAGS) -o $@ $< $(LIBS)
18
19 # example of parsing the date fields, prints all the incorrectly
20 # formatted dates in a mailbox.
21
22 mbox-dates: mbox-dates.c $(LIBS)
23 $(CC) $(CFLAGS) -o $@ $< $(LIBS)
24
25 bad-dates: mbox-dates
26 for m in ~/Mail/*; do ./mbox-dates $$m; done | tee bad-dates.out
27
14 # addr example and test 28 # addr example and test
15 29
16 test: addr 30 test: addr
...@@ -27,5 +41,5 @@ clean: ...@@ -27,5 +41,5 @@ clean:
27 rm -f *.o 41 rm -f *.o
28 42
29 empty: clean 43 empty: clean
30 rm -f addr showmail 44 rm -f addr showmail mbox-explode mbox-dates
31 45
......
1 #include <sys/types.h>
2 #include <sys/stat.h>
3
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <time.h>
11 #include <unistd.h>
12
13 #include <mailutils/mailbox.h>
14 #include <mailutils/address.h>
15 #include <mailutils/registrar.h>
16 #include <mailutils/parse822.h>
17
18
19 static const char* UserAgent(header_t hdr)
20 {
21 static char agent[128];
22 size_t sz;
23
24 if(header_get_value(hdr, "User-Agent", agent, sizeof(agent), &sz) == 0 && sz != 0)
25 return agent;
26
27 if(header_get_value(hdr, "X-Mailer", agent, sizeof(agent), &sz) == 0 && sz != 0)
28 return agent;
29
30 if(header_get_value(hdr, "Message-Id", agent, sizeof(agent), &sz) == 0 && sz != 0)
31 return agent;
32
33 return "unknown";
34 }
35 int
36 main(int argc, char **argv)
37 {
38 mailbox_t mbox;
39 size_t i;
40 size_t count = 0;
41 char *mboxname = argv[1];
42 int status;
43
44 /* Register the desire formats. */
45 {
46 list_t bookie;
47 registrar_get_list (&bookie);
48 list_append (bookie, path_record);
49 }
50
51 if ((status = mailbox_create_default (&mbox, mboxname)) != 0)
52 {
53 fprintf (stderr, "could not create <%s>: %s\n",
54 mboxname, strerror (status));
55 exit (1);
56 }
57
58 {
59 debug_t debug;
60 mailbox_get_debug (mbox, &debug);
61 // debug_set_level (debug, MU_DEBUG_TRACE|MU_DEBUG_PROT);
62 }
63
64 if ((status = mailbox_open (mbox, MU_STREAM_READ)) != 0)
65 {
66 fprintf (stderr, "could not open mbox: %s\n", strerror (status));
67 exit (1);
68 }
69
70 mailbox_messages_count (mbox, &count);
71
72 for (i = 1; i <= count; ++i)
73 {
74 message_t msg;
75 header_t hdr;
76 char mailer[128];
77 char date[128];
78 size_t len = 0;
79
80 if (
81 (status = mailbox_get_message (mbox, i, &msg)) != 0 ||
82 (status = message_get_header (msg, &hdr)) != 0
83 )
84 {
85 printf ("%s, msg %d: %s\n", mboxname, i, strerror(status));
86 continue;
87 }
88 if ((status = header_get_value (
89 hdr, MU_HEADER_DATE, date, sizeof (date), &len)) != 0)
90 {
91 printf ("%s, msg %d: NO DATE (mua? %s)\n",
92 mboxname, i, UserAgent(hdr));
93 continue;
94 }
95 else
96 {
97 const char* s = date;
98 struct tm tm;
99 char dir[] = "yyyy.mm.dd";
100 size_t nparts = 0;
101 size_t partno;
102
103 if(parse822_date_time(&s, s + strlen(s), &tm))
104 {
105 printf ("%s, msg %d: BAD DATE <%s> (mua? %s)\n",
106 mboxname, i, date, UserAgent(hdr));
107 continue;
108 }
109 }
110 }
111
112 mailbox_close (mbox);
113 mailbox_destroy (&mbox);
114
115 return status;
116 }
117
1 #include <sys/types.h>
2 #include <sys/stat.h>
3
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <time.h>
11 #include <unistd.h>
12
13 #include <mailutils/mailbox.h>
14 #include <mailutils/address.h>
15 #include <mailutils/registrar.h>
16 #include <mailutils/parse822.h>
17
18 int
19 main(int argc, char **argv)
20 {
21 mailbox_t mbox;
22 size_t i;
23 size_t count = 0;
24 char *mailbox_name = "+dbuild.details";
25 int status;
26
27 /* Register the desire formats. */
28 {
29 list_t bookie;
30 registrar_get_list (&bookie);
31 list_append (bookie, path_record);
32 }
33
34 if ((status = mailbox_create_default (&mbox, mailbox_name)) != 0)
35 {
36 fprintf (stderr, "could not create <%s>: %s\n",
37 mailbox_name, strerror (status));
38 exit (1);
39 }
40
41 {
42 debug_t debug;
43 mailbox_get_debug (mbox, &debug);
44 // debug_set_level (debug, MU_DEBUG_TRACE|MU_DEBUG_PROT);
45 }
46
47 if ((status = mailbox_open (mbox, MU_STREAM_READ)) != 0)
48 {
49 fprintf (stderr, "could not open <%s>: %s\n",
50 mailbox_name, strerror (status));
51 exit (1);
52 }
53
54 mailbox_messages_count (mbox, &count);
55
56 for (i = 1; i <= count; ++i)
57 {
58 message_t msg;
59 header_t hdr;
60 char subj[128];
61 char date[128];
62 size_t len = 0;
63
64 if (
65 (status = mailbox_get_message (mbox, i, &msg)) != 0 ||
66 (status = message_get_header (msg, &hdr)) != 0 ||
67 (status = header_get_value (
68 hdr, MU_HEADER_SUBJECT, subj, sizeof (subj), &len)) != 0 ||
69 (status = header_get_value (
70 hdr, MU_HEADER_DATE, date, sizeof (date), &len)) != 0
71 )
72 {
73 fprintf (stderr, "msg %d : %s\n", i, strerror(status));
74 exit(2);
75 }
76
77 if (strcasecmp(subj, "WTLS 1.0 Daily Build-details") == 0)
78 {
79 const char* s = date;
80 struct tm tm;
81 char dir[] = "yyyy.mm.dd";
82 size_t nparts = 0;
83 size_t partno;
84
85 if((status = parse822_date_time(&s, s + strlen(s), &tm)))
86 {
87 fprintf (stderr, "parsing <%s> failed: %s\n", date, strerror(status));
88 exit(1);
89 }
90
91 printf ("Processing for: year %d month %d day %d\n",
92 tm.tm_year + 1900, tm.tm_mon, tm.tm_mday);
93
94 snprintf(dir, sizeof(dir), "%d.%02d.%02d",
95 tm.tm_year + 1900, tm.tm_mon, tm.tm_mday);
96
97 status = mkdir(dir, 0777);
98
99 if(status != 0)
100 {
101 switch(errno)
102 {
103 case EEXIST: /* we've already done this message */
104 continue;
105 case 0:
106 break;
107 default:
108 fprintf (stderr, "mkdir %s failed: %s\n", dir, strerror(errno));
109 status = 1;
110 goto END;
111 break;
112 }
113 }
114 if((status = message_get_num_parts(msg, &nparts))) {
115 fprintf (stderr, "get num parts failed: %s\n", strerror(status));
116 break;
117 }
118
119 for(partno = 1; partno <= nparts; partno++)
120 {
121 message_t part = NULL;
122 char content[128] = "<not found>";
123 char* fname = NULL;
124 char path[PATH_MAX];
125
126 if((status = message_get_part(msg, partno, &part))) {
127 fprintf (stderr, "get part failed: %s\n", strerror(status));
128 break;
129 }
130 message_get_header (part, &hdr);
131 header_get_value (hdr, MU_HEADER_CONTENT_DISPOSITION,
132 content, sizeof (content), &len);
133
134 fname = strrchr(content, '"');
135
136 if(fname)
137 {
138 *fname = 0;
139
140 fname = strchr(content, '"') + 1;
141
142 snprintf(path, sizeof(path), "%s/%s", dir, fname);
143 printf(" filename %s\n", path);
144
145 if((status = message_save_attachment(msg, path, NULL))) {
146 fprintf (stderr, "save attachment failed: %s\n", strerror(status));
147 break;
148 }
149 }
150 }
151 status = 0;
152 }
153 }
154
155 END:
156 mailbox_close (mbox);
157 mailbox_destroy (&mbox);
158
159 return status;
160 }
161
...@@ -32,8 +32,6 @@ Things to consider: ...@@ -32,8 +32,6 @@ Things to consider:
32 - Need a way to parse ",,,", it's a valid address-list, it just doesn't 32 - Need a way to parse ",,,", it's a valid address-list, it just doesn't
33 have any addresses. 33 have any addresses.
34 34
35 - Functions for forming email addresses, quoting display-name, etc.
36
37 - The personal for ""Sam"" <sam@here> is "Sam", and for "'s@b'" <s@b> 35 - The personal for ""Sam"" <sam@here> is "Sam", and for "'s@b'" <s@b>
38 is 's@b', should I strip those outside parentheses, or is that 36 is 's@b', should I strip those outside parentheses, or is that
39 too intrusive? Maybe an apps business if it wants to? 37 too intrusive? Maybe an apps business if it wants to?
...@@ -42,11 +40,29 @@ Things to consider: ...@@ -42,11 +40,29 @@ Things to consider:
42 gets one address, or just say it is or it isn't in RFC format? 40 gets one address, or just say it is or it isn't in RFC format?
43 Right now we're strict, we'll see how it goes. 41 Right now we're strict, we'll see how it goes.
44 42
45 - parse dates?
46 - parse Received: field? 43 - parse Received: field?
47 44
48 - test for memory leaks on malloc failure 45 - test for memory leaks on malloc failure
46
49 - fix the realloc, try a struct _string { char* b, size_t sz }; 47 - fix the realloc, try a struct _string { char* b, size_t sz };
48
49 The lexer finds consecutive sequences of characters, so it should
50 define:
51
52 struct parse822_token_t {
53 const char* b; // beginning of token
54 const char* e; // one past end of token
55 };
56 typedef struct parse822_token_t TOK;
57
58 Then I can have str_append_token(), and the lexer functions can
59 look like:
60
61 int parse822_atom(const char** p, const char* e, TOK* atom);
62
63 Just a quick thought, I'll have to see how many functions that will
64 actually help.
65
50 - get example addresses from rfc2822, and from the perl code. 66 - get example addresses from rfc2822, and from the perl code.
51 */ 67 */
52 68
...@@ -59,6 +75,11 @@ Things to consider: ...@@ -59,6 +75,11 @@ Things to consider:
59 #include <ctype.h> 75 #include <ctype.h>
60 #include <string.h> 76 #include <string.h>
61 #include <stdlib.h> 77 #include <stdlib.h>
78 #include <time.h>
79
80 #ifdef HAVE_STRINGS_H
81 # include <strings.h>
82 #endif
62 83
63 #include "address0.h" 84 #include "address0.h"
64 85
...@@ -1148,6 +1169,329 @@ int parse822_domain_literal(const char** p, const char* e, char** domain_literal ...@@ -1148,6 +1169,329 @@ int parse822_domain_literal(const char** p, const char* e, char** domain_literal
1148 return rc; 1169 return rc;
1149 } 1170 }
1150 1171
1172 /***** From RFC 822, 5.1 Date and Time Specification Syntax *****/
1173
1174 int parse822_day(const char** p, const char* e, int* day)
1175 {
1176 /* day = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" */
1177
1178 const char* days[] = {
1179 "Mon",
1180 "Tue",
1181 "Wed",
1182 "Thu",
1183 "Fri",
1184 "Sat",
1185 "Sun",
1186 NULL
1187 };
1188
1189 int d;
1190
1191 parse822_skip_comments(p, e);
1192
1193 if((e - *p) < 3)
1194 return EPARSE;
1195
1196 for(d = 0; days[d]; d++) {
1197 if(strncasecmp(*p, days[d], 3) == 0) {
1198 *p += 3;
1199 if(day)
1200 *day = d;
1201 return EOK;
1202 }
1203 }
1204 return EPARSE;
1205 }
1206
1207 int parse822_date(const char** p, const char* e, int* day, int* mon, int* year)
1208 {
1209 /* date = 1*2DIGIT month 2*4DIGIT
1210 * month = "Jan" / "Feb" / "Mar" / "Apr"
1211 * / "May" / "Jun" / "Jul" / "Aug"
1212 * / "Sep" / "Oct" / "Nov" / "Dec"
1213 */
1214
1215 const char* mons[] = {
1216 "Jan",
1217 "Feb",
1218 "Mar",
1219 "Apr",
1220 "May",
1221 "Jun",
1222 "Jul",
1223 "Aug",
1224 "Sep",
1225 "Oct",
1226 "Nov",
1227 "Dec",
1228 NULL
1229 };
1230
1231 const char* save = *p;
1232 int rc = EOK;
1233 int m = 0;
1234 int yr = 0;
1235 const char* yrbeg = 0;
1236
1237 parse822_skip_comments(p, e);
1238
1239 if((rc = parse822_digits(p, e, 1, 2, day))) {
1240 *p = save;
1241 return rc;
1242 }
1243
1244 parse822_skip_comments(p, e);
1245
1246 if((e - *p) < 3)
1247 return EPARSE;
1248
1249 for(m = 0; mons[m]; m++) {
1250 if(strncasecmp(*p, mons[m], 3) == 0) {
1251 *p += 3;
1252 if(mon)
1253 *mon = m;
1254 break;
1255 }
1256 }
1257
1258 if(!mons[m]) {
1259 *p = save;
1260 return EPARSE;
1261 }
1262
1263 parse822_skip_comments(p, e);
1264
1265 /* We need to count how many digits their were, and adjust the
1266 * interpretation of the year accordingly. This is from RFC 2822,
1267 * Section 4.3, Obsolete Date and Time. */
1268 yrbeg = *p;
1269
1270 if((rc = parse822_digits(p, e, 2, 4, &yr))) {
1271 *p = save;
1272 return rc;
1273 }
1274
1275 /* rationalize year to four digit, then adjust to tz notation */
1276 switch(*p - yrbeg)
1277 {
1278 case 2:
1279 if(yr >= 0 && yr <= 49) {
1280 yr += 2000;
1281 break;
1282 }
1283 case 3:
1284 yr += 1900;
1285 break;
1286 }
1287
1288 if(year)
1289 *year = yr - 1900;
1290
1291 return EOK;
1292 }
1293
1294 int parse822_time(const char** p, const char* e,
1295 int* hour, int* min, int* sec, int* tz, const char** tzname)
1296 {
1297 /* time = hour zone
1298 * hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT] ; 00:00:00 - 23:59:59
1299 * zone = "UT" / "GMT" ; Universal Time
1300 * ; North American : UT
1301 * / "EST" / "EDT" ; Eastern: - 5/ - 4
1302 * / "CST" / "CDT" ; Central: - 6/ - 5
1303 * / "MST" / "MDT" ; Mountain: - 7/ - 6
1304 * / "PST" / "PDT" ; Pacific: - 8/ - 7
1305 * / 1ALPHA ; RFC 822 was wrong, RFC 2822
1306 * ; says treat these all as -0000.
1307 * / ( ("+" / "-") 4DIGIT ) ; Local differential
1308 * ; hours+min. (HHMM)
1309 */
1310
1311 struct {
1312 const char* tzname;
1313 int tz;
1314 } tzs[] = {
1315 { "UT", 0 * 60 * 60 },
1316 { "UTC", 0 * 60 * 60 },
1317 { "GMT", 0 * 60 * 60 },
1318 { "EST", -5 * 60 * 60 },
1319 { "EDT", -4 * 60 * 60 },
1320 { "CST", -6 * 60 * 60 },
1321 { "CDT", -5 * 60 * 60 },
1322 { "MST", -7 * 60 * 60 },
1323 { "MDT", -6 * 60 * 60 },
1324 { "PST", -8 * 60 * 60 },
1325 { "PDT", -7 * 60 * 60 },
1326 { NULL, }
1327 };
1328
1329 const char* save = *p;
1330 int rc = EOK;
1331 int z = 0;
1332 char* zone = NULL;
1333
1334 parse822_skip_comments(p, e);
1335
1336 if((rc = parse822_digits(p, e, 1, 2, hour))) {
1337 *p = save;
1338 return rc;
1339 }
1340
1341 if((rc = parse822_special(p, e, ':'))) {
1342 *p = save;
1343 return rc;
1344 }
1345
1346 if((rc = parse822_digits(p, e, 1, 2, min))) {
1347 *p = save;
1348 return rc;
1349 }
1350
1351 if((rc = parse822_special(p, e, ':'))) {
1352 *sec = 0;
1353 } else if((rc = parse822_digits(p, e, 1, 2, sec))) {
1354 *p = save;
1355 return rc;
1356 }
1357
1358 parse822_skip_comments(p, e);
1359
1360 if((rc = parse822_atom(p, e, &zone))) {
1361 /* zone is optional */
1362 if(tz)
1363 *tz = 0;
1364 return EOK;
1365 }
1366
1367 /* see if it's a timezone */
1368 for( ; tzs[z].tzname; z++) {
1369 if(strcasecmp(zone, tzs[z].tzname) == 0)
1370 break;
1371 }
1372 if(tzs[z].tzname) {
1373 if(tzname)
1374 *tzname = tzs[z].tzname;
1375
1376 if(tz)
1377 *tz = tzs[z].tz;
1378 } else if(strlen(zone) > 5 || strlen(zone) < 4) {
1379 str_free(&zone);
1380 return EPARSE;
1381 } else {
1382 /* zone = ( + / - ) hhmm */
1383 int hh;
1384 int mm;
1385 int sign;
1386 char* zp = zone;
1387
1388 switch(zp[0])
1389 {
1390 case '-': sign = -1; zp++; break;
1391 case '+': sign = +1; zp++; break;
1392 default: sign = 1; break;
1393 }
1394
1395 if(strspn(zp, "0123456789") != 4) {
1396 *p = save;
1397 str_free(&zone);
1398 return EPARSE;
1399 }
1400 /* convert to seconds from UTC */
1401 hh = (zone[1] - '0') * 10 + (zone[2] - '0');
1402 mm = (zone[3] - '0') * 10 + (zone[4] - '0');
1403
1404 if(tz)
1405 *tz = sign * (hh * 60 * 60 + mm * 60);
1406 }
1407
1408 str_free(&zone);
1409
1410 return EOK;
1411 };
1412
1413 #if 0
1414 For reference, especially for the required range and values of the
1415 integer fields.
1416
1417 struct tm
1418 {
1419 int tm_sec; /* Seconds. [0-60] (1 leap second) */
1420 int tm_min; /* Minutes. [0-59] */
1421 int tm_hour; /* Hours. [0-23] */
1422 int tm_mday; /* Day. [1-31] */
1423 int tm_mon; /* Month. [0-11] */
1424 int tm_year; /* Year - 1900. */
1425 int tm_wday; /* Day of week. [0-6] */
1426 int tm_yday; /* Days in year.[0-365] */
1427 int tm_isdst; /* DST. [-1/0/1]*/
1428
1429 int tm_gmtoff; /* Seconds east of UTC. */
1430 const char *tm_zone; /* Timezone abbreviation. */
1431 };
1432 #endif
1433
1434 int parse822_date_time(const char** p, const char* e, struct tm* tm)
1435 {
1436 /* date-time = [ day "," ] date time */
1437
1438 const char* save = *p;
1439 int rc = 0;
1440
1441 int wday = 0;
1442
1443 int mday = 0;
1444 int mon = 0;
1445 int year = 0;
1446
1447 int hour = 0;
1448 int min = 0;
1449 int sec = 0;
1450
1451 int tz = 0;
1452 const char* tzname = 0;
1453
1454 if((rc = parse822_day(p, e, &wday))) {
1455 if(rc != EPARSE)
1456 return rc;
1457 } else {
1458 /* If we got a day, we MUST have a ','. */
1459 parse822_skip_comments(p, e);
1460
1461 if((rc = parse822_special(p, e, ','))) {
1462 *p = save;
1463 return rc;
1464 }
1465 }
1466
1467 if((rc = parse822_date(p, e, &mday, &mon, &year))) {
1468 *p = save;
1469 return rc;
1470 }
1471 if((rc = parse822_time(p, e, &hour, &min, &sec, &tz, &tzname))) {
1472 *p = save;
1473 return rc;
1474 }
1475
1476 if(tm) {
1477 memset (tm, 0, sizeof (*tm));
1478
1479 tm->tm_wday = wday;
1480
1481 tm->tm_mday = mday;
1482 tm->tm_mon = mon;
1483 tm->tm_year = year;
1484
1485 tm->tm_hour = hour;
1486 tm->tm_min = min;
1487 tm->tm_sec = sec;
1488
1489 /* TZ ? */
1490 }
1491
1492 return EOK;
1493 }
1494
1151 /***** From RFC 822, 3.2 Header Field Definitions *****/ 1495 /***** From RFC 822, 3.2 Header Field Definitions *****/
1152 1496
1153 int parse822_field_name(const char** p, const char* e, char** fieldname) 1497 int parse822_field_name(const char** p, const char* e, char** fieldname)
......