Commit 81973969 819739696b8b087d311655b639db7b14fef48f02 by Sergey Poznyakoff

Provide function for parsing the Content-Type header (RFC 2045).

* include/mailutils/util.h (mu_content_type, mu_param): New structs.
(mu_content_type_t): New typedef.
(mu_content_type_parse, mu_content_type_destroy): New protos.
* libmailutils/base/ctparse.c: New file.
* libmailutils/base/Makefile.am: Add new file.

* imap4d/fetch.c: Use mu_content_type_parse to parse the header.

* libmailutils/tests/conttype.c: New file.
* libmailutils/tests/Makefile.am: Add new file.
1 parent 7bc05c77
...@@ -314,6 +314,46 @@ fetch_envelope0 (mu_message_t msg) ...@@ -314,6 +314,46 @@ fetch_envelope0 (mu_message_t msg)
314 314
315 static int fetch_bodystructure0 (mu_message_t message, int extension); 315 static int fetch_bodystructure0 (mu_message_t message, int extension);
316 316
317 static int
318 format_param (void *item, void *data)
319 {
320 struct mu_param *p = item;
321 int *first = data;
322
323 if (!*first)
324 io_sendf (" ");
325 io_send_qstring (p->name);
326 io_sendf (" ");
327 io_send_qstring (p->value);
328 *first = 0;
329 return 0;
330 }
331
332 static int
333 get_content_type (mu_header_t hdr, mu_content_type_t *ctp, char const *dfl)
334 {
335 int rc;
336 char *buffer = NULL;
337
338 rc = mu_header_aget_value (hdr, MU_HEADER_CONTENT_TYPE, &buffer);
339 if (rc == 0)
340 {
341 rc = mu_content_type_parse (buffer, ctp);
342 if (rc == MU_ERR_PARSE)
343 {
344 mu_error (_("malformed content type: %s"), buffer);
345 if (dfl)
346 rc = mu_content_type_parse (dfl, ctp);
347 }
348 else if (rc)
349 mu_diag_funcall (MU_DIAG_ERROR, "mu_content_type_parse", buffer, rc);
350 free (buffer);
351 }
352 else if (rc == MU_ERR_NOENT && dfl)
353 rc = mu_content_type_parse (dfl, ctp);
354 return rc;
355 }
356
317 /* The basic fields of a non-multipart body part are in the following order: 357 /* The basic fields of a non-multipart body part are in the following order:
318 body type: 358 body type:
319 A string giving the content media type name as defined in [MIME-IMB]. 359 A string giving the content media type name as defined in [MIME-IMB].
...@@ -362,98 +402,43 @@ static int ...@@ -362,98 +402,43 @@ static int
362 bodystructure (mu_message_t msg, int extension) 402 bodystructure (mu_message_t msg, int extension)
363 { 403 {
364 mu_header_t header = NULL; 404 mu_header_t header = NULL;
365 char *buffer = NULL;
366 size_t blines = 0; 405 size_t blines = 0;
367 int message_rfc822 = 0; 406 int message_rfc822 = 0;
368 int text_plain = 0; 407 int text_plain = 0;
408 mu_content_type_t ct;
409 int rc;
369 410
370 mu_message_get_header (msg, &header); 411 mu_message_get_header (msg, &header);
371 412
372 if (mu_header_aget_value (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0) 413 rc = get_content_type (header, &ct, "TEXT/PLAIN; CHARSET=US-ASCII");
373 { 414 if (rc == 0)
374 struct mu_wordsplit ws;
375 char *p;
376 size_t len;
377
378 ws.ws_delim = " \t\r\n;=";
379 ws.ws_alloc_die = imap4d_ws_alloc_die;
380 if (mu_wordsplit (buffer, &ws, IMAP4D_WS_FLAGS))
381 { 415 {
382 mu_error (_("%s failed: %s"), "mu_wordsplit", 416 if (mu_c_strcasecmp (ct->type, "MESSAGE") == 0
383 mu_wordsplit_strerror (&ws)); 417 && mu_c_strcasecmp (ct->subtype, "RFC822") == 0)
384 return RESP_BAD; /* FIXME: a better error handling, maybe? */
385 }
386
387 len = strcspn (ws.ws_wordv[0], "/");
388 if (mu_c_strcasecmp (ws.ws_wordv[0], "MESSAGE/RFC822") == 0)
389 message_rfc822 = 1; 418 message_rfc822 = 1;
390 else if (mu_c_strncasecmp (ws.ws_wordv[0], "TEXT", len) == 0) 419 else if (mu_c_strcasecmp (ct->type, "TEXT") == 0)
391 text_plain = 1; 420 text_plain = 1;
392 421
393 ws.ws_wordv[0][len++] = 0; 422 io_send_qstring (ct->type);
394 p = ws.ws_wordv[0];
395 io_send_qstring (p);
396 io_sendf (" "); 423 io_sendf (" ");
397 io_send_qstring (ws.ws_wordv[0] + len); 424 io_send_qstring (ct->subtype);
398 425
399 /* body parameter parenthesized list: Content-type attributes */ 426 /* body parameter parenthesized list: Content-type attributes */
400 if (ws.ws_wordc > 1) 427 if (mu_list_is_empty (ct->param))
428 io_sendf (" NIL");
429 else
401 { 430 {
402 int space = 0; 431 int first = 1;
403 char *lvalue = NULL;
404 int i;
405
406 io_sendf (" ("); 432 io_sendf (" (");
407 for (i = 1; i < ws.ws_wordc; i++) 433 mu_list_foreach (ct->param, format_param, &first);
408 {
409 /* body parameter parenthesized list:
410 Content-type parameter list. */
411 if (lvalue)
412 {
413 if (space)
414 io_sendf (" ");
415 io_send_qstring (lvalue);
416 lvalue = NULL;
417 space = 1;
418 }
419
420 switch (ws.ws_wordv[i][0])
421 {
422 case ';':
423 continue;
424
425 case '=':
426 if (++i < ws.ws_wordc)
427 {
428 io_sendf (" ");
429 io_send_qstring (ws.ws_wordv[i]);
430 }
431 break;
432
433 default:
434 lvalue = ws.ws_wordv[i];
435 }
436 }
437
438 if (lvalue)
439 {
440 if (space)
441 io_sendf (" ");
442 io_send_qstring (lvalue);
443 }
444
445 io_sendf (")"); 434 io_sendf (")");
446 } 435 }
447 else 436 mu_content_type_destroy (&ct);
448 io_sendf (" NIL");
449 mu_wordsplit_free (&ws);
450 free (buffer);
451 } 437 }
452 else 438 else
453 { 439 {
454 /* Default? If Content-Type is not present consider as text/plain. */ 440 mu_diag_funcall (MU_DIAG_ERROR, "get_content_type", NULL, rc);
455 io_sendf ("\"TEXT\" \"PLAIN\" (\"CHARSET\" \"US-ASCII\")"); 441 return RESP_BAD; /* FIXME: a better error handling, maybe? */
456 text_plain = 1;
457 } 442 }
458 443
459 /* body id: Content-ID. */ 444 /* body id: Content-ID. */
...@@ -546,8 +531,9 @@ fetch_bodystructure0 (mu_message_t message, int extension) ...@@ -546,8 +531,9 @@ fetch_bodystructure0 (mu_message_t message, int extension)
546 mu_message_is_multipart (message, &is_multipart); 531 mu_message_is_multipart (message, &is_multipart);
547 if (is_multipart) 532 if (is_multipart)
548 { 533 {
549 char *buffer = NULL; 534 mu_content_type_t ct;
550 mu_header_t header = NULL; 535 mu_header_t header = NULL;
536 int rc;
551 537
552 mu_message_get_num_parts (message, &nparts); 538 mu_message_get_num_parts (message, &nparts);
553 539
...@@ -564,79 +550,32 @@ fetch_bodystructure0 (mu_message_t message, int extension) ...@@ -564,79 +550,32 @@ fetch_bodystructure0 (mu_message_t message, int extension)
564 mu_message_get_header (message, &header); 550 mu_message_get_header (message, &header);
565 551
566 /* The subtype. */ 552 /* The subtype. */
567 if (mu_header_aget_value (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0) 553 rc = get_content_type (header, &ct, NULL);
568 { 554 if (rc == 0)
569 struct mu_wordsplit ws;
570 char *s;
571
572 ws.ws_delim = " \t\r\n;=";
573 ws.ws_alloc_die = imap4d_ws_alloc_die;
574 if (mu_wordsplit (buffer, &ws, IMAP4D_WS_FLAGS))
575 { 555 {
576 mu_error (_("%s failed: %s"), "mu_wordsplit",
577 mu_wordsplit_strerror (&ws));
578 return RESP_BAD; /* FIXME: a better error handling, maybe? */
579 }
580
581 s = strchr (ws.ws_wordv[0], '/');
582 if (s)
583 s++;
584 io_sendf (" "); 556 io_sendf (" ");
585 io_send_qstring (s); 557 io_send_qstring (ct->subtype);
586 558
587 /* The extension data for multipart. */ 559 /* The extension data for multipart. */
588 if (extension) 560 if (extension && !mu_list_is_empty (ct->param))
589 { 561 {
590 int space = 0; 562 int first = 1;
591 char *lvalue = NULL;
592
593 io_sendf (" ("); 563 io_sendf (" (");
594 for (i = 1; i < ws.ws_wordc; i++) 564 mu_list_foreach (ct->param, format_param, &first);
595 {
596 /* body parameter parenthesized list:
597 Content-type parameter list. */
598 if (lvalue)
599 {
600 if (space)
601 io_sendf (" ");
602 io_send_qstring (lvalue);
603 lvalue = NULL;
604 space = 1;
605 }
606
607 switch (ws.ws_wordv[i][0])
608 {
609 case ';':
610 continue;
611
612 case '=':
613 if (++i < ws.ws_wordc)
614 {
615 io_sendf (" ");
616 io_send_qstring (ws.ws_wordv[i]);
617 }
618 break;
619
620 default:
621 lvalue = ws.ws_wordv[i];
622 }
623 }
624 if (lvalue)
625 {
626 if (space)
627 io_sendf (" ");
628 io_send_qstring (lvalue);
629 }
630 io_sendf (")"); 565 io_sendf (")");
631 } 566 }
632 else 567 else
633 io_sendf (" NIL"); 568 io_sendf (" NIL");
634 mu_wordsplit_free (&ws); 569 mu_content_type_destroy (&ct);
635 free (buffer);
636 } 570 }
637 else 571 else if (rc == MU_ERR_NOENT)
638 /* No content-type header */ 572 /* No content-type header */
639 io_sendf (" NIL"); 573 io_sendf (" NIL");
574 else
575 {
576 mu_diag_funcall (MU_DIAG_ERROR, "get_content_type", NULL, rc);
577 return RESP_BAD; /* FIXME: a better error handling, maybe? */
578 }
640 579
641 /* body disposition: Content-Disposition. */ 580 /* body disposition: Content-Disposition. */
642 fetch_send_header_list (header, MU_HEADER_CONTENT_DISPOSITION, 581 fetch_send_header_list (header, MU_HEADER_CONTENT_DISPOSITION,
...@@ -729,7 +668,6 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc, ...@@ -729,7 +668,6 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc,
729 mu_message_t msg = frt->msg, retmsg = NULL; 668 mu_message_t msg = frt->msg, retmsg = NULL;
730 size_t i; 669 size_t i;
731 mu_header_t header; 670 mu_header_t header;
732 const char *hval;
733 671
734 if (ffc->nset == 0) 672 if (ffc->nset == 0)
735 { 673 {
...@@ -739,31 +677,21 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc, ...@@ -739,31 +677,21 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc,
739 677
740 for (i = 0; i < ffc->nset; i++) 678 for (i = 0; i < ffc->nset; i++)
741 { 679 {
680 mu_content_type_t ct;
681 int rc;
682
742 if (mu_message_get_part (msg, ffc->section_part[i], &msg)) 683 if (mu_message_get_part (msg, ffc->section_part[i], &msg))
743 return NULL; 684 return NULL;
744 685
745 if (mu_message_get_header (msg, &header)) 686 if (mu_message_get_header (msg, &header))
746 return NULL; 687 return NULL;
747 688
748 if (mu_header_sget_value (header, MU_HEADER_CONTENT_TYPE, &hval) == 0) 689 rc = get_content_type (header, &ct, NULL);
749 {
750 struct mu_wordsplit ws;
751 int rc;
752
753 ws.ws_delim = " \t\r\n;=";
754 ws.ws_alloc_die = imap4d_ws_alloc_die;
755 if (mu_wordsplit (hval, &ws, IMAP4D_WS_FLAGS))
756 {
757 mu_error (_("%s failed: %s"), "mu_wordsplit",
758 mu_wordsplit_strerror (&ws));
759 return NULL;
760 }
761
762 rc = mu_c_strcasecmp (ws.ws_wordv[0], "MESSAGE/RFC822");
763 mu_wordsplit_free (&ws);
764
765 if (rc == 0) 690 if (rc == 0)
766 { 691 {
692 if (mu_c_strcasecmp (ct->type, "MESSAGE") == 0
693 && mu_c_strcasecmp (ct->subtype, "RFC822") == 0)
694 {
767 rc = mu_message_unencapsulate (msg, &retmsg, NULL); 695 rc = mu_message_unencapsulate (msg, &retmsg, NULL);
768 if (rc) 696 if (rc)
769 { 697 {
...@@ -778,7 +706,10 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc, ...@@ -778,7 +706,10 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc,
778 } 706 }
779 msg = retmsg; 707 msg = retmsg;
780 } 708 }
709 mu_content_type_destroy (&ct);
781 } 710 }
711 else if (rc != MU_ERR_NOENT)
712 mu_diag_funcall (MU_DIAG_ERROR, "get_content_type", NULL, rc);
782 } 713 }
783 714
784 return retmsg; 715 return retmsg;
......
...@@ -124,6 +124,29 @@ int mu_rfc2822_references (mu_message_t msg, char **pstr); ...@@ -124,6 +124,29 @@ int mu_rfc2822_references (mu_message_t msg, char **pstr);
124 int mu_rfc2822_in_reply_to (mu_message_t msg, char **pstr); 124 int mu_rfc2822_in_reply_to (mu_message_t msg, char **pstr);
125 125
126 /* ----------------------- */ 126 /* ----------------------- */
127 /* ----------------------- */
128 struct mu_content_type
129 {
130 char *type;
131 char *subtype;
132 char *trailer;
133 mu_list_t param;
134 };
135
136 typedef struct mu_content_type *mu_content_type_t;
137
138 struct mu_param
139 {
140 char *name;
141 char *value;
142 };
143
144
145
146 int mu_content_type_parse (const char *input, mu_content_type_t *retct);
147 void mu_content_type_destroy (mu_content_type_t *pptr);
148
149 /* ----------------------- */
127 /* Filter+iconv */ 150 /* Filter+iconv */
128 /* ----------------------- */ 151 /* ----------------------- */
129 int mu_decode_filter (mu_stream_t *pfilter, mu_stream_t input, 152 int mu_decode_filter (mu_stream_t *pfilter, mu_stream_t input,
......
...@@ -24,8 +24,9 @@ libbase_la_SOURCES = \ ...@@ -24,8 +24,9 @@ libbase_la_SOURCES = \
24 argcvjoin.c\ 24 argcvjoin.c\
25 argcvrem.c\ 25 argcvrem.c\
26 assoc.c\ 26 assoc.c\
27 filesafety.c\ 27 ctparse.c\
28 daemon.c\ 28 daemon.c\
29 filesafety.c\
29 fdwait.c\ 30 fdwait.c\
30 fgetpwent.c\ 31 fgetpwent.c\
31 filename.c\ 32 filename.c\
......
1 #if HAVE_CONFIG_H
2 # include <config.h>
3 #endif
4
5 #include <stdlib.h>
6 #include <string.h>
7 #include <mailutils/types.h>
8 #include <mailutils/cstr.h>
9 #include <mailutils/cctype.h>
10 #include <mailutils/util.h>
11 #include <mailutils/errno.h>
12
13 void
14 mu_param_free (void *data)
15 {
16 struct mu_param *p = data;
17 free (p->name);
18 free (p->value);
19 free (p);
20 }
21
22 int
23 mu_param_cmp (void const *a, void const *b)
24 {
25 struct mu_param const *p1 = a;
26 struct mu_param const *p2 = b;
27
28 return mu_c_strcasecmp (p1->name, p2->name);
29 }
30
31 static int parse_param (const char **input_ptr, mu_content_type_t ct);
32 static int parse_params (const char *input, mu_content_type_t ct);
33 static int parse_subtype (const char *input, mu_content_type_t ct);
34 static int parse_type (const char *input, mu_content_type_t ct);
35
36 static int
37 parse_type (const char *input, mu_content_type_t ct)
38 {
39 size_t i;
40
41 for (i = 0; input[i] != '/'; i++)
42 {
43 if (input[i] == 0
44 || !(mu_isalnum (input[i]) || input[i] == '-' || input[i] == '_'))
45 return MU_ERR_PARSE;
46 }
47 ct->type = malloc (i);
48 if (!ct->type)
49 return ENOMEM;
50 memcpy (ct->type, input, i);
51 ct->type[i] = 0;
52
53 return parse_subtype (input + i + 1, ct);
54 }
55
56 static int
57 parse_subtype (const char *input, mu_content_type_t ct)
58 {
59 size_t i;
60
61 for (i = 0; !(input[i] == 0 || input[i] == ';'); i++)
62 {
63 if (input[i] == 0
64 || !(mu_isalnum (input[i]) || input[i] == '-' || input[i] == '_'))
65 return MU_ERR_PARSE;
66 }
67 ct->subtype = malloc (i);
68 if (!ct->subtype)
69 return ENOMEM;
70 memcpy (ct->subtype, input, i);
71 ct->subtype[i] = 0;
72
73 return parse_params (input + i, ct);
74 }
75
76 static int
77 parse_params (const char *input, mu_content_type_t ct)
78 {
79 int rc;
80
81 rc = mu_list_create (&ct->param);
82 if (rc)
83 return rc;
84 mu_list_set_destroy_item (ct->param, mu_param_free);
85 mu_list_set_comparator (ct->param, mu_param_cmp);
86
87 while (*input == ';')
88 {
89 input = mu_str_skip_class (input + 1, MU_CTYPE_BLANK);
90 rc = parse_param (&input, ct);
91 if (rc)
92 return rc;
93 }
94
95 if (*input)
96 {
97 input = mu_str_skip_class (input, MU_CTYPE_BLANK);
98 ct->trailer = strdup (input);
99 if (!ct->trailer)
100 return ENOMEM;
101 }
102
103 return rc;
104 }
105
106 static char tspecials[] = "()<>@,;:\\\"/[]?=";
107
108 #define ISTOKEN(c) ((unsigned char)(c) > ' ' && !strchr (tspecials, c))
109
110 static int
111 parse_param (const char **input_ptr, mu_content_type_t ct)
112 {
113 const char *input = *input_ptr;
114 size_t i = 0;
115 size_t namelen;
116 size_t valstart, vallen;
117 struct mu_param *p;
118 int rc;
119 unsigned quotechar = 0;
120
121 while (ISTOKEN (input[i]))
122 i++;
123 namelen = i;
124
125 if (input[i] != '=')
126 return MU_ERR_PARSE;
127 i++;
128 if (input[i] == '"')
129 {
130 i++;
131 valstart = i;
132 while (input[i] != '"')
133 {
134 if (input[i] == '\\')
135 {
136 quotechar++;
137 i++;
138 }
139 if (!input[i])
140 return MU_ERR_PARSE;
141 i++;
142 }
143 vallen = i - valstart - quotechar;
144 i++;
145 }
146 else
147 {
148 valstart = i;
149 while (ISTOKEN (input[i]))
150 i++;
151 vallen = i - valstart;
152 }
153
154 p = malloc (sizeof (*p));
155 if (!p)
156 return ENOMEM;
157 p->name = malloc (namelen + 1);
158 p->value = malloc (vallen + 1);
159 if (!p->name || !p->value)
160 {
161 mu_param_free (p);
162 return ENOMEM;
163 }
164
165 memcpy (p->name, input, namelen);
166 p->name[namelen] = 0;
167 if (quotechar)
168 {
169 size_t j;
170 const char *src = input + valstart;
171
172 for (i = j = 0; j < vallen; i++, j++)
173 {
174 if (src[j] == '\\')
175 j++;
176 p->value[i] = src[j];
177 }
178 p->value[i] = 0;
179 }
180 else
181 {
182 memcpy (p->value, input + valstart, vallen);
183 p->value[vallen] = 0;
184 }
185
186 rc = mu_list_append (ct->param, p);
187 if (rc)
188 {
189 mu_param_free (p);
190 return rc;
191 }
192
193 *input_ptr = input + i;
194
195 return 0;
196 }
197
198 int
199 mu_content_type_parse (const char *input, mu_content_type_t *retct)
200 {
201 int rc;
202 mu_content_type_t ct;
203
204 ct = calloc (1, sizeof (*ct));
205 if (!ct)
206 return errno;
207
208 rc = parse_type (mu_str_skip_class (input, MU_CTYPE_BLANK), ct);
209 if (rc)
210 mu_content_type_destroy (&ct);
211 else
212 *retct = ct;
213
214 return rc;
215 }
216
217 void
218 mu_content_type_destroy (mu_content_type_t *pptr)
219 {
220 if (pptr && *pptr)
221 {
222 mu_content_type_t ct = *pptr;
223 free (ct->type);
224 free (ct->subtype);
225 free (ct->trailer);
226 mu_list_destroy (&ct->param);
227 free (ct);
228 *pptr = NULL;
229 }
230 }
...@@ -42,6 +42,7 @@ AM_CPPFLAGS = @MU_LIB_COMMON_INCLUDES@ ...@@ -42,6 +42,7 @@ AM_CPPFLAGS = @MU_LIB_COMMON_INCLUDES@
42 noinst_PROGRAMS = \ 42 noinst_PROGRAMS = \
43 addr\ 43 addr\
44 cidr\ 44 cidr\
45 conttype\
45 debugspec\ 46 debugspec\
46 decode2047\ 47 decode2047\
47 encode2047\ 48 encode2047\
......
1 #include <config.h>
2 #include <mailutils/mailutils.h>
3 #include <assert.h>
4
5 static int
6 print_param (void *item, void *data)
7 {
8 size_t *n = data;
9 struct mu_param *p = item;
10 printf ("%2zu: %s=%s\n", *n, p->name, p->value);
11 ++*n;
12 return 0;
13 }
14
15 int
16 parse (char const *input)
17 {
18 mu_content_type_t ct;
19 int rc;
20
21 rc = mu_content_type_parse (input, &ct);
22 if (rc)
23 {
24 mu_error ("%s", mu_strerror (rc));
25 return 1;
26 }
27
28 printf ("type = %s\n", ct->type);
29 printf ("subtype = %s\n", ct->subtype);
30 if (ct->trailer)
31 printf ("trailer = %s\n", ct->trailer);
32 if (!mu_list_is_empty (ct->param))
33 {
34 size_t n = 0;
35 mu_list_foreach (ct->param, print_param, &n);
36 }
37 mu_content_type_destroy (&ct);
38 return 0;
39 }
40
41 int
42 main (int argc, char **argv)
43 {
44 char *buf = NULL;
45 size_t size = 0, n;
46 int rc;
47
48 mu_set_program_name (argv[0]);
49 mu_stdstream_setup (MU_STDSTREAM_RESET_NONE);
50
51 if (argc == 2)
52 return parse (argv[1]);
53 while ((rc = mu_stream_getline (mu_strin, &buf, &size, &n)) == 0 && n > 0)
54 {
55 mu_rtrim_class (buf, MU_CTYPE_ENDLN);
56 if (parse (buf))
57 rc = 1;
58 }
59 return rc;
60 }
61