Commit 81973969 819739696b8b087d311655b639db7b14fef48f02 by Sergey Poznyakoff

Provide function for parsing the Content-Type header (RFC 2045).

* include/mailutils/util.h (mu_content_type, mu_param): New structs.
(mu_content_type_t): New typedef.
(mu_content_type_parse, mu_content_type_destroy): New protos.
* libmailutils/base/ctparse.c: New file.
* libmailutils/base/Makefile.am: Add new file.

* imap4d/fetch.c: Use mu_content_type_parse to parse the header.

* libmailutils/tests/conttype.c: New file.
* libmailutils/tests/Makefile.am: Add new file.
1 parent 7bc05c77
......@@ -314,6 +314,46 @@ fetch_envelope0 (mu_message_t msg)
static int fetch_bodystructure0 (mu_message_t message, int extension);
/* mu_list_foreach callback: send one Content-Type parameter as a pair of
   quoted strings ("name" "value").

   ITEM is a struct mu_param; DATA points to an int flag that is non-zero
   until the first parameter has been sent.  Every parameter except the
   first one is preceded by a single space.  Always returns 0.  */
static int
format_param (void *item, void *data)
{
  struct mu_param *param = item;
  int *is_first = data;

  if (*is_first)
    *is_first = 0;
  else
    io_sendf (" ");

  io_send_qstring (param->name);
  io_sendf (" ");
  io_send_qstring (param->value);

  return 0;
}
/* Fetch the Content-Type header from HDR and parse it into *CTP.

   DFL, if not NULL, is a content type string to parse instead when the
   header is missing (MU_ERR_NOENT) or when its value is malformed
   (MU_ERR_PARSE).  A malformed value is reported via mu_error; any other
   parser failure is reported via mu_diag_funcall.

   Returns 0 on success, otherwise the error code of the last failing
   call.  */
static int
get_content_type (mu_header_t hdr, mu_content_type_t *ctp, char const *dfl)
{
  char *value = NULL;
  int status = mu_header_aget_value (hdr, MU_HEADER_CONTENT_TYPE, &value);

  if (status != 0)
    {
      /* No usable header value: fall back to the default only if the
         header is simply absent.  */
      if (status == MU_ERR_NOENT && dfl)
        status = mu_content_type_parse (dfl, ctp);
      return status;
    }

  status = mu_content_type_parse (value, ctp);
  if (status == MU_ERR_PARSE)
    {
      mu_error (_("malformed content type: %s"), value);
      if (dfl)
        status = mu_content_type_parse (dfl, ctp);
    }
  else if (status != 0)
    mu_diag_funcall (MU_DIAG_ERROR, "mu_content_type_parse", value, status);

  free (value);
  return status;
}
/* The basic fields of a non-multipart body part are in the following order:
body type:
A string giving the content media type name as defined in [MIME-IMB].
......@@ -362,98 +402,43 @@ static int
bodystructure (mu_message_t msg, int extension)
{
mu_header_t header = NULL;
char *buffer = NULL;
size_t blines = 0;
int message_rfc822 = 0;
int text_plain = 0;
mu_content_type_t ct;
int rc;
mu_message_get_header (msg, &header);
if (mu_header_aget_value (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0)
rc = get_content_type (header, &ct, "TEXT/PLAIN; CHARSET=US-ASCII");
if (rc == 0)
{
struct mu_wordsplit ws;
char *p;
size_t len;
ws.ws_delim = " \t\r\n;=";
ws.ws_alloc_die = imap4d_ws_alloc_die;
if (mu_wordsplit (buffer, &ws, IMAP4D_WS_FLAGS))
{
mu_error (_("%s failed: %s"), "mu_wordsplit",
mu_wordsplit_strerror (&ws));
return RESP_BAD; /* FIXME: a better error handling, maybe? */
}
len = strcspn (ws.ws_wordv[0], "/");
if (mu_c_strcasecmp (ws.ws_wordv[0], "MESSAGE/RFC822") == 0)
if (mu_c_strcasecmp (ct->type, "MESSAGE") == 0
&& mu_c_strcasecmp (ct->subtype, "RFC822") == 0)
message_rfc822 = 1;
else if (mu_c_strncasecmp (ws.ws_wordv[0], "TEXT", len) == 0)
else if (mu_c_strcasecmp (ct->type, "TEXT") == 0)
text_plain = 1;
ws.ws_wordv[0][len++] = 0;
p = ws.ws_wordv[0];
io_send_qstring (p);
io_send_qstring (ct->type);
io_sendf (" ");
io_send_qstring (ws.ws_wordv[0] + len);
io_send_qstring (ct->subtype);
/* body parameter parenthesized list: Content-type attributes */
if (ws.ws_wordc > 1)
if (mu_list_is_empty (ct->param))
io_sendf (" NIL");
else
{
int space = 0;
char *lvalue = NULL;
int i;
int first = 1;
io_sendf (" (");
for (i = 1; i < ws.ws_wordc; i++)
{
/* body parameter parenthesized list:
Content-type parameter list. */
if (lvalue)
{
if (space)
io_sendf (" ");
io_send_qstring (lvalue);
lvalue = NULL;
space = 1;
}
switch (ws.ws_wordv[i][0])
{
case ';':
continue;
case '=':
if (++i < ws.ws_wordc)
{
io_sendf (" ");
io_send_qstring (ws.ws_wordv[i]);
}
break;
default:
lvalue = ws.ws_wordv[i];
}
}
if (lvalue)
{
if (space)
io_sendf (" ");
io_send_qstring (lvalue);
}
mu_list_foreach (ct->param, format_param, &first);
io_sendf (")");
}
else
io_sendf (" NIL");
mu_wordsplit_free (&ws);
free (buffer);
mu_content_type_destroy (&ct);
}
else
{
/* Default? If Content-Type is not present consider as text/plain. */
io_sendf ("\"TEXT\" \"PLAIN\" (\"CHARSET\" \"US-ASCII\")");
text_plain = 1;
mu_diag_funcall (MU_DIAG_ERROR, "get_content_type", NULL, rc);
return RESP_BAD; /* FIXME: a better error handling, maybe? */
}
/* body id: Content-ID. */
......@@ -542,13 +527,14 @@ fetch_bodystructure0 (mu_message_t message, int extension)
size_t nparts = 1;
size_t i;
int is_multipart = 0;
mu_message_is_multipart (message, &is_multipart);
if (is_multipart)
{
char *buffer = NULL;
mu_content_type_t ct;
mu_header_t header = NULL;
int rc;
mu_message_get_num_parts (message, &nparts);
/* Get all the sub messages. */
......@@ -564,79 +550,32 @@ fetch_bodystructure0 (mu_message_t message, int extension)
mu_message_get_header (message, &header);
/* The subtype. */
if (mu_header_aget_value (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0)
rc = get_content_type (header, &ct, NULL);
if (rc == 0)
{
struct mu_wordsplit ws;
char *s;
ws.ws_delim = " \t\r\n;=";
ws.ws_alloc_die = imap4d_ws_alloc_die;
if (mu_wordsplit (buffer, &ws, IMAP4D_WS_FLAGS))
{
mu_error (_("%s failed: %s"), "mu_wordsplit",
mu_wordsplit_strerror (&ws));
return RESP_BAD; /* FIXME: a better error handling, maybe? */
}
s = strchr (ws.ws_wordv[0], '/');
if (s)
s++;
io_sendf (" ");
io_send_qstring (s);
io_send_qstring (ct->subtype);
/* The extension data for multipart. */
if (extension)
if (extension && !mu_list_is_empty (ct->param))
{
int space = 0;
char *lvalue = NULL;
int first = 1;
io_sendf (" (");
for (i = 1; i < ws.ws_wordc; i++)
{
/* body parameter parenthesized list:
Content-type parameter list. */
if (lvalue)
{
if (space)
io_sendf (" ");
io_send_qstring (lvalue);
lvalue = NULL;
space = 1;
}
switch (ws.ws_wordv[i][0])
{
case ';':
continue;
case '=':
if (++i < ws.ws_wordc)
{
io_sendf (" ");
io_send_qstring (ws.ws_wordv[i]);
}
break;
default:
lvalue = ws.ws_wordv[i];
}
}
if (lvalue)
{
if (space)
io_sendf (" ");
io_send_qstring (lvalue);
}
mu_list_foreach (ct->param, format_param, &first);
io_sendf (")");
}
else
io_sendf (" NIL");
mu_wordsplit_free (&ws);
free (buffer);
mu_content_type_destroy (&ct);
}
else
else if (rc == MU_ERR_NOENT)
/* No content-type header */
io_sendf (" NIL");
else
{
mu_diag_funcall (MU_DIAG_ERROR, "get_content_type", NULL, rc);
return RESP_BAD; /* FIXME: a better error handling, maybe? */
}
/* body disposition: Content-Disposition. */
fetch_send_header_list (header, MU_HEADER_CONTENT_DISPOSITION,
......@@ -729,7 +668,6 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc,
mu_message_t msg = frt->msg, retmsg = NULL;
size_t i;
mu_header_t header;
const char *hval;
if (ffc->nset == 0)
{
......@@ -739,30 +677,20 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc,
for (i = 0; i < ffc->nset; i++)
{
mu_content_type_t ct;
int rc;
if (mu_message_get_part (msg, ffc->section_part[i], &msg))
return NULL;
if (mu_message_get_header (msg, &header))
return NULL;
if (mu_header_sget_value (header, MU_HEADER_CONTENT_TYPE, &hval) == 0)
{
struct mu_wordsplit ws;
int rc;
ws.ws_delim = " \t\r\n;=";
ws.ws_alloc_die = imap4d_ws_alloc_die;
if (mu_wordsplit (hval, &ws, IMAP4D_WS_FLAGS))
{
mu_error (_("%s failed: %s"), "mu_wordsplit",
mu_wordsplit_strerror (&ws));
return NULL;
}
rc = mu_c_strcasecmp (ws.ws_wordv[0], "MESSAGE/RFC822");
mu_wordsplit_free (&ws);
if (rc == 0)
rc = get_content_type (header, &ct, NULL);
if (rc == 0)
{
if (mu_c_strcasecmp (ct->type, "MESSAGE") == 0
&& mu_c_strcasecmp (ct->subtype, "RFC822") == 0)
{
rc = mu_message_unencapsulate (msg, &retmsg, NULL);
if (rc)
......@@ -778,7 +706,10 @@ fetch_get_part_rfc822 (struct fetch_function_closure *ffc,
}
msg = retmsg;
}
mu_content_type_destroy (&ct);
}
else if (rc != MU_ERR_NOENT)
mu_diag_funcall (MU_DIAG_ERROR, "get_content_type", NULL, rc);
}
return retmsg;
......
......@@ -124,6 +124,29 @@ int mu_rfc2822_references (mu_message_t msg, char **pstr);
int mu_rfc2822_in_reply_to (mu_message_t msg, char **pstr);
/* ----------------------- */
/* ----------------------- */
/* Parsed representation of a Content-Type header value, as built by
   mu_content_type_parse and released by mu_content_type_destroy. */
struct mu_content_type
{
/* Media type: the part of the input that precedes the '/'. */
char *type;
/* Media subtype: the part between the '/' and the first ';' (or the end
   of the input). */
char *subtype;
/* Whatever input is left after the last parameter, with leading blanks
   skipped; left unset when nothing remains. */
char *trailer;
/* List of struct mu_param items, in the order they appear in the input. */
mu_list_t param;
};
typedef struct mu_content_type *mu_content_type_t;
/* A single "name=value" parameter of a Content-Type header. */
struct mu_param
{
/* Parameter name (the text before '='). */
char *name;
/* Parameter value, with surrounding quotes removed. */
char *value;
};
/* Parse INPUT as a Content-Type value.  On success return 0 and store a
   newly allocated object in *RETCT; on failure return a non-zero error
   code (MU_ERR_PARSE if INPUT is malformed). */
int mu_content_type_parse (const char *input, mu_content_type_t *retct);
/* Free *PPTR together with everything it owns and set *PPTR to NULL.
   Does nothing if PPTR or *PPTR is NULL. */
void mu_content_type_destroy (mu_content_type_t *pptr);
/* ----------------------- */
/* Filter+iconv */
/* ----------------------- */
int mu_decode_filter (mu_stream_t *pfilter, mu_stream_t input,
......
......@@ -24,8 +24,9 @@ libbase_la_SOURCES = \
argcvjoin.c\
argcvrem.c\
assoc.c\
filesafety.c\
ctparse.c\
daemon.c\
filesafety.c\
fdwait.c\
fgetpwent.c\
filename.c\
......
#if HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <mailutils/types.h>
#include <mailutils/cstr.h>
#include <mailutils/cctype.h>
#include <mailutils/util.h>
#include <mailutils/errno.h>
/* Release a struct mu_param (passed as DATA) together with its name and
   value strings.  The signature matches what mu_list_set_destroy_item
   expects, so it can serve as a list item destructor.  */
void
mu_param_free (void *data)
{
  struct mu_param *param = (struct mu_param *) data;

  free (param->value);
  free (param->name);
  free (param);
}
/* List comparator for struct mu_param items: compare the names of A and B
   ignoring case.  Returns the result of mu_c_strcasecmp on the two
   names.  */
int
mu_param_cmp (void const *a, void const *b)
{
  char const *lhs = ((struct mu_param const *) a)->name;
  char const *rhs = ((struct mu_param const *) b)->name;

  return mu_c_strcasecmp (lhs, rhs);
}
static int parse_param (const char **input_ptr, mu_content_type_t ct);
static int parse_params (const char *input, mu_content_type_t ct);
static int parse_subtype (const char *input, mu_content_type_t ct);
static int parse_type (const char *input, mu_content_type_t ct);
/* Parse the media type, i.e. the part of INPUT up to the first '/'.

   Only alphanumeric characters, '-' and '_' are accepted; anything else
   (including end of string before the '/') yields MU_ERR_PARSE.  On
   success the type is stored in CT->type as a newly allocated,
   NUL-terminated string and parsing continues with the subtype.

   Returns 0 on success, MU_ERR_PARSE on malformed input, ENOMEM if memory
   is exhausted, or whatever parse_subtype returns.  */
static int
parse_type (const char *input, mu_content_type_t ct)
{
  size_t len;

  for (len = 0; input[len] != '/'; len++)
    {
      if (input[len] == 0
          || !(mu_isalnum (input[len])
               || input[len] == '-' || input[len] == '_'))
        return MU_ERR_PARSE;
    }

  /* One extra byte for the terminating NUL.  */
  ct->type = malloc (len + 1);
  if (!ct->type)
    return ENOMEM;
  memcpy (ct->type, input, len);
  ct->type[len] = 0;

  /* Skip the '/' separator.  */
  return parse_subtype (input + len + 1, ct);
}
/* Parse the media subtype, i.e. the part of INPUT up to the first ';' or
   the end of the string.

   Only alphanumeric characters, '-' and '_' are accepted; anything else
   yields MU_ERR_PARSE.  On success the subtype is stored in CT->subtype
   as a newly allocated, NUL-terminated string and parsing continues with
   the parameter list.

   Returns 0 on success, MU_ERR_PARSE on malformed input, ENOMEM if memory
   is exhausted, or whatever parse_params returns.  */
static int
parse_subtype (const char *input, mu_content_type_t ct)
{
  size_t len;

  for (len = 0; input[len] != 0 && input[len] != ';'; len++)
    {
      if (!(mu_isalnum (input[len])
            || input[len] == '-' || input[len] == '_'))
        return MU_ERR_PARSE;
    }

  /* One extra byte for the terminating NUL.  */
  ct->subtype = malloc (len + 1);
  if (!ct->subtype)
    return ENOMEM;
  memcpy (ct->subtype, input, len);
  ct->subtype[len] = 0;

  /* INPUT + LEN points either at ';' or at the end of the string.  */
  return parse_params (input + len, ct);
}
/* Parse the parameter list that follows the subtype.

   INPUT points right after the subtype.  A list is created in CT->param
   (with mu_param_free as item destructor and mu_param_cmp as comparator)
   and each ";name=value" group is appended to it by parse_param.  If any
   text is left once no further ';' is found, it is stored, with leading
   blanks skipped, in CT->trailer.

   Returns 0 on success or the error code of the failing step.  */
static int
parse_params (const char *input, mu_content_type_t ct)
{
  int status = mu_list_create (&ct->param);

  if (status)
    return status;
  mu_list_set_destroy_item (ct->param, mu_param_free);
  mu_list_set_comparator (ct->param, mu_param_cmp);

  for (;;)
    {
      if (*input != ';')
        break;
      /* Step over the ';' and any blanks preceding the parameter name.  */
      input = mu_str_skip_class (input + 1, MU_CTYPE_BLANK);
      status = parse_param (&input, ct);
      if (status)
        return status;
    }

  if (*input == 0)
    return status;

  /* Keep the unparsed remainder for the caller.  */
  input = mu_str_skip_class (input, MU_CTYPE_BLANK);
  ct->trailer = strdup (input);
  if (ct->trailer == NULL)
    return ENOMEM;
  return status;
}
/* Characters that may not appear in a token (RFC 2045 "tspecials").  */
static char tspecials[] = "()<>@,;:\\\"/[]?=";

/* True if C can be part of a token: it is above the space character and
   is not one of the tspecials.  */
#define ISTOKEN(c) ((unsigned char)(c) > ' ' && !strchr (tspecials, c))

/* Parse a single "name=value" parameter and append it to CT->param.

   *INPUT_PTR points at the first character of the parameter name.  The
   value is either a token or a double-quoted string; within a quoted
   string a backslash escapes the character that follows it, and the
   stored value has both the quotes and the escaping backslashes removed.

   On success *INPUT_PTR is advanced past the parameter and 0 is
   returned.  Returns MU_ERR_PARSE if the name is not followed by '=' or
   if a quoted string is not terminated, ENOMEM if memory is exhausted, or
   the error code from mu_list_append.  *INPUT_PTR is left untouched on
   failure.  */
static int
parse_param (const char **input_ptr, mu_content_type_t ct)
{
  const char *input = *input_ptr;
  size_t i = 0;           /* scan position within INPUT */
  size_t namelen;
  size_t valstart, vallen;
  size_t escapes = 0;     /* number of escaping backslashes in the value */
  struct mu_param *p;
  int rc;

  while (ISTOKEN (input[i]))
    i++;
  namelen = i;

  if (input[i] != '=')
    return MU_ERR_PARSE;
  i++;

  if (input[i] == '"')
    {
      i++;
      valstart = i;
      while (input[i] != '"')
        {
          if (input[i] == '\\')
            {
              /* The backslash itself is not part of the value; the
                 character after it is taken literally.  */
              escapes++;
              i++;
            }
          if (!input[i])
            return MU_ERR_PARSE;
          i++;
        }
      vallen = i - valstart - escapes;
      /* Skip the closing quote.  */
      i++;
    }
  else
    {
      valstart = i;
      while (ISTOKEN (input[i]))
        i++;
      vallen = i - valstart;
    }

  p = malloc (sizeof (*p));
  if (!p)
    return ENOMEM;
  p->name = malloc (namelen + 1);
  p->value = malloc (vallen + 1);
  if (!p->name || !p->value)
    {
      mu_param_free (p);
      return ENOMEM;
    }

  memcpy (p->name, input, namelen);
  p->name[namelen] = 0;

  if (escapes)
    {
      /* Copy exactly VALLEN unescaped characters.  Separate indices are
         used so that the scan position I stays valid for updating
         *INPUT_PTR below.  */
      const char *src = input + valstart;
      size_t from = 0;
      size_t to;

      for (to = 0; to < vallen; to++)
        {
          if (src[from] == '\\')
            from++;
          p->value[to] = src[from++];
        }
      p->value[vallen] = 0;
    }
  else
    {
      memcpy (p->value, input + valstart, vallen);
      p->value[vallen] = 0;
    }

  rc = mu_list_append (ct->param, p);
  if (rc)
    {
      mu_param_free (p);
      return rc;
    }

  *input_ptr = input + i;
  return 0;
}
/* Parse INPUT as the value of a Content-Type header.

   Leading blanks in INPUT are skipped.  On success 0 is returned and a
   newly allocated object is stored in *RETCT; the caller must release it
   with mu_content_type_destroy.  On failure nothing is stored in *RETCT
   and a non-zero code is returned: EINVAL if INPUT is NULL, ENOMEM if
   memory is exhausted, MU_ERR_PARSE if INPUT is malformed, or another
   error code propagated from the list functions.  */
int
mu_content_type_parse (const char *input, mu_content_type_t *retct)
{
  int rc;
  mu_content_type_t ct;

  if (!input)
    return EINVAL;

  ct = calloc (1, sizeof (*ct));
  if (!ct)
    /* Report allocation failure the same way the rest of the parser
       does, without relying on calloc having set errno.  */
    return ENOMEM;

  rc = parse_type (mu_str_skip_class (input, MU_CTYPE_BLANK), ct);
  if (rc)
    /* Releases whatever parts were already filled in.  */
    mu_content_type_destroy (&ct);
  else
    *retct = ct;

  return rc;
}
/* Free the content type object *PPTR together with its type, subtype and
   trailer strings and its parameter list, then set *PPTR to NULL.
   Does nothing if PPTR or *PPTR is NULL.  */
void
mu_content_type_destroy (mu_content_type_t *pptr)
{
  mu_content_type_t ct;

  if (pptr == NULL || *pptr == NULL)
    return;

  ct = *pptr;
  free (ct->type);
  free (ct->subtype);
  free (ct->trailer);
  mu_list_destroy (&ct->param);
  free (ct);
  *pptr = NULL;
}
......@@ -42,6 +42,7 @@ AM_CPPFLAGS = @MU_LIB_COMMON_INCLUDES@
noinst_PROGRAMS = \
addr\
cidr\
conttype\
debugspec\
decode2047\
encode2047\
......
#include <config.h>
#include <mailutils/mailutils.h>
#include <assert.h>
/* mu_list_foreach callback: print one parameter as "NN: name=value".

   ITEM is a struct mu_param; DATA points to a size_t counter that supplies
   the printed index and is incremented after each item.  Always returns
   0.  */
static int
print_param (void *item, void *data)
{
  struct mu_param *param = item;
  size_t *counter = data;

  printf ("%2zu: %s=%s\n", *counter, param->name, param->value);
  *counter += 1;
  return 0;
}
/* Parse INPUT as a Content-Type value and print the result on standard
   output: the type, the subtype, the trailer (if any) and the numbered
   parameters (if any).

   Returns 0 on success.  If parsing fails, reports the error via mu_error
   and returns 1.  */
int
parse (char const *input)
{
  mu_content_type_t ct;
  int status = mu_content_type_parse (input, &ct);

  if (status != 0)
    {
      mu_error ("%s", mu_strerror (status));
      return 1;
    }

  printf ("type = %s\n", ct->type);
  printf ("subtype = %s\n", ct->subtype);
  if (ct->trailer != NULL)
    printf ("trailer = %s\n", ct->trailer);

  if (!mu_list_is_empty (ct->param))
    {
      size_t index = 0;
      mu_list_foreach (ct->param, print_param, &index);
    }

  mu_content_type_destroy (&ct);
  return 0;
}
/* Test driver for mu_content_type_parse.

   With exactly one command line argument, parse it and exit with the
   status returned by parse.  Otherwise read content type strings from the
   standard input, one per line, and parse each of them.

   Exit status is 0 if every line was parsed successfully, and 1 if at
   least one line failed to parse or if reading the input failed.  */
int
main (int argc, char **argv)
{
  char *buf = NULL;
  size_t size = 0, n;
  int rc;
  /* Kept apart from RC, which is overwritten by every read.  */
  int status = 0;

  mu_set_program_name (argv[0]);
  mu_stdstream_setup (MU_STDSTREAM_RESET_NONE);

  if (argc == 2)
    return parse (argv[1]);

  while ((rc = mu_stream_getline (mu_strin, &buf, &size, &n)) == 0 && n > 0)
    {
      mu_rtrim_class (buf, MU_CTYPE_ENDLN);
      if (parse (buf))
        status = 1;
    }

  if (rc)
    {
      /* The loop ended because of a read error, not end of input.  */
      mu_error ("%s", mu_strerror (rc));
      status = 1;
    }

  return status;
}