Commit eae41894 eae41894c2614ddc2c3732014cb62bf68ab4e16a by Sergey Poznyakoff

Fix parsing of Content-Type and Content-Disposition headers

Correctly handle character set and language information embedded in
parameter values, as well as possible parameter value continuations,
as per RFC 2231, Section 3.

* include/mailutils/assoc.h (mu_assoc_is_empty): New proto.
* libmailutils/base/assoc.c (mu_assoc_is_empty): New function.

* include/mailutils/mime.h (mu_mime_header_parse): Output charset is
char const *.
* libmailutils/mime/mimehdr.c (mu_mime_header_parse): Likewise.

* include/mailutils/util.h (mu_content_type) <param>: Change type to
mu_assoc_t.
(mu_param): Remove structure declaration.
(mu_content_type_parse): Take an optional output charset.
* libmailutils/base/ctparse.c (mu_content_type_parse): Rewrite using
mu_mime_header_parse.

* imap4d/fetch.c (send_parameter_list): Rewrite using mu_mime_header_parse.
(format_param): Use base64 for parameter values with explicit charsets.

* libmailutils/tests/conttype.c: Reflect the changes to struct
mu_content_type.
1 parent 7dee238e
......@@ -17,7 +17,7 @@
#include "imap4d.h"
#include <ctype.h>
#include <mailutils/argcv.h>
#include <mailutils/assoc.h>
/* Taken from RFC2060
fetch ::= "FETCH" SPACE set SPACE ("ALL" / "FULL" /
......@@ -164,95 +164,47 @@ fetch_send_header_address (mu_header_t header, const char *name,
fetch_send_address (defval);
}
static void
imap4d_ws_alloc_die (struct mu_wordsplit *wsp)
{
imap4d_bye (ERR_NO_MEM);
}
#define IMAP4D_WS_FLAGS \
(MU_WRDSF_DEFFLAGS | MU_WRDSF_DELIM | \
MU_WRDSF_ENOMEMABRT | MU_WRDSF_ALLOC_DIE)
static int format_param (char const *name, void *item, void *data);
/* Send parameter list for the bodystructure. */
static void
send_parameter_list (const char *buffer)
{
struct mu_wordsplit ws;
int rc;
char *value;
mu_assoc_t param;
if (!buffer)
if (!buffer || mu_str_skip_class (buffer, MU_CTYPE_BLANK)[0] == 0)
{
io_sendf ("NIL");
return;
}
ws.ws_delim = " \t\r\n;=";
ws.ws_alloc_die = imap4d_ws_alloc_die;
if (mu_wordsplit (buffer, &ws, IMAP4D_WS_FLAGS))
rc = mu_mime_header_parse (buffer, NULL, &value, &param);
if (rc)
{
mu_error (_("%s failed: %s"), "mu_wordsplit",
mu_wordsplit_strerror (&ws));
return; /* FIXME: a better error handling, maybe? */
}
if (ws.ws_wordc == 0)
mu_diag_funcall (MU_DIAG_ERROR, "mu_content_type_parse", buffer, rc);
io_sendf ("NIL");
else
{
char *p;
io_sendf ("(");
p = ws.ws_wordv[0];
io_send_qstring (p);
if (ws.ws_wordc > 1)
{
int i, space = 0;
char *lvalue = NULL;
return;
}
io_sendf ("(");
for (i = 1; i < ws.ws_wordc; i++)
{
if (lvalue)
{
if (space)
io_send_qstring (value);
io_sendf (" ");
io_send_qstring (lvalue);
lvalue = NULL;
space = 1;
}
switch (ws.ws_wordv[i][0])
{
case ';':
continue;
case '=':
if (++i < ws.ws_wordc)
if (mu_assoc_is_empty (param))
{
io_sendf (" ");
io_send_qstring (ws.ws_wordv[i]);
}
break;
default:
lvalue = ws.ws_wordv[i];
}
io_sendf ("NIL");
}
if (lvalue)
else
{
if (space)
io_sendf (" ");
io_send_qstring (lvalue);
}
int first = 1;
io_sendf ("(");
mu_assoc_foreach (param, format_param, &first);
io_sendf (")");
}
else
io_sendf (" NIL");
io_sendf (")");
}
mu_wordsplit_free (&ws);
free (value);
mu_assoc_destroy (&param);
}
static void
......@@ -263,7 +215,7 @@ fetch_send_header_list (mu_header_t header, const char *name,
if (space)
io_sendf (" ");
if (mu_header_aget_value (header, name, &buffer) == 0)
if (mu_header_aget_value_unfold (header, name, &buffer) == 0)
{
send_parameter_list (buffer);
free (buffer);
......@@ -315,15 +267,31 @@ fetch_envelope0 (mu_message_t msg)
static int fetch_bodystructure0 (mu_message_t message, int extension);
static int
format_param (void *item, void *data)
format_param (char const *name, void *item, void *data)
{
struct mu_param *p = item;
struct mu_mime_param *p = item;
int *first = data;
if (!*first)
io_sendf (" ");
io_send_qstring (p->name);
io_send_qstring (name);
io_sendf (" ");
if (p->cset)
{
char *text;
int rc = mu_rfc2047_encode (p->cset, "base64", p->value, &text);
if (rc == 0)
{
io_send_qstring (text);
free (text);
}
else
{
mu_diag_funcall (MU_DIAG_ERROR, "mu_rfc2047_encode", p->value, rc);
io_send_qstring (p->value);
}
}
else
io_send_qstring (p->value);
*first = 0;
return 0;
......@@ -338,19 +306,19 @@ get_content_type (mu_header_t hdr, mu_content_type_t *ctp, char const *dfl)
rc = mu_header_aget_value_unfold (hdr, MU_HEADER_CONTENT_TYPE, &buffer);
if (rc == 0)
{
rc = mu_content_type_parse (buffer, ctp);
rc = mu_content_type_parse (buffer, NULL, ctp);
if (rc == MU_ERR_PARSE)
{
mu_error (_("malformed content type: %s"), buffer);
if (dfl)
rc = mu_content_type_parse (dfl, ctp);
rc = mu_content_type_parse (dfl, NULL, ctp);
}
else if (rc)
mu_diag_funcall (MU_DIAG_ERROR, "mu_content_type_parse", buffer, rc);
free (buffer);
}
else if (rc == MU_ERR_NOENT && dfl)
rc = mu_content_type_parse (dfl, ctp);
rc = mu_content_type_parse (dfl, NULL, ctp);
return rc;
}
......@@ -424,13 +392,13 @@ bodystructure (mu_message_t msg, int extension)
io_send_qstring (ct->subtype);
/* body parameter parenthesized list: Content-type attributes */
if (mu_list_is_empty (ct->param))
if (mu_assoc_is_empty (ct->param))
io_sendf (" NIL");
else
{
int first = 1;
io_sendf (" (");
mu_list_foreach (ct->param, format_param, &first);
mu_assoc_foreach (ct->param, format_param, &first);
io_sendf (")");
}
mu_content_type_destroy (&ct);
......@@ -557,11 +525,11 @@ fetch_bodystructure0 (mu_message_t message, int extension)
io_send_qstring (ct->subtype);
/* The extension data for multipart. */
if (extension && !mu_list_is_empty (ct->param))
if (extension && !mu_assoc_is_empty (ct->param))
{
int first = 1;
io_sendf (" (");
mu_list_foreach (ct->param, format_param, &first);
mu_assoc_foreach (ct->param, format_param, &first);
io_sendf (")");
}
else
......@@ -581,7 +549,7 @@ fetch_bodystructure0 (mu_message_t message, int extension)
fetch_send_header_list (header, MU_HEADER_CONTENT_DISPOSITION,
NULL, 1);
/* body language: Content-Language. */
fetch_send_header_list (header, MU_HEADER_CONTENT_LANGUAGE,
fetch_send_header_value (header, MU_HEADER_CONTENT_LANGUAGE,
NULL, 1);
}
else
......
......@@ -42,6 +42,7 @@ int mu_assoc_get_iterator (mu_assoc_t assoc, mu_iterator_t *piterator);
int mu_assoc_remove (mu_assoc_t assoc, const char *name);
int mu_assoc_set_destroy_item (mu_assoc_t assoc, mu_deallocator_t fn);
int mu_assoc_count (mu_assoc_t assoc, size_t *pcount);
int mu_assoc_is_empty (mu_assoc_t assoc);
typedef int (*mu_assoc_action_t) (char const *, void *, void *);
int mu_assoc_foreach (mu_assoc_t assoc, mu_assoc_action_t action, void *data);
......
......@@ -70,7 +70,7 @@ int mu_base64_decode (const unsigned char *input, size_t input_len,
int mu_mime_param_assoc_create (mu_assoc_t *passoc);
int mu_mime_param_assoc_add (mu_assoc_t assoc, const char *name);
int mu_mime_header_parse (const char *text, char *charset, char **pvalue,
int mu_mime_header_parse (const char *text, const char *charset, char **pvalue,
mu_assoc_t *paramtab);
int mu_mime_header_parse_subset (const char *text, const char *charset,
char **pvalue,
......
......@@ -131,18 +131,13 @@ struct mu_content_type
char *type;
char *subtype;
char *trailer;
mu_list_t param;
mu_assoc_t param;
};
typedef struct mu_content_type *mu_content_type_t;
struct mu_param
{
char *name;
char *value;
};
int mu_content_type_parse (const char *input, mu_content_type_t *retct);
int mu_content_type_parse (const char *input, const char *charset,
mu_content_type_t *retct);
void mu_content_type_destroy (mu_content_type_t *pptr);
/* ----------------------- */
......
......@@ -643,6 +643,12 @@ mu_assoc_count (mu_assoc_t assoc, size_t *pcount)
}
/* Emptiness test for ASSOC.
   Yields 1 when ASSOC is a null pointer or when its head pointer is
   null, and 0 otherwise. */
int
mu_assoc_is_empty (mu_assoc_t assoc)
{
  /* A missing table is reported the same way as one without a head. */
  if (!assoc)
    return 1;
  return !assoc->head;
}
int
mu_assoc_foreach (mu_assoc_t assoc, mu_assoc_action_t action, void *data)
{
mu_iterator_t itr;
......
......@@ -21,206 +21,68 @@
#include <stdlib.h>
#include <string.h>
#include <mailutils/types.h>
#include <mailutils/cstr.h>
#include <mailutils/cctype.h>
#include <mailutils/mime.h>
#include <mailutils/assoc.h>
#include <mailutils/util.h>
#include <mailutils/errno.h>
void
mu_param_free (void *data)
{
struct mu_param *p = data;
free (p->name);
free (p->value);
free (p);
}
int
mu_param_cmp (void const *a, void const *b)
{
struct mu_param const *p1 = a;
struct mu_param const *p2 = b;
return mu_c_strcasecmp (p1->name, p2->name);
}
static int parse_param (const char **input_ptr, mu_content_type_t ct);
static int parse_params (const char *input, mu_content_type_t ct);
static int parse_subtype (const char *input, mu_content_type_t ct);
static int parse_type (const char *input, mu_content_type_t ct);
static int
parse_type (const char *input, mu_content_type_t ct)
{
size_t i;
for (i = 0; input[i] != '/'; i++)
{
if (input[i] == 0
|| !(mu_isalnum (input[i]) || input[i] == '-' || input[i] == '_'))
return MU_ERR_PARSE;
}
ct->type = malloc (i + 1);
if (!ct->type)
return ENOMEM;
memcpy (ct->type, input, i);
ct->type[i] = 0;
return parse_subtype (input + i + 1, ct);
}
static char tspecials[] = "()<>@,;:\\\"/[]?=";
#define ISTOKEN(c) ((unsigned char)(c) > ' ' && !strchr (tspecials, c))
static int
parse_subtype (const char *input, mu_content_type_t ct)
{
size_t i;
for (i = 0; !(input[i] == 0 || input[i] == ';'); i++)
{
if (!ISTOKEN (input[i]))
return MU_ERR_PARSE;
}
ct->subtype = malloc (i + 1);
if (!ct->subtype)
return ENOMEM;
memcpy (ct->subtype, input, i);
ct->subtype[i] = 0;
return parse_params (input + i, ct);
}
static int
parse_params (const char *input, mu_content_type_t ct)
content_type_parse (const char *input, const char *charset,
mu_content_type_t ct)
{
int rc;
char *value, *p;
rc = mu_list_create (&ct->param);
rc = mu_mime_header_parse (input, charset, &value, &ct->param);
if (rc)
return rc;
mu_list_set_destroy_item (ct->param, mu_param_free);
mu_list_set_comparator (ct->param, mu_param_cmp);
while (*input == ';')
p = strchr (value, '/');
if (p)
{
input = mu_str_skip_class (input + 1, MU_CTYPE_SPACE);
rc = parse_param (&input, ct);
if (rc)
return rc;
}
if (*input)
size_t len = p - value;
ct->type = malloc (len + 1);
if (!ct->type)
{
input = mu_str_skip_class (input, MU_CTYPE_SPACE);
ct->trailer = strdup (input);
if (!ct->trailer)
return ENOMEM;
}
rc = errno;
free (value);
return rc;
}
static int
parse_param (const char **input_ptr, mu_content_type_t ct)
{
const char *input = *input_ptr;
size_t i = 0;
size_t namelen;
size_t valstart, vallen;
struct mu_param *p;
int rc;
unsigned quotechar = 0;
while (ISTOKEN (input[i]))
i++;
namelen = i;
if (input[i] != '=')
return MU_ERR_PARSE;
i++;
if (input[i] == '"')
{
i++;
valstart = i;
while (input[i] != '"')
{
if (input[i] == '\\')
{
quotechar++;
i++;
}
if (!input[i])
return MU_ERR_PARSE;
i++;
}
vallen = i - valstart - quotechar;
i++;
}
else
{
valstart = i;
while (ISTOKEN (input[i]))
i++;
vallen = i - valstart;
}
memcpy (ct->type, value, len);
ct->type[len] = 0;
p = malloc (sizeof (*p));
if (!p)
return ENOMEM;
p->name = malloc (namelen + 1);
p->value = malloc (vallen + 1);
if (!p->name || !p->value)
{
mu_param_free (p);
return ENOMEM;
}
memcpy (p->name, input, namelen);
p->name[namelen] = 0;
if (quotechar)
{
size_t j;
const char *src = input + valstart;
for (i = j = 0; j < vallen; i++, j++)
ct->subtype = strdup (p + 1);
if (!ct->subtype)
{
if (src[j] == '\\')
j++;
p->value[i] = src[j];
rc = errno;
free (value);
return rc;
}
p->value[i] = 0;
}
else
{
memcpy (p->value, input + valstart, vallen);
p->value[vallen] = 0;
ct->type = value;
ct->subtype = NULL;
}
rc = mu_list_append (ct->param, p);
if (rc)
{
mu_param_free (p);
return rc;
}
*input_ptr = input + i;
return 0;
}
int
mu_content_type_parse (const char *input, mu_content_type_t *retct)
mu_content_type_parse (const char *input, const char *charset,
mu_content_type_t *retct)
{
int rc;
mu_content_type_t ct;
if (!input)
return EINVAL;
if (!retct)
return MU_ERR_OUT_PTR_NULL;
ct = calloc (1, sizeof (*ct));
if (!ct)
return errno;
rc = parse_type (mu_str_skip_class (input, MU_CTYPE_SPACE), ct);
rc = content_type_parse (input, charset, ct);
if (rc)
mu_content_type_destroy (&ct);
else
......@@ -238,7 +100,7 @@ mu_content_type_destroy (mu_content_type_t *pptr)
free (ct->type);
free (ct->subtype);
free (ct->trailer);
mu_list_destroy (&ct->param);
mu_assoc_destroy (&ct->param);
free (ct);
*pptr = NULL;
}
......
......@@ -639,7 +639,7 @@ mu_mime_header_parse_subset (const char *text, const char *cset,
corresponding data are of no interest to the caller.
*/
int
mu_mime_header_parse (const char *text, char *cset, char **pvalue,
mu_mime_header_parse (const char *text, char const *cset, char **pvalue,
mu_assoc_t *passoc)
{
int rc;
......
......@@ -3,11 +3,11 @@
#include <assert.h>
static int
print_param (void *item, void *data)
print_param (char const *name, void *item, void *data)
{
size_t *n = data;
struct mu_param *p = item;
printf ("%2zu: %s=%s\n", *n, p->name, p->value);
struct mu_mime_param *p = item;
printf ("%2zu: %s=%s\n", *n, name, p->value);
++*n;
return 0;
}
......@@ -18,7 +18,7 @@ parse (char const *input)
mu_content_type_t ct;
int rc;
rc = mu_content_type_parse (input, &ct);
rc = mu_content_type_parse (input, NULL, &ct);
if (rc)
{
mu_error ("%s", mu_strerror (rc));
......@@ -29,10 +29,10 @@ parse (char const *input)
printf ("subtype = %s\n", ct->subtype);
if (ct->trailer)
printf ("trailer = %s\n", ct->trailer);
if (!mu_list_is_empty (ct->param))
if (!mu_assoc_is_empty (ct->param))
{
size_t n = 0;
mu_list_foreach (ct->param, print_param, &n);
mu_assoc_foreach (ct->param, print_param, &n);
}
mu_content_type_destroy (&ct);
return 0;
......