Commit 3c5aed39 3c5aed3965c70e6e1a894005b1adc7a407d0aae9 by Sergey Poznyakoff

Re-implement parser for RFC-2231 header fields.

The new implementation is able to return all parameters at once,
in an associative array. A subset of parameters can be requested.
The data are automatically converted to the output charset.

In addition, RFC-2047 parser is extended to support language
specifications (RFC-2231, chapter 5).

* include/mailutils/message.h (MU_MIMEHDR_CSINFO)
(MU_MIMEHDR_MULTILINE): Remove, not public anymore.
(mu_mimehdr_get_param,mu_mimehdr_aget_param)
(mu_message_aget_attachment_name)
(mu_message_get_attachment_name): Remove pflags argument.
* include/mailutils/mime.h (mu_mime_param): New struct.
(mu_rfc2047_decode_param)
(mu_mime_header_parse,mu_mime_header_parse_subset): New proto.
* libmailutils/base/rfc2047.c (_rfc2047_decode_param): New
auxiliary function.  Use memory stream to collect data.
(mu_rfc2047_decode): Rewrite as a wrapper around the above.
(mu_rfc2047_decode_param): New function.
* libmailutils/filter/decode.c (mu_decode_filter_args): Pass actual
(instead of maximal) number of arguments to mu_filter_chain_create.
* libmailutils/mime/mimehdr.c: Rewrite from scratch.
* libmailutils/tests/.gitignore: Add mimehdr.
* libmailutils/tests/Makefile.am (noinst_PROGRAMS): Add mimehdr.
(TESTSUITE_AT): Add mimehdr.at.
* libmailutils/tests/mimehdr.at: New test.
* libmailutils/tests/mimehdr.c: New test program.
* libmailutils/tests/testsuite.at: Include.
1 parent c22d07a3
......@@ -162,31 +162,20 @@ extern int mu_mime_io_buffer_aget_charset (mu_mime_io_buffer_t info,
const char **charset);
/* Bit values for *pflags in functions below */
#define MU_MIMEHDR_MULTILINE 0x01 /* Parameter was multiline */
#define MU_MIMEHDR_CSINFO 0x02 /* Parameter contains charset/language
info */
extern int mu_mimehdr_get_disp (const char *str, char *buf, size_t bufsz,
size_t *retsz);
extern int mu_mimehdr_aget_disp (const char *str, char **pvalue);
extern int mu_mimehdr_get_param (const char *str, const char *param,
char *buf, size_t bufsz, size_t *retsz,
int *pflags);
char *buf, size_t bufsz, size_t *retsz);
extern int mu_mimehdr_aget_param (const char *str, const char *param,
char **pval, int *pflags);
extern int mu_mimehdr_decode_param (const char *value, int csinfo,
const char *charset,
char **pval, char **plang);
char **pval);
extern int mu_mimehdr_aget_decoded_param (const char *str, const char *param,
const char *charset,
char **pval, char **plang);
extern int mu_message_get_attachment_name (mu_message_t, char *name,
size_t bufsz, size_t* sz,
int *pflags);
extern int mu_message_aget_attachment_name (mu_message_t, char **name,
int *pflags);
size_t bufsz, size_t* sz);
extern int mu_message_aget_attachment_name (mu_message_t, char **name);
extern int mu_message_aget_decoded_attachment_name (mu_message_t msg,
const char *charset,
char **name,
......
......@@ -29,6 +29,13 @@
extern "C" {
#endif
/* A decoded header parameter value together with its charset and
   language information.  When filled in by the RFC 2047 decoder, the
   non-NULL members are heap-allocated (strdup/malloc) and are released
   with free. */
struct mu_mime_param
{
/* Language tag: for RFC 2047 input, the text following `*' in the
   charset field of an encoded word.  NULL if no language was given. */
char *lang;
/* Charset name: for RFC 2047 input, a copy of the requested output
   charset if one was supplied, otherwise the charset named in the first
   encoded word.  NULL if neither is available. */
char *cset;
/* The decoded, NUL-terminated value. */
char *value;
};
int mu_mime_create (mu_mime_t *pmime, mu_message_t msg, int flags);
void mu_mime_destroy (mu_mime_t *pmime);
void mu_mime_ref (mu_mime_t mime);
......@@ -49,13 +56,22 @@ int mu_rfc2047_decode (const char *tocode, const char *fromstr,
int mu_rfc2047_encode (const char *charset, const char *encoding,
const char *text, char **result);
int mu_rfc2047_decode_param (const char *tocode, const char *input,
struct mu_mime_param *param);
int mu_base64_encode (const unsigned char *input, size_t input_len,
unsigned char **output, size_t * output_len);
int mu_base64_decode (const unsigned char *input, size_t input_len,
unsigned char **output, size_t * output_len);
int mu_mime_header_parse (const char *text, char *charset, char **pvalue,
mu_assoc_t *paramtab);
int mu_mime_header_parse_subset (const char *text, const char *charset,
char **pvalue,
mu_assoc_t assoc);
#ifdef __cplusplus
}
#endif
......
......@@ -28,20 +28,9 @@
#include <mailutils/stream.h>
#include <mailutils/filter.h>
#include <mailutils/errno.h>
#include <mailutils/mime.h>
#include <mailutils/util.h>
/* Grow the heap buffer *BUFP, whose current size is *BUFSIZEP bytes,
   by INCR bytes.

   On success, store the new pointer in *BUFP and the new size in
   *BUFSIZEP and return 0.  Return 1, leaving both untouched, if the
   requested size does not fit in size_t or if realloc fails; in that
   case the original buffer is still valid and still owned by the
   caller. */
static int
realloc_buffer (char **bufp, size_t *bufsizep, size_t incr)
{
  size_t newsize = *bufsizep + incr;
  char *newp;

  /* Unsigned addition wraps around silently.  A wrapped sum would make
     realloc shrink the buffer while the caller believes it has grown. */
  if (newsize < *bufsizep)
    return 1;

  newp = realloc (*bufp, newsize);
  if (newp == NULL)
    return 1;
  *bufp = newp;
  *bufsizep = newsize;
  return 0;
}
int
getword (char **pret, const char **pstr, int delim)
{
......@@ -65,52 +54,32 @@ getword (char **pret, const char **pstr, int delim)
return 0;
}
int
mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr)
static int
_rfc2047_decode_param (const char *tocode, const char *input,
struct mu_mime_param *param)
{
int status = 0;
const char *fromstr;
char *buffer;
size_t bufsize;
size_t bufpos;
size_t run_count = 0;
char *fromcode = NULL;
char *encoding_type = NULL;
char *encoded_text = NULL;
char *tocodetmp = NULL;
mu_stream_t str;
#define BUFINC 128
#define CHKBUF(count) do { \
if (bufpos+count >= bufsize) \
{ \
size_t s = bufpos + count - bufsize; \
if (s < BUFINC) \
s = BUFINC; \
if (realloc_buffer (&buffer, &bufsize, s)) \
{ \
free (buffer); \
free (fromcode); \
free (encoding_type); \
free (encoded_text); \
return ENOMEM; \
} \
} \
} while (0)
if (!input)
return EINVAL;
if (!ptostr)
return MU_ERR_OUT_PTR_NULL;
memset (param, 0, sizeof (*param));
fromstr = input;
status = mu_memory_stream_create (&str, MU_STREAM_RDWR);
if (status)
return status;
/* Allocate the buffer. It is assumed that the encoded string is always
longer than its decoded variant, so it is safe to use its length
as the first estimate */
bufsize = strlen (fromstr) + 1;
buffer = malloc (bufsize);
if (buffer == NULL)
return ENOMEM;
bufpos = 0;
if (tocode && (param->cset = strdup (tocode)) == NULL)
{
mu_stream_destroy (&str);
return ENOMEM;
}
fromstr = input;
while (*fromstr)
{
......@@ -119,13 +88,39 @@ mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr)
mu_stream_t filter = NULL;
mu_stream_t in_stream = NULL;
const char *filter_type = NULL;
size_t nbytes = 0, size;
size_t size;
const char *sp = fromstr + 2;
char tmp[128];
char *lang;
status = getword (&fromcode, &sp, '?');
if (status)
break;
lang = strchr (fromcode, '*');
if (lang)
*lang++ = 0;
if (!param->cset)
{
param->cset = strdup (fromcode);
if (!param->cset)
{
status = ENOMEM;
break;
}
}
if (lang && !param->lang && (param->lang = strdup (lang)) == NULL)
{
status = ENOMEM;
break;
}
if (!tocode)
{
if ((tocodetmp = strdup (fromcode)) == NULL)
{
status = ENOMEM;
break;
}
tocode = tocodetmp;
}
status = getword (&encoding_type, &sp, '?');
if (status)
break;
......@@ -162,22 +157,12 @@ mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr)
mu_static_memory_stream_create (&in_stream, encoded_text, size);
mu_stream_seek (in_stream, 0, MU_SEEK_SET, NULL);
status = mu_decode_filter (&filter, in_stream, filter_type, fromcode,
tocode);
status = mu_decode_filter (&filter, in_stream, filter_type,
fromcode, tocode);
mu_stream_unref (in_stream);
if (status != 0)
break;
while ((status =
mu_stream_read (filter, tmp, sizeof (tmp), &nbytes)) == 0
&& nbytes)
{
CHKBUF (nbytes);
memcpy (buffer + bufpos, tmp, nbytes);
bufpos += nbytes;
}
mu_stream_close (filter);
status = mu_stream_copy (str, filter, 0, NULL);
mu_stream_destroy (&filter);
if (status)
......@@ -198,44 +183,89 @@ mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr)
{
if (--run_count)
{
CHKBUF (run_count);
memcpy (buffer + bufpos, fromstr - run_count, run_count);
bufpos += run_count;
status = mu_stream_write (str, fromstr - run_count,
run_count, NULL);
if (status)
break;
run_count = 0;
}
CHKBUF (1);
buffer[bufpos++] = *fromstr++;
status = mu_stream_write (str, fromstr, 1, NULL);
if (status)
break;
fromstr++;
}
}
else
{
CHKBUF (1);
buffer[bufpos++] = *fromstr++;
status = mu_stream_write (str, fromstr, 1, NULL);
if (status)
break;
fromstr++;
}
}
if (*fromstr)
{
size_t len = strlen (fromstr);
CHKBUF (len);
memcpy (buffer + bufpos, fromstr, len);
bufpos += len;
}
if (status == 0 && *fromstr)
status = mu_stream_write (str, fromstr, strlen (fromstr), NULL);
CHKBUF (1);
buffer[bufpos++] = 0;
free (fromcode);
free (encoding_type);
free (encoded_text);
free (tocodetmp);
if (status == 0)
{
mu_off_t size;
if (status)
free (buffer);
else
*ptostr = realloc (buffer, bufpos);
mu_stream_size (str, &size);
param->value = malloc (size + 1);
if (!param->value)
status = ENOMEM;
else
{
mu_stream_seek (str, 0, MU_SEEK_SET, NULL);
status = mu_stream_read (str, param->value, size, NULL);
param->value[size] = 0;
}
}
mu_stream_destroy (&str);
return status;
}
int
mu_rfc2047_decode_param (const char *tocode, const char *input,
struct mu_mime_param *param)
{
int rc;
struct mu_mime_param tmp;
if (!input)
return EINVAL;
if (!param)
return MU_ERR_OUT_PTR_NULL;
rc = _rfc2047_decode_param (tocode, input, &tmp);
if (rc == 0)
*param = tmp;
return rc;
}
int
mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr)
{
int rc;
struct mu_mime_param param;
if (!input)
return EINVAL;
if (!ptostr)
return MU_ERR_OUT_PTR_NULL;
rc = _rfc2047_decode_param (tocode, input, &param);
free (param.cset);
free (param.lang);
if (rc == 0)
*ptostr = param.value;
return rc;
}
/**
Encode a header according to RFC 2047
......
......@@ -73,7 +73,7 @@ mu_decode_filter_args (mu_stream_t *pfilter, mu_stream_t input,
rc = mu_filter_chain_create (pfilter, input,
MU_FILTER_DECODE, MU_STREAM_READ,
xargc, xargv);
i, xargv);
free (xargv);
return rc;
}
......
......@@ -16,6 +16,7 @@ fsfolder
imapio
listop
mailcap
mimehdr
prop
scantime
strftime
......
......@@ -51,6 +51,7 @@ noinst_PROGRAMS = \
imapio\
listop\
mailcap\
mimehdr\
prop\
scantime\
strftime\
......@@ -88,6 +89,7 @@ TESTSUITE_AT = \
linecon.at\
list.at\
mailcap.at\
mimehdr.at\
prop.at\
scantime.at\
strftime.at\
......
# This file is part of GNU Mailutils. -*- Autotest -*-
# Copyright (C) 2011 Free Software Foundation, Inc.
#
# GNU Mailutils is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 3, or (at
# your option) any later version.
#
# GNU Mailutils is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>.
# Warning: This text contains 8-bit UTF-8
AT_BANNER(RFC 2231 header fields)
dnl ---------------------------------------------------------------------
dnl MIMEHDR([NAME], [KW], [OPT], [INPUT], [STDOUT = `'], [STDERR = `'])
dnl
m4_pushdef([MIMEHDR],[
m4_pushdef([MU_TEST_GROUP],[mimehdr])
m4_pushdef([MU_TEST_KEYWORDS],[mimehdr rfc2231])
m4_pushdef([MU_TEST_COMMAND],[mimehdr $3])
MU_GENERIC_TEST([$1],[$2],[$4],[],[$5],[$6])
m4_popdef([MU_TEST_COMMAND])
m4_popdef([MU_TEST_KEYWORDS])
m4_popdef([MU_TEST_GROUP])
])
dnl ---------------------------------------------------------------------
MIMEHDR([simple],[mimehdr00 mimehdr-simple],
[],
[message/external-body; access-type=URL;
URL="ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
],
[message/external-body
access-type=URL
URL=ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar
])
MIMEHDR([continuation],[mimehdr01 mimehdr-cont mimehdr-cont-00],
[],
[message/external-body; access-type=URL;
URL*0="ftp://";
URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
],
[message/external-body
access-type=URL
URL=ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar
])
MIMEHDR([charset (2047)],[mimehdr02 mimehdr-charset-rfc2047 mimehdr-charset-00],
[],
[attachment; charset=utf-8;
filename==?UTF-8?B?zrHPgc+HzrXOr86/IM6zzrnOsSDPhM63zr0gzrTOv866zrnOvM6xz4POr86x==?=
],
[attachment
charset=utf-8
filename=αρχείο για την δοκιμασία
])
MIMEHDR([charset with language (2047)],[mimehdr03 mimehdr-charset-rfc2047 mimehdr-charset-01],
[],
[attachment; charset=utf-8;
filename==?UTF-8*el?B?zrHPgc+HzrXOr86/IM6zzrnOsSDPhM63zr0gzrTOv866zrnOvM6xz4POr86x==?=
],
[attachment
charset=utf-8
filename(lang:el/UTF-8)=αρχείο για την δοκιμασία
])
MIMEHDR([no charset (2231)],[mimehdr04 mimehdr-no-charset-rfc2231 mimehdr-nocharset-00],
[],
[attachment; charset=utf-8;
filename*=%CE%B1%CF%81%CF%87%CE%B5%CE%AF%CE%BF%20%CE%B3%CE%B9%CE%B1%20%CF%84%CE%B7%CE%BD%20%CE%B4%CE%BF%CE%BA%CE%B9%CE%BC%CE%B1%CF%83%CE%AF%CE%B1
],
[attachment
charset=utf-8
filename=αρχείο για την δοκιμασία
])
MIMEHDR([charset (2231)],[mimehdr05 mimehdr-charset-rfc2231 mimehdr-charset-rfc2231-00 mimehdr-charset-03],
[],
[attachment; charset=utf-8;
filename*=UTF-8''%CE%B1%CF%81%CF%87%CE%B5%CE%AF%CE%BF%20%CE%B3%CE%B9%CE%B1%20%CF%84%CE%B7%CE%BD%20%CE%B4%CE%BF%CE%BA%CE%B9%CE%BC%CE%B1%CF%83%CE%AF%CE%B1
],
[attachment
charset=utf-8
filename(lang:/UTF-8)=αρχείο για την δοκιμασία
])
MIMEHDR([charset with language (2231)],[mimehdr06 mimehdr-charset-rfc2231 mimehdr-charset-rfc2231-01 mimehdr-charset-04],
[],
[attachment; charset=utf-8;
filename*=UTF-8'el_GR'%CE%B1%CF%81%CF%87%CE%B5%CE%AF%CE%BF%20%CE%B3%CE%B9%CE%B1%20%CF%84%CE%B7%CE%BD%20%CE%B4%CE%BF%CE%BA%CE%B9%CE%BC%CE%B1%CF%83%CE%AF%CE%B1
],
[attachment
charset=utf-8
filename(lang:el_GR/UTF-8)=αρχείο για την δοκιμασία
])
MIMEHDR([charset with language and continuation (2231)],[mimehdr07 mimehdr-charset-rfc2231 mimehdr-charset-rfc2231-02 mimehdr-charset-05],
[],
[attachment; charset=utf-8;
filename*00*=UTF-8'el_GR'%CE%B1%CF%81%CF%87%CE%B5;
filename*01*=%CE%AF%CE%BF%20%CE%B3%CE;
filename*02*=%B9%CE%B1%20%CF%84%CE%B7;
filename*03*=%CE%BD%20%CE%B4%CE%BF%CE;
filename*04*=%BA%CE%B9%CE%BC%CE%B1%CF%83%CE%AF%CE%B1
],
[attachment
charset=utf-8
filename(lang:el_GR/UTF-8)=αρχείο για την δοκιμασία
])
MIMEHDR([combined charset, lang and cset],[mimehdr08 mimehdr-comb mimehdr-charset-rfc2231],
[],
[application/x-stuff
title*0*=us-ascii'en'This%20is%20even%20more%20
title*1*=%2A%2A%2Afun%2A%2A%2A%20
title*2="isn't it!"
],
[application/x-stuff
title(lang:en/us-ascii)=This is even more ***fun*** isn't it!
])
m4_popdef([MIMEHDR])
/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 2005, 2007, 2009, 2010, 2011 Free Software Foundation,
Inc.
GNU Mailutils is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Mailutils is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <mailutils/assoc.h>
#include <mailutils/header.h>
#include <mailutils/message.h>
#include <mailutils/mime.h>
#include <mailutils/iterator.h>
#include <mailutils/stream.h>
#include <mailutils/stdstream.h>
#include <mailutils/util.h>
#include <mailutils/cstr.h>
#include <mailutils/cctype.h>
#include <mailutils/error.h>
#include <mailutils/errno.h>
/* Pairs a parameter name with its value, so that the parameters taken
   from the associative array can be put in a list, sorted by name and
   printed. */
struct named_param
{
/* Parameter name (the key obtained from the iterator). */
const char *name;
/* Parameter value stored under that key. */
struct mu_mime_param const *param;
};
/* List comparator: order two struct named_param items by their names,
   as compared by mu_c_strcasecmp. */
static int
sort_names (void const *a, void const *b)
{
  const char *lhs = ((struct named_param const *) a)->name;
  const char *rhs = ((struct named_param const *) b)->name;

  return mu_c_strcasecmp (lhs, rhs);
}
static int
print_named_param (void *item, void *data)
{
struct named_param const *p = item;
struct mu_mime_param const *param = p->param;
mu_printf ("%s", p->name);
if (param->lang)
mu_printf ("(lang:%s/%s)", param->lang, param->cset);
mu_printf ("=%s\n", param->value);
return 0;
}
/* Test driver for mu_mime_header_parse.

   Reads a header field value from the standard input, parses it and
   prints the main value on the first line, followed by one line per
   parameter, sorted by parameter name.

   Options:
     -charset=CS    charset argument passed to mu_mime_header_parse
     -debug=SPEC    debugging specification for mu_debug_parse_spec
     -h, -help      print a usage summary and exit

   Returns 0 on success and 1 on a usage error; failures of library
   calls are handled by MU_ASSERT. */
int
main (int argc, char **argv)
{
  int i;
  mu_stream_t tmp;
  mu_transport_t trans[2];
  char *value;
  mu_assoc_t assoc;
  mu_iterator_t itr;
  mu_list_t list;
  char *charset = NULL;

  mu_set_program_name (argv[0]);

  /* Every argument must be a recognized option: the loop either
     consumes all of them or returns. */
  for (i = 1; i < argc; i++)
    {
      char *opt = argv[i];

      if (strncmp (opt, "-debug=", 7) == 0)
        mu_debug_parse_spec (opt + 7);
      else if (strncmp (opt, "-charset=", 9) == 0)
        charset = opt + 9;
      else if (strcmp (opt, "-h") == 0 || strcmp (opt, "-help") == 0)
        {
          mu_printf ("usage: %s [-charset=cs] [-debug=SPEC]\n",
                     mu_program_name);
          return 0;
        }
      else
        {
          mu_error ("unknown option %s", opt);
          return 1;
        }
    }

  /* Slurp the standard input into a memory stream and append a NUL
     byte, so that the underlying buffer can be handed to the parser as
     a C string. */
  MU_ASSERT (mu_memory_stream_create (&tmp, MU_STREAM_RDWR));
  MU_ASSERT (mu_stream_copy (tmp, mu_strin, 0, NULL));
  MU_ASSERT (mu_stream_write (tmp, "", 1, NULL));
  MU_ASSERT (mu_stream_ioctl (tmp, MU_IOCTL_TRANSPORT, MU_IOCTL_OP_GET,
                              trans));

  MU_ASSERT (mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc));

  mu_printf ("%s\n", value);

  /* Copy the name/value pairs into a list and sort it by name before
     printing (presumably because the iteration order of the
     associative array is not fixed -- TODO confirm). */
  MU_ASSERT (mu_list_create (&list));
  MU_ASSERT (mu_assoc_get_iterator (assoc, &itr));
  for (mu_iterator_first (itr); !mu_iterator_is_done (itr);
       mu_iterator_next (itr))
    {
      const char *name;
      struct mu_mime_param *p;
      struct named_param *np;

      /* NAME and P are only meaningful if the call succeeds. */
      MU_ASSERT (mu_iterator_current_kv (itr, (const void **)&name,
                                         (void**)&p));
      np = malloc (sizeof (*np));
      if (!np)
        abort ();
      np->name = name;
      np->param = p;
      MU_ASSERT (mu_list_append (list, np));
    }
  mu_iterator_destroy (&itr);

  mu_list_sort (list, sort_names);
  mu_list_foreach (list, print_named_param, NULL);

  return 0;
}
......@@ -96,4 +96,6 @@ m4_include([imapio.at])
m4_include([scantime.at])
m4_include([strftime.at])
m4_include([fsaf.at])
\ No newline at end of file
m4_include([fsaf.at])
m4_include([mimehdr.at])
......