Commit 88533ff1 88533ff1f33c6e5f1b4304812d59627fcefa724f by Sergey Poznyakoff

Rewritten by Kidong Lee using filters and streams.

1 parent 5e3b9905
/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 2003, 2004 Free Software Foundation, Inc.
Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
......@@ -208,413 +208,6 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr)
}
/* ==================================================
RFC 2047 Encoder
================================================== */
#define MAX_QUOTE 75
/* Tell whether byte C has to be escaped inside an encoded-word.

   The rule is intentionally stricter than RFC 2045: escaping a byte
   that did not strictly need it never hurts the recipient, it only
   makes the message slightly larger.

   Returns 0 (leave as is) for the codes 33..57 and 64..126, and
   1 (escape) for every other value.

   NOTE(review): '(' and ')' (codes 40 and 41) lie in the first range
   and are therefore left unescaped, although the historical comment
   named parentheses as an example of bytes to escape -- confirm the
   intent. */
static int
must_quote (char c)
{
  int in_low_range = (c >= 33) && (c <= 57);
  int in_high_range = (c >= 64) && (c <= 126);

  return (in_low_range || in_high_range) ? 0 : 1;
}
/* Working state shared by the RFC 2047 encoding routines. */
typedef struct _encoder rfc2047_encoder;

struct _encoder
{
  /* Encoding letter written in the encoded-word header: 'B' or 'Q' */
  char encoding;

  /* Name of the charset the input text is expressed in */
  const char *charset;

  /* Charset-specific callback: byte length of the character that
     starts at the given position */
  int (*charcount) (const char *);

  /* Byte length of the upcoming character */
  int charblock;

  /* Non-zero when the encoded-word header still has to be written
     before the next byte is inserted */
  int must_open;

  /* Current read position in the input */
  const unsigned char *src;

  /* Current write position in the output */
  char *dst;

  /* Start of the complete output buffer */
  char *result;

  /* Number of input bytes left to process */
  int todo;

  /* Number of bytes produced on the output so far */
  int done;

  /* Number of bytes accounted to the encoded-word in progress */
  int quotesize;

  /* Encoding-specific operations:
       count: report how many output bytes the current input
              character will take, and update 'charblock'
       next:  encode the current input byte to the output
       flush: write out whatever is still pending */
  int (*count) (rfc2047_encoder *enc);
  int (*next) (rfc2047_encoder *enc);
  void (*flush) (rfc2047_encoder *enc);

  /* Private data of the Base64 encoder */
  unsigned char buffer[4];
  int state;
};
/* --------------------------------------------------
Quoted-words building blocks
-------------------------------------------------- */
/* Emit the header of an encoded-word, "=?CHARSET?ENCODING?".

   *DONE is always advanced by the header length.  If *DST is not
   NULL the header is also written there (NUL-terminated) and *DST is
   moved past it; with a NULL *DST only the accounting is done.

   Returns the minimum size of the encoded-word being opened: the
   header length plus the two bytes of the closing "?=". */
static int
_open_quote (const char *charset, char encoding, char **dst, int *done)
{
  /* "=?" + charset + "?" + encoding letter + "?" */
  int header_len = strlen (charset) + 5;
  char *out = *dst;

  if (out != NULL)
    {
      sprintf (out, "=?%s?%c?", charset, encoding);
      *dst = out + header_len;
    }
  *done += header_len;

  return header_len + 2;
}
/* Terminate a quoted-word */
static void
_close_quote (char ** dst, int * done)
{
* done += 2;
if (* dst)
{
strcpy (* dst, "?=");
(* dst) += 2;
}
}
/* Prepare ENC for a new quoted-word: the header will be written when
   the first byte is inserted. */
static void
init_quoted (rfc2047_encoder *enc)
{
  enc->must_open = 1;
}
/* Append the current input byte to the quoted-word being built.

   The encoded-word header is written lazily, on the first byte that
   follows init_quoted().  At every character boundary
   (enc->charblock == 0) the encoded size of the upcoming character is
   added to enc->quotesize; if that would make the encoded-word longer
   than MAX_QUOTE, the word in progress is terminated and a new one,
   separated from it by a single space, is opened.  The decision is
   taken per character, so the bytes of a multibyte character always
   end up in the same encoded-word.

   When enc->dst is NULL nothing is written and only the counters are
   updated. */
static void
insert_quoted (rfc2047_encoder *enc)
{
  if (enc->must_open)
    {
      enc->must_open = 0;
      /* quotesize is the known size of the encoded-word, including
         bytes that have been counted but not yet emitted.  */
      enc->quotesize =
        _open_quote (enc->charset, enc->encoding,
                     &enc->dst, &enc->done) + enc->count (enc);
    }
  else if (enc->charblock == 0)
    {
      /* Character boundary: account for the next character.  */
      enc->quotesize += enc->count (enc);
      if (enc->quotesize > MAX_QUOTE)
        {
          /* Every encoded-word has to be decodable on its own, so any
             partially filled encoder quantum must be written out
             before the word is closed.  */
          enc->flush (enc);
          _close_quote (&enc->dst, &enc->done);

          if (enc->dst)
            *(enc->dst++) = ' ';
          enc->done++;

          /* The encoder state may have changed with the flush, and
             the character that triggered the split belongs to the new
             word: count it again against the fresh header.  */
          enc->quotesize =
            _open_quote (enc->charset, enc->encoding,
                         &enc->dst, &enc->done) + enc->count (enc);
        }
    }

  /* Consume one byte of the current character.  */
  enc->charblock--;
  enc->next (enc);
}
/* Finish the quoted-word in progress: let the encoder write out its
   pending bytes, then emit the closing "?=".  Nothing happens if the
   word was initialised but no byte has been inserted yet. */
static void
flush_quoted (rfc2047_encoder *enc)
{
  if (!enc->must_open)
    {
      enc->flush (enc);
      _close_quote (&enc->dst, &enc->done);
    }
}
/* Copy the current input byte to the output verbatim and advance.
   With a NULL enc->dst only the counters and the input position are
   updated. */
static void
insert_unquoted (rfc2047_encoder *enc)
{
  const unsigned char *in = enc->src;

  if (enc->dst != NULL)
    {
      *enc->dst = *in;
      enc->dst++;
    }
  enc->src = in + 1;
  enc->todo -= 1;
  enc->done += 1;
}
/* Look ahead in SRC and tell whether the next word needs quoting.

   Leading whitespace is skipped; the word then extends up to the next
   whitespace byte or the end of the string.  Returns 1 if any byte of
   that word satisfies must_quote(), 0 otherwise (including when there
   is no further word).

   Bytes are converted to unsigned char before being handed to
   isspace(): passing a negative value other than EOF to the <ctype.h>
   functions is undefined behaviour, and plain char may be signed. */
static int
is_next_quoted (const char *src)
{
  while (isspace ((unsigned char) *src))
    src++;

  for (; *src != '\0'; src++)
    {
      if (isspace ((unsigned char) *src))
        return 0;
      if (must_quote (*src))
        return 1;
    }
  return 0;
}
/* --------------------------------------------------
Known character encodings
-------------------------------------------------- */
/* Character-length callback for charsets handled one byte at a
   time: every character is reported as a single byte, whatever SRC
   points to. */
static int
ce_single_byte (const char *src)
{
  (void) src;
  return 1;
}
/* Character-length callback for UTF-8: return the length in bytes of
   the sequence introduced by the byte at SRC, derived from the number
   of leading 1 bits of that byte (forms of up to 6 bytes are
   recognised).

   A byte with a single leading 1 bit is not a valid first byte (the
   UTF-8 is possibly broken); it is reported as a one-byte character,
   like plain ASCII. */
static int
ce_utf_8 (const char *src)
{
  unsigned char lead = *src;
  int ones = 0;

  while (ones < 6 && (lead & (0x80 >> ones)))
    ones++;

  return (ones < 2) ? 1 : ones;
}
/* --------------------------------------------------
Quoted-printable encoder
-------------------------------------------------- */
/* Set up the quoted-printable encoder.  It keeps no state, so there
   is nothing to initialise. */
static void
qp_init (rfc2047_encoder *enc)
{
  (void) enc;
}
/* 'count' method of the quoted-printable encoder.

   Stores in enc->charblock the byte length of the character at
   enc->src (as reported by enc->charcount) and returns the number of
   output bytes qp_next() will produce for it.  Counting stops early
   at a NUL byte.

   The per-byte cost has to mirror qp_next() exactly, otherwise the
   size kept in enc->quotesize drifts away from what is really
   emitted:
     ' '                     -> "_"    (1 byte)
     '_' or must_quote byte  -> "=XX"  (3 bytes)
     anything else           -> itself (1 byte) */
static int
qp_count (rfc2047_encoder *enc)
{
  const unsigned char *p = enc->src;
  int len = 0;
  int i;

  /* Size of a complete (possibly multibyte) character.  */
  enc->charblock = enc->charcount ((const char *) enc->src);

  for (i = 0; i < enc->charblock && p[i] != '\0'; i++)
    {
      if (p[i] == ' ')
        len += 1;
      else if (p[i] == '_' || must_quote (p[i]))
        len += 3;
      else
        len += 1;
    }
  return len;
}
/* Upper-case hexadecimal digits indexed by nibble value.  Exactly 16
   bytes: the table is not NUL-terminated. */
static const char _hexdigit[16] = {
  '0', '1', '2', '3', '4', '5', '6', '7',
  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
/* 'next' method of the quoted-printable encoder: encode the byte at
   enc->src and advance the input by one.

   A byte that is neither '_' nor flagged by must_quote() is copied
   unchanged.  A space becomes '_' (more readable than its hex form).
   Any other byte, '_' itself included, is written as '=' followed by
   two upper-case hex digits.

   With a NULL enc->dst nothing is written, but the counters are
   updated all the same.  Returns the number of output bytes the input
   byte accounts for (1 or 3). */
static int
qp_next (rfc2047_encoder *enc)
{
  unsigned char byte = *enc->src;
  int emitted;

  if (byte != '_' && !must_quote (byte))
    {
      /* Safe byte: literal copy.  */
      if (enc->dst)
        *(enc->dst++) = byte;
      emitted = 1;
    }
  else if (byte == ' ')
    {
      /* Space shorthand.  */
      if (enc->dst)
        *(enc->dst++) = '_';
      emitted = 1;
    }
  else
    {
      /* Generic "=XX" escape.  */
      if (enc->dst)
        {
          *(enc->dst++) = '=';
          *(enc->dst++) = _hexdigit[byte >> 4];
          *(enc->dst++) = _hexdigit[byte & 0xF];
        }
      emitted = 3;
    }

  enc->src++;
  enc->todo--;
  enc->done += emitted;
  return emitted;
}
/* 'flush' method of the quoted-printable encoder.  Every byte is
   written as soon as it is processed, so nothing is ever pending. */
static void
qp_flush (rfc2047_encoder *enc)
{
  (void) enc;
}
/* --------------------------------------------------
Base64 encoder
-------------------------------------------------- */
/* Base64 alphabet: the character at index N encodes the 6-bit
   value N. */
const char *b64 =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+/";
/* Set up the Base64 encoder: start with an empty input quantum. */
static void
base64_init (rfc2047_encoder *enc)
{
  enc->state = 0;
}
/* 'count' method of the Base64 encoder.

   Stores in enc->charblock the byte length of the character at
   enc->src (as reported by enc->charcount) and returns the number of
   output bytes to account for it.  Each group of three input bytes
   yields four output bytes; the four are charged once, to the byte
   that begins the group, so a character may cost 0, 4 or more. */
static int
base64_count (rfc2047_encoder *enc)
{
  int total = 0;
  int pos;

  /* Size of a complete (possibly multibyte) character.  */
  enc->charblock = enc->charcount (enc->src);

  /* POS is the slot each byte of the character will occupy, counted
     from the start of the quantum currently being filled.  */
  for (pos = enc->state; pos < enc->state + enc->charblock; pos++)
    if (pos % 3 == 0)
      total += 4;

  return total;
}
/* 'next' method of the Base64 encoder: store the byte at enc->src in
   the quantum buffer and advance the input by one.

   Once three bytes have been collected they are written out as four
   Base64 characters (skipped when enc->dst is NULL, the counter is
   updated regardless) and the buffer is emptied.

   Returns the number of output bytes produced by this call: 4 when a
   quantum was completed, 0 otherwise. */
static int
base64_next (rfc2047_encoder *enc)
{
  const unsigned char *q = enc->buffer;

  enc->buffer[enc->state] = *enc->src;
  enc->state++;
  enc->src++;
  enc->todo--;

  if (enc->state < 3)
    return 0;

  /* Three bytes gathered: emit the full quantum.  */
  if (enc->dst)
    {
      char *out = enc->dst;

      out[0] = b64[q[0] >> 2];
      out[1] = b64[((q[0] & 0x3) << 4) | (q[1] >> 4)];
      out[2] = b64[((q[1] & 0xF) << 2) | (q[2] >> 6)];
      out[3] = b64[q[2] & 0x3F];
      enc->dst = out + 4;
    }
  enc->done += 4;
  enc->state = 0;
  return 4;
}
/* 'flush' method of the Base64 encoder: write out an incomplete
   quantum, padded with '=' to four characters.

   Does nothing when the buffer is empty.  Otherwise the output counter
   grows by four and the buffer is emptied; the characters themselves
   are written only when enc->dst is not NULL. */
static void
base64_flush (rfc2047_encoder *enc)
{
  const unsigned char *q = enc->buffer;

  if (enc->state == 0)
    return;

  if (enc->dst)
    {
      if (enc->state == 1)
        {
          /* One pending byte: two data characters, two pads.  */
          *(enc->dst++) = b64[q[0] >> 2];
          *(enc->dst++) = b64[(q[0] & 0x3) << 4];
          *(enc->dst++) = '=';
          *(enc->dst++) = '=';
        }
      else if (enc->state == 2)
        {
          /* Two pending bytes: three data characters, one pad.  */
          *(enc->dst++) = b64[q[0] >> 2];
          *(enc->dst++) = b64[((q[0] & 0x3) << 4) | (q[1] >> 4)];
          *(enc->dst++) = b64[(q[1] & 0xF) << 2];
          *(enc->dst++) = '=';
        }
    }

  enc->done += 4;
  enc->state = 0;
}
/* States of the RFC 2047 encoder's scanning automaton */
enum
{
  /* waiting for non-quoted whitespace */
  ST_SPACE = 0,
  /* waiting for non-quoted word */
  ST_WORD = 1,
  /* waiting for quoted word */
  ST_QUOTED = 2,
  /* waiting for quoted whitespace */
  ST_QUOTED_SPACE = 3
};
/**
Encode a header according to RFC 2047
......@@ -631,149 +224,55 @@ enum {
*/
int
rfc2047_encode (const char *charset, const char *encoding,
const char *text, char ** result)
const char *text, char **result)
{
rfc2047_encoder enc;
stream_t input_stream;
stream_t output_stream;
char encoding_char = '\0';
int rc;
int is_compose;
int state;
if (charset == NULL || encoding == NULL || text == NULL)
return MU_ERR_BAD_2047_INPUT;
if (!charset || !encoding || !text)
return EINVAL;
if (!result)
return MU_ERR_OUT_PTR_NULL;
/* Check for a known encoding */
do
{
if (strcasecmp (encoding, "base64") == 0)
{
base64_init (& enc);
enc.encoding = 'B';
enc.next = base64_next;
enc.count = base64_count;
enc.flush = base64_flush;
break;
}
if (strcasecmp (encoding, "quoted-printable") == 0)
{
qp_init (& enc);
enc.encoding = 'Q';
enc.next = qp_next;
enc.count = qp_count;
enc.flush = qp_flush;
break;
}
return MU_ERR_NOENT;
}
while (0);
/* Check for a known charset */
do
{
if (strcasecmp (charset, "utf-8") == 0)
{
enc.charcount = ce_utf_8;
break;
}
enc.charcount = ce_single_byte;
}
while (0);
enc.dst = NULL;
enc.charset = charset;
/* proceed in two passes: estimate the required space, then fill */
for (is_compose = 0 ; is_compose <= 1 ; is_compose ++)
{
state = ST_SPACE;
enc.src = text;
enc.todo = strlen (text);
enc.done = 0;
while (enc.todo)
{
switch (state)
{
case ST_SPACE:
if (isspace (* enc.src))
{
insert_unquoted (& enc);
break;
}
if (is_next_quoted (enc.src))
{
init_quoted (& enc);
state = ST_QUOTED;
}
encoding_char = 'B';
else if (strcasecmp (encoding, "quoted-printable") == 0)
encoding_char = 'Q';
else
{
state = ST_WORD;
}
break;
return MU_ERR_BAD_2047_INPUT;
case ST_WORD:
if (isspace (* enc.src))
{
state = ST_SPACE;
break;
}
memory_stream_create (&input_stream, 0, 0);
stream_sequential_write (input_stream, text, strlen (text));
insert_unquoted (& enc);
break;
filter_create (&output_stream, input_stream, encoding, MU_FILTER_ENCODE,
MU_STREAM_READ);
case ST_QUOTED:
if (isspace (* enc.src))
{
if (is_next_quoted (enc.src))
{
state = ST_QUOTED_SPACE;
}
else
{
flush_quoted (& enc);
state = ST_SPACE;
}
break;
}
/* Assume strlen(qp_encoded_text) <= strlen(text) * 3 */
/* malloced length is composed of:
"=?"
charset
"?"
B or Q
"?"
encoded_text
"?="
zero terminator */
insert_quoted (& enc);
break;
case ST_QUOTED_SPACE:
if (! isspace (* enc.src))
*result = malloc (2 + strlen (charset) + 3 + strlen (text) * 3 + 3);
if (*result)
{
state = ST_QUOTED;
break;
}
insert_quoted (& enc);
break;
}
}
sprintf (*result, "=?%s?%c?", charset, encoding_char);
if (state == ST_QUOTED ||
state == ST_QUOTED_SPACE)
{
flush_quoted (& enc);
}
rc = stream_sequential_read (output_stream, *result + strlen (*result),
strlen (text) * 3, NULL);
if (enc.dst == NULL)
{
enc.dst = malloc (enc.done + 1);
if (enc.dst == NULL) return -ENOMEM;
enc.result = enc.dst;
}
strcat (*result, "?=");
}
else
rc = ENOMEM;
* (enc.dst) = '\0';
* result = enc.result;
stream_destroy (&input_stream, NULL);
stream_destroy (&output_stream, NULL);
return 0;
return rc;
}
......