Rewritten by Kidong Lee using filters and streams.

Sergey Poznyakoff
Commit 88533ff1 ... 88533ff1f33c6e5f1b4304812d59627fcefa724f authored 2005-05-23 19:57:53 +0000 by Sergey Poznyakoff
Showing 1 changed file with 37 additions and 538 deletions
mailbox/rfc2047.c
--- a/mailbox/rfc2047.c
View file @88533ff
+++ b/mailbox/rfc2047.c
View file @88533ff
 /* GNU Mailutils -- a suite of utilities for electronic mail
-   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -208,413 +208,6 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr)
 }
-/* ==================================================
-   RFC 2047 Encoder
-   ================================================== */
-#define MAX_QUOTE 75
-/* Be more conservative in what we quote than in RFC2045, as in some
-   circumstances, additional symbols (like parenthesis) must be quoted
-   in headers. This is never a problem for the recipient, except for
-   the extra overhead in the message size */
-static int
-must_quote (char c)
-{
-  if (((c >  32) && (c <= 57))  || 
-      ((c >= 64) && (c <= 126)))
-    return 0;
-  return 1;
-}
-/* State of the encoder */
-typedef struct _encoder rfc2047_encoder;
-struct _encoder {
-  /* Name of the encoding (either B or Q) */
-  char encoding;
-  /* Charset of the input stream */
-  const char * charset;
-  /* Compute the size of the next character (in bytes), according to
-     the charset */
-  int (* charcount) (const char *);
-  /* Size of the next character (in bytes) */
-  int charblock;
-  /* TRUE if we need to open a quoted-word at the next byte */
-  int must_open;
-  /* Pointer on the current input byte */
-  const unsigned char * src;
-  /* Pointer on the current output byte and on the complete output */
-  char * dst, * result;
-  /* todo: number of bytes remaining in the input, done: number of
-     bytes written in the output, quotesize: number of bytes in the
-     current quoted-word */
-  int todo, done, quotesize;
-  /* Virtual methods implemented for the encoders:
-      count: return how many bytes would be used by inserting the
-             current input and updates 'charblock'
-      next:  quote the current input byte on the output
-      flush: output any pending byte
-  */
-  int  (* count) (rfc2047_encoder * enc);
-  int  (* next)  (rfc2047_encoder * enc);
-  void (* flush) (rfc2047_encoder * enc);
-  /* Extra data for the Base64 encoder */
-  unsigned char buffer [4];
-  int  state;
-};
-/* --------------------------------------------------
-   Quoted-words building blocks 
-   -------------------------------------------------- */
-/* Write the opening of a quoted-word and return the minimum number of
-   bytes it will use */
-static int
-_open_quote (const char * charset,
-	    char encoding,
-	    char ** dst, int * done)
-{
-  int len = strlen (charset) + 5;
-  (* done) += len;
-  if (* dst)
-    {
-      sprintf (* dst, "=?%s?%c?", charset, encoding);
-      (* dst) += len;
-    }
-  /* in the initial length of the quote we already count the final ?= */
-  return len + 2;
-}
-/* Terminate a quoted-word */
-static void
-_close_quote (char ** dst, int * done)
-{
-  * done += 2;
-  if (* dst)
-    {
-      strcpy (* dst, "?=");
-      (* dst) += 2;
-    }
-}
-/* Call this function before the beginning of a quoted-word */
-static void
-init_quoted (rfc2047_encoder * enc)
-{
-  enc->must_open = 1;
-}
-/* Insert the current byte in the quoted-word (handling maximum
-   quoted-word sizes,...) */
-static void
-insert_quoted (rfc2047_encoder * enc)
-{
-  if (enc->must_open)
-    {
-      enc->must_open = 0;
-      /* The quotesize holds the known size of the quoted-word, even
-	 if all the bytes have not yet been inserted in the output
-	 stream. */
-      enc->quotesize = 
-	_open_quote (enc->charset, enc->encoding, 
-		     & enc->dst, & enc->done) + enc->count (enc);
-    }
-  else 
-    {
-      if (enc->charblock == 0)
-	{
-	  /* The quotesize holds the known size of the quoted-word,
-	     even if all the bytes have not yet been inserted in the
-	     output stream. */
-	  enc->quotesize += enc->count (enc);
-	  if (enc->quotesize > MAX_QUOTE)
-	    {
-	      /* Start a new quoted-word */
-	      _close_quote (& enc->dst, & enc->done);
-	      if (enc->dst) * (enc->dst ++) = ' ';
-	      enc->done ++;
-	      enc->quotesize = _open_quote (enc->charset, enc->encoding, 
-					    & enc->dst, & enc->done);
-	    }
-	}
-    }
-  /* We are ready to process one more byte from the input stream */
-  enc->charblock --;
-  enc->next (enc);
-}
-/* Flush the current quoted-word */
-static void
-flush_quoted (rfc2047_encoder * enc)
-{
-  if (enc->must_open) return;
-  enc->flush (enc);
-  _close_quote (& enc->dst, & enc->done);
-}
-/* Insert the current byte unquoted */
-static void
-insert_unquoted (rfc2047_encoder * enc)
-{
-  if (enc->dst) * (enc->dst ++) = * (enc->src);
-  enc->src ++;
-  enc->todo --;
-  enc->done ++;
-}
-/* Check if the next word will need to be quoted */
-static int
-is_next_quoted (const char * src) 
-{
-  while (isspace (* src)) src ++;
-  while (* src) 
-    {
-      if (isspace (* src)) return 0;
-      if (must_quote (* src)) return 1;
-      src ++;
-    }
-  return 0;
-}
-/* --------------------------------------------------
-   Known character encodings
-   -------------------------------------------------- */
-static int
-ce_single_byte (const char * src)
-{
-  return 1;
-}
-static int
-ce_utf_8 (const char * src)
-{
-  unsigned char c = * src;
-  if (c <= 0x7F) return 1;
-  if (c >= 0xFC) return 6;
-  if (c >= 0xF8) return 5;
-  if (c >= 0xF0) return 4;
-  if (c >= 0xE0) return 3;
-  if (c >= 0xC0) return 2;
-  /* otherwise, this is not a first byte (and the UTF-8 is possibly
-     broken), continue with a single byte. */
-  return 1;
-}
-/* --------------------------------------------------
-   Quoted-printable encoder 
-   -------------------------------------------------- */
-static void
-qp_init (rfc2047_encoder * enc)
-{
-  return;
-}
-static int
-qp_count (rfc2047_encoder * enc)
-{
-  int len = 0, todo;
-  unsigned const char * curr;
-  /* count the size of a complete (multibyte) character */
-  enc->charblock = enc->charcount (enc->src);
-  for (todo = 0, curr = enc->src ;
-       todo < enc->charblock && * curr; 
-       todo ++, curr ++)
-    {
-      len += must_quote (* curr) ? 3 : 1;
-    }
-  return len;
-}
-static const char _hexdigit[16] = "0123456789ABCDEF";
-static int
-qp_next (rfc2047_encoder * enc)
-{
-  int done;
-  if (* enc->src == '_' || must_quote (* enc->src))
-    {
-      /* special encoding of space as a '_' to increase readability */
-      if (* enc->src == ' ')
-	{
-	  if (enc->dst)
-	    {
-	      * (enc->dst ++) = '_';
-	    }
-	  done = 1;
-	}
-      else {
-	/* default encoding */
-	if (enc->dst)
-	  {
-	    * (enc->dst ++) = '=';
-	    * (enc->dst ++) = _hexdigit [* (enc->src) >> 4];
-	    * (enc->dst ++) = _hexdigit [* (enc->src) & 0xF];
-	  }
-	done = 3;
-      }
-    }
-  else
-    {
-      if (enc->dst)
-	{
-	  * (enc->dst ++) = * enc->src;
-	}
-      done = 1;
-    }
-  enc->src ++;
-  enc->done += done;
-  enc->todo --;
-  return done;
-}
-static void
-qp_flush (rfc2047_encoder * enc)
-{
-  return;
-}
-/* --------------------------------------------------
-   Base64 encoder 
-   -------------------------------------------------- */
-const char *b64 =
-"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-static void
-base64_init (rfc2047_encoder * enc)
-{
-  enc->state = 0;
-  return;
-}
-static int
-base64_count (rfc2047_encoder * enc)
-{
-  int len = 0, todo;
-  /* Check the size of a complete (multibyte) character */
-  enc->charblock = enc->charcount (enc->src);
-  for (todo = 0 ; todo < enc->charblock; todo ++)
-    {
-      /* Count the size of the encoded block only once, at the first
-	 byte transmitted. */
-      len += ((enc->state + todo) % 3 == 0) ? 4 : 0;
-    }
-  return len;
-}
-static int
-base64_next (rfc2047_encoder * enc)
-{
-  enc->buffer [enc->state ++] = * (enc->src ++);
-  enc->todo --;
-  if (enc->state < 3) return 0;
-  /* We have a full quantum */
-  if (enc->dst)
-    {
-      * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
-      * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)];
-      * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2) | (enc->buffer[2] >> 6)];
-      * (enc->dst ++) = b64 [(enc->buffer[2] & 0x3F)];
-    }
-  enc->done += 4;
-  enc->state = 0;
-  return 4;
-}
-static void
-base64_flush (rfc2047_encoder * enc)
-{
-  if (enc->state == 0) return;
-  if (enc->dst) 
-    {
-      switch (enc->state)
-	{
-	case 1:
-	  * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
-	  * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4)];
-	  * (enc->dst ++) = '=';
-	  * (enc->dst ++) = '=';
-	  break;
-	case 2:
-	  * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
-	  * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)];
-	  * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2)];
-	  * (enc->dst ++) = '=';
-	  break;
-	}
-    }
-  enc->done += 4;
-  enc->state = 0;
-  return;
-}
-/* States of the RFC2047 encoder */
-enum {
-  ST_SPACE,   /* waiting for non-quoted whitespace */
-  ST_WORD,    /* waiting for non-quoted word */
-  ST_QUOTED,  /* waiting for quoted word */
-  ST_QUOTED_SPACE, /* waiting for quoted whitespace */
-};
 /**
   Encode a header according to RFC 2047
@@ -631,149 +224,55 @@ enum {
 */
 int
 rfc2047_encode (const char *charset, const char *encoding,
-		const char *text, char ** result)
+		const char *text, char **result)
 {
-  rfc2047_encoder enc;
+  stream_t input_stream;
+  stream_t output_stream;
+  char encoding_char = '\0';
+  int rc;
-  int is_compose;
+  if (charset == NULL || encoding == NULL || text == NULL)
-  int state;
+    return MU_ERR_BAD_2047_INPUT;
-  if (!charset || !encoding || !text)
-    return EINVAL;
-  if (!result)
-    return MU_ERR_OUT_PTR_NULL;
-  /* Check for a known encoding */
-  do 
-    {
  if (strcasecmp (encoding, "base64") == 0)
-	{
+    encoding_char = 'B';
-	  base64_init (& enc);
+  else if (strcasecmp (encoding, "quoted-printable") == 0)
-	  enc.encoding = 'B';
+    encoding_char = 'Q';
-	  enc.next  = base64_next;
-	  enc.count = base64_count;
-	  enc.flush = base64_flush;
-	  break;
-	}
-      if (strcasecmp (encoding, "quoted-printable") == 0) 
-	{
-	  qp_init (& enc);
-	  enc.encoding = 'Q';
-	  enc.next  = qp_next;
-	  enc.count = qp_count;
-	  enc.flush = qp_flush;
-	  break;
-	}
-      return MU_ERR_NOENT;
-    } 
-  while (0);
-  /* Check for a known charset */
-  do
-    {
-      if (strcasecmp (charset, "utf-8") == 0)
-	{
-	  enc.charcount = ce_utf_8;
-	  break;
-	}
-      enc.charcount = ce_single_byte;
-    }
-  while (0);
-  enc.dst = NULL;
-  enc.charset = charset;
-  /* proceed in two passes: estimate the required space, then fill */
-  for (is_compose = 0 ; is_compose <= 1 ; is_compose ++) 
-    {
-      state = ST_SPACE;
-      enc.src  = text;
-      enc.todo = strlen (text);
-      enc.done = 0;
-      while (enc.todo) 
-	{
-	  switch (state) 
-	    {
-	    case ST_SPACE:
-	      if (isspace (* enc.src)) 
-		{
-		  insert_unquoted (& enc);
-		  break;
-		}
-	      if (is_next_quoted (enc.src)) 
-		{
-		  init_quoted (& enc);
-		  state = ST_QUOTED;
-		}
  else
-		{
+    return MU_ERR_BAD_2047_INPUT;
-		  state = ST_WORD;
-		}
-	      break;
-	    case ST_WORD:
+  memory_stream_create (&input_stream, 0, 0);
-	      if (isspace (* enc.src)) 
+  stream_sequential_write (input_stream, text, strlen (text));
-		{
-		  state = ST_SPACE;
-		  break;
-		}
-	      insert_unquoted (& enc);
+  filter_create (&output_stream, input_stream, encoding, MU_FILTER_ENCODE,
-	      break;
+		 MU_STREAM_READ);
-	    case ST_QUOTED:
+  /* Assume strlen(qp_encoded_text) <= strlen(text) * 3 */
-	      if (isspace (* enc.src))
+  /* malloced length is composed of:
-		{
+      "=?"  
-		  if (is_next_quoted (enc.src))
+      charset 
-		    {
+      "?"
-		      state = ST_QUOTED_SPACE;
+      B or Q
-		    }
+      "?" 
-		  else
+      encoded_text
-		    {
+      "?="
-		      flush_quoted (& enc);
+      zero terminator */
-		      state = ST_SPACE;
-		    }
-		  break;
-		}
-	      insert_quoted (& enc);
+  *result = malloc (2 + strlen (charset) + 3 + strlen (text) * 3 + 3);
-	      break;
+  if (*result)
-	    case ST_QUOTED_SPACE:
-	      if (! isspace (* enc.src))
    {
-		  state = ST_QUOTED;
+      sprintf (*result, "=?%s?%c?", charset, encoding_char);
-		  break;
-		}
-	      insert_quoted (& enc);
-	      break;
-	    }
-	}
-      if (state == ST_QUOTED ||
+      rc = stream_sequential_read (output_stream, *result + strlen (*result),
-	  state == ST_QUOTED_SPACE)
+				   strlen (text) * 3, NULL);
-	{
-	  flush_quoted (& enc);
-	}
-      if (enc.dst == NULL) 
+      strcat (*result, "?=");
-	{
-	  enc.dst = malloc (enc.done + 1);
-	  if (enc.dst == NULL) return -ENOMEM;
-	  enc.result = enc.dst;
-	}
    }
+  else
+    rc = ENOMEM;
-  * (enc.dst) = '\0';
+  stream_destroy (&input_stream, NULL);
-  * result = enc.result;
+  stream_destroy (&output_stream, NULL);
-  return 0;
+  return rc;
 }