Commit 88533ff1 88533ff1f33c6e5f1b4304812d59627fcefa724f by Sergey Poznyakoff

Rewritten by Kidong Lee using filters and streams.

1 parent 5e3b9905
1 /* GNU Mailutils -- a suite of utilities for electronic mail 1 /* GNU Mailutils -- a suite of utilities for electronic mail
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc. 2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 3
4 This library is free software; you can redistribute it and/or 4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public 5 modify it under the terms of the GNU Lesser General Public
...@@ -208,413 +208,6 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr) ...@@ -208,413 +208,6 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr)
208 } 208 }
209 209
210 210
211
212 /* ==================================================
213 RFC 2047 Encoder
214 ================================================== */
215
216 #define MAX_QUOTE 75
217
/* Decide whether byte C has to be escaped inside an encoded header word.
   Codes 33..57 ('!' through '9') and 64..126 ('@' through '~') are
   passed through literally; every other value -- control characters,
   space, ':' through '?', DEL and anything outside the 7-bit printable
   range -- is reported as needing quoting.
   NOTE(review): codes 40 and 41 (the parentheses) sit inside the first
   literal range, so they are NOT quoted by this test -- confirm that is
   the intended policy.
   Returns 1 when C must be quoted, 0 when it may be copied as is. */
static int
must_quote (char c)
{
  int literal = (c >= 33 && c <= 57) || (c >= 64 && c <= 126);

  return !literal;
}
231
232
/* Working state shared by all the RFC 2047 encoder routines */
typedef struct _encoder rfc2047_encoder;

struct _encoder
{
  /* Letter naming the transfer encoding: 'B' or 'Q' */
  char encoding;

  /* Name of the character set the input text is written in */
  const char *charset;

  /* Charset-specific helper: byte length of the character that starts
     at the given position */
  int (*charcount) (const char *);

  /* Byte length of the next character, as set by the 'count' method */
  int charblock;

  /* Non-zero when a quoted-word has to be opened before the next byte
     is written */
  int must_open;

  /* Current read position in the input */
  const unsigned char *src;

  /* Current write position in the output */
  char *dst;

  /* Start of the complete output buffer */
  char *result;

  /* Number of input bytes still to be consumed */
  int todo;

  /* Number of bytes produced in the output so far */
  int done;

  /* Number of bytes accounted for in the current quoted-word */
  int quotesize;

  /* Per-encoding virtual methods:

       count: return how many output bytes the current input character
              would take, and update 'charblock'
       next:  quote the current input byte into the output
       flush: write out any byte still pending
  */
  int (*count) (rfc2047_encoder *enc);
  int (*next) (rfc2047_encoder *enc);
  void (*flush) (rfc2047_encoder *enc);

  /* Private data of the Base64 encoder */
  unsigned char buffer[4];
  int state;
};
279
280
281 /* --------------------------------------------------
282 Quoted-words building blocks
283 -------------------------------------------------- */
284
/* Emit the header of a quoted-word, "=?CHARSET?ENCODING?".

   *DONE is always advanced by the header length.  When *DST is not
   NULL the header is also written there and *DST is moved past it;
   a NULL *DST means the caller is only measuring.

   Returns the header length plus 2, i.e. the smallest size the whole
   quoted-word can have once its closing "?=" is appended. */
static int
_open_quote (const char * charset,
             char encoding,
             char ** dst, int * done)
{
  /* "=?" + charset + "?" + encoding letter + "?" */
  int header = strlen (charset) + 5;

  if (* dst != NULL)
    {
      sprintf (* dst, "=?%s?%c?", charset, encoding);
      * dst += header;
    }

  * done += header;

  /* reserve room for the trailing "?=" right away */
  return header + 2;
}
305
/* Emit the "?=" trailer of a quoted-word.  *DONE grows by 2 in every
   case; the two characters (and a terminating NUL) are stored at *DST,
   which is then advanced by 2, only when *DST is not NULL. */
static void
_close_quote (char ** dst, int * done)
{
  if (* dst != NULL)
    {
      strcpy (* dst, "?=");
      * dst += 2;
    }

  * done += 2;
}
318
319
320 /* Call this function before the beginning of a quoted-word */
321 static void
322 init_quoted (rfc2047_encoder * enc)
323 {
324 enc->must_open = 1;
325 }
326
327 /* Insert the current byte in the quoted-word (handling maximum
328 quoted-word sizes,...) */
329 static void
330 insert_quoted (rfc2047_encoder * enc)
331 {
332 if (enc->must_open)
333 {
334 enc->must_open = 0;
335
336 /* The quotesize holds the known size of the quoted-word, even
337 if all the bytes have not yet been inserted in the output
338 stream. */
339 enc->quotesize =
340 _open_quote (enc->charset, enc->encoding,
341 & enc->dst, & enc->done) + enc->count (enc);
342 }
343 else
344 {
345 if (enc->charblock == 0)
346 {
347 /* The quotesize holds the known size of the quoted-word,
348 even if all the bytes have not yet been inserted in the
349 output stream. */
350 enc->quotesize += enc->count (enc);
351 if (enc->quotesize > MAX_QUOTE)
352 {
353 /* Start a new quoted-word */
354 _close_quote (& enc->dst, & enc->done);
355
356 if (enc->dst) * (enc->dst ++) = ' ';
357 enc->done ++;
358
359 enc->quotesize = _open_quote (enc->charset, enc->encoding,
360 & enc->dst, & enc->done);
361 }
362 }
363 }
364
365 /* We are ready to process one more byte from the input stream */
366 enc->charblock --;
367 enc->next (enc);
368 }
369
370 /* Flush the current quoted-word */
371 static void
372 flush_quoted (rfc2047_encoder * enc)
373 {
374 if (enc->must_open) return;
375
376 enc->flush (enc);
377 _close_quote (& enc->dst, & enc->done);
378 }
379
380
381 /* Insert the current byte unquoted */
382 static void
383 insert_unquoted (rfc2047_encoder * enc)
384 {
385 if (enc->dst) * (enc->dst ++) = * (enc->src);
386 enc->src ++;
387 enc->todo --;
388 enc->done ++;
389 }
390
391
/* Check if the next word will need to be quoted.

   Leading whitespace at SRC is skipped, then the following word is
   scanned up to the next whitespace or the end of the string.
   Returns 1 as soon as a byte for which must_quote is true is found,
   0 if the word ends first.

   The bytes are converted to unsigned char before being handed to
   isspace: passing a negative char value (any byte with the high bit
   set where char is signed) to a <ctype.h> function is undefined.  */
static int
is_next_quoted (const char * src)
{
  while (isspace ((unsigned char) * src))
    src ++;

  for (; * src; src ++)
    {
      if (isspace ((unsigned char) * src))
        return 0;
      if (must_quote (* src))
        return 1;
    }

  return 0;
}
408
409
410 /* --------------------------------------------------
411 Known character encodings
412 -------------------------------------------------- */
413
/* Character counter for single-byte character sets: every character
   is exactly one byte long, whatever SRC points to. */
static int
ce_single_byte (const char * src)
{
  (void) src;   /* the input is never inspected */

  return 1;
}
419
/* Character counter for UTF-8: derive the byte length of the character
   starting at SRC from its first byte.

     0x00..0xBF -> 1   (ASCII, or a continuation byte: the input is
                        possibly broken, so advance by a single byte)
     0xC0..0xDF -> 2
     0xE0..0xEF -> 3
     0xF0..0xF7 -> 4
     0xF8..0xFB -> 5
     0xFC..0xFF -> 6  */
static int
ce_utf_8 (const char * src)
{
  unsigned char lead = * src;
  int length;

  if (lead < 0xC0)
    length = 1;
  else if (lead < 0xE0)
    length = 2;
  else if (lead < 0xF0)
    length = 3;
  else if (lead < 0xF8)
    length = 4;
  else if (lead < 0xFC)
    length = 5;
  else
    length = 6;

  return length;
}
437
438
439 /* --------------------------------------------------
440 Quoted-printable encoder
441 -------------------------------------------------- */
442
443 static void
444 qp_init (rfc2047_encoder * enc)
445 {
446 return;
447 }
448
449 static int
450 qp_count (rfc2047_encoder * enc)
451 {
452 int len = 0, todo;
453 unsigned const char * curr;
454
455 /* count the size of a complete (multibyte) character */
456 enc->charblock = enc->charcount (enc->src);
457
458 for (todo = 0, curr = enc->src ;
459 todo < enc->charblock && * curr;
460 todo ++, curr ++)
461 {
462 len += must_quote (* curr) ? 3 : 1;
463 }
464
465 return len;
466 }
467
468 static const char _hexdigit[16] = "0123456789ABCDEF";
469
470 static int
471 qp_next (rfc2047_encoder * enc)
472 {
473 int done;
474
475 if (* enc->src == '_' || must_quote (* enc->src))
476 {
477 /* special encoding of space as a '_' to increase readability */
478 if (* enc->src == ' ')
479 {
480 if (enc->dst)
481 {
482 * (enc->dst ++) = '_';
483 }
484
485 done = 1;
486 }
487 else {
488 /* default encoding */
489 if (enc->dst)
490 {
491 * (enc->dst ++) = '=';
492 * (enc->dst ++) = _hexdigit [* (enc->src) >> 4];
493 * (enc->dst ++) = _hexdigit [* (enc->src) & 0xF];
494 }
495
496 done = 3;
497 }
498 }
499 else
500 {
501 if (enc->dst)
502 {
503 * (enc->dst ++) = * enc->src;
504 }
505
506 done = 1;
507 }
508
509 enc->src ++;
510
511 enc->done += done;
512 enc->todo --;
513
514 return done;
515 }
516
517 static void
518 qp_flush (rfc2047_encoder * enc)
519 {
520 return;
521 }
522
523
524 /* --------------------------------------------------
525 Base64 encoder
526 -------------------------------------------------- */
527
528 const char *b64 =
529 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
530
531 static void
532 base64_init (rfc2047_encoder * enc)
533 {
534 enc->state = 0;
535 return;
536 }
537
538 static int
539 base64_count (rfc2047_encoder * enc)
540 {
541 int len = 0, todo;
542
543 /* Check the size of a complete (multibyte) character */
544 enc->charblock = enc->charcount (enc->src);
545
546 for (todo = 0 ; todo < enc->charblock; todo ++)
547 {
548 /* Count the size of the encoded block only once, at the first
549 byte transmitted. */
550 len += ((enc->state + todo) % 3 == 0) ? 4 : 0;
551 }
552
553 return len;
554 }
555
556 static int
557 base64_next (rfc2047_encoder * enc)
558 {
559 enc->buffer [enc->state ++] = * (enc->src ++);
560 enc->todo --;
561
562 if (enc->state < 3) return 0;
563
564 /* We have a full quantum */
565 if (enc->dst)
566 {
567 * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
568 * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)];
569 * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2) | (enc->buffer[2] >> 6)];
570 * (enc->dst ++) = b64 [(enc->buffer[2] & 0x3F)];
571 }
572
573 enc->done += 4;
574
575 enc->state = 0;
576 return 4;
577 }
578
579 static void
580 base64_flush (rfc2047_encoder * enc)
581 {
582 if (enc->state == 0) return;
583
584 if (enc->dst)
585 {
586 switch (enc->state)
587 {
588 case 1:
589 * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
590 * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4)];
591 * (enc->dst ++) = '=';
592 * (enc->dst ++) = '=';
593 break;
594
595 case 2:
596 * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
597 * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)];
598 * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2)];
599 * (enc->dst ++) = '=';
600 break;
601 }
602 }
603
604 enc->done += 4;
605 enc->state = 0;
606 return;
607 }
608
609
/* States of the RFC2047 encoder's scanning automaton */
enum
{
  ST_SPACE = 0,         /* expecting whitespace outside a quoted-word */
  ST_WORD = 1,          /* expecting a word that is copied unquoted */
  ST_QUOTED = 2,        /* expecting a word that goes in a quoted-word */
  ST_QUOTED_SPACE = 3   /* expecting whitespace kept inside a quoted-word */
};
617
618 /** 211 /**
619 Encode a header according to RFC 2047 212 Encode a header according to RFC 2047
620 213
...@@ -631,149 +224,55 @@ enum { ...@@ -631,149 +224,55 @@ enum {
631 */ 224 */
632 int 225 int
633 rfc2047_encode (const char *charset, const char *encoding, 226 rfc2047_encode (const char *charset, const char *encoding,
634 const char *text, char ** result) 227 const char *text, char **result)
635 { 228 {
636 rfc2047_encoder enc; 229 stream_t input_stream;
230 stream_t output_stream;
231 char encoding_char = '\0';
232 int rc;
637 233
638 int is_compose; 234 if (charset == NULL || encoding == NULL || text == NULL)
639 int state; 235 return MU_ERR_BAD_2047_INPUT;
640 236
641 if (!charset || !encoding || !text)
642 return EINVAL;
643 if (!result)
644 return MU_ERR_OUT_PTR_NULL;
645
646 /* Check for a known encoding */
647 do
648 {
649 if (strcasecmp (encoding, "base64") == 0) 237 if (strcasecmp (encoding, "base64") == 0)
650 { 238 encoding_char = 'B';
651 base64_init (& enc); 239 else if (strcasecmp (encoding, "quoted-printable") == 0)
652 enc.encoding = 'B'; 240 encoding_char = 'Q';
653 enc.next = base64_next;
654 enc.count = base64_count;
655 enc.flush = base64_flush;
656 break;
657 }
658
659 if (strcasecmp (encoding, "quoted-printable") == 0)
660 {
661 qp_init (& enc);
662 enc.encoding = 'Q';
663 enc.next = qp_next;
664 enc.count = qp_count;
665 enc.flush = qp_flush;
666 break;
667 }
668
669 return MU_ERR_NOENT;
670 }
671 while (0);
672
673 /* Check for a known charset */
674 do
675 {
676 if (strcasecmp (charset, "utf-8") == 0)
677 {
678 enc.charcount = ce_utf_8;
679 break;
680 }
681
682 enc.charcount = ce_single_byte;
683 }
684 while (0);
685
686 enc.dst = NULL;
687 enc.charset = charset;
688
689 /* proceed in two passes: estimate the required space, then fill */
690 for (is_compose = 0 ; is_compose <= 1 ; is_compose ++)
691 {
692 state = ST_SPACE;
693
694 enc.src = text;
695 enc.todo = strlen (text);
696 enc.done = 0;
697
698 while (enc.todo)
699 {
700
701 switch (state)
702 {
703 case ST_SPACE:
704 if (isspace (* enc.src))
705 {
706 insert_unquoted (& enc);
707 break;
708 }
709
710 if (is_next_quoted (enc.src))
711 {
712 init_quoted (& enc);
713 state = ST_QUOTED;
714 }
715 else 241 else
716 { 242 return MU_ERR_BAD_2047_INPUT;
717 state = ST_WORD;
718 }
719 break;
720 243
721 case ST_WORD: 244 memory_stream_create (&input_stream, 0, 0);
722 if (isspace (* enc.src)) 245 stream_sequential_write (input_stream, text, strlen (text));
723 {
724 state = ST_SPACE;
725 break;
726 }
727 246
728 insert_unquoted (& enc); 247 filter_create (&output_stream, input_stream, encoding, MU_FILTER_ENCODE,
729 break; 248 MU_STREAM_READ);
730 249
731 case ST_QUOTED: 250 /* Assume strlen(qp_encoded_text) <= strlen(text) * 3 */
732 if (isspace (* enc.src)) 251 /* malloced length is composed of:
733 { 252 "=?"
734 if (is_next_quoted (enc.src)) 253 charset
735 { 254 "?"
736 state = ST_QUOTED_SPACE; 255 B or Q
737 } 256 "?"
738 else 257 encoded_text
739 { 258 "?="
740 flush_quoted (& enc); 259 zero terminator */
741 state = ST_SPACE;
742 }
743 break;
744 }
745 260
746 insert_quoted (& enc); 261 *result = malloc (2 + strlen (charset) + 3 + strlen (text) * 3 + 3);
747 break; 262 if (*result)
748
749 case ST_QUOTED_SPACE:
750 if (! isspace (* enc.src))
751 { 263 {
752 state = ST_QUOTED; 264 sprintf (*result, "=?%s?%c?", charset, encoding_char);
753 break;
754 }
755
756 insert_quoted (& enc);
757 break;
758 }
759 }
760 265
761 if (state == ST_QUOTED || 266 rc = stream_sequential_read (output_stream, *result + strlen (*result),
762 state == ST_QUOTED_SPACE) 267 strlen (text) * 3, NULL);
763 {
764 flush_quoted (& enc);
765 }
766 268
767 if (enc.dst == NULL) 269 strcat (*result, "?=");
768 {
769 enc.dst = malloc (enc.done + 1);
770 if (enc.dst == NULL) return -ENOMEM;
771 enc.result = enc.dst;
772 }
773 } 270 }
271 else
272 rc = ENOMEM;
774 273
775 * (enc.dst) = '\0'; 274 stream_destroy (&input_stream, NULL);
776 * result = enc.result; 275 stream_destroy (&output_stream, NULL);
777 276
778 return 0; 277 return rc;
779 } 278 }
......