Rewritten by Kidong Lee using filters and streams.
Showing 1 changed file with 37 additions and 538 deletions
1 | /* GNU Mailutils -- a suite of utilities for electronic mail | 1 | /* GNU Mailutils -- a suite of utilities for electronic mail |
2 | Copyright (C) 2003, 2004 Free Software Foundation, Inc. | 2 | Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. |
3 | 3 | ||
4 | This library is free software; you can redistribute it and/or | 4 | This library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public | 5 | modify it under the terms of the GNU Lesser General Public |
... | @@ -208,413 +208,6 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr) | ... | @@ -208,413 +208,6 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr) |
208 | } | 208 | } |
209 | 209 | ||
210 | 210 | ||
211 | |||
212 | /* ================================================== | ||
213 | RFC 2047 Encoder | ||
214 | ================================================== */ | ||
215 | |||
216 | #define MAX_QUOTE 75 | ||
217 | |||
218 | /* Be more conservative in what we quote than in RFC2045, as in some | ||
219 | circumstances, additional symbols (like parentheses) must be quoted | |
220 | in headers. This is never a problem for the recipient, except for | ||
221 | the extra overhead in the message size */ | ||
222 | static int | ||
223 | must_quote (char c) | ||
224 | { | ||
225 | if (((c > 32) && (c <= 57)) || | ||
226 | ((c >= 64) && (c <= 126))) | ||
227 | return 0; | ||
228 | |||
229 | return 1; | ||
230 | } | ||
231 | |||
232 | |||
233 | /* State of the encoder */ | ||
234 | typedef struct _encoder rfc2047_encoder; | ||
235 | |||
236 | struct _encoder { | ||
237 | /* Name of the encoding (either B or Q) */ | ||
238 | char encoding; | ||
239 | |||
240 | /* Charset of the input stream */ | ||
241 | const char * charset; | ||
242 | |||
243 | /* Compute the size of the next character (in bytes), according to | ||
244 | the charset */ | ||
245 | int (* charcount) (const char *); | ||
246 | |||
247 | /* Size of the next character (in bytes) */ | ||
248 | int charblock; | ||
249 | |||
250 | /* TRUE if we need to open a quoted-word at the next byte */ | ||
251 | int must_open; | ||
252 | |||
253 | /* Pointer to the current input byte */ | |
254 | const unsigned char * src; | ||
255 | |||
256 | /* Pointers to the current output byte and to the complete output */ | |
257 | char * dst, * result; | ||
258 | |||
259 | /* todo: number of bytes remaining in the input, done: number of | ||
260 | bytes written in the output, quotesize: number of bytes in the | ||
261 | current quoted-word */ | ||
262 | int todo, done, quotesize; | ||
263 | |||
264 | /* Virtual methods implemented for the encoders: | ||
265 | |||
266 | count: return how many bytes would be used by inserting the | ||
267 | current input, and update 'charblock' | |
268 | next: quote the current input byte on the output | ||
269 | flush: output any pending byte | ||
270 | */ | ||
271 | int (* count) (rfc2047_encoder * enc); | ||
272 | int (* next) (rfc2047_encoder * enc); | ||
273 | void (* flush) (rfc2047_encoder * enc); | ||
274 | |||
275 | /* Extra data for the Base64 encoder */ | ||
276 | unsigned char buffer [4]; | ||
277 | int state; | ||
278 | }; | ||
279 | |||
280 | |||
281 | /* -------------------------------------------------- | ||
282 | Quoted-words building blocks | ||
283 | -------------------------------------------------- */ | ||
284 | |||
285 | /* Write the opening of a quoted-word and return the minimum number of | ||
286 | bytes it will use */ | ||
287 | static int | ||
288 | _open_quote (const char * charset, | ||
289 | char encoding, | ||
290 | char ** dst, int * done) | ||
291 | { | ||
292 | int len = strlen (charset) + 5; | ||
293 | |||
294 | (* done) += len; | ||
295 | |||
296 | if (* dst) | ||
297 | { | ||
298 | sprintf (* dst, "=?%s?%c?", charset, encoding); | ||
299 | (* dst) += len; | ||
300 | } | ||
301 | |||
302 | /* in the initial length of the quote we already count the final ?= */ | ||
303 | return len + 2; | ||
304 | } | ||
305 | |||
306 | /* Terminate a quoted-word */ | ||
307 | static void | ||
308 | _close_quote (char ** dst, int * done) | ||
309 | { | ||
310 | * done += 2; | ||
311 | |||
312 | if (* dst) | ||
313 | { | ||
314 | strcpy (* dst, "?="); | ||
315 | (* dst) += 2; | ||
316 | } | ||
317 | } | ||
318 | |||
319 | |||
320 | /* Call this function before the beginning of a quoted-word */ | ||
321 | static void | ||
322 | init_quoted (rfc2047_encoder * enc) | ||
323 | { | ||
324 | enc->must_open = 1; | ||
325 | } | ||
326 | |||
327 | /* Insert the current byte in the quoted-word (handling maximum | ||
328 | quoted-word sizes,...) */ | ||
329 | static void | ||
330 | insert_quoted (rfc2047_encoder * enc) | ||
331 | { | ||
332 | if (enc->must_open) | ||
333 | { | ||
334 | enc->must_open = 0; | ||
335 | |||
336 | /* The quotesize holds the known size of the quoted-word, even | ||
337 | if all the bytes have not yet been inserted in the output | ||
338 | stream. */ | ||
339 | enc->quotesize = | ||
340 | _open_quote (enc->charset, enc->encoding, | ||
341 | & enc->dst, & enc->done) + enc->count (enc); | ||
342 | } | ||
343 | else | ||
344 | { | ||
345 | if (enc->charblock == 0) | ||
346 | { | ||
347 | /* The quotesize holds the known size of the quoted-word, | ||
348 | even if all the bytes have not yet been inserted in the | ||
349 | output stream. */ | ||
350 | enc->quotesize += enc->count (enc); | ||
351 | if (enc->quotesize > MAX_QUOTE) | ||
352 | { | ||
353 | /* Start a new quoted-word */ | ||
354 | _close_quote (& enc->dst, & enc->done); | ||
355 | |||
356 | if (enc->dst) * (enc->dst ++) = ' '; | ||
357 | enc->done ++; | ||
358 | |||
359 | enc->quotesize = _open_quote (enc->charset, enc->encoding, | ||
360 | & enc->dst, & enc->done); | ||
361 | } | ||
362 | } | ||
363 | } | ||
364 | |||
365 | /* We are ready to process one more byte from the input stream */ | ||
366 | enc->charblock --; | ||
367 | enc->next (enc); | ||
368 | } | ||
369 | |||
370 | /* Flush the current quoted-word */ | ||
371 | static void | ||
372 | flush_quoted (rfc2047_encoder * enc) | ||
373 | { | ||
374 | if (enc->must_open) return; | ||
375 | |||
376 | enc->flush (enc); | ||
377 | _close_quote (& enc->dst, & enc->done); | ||
378 | } | ||
379 | |||
380 | |||
381 | /* Insert the current byte unquoted */ | ||
382 | static void | ||
383 | insert_unquoted (rfc2047_encoder * enc) | ||
384 | { | ||
385 | if (enc->dst) * (enc->dst ++) = * (enc->src); | ||
386 | enc->src ++; | ||
387 | enc->todo --; | ||
388 | enc->done ++; | ||
389 | } | ||
390 | |||
391 | |||
392 | /* Check if the next word will need to be quoted */ | ||
393 | static int | ||
394 | is_next_quoted (const char * src) | ||
395 | { | ||
396 | while (isspace (* src)) src ++; | ||
397 | |||
398 | while (* src) | ||
399 | { | ||
400 | if (isspace (* src)) return 0; | ||
401 | if (must_quote (* src)) return 1; | ||
402 | |||
403 | src ++; | ||
404 | } | ||
405 | |||
406 | return 0; | ||
407 | } | ||
408 | |||
409 | |||
410 | /* -------------------------------------------------- | ||
411 | Known character encodings | ||
412 | -------------------------------------------------- */ | ||
413 | |||
414 | static int | ||
415 | ce_single_byte (const char * src) | ||
416 | { | ||
417 | return 1; | ||
418 | } | ||
419 | |||
420 | static int | ||
421 | ce_utf_8 (const char * src) | ||
422 | { | ||
423 | unsigned char c = * src; | ||
424 | |||
425 | if (c <= 0x7F) return 1; | ||
426 | |||
427 | if (c >= 0xFC) return 6; | ||
428 | if (c >= 0xF8) return 5; | ||
429 | if (c >= 0xF0) return 4; | ||
430 | if (c >= 0xE0) return 3; | ||
431 | if (c >= 0xC0) return 2; | ||
432 | |||
433 | /* otherwise, this is not a first byte (and the UTF-8 is possibly | ||
434 | broken), continue with a single byte. */ | ||
435 | return 1; | ||
436 | } | ||
437 | |||
438 | |||
439 | /* -------------------------------------------------- | ||
440 | Quoted-printable encoder | ||
441 | -------------------------------------------------- */ | ||
442 | |||
443 | static void | ||
444 | qp_init (rfc2047_encoder * enc) | ||
445 | { | ||
446 | return; | ||
447 | } | ||
448 | |||
449 | static int | ||
450 | qp_count (rfc2047_encoder * enc) | ||
451 | { | ||
452 | int len = 0, todo; | ||
453 | unsigned const char * curr; | ||
454 | |||
455 | /* count the size of a complete (multibyte) character */ | ||
456 | enc->charblock = enc->charcount (enc->src); | ||
457 | |||
458 | for (todo = 0, curr = enc->src ; | ||
459 | todo < enc->charblock && * curr; | ||
460 | todo ++, curr ++) | ||
461 | { | ||
462 | len += must_quote (* curr) ? 3 : 1; | ||
463 | } | ||
464 | |||
465 | return len; | ||
466 | } | ||
467 | |||
468 | static const char _hexdigit[16] = "0123456789ABCDEF"; | ||
469 | |||
470 | static int | ||
471 | qp_next (rfc2047_encoder * enc) | ||
472 | { | ||
473 | int done; | ||
474 | |||
475 | if (* enc->src == '_' || must_quote (* enc->src)) | ||
476 | { | ||
477 | /* special encoding of space as a '_' to increase readability */ | ||
478 | if (* enc->src == ' ') | ||
479 | { | ||
480 | if (enc->dst) | ||
481 | { | ||
482 | * (enc->dst ++) = '_'; | ||
483 | } | ||
484 | |||
485 | done = 1; | ||
486 | } | ||
487 | else { | ||
488 | /* default encoding */ | ||
489 | if (enc->dst) | ||
490 | { | ||
491 | * (enc->dst ++) = '='; | ||
492 | * (enc->dst ++) = _hexdigit [* (enc->src) >> 4]; | ||
493 | * (enc->dst ++) = _hexdigit [* (enc->src) & 0xF]; | ||
494 | } | ||
495 | |||
496 | done = 3; | ||
497 | } | ||
498 | } | ||
499 | else | ||
500 | { | ||
501 | if (enc->dst) | ||
502 | { | ||
503 | * (enc->dst ++) = * enc->src; | ||
504 | } | ||
505 | |||
506 | done = 1; | ||
507 | } | ||
508 | |||
509 | enc->src ++; | ||
510 | |||
511 | enc->done += done; | ||
512 | enc->todo --; | ||
513 | |||
514 | return done; | ||
515 | } | ||
516 | |||
517 | static void | ||
518 | qp_flush (rfc2047_encoder * enc) | ||
519 | { | ||
520 | return; | ||
521 | } | ||
522 | |||
523 | |||
524 | /* -------------------------------------------------- | ||
525 | Base64 encoder | ||
526 | -------------------------------------------------- */ | ||
527 | |||
528 | const char *b64 = | ||
529 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | ||
530 | |||
531 | static void | ||
532 | base64_init (rfc2047_encoder * enc) | ||
533 | { | ||
534 | enc->state = 0; | ||
535 | return; | ||
536 | } | ||
537 | |||
538 | static int | ||
539 | base64_count (rfc2047_encoder * enc) | ||
540 | { | ||
541 | int len = 0, todo; | ||
542 | |||
543 | /* Check the size of a complete (multibyte) character */ | ||
544 | enc->charblock = enc->charcount (enc->src); | ||
545 | |||
546 | for (todo = 0 ; todo < enc->charblock; todo ++) | ||
547 | { | ||
548 | /* Count the size of the encoded block only once, at the first | ||
549 | byte transmitted. */ | ||
550 | len += ((enc->state + todo) % 3 == 0) ? 4 : 0; | ||
551 | } | ||
552 | |||
553 | return len; | ||
554 | } | ||
555 | |||
556 | static int | ||
557 | base64_next (rfc2047_encoder * enc) | ||
558 | { | ||
559 | enc->buffer [enc->state ++] = * (enc->src ++); | ||
560 | enc->todo --; | ||
561 | |||
562 | if (enc->state < 3) return 0; | ||
563 | |||
564 | /* We have a full quantum */ | ||
565 | if (enc->dst) | ||
566 | { | ||
567 | * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)]; | ||
568 | * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)]; | ||
569 | * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2) | (enc->buffer[2] >> 6)]; | ||
570 | * (enc->dst ++) = b64 [(enc->buffer[2] & 0x3F)]; | ||
571 | } | ||
572 | |||
573 | enc->done += 4; | ||
574 | |||
575 | enc->state = 0; | ||
576 | return 4; | ||
577 | } | ||
578 | |||
579 | static void | ||
580 | base64_flush (rfc2047_encoder * enc) | ||
581 | { | ||
582 | if (enc->state == 0) return; | ||
583 | |||
584 | if (enc->dst) | ||
585 | { | ||
586 | switch (enc->state) | ||
587 | { | ||
588 | case 1: | ||
589 | * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)]; | ||
590 | * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4)]; | ||
591 | * (enc->dst ++) = '='; | ||
592 | * (enc->dst ++) = '='; | ||
593 | break; | ||
594 | |||
595 | case 2: | ||
596 | * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)]; | ||
597 | * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)]; | ||
598 | * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2)]; | ||
599 | * (enc->dst ++) = '='; | ||
600 | break; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | enc->done += 4; | ||
605 | enc->state = 0; | ||
606 | return; | ||
607 | } | ||
608 | |||
609 | |||
610 | /* States of the RFC2047 encoder */ | ||
611 | enum { | ||
612 | ST_SPACE, /* waiting for non-quoted whitespace */ | ||
613 | ST_WORD, /* waiting for non-quoted word */ | ||
614 | ST_QUOTED, /* waiting for quoted word */ | ||
615 | ST_QUOTED_SPACE, /* waiting for quoted whitespace */ | ||
616 | }; | ||
617 | |||
618 | /** | 211 | /** |
619 | Encode a header according to RFC 2047 | 212 | Encode a header according to RFC 2047 |
620 | 213 | ||
... | @@ -631,149 +224,55 @@ enum { | ... | @@ -631,149 +224,55 @@ enum { |
631 | */ | 224 | */ |
632 | int | 225 | int |
633 | rfc2047_encode (const char *charset, const char *encoding, | 226 | rfc2047_encode (const char *charset, const char *encoding, |
634 | const char *text, char ** result) | 227 | const char *text, char **result) |
635 | { | 228 | { |
636 | rfc2047_encoder enc; | 229 | stream_t input_stream; |
230 | stream_t output_stream; | ||
231 | char encoding_char = '\0'; | ||
232 | int rc; | ||
637 | 233 | ||
638 | int is_compose; | 234 | if (charset == NULL || encoding == NULL || text == NULL) |
639 | int state; | 235 | return MU_ERR_BAD_2047_INPUT; |
640 | 236 | ||
641 | if (!charset || !encoding || !text) | ||
642 | return EINVAL; | ||
643 | if (!result) | ||
644 | return MU_ERR_OUT_PTR_NULL; | ||
645 | |||
646 | /* Check for a known encoding */ | ||
647 | do | ||
648 | { | ||
649 | if (strcasecmp (encoding, "base64") == 0) | 237 | if (strcasecmp (encoding, "base64") == 0) |
650 | { | 238 | encoding_char = 'B'; |
651 | base64_init (& enc); | 239 | else if (strcasecmp (encoding, "quoted-printable") == 0) |
652 | enc.encoding = 'B'; | 240 | encoding_char = 'Q'; |
653 | enc.next = base64_next; | ||
654 | enc.count = base64_count; | ||
655 | enc.flush = base64_flush; | ||
656 | break; | ||
657 | } | ||
658 | |||
659 | if (strcasecmp (encoding, "quoted-printable") == 0) | ||
660 | { | ||
661 | qp_init (& enc); | ||
662 | enc.encoding = 'Q'; | ||
663 | enc.next = qp_next; | ||
664 | enc.count = qp_count; | ||
665 | enc.flush = qp_flush; | ||
666 | break; | ||
667 | } | ||
668 | |||
669 | return MU_ERR_NOENT; | ||
670 | } | ||
671 | while (0); | ||
672 | |||
673 | /* Check for a known charset */ | ||
674 | do | ||
675 | { | ||
676 | if (strcasecmp (charset, "utf-8") == 0) | ||
677 | { | ||
678 | enc.charcount = ce_utf_8; | ||
679 | break; | ||
680 | } | ||
681 | |||
682 | enc.charcount = ce_single_byte; | ||
683 | } | ||
684 | while (0); | ||
685 | |||
686 | enc.dst = NULL; | ||
687 | enc.charset = charset; | ||
688 | |||
689 | /* proceed in two passes: estimate the required space, then fill */ | ||
690 | for (is_compose = 0 ; is_compose <= 1 ; is_compose ++) | ||
691 | { | ||
692 | state = ST_SPACE; | ||
693 | |||
694 | enc.src = text; | ||
695 | enc.todo = strlen (text); | ||
696 | enc.done = 0; | ||
697 | |||
698 | while (enc.todo) | ||
699 | { | ||
700 | |||
701 | switch (state) | ||
702 | { | ||
703 | case ST_SPACE: | ||
704 | if (isspace (* enc.src)) | ||
705 | { | ||
706 | insert_unquoted (& enc); | ||
707 | break; | ||
708 | } | ||
709 | |||
710 | if (is_next_quoted (enc.src)) | ||
711 | { | ||
712 | init_quoted (& enc); | ||
713 | state = ST_QUOTED; | ||
714 | } | ||
715 | else | 241 | else |
716 | { | 242 | return MU_ERR_BAD_2047_INPUT; |
717 | state = ST_WORD; | ||
718 | } | ||
719 | break; | ||
720 | 243 | ||
721 | case ST_WORD: | 244 | memory_stream_create (&input_stream, 0, 0); |
722 | if (isspace (* enc.src)) | 245 | stream_sequential_write (input_stream, text, strlen (text)); |
723 | { | ||
724 | state = ST_SPACE; | ||
725 | break; | ||
726 | } | ||
727 | 246 | ||
728 | insert_unquoted (& enc); | 247 | filter_create (&output_stream, input_stream, encoding, MU_FILTER_ENCODE, |
729 | break; | 248 | MU_STREAM_READ); |
730 | 249 | ||
731 | case ST_QUOTED: | 250 | /* Assume strlen(qp_encoded_text) <= strlen(text) * 3 */ |
732 | if (isspace (* enc.src)) | 251 | /* malloced length is composed of: |
733 | { | 252 | "=?" |
734 | if (is_next_quoted (enc.src)) | 253 | charset |
735 | { | 254 | "?" |
736 | state = ST_QUOTED_SPACE; | 255 | B or Q |
737 | } | 256 | "?" |
738 | else | 257 | encoded_text |
739 | { | 258 | "?=" |
740 | flush_quoted (& enc); | 259 | zero terminator */ |
741 | state = ST_SPACE; | ||
742 | } | ||
743 | break; | ||
744 | } | ||
745 | 260 | ||
746 | insert_quoted (& enc); | 261 | *result = malloc (2 + strlen (charset) + 3 + strlen (text) * 3 + 3); |
747 | break; | 262 | if (*result) |
748 | |||
749 | case ST_QUOTED_SPACE: | ||
750 | if (! isspace (* enc.src)) | ||
751 | { | 263 | { |
752 | state = ST_QUOTED; | 264 | sprintf (*result, "=?%s?%c?", charset, encoding_char); |
753 | break; | ||
754 | } | ||
755 | |||
756 | insert_quoted (& enc); | ||
757 | break; | ||
758 | } | ||
759 | } | ||
760 | 265 | ||
761 | if (state == ST_QUOTED || | 266 | rc = stream_sequential_read (output_stream, *result + strlen (*result), |
762 | state == ST_QUOTED_SPACE) | 267 | strlen (text) * 3, NULL); |
763 | { | ||
764 | flush_quoted (& enc); | ||
765 | } | ||
766 | 268 | ||
767 | if (enc.dst == NULL) | 269 | strcat (*result, "?="); |
768 | { | ||
769 | enc.dst = malloc (enc.done + 1); | ||
770 | if (enc.dst == NULL) return -ENOMEM; | ||
771 | enc.result = enc.dst; | ||
772 | } | ||
773 | } | 270 | } |
271 | else | ||
272 | rc = ENOMEM; | ||
774 | 273 | ||
775 | * (enc.dst) = '\0'; | 274 | stream_destroy (&input_stream, NULL); |
776 | * result = enc.result; | 275 | stream_destroy (&output_stream, NULL); |
777 | 276 | ||
778 | return 0; | 277 | return rc; |
779 | } | 278 | } | ... | ... |
-
Please register or sign in to post a comment