implemented rfc2047 encoder
Showing
3 changed files
with
462 additions
and
0 deletions
1 | 2003-02-07 Frederic Gobry <frederic.gobry@smartdata.ch> | ||
2 | |||
3 | * mailbox/rfc2047.c: implemented a preliminary RFC2047 encoder. | ||
4 | |||
1 | 2003-02-06 Sergey Poznyakoff | 5 | 2003-02-06 Sergey Poznyakoff |
2 | 6 | ||
3 | * libsieve/actions.c (build_mime): Call message_unref. | 7 | * libsieve/actions.c (build_mime): Call message_unref. | ... | ... |
... | @@ -44,6 +44,8 @@ int mime_get_message __P ((mime_t mime, message_t *msg)); | ... | @@ -44,6 +44,8 @@ int mime_get_message __P ((mime_t mime, message_t *msg)); |
44 | int rfc2047_decode __P((const char *tocode, const char *fromstr, | 44 | int rfc2047_decode __P((const char *tocode, const char *fromstr, |
45 | char **ptostr)); | 45 | char **ptostr)); |
46 | 46 | ||
47 | int rfc2047_encode __P((const char *charset, const char *encoding, | ||
48 | const char *text, char **result)); | ||
47 | 49 | ||
48 | #ifdef __cplusplus | 50 | #ifdef __cplusplus |
49 | } | 51 | } | ... | ... |
... | @@ -157,3 +157,459 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr) | ... | @@ -157,3 +157,459 @@ rfc2047_decode (const char *tocode, const char *input, char **ptostr) |
157 | return status; | 157 | return status; |
158 | } | 158 | } |
159 | 159 | ||
160 | |||
161 | |||
162 | /* -------------------------------------------------- | ||
163 | RFC 2047 Encoder | ||
164 | -------------------------------------------------- */ | ||
165 | |||
/* Upper bound compared against the running size of an encoded-word
   (see insert_quoted). */
#define MAX_QUOTE 75

/* Decide whether C must be escaped.  Only the byte values 33..57 and
   64..126 are let through verbatim; everything else is reported as
   needing quoting.  Quoting more than strictly necessary only makes
   the message a little larger, the recipient decodes it all the
   same. */
static int
must_quote (char c)
{
  int in_low_range  = (c >= 33) && (c <= 57);
  int in_high_range = (c >= 64) && (c <= 126);

  return !(in_low_range || in_high_range);
}
179 | |||
typedef struct _encoder rfc2047_encoder;

/* Working state shared by all the encoder helpers below. */
struct _encoder {
  char encoding;             /* letter written in the "=?charset?X?" prefix */
  const char *charset;       /* charset name written in that same prefix */

  int must_open;             /* non-zero: next quoted byte opens a new encoded-word */

  const unsigned char *src;  /* next input byte to consume */
  char *dst;                 /* output cursor; NULL while only counting */
  char *result;              /* start of the output buffer */

  int todo;                  /* input bytes still to consume */
  int done;                  /* output bytes written (or counted) so far */
  int quotesize;             /* size of the encoded-word currently open */

  /* Encoding-specific callbacks. */
  int (*count) (rfc2047_encoder *enc);   /* size the next byte may add */
  void (*next) (rfc2047_encoder *enc);   /* consume one input byte */
  void (*flush) (rfc2047_encoder *enc);  /* emit whatever is still pending */

  unsigned char buffer[4];   /* bytes accumulated by the base64 encoder */

  int state;                 /* number of bytes currently held in buffer */
};
202 | |||
203 | |||
/* Start an encoded-word: account for, and when *DST is not NULL also
   write, the "=?charset?X?" prefix, where X is ENCODING.  *DONE is
   increased by the prefix length and *DST is moved past it.

   Returns the size of the encoded-word so far, with the closing "?="
   already included. */
static int
_open_quote (const char * charset,
	     char encoding,
	     char ** dst, int * done)
{
  /* "=?" + charset + "?X?" */
  int prefix = strlen (charset) + 5;
  char *out = *dst;

  if (out != NULL)
    {
      sprintf (out, "=?%s?%c?", charset, encoding);
      *dst = out + prefix;
    }

  *done += prefix;

  return prefix + 2;
}
222 | |||
/* Terminate an encoded-word: add 2 to *DONE and, when *DST is not
   NULL, write "?=" there and move *DST past it. */
static void
_close_quote (char ** dst, int * done)
{
  static const char trailer[] = "?=";
  char *out = *dst;

  if (out != NULL)
    {
      /* copies the terminating NUL as well, as strcpy would */
      memcpy (out, trailer, sizeof trailer);
      *dst = out + 2;
    }

  *done += 2;
}
234 | |||
235 | static void | ||
236 | init_quoted (rfc2047_encoder * enc) | ||
237 | { | ||
238 | enc->must_open = 1; | ||
239 | } | ||
240 | |||
241 | |||
242 | static void | ||
243 | insert_quoted (rfc2047_encoder * enc) | ||
244 | { | ||
245 | int size; | ||
246 | |||
247 | if (enc->must_open) | ||
248 | { | ||
249 | enc->must_open = 0; | ||
250 | enc->quotesize = _open_quote (enc->charset, enc->encoding, | ||
251 | & enc->dst, & enc->done); | ||
252 | } | ||
253 | else | ||
254 | { | ||
255 | size = enc->count (enc); | ||
256 | |||
257 | if (enc->quotesize + size > MAX_QUOTE) | ||
258 | { | ||
259 | _close_quote (& enc->dst, & enc->done); | ||
260 | |||
261 | if (enc->dst) * (enc->dst ++) = ' '; | ||
262 | enc->done ++; | ||
263 | |||
264 | enc->quotesize = _open_quote (enc->charset, enc->encoding, | ||
265 | & enc->dst, & enc->done); | ||
266 | } | ||
267 | } | ||
268 | |||
269 | enc->next (enc); | ||
270 | } | ||
271 | |||
272 | static void | ||
273 | flush_quoted (rfc2047_encoder * enc) | ||
274 | { | ||
275 | if (enc->must_open) return; | ||
276 | |||
277 | enc->flush (enc); | ||
278 | _close_quote (& enc->dst, & enc->done); | ||
279 | } | ||
280 | |||
281 | |||
282 | |||
283 | static void | ||
284 | insert_unquoted (rfc2047_encoder * enc) | ||
285 | { | ||
286 | if (enc->dst) * (enc->dst ++) = * (enc->src); | ||
287 | enc->src ++; | ||
288 | enc->todo --; | ||
289 | enc->done ++; | ||
290 | } | ||
291 | |||
292 | |||
/* Look at the next word in SRC (after skipping leading whitespace)
   and return 1 if it contains at least one byte for which must_quote
   is true, 0 otherwise.  The word ends at the next whitespace or at
   the end of the string. */
static int
is_next_quoted (const char * src)
{
  /* The <ctype.h> functions require a value representable as
     unsigned char; a plain char may be negative for bytes >= 0x80. */
  const unsigned char *p = (const unsigned char *) src;

  while (isspace (*p))
    p++;

  for (; *p != '\0'; p++)
    {
      if (isspace (*p))
	return 0;
      if (must_quote ((char) *p))
	return 1;
    }

  return 0;
}
308 | |||
309 | |||
310 | /* Quoted-printable encoder */ | ||
311 | |||
312 | static void | ||
313 | qp_init (rfc2047_encoder * enc) | ||
314 | { | ||
315 | return; | ||
316 | } | ||
317 | |||
318 | static int | ||
319 | qp_count (rfc2047_encoder * enc) | ||
320 | { | ||
321 | return must_quote (* enc->src) ? 3 : 1; | ||
322 | } | ||
323 | |||
324 | static const char _hexdigit[16] = "0123456789ABCDEF"; | ||
325 | |||
326 | static void | ||
327 | qp_next (rfc2047_encoder * enc) | ||
328 | { | ||
329 | if (must_quote (* enc->src)) | ||
330 | { | ||
331 | /* special encoding of space as a '_' to increase readability */ | ||
332 | if (* enc->src == ' ') | ||
333 | { | ||
334 | if (enc->dst) | ||
335 | { | ||
336 | * (enc->dst ++) = '_'; | ||
337 | enc->src ++; | ||
338 | } | ||
339 | |||
340 | enc->done ++; | ||
341 | enc->quotesize ++; | ||
342 | } | ||
343 | else { | ||
344 | /* default encoding */ | ||
345 | if (enc->dst) | ||
346 | { | ||
347 | * (enc->dst ++) = '='; | ||
348 | * (enc->dst ++) = _hexdigit [* (enc->src) >> 4]; | ||
349 | * (enc->dst ++) = _hexdigit [* (enc->src) & 0xF]; | ||
350 | |||
351 | enc->src ++; | ||
352 | } | ||
353 | |||
354 | enc->done += 3; | ||
355 | enc->quotesize += 3; | ||
356 | } | ||
357 | } | ||
358 | else | ||
359 | { | ||
360 | if (enc->dst) | ||
361 | { | ||
362 | * (enc->dst ++) = * (enc->src ++); | ||
363 | } | ||
364 | |||
365 | enc->done ++; | ||
366 | enc->quotesize ++; | ||
367 | } | ||
368 | |||
369 | enc->todo --; | ||
370 | } | ||
371 | |||
372 | static void | ||
373 | qp_flush (rfc2047_encoder * enc) | ||
374 | { | ||
375 | return; | ||
376 | } | ||
377 | |||
378 | |||
379 | /* Base64 encoder */ | ||
380 | |||
381 | const char *b64 = | ||
382 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | ||
383 | |||
384 | static void | ||
385 | base64_init (rfc2047_encoder * enc) | ||
386 | { | ||
387 | enc->state = 0; | ||
388 | return; | ||
389 | } | ||
390 | |||
391 | static int | ||
392 | base64_count (rfc2047_encoder * enc) | ||
393 | { | ||
394 | /* Count the size of the encoded block only once, at the first byte | ||
395 | transmitted. */ | ||
396 | if (enc->state == 0) return 4; | ||
397 | return 0; | ||
398 | } | ||
399 | |||
400 | static void | ||
401 | base64_next (rfc2047_encoder * enc) | ||
402 | { | ||
403 | enc->buffer [enc->state ++] = * (enc->src ++); | ||
404 | |||
405 | enc->todo --; | ||
406 | |||
407 | /* We have a full quantum */ | ||
408 | if (enc->state >= 3) | ||
409 | { | ||
410 | if (enc->dst) | ||
411 | { | ||
412 | * (enc->dst ++) = b64 [(enc->src[0] >> 2)]; | ||
413 | * (enc->dst ++) = b64 [((enc->src[0] & 0x3) << 4) | (enc->src[1] >> 4)]; | ||
414 | * (enc->dst ++) = b64 [((enc->src[1] & 0xF) << 2) | (enc->src[2] >> 6)]; | ||
415 | * (enc->dst ++) = b64 [(enc->src[2] & 0x3F)]; | ||
416 | |||
417 | enc->src += 3; | ||
418 | } | ||
419 | |||
420 | enc->done += 4; | ||
421 | enc->quotesize += 4; | ||
422 | |||
423 | enc->state = 0; | ||
424 | } | ||
425 | return; | ||
426 | } | ||
427 | |||
428 | static void | ||
429 | base64_flush (rfc2047_encoder * enc) | ||
430 | { | ||
431 | if (enc->state == 0) return; | ||
432 | |||
433 | if (enc->dst) | ||
434 | { | ||
435 | switch (enc->state) | ||
436 | { | ||
437 | case 1: | ||
438 | * (enc->dst ++) = b64 [(enc->src[0] >> 2)]; | ||
439 | * (enc->dst ++) = b64 [((enc->src[0] & 0x3) << 4)]; | ||
440 | * (enc->dst ++) = '='; | ||
441 | * (enc->dst ++) = '='; | ||
442 | break; | ||
443 | |||
444 | case 2: | ||
445 | * (enc->dst ++) = b64 [(enc->src[0] >> 2)]; | ||
446 | * (enc->dst ++) = b64 [((enc->src[0] & 0x3) << 4) | (enc->src[1] >> 4)]; | ||
447 | * (enc->dst ++) = b64 [((enc->src[1] & 0xF) << 2)]; | ||
448 | * (enc->dst ++) = '='; | ||
449 | break; | ||
450 | } | ||
451 | } | ||
452 | |||
453 | enc->done += 4; | ||
454 | enc->quotesize += 4; | ||
455 | enc->state = 0; | ||
456 | return; | ||
457 | } | ||
458 | |||
459 | |||
460 | /* States of the RFC2047 encoder */ | ||
461 | enum { | ||
462 | ST_SPACE, /* waiting for non-quoted whitespace */ | ||
463 | ST_WORD, /* waiting for non-quoted word */ | ||
464 | ST_QUOTED, /* waiting for quoted word */ | ||
465 | ST_QUOTED_SPACE, /* waiting for quoted whitespace */ | ||
466 | }; | ||
467 | |||
468 | |||
469 | |||
470 | /** | ||
471 | Encode a header according to RFC 2047 | ||
472 | |||
473 | @param charset | ||
474 | Charset of the text to encode | ||
475 | @param encoding | ||
476 | Requested encoding (must be "base64" or "quoted-printable") | ||
477 | @param text | ||
478 | Actual text to encode | ||
479 | @param result [OUT] | ||
480 | Encoded string | ||
481 | */ | ||
482 | int | ||
483 | rfc2047_encode (const char *charset, const char *encoding, | ||
484 | const char *text, char ** result) | ||
485 | { | ||
486 | rfc2047_encoder enc; | ||
487 | |||
488 | int is_compose; | ||
489 | int state; | ||
490 | |||
491 | if (! charset || | ||
492 | ! encoding || | ||
493 | ! text || | ||
494 | ! result) return EINVAL; | ||
495 | |||
496 | do | ||
497 | { | ||
498 | if (strcmp (encoding, "base64") == 0) | ||
499 | { | ||
500 | base64_init (& enc); | ||
501 | enc.encoding = 'B'; | ||
502 | enc.next = base64_next; | ||
503 | enc.count = base64_count; | ||
504 | enc.flush = base64_flush; | ||
505 | break; | ||
506 | } | ||
507 | |||
508 | if (strcmp (encoding, "quoted-printable") == 0) | ||
509 | { | ||
510 | qp_init (& enc); | ||
511 | enc.encoding = 'Q'; | ||
512 | enc.next = qp_next; | ||
513 | enc.count = qp_count; | ||
514 | enc.flush = qp_flush; | ||
515 | break; | ||
516 | } | ||
517 | |||
518 | return ENOENT; | ||
519 | } | ||
520 | while (0); | ||
521 | |||
522 | enc.dst = NULL; | ||
523 | enc.charset = charset; | ||
524 | |||
525 | /* proceed in two passes: count, then fill */ | ||
526 | for (is_compose = 0 ; is_compose <= 1 ; is_compose ++) | ||
527 | { | ||
528 | state = ST_SPACE; | ||
529 | |||
530 | enc.src = text; | ||
531 | enc.todo = strlen (text); | ||
532 | enc.done = 0; | ||
533 | |||
534 | while (enc.todo) | ||
535 | { | ||
536 | |||
537 | switch (state) | ||
538 | { | ||
539 | case ST_SPACE: | ||
540 | if (isspace (* enc.src)) | ||
541 | { | ||
542 | insert_unquoted (& enc); | ||
543 | break; | ||
544 | } | ||
545 | |||
546 | if (is_next_quoted (enc.src)) | ||
547 | { | ||
548 | init_quoted (& enc); | ||
549 | state = ST_QUOTED; | ||
550 | } | ||
551 | else | ||
552 | { | ||
553 | state = ST_WORD; | ||
554 | } | ||
555 | break; | ||
556 | |||
557 | case ST_WORD: | ||
558 | if (isspace (* enc.src)) | ||
559 | { | ||
560 | state = ST_SPACE; | ||
561 | break; | ||
562 | } | ||
563 | |||
564 | insert_unquoted (& enc); | ||
565 | break; | ||
566 | |||
567 | case ST_QUOTED: | ||
568 | if (isspace (* enc.src)) | ||
569 | { | ||
570 | if (is_next_quoted (enc.src)) | ||
571 | { | ||
572 | state = ST_QUOTED_SPACE; | ||
573 | } | ||
574 | else | ||
575 | { | ||
576 | flush_quoted (& enc); | ||
577 | state = ST_SPACE; | ||
578 | } | ||
579 | break; | ||
580 | } | ||
581 | |||
582 | insert_quoted (& enc); | ||
583 | break; | ||
584 | |||
585 | case ST_QUOTED_SPACE: | ||
586 | if (! isspace (* enc.src)) | ||
587 | { | ||
588 | state = ST_QUOTED; | ||
589 | break; | ||
590 | } | ||
591 | |||
592 | insert_quoted (& enc); | ||
593 | break; | ||
594 | } | ||
595 | } | ||
596 | |||
597 | if (state == ST_QUOTED || | ||
598 | state == ST_QUOTED_SPACE) | ||
599 | { | ||
600 | flush_quoted (& enc); | ||
601 | } | ||
602 | |||
603 | if (enc.dst == NULL) | ||
604 | { | ||
605 | enc.dst = malloc (enc.done + 1); | ||
606 | if (enc.dst == NULL) return -ENOMEM; | ||
607 | enc.result = enc.dst; | ||
608 | } | ||
609 | } | ||
610 | |||
611 | * (enc.dst) = '\0'; | ||
612 | * result = enc.result; | ||
613 | |||
614 | return 0; | ||
615 | } | ... | ... |
-
Please register or sign in to post a comment