'Q' encoding: encode question mark properly; limit length of encoded words
* libmailutils/base/rfc2047.c (mu_rfc2047_encode): Limit length of encoded word to 75 bytes.
* libmailutils/filter/qpflt.c: Treat '?' as special character in Q encoder.
* libmailutils/tests/encode2047.at: Add more tests.
* libmailutils/tests/encode2047.c: Use mailutils stream I/O.
Showing 4 changed files with 110 additions and 123 deletions
... | @@ -286,17 +286,23 @@ mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr) | ... | @@ -286,17 +286,23 @@ mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr) |
286 | 286 | ||
287 | @return 0 on success | 287 | @return 0 on success |
288 | */ | 288 | */ |
289 | |||
290 | #define MAX_ENCODED_WORD 75 | ||
291 | |||
289 | int | 292 | int |
290 | mu_rfc2047_encode (const char *charset, const char *encoding, | 293 | mu_rfc2047_encode (const char *charset, const char *encoding, |
291 | const char *text, char **result) | 294 | const char *text, char **result) |
292 | { | 295 | { |
293 | mu_stream_t input_stream; | 296 | mu_stream_t input_stream; |
294 | mu_stream_t output_stream; | 297 | mu_stream_t inter_stream; |
295 | int rc; | 298 | int rc; |
296 | 299 | ||
297 | if (charset == NULL || encoding == NULL || text == NULL) | 300 | if (charset == NULL || encoding == NULL || text == NULL) |
298 | return EINVAL; | 301 | return EINVAL; |
299 | 302 | ||
303 | if (strlen (charset) > MAX_ENCODED_WORD - 8) | ||
304 | return EINVAL; | ||
305 | |||
300 | if (strcmp (encoding, "base64") == 0) | 306 | if (strcmp (encoding, "base64") == 0) |
301 | encoding = "B"; | 307 | encoding = "B"; |
302 | else if (strcmp (encoding, "quoted-printable") == 0) | 308 | else if (strcmp (encoding, "quoted-printable") == 0) |
... | @@ -304,42 +310,67 @@ mu_rfc2047_encode (const char *charset, const char *encoding, | ... | @@ -304,42 +310,67 @@ mu_rfc2047_encode (const char *charset, const char *encoding, |
304 | else if (encoding[1] || !strchr ("BQ", encoding[0])) | 310 | else if (encoding[1] || !strchr ("BQ", encoding[0])) |
305 | return MU_ERR_BAD_2047_ENCODING; | 311 | return MU_ERR_BAD_2047_ENCODING; |
306 | 312 | ||
313 | |||
307 | rc = mu_static_memory_stream_create (&input_stream, text, strlen (text)); | 314 | rc = mu_static_memory_stream_create (&input_stream, text, strlen (text)); |
308 | if (rc) | 315 | if (rc) |
309 | return rc; | 316 | return rc; |
310 | rc = mu_filter_create (&output_stream, input_stream, | 317 | rc = mu_filter_create (&inter_stream, input_stream, |
311 | encoding, MU_FILTER_ENCODE, MU_STREAM_READ); | 318 | encoding, MU_FILTER_ENCODE, MU_STREAM_READ); |
312 | mu_stream_unref (input_stream); | 319 | mu_stream_unref (input_stream); |
313 | if (rc == 0) | 320 | if (rc == 0) |
314 | { | 321 | { |
315 | /* Assume strlen(qp_encoded_text) <= strlen(text) * 3 */ | 322 | mu_stream_t output_stream; |
316 | /* malloced length is composed of: | 323 | rc = mu_memory_stream_create (&output_stream, MU_STREAM_RDWR); |
317 | "=?" | 324 | if (rc == 0) |
318 | charset | ||
319 | "?" | ||
320 | B or Q | ||
321 | "?" | ||
322 | encoded_text | ||
323 | "?=" | ||
324 | zero terminator */ | ||
325 | |||
326 | *result = malloc (2 + strlen (charset) + 3 + strlen (text) * 3 + 3); | ||
327 | if (*result) | ||
328 | { | 325 | { |
329 | char *p = *result; | 326 | char buf[MAX_ENCODED_WORD]; |
330 | size_t s; | 327 | size_t start, bs, n; |
331 | 328 | ||
332 | p += sprintf (p, "=?%s?%s?", charset, encoding); | 329 | start = snprintf (buf, sizeof buf, "=?%s?%s?", charset, encoding); |
330 | bs = sizeof buf - start - 2; | ||
333 | 331 | ||
334 | rc = mu_stream_read (output_stream, | 332 | while (1) |
335 | p, | 333 | { |
336 | strlen (text) * 3, &s); | 334 | rc = mu_stream_read (inter_stream, buf + start, bs, &n); |
335 | if (rc || n == 0) | ||
336 | break; | ||
337 | rc = mu_stream_write (output_stream, buf, n + start, NULL); | ||
338 | if (rc) | ||
339 | break; | ||
340 | rc = mu_stream_write (output_stream, "?=", 2, NULL); | ||
341 | if (rc) | ||
342 | break; | ||
343 | if (n == bs) | ||
344 | rc = mu_stream_write (output_stream, "\n ", 2, NULL); | ||
345 | else | ||
346 | break; | ||
347 | } | ||
348 | |||
349 | if (rc == 0) | ||
350 | { | ||
351 | mu_off_t sz; | ||
352 | char *ptr; | ||
353 | |||
354 | mu_stream_size (output_stream, &sz); | ||
355 | ptr = malloc (sz + 1); | ||
356 | if (!ptr) | ||
357 | rc = ENOMEM; | ||
358 | else | ||
359 | { | ||
360 | if ((rc = mu_stream_seek (output_stream, 0, MU_SEEK_SET, | ||
361 | NULL)) == 0 | ||
362 | && (rc = mu_stream_read (output_stream, ptr, sz, NULL)) | ||
363 | == 0) | ||
364 | { | ||
365 | ptr[sz] = 0; | ||
366 | *result = ptr; | ||
367 | } | ||
368 | } | ||
369 | } | ||
337 | 370 | ||
338 | strcpy (p + s, "?="); | 371 | mu_stream_destroy (&output_stream); |
339 | } | 372 | } |
340 | else | 373 | mu_stream_destroy (&inter_stream); |
341 | rc = ENOMEM; | ||
342 | mu_stream_destroy (&output_stream); | ||
343 | } | 374 | } |
344 | else | 375 | else |
345 | mu_stream_destroy (&input_stream); | 376 | mu_stream_destroy (&input_stream); | ... | ... |
... | @@ -37,7 +37,7 @@ _qp_decoder (void *xd, | ... | @@ -37,7 +37,7 @@ _qp_decoder (void *xd, |
37 | size_t isize; | 37 | size_t isize; |
38 | char *optr; | 38 | char *optr; |
39 | size_t osize; | 39 | size_t osize; |
40 | int underscore_special = *(int*)xd; | 40 | char *specials = xd; |
41 | 41 | ||
42 | switch (cmd) | 42 | switch (cmd) |
43 | { | 43 | { |
... | @@ -143,7 +143,7 @@ _qp_decoder (void *xd, | ... | @@ -143,7 +143,7 @@ _qp_decoder (void *xd, |
143 | consumed += 2; | 143 | consumed += 2; |
144 | } | 144 | } |
145 | } | 145 | } |
146 | else if (underscore_special && c == '_') | 146 | else if (c == '_' && specials && strchr (specials, c)) |
147 | { | 147 | { |
148 | *optr++ = ' '; | 148 | *optr++ = ' '; |
149 | nbytes++; | 149 | nbytes++; |
... | @@ -175,7 +175,7 @@ _qp_encoder (void *xd, | ... | @@ -175,7 +175,7 @@ _qp_encoder (void *xd, |
175 | size_t isize; | 175 | size_t isize; |
176 | char *optr; | 176 | char *optr; |
177 | size_t osize; | 177 | size_t osize; |
178 | int underscore_special = *(int*)xd; | 178 | char *specials = xd; |
179 | 179 | ||
180 | switch (cmd) | 180 | switch (cmd) |
181 | { | 181 | { |
... | @@ -203,7 +203,7 @@ _qp_encoder (void *xd, | ... | @@ -203,7 +203,7 @@ _qp_encoder (void *xd, |
203 | 203 | ||
204 | /* candidate byte to convert */ | 204 | /* candidate byte to convert */ |
205 | c = *(unsigned char*) iptr; | 205 | c = *(unsigned char*) iptr; |
206 | if (underscore_special && c == '_') | 206 | if (specials && strchr (specials, c)) |
207 | simple_char = 0; | 207 | simple_char = 0; |
208 | else | 208 | else |
209 | simple_char = (c >= 32 && c <= 60) | 209 | simple_char = (c >= 32 && c <= 60) |
... | @@ -216,7 +216,7 @@ _qp_encoder (void *xd, | ... | @@ -216,7 +216,7 @@ _qp_encoder (void *xd, |
216 | /* a non-quoted character uses up one byte */ | 216 | /* a non-quoted character uses up one byte */ |
217 | if (nbytes + 1 > osize) | 217 | if (nbytes + 1 > osize) |
218 | break; | 218 | break; |
219 | if (underscore_special && c == ' ') | 219 | if (c == ' ' && specials && strchr (specials, '_')) |
220 | *optr++ = '_'; | 220 | *optr++ = '_'; |
221 | else | 221 | else |
222 | *optr++ = c; | 222 | *optr++ = c; |
... | @@ -248,20 +248,9 @@ _qp_encoder (void *xd, | ... | @@ -248,20 +248,9 @@ _qp_encoder (void *xd, |
248 | return mu_filter_ok; | 248 | return mu_filter_ok; |
249 | } | 249 | } |
250 | 250 | ||
251 | static int | ||
252 | alloc_qp (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv) | ||
253 | { | ||
254 | int *x = malloc (sizeof (*x)); | ||
255 | if (!x) | ||
256 | return ENOMEM; | ||
257 | *x = 0; | ||
258 | *pret = x; | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | static struct _mu_filter_record _qp_filter = { | 251 | static struct _mu_filter_record _qp_filter = { |
263 | "quoted-printable", | 252 | "quoted-printable", |
264 | alloc_qp, | 253 | NULL, |
265 | _qp_encoder, | 254 | _qp_encoder, |
266 | _qp_decoder | 255 | _qp_decoder |
267 | }; | 256 | }; |
... | @@ -271,10 +260,7 @@ mu_filter_record_t mu_qp_filter = &_qp_filter; | ... | @@ -271,10 +260,7 @@ mu_filter_record_t mu_qp_filter = &_qp_filter; |
271 | static int | 260 | static int |
272 | alloc_Q (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv) | 261 | alloc_Q (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv) |
273 | { | 262 | { |
274 | int *x = malloc (sizeof (*x)); | 263 | char *x = strdup ("_?"); |
275 | if (!x) | ||
276 | return ENOMEM; | ||
277 | *x = 1; | ||
278 | *pret = x; | 264 | *pret = x; |
279 | return 0; | 265 | return 0; |
280 | } | 266 | } | ... | ... |
... | @@ -20,7 +20,7 @@ dnl | ... | @@ -20,7 +20,7 @@ dnl |
20 | m4_pushdef([TESTENC2047],[ | 20 | m4_pushdef([TESTENC2047],[ |
21 | m4_pushdef([MU_TEST_GROUP],[Encode 2047]) | 21 | m4_pushdef([MU_TEST_GROUP],[Encode 2047]) |
22 | m4_pushdef([MU_TEST_KEYWORDS],[encode2047 encode]) | 22 | m4_pushdef([MU_TEST_KEYWORDS],[encode2047 encode]) |
23 | m4_pushdef([MU_TEST_COMMAND],[encode2047 -eB $3]) | 23 | m4_pushdef([MU_TEST_COMMAND],[encode2047 $3]) |
24 | MU_GENERIC_TEST([$1],[$2],[$4],[],[$5 | 24 | MU_GENERIC_TEST([$1],[$2],[$4],[],[$5 |
25 | ]) | 25 | ]) |
26 | m4_popdef([MU_TEST_COMMAND]) | 26 | m4_popdef([MU_TEST_COMMAND]) |
... | @@ -29,25 +29,40 @@ m4_popdef([MU_TEST_GROUP]) | ... | @@ -29,25 +29,40 @@ m4_popdef([MU_TEST_GROUP]) |
29 | ]) | 29 | ]) |
30 | 30 | ||
31 | TESTENC2047([8-bit input],[enc01], | 31 | TESTENC2047([8-bit input],[enc01], |
32 | [-c koi8-r -o], | 32 | [-eB -c koi8-r -o], |
33 | [\\345\326\305\304\316\305\327\316\331\312\040\317\324\336\305\324], | 33 | [\345\326\305\304\316\305\327\316\331\312\040\317\324\336\305\324], |
34 | [=?koi8-r?B?5dbFxM7F187ZyiDP1N7F1A==?=]) | 34 | [=?koi8-r?B?5dbFxM7F187ZyiDP1N7F1A==?=]) |
35 | 35 | ||
36 | TESTENC2047([padding 1],[enc02], | 36 | TESTENC2047([padding 1],[enc02], |
37 | [], | 37 | [-eB], |
38 | [abcd], | 38 | [abcd], |
39 | [=?iso-8859-1?B?YWJjZA==?=]) | 39 | [=?iso-8859-1?B?YWJjZA==?=]) |
40 | 40 | ||
41 | TESTENC2047([padding 2],[enc03], | 41 | TESTENC2047([padding 2],[enc03], |
42 | [], | 42 | [-eB], |
43 | [abcdef], | 43 | [abcdef], |
44 | [=?iso-8859-1?B?YWJjZGVm?=]) | 44 | [=?iso-8859-1?B?YWJjZGVm?=]) |
45 | 45 | ||
46 | TESTENC2047([padding 3],[enc04], | 46 | TESTENC2047([padding 3],[enc04], |
47 | [-cUTF-8], | 47 | [-eB -cUTF-8], |
48 | [Wichtige Mitteilung zur Schaltung Ihres Anschlusses], | 48 | [Wichtige Mitteilung zur Schaltung Ihres Anschlusses], |
49 | [=?UTF-8?B?V2ljaHRpZ2UgTWl0dGVpbHVuZyB6dXIgU2NoYWx0dW5nIElocmVzIEFuc2NobHVzc2Vz?=]) | 49 | [=?UTF-8?B?V2ljaHRpZ2UgTWl0dGVpbHVuZyB6dXIgU2NoYWx0dW5nIElocmVzIEFuc2NobHV?= |
50 | 50 | =?UTF-8?B?zc2Vz?=]) | |
51 | |||
52 | TESTENC2047([specials],[enc05], | ||
53 | [-eQ], | ||
54 | [_?=], | ||
55 | [=?iso-8859-1?Q?=5F=3F=3D?=]) | ||
56 | |||
57 | TESTENC2047([length limit],[enc06], | ||
58 | [-cUTF-8 -eQ], | ||
59 | [J'interdis aux marchands de vanter trop leur marchandises. Car ils se font vite pédagogues et t'enseignent comme but ce qui n'est par essence qu'un moyen, et te trompant ainsi sur la route à suivre les voilà bientôt qui te dégradent, car si leur musique est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.], | ||
60 | [=?UTF-8?Q?J'interdis_aux_marchands_de_vanter_trop_leur_marchandises._Car_?= | ||
61 | =?UTF-8?Q?ils_se_font_vite_p=C3=A9dagogues_et_t'enseignent_comme_but_ce_q?= | ||
62 | =?UTF-8?Q?ui_n'est_par_essence_qu'un_moyen,_et_te_trompant_ainsi_sur_la_r?= | ||
63 | =?UTF-8?Q?oute_=C3=A0_suivre_les_voil=C3=A0_bient=C3=B4t_qui_te_d=C3=A9gr?= | ||
64 | =?UTF-8?Q?adent,_car_si_leur_musique_est_vulgaire_ils_te_fabriquent_pour_?= | ||
65 | =?UTF-8?Q?te_la_vendre_une_=C3=A2me_vulgaire.?=]) | ||
51 | 66 | ||
52 | m4_popdef([TESTENC2047]) | 67 | m4_popdef([TESTENC2047]) |
53 | 68 | ... | ... |
... | @@ -104,24 +104,23 @@ decode_octal (char *buf) | ... | @@ -104,24 +104,23 @@ decode_octal (char *buf) |
104 | int | 104 | int |
105 | main (int argc, char *argv[]) | 105 | main (int argc, char *argv[]) |
106 | { | 106 | { |
107 | int c; | 107 | int rc; |
108 | char buf[256]; | 108 | char *buf = NULL; |
109 | char vbuf[256]; | 109 | size_t size = 0; |
110 | char *charset = strdup ("iso-8859-1"); | 110 | size_t n; |
111 | char *encoding = strdup ("quoted-printable"); | 111 | char *charset = "iso-8859-1"; |
112 | char *encoding = "quoted-printable"; | ||
112 | int octal = 0; | 113 | int octal = 0; |
113 | 114 | ||
114 | while ((c = getopt (argc, argv, "c:e:hot")) != EOF) | 115 | while ((rc = getopt (argc, argv, "c:e:hot")) != EOF) |
115 | switch (c) | 116 | switch (rc) |
116 | { | 117 | { |
117 | case 'c': | 118 | case 'c': |
118 | free (charset); | 119 | charset = optarg; |
119 | charset = strdup (optarg); | ||
120 | break; | 120 | break; |
121 | 121 | ||
122 | case 'e': | 122 | case 'e': |
123 | free (encoding); | 123 | encoding = optarg; |
124 | encoding = strdup (optarg); | ||
125 | break; | 124 | break; |
126 | 125 | ||
127 | case 'o': | 126 | case 'o': |
... | @@ -140,67 +139,23 @@ main (int argc, char *argv[]) | ... | @@ -140,67 +139,23 @@ main (int argc, char *argv[]) |
140 | exit (1); | 139 | exit (1); |
141 | } | 140 | } |
142 | 141 | ||
143 | while (fgets (buf, sizeof (buf), stdin)) | 142 | mu_stdstream_setup (MU_STDSTREAM_RESET_NONE); |
143 | while ((rc = mu_stream_getline (mu_strin, &buf, &size, &n)) == 0 && n > 0) | ||
144 | { | 144 | { |
145 | int len; | 145 | char *p; |
146 | char *p = NULL; | 146 | |
147 | char *cmd; | 147 | mu_rtrim_class (buf, MU_CTYPE_ENDLN); |
148 | int rc; | ||
149 | |||
150 | len = strlen (buf); | ||
151 | if (len > 0 && buf[len - 1] == '\n') | ||
152 | buf[len - 1] = 0; | ||
153 | strncpy(vbuf, buf, sizeof vbuf); | ||
154 | cmd = vbuf; | ||
155 | if (cmd[0] == '\\') | ||
156 | { | ||
157 | if (cmd[1] == 0) | ||
158 | { | ||
159 | fprintf (stderr, "Unfinished command\n"); | ||
160 | continue; | ||
161 | } | ||
162 | |||
163 | for (p = cmd + 2; *p && *p == ' '; p++) | ||
164 | ; | ||
165 | switch (cmd[1]) | ||
166 | { | ||
167 | case 'c': | ||
168 | free (charset); | ||
169 | charset = strdup (p); | ||
170 | continue; | ||
171 | |||
172 | case 'e': | ||
173 | free (encoding); | ||
174 | encoding = strdup (p); | ||
175 | continue; | ||
176 | |||
177 | case 'o': | ||
178 | octal = 1; | ||
179 | continue; | ||
180 | |||
181 | case 't': | ||
182 | octal = 0; | ||
183 | continue; | ||
184 | |||
185 | case '\\': | ||
186 | cmd++; | ||
187 | break; | ||
188 | |||
189 | default: | ||
190 | fprintf (stderr, "Unknown command\n"); | ||
191 | continue; | ||
192 | } | ||
193 | } | ||
194 | |||
195 | if (octal) | 148 | if (octal) |
196 | decode_octal (cmd); | 149 | decode_octal (buf); |
197 | 150 | ||
198 | rc = mu_rfc2047_encode (charset, encoding, cmd, &p); | 151 | rc = mu_rfc2047_encode (charset, encoding, buf, &p); |
199 | if (rc) | 152 | if (rc) |
200 | fprintf (stderr, "%s", mu_strerror (rc)); | 153 | mu_diag_funcall (MU_DIAG_ERROR, "mu_rfc2047_encode", NULL, rc); |
201 | else if (p) | 154 | else if (p) |
202 | printf ("%s\n", p); | 155 | mu_printf ("%s\n", p); |
203 | free (p); | 156 | free (p); |
204 | } | 157 | } |
205 | return 0; | 158 | if (rc) |
159 | mu_diag_funcall (MU_DIAG_ERROR, "mu_stream_getline", NULL, rc); | ||
160 | return 0; | ||
206 | } | 161 | } | ... | ... |
-
Please register or sign in to post a comment