Commit 480f7d0b 480f7d0b369cc32ca30df6f10ed16c7308beab63 by Sergey Poznyakoff

'Q' encoding: encode question mark properly; limit length of encoded words

* libmailutils/base/rfc2047.c (mu_rfc2047_encode): Limit length
of encoded word to 75 bytes.
* libmailutils/filter/qpflt.c: Treat '?' as a special character in
the Q encoder.
* libmailutils/tests/encode2047.at: Add more tests.
* libmailutils/tests/encode2047.c: Use mailutils stream I/O.
1 parent afaecb9b
...@@ -286,17 +286,23 @@ mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr) ...@@ -286,17 +286,23 @@ mu_rfc2047_decode (const char *tocode, const char *input, char **ptostr)
286 286
287 @return 0 on success 287 @return 0 on success
288 */ 288 */
289
290 #define MAX_ENCODED_WORD 75
291
289 int 292 int
290 mu_rfc2047_encode (const char *charset, const char *encoding, 293 mu_rfc2047_encode (const char *charset, const char *encoding,
291 const char *text, char **result) 294 const char *text, char **result)
292 { 295 {
293 mu_stream_t input_stream; 296 mu_stream_t input_stream;
294 mu_stream_t output_stream; 297 mu_stream_t inter_stream;
295 int rc; 298 int rc;
296 299
297 if (charset == NULL || encoding == NULL || text == NULL) 300 if (charset == NULL || encoding == NULL || text == NULL)
298 return EINVAL; 301 return EINVAL;
299 302
303 if (strlen (charset) > MAX_ENCODED_WORD - 8)
304 return EINVAL;
305
300 if (strcmp (encoding, "base64") == 0) 306 if (strcmp (encoding, "base64") == 0)
301 encoding = "B"; 307 encoding = "B";
302 else if (strcmp (encoding, "quoted-printable") == 0) 308 else if (strcmp (encoding, "quoted-printable") == 0)
...@@ -304,42 +310,67 @@ mu_rfc2047_encode (const char *charset, const char *encoding, ...@@ -304,42 +310,67 @@ mu_rfc2047_encode (const char *charset, const char *encoding,
304 else if (encoding[1] || !strchr ("BQ", encoding[0])) 310 else if (encoding[1] || !strchr ("BQ", encoding[0]))
305 return MU_ERR_BAD_2047_ENCODING; 311 return MU_ERR_BAD_2047_ENCODING;
306 312
313
307 rc = mu_static_memory_stream_create (&input_stream, text, strlen (text)); 314 rc = mu_static_memory_stream_create (&input_stream, text, strlen (text));
308 if (rc) 315 if (rc)
309 return rc; 316 return rc;
310 rc = mu_filter_create (&output_stream, input_stream, 317 rc = mu_filter_create (&inter_stream, input_stream,
311 encoding, MU_FILTER_ENCODE, MU_STREAM_READ); 318 encoding, MU_FILTER_ENCODE, MU_STREAM_READ);
312 mu_stream_unref (input_stream); 319 mu_stream_unref (input_stream);
313 if (rc == 0) 320 if (rc == 0)
314 { 321 {
315 /* Assume strlen(qp_encoded_text) <= strlen(text) * 3 */ 322 mu_stream_t output_stream;
316 /* malloced length is composed of: 323 rc = mu_memory_stream_create (&output_stream, MU_STREAM_RDWR);
317 "=?" 324 if (rc == 0)
318 charset
319 "?"
320 B or Q
321 "?"
322 encoded_text
323 "?="
324 zero terminator */
325
326 *result = malloc (2 + strlen (charset) + 3 + strlen (text) * 3 + 3);
327 if (*result)
328 { 325 {
329 char *p = *result; 326 char buf[MAX_ENCODED_WORD];
330 size_t s; 327 size_t start, bs, n;
331 328
332 p += sprintf (p, "=?%s?%s?", charset, encoding); 329 start = snprintf (buf, sizeof buf, "=?%s?%s?", charset, encoding);
330 bs = sizeof buf - start - 2;
333 331
334 rc = mu_stream_read (output_stream, 332 while (1)
335 p, 333 {
336 strlen (text) * 3, &s); 334 rc = mu_stream_read (inter_stream, buf + start, bs, &n);
335 if (rc || n == 0)
336 break;
337 rc = mu_stream_write (output_stream, buf, n + start, NULL);
338 if (rc)
339 break;
340 rc = mu_stream_write (output_stream, "?=", 2, NULL);
341 if (rc)
342 break;
343 if (n == bs)
344 rc = mu_stream_write (output_stream, "\n ", 2, NULL);
345 else
346 break;
347 }
348
349 if (rc == 0)
350 {
351 mu_off_t sz;
352 char *ptr;
353
354 mu_stream_size (output_stream, &sz);
355 ptr = malloc (sz + 1);
356 if (!ptr)
357 rc = ENOMEM;
358 else
359 {
360 if ((rc = mu_stream_seek (output_stream, 0, MU_SEEK_SET,
361 NULL)) == 0
362 && (rc = mu_stream_read (output_stream, ptr, sz, NULL))
363 == 0)
364 {
365 ptr[sz] = 0;
366 *result = ptr;
367 }
368 }
369 }
337 370
338 strcpy (p + s, "?="); 371 mu_stream_destroy (&output_stream);
339 } 372 }
340 else 373 mu_stream_destroy (&inter_stream);
341 rc = ENOMEM;
342 mu_stream_destroy (&output_stream);
343 } 374 }
344 else 375 else
345 mu_stream_destroy (&input_stream); 376 mu_stream_destroy (&input_stream);
......
...@@ -37,7 +37,7 @@ _qp_decoder (void *xd, ...@@ -37,7 +37,7 @@ _qp_decoder (void *xd,
37 size_t isize; 37 size_t isize;
38 char *optr; 38 char *optr;
39 size_t osize; 39 size_t osize;
40 int underscore_special = *(int*)xd; 40 char *specials = xd;
41 41
42 switch (cmd) 42 switch (cmd)
43 { 43 {
...@@ -143,7 +143,7 @@ _qp_decoder (void *xd, ...@@ -143,7 +143,7 @@ _qp_decoder (void *xd,
143 consumed += 2; 143 consumed += 2;
144 } 144 }
145 } 145 }
146 else if (underscore_special && c == '_') 146 else if (c == '_' && specials && strchr (specials, c))
147 { 147 {
148 *optr++ = ' '; 148 *optr++ = ' ';
149 nbytes++; 149 nbytes++;
...@@ -175,7 +175,7 @@ _qp_encoder (void *xd, ...@@ -175,7 +175,7 @@ _qp_encoder (void *xd,
175 size_t isize; 175 size_t isize;
176 char *optr; 176 char *optr;
177 size_t osize; 177 size_t osize;
178 int underscore_special = *(int*)xd; 178 char *specials = xd;
179 179
180 switch (cmd) 180 switch (cmd)
181 { 181 {
...@@ -203,7 +203,7 @@ _qp_encoder (void *xd, ...@@ -203,7 +203,7 @@ _qp_encoder (void *xd,
203 203
204 /* candidate byte to convert */ 204 /* candidate byte to convert */
205 c = *(unsigned char*) iptr; 205 c = *(unsigned char*) iptr;
206 if (underscore_special && c == '_') 206 if (specials && strchr (specials, c))
207 simple_char = 0; 207 simple_char = 0;
208 else 208 else
209 simple_char = (c >= 32 && c <= 60) 209 simple_char = (c >= 32 && c <= 60)
...@@ -216,7 +216,7 @@ _qp_encoder (void *xd, ...@@ -216,7 +216,7 @@ _qp_encoder (void *xd,
216 /* a non-quoted character uses up one byte */ 216 /* a non-quoted character uses up one byte */
217 if (nbytes + 1 > osize) 217 if (nbytes + 1 > osize)
218 break; 218 break;
219 if (underscore_special && c == ' ') 219 if (c == ' ' && specials && strchr (specials, '_'))
220 *optr++ = '_'; 220 *optr++ = '_';
221 else 221 else
222 *optr++ = c; 222 *optr++ = c;
...@@ -248,20 +248,9 @@ _qp_encoder (void *xd, ...@@ -248,20 +248,9 @@ _qp_encoder (void *xd,
248 return mu_filter_ok; 248 return mu_filter_ok;
249 } 249 }
250 250
251 static int
252 alloc_qp (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv)
253 {
254 int *x = malloc (sizeof (*x));
255 if (!x)
256 return ENOMEM;
257 *x = 0;
258 *pret = x;
259 return 0;
260 }
261
262 static struct _mu_filter_record _qp_filter = { 251 static struct _mu_filter_record _qp_filter = {
263 "quoted-printable", 252 "quoted-printable",
264 alloc_qp, 253 NULL,
265 _qp_encoder, 254 _qp_encoder,
266 _qp_decoder 255 _qp_decoder
267 }; 256 };
...@@ -271,10 +260,7 @@ mu_filter_record_t mu_qp_filter = &_qp_filter; ...@@ -271,10 +260,7 @@ mu_filter_record_t mu_qp_filter = &_qp_filter;
271 static int 260 static int
272 alloc_Q (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv) 261 alloc_Q (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv)
273 { 262 {
274 int *x = malloc (sizeof (*x)); 263 char *x = strdup ("_?");
275 if (!x)
276 return ENOMEM;
277 *x = 1;
278 *pret = x; 264 *pret = x;
279 return 0; 265 return 0;
280 } 266 }
......
...@@ -20,7 +20,7 @@ dnl ...@@ -20,7 +20,7 @@ dnl
20 m4_pushdef([TESTENC2047],[ 20 m4_pushdef([TESTENC2047],[
21 m4_pushdef([MU_TEST_GROUP],[Encode 2047]) 21 m4_pushdef([MU_TEST_GROUP],[Encode 2047])
22 m4_pushdef([MU_TEST_KEYWORDS],[encode2047 encode]) 22 m4_pushdef([MU_TEST_KEYWORDS],[encode2047 encode])
23 m4_pushdef([MU_TEST_COMMAND],[encode2047 -eB $3]) 23 m4_pushdef([MU_TEST_COMMAND],[encode2047 $3])
24 MU_GENERIC_TEST([$1],[$2],[$4],[],[$5 24 MU_GENERIC_TEST([$1],[$2],[$4],[],[$5
25 ]) 25 ])
26 m4_popdef([MU_TEST_COMMAND]) 26 m4_popdef([MU_TEST_COMMAND])
...@@ -29,25 +29,40 @@ m4_popdef([MU_TEST_GROUP]) ...@@ -29,25 +29,40 @@ m4_popdef([MU_TEST_GROUP])
29 ]) 29 ])
30 30
31 TESTENC2047([8-bit input],[enc01], 31 TESTENC2047([8-bit input],[enc01],
32 [-c koi8-r -o], 32 [-eB -c koi8-r -o],
33 [\\345\326\305\304\316\305\327\316\331\312\040\317\324\336\305\324], 33 [\345\326\305\304\316\305\327\316\331\312\040\317\324\336\305\324],
34 [=?koi8-r?B?5dbFxM7F187ZyiDP1N7F1A==?=]) 34 [=?koi8-r?B?5dbFxM7F187ZyiDP1N7F1A==?=])
35 35
36 TESTENC2047([padding 1],[enc02], 36 TESTENC2047([padding 1],[enc02],
37 [], 37 [-eB],
38 [abcd], 38 [abcd],
39 [=?iso-8859-1?B?YWJjZA==?=]) 39 [=?iso-8859-1?B?YWJjZA==?=])
40 40
41 TESTENC2047([padding 2],[enc03], 41 TESTENC2047([padding 2],[enc03],
42 [], 42 [-eB],
43 [abcdef], 43 [abcdef],
44 [=?iso-8859-1?B?YWJjZGVm?=]) 44 [=?iso-8859-1?B?YWJjZGVm?=])
45 45
46 TESTENC2047([padding 3],[enc04], 46 TESTENC2047([padding 3],[enc04],
47 [-cUTF-8], 47 [-eB -cUTF-8],
48 [Wichtige Mitteilung zur Schaltung Ihres Anschlusses], 48 [Wichtige Mitteilung zur Schaltung Ihres Anschlusses],
49 [=?UTF-8?B?V2ljaHRpZ2UgTWl0dGVpbHVuZyB6dXIgU2NoYWx0dW5nIElocmVzIEFuc2NobHVzc2Vz?=]) 49 [=?UTF-8?B?V2ljaHRpZ2UgTWl0dGVpbHVuZyB6dXIgU2NoYWx0dW5nIElocmVzIEFuc2NobHV?=
50 50 =?UTF-8?B?zc2Vz?=])
51
52 TESTENC2047([specials],[enc05],
53 [-eQ],
54 [_?=],
55 [=?iso-8859-1?Q?=5F=3F=3D?=])
56
57 TESTENC2047([length limit],[enc06],
58 [-cUTF-8 -eQ],
59 [J'interdis aux marchands de vanter trop leur marchandises. Car ils se font vite pédagogues et t'enseignent comme but ce qui n'est par essence qu'un moyen, et te trompant ainsi sur la route à suivre les voilà bientôt qui te dégradent, car si leur musique est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.],
60 [=?UTF-8?Q?J'interdis_aux_marchands_de_vanter_trop_leur_marchandises._Car_?=
61 =?UTF-8?Q?ils_se_font_vite_p=C3=A9dagogues_et_t'enseignent_comme_but_ce_q?=
62 =?UTF-8?Q?ui_n'est_par_essence_qu'un_moyen,_et_te_trompant_ainsi_sur_la_r?=
63 =?UTF-8?Q?oute_=C3=A0_suivre_les_voil=C3=A0_bient=C3=B4t_qui_te_d=C3=A9gr?=
64 =?UTF-8?Q?adent,_car_si_leur_musique_est_vulgaire_ils_te_fabriquent_pour_?=
65 =?UTF-8?Q?te_la_vendre_une_=C3=A2me_vulgaire.?=])
51 66
52 m4_popdef([TESTENC2047]) 67 m4_popdef([TESTENC2047])
53 68
......
...@@ -104,24 +104,23 @@ decode_octal (char *buf) ...@@ -104,24 +104,23 @@ decode_octal (char *buf)
104 int 104 int
105 main (int argc, char *argv[]) 105 main (int argc, char *argv[])
106 { 106 {
107 int c; 107 int rc;
108 char buf[256]; 108 char *buf = NULL;
109 char vbuf[256]; 109 size_t size = 0;
110 char *charset = strdup ("iso-8859-1"); 110 size_t n;
111 char *encoding = strdup ("quoted-printable"); 111 char *charset = "iso-8859-1";
112 char *encoding = "quoted-printable";
112 int octal = 0; 113 int octal = 0;
113 114
114 while ((c = getopt (argc, argv, "c:e:hot")) != EOF) 115 while ((rc = getopt (argc, argv, "c:e:hot")) != EOF)
115 switch (c) 116 switch (rc)
116 { 117 {
117 case 'c': 118 case 'c':
118 free (charset); 119 charset = optarg;
119 charset = strdup (optarg);
120 break; 120 break;
121 121
122 case 'e': 122 case 'e':
123 free (encoding); 123 encoding = optarg;
124 encoding = strdup (optarg);
125 break; 124 break;
126 125
127 case 'o': 126 case 'o':
...@@ -140,67 +139,23 @@ main (int argc, char *argv[]) ...@@ -140,67 +139,23 @@ main (int argc, char *argv[])
140 exit (1); 139 exit (1);
141 } 140 }
142 141
143 while (fgets (buf, sizeof (buf), stdin)) 142 mu_stdstream_setup (MU_STDSTREAM_RESET_NONE);
143 while ((rc = mu_stream_getline (mu_strin, &buf, &size, &n)) == 0 && n > 0)
144 { 144 {
145 int len; 145 char *p;
146 char *p = NULL; 146
147 char *cmd; 147 mu_rtrim_class (buf, MU_CTYPE_ENDLN);
148 int rc;
149
150 len = strlen (buf);
151 if (len > 0 && buf[len - 1] == '\n')
152 buf[len - 1] = 0;
153 strncpy(vbuf, buf, sizeof vbuf);
154 cmd = vbuf;
155 if (cmd[0] == '\\')
156 {
157 if (cmd[1] == 0)
158 {
159 fprintf (stderr, "Unfinished command\n");
160 continue;
161 }
162
163 for (p = cmd + 2; *p && *p == ' '; p++)
164 ;
165 switch (cmd[1])
166 {
167 case 'c':
168 free (charset);
169 charset = strdup (p);
170 continue;
171
172 case 'e':
173 free (encoding);
174 encoding = strdup (p);
175 continue;
176
177 case 'o':
178 octal = 1;
179 continue;
180
181 case 't':
182 octal = 0;
183 continue;
184
185 case '\\':
186 cmd++;
187 break;
188
189 default:
190 fprintf (stderr, "Unknown command\n");
191 continue;
192 }
193 }
194
195 if (octal) 148 if (octal)
196 decode_octal (cmd); 149 decode_octal (buf);
197 150
198 rc = mu_rfc2047_encode (charset, encoding, cmd, &p); 151 rc = mu_rfc2047_encode (charset, encoding, buf, &p);
199 if (rc) 152 if (rc)
200 fprintf (stderr, "%s", mu_strerror (rc)); 153 mu_diag_funcall (MU_DIAG_ERROR, "mu_rfc2047_encode", NULL, rc);
201 else if (p) 154 else if (p)
202 printf ("%s\n", p); 155 mu_printf ("%s\n", p);
203 free (p); 156 free (p);
204 } 157 }
205 return 0; 158 if (rc)
159 mu_diag_funcall (MU_DIAG_ERROR, "mu_stream_getline", NULL, rc);
160 return 0;
206 } 161 }
......