Commit f9b47847 f9b4784732c9ab114ea1e16b617b0bd9b7d6b81d by Sergey Poznyakoff

(argcv_scan,xtonum): Improved parser

(argcv_unescape_char): Renamed to argcv_unquote_char.
(argcv_escape_char): Renamed to argcv_quote_char.
(argcv_quoted_length,argcv_quote_copy): New public functions (formerly the static escaped_length and escape_copy).
(argcv_unquote_copy): Renamed from unescape_copy and made public. Fixed
handling of escape sequences.
(argcv_get,argcv_free,argcv_string): Standardized return values: 0 on success, an errno code (ENOMEM, EINVAL) on failure.
1 parent 7061bd30
...@@ -16,8 +16,12 @@ ...@@ -16,8 +16,12 @@
16 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 16 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 Boston, MA 02110-1301 USA */ 17 Boston, MA 02110-1301 USA */
18 18
19 #include <ctype.h> 19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
20 22
23 #include <ctype.h>
24 #include <errno.h>
21 #include <mailutils/argcv.h> 25 #include <mailutils/argcv.h>
22 26
23 /* 27 /*
...@@ -29,14 +33,15 @@ ...@@ -29,14 +33,15 @@
29 */ 33 */
30 34
31 #define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n') 35 #define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n')
32 #define isdelim(c,delim) ((c)=='"'||strchr(delim,(c))!=NULL) 36 #define isdelim(c,delim) (strchr(delim,(c))!=NULL)
33 37
34 static int 38 static int
35 argcv_scan (int len, const char *command, const char *delim, const char* cmnt, 39 argcv_scan (int len, const char *command, const char *delim, const char* cmnt,
36 int *start, int *end, int *save) 40 int *start, int *end, int *save)
37 { 41 {
38 int i = 0; 42 int i = 0;
39 43 int expect_delim;
44
40 for (;;) 45 for (;;)
41 { 46 {
42 i = *save; 47 i = *save;
...@@ -64,9 +69,27 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt, ...@@ -64,9 +69,27 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt,
64 break; 69 break;
65 /* Skip until next whitespace character or end of line. Honor 70 /* Skip until next whitespace character or end of line. Honor
66 escaped whitespace. */ 71 escaped whitespace. */
67 while (++i < len && 72 expect_delim = 0;
68 !((isws (command[i]) && command[i-1] != '\\') 73 while (++i < len)
69 || isdelim (command[i], delim))); 74 {
75 if (expect_delim)
76 {
77 if (command[i-1] != '\\' && command[i] == expect_delim)
78 expect_delim = 0;
79 else
80 continue;
81 }
82
83 if (command[i-1] != '\\')
84 {
85 if (command[i] == '\'' || command[i] == '"')
86 expect_delim = command[i];
87 else if (isws (command[i]) || isdelim (command[i], delim))
88 break;
89 }
90 else
91 i++; /* skip the escaped character */
92 }
70 i--; 93 i--;
71 break; 94 break;
72 } 95 }
...@@ -93,14 +116,14 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt, ...@@ -93,14 +116,14 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt,
93 return *save; 116 return *save;
94 } 117 }
95 118
96 static char escape_transtab[] = "\\\\a\ab\bf\fn\nr\rt\t"; 119 static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\t";
97 120
98 int 121 int
99 argcv_unescape_char (int c) 122 argcv_unquote_char (int c)
100 { 123 {
101 char *p; 124 char *p;
102 125
103 for (p = escape_transtab; *p; p += 2) 126 for (p = quote_transtab; *p; p += 2)
104 { 127 {
105 if (*p == c) 128 if (*p == c)
106 return p[1]; 129 return p[1];
...@@ -109,12 +132,12 @@ argcv_unescape_char (int c) ...@@ -109,12 +132,12 @@ argcv_unescape_char (int c)
109 } 132 }
110 133
111 int 134 int
112 argcv_escape_char (int c) 135 argcv_quote_char (int c)
113 { 136 {
114 char *p; 137 char *p;
115 138
116 for (p = escape_transtab + sizeof(escape_transtab) - 2; 139 for (p = quote_transtab + sizeof(quote_transtab) - 2;
117 p > escape_transtab; p -= 2) 140 p > quote_transtab; p -= 2)
118 { 141 {
119 if (*p == c) 142 if (*p == c)
120 return p[-1]; 143 return p[-1];
...@@ -122,26 +145,31 @@ argcv_escape_char (int c) ...@@ -122,26 +145,31 @@ argcv_escape_char (int c)
122 return -1; 145 return -1;
123 } 146 }
124 147
148 #define to_num(c) \
149 (isdigit(c) ? c - '0' : (isxdigit(c) ? toupper(c) - 'A' + 10 : 255 ))
125 150
126 static int 151 static int
127 xtonum (const char *src, int base, size_t cnt) 152 xtonum (int *pval, const char *src, int base, int cnt)
128 { 153 {
129 int val; 154 int i, val;
130 char *p;
131 char tmp[4]; /* At most three characters + zero */
132 155
133 /* Notice: No use to check `cnt'. It should be either 2 or 3 */ 156 for (i = 0, val = 0; i < cnt; i++, src++)
134 memcpy (tmp, src, cnt); 157 {
135 tmp[cnt] = 0; 158 int n = *(unsigned char*)src;
136 val = strtoul (tmp, &p, base); 159 if (n > 127 || (n = to_num(n)) >= base)
137 return (*p == 0) ? val : -1; 160 break;
161 val = val*base + n;
162 }
163 *pval = val;
164 return i;
138 } 165 }
139 166
140 static size_t 167 size_t
141 escaped_length (const char *str, int *quote) 168 argcv_quoted_length (const char *str, int *quote)
142 { 169 {
143 size_t len = 0; 170 size_t len = 0;
144 171
172 *quote = 0;
145 for (; *str; str++) 173 for (; *str; str++)
146 { 174 {
147 if (*str == ' ') 175 if (*str == ' ')
...@@ -154,9 +182,9 @@ escaped_length (const char *str, int *quote) ...@@ -154,9 +182,9 @@ escaped_length (const char *str, int *quote)
154 len += 2; 182 len += 2;
155 *quote = 1; 183 *quote = 1;
156 } 184 }
157 else if (isprint (*str)) 185 else if (*str != '\t' && *str != '\\' && isprint (*str))
158 len++; 186 len++;
159 else if (argcv_escape_char (*str) != -1) 187 else if (argcv_quote_char (*str) != -1)
160 len += 2; 188 len += 2;
161 else 189 else
162 len += 4; 190 len += 4;
...@@ -164,84 +192,85 @@ escaped_length (const char *str, int *quote) ...@@ -164,84 +192,85 @@ escaped_length (const char *str, int *quote)
164 return len; 192 return len;
165 } 193 }
166 194
167 static void 195 void
168 unescape_copy (char *dst, const char *src, size_t n) 196 argcv_unquote_copy (char *dst, const char *src, size_t n)
169 { 197 {
198 int i;
170 int c; 199 int c;
200 int expect_delim = 0;
171 201
172 while (n > 0) 202 for (i = 0; i < n; )
173 { 203 {
174 n--; 204 switch (src[i])
175 if (*src == '\\')
176 { 205 {
177 switch (*++src) 206 case '\'':
207 case '"':
208 ++i;
209 if (expect_delim)
210 expect_delim = 0;
211 else
212 expect_delim = src[i];
213 break;
214
215 case '\\':
216 ++i;
217 if (src[i] == 'x' || src[i] == 'X')
178 { 218 {
179 case 'x': 219 if (n - i < 2)
180 case 'X':
181 ++src;
182 --n;
183 if (n == 0)
184 { 220 {
185 *dst++ = '\\'; 221 *dst++ = '\\';
186 *dst++ = src[-1]; 222 *dst++ = src[i++];
187 } 223 }
188 else 224 else
189 { 225 {
190 c = xtonum(src, 16, 2); 226 int off = xtonum(&c, src + i + 1, 16, 2);
191 if (c == -1) 227 if (off == 0)
192 { 228 {
193 *dst++ = '\\'; 229 *dst++ = '\\';
194 *dst++ = src[-1]; 230 *dst++ = src[i++];
195 } 231 }
196 else 232 else
197 { 233 {
198 *dst++ = c; 234 *dst++ = c;
199 src += 2; 235 i += off + 1;
200 n -= 2;
201 } 236 }
202 } 237 }
203 break; 238 }
204 239 else if ((unsigned char)src[i] < 128 && isdigit(src[i]))
205 case '0': 240 {
206 ++src; 241 if (n - i < 1)
207 --n;
208 if (n == 0)
209 { 242 {
210 *dst++ = '\\'; 243 *dst++ = '\\';
211 *dst++ = src[-1]; 244 *dst++ = src[i++];
212 } 245 }
213 else 246 else
214 { 247 {
215 c = xtonum(src, 8, 3); 248 int off = xtonum(&c, src+i, 8, 3);
216 if (c == -1) 249 if (off == 0)
217 { 250 {
218 *dst++ = '\\'; 251 *dst++ = '\\';
219 *dst++ = src[-1]; 252 *dst++ = src[i++];
220 } 253 }
221 else 254 else
222 { 255 {
223 *dst++ = c; 256 *dst++ = c;
224 src += 3; 257 i += off;
225 n -= 3;
226 } 258 }
227 } 259 }
228 break;
229
230 default:
231 *dst++ = argcv_unescape_char (*src++);
232 n--;
233 } 260 }
234 } 261 else
235 else 262 *dst++ = argcv_unquote_char (src[i++]);
236 { 263 break;
237 *dst++ = *src++; 264
265 default:
266 *dst++ = src[i++];
238 } 267 }
239 } 268 }
240 *dst = 0; 269 *dst = 0;
241 } 270 }
242 271
243 static void 272 void
244 escape_copy (char *dst, const char *src) 273 argcv_quote_copy (char *dst, const char *src)
245 { 274 {
246 for (; *src; src++) 275 for (; *src; src++)
247 { 276 {
...@@ -250,11 +279,11 @@ escape_copy (char *dst, const char *src) ...@@ -250,11 +279,11 @@ escape_copy (char *dst, const char *src)
250 *dst++ = '\\'; 279 *dst++ = '\\';
251 *dst++ = '"'; 280 *dst++ = '"';
252 } 281 }
253 else if (*src != '\t' && isprint(*src)) 282 else if (*src != '\t' && *src != '\\' && isprint(*src))
254 *dst++ = *src; 283 *dst++ = *src;
255 else 284 else
256 { 285 {
257 int c = argcv_escape_char (*src); 286 int c = argcv_quote_char (*src);
258 *dst++ = '\\'; 287 *dst++ = '\\';
259 if (c != -1) 288 if (c != -1)
260 *dst++ = c; 289 *dst++ = c;
...@@ -270,7 +299,7 @@ escape_copy (char *dst, const char *src) ...@@ -270,7 +299,7 @@ escape_copy (char *dst, const char *src)
270 } 299 }
271 300
272 int 301 int
273 argcv_get (const char *command, const char *delim, const char* cmnt, 302 argcv_get (const char *command, const char *delim, const char *cmnt,
274 int *argc, char ***argv) 303 int *argc, char ***argv)
275 { 304 {
276 int len = strlen (command); 305 int len = strlen (command);
...@@ -287,7 +316,9 @@ argcv_get (const char *command, const char *delim, const char* cmnt, ...@@ -287,7 +316,9 @@ argcv_get (const char *command, const char *delim, const char* cmnt,
287 (*argc)++; 316 (*argc)++;
288 317
289 *argv = calloc ((*argc + 1), sizeof (char *)); 318 *argv = calloc ((*argc + 1), sizeof (char *));
290 319 if (*argv == NULL)
320 return ENOMEM;
321
291 i = 0; 322 i = 0;
292 save = 0; 323 save = 0;
293 for (i = 0; i < *argc; i++) 324 for (i = 0; i < *argc; i++)
...@@ -304,8 +335,8 @@ argcv_get (const char *command, const char *delim, const char* cmnt, ...@@ -304,8 +335,8 @@ argcv_get (const char *command, const char *delim, const char* cmnt,
304 n = end - start + 1; 335 n = end - start + 1;
305 (*argv)[i] = calloc (n+1, sizeof (char)); 336 (*argv)[i] = calloc (n+1, sizeof (char));
306 if ((*argv)[i] == NULL) 337 if ((*argv)[i] == NULL)
307 return 1; 338 return ENOMEM;
308 unescape_copy ((*argv)[i], &command[start], n); 339 argcv_unquote_copy ((*argv)[i], &command[start], n);
309 (*argv)[i][n] = 0; 340 (*argv)[i][n] = 0;
310 } 341 }
311 (*argv)[i] = NULL; 342 (*argv)[i] = NULL;
...@@ -324,7 +355,7 @@ argcv_free (int argc, char **argv) ...@@ -324,7 +355,7 @@ argcv_free (int argc, char **argv)
324 if (argv[argc]) 355 if (argv[argc])
325 free (argv[argc]); 356 free (argv[argc]);
326 free (argv); 357 free (argv);
327 return 1; 358 return 0;
328 } 359 }
329 360
330 /* Take an argv and make a string separated by ' '. */ 361 /* Take an argv and make a string separated by ' '. */
...@@ -337,19 +368,19 @@ argcv_string (int argc, char **argv, char **pstring) ...@@ -337,19 +368,19 @@ argcv_string (int argc, char **argv, char **pstring)
337 368
338 /* No need. */ 369 /* No need. */
339 if (pstring == NULL) 370 if (pstring == NULL)
340 return 1; 371 return EINVAL;
341 372
342 buffer = malloc (1); 373 buffer = malloc (1);
343 if (buffer == NULL) 374 if (buffer == NULL)
344 return 1; 375 return ENOMEM;
345 *buffer = '\0'; 376 *buffer = '\0';
346 377
347 for (len = i = j = 0; i < argc; i++) 378 for (len = i = j = 0; i < argc; i++)
348 { 379 {
349 int quote = 0; 380 int quote;
350 int toklen; 381 int toklen;
351 382
352 toklen = escaped_length (argv[i], &quote); 383 toklen = argcv_quoted_length (argv[i], &quote);
353 384
354 len += toklen + 2; 385 len += toklen + 2;
355 if (quote) 386 if (quote)
...@@ -357,13 +388,13 @@ argcv_string (int argc, char **argv, char **pstring) ...@@ -357,13 +388,13 @@ argcv_string (int argc, char **argv, char **pstring)
357 388
358 buffer = realloc (buffer, len); 389 buffer = realloc (buffer, len);
359 if (buffer == NULL) 390 if (buffer == NULL)
360 return 1; 391 return ENOMEM;
361 392
362 if (i != 0) 393 if (i != 0)
363 buffer[j++] = ' '; 394 buffer[j++] = ' ';
364 if (quote) 395 if (quote)
365 buffer[j++] = '"'; 396 buffer[j++] = '"';
366 escape_copy (buffer + j, argv[i]); 397 argcv_quote_copy (buffer + j, argv[i]);
367 j += toklen; 398 j += toklen;
368 if (quote) 399 if (quote)
369 buffer[j++] = '"'; 400 buffer[j++] = '"';
...@@ -377,21 +408,3 @@ argcv_string (int argc, char **argv, char **pstring) ...@@ -377,21 +408,3 @@ argcv_string (int argc, char **argv, char **pstring)
377 return 0; 408 return 0;
378 } 409 }
379 410
380 #if 0
381 char *command = "set prompt=\"& \a\\\"\" \\x25\\0145\\098\\ta";
382
383 main(int xargc, char **xargv)
384 {
385 int i, argc;
386 char **argv;
387 char *s;
388
389 argcv_get (xargv[1] ? xargv[1]:command, "=", "#", &argc, &argv);
390 printf ("%d args:\n", argc);
391 for (i = 0; i < argc; i++)
392 printf ("%s\n", argv[i]);
393 printf ("===\n");
394 argcv_string (argc, argv, &s);
395 printf ("%s\n", s);
396 }
397 #endif
......