Implemented escaping/unescaping of non-printable characters
Showing
1 changed file
with
212 additions
and
28 deletions
... | @@ -52,7 +52,9 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt, | ... | @@ -52,7 +52,9 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt, |
52 | { | 52 | { |
53 | case '"': | 53 | case '"': |
54 | case '\'': | 54 | case '\'': |
55 | while (++i < len && command[i] != command[*start]) | 55 | while (++i < len |
56 | && (command[i] != command[*start] | ||
57 | || command[i-1] == '\\')) | ||
56 | ; | 58 | ; |
57 | if (i < len) /* found matching quote */ | 59 | if (i < len) /* found matching quote */ |
58 | break; | 60 | break; |
... | @@ -89,9 +91,185 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt, | ... | @@ -89,9 +91,185 @@ argcv_scan (int len, const char *command, const char *delim, const char* cmnt, |
89 | return *save; | 91 | return *save; |
90 | } | 92 | } |
91 | 93 | ||
/* Escape translation table, stored as consecutive pairs: first the
   letter that follows a backslash in escaped text, then the character
   that this letter stands for.  */
static const char escape_transtab[] = "\\\\a\ab\bf\fn\nr\rt\t";

/* Return the character denoted by the escape letter C (the character
   that follows a backslash).  A letter that has no entry in
   escape_transtab is returned unchanged.  */
static int
unescape_char (int c)
{
  const char *p;

  /* Even offsets hold the escape letters.  */
  for (p = escape_transtab; *p; p += 2)
    {
      if (*p == c)
        return p[1];
    }
  return c;
}

/* Return the escape letter that represents character C, or -1 if
   escape_transtab has no entry for C.  The -1 result is what the
   callers test for in order to fall back to a numeric escape.  */
static int
escape_char (int c)
{
  const char *p;

  /* Odd offsets hold the translated characters; walk them from the
     last pair down to the first.  sizeof counts the terminating zero,
     hence the `- 2'.  */
  for (p = escape_transtab + sizeof (escape_transtab) - 2;
       p > escape_transtab; p -= 2)
    {
      if (*p == c)
        return p[-1];
    }
  return -1;
}
122 | |||
123 | |||
/* Convert exactly CNT characters starting at SRC to a number in the
   given BASE (2..36).

   Returns the converted value, or -1 if any of the CNT characters is
   not a digit of BASE.  The terminating zero is not a digit, so the
   function never reads past the end of a string shorter than CNT.
   Whitespace and sign characters are rejected.  CNT is expected to be
   2 or 3; values above 3 yield -1.  */
static int
xtonum (const char *src, int base, size_t cnt)
{
  static const char lower[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  static const char upper[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  long val = 0;
  size_t i;

  if (base < 2 || base > 36 || cnt > 3)
    return -1;

  for (i = 0; i < cnt; i++)
    {
      const char *hit;
      int digit;

      /* Check for the terminator first: strchr would otherwise match
         the zero byte that ends the digit tables.  */
      if (src[i] == 0)
        return -1;

      if ((hit = strchr (lower, src[i])) != NULL)
        digit = (int) (hit - lower);
      else if ((hit = strchr (upper, src[i])) != NULL)
        digit = (int) (hit - upper);
      else
        return -1;

      if (digit >= base)
        return -1;
      val = val * base + digit;
    }
  return (int) val;
}
137 | |||
/* Return the number of bytes the escaped form of STR occupies, not
   counting surrounding quotes or the terminating zero.

   *QUOTE is set to 1 if STR contains a space or a double quote; it is
   never cleared here, so the caller must initialize it.

   Per-character cost:
     space                                   1
     double quote                            2  (backslash + quote)
     printable, other than TAB               1
     has a letter escape (escape_char)       2  (backslash + letter)
     anything else                           4  (backslash + 3 digits)  */
static size_t
escaped_length (const char *str, int *quote)
{
  size_t len = 0;

  for (; *str; str++)
    {
      if (*str == ' ')
        {
          len++;
          *quote = 1;
        }
      else if (*str == '"')
        {
          len += 2;
          *quote = 1;
        }
      /* The <ctype.h> functions are only defined for values
         representable as unsigned char (or EOF), hence the cast.
         TAB is excluded explicitly so that this test is the same one
         escape_copy applies before copying a byte verbatim.  */
      else if (*str != '\t' && isprint ((unsigned char) *str))
        len++;
      else if (escape_char (*str) != -1)
        len += 2;
      else
        len += 4;
    }
  return len;
}
164 | |||
/* Copy N characters from SRC to DST, replacing backslash escapes by
   the characters they denote, and terminate DST with a zero byte.

   Recognized sequences:
     \xHH, \XHH   two hexadecimal digits
     \0OOO        a `0' followed by three octal digits
     \C           any other character C, translated by unescape_char

   A numeric escape that is malformed or cut short by the end of the
   input is copied as the two characters backslash + introducer, and
   the characters after it are then processed normally.  A backslash
   that is the very last input character is copied literally.

   The output is never longer than the input, so DST must have room
   for at least N + 1 bytes.  */
static void
unescape_copy (char *dst, const char *src, size_t n)
{
  const char *end = src + n;

  while (src < end)
    {
      char tag;
      int base;
      size_t ndigits;
      int c;

      /* Ordinary character, or a lone trailing backslash with nothing
         after it inside the N-character window.  */
      if (*src != '\\' || end - src < 2)
        {
          *dst++ = *src++;
          continue;
        }

      tag = src[1];
      src += 2;

      switch (tag)
        {
        case 'x':
        case 'X':
          base = 16;
          ndigits = 2;
          break;

        case '0':
          base = 8;
          ndigits = 3;
          break;

        default:
          *dst++ = (char) unescape_char (tag);
          continue;
        }

      /* Convert only when all digits lie inside the window; this keeps
         both the read and the bookkeeping within bounds.  */
      if ((size_t) (end - src) >= ndigits
          && (c = xtonum (src, base, ndigits)) != -1)
        {
          *dst++ = (char) c;
          src += ndigits;
        }
      else
        {
          *dst++ = '\\';
          *dst++ = tag;
        }
    }
  *dst = 0;
}
240 | |||
/* Write the escaped form of SRC to DST.

   A double quote becomes backslash + quote.  Printable characters
   other than TAB are copied verbatim.  Every other character becomes
   backslash + letter when escape_char knows one, or backslash + three
   octal digits otherwise.

   DST is NOT zero-terminated and no bounds are checked: the caller
   must provide a buffer sized with escaped_length and terminate the
   result itself.

   NOTE(review): the numeric form written here is backslash + three
   octal digits; confirm that the decoder accepts this form.  */
static void
escape_copy (char *dst, const char *src)
{
  for (; *src; src++)
    {
      if (*src == '"')
        {
          *dst++ = '\\';
          *dst++ = '"';
        }
      /* <ctype.h> functions are only defined for values representable
         as unsigned char (or EOF), hence the cast.  */
      else if (*src != '\t' && isprint ((unsigned char) *src))
        *dst++ = *src;
      else
        {
          int c = escape_char (*src);

          *dst++ = '\\';
          if (c != -1)
            *dst++ = (char) c;
          else
            {
              char tmp[4];

              /* Convert through unsigned char: a negative char would be
                 sign-extended and print as more than three digits,
                 which the size limit then truncates to "377".  */
              snprintf (tmp, sizeof tmp, "%03o",
                        (unsigned int) (unsigned char) *src);
              memcpy (dst, tmp, 3);
              dst += 3;
            }
        }
    }
}
269 | |||
92 | int | 270 | int |
93 | argcv_get (const char *command, const char *delim, const char* cmnt, | 271 | argcv_get (const char *command, const char *delim, const char* cmnt, |
94 | int *argc, char ***argv) | 272 | int *argc, char ***argv) |
95 | { | 273 | { |
96 | int len = strlen (command); | 274 | int len = strlen (command); |
97 | int i = 0; | 275 | int i = 0; |
... | @@ -115,14 +293,8 @@ argcv_get (const char *command, const char *delim, const char* cmnt, | ... | @@ -115,14 +293,8 @@ argcv_get (const char *command, const char *delim, const char* cmnt, |
115 | int n; | 293 | int n; |
116 | argcv_scan (len, command, delim, cmnt, &start, &end, &save); | 294 | argcv_scan (len, command, delim, cmnt, &start, &end, &save); |
117 | 295 | ||
118 | /* FIXME: this is the right place to do unescaping as well | 296 | if ((command[start] == '"' || command[end] == '\'') |
119 | as stripping of quotes. */ | 297 | && command[end] == command[start]) |
120 | if (command[start] == '"' && command[end] == '"') | ||
121 | { | ||
122 | start++; | ||
123 | end--; | ||
124 | } | ||
125 | else if (command[start] == '\'' && command[end] == '\'') | ||
126 | { | 298 | { |
127 | start++; | 299 | start++; |
128 | end--; | 300 | end--; |
... | @@ -131,7 +303,7 @@ argcv_get (const char *command, const char *delim, const char* cmnt, | ... | @@ -131,7 +303,7 @@ argcv_get (const char *command, const char *delim, const char* cmnt, |
131 | (*argv)[i] = calloc (n+1, sizeof (char)); | 303 | (*argv)[i] = calloc (n+1, sizeof (char)); |
132 | if ((*argv)[i] == NULL) | 304 | if ((*argv)[i] == NULL) |
133 | return 1; | 305 | return 1; |
134 | memcpy ((*argv)[i], &command[start], n); | 306 | unescape_copy ((*argv)[i], &command[start], n); |
135 | (*argv)[i][n] = 0; | 307 | (*argv)[i][n] = 0; |
136 | } | 308 | } |
137 | (*argv)[i] = NULL; | 309 | (*argv)[i] = NULL; |
... | @@ -158,8 +330,7 @@ argcv_free (int argc, char **argv) | ... | @@ -158,8 +330,7 @@ argcv_free (int argc, char **argv) |
158 | int | 330 | int |
159 | argcv_string (int argc, char **argv, char **pstring) | 331 | argcv_string (int argc, char **argv, char **pstring) |
160 | { | 332 | { |
161 | int i; | 333 | size_t i, j, len; |
162 | size_t len; | ||
163 | char *buffer; | 334 | char *buffer; |
164 | 335 | ||
165 | /* No need. */ | 336 | /* No need. */ |
... | @@ -171,41 +342,54 @@ argcv_string (int argc, char **argv, char **pstring) | ... | @@ -171,41 +342,54 @@ argcv_string (int argc, char **argv, char **pstring) |
171 | return 1; | 342 | return 1; |
172 | *buffer = '\0'; | 343 | *buffer = '\0'; |
173 | 344 | ||
174 | for (len = i = 0; i < argc; i++) | 345 | for (len = i = j = 0; i < argc; i++) |
175 | { | 346 | { |
176 | len += strlen (argv[i]) + 2; | 347 | int quote = 0; |
348 | int toklen; | ||
349 | |||
350 | toklen = escaped_length (argv[i], "e); | ||
351 | |||
352 | len += toklen + 2; | ||
353 | if (quote) | ||
354 | len += 2; | ||
355 | |||
177 | buffer = realloc (buffer, len); | 356 | buffer = realloc (buffer, len); |
178 | if (buffer == NULL) | 357 | if (buffer == NULL) |
179 | return 1; | 358 | return 1; |
359 | |||
180 | if (i != 0) | 360 | if (i != 0) |
181 | strcat (buffer, " "); | 361 | buffer[j++] = ' '; |
182 | strcat (buffer, argv[i]); | 362 | if (quote) |
363 | buffer[j++] = '"'; | ||
364 | escape_copy (buffer + j, argv[i]); | ||
365 | j += toklen; | ||
366 | if (quote) | ||
367 | buffer[j++] = '"'; | ||
183 | } | 368 | } |
184 | 369 | ||
185 | /* Strip off trailing space. */ | 370 | for (; j > 0 && isspace (buffer[j-1]); j--) |
186 | if (*buffer != '\0') | 371 | ; |
187 | { | 372 | buffer[j] = 0; |
188 | while (buffer[strlen (buffer) - 1] == ' ') | ||
189 | { | ||
190 | buffer[strlen (buffer) - 1] = '\0'; | ||
191 | } | ||
192 | } | ||
193 | if (pstring) | 373 | if (pstring) |
194 | *pstring = buffer; | 374 | *pstring = buffer; |
195 | return 0; | 375 | return 0; |
196 | } | 376 | } |
197 | 377 | ||
#if 0
/* Disabled stand-alone test driver.  It splits the first command line
   argument (or the built-in sample below) with argcv_get, prints every
   resulting argument on its own line, then prints the string that
   argcv_string rebuilds from them.  */
const char *command = "set prompt=\"& \a\\\"\" \\x25\\0145\\098\\ta";

int
main (int xargc, char **xargv)
{
  int i, argc;
  char **argv;
  char *s;

  if (argcv_get (xargc > 1 ? xargv[1] : command, "=", "#", &argc, &argv))
    return 1;
  printf ("%d args:\n", argc);
  for (i = 0; i < argc; i++)
    printf ("%s\n", argv[i]);
  printf ("===\n");
  if (argcv_string (argc, argv, &s))
    return 1;
  printf ("%s\n", s);
  free (s);
  return 0;
}
#endif
-
Please register or sign in to post a comment