Parser for format strings.
Showing
1 changed file
with
586 additions
and
0 deletions
mh/mh_fmtgram.y
0 → 100644
1 | %{ | ||
2 | /* GNU mailutils - a suite of utilities for electronic mail | ||
3 | Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. | ||
4 | |||
5 | This program is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published by | ||
7 | the Free Software Foundation; either version 2, or (at your option) | ||
8 | any later version. | ||
9 | |||
10 | This program is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | GNU General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with this program; if not, write to the Free Software | ||
17 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | ||
18 | |||
19 | #include <mh.h> | ||
20 | #define obstack_chunk_alloc malloc | ||
21 | #define obstack_chunk_free free | ||
22 | #include <obstack.h> | ||
23 | |||
24 | static mh_format_t format; /* Format structure being built */ | ||
25 | static size_t pc; /* Program counter. Points to current | ||
26 | cell in format.prog */ | ||
27 | static struct obstack stack; /* Temporary token storage */ | ||
28 | |||
29 | #define FORMAT_INC 64 /* Increase format.prog by that many | ||
30 | cells each time pc reaches | ||
31 | format.progsize */ | ||
32 | |||
33 | static size_t mh_code_op (mh_opcode_t op); | ||
34 | static size_t mh_code_string (char *string); | ||
35 | static size_t mh_code_number (int num); | ||
36 | static size_t mh_code_builtin (mh_builtin_t *bp, int argtype); | ||
37 | static void branch_fixup (size_t pc); /* Fix-up conditional branches */ | ||
38 | |||
39 | /* Lexical tie-ins */ | ||
40 | static int in_escape; /* Set when inside an escape sequence */ | ||
41 | static int want_function; /* Set when expecting function name */ | ||
42 | %} | ||
43 | |||
44 | %union { | ||
45 | char *str; | ||
46 | int num; | ||
47 | int type; | ||
48 | struct { | ||
49 | size_t cond; | ||
50 | size_t end; | ||
51 | } elif_list; | ||
52 | size_t pc; | ||
53 | mh_builtin_t *builtin; | ||
54 | }; | ||
55 | %token <num> NUMBER | ||
56 | %token <str> STRING | ||
57 | %token <builtin> FUNCTION | ||
58 | %token IF ELIF ELSE FI | ||
59 | %token OBRACE CBRACE OCURLY CCURLY | ||
60 | %token <num> FMTSPEC | ||
61 | %token BOGUS | ||
62 | %type <type> cond_expr component funcall item argument escape literal | ||
63 | %type <elif_list> elif_part elif_list else_part | ||
64 | %type <pc> cond end else elif | ||
65 | %type <builtin> function | ||
66 | |||
67 | %% | ||
68 | |||
69 | input : list | ||
70 | ; | ||
71 | |||
72 | list : pitem | ||
73 | | list pitem | ||
74 | ; | ||
75 | |||
76 | pitem : item | ||
77 | { | ||
78 | switch ($1) | ||
79 | { | ||
80 | case mhtype_none: | ||
81 | break; | ||
82 | |||
83 | case mhtype_num: | ||
84 | mh_code_op (mhop_num_print); | ||
85 | break; | ||
86 | |||
87 | case mhtype_str: | ||
88 | mh_code_op (mhop_str_print); | ||
89 | break; | ||
90 | |||
91 | default: | ||
92 | yyerror ("UNEXPECTED item TYPE"); | ||
93 | abort (); | ||
94 | } | ||
95 | } | ||
96 | ; | ||
97 | |||
98 | item : literal | ||
99 | | escape | ||
100 | { | ||
101 | in_escape = 0; | ||
102 | } | ||
103 | ; | ||
104 | |||
105 | literal : STRING | ||
106 | { | ||
107 | mh_code_string ($1); | ||
108 | mh_code_op (mhop_str_asgn); | ||
109 | $$ = mhtype_str; | ||
110 | } | ||
111 | | NUMBER | ||
112 | { | ||
113 | mh_code_number ($1); | ||
114 | mh_code_op (mhop_num_asgn); | ||
115 | $$ = mhtype_num; | ||
116 | } | ||
117 | ; | ||
118 | |||
119 | escape : component | ||
120 | | funcall | ||
121 | | cntl | ||
122 | { | ||
123 | $$ = mhtype_none; | ||
124 | } | ||
125 | ; | ||
126 | |||
127 | component : fmtspec OCURLY STRING CCURLY | ||
128 | { | ||
129 | if (strcasecmp ($3, "body") == 0) | ||
130 | { | ||
131 | mh_code_op (mhop_body); | ||
132 | } | ||
133 | else | ||
134 | { | ||
135 | mh_code_string ($3); | ||
136 | mh_code_op (mhop_header); | ||
137 | } | ||
138 | $$ = mhtype_str; | ||
139 | } | ||
140 | ; | ||
141 | |||
142 | obrace : OBRACE | ||
143 | { | ||
144 | in_escape++; | ||
145 | } | ||
146 | ; | ||
147 | |||
148 | cbrace : CBRACE | ||
149 | { | ||
150 | in_escape--; | ||
151 | } | ||
152 | ; | ||
153 | |||
154 | funcall : fmtspec obrace { want_function = 1;} function { want_function = 0; } argument cbrace | ||
155 | { | ||
156 | switch ($6) | ||
157 | { | ||
158 | case mhtype_num: | ||
159 | mh_code_op (mhop_num_to_arg); | ||
160 | break; | ||
161 | case mhtype_str: | ||
162 | mh_code_op (mhop_str_to_arg); | ||
163 | } | ||
164 | if (!mh_code_builtin ($4, $6)) | ||
165 | YYERROR; | ||
166 | $$ = $4->type; | ||
167 | } | ||
168 | ; | ||
169 | |||
170 | fmtspec : /* empty */ | ||
171 | | FMTSPEC | ||
172 | { | ||
173 | mh_code_op (mhop_fmtspec); | ||
174 | mh_code_op ($1); | ||
175 | } | ||
176 | ; | ||
177 | |||
178 | function : FUNCTION | ||
179 | | STRING | ||
180 | { | ||
181 | yyerror ("undefined function"); | ||
182 | mh_error ($1); | ||
183 | YYERROR; | ||
184 | } | ||
185 | ; | ||
186 | |||
187 | argument : /* empty */ | ||
188 | { | ||
189 | $$ = mhtype_none; | ||
190 | } | ||
191 | | literal | ||
192 | | escape | ||
193 | ; | ||
194 | |||
195 | /* 1 2 3 4 5 6 7 */ | ||
196 | cntl : if cond list end elif_part else_part FI | ||
197 | { | ||
198 | size_t start_pc = 0, end_pc = 0; | ||
199 | |||
200 | /* Fixup first condition */ | ||
201 | if ($5.cond) | ||
202 | format.prog[$2] = $5.cond - $2; | ||
203 | else if ($6.cond) | ||
204 | format.prog[$2] = $6.cond - $2; | ||
205 | else | ||
206 | format.prog[$2] = $4 - $2 - 1; | ||
207 | |||
208 | /* Link all "false" lists */ | ||
209 | if ($6.cond) | ||
210 | { | ||
211 | start_pc = end_pc = $6.end; | ||
212 | } | ||
213 | if ($5.cond) | ||
214 | { | ||
215 | if (start_pc) | ||
216 | format.prog[end_pc] = $5.end; | ||
217 | else | ||
218 | start_pc = $5.end; | ||
219 | end_pc = $5.end; | ||
220 | for (; format.prog[end_pc]; end_pc = format.prog[end_pc]) | ||
221 | ; | ||
222 | } | ||
223 | |||
224 | if (start_pc) | ||
225 | format.prog[end_pc] = $4; | ||
226 | else | ||
227 | start_pc = $4; | ||
228 | |||
229 | /* Now, fixup the end branches */ | ||
230 | branch_fixup (start_pc); | ||
231 | format.prog[start_pc] = pc - start_pc; | ||
232 | } | ||
233 | ; | ||
234 | |||
235 | if : IF | ||
236 | { | ||
237 | in_escape++; | ||
238 | } | ||
239 | ; | ||
240 | |||
241 | elif : ELIF | ||
242 | { | ||
243 | in_escape++; | ||
244 | $$ = pc; | ||
245 | } | ||
246 | ; | ||
247 | |||
248 | end : /* empty */ | ||
249 | { | ||
250 | mh_code_op (mhop_branch); | ||
251 | $$ = mh_code_op (0); | ||
252 | } | ||
253 | ; | ||
254 | |||
255 | cond : cond_expr | ||
256 | { | ||
257 | in_escape--; | ||
258 | if ($1 == mhtype_str) | ||
259 | mh_code_op (mhop_str_branch); | ||
260 | else | ||
261 | mh_code_op (mhop_num_branch); | ||
262 | $$ = mh_code_op (0); | ||
263 | } | ||
264 | |||
265 | cond_expr : component | ||
266 | | funcall | ||
267 | ; | ||
268 | |||
269 | elif_part : /* empty */ | ||
270 | { | ||
271 | $$.cond = 0; | ||
272 | $$.end = 0; | ||
273 | } | ||
274 | | elif_list end | ||
275 | { | ||
276 | $$.cond = $1.cond; | ||
277 | format.prog[$2] = $1.end; | ||
278 | $$.end = $2; | ||
279 | } | ||
280 | ; | ||
281 | |||
282 | elif_list : elif cond list | ||
283 | { | ||
284 | $$.cond = $1; | ||
285 | format.prog[$2] = pc - $2 + 2; | ||
286 | $$.end = 0; | ||
287 | } | ||
288 | | elif_list end elif cond list | ||
289 | { | ||
290 | format.prog[$4] = pc - $4 + 2; | ||
291 | $$.cond = $1.cond; | ||
292 | format.prog[$2] = $1.end; | ||
293 | $$.end = $2; | ||
294 | } | ||
295 | ; | ||
296 | |||
297 | else_part : /* empty */ | ||
298 | { | ||
299 | $$.cond = 0; | ||
300 | $$.end = 0; | ||
301 | } | ||
302 | | else list end | ||
303 | { | ||
304 | $$.cond = $1; | ||
305 | $$.end = $3; | ||
306 | } | ||
307 | ; | ||
308 | |||
309 | else : ELSE | ||
310 | { | ||
311 | $$ = pc; | ||
312 | } | ||
313 | ; | ||
314 | |||
315 | %% | ||
316 | |||
317 | static char *start; | ||
318 | static char *curp; | ||
319 | |||
320 | int | ||
321 | yyerror (char *s) | ||
322 | { | ||
323 | int len; | ||
324 | mh_error ("%s: %s", start, s); | ||
325 | len = curp - start; | ||
326 | mh_error ("%*.*s^", len, len, ""); | ||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | #define isdelim(c) (strchr("%<>?|(){} ",c) != NULL) | ||
331 | |||
332 | static int percent; | ||
333 | static int backslash(int c); | ||
334 | |||
335 | int | ||
336 | yylex () | ||
337 | { | ||
338 | if (*curp == '%') | ||
339 | { | ||
340 | curp++; | ||
341 | percent = 1; | ||
342 | if (isdigit (*curp) || *curp == '-') | ||
343 | { | ||
344 | int num = 0; | ||
345 | int flags = 0; | ||
346 | |||
347 | if (*curp == '-') | ||
348 | { | ||
349 | curp++; | ||
350 | flags = MH_FMT_RALIGN; | ||
351 | } | ||
352 | if (*curp == '0') | ||
353 | flags |= MH_FMT_ZEROPAD; | ||
354 | while (*curp && isdigit (*curp)) | ||
355 | num = num * 10 + *curp++ - '0'; | ||
356 | yylval.num = num | flags; | ||
357 | return FMTSPEC; | ||
358 | } | ||
359 | } | ||
360 | |||
361 | if (percent) | ||
362 | { | ||
363 | percent = 0; | ||
364 | switch (*curp++) | ||
365 | { | ||
366 | case '<': | ||
367 | return IF; | ||
368 | case '>': | ||
369 | return FI; | ||
370 | case '?': | ||
371 | return ELIF; | ||
372 | case '|': | ||
373 | return ELSE; | ||
374 | case '%': | ||
375 | return '%'; | ||
376 | case '(': | ||
377 | return OBRACE; | ||
378 | case '{': | ||
379 | return OCURLY; | ||
380 | default: | ||
381 | return BOGUS; | ||
382 | } | ||
383 | } | ||
384 | |||
385 | if (in_escape) | ||
386 | switch (*curp) | ||
387 | { | ||
388 | case '(': | ||
389 | curp++; | ||
390 | return OBRACE; | ||
391 | case '{': | ||
392 | curp++; | ||
393 | return OCURLY; | ||
394 | case '0':case '1':case '2':case '3':case '4': | ||
395 | case '5':case '6':case '7':case '8':case '9': | ||
396 | yylval.num = strtol (curp, &curp, 0); | ||
397 | return NUMBER; | ||
398 | } | ||
399 | |||
400 | switch (*curp) | ||
401 | { | ||
402 | case ')': | ||
403 | curp++; | ||
404 | return CBRACE; | ||
405 | case '}': | ||
406 | curp++; | ||
407 | return CCURLY; | ||
408 | case 0: | ||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | do | ||
413 | { | ||
414 | if (*curp == '\\') | ||
415 | { | ||
416 | int c = backslash (*++curp); | ||
417 | obstack_1grow (&stack, c); | ||
418 | } | ||
419 | else | ||
420 | obstack_1grow (&stack, *curp); | ||
421 | curp++; | ||
422 | } | ||
423 | while (*curp && !isdelim(*curp)); | ||
424 | |||
425 | obstack_1grow (&stack, 0); | ||
426 | yylval.str = obstack_finish (&stack); | ||
427 | |||
428 | if (want_function) | ||
429 | { | ||
430 | int rest; | ||
431 | mh_builtin_t *bp = mh_lookup_builtin (yylval.str, &rest); | ||
432 | if (bp) | ||
433 | { | ||
434 | curp -= rest; | ||
435 | yylval.builtin = bp; | ||
436 | return FUNCTION; | ||
437 | } | ||
438 | } | ||
439 | |||
440 | return STRING; | ||
441 | } | ||
442 | |||
443 | int | ||
444 | mh_format_parse (char *format_str, mh_format_t *fmt) | ||
445 | { | ||
446 | int rc; | ||
447 | |||
448 | start = curp = format_str; | ||
449 | obstack_init (&stack); | ||
450 | format.progsize = 0; | ||
451 | pc = 0; | ||
452 | mh_code_op (mhop_stop); | ||
453 | |||
454 | in_escape = 0; | ||
455 | percent = 0; | ||
456 | |||
457 | rc = yyparse (); | ||
458 | mh_code_op (mhop_stop); | ||
459 | obstack_free (&stack, NULL); | ||
460 | if (rc) | ||
461 | { | ||
462 | mh_format_free (&format); | ||
463 | return 1; | ||
464 | } | ||
465 | *fmt = format; | ||
466 | return 0; | ||
467 | } | ||
468 | |||
469 | int | ||
470 | backslash(int c) | ||
471 | { | ||
472 | static char transtab[] = "b\bf\fn\nr\rt\t"; | ||
473 | char *p; | ||
474 | |||
475 | for (p = transtab; *p; p += 2) | ||
476 | { | ||
477 | if (*p == c) | ||
478 | return p[1]; | ||
479 | } | ||
480 | return c; | ||
481 | } | ||
482 | |||
483 | void | ||
484 | branch_fixup (size_t epc) | ||
485 | { | ||
486 | size_t prev = format.prog[epc]; | ||
487 | if (!prev) | ||
488 | return; | ||
489 | branch_fixup (prev); | ||
490 | format.prog[prev] = epc - prev - 1; | ||
491 | } | ||
492 | |||
493 | |||
494 | /* Make sure there are at least `count' entries available in the prog | ||
495 | buffer */ | ||
496 | void | ||
497 | prog_reserve (size_t count) | ||
498 | { | ||
499 | if (pc + count >= format.progsize) | ||
500 | { | ||
501 | size_t inc = (count + 1) / FORMAT_INC + 1; | ||
502 | format.progsize += inc * FORMAT_INC; | ||
503 | format.prog = xrealloc (format.prog, | ||
504 | format.progsize * sizeof format.prog[0]); | ||
505 | } | ||
506 | } | ||
507 | |||
508 | size_t | ||
509 | mh_code_string (char *string) | ||
510 | { | ||
511 | int length = strlen (string) + 1; | ||
512 | size_t count = (length + sizeof (mh_opcode_t)) / sizeof (mh_opcode_t); | ||
513 | size_t start_pc = pc; | ||
514 | |||
515 | mh_code_op (mhop_str_arg); | ||
516 | prog_reserve (count); | ||
517 | format.prog[pc++] = (mh_opcode_t) count; | ||
518 | memcpy (&format.prog[pc], string, length); | ||
519 | pc += count; | ||
520 | return start_pc; | ||
521 | } | ||
522 | |||
523 | size_t | ||
524 | mh_code_op (mh_opcode_t op) | ||
525 | { | ||
526 | prog_reserve (1); | ||
527 | format.prog[pc] = op; | ||
528 | return pc++; | ||
529 | } | ||
530 | |||
531 | size_t | ||
532 | mh_code_number (int num) | ||
533 | { | ||
534 | return mh_code_op ((mh_opcode_t) num); | ||
535 | } | ||
536 | |||
537 | size_t | ||
538 | mh_code_builtin (mh_builtin_t *bp, int argtype) | ||
539 | { | ||
540 | size_t start_pc = pc; | ||
541 | if (bp->argtype != argtype) | ||
542 | { | ||
543 | if (argtype == mhtype_none) | ||
544 | { | ||
545 | if (bp->optarg) | ||
546 | { | ||
547 | switch (bp->argtype) | ||
548 | { | ||
549 | case mhtype_num: | ||
550 | mh_code_op (mhop_num_to_arg); | ||
551 | break; | ||
552 | case mhtype_str: | ||
553 | mh_code_op (mhop_str_to_arg); | ||
554 | break; | ||
555 | default: | ||
556 | yyerror ("UNKNOWN ARGTYPE"); | ||
557 | abort (); | ||
558 | } | ||
559 | } | ||
560 | else | ||
561 | { | ||
562 | mh_error ("missing argument for %s", bp->name); | ||
563 | return 0; | ||
564 | } | ||
565 | } | ||
566 | else | ||
567 | { | ||
568 | switch (bp->argtype) | ||
569 | { | ||
570 | case mhtype_none: | ||
571 | mh_error ("extra arguments to %s", bp->name); | ||
572 | return 0; | ||
573 | case mhtype_num: | ||
574 | mh_code_op (mhop_str_to_num); | ||
575 | break; | ||
576 | case mhtype_str: | ||
577 | mh_code_op (mhop_num_to_str); | ||
578 | break; | ||
579 | } | ||
580 | } | ||
581 | } | ||
582 | mh_code_op (mhop_call); | ||
583 | mh_code_op ((mh_opcode_t)bp->fun); | ||
584 | return start_pc; | ||
585 | } | ||
586 |
-
Please register or sign in to post a comment