Commit 08eb5318 08eb53189f1d47e69847b2d39323366f24c5d163 by Sergey Poznyakoff

Parser for format strings.

1 parent ce09968a
1 %{
2 /* GNU mailutils - a suite of utilities for electronic mail
3 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18
19 #include <mh.h>
20 #define obstack_chunk_alloc malloc
21 #define obstack_chunk_free free
22 #include <obstack.h>
23
24 static mh_format_t format; /* Format structure being built */
25 static size_t pc; /* Program counter. Points to the current
26 cell in format.prog */
27 static struct obstack stack; /* Temporary token storage */
28
29 #define FORMAT_INC 64 /* Growth step: format.prog is enlarged by a
30 multiple of that many cells whenever the
31 requested cells would not fit below format.progsize */
32
33 static size_t mh_code_op (mh_opcode_t op); /* Append one cell, return its index */
34 static size_t mh_code_string (char *string); /* Code a string operand */
35 static size_t mh_code_number (int num); /* Code a number as a single cell */
36 static size_t mh_code_builtin (mh_builtin_t *bp, int argtype); /* Code a builtin call; 0 on error */
37 static void branch_fixup (size_t pc); /* Fix-up conditional branches */
38
39 /* Lexical tie-ins */
40 static int in_escape; /* Set when inside an escape sequence */
41 static int want_function; /* Set when expecting function name */
42 %}
43
44 %union {
45 char *str; /* Text of a STRING token */
46 int num; /* Value of a NUMBER or FMTSPEC token */
47 int type; /* mhtype_* code of the value produced by an expression */
48 struct {
49 size_t cond; /* pc at which the elif/else part begins; 0 when there is none */
50 size_t end; /* Index of the placeholder cell coded by `end'; 0 when there is none */
51 } elif_list;
52 size_t pc; /* An index into format.prog */
53 mh_builtin_t *builtin; /* Builtin returned by mh_lookup_builtin for a FUNCTION token */
54 };
55 %token <num> NUMBER
56 %token <str> STRING
57 %token <builtin> FUNCTION
58 %token IF ELIF ELSE FI /* Returned for %<, %?, %| and %> respectively */
59 %token OBRACE CBRACE OCURLY CCURLY /* Returned for (, ), { and } respectively */
60 %token <num> FMTSPEC /* Field width or-ed with MH_FMT_* flags */
61 %token BOGUS /* Unrecognized character after % */
62 %type <type> cond_expr component funcall item argument escape literal
63 %type <elif_list> elif_part elif_list else_part
64 %type <pc> cond end else elif
65 %type <builtin> function
66
67 %%
68
/* A format is a non-empty sequence of printable items. */
input     : list
          ;

list      : pitem
          | list pitem
          ;

/* A printable item: after the code for the item itself has been
   generated, code the instruction that prints the value it left,
   chosen by the item's mhtype_* code. */
pitem     : item
            {
              switch ($1)
                {
                case mhtype_none:
                  /* Nothing to print */
                  break;

                case mhtype_num:
                  mh_code_op (mhop_num_print);
                  break;

                case mhtype_str:
                  mh_code_op (mhop_str_print);
                  break;

                default:
                  yyerror ("UNEXPECTED item TYPE");
                  abort ();
                }
            }
          ;

item      : literal
          | escape
            {
              /* Pass the type of the escape upwards explicitly instead of
                 relying on the implicit pre-action default. */
              $$ = $1;
              /* The escape is complete: tell the lexer we are back in
                 plain text. */
              in_escape = 0;
            }
          ;

/* Literal text or number: code the operand followed by the matching
   assignment instruction. */
literal   : STRING
            {
              mh_code_string ($1);
              mh_code_op (mhop_str_asgn);
              $$ = mhtype_str;
            }
          | NUMBER
            {
              mh_code_number ($1);
              mh_code_op (mhop_num_asgn);
              $$ = mhtype_num;
            }
          ;

escape    : component
          | funcall
          | cntl
            {
              /* A control construct yields no value of its own */
              $$ = mhtype_none;
            }
          ;
126
/* Component escape, {name}.  The name "body" (compared case-insensitively)
   codes mhop_body; any other name is coded as a string operand followed
   by mhop_header.  Either way the result is a string. */
component : fmtspec OCURLY STRING CCURLY
            {
              if (strcasecmp ($3, "body") == 0)
                {
                  mh_code_op (mhop_body);
                }
              else
                {
                  mh_code_string ($3);
                  mh_code_op (mhop_header);
                }
              $$ = mhtype_str;
            }
          ;

/* The parentheses adjust in_escape, which the lexer consults to decide
   whether '(' , '{' and digits are tokens of their own. */
obrace    : OBRACE
            {
              in_escape++;
            }
          ;

cbrace    : CBRACE
            {
              in_escape--;
            }
          ;

/* Function escape, (name argument).  The two mid-rule actions count as
   components, so `function' is $4 and `argument' is $6.  want_function
   is raised only while the lexer reads the function name. */
funcall   : fmtspec obrace { want_function = 1;} function { want_function = 0; } argument cbrace
            {
              switch ($6)
                {
                case mhtype_num:
                  mh_code_op (mhop_num_to_arg);
                  break;

                case mhtype_str:
                  mh_code_op (mhop_str_to_arg);
                  break;

                default:
                  /* No argument was given: nothing to move */
                  break;
                }
              /* mh_code_builtin returns 0 after reporting an error */
              if (!mh_code_builtin ($4, $6))
                YYERROR;
              $$ = $4->type;
            }
          ;

/* Optional format specification: coded as mhop_fmtspec followed by a
   cell holding the value delivered by the lexer. */
fmtspec   : /* empty */
          | FMTSPEC
            {
              mh_code_op (mhop_fmtspec);
              mh_code_op ($1);
            }
          ;

function  : FUNCTION
          | STRING
            {
              yyerror ("undefined function");
              /* The name comes from the format being parsed: never use it
                 as a format string itself. */
              mh_error ("%s", $1);
              YYERROR;
            }
          ;

argument  : /* empty */
            {
              $$ = mhtype_none;
            }
          | literal
          | escape
          ;
194
195 /* Positions: $1=if $2=cond $3=list $4=end $5=elif_part $6=else_part $7=FI */
196 cntl : if cond list end elif_part else_part FI
197 {
198 size_t start_pc = 0, end_pc = 0;
199
200 /* Fixup first condition */
/* The placeholder cell coded by `cond' receives an offset relative to
   itself: to the first elif part if there is one, otherwise to the else
   part, otherwise to the mhop_branch cell coded by `end' (the cell just
   before the end placeholder).  Presumably this is where execution goes
   when the condition does not hold -- the branch opcodes are interpreted
   elsewhere. */
201 if ($5.cond)
202 format.prog[$2] = $5.cond - $2;
203 else if ($6.cond)
204 format.prog[$2] = $6.cond - $2;
205 else
206 format.prog[$2] = $4 - $2 - 1;
207
208 /* Link all "false" lists */
/* The placeholder cells coded by `end' are used temporarily as links:
   each one holds the index of the next placeholder in the chain, and a
   zero cell terminates it.  The chain starts at the else part's
   placeholder (if any), continues through the elif part's chain (if any)
   and finishes with the placeholder that follows the first list. */
209 if ($6.cond)
210 {
211 start_pc = end_pc = $6.end;
212 }
213 if ($5.cond)
214 {
215 if (start_pc)
216 format.prog[end_pc] = $5.end;
217 else
218 start_pc = $5.end;
219 end_pc = $5.end;
/* Advance end_pc to the last link of the elif chain */
220 for (; format.prog[end_pc]; end_pc = format.prog[end_pc])
221 ;
222 }
223
224 if (start_pc)
225 format.prog[end_pc] = $4;
226 else
227 start_pc = $4;
228
229 /* Now, fixup the end branches */
/* branch_fixup rewrites every linked cell after the head of the chain;
   the head itself receives the offset from it to the current pc, i.e.
   to the first cell following the whole construct. */
230 branch_fixup (start_pc);
231 format.prog[start_pc] = pc - start_pc;
232 }
233 ;
234
/* Opening of a control escape.  The condition that follows is lexed in
   escape mode; `cond' leaves that mode again. */
if        : IF
            {
              in_escape++;
            }
          ;

/* Same as `if', but also remembers the pc at which the elif part starts. */
elif      : ELIF
            {
              in_escape++;
              $$ = pc;
            }
          ;

/* Codes mhop_branch followed by a zero placeholder cell; the value of
   the rule is the index of that placeholder. */
end       : /* empty */
            {
              mh_code_op (mhop_branch);
              $$ = mh_code_op (0);
            }
          ;

/* Codes the branch instruction matching the type of the condition,
   followed by a zero placeholder cell whose index is the rule's value. */
cond      : cond_expr
            {
              in_escape--;
              mh_code_op ($1 == mhtype_str ? mhop_str_branch : mhop_num_branch);
              $$ = mh_code_op (0);
            }
          ;

cond_expr : component
          | funcall
          ;

elif_part : /* empty */
            {
              $$.end = 0;
              $$.cond = 0;
            }
          | elif_list end
            {
              /* Chain the new end placeholder to the previous one */
              format.prog[$2] = $1.end;
              $$.end = $2;
              $$.cond = $1.cond;
            }
          ;

elif_list : elif cond list
            {
              format.prog[$2] = pc - $2 + 2;
              $$.cond = $1;
              $$.end = 0;
            }
          | elif_list end elif cond list
            {
              /* Chain the end placeholder of the preceding part */
              format.prog[$2] = $1.end;
              format.prog[$4] = pc - $4 + 2;
              $$.cond = $1.cond;
              $$.end = $2;
            }
          ;

else_part : /* empty */
            {
              $$.end = 0;
              $$.cond = 0;
            }
          | else list end
            {
              $$.end = $3;
              $$.cond = $1;
            }
          ;

/* The value is the pc at which the else part starts. */
else      : ELSE
            {
              $$ = pc;
            }
          ;
314
315 %%
316
static char *start;   /* Beginning of the format string being parsed */
static char *curp;    /* Current position of the lexer within it */

/* Report the error message S: first the whole format string followed by
   the message, then a caret preceded by as many blanks as there are
   characters between the start of the string and the current lexer
   position.  Always returns 0. */
int
yyerror (char *s)
{
  int offset = curp - start;

  mh_error ("%s: %s", start, s);
  mh_error ("%*.*s^", offset, offset, "");
  return 0;
}
329
330 #define isdelim(c) (strchr("%<>?|(){} ",c) != NULL)
331
332 static int percent;
333 static int backslash(int c);
334
335 int
336 yylex ()
337 {
338 if (*curp == '%')
339 {
340 curp++;
341 percent = 1;
342 if (isdigit (*curp) || *curp == '-')
343 {
344 int num = 0;
345 int flags = 0;
346
347 if (*curp == '-')
348 {
349 curp++;
350 flags = MH_FMT_RALIGN;
351 }
352 if (*curp == '0')
353 flags |= MH_FMT_ZEROPAD;
354 while (*curp && isdigit (*curp))
355 num = num * 10 + *curp++ - '0';
356 yylval.num = num | flags;
357 return FMTSPEC;
358 }
359 }
360
361 if (percent)
362 {
363 percent = 0;
364 switch (*curp++)
365 {
366 case '<':
367 return IF;
368 case '>':
369 return FI;
370 case '?':
371 return ELIF;
372 case '|':
373 return ELSE;
374 case '%':
375 return '%';
376 case '(':
377 return OBRACE;
378 case '{':
379 return OCURLY;
380 default:
381 return BOGUS;
382 }
383 }
384
385 if (in_escape)
386 switch (*curp)
387 {
388 case '(':
389 curp++;
390 return OBRACE;
391 case '{':
392 curp++;
393 return OCURLY;
394 case '0':case '1':case '2':case '3':case '4':
395 case '5':case '6':case '7':case '8':case '9':
396 yylval.num = strtol (curp, &curp, 0);
397 return NUMBER;
398 }
399
400 switch (*curp)
401 {
402 case ')':
403 curp++;
404 return CBRACE;
405 case '}':
406 curp++;
407 return CCURLY;
408 case 0:
409 return 0;
410 }
411
412 do
413 {
414 if (*curp == '\\')
415 {
416 int c = backslash (*++curp);
417 obstack_1grow (&stack, c);
418 }
419 else
420 obstack_1grow (&stack, *curp);
421 curp++;
422 }
423 while (*curp && !isdelim(*curp));
424
425 obstack_1grow (&stack, 0);
426 yylval.str = obstack_finish (&stack);
427
428 if (want_function)
429 {
430 int rest;
431 mh_builtin_t *bp = mh_lookup_builtin (yylval.str, &rest);
432 if (bp)
433 {
434 curp -= rest;
435 yylval.builtin = bp;
436 return FUNCTION;
437 }
438 }
439
440 return STRING;
441 }
442
443 int
444 mh_format_parse (char *format_str, mh_format_t *fmt)
445 {
446 int rc;
447
448 start = curp = format_str;
449 obstack_init (&stack);
450 format.progsize = 0;
451 pc = 0;
452 mh_code_op (mhop_stop);
453
454 in_escape = 0;
455 percent = 0;
456
457 rc = yyparse ();
458 mh_code_op (mhop_stop);
459 obstack_free (&stack, NULL);
460 if (rc)
461 {
462 mh_format_free (&format);
463 return 1;
464 }
465 *fmt = format;
466 return 0;
467 }
468
/* Translate the character C that followed a backslash: b, f, n, r and t
   yield the corresponding control character; any other character is
   returned unchanged. */
int
backslash (int c)
{
  switch (c)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    default:
      return c;
    }
}
482
483 void
484 branch_fixup (size_t epc)
485 {
486 size_t prev = format.prog[epc];
487 if (!prev)
488 return;
489 branch_fixup (prev);
490 format.prog[prev] = epc - prev - 1;
491 }
492
493
494 /* Make sure there are at least `count' entries available in the prog
495 buffer */
496 void
497 prog_reserve (size_t count)
498 {
499 if (pc + count >= format.progsize)
500 {
501 size_t inc = (count + 1) / FORMAT_INC + 1;
502 format.progsize += inc * FORMAT_INC;
503 format.prog = xrealloc (format.prog,
504 format.progsize * sizeof format.prog[0]);
505 }
506 }
507
508 size_t
509 mh_code_string (char *string)
510 {
511 int length = strlen (string) + 1;
512 size_t count = (length + sizeof (mh_opcode_t)) / sizeof (mh_opcode_t);
513 size_t start_pc = pc;
514
515 mh_code_op (mhop_str_arg);
516 prog_reserve (count);
517 format.prog[pc++] = (mh_opcode_t) count;
518 memcpy (&format.prog[pc], string, length);
519 pc += count;
520 return start_pc;
521 }
522
523 size_t
524 mh_code_op (mh_opcode_t op)
525 {
526 prog_reserve (1);
527 format.prog[pc] = op;
528 return pc++;
529 }
530
531 size_t
532 mh_code_number (int num)
533 {
534 return mh_code_op ((mh_opcode_t) num);
535 }
536
537 size_t
538 mh_code_builtin (mh_builtin_t *bp, int argtype)
539 {
540 size_t start_pc = pc;
541 if (bp->argtype != argtype)
542 {
543 if (argtype == mhtype_none)
544 {
545 if (bp->optarg)
546 {
547 switch (bp->argtype)
548 {
549 case mhtype_num:
550 mh_code_op (mhop_num_to_arg);
551 break;
552 case mhtype_str:
553 mh_code_op (mhop_str_to_arg);
554 break;
555 default:
556 yyerror ("UNKNOWN ARGTYPE");
557 abort ();
558 }
559 }
560 else
561 {
562 mh_error ("missing argument for %s", bp->name);
563 return 0;
564 }
565 }
566 else
567 {
568 switch (bp->argtype)
569 {
570 case mhtype_none:
571 mh_error ("extra arguments to %s", bp->name);
572 return 0;
573 case mhtype_num:
574 mh_code_op (mhop_str_to_num);
575 break;
576 case mhtype_str:
577 mh_code_op (mhop_num_to_str);
578 break;
579 }
580 }
581 }
582 mh_code_op (mhop_call);
583 mh_code_op ((mh_opcode_t)bp->fun);
584 return start_pc;
585 }
586