Commit 192e01af 192e01af153545dadb14b01db810054726e152a3 by Sergey Poznyakoff

inline-comment: bugfixes.

Fixes the following bugs:

1. In whitespace-must-follow mode, a comment sequence immediately followed
by a newline caused the next line to be treated as a comment.  Thus the
following input:

;
text

produced empty output (see test icmt-dec-08).

2. In whitespace-must-follow mode, line number information was not emitted
before non-comment lines that begin with the comment sequence, e.g.:

; comment
;valid line

See test icmt-dec-09.

* libmailutils/filter/inline-comment.c (ilcmt_state):
Withdraw ilcmt_rollback_com state, use ilcmt_rollback instead.
Withdraw ilcmt_newline_ac state, use ILCMT_EMIT_LINE_INFO flag instead.
(ILCMT_EMIT_LINE_INFO): New flag.
(ilcmt_data) <replay>: Remove.
<rollback_buffer, rollback_size>
<rollback_index>: New members.
(init_rollback): New function.
(_ilcmt_decoder): Use ilcmt_rollback state.
<ilcmt_comment_ws>: Do not skip newline following the comment.  Emit
line info before rolling back, if required.
(_ilcmt_encoder): Use ilcmt_rollback state.
* libmailutils/tests/inline-comment.at: Rename tests.
Add tests for newline after a comment (icmt-dec-08) and
line info with whitespace-follow mode (icmt-dec-09).
1 parent 7e1ce572
...@@ -59,29 +59,28 @@ enum ilcmt_state ...@@ -59,29 +59,28 @@ enum ilcmt_state
59 { 59 {
60 ilcmt_initial, 60 ilcmt_initial,
61 ilcmt_newline, 61 ilcmt_newline,
62 ilcmt_newline_ac,
63 ilcmt_copy, 62 ilcmt_copy,
64 ilcmt_comment, 63 ilcmt_comment,
65 ilcmt_partial, 64 ilcmt_partial,
66 ilcmt_comment_ws, 65 ilcmt_comment_ws,
67 ilcmt_ws, 66 ilcmt_ws,
68 ilcmt_rollback, 67 ilcmt_rollback,
69 ilcmt_rollback_ws, 68 ilcmt_rollback_ws
70 ilcmt_rollback_com
71 }; 69 };
72 70
73 #define ILCMT_REMOVE_EMPTY_LINES 0x01 /* Just that :) */ 71 #define ILCMT_REMOVE_EMPTY_LINES 0x001 /* Just that :) */
74 #define ILCMT_SQUEEZE_WS 0x02 /* Replace a series of whitespace 72 #define ILCMT_SQUEEZE_WS 0x002 /* Replace a series of whitespace
75 characters with a single space */ 73 characters with a single space */
76 #define ILCMT_FOLLOW_WS 0x04 /* In decode mode: comment sequence is 74 #define ILCMT_FOLLOW_WS 0x004 /* In decode mode: comment sequence is
77 recognized only if followed by a 75 recognized only if followed by a
78 whitespace character. 76 whitespace character.
79 In encode mode: output a space after 77 In encode mode: output a space after
80 each comment prefix. */ 78 each comment prefix. */
81 #define ILCMT_LINE_INFO 0x08 /* Emit line number information */ 79 #define ILCMT_LINE_INFO 0x008 /* Emit line number information */
82 80
83 #define ILCMT_COMMENT_STATIC 0x0100 81 #define ILCMT_COMMENT_STATIC 0x0100
84 #define ILCMT_LINE_INFO_STATIC 0x0200 82 #define ILCMT_LINE_INFO_STATIC 0x0200
83 #define ILCMT_EMIT_LINE_INFO 0x0400
85 84
86 struct ilcmt_data 85 struct ilcmt_data
87 { 86 {
...@@ -94,15 +93,15 @@ struct ilcmt_data ...@@ -94,15 +93,15 @@ struct ilcmt_data
94 char sbuf[3]; /* Static location for single-character strings */ 93 char sbuf[3]; /* Static location for single-character strings */
95 char *buf; /* Rollback buffer ... */ 94 char *buf; /* Rollback buffer ... */
96 size_t size; /* and its size */ 95 size_t size; /* and its size */
97 size_t level; /* In ilcmt_partial and ilcmt_rollback_com 96 size_t level; /* In ilcmt_partial state: number of bytes matched
98 states: number of bytes matched in the comment 97 in the comment sequence.
99 sequence. 98 In other states: number of characters
100 In ilcmt_initial, ilcmt_newline, and
101 ilcmt_rollback_ws states: number of characters
102 stored in buf. */ 99 stored in buf. */
103 size_t replay; /* Index of the next-to-be-replayed character 100 /* Rollback info: */
104 in buf (if state==ilcmt_rollback_ws) or 101 char *rollback_buffer; /* Actual rollback data */
105 comment (if state==ilcmt_rollback_com) */ 102 size_t rollback_size; /* Size of rollback data */
103 size_t rollback_index; /* Index of the next-to-be-replayed character
104 in rollback_buffer */
106 }; 105 };
107 106
108 #define ILCMT_BUF_INIT 80 107 #define ILCMT_BUF_INIT 80
...@@ -141,6 +140,15 @@ _ilcmt_free (struct ilcmt_data *pd) ...@@ -141,6 +140,15 @@ _ilcmt_free (struct ilcmt_data *pd)
141 free (pd->buf); 140 free (pd->buf);
142 } 141 }
143 142
143 static void
144 init_rollback (struct ilcmt_data *pd, char *buf, size_t size)
145 {
146 pd->state = ilcmt_rollback;
147 pd->rollback_buffer = buf;
148 pd->rollback_size = size;
149 pd->rollback_index = 0;
150 }
151
144 static enum mu_filter_result 152 static enum mu_filter_result
145 _ilcmt_decoder (void *xd, enum mu_filter_command cmd, 153 _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
146 struct mu_filter_io *iobuf) 154 struct mu_filter_io *iobuf)
...@@ -174,21 +182,19 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, ...@@ -174,21 +182,19 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
174 { 182 {
175 case ilcmt_initial: 183 case ilcmt_initial:
176 case ilcmt_newline: 184 case ilcmt_newline:
177 case ilcmt_newline_ac:
178 if (*iptr == *pd->comment) 185 if (*iptr == *pd->comment)
179 { 186 {
180 iptr++; 187 iptr++;
181 pd->level = 1; 188 pd->level = 1;
182 pd->state = ilcmt_partial; 189 pd->state = ilcmt_partial;
183 } 190 }
184 else if (pd->state == ilcmt_newline_ac) 191 else if (pd->flags & ILCMT_EMIT_LINE_INFO)
185 { 192 {
186 mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n", 193 mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n",
187 pd->line_info_starter, 194 pd->line_info_starter,
188 pd->line_number); 195 pd->line_number);
189 pd->level = strlen (pd->buf); 196 init_rollback (pd, pd->buf, strlen (pd->buf));
190 pd->replay = 0; 197 pd->flags &= ~ILCMT_EMIT_LINE_INFO;
191 pd->state = ilcmt_rollback;
192 } 198 }
193 else if (*iptr == '\n') 199 else if (*iptr == '\n')
194 { 200 {
...@@ -243,22 +249,32 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, ...@@ -243,22 +249,32 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
243 else 249 else
244 { 250 {
245 /* Begin restoring */ 251 /* Begin restoring */
246 pd->replay = 0; 252 init_rollback (pd, pd->comment, pd->level);
247 pd->state = ilcmt_rollback_com;
248 } 253 }
249 break; 254 break;
250 255
251 case ilcmt_comment_ws: 256 case ilcmt_comment_ws:
252 if (mu_isspace (*iptr)) 257 if (mu_isspace (*iptr))
253 { 258 {
254 iptr++; 259 if (*iptr != '\n')
260 iptr++;
255 pd->state = ilcmt_comment; 261 pd->state = ilcmt_comment;
256 } 262 }
257 else 263 else
258 { 264 {
259 /* Begin restoring */ 265 /* Begin restoring */
260 pd->replay = 0; 266 if (pd->flags & ILCMT_EMIT_LINE_INFO)
261 pd->state = ilcmt_rollback_com; 267 {
268 mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n%.*s",
269 pd->line_info_starter,
270 pd->line_number,
271 pd->level,
272 pd->comment);
273 init_rollback (pd, pd->buf, strlen (pd->buf));
274 pd->flags &= ~ILCMT_EMIT_LINE_INFO;
275 }
276 else
277 init_rollback (pd, pd->comment, pd->level);
262 } 278 }
263 break; 279 break;
264 280
...@@ -280,7 +296,7 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, ...@@ -280,7 +296,7 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
280 } 296 }
281 else 297 else
282 { 298 {
283 pd->replay = 0; 299 init_rollback (pd, pd->buf, pd->level);
284 pd->state = ilcmt_rollback_ws; 300 pd->state = ilcmt_rollback_ws;
285 } 301 }
286 break; 302 break;
...@@ -298,18 +314,11 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, ...@@ -298,18 +314,11 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
298 { 314 {
299 pd->line_number++; 315 pd->line_number++;
300 if (pd->flags & ILCMT_LINE_INFO) 316 if (pd->flags & ILCMT_LINE_INFO)
301 pd->state = ilcmt_newline_ac; 317 pd->flags |= ILCMT_EMIT_LINE_INFO;
302 else 318 pd->state = ilcmt_newline;
303 pd->state = ilcmt_newline;
304 } 319 }
305 break; 320 break;
306 321
307 case ilcmt_rollback_com:
308 *optr++ = pd->comment[pd->replay++];
309 if (pd->replay == pd->level)
310 pd->state = ilcmt_copy;
311 break;
312
313 case ilcmt_rollback_ws: 322 case ilcmt_rollback_ws:
314 if (pd->flags & ILCMT_SQUEEZE_WS) 323 if (pd->flags & ILCMT_SQUEEZE_WS)
315 { 324 {
...@@ -319,8 +328,8 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, ...@@ -319,8 +328,8 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
319 } 328 }
320 /* fall through */ 329 /* fall through */
321 case ilcmt_rollback: 330 case ilcmt_rollback:
322 *optr++ = pd->buf[pd->replay++]; 331 *optr++ = pd->rollback_buffer[pd->rollback_index++];
323 if (pd->replay == pd->level) 332 if (pd->rollback_index == pd->rollback_size)
324 pd->state = ilcmt_copy; 333 pd->state = ilcmt_copy;
325 } 334 }
326 } 335 }
...@@ -363,11 +372,10 @@ _ilcmt_encoder (void *xd, ...@@ -363,11 +372,10 @@ _ilcmt_encoder (void *xd,
363 { 372 {
364 case ilcmt_initial: 373 case ilcmt_initial:
365 case ilcmt_newline: 374 case ilcmt_newline:
366 pd->replay = 0; 375 init_rollback (pd, pd->comment, pd->length);
367 pd->state = ilcmt_rollback_com; 376 case ilcmt_rollback:
368 case ilcmt_rollback_com: 377 *optr++ = pd->rollback_buffer[pd->rollback_index++];
369 *optr++ = pd->comment[pd->replay++]; 378 if (pd->rollback_index == pd->rollback_size)
370 if (pd->replay == pd->length)
371 pd->state = (pd->flags & ILCMT_FOLLOW_WS) ? 379 pd->state = (pd->flags & ILCMT_FOLLOW_WS) ?
372 ilcmt_ws : ilcmt_copy; 380 ilcmt_ws : ilcmt_copy;
373 break; 381 break;
...@@ -404,7 +412,7 @@ alloc_state (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv) ...@@ -404,7 +412,7 @@ alloc_state (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv)
404 412
405 pd->flags = 0; 413 pd->flags = 0;
406 pd->buf = NULL; 414 pd->buf = NULL;
407 pd->size = pd->level = pd->replay = 0; 415 pd->size = pd->level = 0;
408 pd->line_number = 1; 416 pd->line_number = 1;
409 417
410 for (i = 1; i < argc; i++) 418 for (i = 1; i < argc; i++)
......
...@@ -40,7 +40,7 @@ AT_CLEANUP ...@@ -40,7 +40,7 @@ AT_CLEANUP
40 40
41 dnl ------------------------------------------------------------------- 41 dnl -------------------------------------------------------------------
42 42
43 INLINECOM([default], [icmt00], [], 43 INLINECOM([default], [icmt-dec-00], [],
44 [; comment 1 44 [; comment 1
45 text 1 45 text 1
46 ; comment 2 46 ; comment 2
...@@ -55,7 +55,7 @@ text 2 ...@@ -55,7 +55,7 @@ text 2
55 text 3 55 text 3
56 ]) 56 ])
57 57
58 INLINECOM([change comment starter], [icmt02], [-- %], 58 INLINECOM([change comment starter], [icmt-dec-01], [-- %],
59 [% comment 1 59 [% comment 1
60 text 1 60 text 1
61 % comment 2 61 % comment 2
...@@ -70,7 +70,7 @@ text 2 ...@@ -70,7 +70,7 @@ text 2
70 text 3 70 text 3
71 ]) 71 ])
72 72
73 INLINECOM([remove empty lines],[icmt03],[-- -r], 73 INLINECOM([remove empty lines],[icmt-dec-02],[-- -r],
74 [; comment 1 74 [; comment 1
75 text 1 75 text 1
76 ; comment 2 76 ; comment 2
...@@ -90,7 +90,7 @@ text 4 ...@@ -90,7 +90,7 @@ text 4
90 text 5[]dnl 90 text 5[]dnl
91 ]) 91 ])
92 92
93 INLINECOM([remove empty lines/squeeze whitespace],[icmt04],[-- -r -s], 93 INLINECOM([remove empty lines/squeeze whitespace],[icmt-dec-03],[-- -r -s],
94 [; comment 1 94 [; comment 1
95 text 1 95 text 1
96 ; comment 2 96 ; comment 2
...@@ -110,7 +110,7 @@ text 4 ...@@ -110,7 +110,7 @@ text 4
110 text 5[]dnl 110 text 5[]dnl
111 ]) 111 ])
112 112
113 INLINECOM([multichar comment starter],[icmt05],[-- rem], 113 INLINECOM([multichar comment starter],[icmt-dec-04],[-- rem],
114 [Text line 1 114 [Text line 1
115 rem remark text 115 rem remark text
116 Text line 2 116 Text line 2
...@@ -122,7 +122,7 @@ Text line 2 ...@@ -122,7 +122,7 @@ Text line 2
122 Text line 3 122 Text line 3
123 ]) 123 ])
124 124
125 INLINECOM([multichar comment starter; follow ws; follow ws mode],[icmt06], 125 INLINECOM([multichar comment starter; follow ws; follow ws mode],[icmt-dec-05],
126 [-- rem -S], 126 [-- rem -S],
127 [Text line 1 127 [Text line 1
128 rem remark text 128 rem remark text
...@@ -136,7 +136,7 @@ remark text (sic!) ...@@ -136,7 +136,7 @@ remark text (sic!)
136 Text line 3 136 Text line 3
137 ]) 137 ])
138 138
139 INLINECOM([multichar; squeeze; remove empty],[icmt06],[-- rem -S -r], 139 INLINECOM([multichar; squeeze; remove empty],[icmt-dec-06],[-- rem -S -r],
140 [rem comment 1 140 [rem comment 1
141 text 1 141 text 1
142 rem comment 2 142 rem comment 2
...@@ -156,7 +156,7 @@ text 4 ...@@ -156,7 +156,7 @@ text 4
156 text 5[]dnl 156 text 5[]dnl
157 ]) 157 ])
158 158
159 INLINECOM([line info facility],[icmt07 line-info],[-- -i ';line' ';'], 159 INLINECOM([line info facility],[icmt-dec-07 line-info],[-- -i ';line' ';'],
160 [; initial comment 160 [; initial comment
161 this is line 2 161 this is line 2
162 this is line 3 162 this is line 3
...@@ -174,11 +174,43 @@ this is line 3 ...@@ -174,11 +174,43 @@ this is line 3
174 this is line 9 174 this is line 9
175 ]) 175 ])
176 176
177 INLINECOM([line info: newline after comment],[icmt-dec-08 line-info],
178 [-- -i ';line' -S ';'],
179 [;
180 text
181 ],
182 [;line 2
183 text
184 ])
185
186 INLINECOM([line info with follow ws],[icmt-dec-09 line-info follow-ws],
187 [-- ';' -S -i ';'],
188 [; Comment line
189 ;:directive=1
190 first line
191 ; comment
192 second line
193 ;:directive=2
194 ;
195 ;:directive=3
196 last line
197 ],
198 [; 2
199 ;:directive=1
200 first line
201 ; 5
202 second line
203 ;:directive=2
204 ; 8
205 ;:directive=3
206 last line
207 ])
208
177 dnl ------------------------------------------------------------------- 209 dnl -------------------------------------------------------------------
178 210
179 m4_define([FILTER_MODE],[encode]) 211 m4_define([FILTER_MODE],[encode])
180 212
181 INLINECOM([encode],[icmt08],[], 213 INLINECOM([encode],[icmt-enc-00],[],
182 [C'est dans dix ans je m'en irai 214 [C'est dans dix ans je m'en irai
183 J'entends le loup et le renard chanter 215 J'entends le loup et le renard chanter
184 J'entends le loup, le renard et la belette 216 J'entends le loup, le renard et la belette
...@@ -188,7 +220,7 @@ J'entends le loup et le renard chanter], ...@@ -188,7 +220,7 @@ J'entends le loup et le renard chanter],
188 ;J'entends le loup, le renard et la belette 220 ;J'entends le loup, le renard et la belette
189 ;J'entends le loup et le renard chanter]) 221 ;J'entends le loup et le renard chanter])
190 222
191 INLINECOM([encode multichar; add ws],[icmt09],[-- NB: -S], 223 INLINECOM([encode multichar; add ws],[icmt-enc-01],[-- NB: -S],
192 [Tri martolod yaouank 224 [Tri martolod yaouank
193 O voned da voyagi 225 O voned da voyagi
194 Gant'el oant bet kaset 226 Gant'el oant bet kaset
......