inline-comment: bugfixes.
Fixes the following bugs:

1. In whitespace-must-follow mode, a comment sequence immediately followed
by a newline caused the next line to be treated as a comment. Thus the
following input:

;
text

produced empty output (see test icmt-dec-08).

2. Line number information was not emitted before lines starting with
a comment sequence substring, e.g.:

; comment
;valid line

See test icmt-dec-09.

* libmailutils/filter/inline-comment.c (ilcmt_state): Withdraw
ilcmt_rollback_com state, use ilcmt_rollback instead. Withdraw
ilcmt_newline_ac state, use ILCMT_EMIT_LINE_INFO flag instead.
(ILCMT_EMIT_LINE_INFO): New flag.
(ilcmt_data) <replay>: Remove.
<rollback_buffer, rollback_size>
<rollback_index>: New members.
(init_rollback): New function.
(_ilcmt_decoder): Use ilcmt_rollback state.
<ilcmt_comment_ws>: Do not skip newline following the comment.
Emit line info before rolling back, if required.
(_ilcmt_encoder): Use ilcmt_rollback state.
* libmailutils/tests/inline-comment.at: Rename tests. Add tests
for newline after a comment (icmt-dec-08) and line info with
whitespace-follow mode (icmt-dec-09).
Showing 2 changed files with 98 additions and 58 deletions
... | @@ -59,29 +59,28 @@ enum ilcmt_state | ... | @@ -59,29 +59,28 @@ enum ilcmt_state |
59 | { | 59 | { |
60 | ilcmt_initial, | 60 | ilcmt_initial, |
61 | ilcmt_newline, | 61 | ilcmt_newline, |
62 | ilcmt_newline_ac, | ||
63 | ilcmt_copy, | 62 | ilcmt_copy, |
64 | ilcmt_comment, | 63 | ilcmt_comment, |
65 | ilcmt_partial, | 64 | ilcmt_partial, |
66 | ilcmt_comment_ws, | 65 | ilcmt_comment_ws, |
67 | ilcmt_ws, | 66 | ilcmt_ws, |
68 | ilcmt_rollback, | 67 | ilcmt_rollback, |
69 | ilcmt_rollback_ws, | 68 | ilcmt_rollback_ws |
70 | ilcmt_rollback_com | ||
71 | }; | 69 | }; |
72 | 70 | ||
73 | #define ILCMT_REMOVE_EMPTY_LINES 0x01 /* Just that :) */ | 71 | #define ILCMT_REMOVE_EMPTY_LINES 0x001 /* Just that :) */ |
74 | #define ILCMT_SQUEEZE_WS 0x02 /* Replace a series of whitespace | 72 | #define ILCMT_SQUEEZE_WS 0x002 /* Replace a series of whitespace |
75 | characters with a single space */ | 73 | characters with a single space */ |
76 | #define ILCMT_FOLLOW_WS 0x04 /* In decode mode: comment sequence is | 74 | #define ILCMT_FOLLOW_WS 0x004 /* In decode mode: comment sequence is |
77 | recognized only if followed by a | 75 | recognized only if followed by a |
78 | whitespace character. | 76 | whitespace character. |
79 | In encode mode: output a space after | 77 | In encode mode: output a space after |
80 | each comment prefix. */ | 78 | each comment prefix. */ |
81 | #define ILCMT_LINE_INFO 0x08 /* Emit line number information */ | 79 | #define ILCMT_LINE_INFO 0x008 /* Emit line number information */ |
82 | 80 | ||
83 | #define ILCMT_COMMENT_STATIC 0x0100 | 81 | #define ILCMT_COMMENT_STATIC 0x0100 |
84 | #define ILCMT_LINE_INFO_STATIC 0x0200 | 82 | #define ILCMT_LINE_INFO_STATIC 0x0200 |
83 | #define ILCMT_EMIT_LINE_INFO 0x0400 | ||
85 | 84 | ||
86 | struct ilcmt_data | 85 | struct ilcmt_data |
87 | { | 86 | { |
... | @@ -94,15 +93,15 @@ struct ilcmt_data | ... | @@ -94,15 +93,15 @@ struct ilcmt_data |
94 | char sbuf[3]; /* Static location for single-character strings */ | 93 | char sbuf[3]; /* Static location for single-character strings */ |
95 | char *buf; /* Rollback buffer ... */ | 94 | char *buf; /* Rollback buffer ... */ |
96 | size_t size; /* and its size */ | 95 | size_t size; /* and its size */ |
97 | size_t level; /* In ilcmt_partial and ilcmt_rollback_com | 96 | size_t level; /* In ilcmt_partial state: number of bytes matched |
98 | states: number of bytes matched in the comment | 97 | in the comment sequence. |
99 | sequence. | 98 | In other states: number of characters |
100 | In ilcmt_initial, ilcmt_newline, and | ||
101 | ilcmt_rollback_ws states: number of characters | ||
102 | stored in buf. */ | 99 | stored in buf. */ |
103 | size_t replay; /* Index of the next-to-be-replayed character | 100 | /* Rollback info: */ |
104 | in buf (if state==ilcmt_rollback_ws) or | 101 | char *rollback_buffer; /* Actual rollback data */ |
105 | comment (if state==ilcmt_rollback_com) */ | 102 | size_t rollback_size; /* Size of rollback data */ |
103 | size_t rollback_index; /* Index of the next-to-be-replayed character | ||
104 | in rollback_buffer */ | ||
106 | }; | 105 | }; |
107 | 106 | ||
108 | #define ILCMT_BUF_INIT 80 | 107 | #define ILCMT_BUF_INIT 80 |
... | @@ -141,6 +140,15 @@ _ilcmt_free (struct ilcmt_data *pd) | ... | @@ -141,6 +140,15 @@ _ilcmt_free (struct ilcmt_data *pd) |
141 | free (pd->buf); | 140 | free (pd->buf); |
142 | } | 141 | } |
143 | 142 | ||
143 | static void | ||
144 | init_rollback (struct ilcmt_data *pd, char *buf, size_t size) | ||
145 | { | ||
146 | pd->state = ilcmt_rollback; | ||
147 | pd->rollback_buffer = buf; | ||
148 | pd->rollback_size = size; | ||
149 | pd->rollback_index = 0; | ||
150 | } | ||
151 | |||
144 | static enum mu_filter_result | 152 | static enum mu_filter_result |
145 | _ilcmt_decoder (void *xd, enum mu_filter_command cmd, | 153 | _ilcmt_decoder (void *xd, enum mu_filter_command cmd, |
146 | struct mu_filter_io *iobuf) | 154 | struct mu_filter_io *iobuf) |
... | @@ -174,21 +182,19 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, | ... | @@ -174,21 +182,19 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, |
174 | { | 182 | { |
175 | case ilcmt_initial: | 183 | case ilcmt_initial: |
176 | case ilcmt_newline: | 184 | case ilcmt_newline: |
177 | case ilcmt_newline_ac: | ||
178 | if (*iptr == *pd->comment) | 185 | if (*iptr == *pd->comment) |
179 | { | 186 | { |
180 | iptr++; | 187 | iptr++; |
181 | pd->level = 1; | 188 | pd->level = 1; |
182 | pd->state = ilcmt_partial; | 189 | pd->state = ilcmt_partial; |
183 | } | 190 | } |
184 | else if (pd->state == ilcmt_newline_ac) | 191 | else if (pd->flags & ILCMT_EMIT_LINE_INFO) |
185 | { | 192 | { |
186 | mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n", | 193 | mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n", |
187 | pd->line_info_starter, | 194 | pd->line_info_starter, |
188 | pd->line_number); | 195 | pd->line_number); |
189 | pd->level = strlen (pd->buf); | 196 | init_rollback (pd, pd->buf, strlen (pd->buf)); |
190 | pd->replay = 0; | 197 | pd->flags &= ~ILCMT_EMIT_LINE_INFO; |
191 | pd->state = ilcmt_rollback; | ||
192 | } | 198 | } |
193 | else if (*iptr == '\n') | 199 | else if (*iptr == '\n') |
194 | { | 200 | { |
... | @@ -243,22 +249,32 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, | ... | @@ -243,22 +249,32 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, |
243 | else | 249 | else |
244 | { | 250 | { |
245 | /* Begin restoring */ | 251 | /* Begin restoring */ |
246 | pd->replay = 0; | 252 | init_rollback (pd, pd->comment, pd->level); |
247 | pd->state = ilcmt_rollback_com; | ||
248 | } | 253 | } |
249 | break; | 254 | break; |
250 | 255 | ||
251 | case ilcmt_comment_ws: | 256 | case ilcmt_comment_ws: |
252 | if (mu_isspace (*iptr)) | 257 | if (mu_isspace (*iptr)) |
253 | { | 258 | { |
254 | iptr++; | 259 | if (*iptr != '\n') |
260 | iptr++; | ||
255 | pd->state = ilcmt_comment; | 261 | pd->state = ilcmt_comment; |
256 | } | 262 | } |
257 | else | 263 | else |
258 | { | 264 | { |
259 | /* Begin restoring */ | 265 | /* Begin restoring */ |
260 | pd->replay = 0; | 266 | if (pd->flags & ILCMT_EMIT_LINE_INFO) |
261 | pd->state = ilcmt_rollback_com; | 267 | { |
268 | mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n%.*s", | ||
269 | pd->line_info_starter, | ||
270 | pd->line_number, | ||
271 | pd->level, | ||
272 | pd->comment); | ||
273 | init_rollback (pd, pd->buf, strlen (pd->buf)); | ||
274 | pd->flags &= ~ILCMT_EMIT_LINE_INFO; | ||
275 | } | ||
276 | else | ||
277 | init_rollback (pd, pd->comment, pd->level); | ||
262 | } | 278 | } |
263 | break; | 279 | break; |
264 | 280 | ||
... | @@ -280,7 +296,7 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, | ... | @@ -280,7 +296,7 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, |
280 | } | 296 | } |
281 | else | 297 | else |
282 | { | 298 | { |
283 | pd->replay = 0; | 299 | init_rollback (pd, pd->buf, pd->level); |
284 | pd->state = ilcmt_rollback_ws; | 300 | pd->state = ilcmt_rollback_ws; |
285 | } | 301 | } |
286 | break; | 302 | break; |
... | @@ -298,18 +314,11 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, | ... | @@ -298,18 +314,11 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, |
298 | { | 314 | { |
299 | pd->line_number++; | 315 | pd->line_number++; |
300 | if (pd->flags & ILCMT_LINE_INFO) | 316 | if (pd->flags & ILCMT_LINE_INFO) |
301 | pd->state = ilcmt_newline_ac; | 317 | pd->flags |= ILCMT_EMIT_LINE_INFO; |
302 | else | 318 | pd->state = ilcmt_newline; |
303 | pd->state = ilcmt_newline; | ||
304 | } | 319 | } |
305 | break; | 320 | break; |
306 | 321 | ||
307 | case ilcmt_rollback_com: | ||
308 | *optr++ = pd->comment[pd->replay++]; | ||
309 | if (pd->replay == pd->level) | ||
310 | pd->state = ilcmt_copy; | ||
311 | break; | ||
312 | |||
313 | case ilcmt_rollback_ws: | 322 | case ilcmt_rollback_ws: |
314 | if (pd->flags & ILCMT_SQUEEZE_WS) | 323 | if (pd->flags & ILCMT_SQUEEZE_WS) |
315 | { | 324 | { |
... | @@ -319,8 +328,8 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, | ... | @@ -319,8 +328,8 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd, |
319 | } | 328 | } |
320 | /* fall through */ | 329 | /* fall through */ |
321 | case ilcmt_rollback: | 330 | case ilcmt_rollback: |
322 | *optr++ = pd->buf[pd->replay++]; | 331 | *optr++ = pd->rollback_buffer[pd->rollback_index++]; |
323 | if (pd->replay == pd->level) | 332 | if (pd->rollback_index == pd->rollback_size) |
324 | pd->state = ilcmt_copy; | 333 | pd->state = ilcmt_copy; |
325 | } | 334 | } |
326 | } | 335 | } |
... | @@ -363,11 +372,10 @@ _ilcmt_encoder (void *xd, | ... | @@ -363,11 +372,10 @@ _ilcmt_encoder (void *xd, |
363 | { | 372 | { |
364 | case ilcmt_initial: | 373 | case ilcmt_initial: |
365 | case ilcmt_newline: | 374 | case ilcmt_newline: |
366 | pd->replay = 0; | 375 | init_rollback (pd, pd->comment, pd->length); |
367 | pd->state = ilcmt_rollback_com; | 376 | case ilcmt_rollback: |
368 | case ilcmt_rollback_com: | 377 | *optr++ = pd->rollback_buffer[pd->rollback_index++]; |
369 | *optr++ = pd->comment[pd->replay++]; | 378 | if (pd->rollback_index == pd->rollback_size) |
370 | if (pd->replay == pd->length) | ||
371 | pd->state = (pd->flags & ILCMT_FOLLOW_WS) ? | 379 | pd->state = (pd->flags & ILCMT_FOLLOW_WS) ? |
372 | ilcmt_ws : ilcmt_copy; | 380 | ilcmt_ws : ilcmt_copy; |
373 | break; | 381 | break; |
... | @@ -404,7 +412,7 @@ alloc_state (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv) | ... | @@ -404,7 +412,7 @@ alloc_state (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv) |
404 | 412 | ||
405 | pd->flags = 0; | 413 | pd->flags = 0; |
406 | pd->buf = NULL; | 414 | pd->buf = NULL; |
407 | pd->size = pd->level = pd->replay = 0; | 415 | pd->size = pd->level = 0; |
408 | pd->line_number = 1; | 416 | pd->line_number = 1; |
409 | 417 | ||
410 | for (i = 1; i < argc; i++) | 418 | for (i = 1; i < argc; i++) | ... | ... |
... | @@ -40,7 +40,7 @@ AT_CLEANUP | ... | @@ -40,7 +40,7 @@ AT_CLEANUP |
40 | 40 | ||
41 | dnl ------------------------------------------------------------------- | 41 | dnl ------------------------------------------------------------------- |
42 | 42 | ||
43 | INLINECOM([default], [icmt00], [], | 43 | INLINECOM([default], [icmt-dec-00], [], |
44 | [; comment 1 | 44 | [; comment 1 |
45 | text 1 | 45 | text 1 |
46 | ; comment 2 | 46 | ; comment 2 |
... | @@ -55,7 +55,7 @@ text 2 | ... | @@ -55,7 +55,7 @@ text 2 |
55 | text 3 | 55 | text 3 |
56 | ]) | 56 | ]) |
57 | 57 | ||
58 | INLINECOM([change comment starter], [icmt02], [-- %], | 58 | INLINECOM([change comment starter], [icmt-dec-01], [-- %], |
59 | [% comment 1 | 59 | [% comment 1 |
60 | text 1 | 60 | text 1 |
61 | % comment 2 | 61 | % comment 2 |
... | @@ -70,7 +70,7 @@ text 2 | ... | @@ -70,7 +70,7 @@ text 2 |
70 | text 3 | 70 | text 3 |
71 | ]) | 71 | ]) |
72 | 72 | ||
73 | INLINECOM([remove empty lines],[icmt03],[-- -r], | 73 | INLINECOM([remove empty lines],[icmt-dec-02],[-- -r], |
74 | [; comment 1 | 74 | [; comment 1 |
75 | text 1 | 75 | text 1 |
76 | ; comment 2 | 76 | ; comment 2 |
... | @@ -90,7 +90,7 @@ text 4 | ... | @@ -90,7 +90,7 @@ text 4 |
90 | text 5[]dnl | 90 | text 5[]dnl |
91 | ]) | 91 | ]) |
92 | 92 | ||
93 | INLINECOM([remove empty lines/squeeze whitespace],[icmt04],[-- -r -s], | 93 | INLINECOM([remove empty lines/squeeze whitespace],[icmt-dec-03],[-- -r -s], |
94 | [; comment 1 | 94 | [; comment 1 |
95 | text 1 | 95 | text 1 |
96 | ; comment 2 | 96 | ; comment 2 |
... | @@ -110,7 +110,7 @@ text 4 | ... | @@ -110,7 +110,7 @@ text 4 |
110 | text 5[]dnl | 110 | text 5[]dnl |
111 | ]) | 111 | ]) |
112 | 112 | ||
113 | INLINECOM([multichar comment starter],[icmt05],[-- rem], | 113 | INLINECOM([multichar comment starter],[icmt-dec-04],[-- rem], |
114 | [Text line 1 | 114 | [Text line 1 |
115 | rem remark text | 115 | rem remark text |
116 | Text line 2 | 116 | Text line 2 |
... | @@ -122,7 +122,7 @@ Text line 2 | ... | @@ -122,7 +122,7 @@ Text line 2 |
122 | Text line 3 | 122 | Text line 3 |
123 | ]) | 123 | ]) |
124 | 124 | ||
125 | INLINECOM([multichar comment starter; follow ws; follow ws mode],[icmt06], | 125 | INLINECOM([multichar comment starter; follow ws; follow ws mode],[icmt-dec-05], |
126 | [-- rem -S], | 126 | [-- rem -S], |
127 | [Text line 1 | 127 | [Text line 1 |
128 | rem remark text | 128 | rem remark text |
... | @@ -136,7 +136,7 @@ remark text (sic!) | ... | @@ -136,7 +136,7 @@ remark text (sic!) |
136 | Text line 3 | 136 | Text line 3 |
137 | ]) | 137 | ]) |
138 | 138 | ||
139 | INLINECOM([multichar; squeeze; remove empty],[icmt06],[-- rem -S -r], | 139 | INLINECOM([multichar; squeeze; remove empty],[icmt-dec-06],[-- rem -S -r], |
140 | [rem comment 1 | 140 | [rem comment 1 |
141 | text 1 | 141 | text 1 |
142 | rem comment 2 | 142 | rem comment 2 |
... | @@ -156,7 +156,7 @@ text 4 | ... | @@ -156,7 +156,7 @@ text 4 |
156 | text 5[]dnl | 156 | text 5[]dnl |
157 | ]) | 157 | ]) |
158 | 158 | ||
159 | INLINECOM([line info facility],[icmt07 line-info],[-- -i ';line' ';'], | 159 | INLINECOM([line info facility],[icmt-dec-07 line-info],[-- -i ';line' ';'], |
160 | [; initial comment | 160 | [; initial comment |
161 | this is line 2 | 161 | this is line 2 |
162 | this is line 3 | 162 | this is line 3 |
... | @@ -174,11 +174,43 @@ this is line 3 | ... | @@ -174,11 +174,43 @@ this is line 3 |
174 | this is line 9 | 174 | this is line 9 |
175 | ]) | 175 | ]) |
176 | 176 | ||
177 | INLINECOM([line info: newline after comment],[icmt-dec-08 line-info], | ||
178 | [-- -i ';line' -S ';'], | ||
179 | [; | ||
180 | text | ||
181 | ], | ||
182 | [;line 2 | ||
183 | text | ||
184 | ]) | ||
185 | |||
186 | INLINECOM([line info with follow ws],[icmt-dec-09 line-info follow-ws], | ||
187 | [-- ';' -S -i ';'], | ||
188 | [; Comment line | ||
189 | ;:directive=1 | ||
190 | first line | ||
191 | ; comment | ||
192 | second line | ||
193 | ;:directive=2 | ||
194 | ; | ||
195 | ;:directive=3 | ||
196 | last line | ||
197 | ], | ||
198 | [; 2 | ||
199 | ;:directive=1 | ||
200 | first line | ||
201 | ; 5 | ||
202 | second line | ||
203 | ;:directive=2 | ||
204 | ; 8 | ||
205 | ;:directive=3 | ||
206 | last line | ||
207 | ]) | ||
208 | |||
177 | dnl ------------------------------------------------------------------- | 209 | dnl ------------------------------------------------------------------- |
178 | 210 | ||
179 | m4_define([FILTER_MODE],[encode]) | 211 | m4_define([FILTER_MODE],[encode]) |
180 | 212 | ||
181 | INLINECOM([encode],[icmt08],[], | 213 | INLINECOM([encode],[icmt-enc-00],[], |
182 | [C'est dans dix ans je m'en irai | 214 | [C'est dans dix ans je m'en irai |
183 | J'entends le loup et le renard chanter | 215 | J'entends le loup et le renard chanter |
184 | J'entends le loup, le renard et la belette | 216 | J'entends le loup, le renard et la belette |
... | @@ -188,7 +220,7 @@ J'entends le loup et le renard chanter], | ... | @@ -188,7 +220,7 @@ J'entends le loup et le renard chanter], |
188 | ;J'entends le loup, le renard et la belette | 220 | ;J'entends le loup, le renard et la belette |
189 | ;J'entends le loup et le renard chanter]) | 221 | ;J'entends le loup et le renard chanter]) |
190 | 222 | ||
191 | INLINECOM([encode multichar; add ws],[icmt09],[-- NB: -S], | 223 | INLINECOM([encode multichar; add ws],[icmt-enc-01],[-- NB: -S], |
192 | [Tri martolod yaouank | 224 | [Tri martolod yaouank |
193 | O voned da voyagi | 225 | O voned da voyagi |
194 | Gant'el oant bet kaset | 226 | Gant'el oant bet kaset | ... | ... |
-
Please register or sign in to post a comment