Commit 192e01af 192e01af153545dadb14b01db810054726e152a3 by Sergey Poznyakoff

inline-comment: bugfixes.

Fixes the following bugs:

1. In whitespace-must-follow mode, a comment sequence immediately followed
by a newline caused the next line to be treated as a comment.  Thus, the
following input:

;
text

produced empty output (see test icmt-dec-08).

2. Line number information was not emitted before non-comment lines
that begin with the comment sequence, e.g.:

; comment
;valid line

See test icmt-dec-09.

* libmailutils/filter/inline-comment.c (ilcmt_state):
Withdraw ilcmt_rollback_com state, use ilcmt_rollback instead.
Withdraw ilcmt_newline_ac state, use ILCMT_EMIT_LINE_INFO flag instead.
(ILCMT_EMIT_LINE_INFO): New flag.
(ilcmt_data) <replay>: Remove.
<rollback_buffer, rollback_size>
<rollback_index>: New members.
(init_rollback): New function.
(_ilcmt_decoder): Use ilcmt_rollback state.
<ilcmt_comment_ws>: Do not skip newline following the comment.  Emit
line info before rolling back, if required.
(_ilcmt_encoder): Use ilcmt_rollback state.
* libmailutils/tests/inline-comment.at: Rename tests.
Add tests for newline after a comment (icmt-dec-08) and
line info with whitespace-follow mode (icmt-dec-09).
1 parent 7e1ce572
......@@ -59,29 +59,28 @@ enum ilcmt_state
{
ilcmt_initial,
ilcmt_newline,
ilcmt_newline_ac,
ilcmt_copy,
ilcmt_comment,
ilcmt_partial,
ilcmt_comment_ws,
ilcmt_ws,
ilcmt_rollback,
ilcmt_rollback_ws,
ilcmt_rollback_com
ilcmt_rollback_ws
};
#define ILCMT_REMOVE_EMPTY_LINES 0x01 /* Just that :) */
#define ILCMT_SQUEEZE_WS 0x02 /* Replace a series of whitespace
characters with a single space */
#define ILCMT_FOLLOW_WS 0x04 /* In decode mode: comment sequence is
recognized only if followed by a
whitespace character.
In encode mode: output a space after
each comment prefix. */
#define ILCMT_LINE_INFO 0x08 /* Emit line number information */
#define ILCMT_REMOVE_EMPTY_LINES 0x001 /* Just that :) */
#define ILCMT_SQUEEZE_WS 0x002 /* Replace a series of whitespace
characters with a single space */
#define ILCMT_FOLLOW_WS 0x004 /* In decode mode: comment sequence is
recognized only if followed by a
whitespace character.
In encode mode: output a space after
each comment prefix. */
#define ILCMT_LINE_INFO 0x008 /* Emit line number information */
#define ILCMT_COMMENT_STATIC 0x0100
#define ILCMT_LINE_INFO_STATIC 0x0200
#define ILCMT_EMIT_LINE_INFO 0x0400
struct ilcmt_data
{
......@@ -94,15 +93,15 @@ struct ilcmt_data
char sbuf[3]; /* Static location for single-character strings */
char *buf; /* Rollback buffer ... */
size_t size; /* and its size */
size_t level; /* In ilcmt_partial and ilcmt_rollback_com
states: number of bytes matched in the comment
sequence.
In ilcmt_initial, ilcmt_newline, and
ilcmt_rollback_ws states: number of characters
size_t level; /* In ilcmt_partial state: number of bytes matched
in the comment sequence.
In other states: number of characters
stored in buf. */
size_t replay; /* Index of the next-to-be-replayed character
in buf (if state==ilcmt_rollback_ws) or
comment (if state==ilcmt_rollback_com) */
/* Rollback info: */
char *rollback_buffer; /* Actual rollback data */
size_t rollback_size; /* Size of rollback data */
size_t rollback_index; /* Index of the next-to-be-replayed character
in rollback_buffer */
};
#define ILCMT_BUF_INIT 80
......@@ -141,6 +140,15 @@ _ilcmt_free (struct ilcmt_data *pd)
free (pd->buf);
}
/* Arrange for SIZE bytes starting at BUF to be replayed to the output.

   Records BUF and SIZE in PD, rewinds the replay position to the first
   byte and switches PD into the ilcmt_rollback state.  Only the pointer
   is stored; the bytes are not copied, so BUF has to stay valid until
   the replay is over.

   NOTE(review): the ilcmt_rollback handlers emit one byte before
   comparing rollback_index with rollback_size, so SIZE is presumably
   expected to be nonzero -- confirm against the callers. */
static void
init_rollback (struct ilcmt_data *pd, char *buf, size_t size)
{
  /* What to replay ... */
  pd->rollback_buffer = buf;
  pd->rollback_size = size;

  /* ... beginning with its first byte. */
  pd->rollback_index = 0;

  /* Hand control over to the replay state. */
  pd->state = ilcmt_rollback;
}
static enum mu_filter_result
_ilcmt_decoder (void *xd, enum mu_filter_command cmd,
struct mu_filter_io *iobuf)
......@@ -174,21 +182,19 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
{
case ilcmt_initial:
case ilcmt_newline:
case ilcmt_newline_ac:
if (*iptr == *pd->comment)
{
iptr++;
pd->level = 1;
pd->state = ilcmt_partial;
}
else if (pd->state == ilcmt_newline_ac)
else if (pd->flags & ILCMT_EMIT_LINE_INFO)
{
mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n",
pd->line_info_starter,
pd->line_number);
pd->level = strlen (pd->buf);
pd->replay = 0;
pd->state = ilcmt_rollback;
init_rollback (pd, pd->buf, strlen (pd->buf));
pd->flags &= ~ILCMT_EMIT_LINE_INFO;
}
else if (*iptr == '\n')
{
......@@ -243,22 +249,32 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
else
{
/* Begin restoring */
pd->replay = 0;
pd->state = ilcmt_rollback_com;
init_rollback (pd, pd->comment, pd->level);
}
break;
case ilcmt_comment_ws:
if (mu_isspace (*iptr))
{
iptr++;
if (*iptr != '\n')
iptr++;
pd->state = ilcmt_comment;
}
else
{
/* Begin restoring */
pd->replay = 0;
pd->state = ilcmt_rollback_com;
if (pd->flags & ILCMT_EMIT_LINE_INFO)
{
mu_asnprintf (&pd->buf, &pd->size, "%s %lu\n%.*s",
pd->line_info_starter,
pd->line_number,
pd->level,
pd->comment);
init_rollback (pd, pd->buf, strlen (pd->buf));
pd->flags &= ~ILCMT_EMIT_LINE_INFO;
}
else
init_rollback (pd, pd->comment, pd->level);
}
break;
......@@ -280,7 +296,7 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
}
else
{
pd->replay = 0;
init_rollback (pd, pd->buf, pd->level);
pd->state = ilcmt_rollback_ws;
}
break;
......@@ -298,18 +314,11 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
{
pd->line_number++;
if (pd->flags & ILCMT_LINE_INFO)
pd->state = ilcmt_newline_ac;
else
pd->state = ilcmt_newline;
pd->flags |= ILCMT_EMIT_LINE_INFO;
pd->state = ilcmt_newline;
}
break;
case ilcmt_rollback_com:
*optr++ = pd->comment[pd->replay++];
if (pd->replay == pd->level)
pd->state = ilcmt_copy;
break;
case ilcmt_rollback_ws:
if (pd->flags & ILCMT_SQUEEZE_WS)
{
......@@ -319,8 +328,8 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
}
/* fall through */
case ilcmt_rollback:
*optr++ = pd->buf[pd->replay++];
if (pd->replay == pd->level)
*optr++ = pd->rollback_buffer[pd->rollback_index++];
if (pd->rollback_index == pd->rollback_size)
pd->state = ilcmt_copy;
}
}
......@@ -363,11 +372,10 @@ _ilcmt_encoder (void *xd,
{
case ilcmt_initial:
case ilcmt_newline:
pd->replay = 0;
pd->state = ilcmt_rollback_com;
case ilcmt_rollback_com:
*optr++ = pd->comment[pd->replay++];
if (pd->replay == pd->length)
init_rollback (pd, pd->comment, pd->length);
case ilcmt_rollback:
*optr++ = pd->rollback_buffer[pd->rollback_index++];
if (pd->rollback_index == pd->rollback_size)
pd->state = (pd->flags & ILCMT_FOLLOW_WS) ?
ilcmt_ws : ilcmt_copy;
break;
......@@ -404,7 +412,7 @@ alloc_state (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv)
pd->flags = 0;
pd->buf = NULL;
pd->size = pd->level = pd->replay = 0;
pd->size = pd->level = 0;
pd->line_number = 1;
for (i = 1; i < argc; i++)
......
......@@ -40,7 +40,7 @@ AT_CLEANUP
dnl -------------------------------------------------------------------
INLINECOM([default], [icmt00], [],
INLINECOM([default], [icmt-dec-00], [],
[; comment 1
text 1
; comment 2
......@@ -55,7 +55,7 @@ text 2
text 3
])
INLINECOM([change comment starter], [icmt02], [-- %],
INLINECOM([change comment starter], [icmt-dec-01], [-- %],
[% comment 1
text 1
% comment 2
......@@ -70,7 +70,7 @@ text 2
text 3
])
INLINECOM([remove empty lines],[icmt03],[-- -r],
INLINECOM([remove empty lines],[icmt-dec-02],[-- -r],
[; comment 1
text 1
; comment 2
......@@ -90,7 +90,7 @@ text 4
text 5[]dnl
])
INLINECOM([remove empty lines/squeeze whitespace],[icmt04],[-- -r -s],
INLINECOM([remove empty lines/squeeze whitespace],[icmt-dec-03],[-- -r -s],
[; comment 1
text 1
; comment 2
......@@ -110,7 +110,7 @@ text 4
text 5[]dnl
])
INLINECOM([multichar comment starter],[icmt05],[-- rem],
INLINECOM([multichar comment starter],[icmt-dec-04],[-- rem],
[Text line 1
rem remark text
Text line 2
......@@ -122,7 +122,7 @@ Text line 2
Text line 3
])
INLINECOM([multichar comment starter; follow ws; follow ws mode],[icmt06],
INLINECOM([multichar comment starter; follow ws; follow ws mode],[icmt-dec-05],
[-- rem -S],
[Text line 1
rem remark text
......@@ -136,7 +136,7 @@ remark text (sic!)
Text line 3
])
INLINECOM([multichar; squeeze; remove empty],[icmt06],[-- rem -S -r],
INLINECOM([multichar; squeeze; remove empty],[icmt-dec-06],[-- rem -S -r],
[rem comment 1
text 1
rem comment 2
......@@ -156,7 +156,7 @@ text 4
text 5[]dnl
])
INLINECOM([line info facility],[icmt07 line-info],[-- -i ';line' ';'],
INLINECOM([line info facility],[icmt-dec-07 line-info],[-- -i ';line' ';'],
[; initial comment
this is line 2
this is line 3
......@@ -174,11 +174,43 @@ this is line 3
this is line 9
])
INLINECOM([line info: newline after comment],[icmt-dec-08 line-info],
[-- -i ';line' -S ';'],
[;
text
],
[;line 2
text
])
INLINECOM([line info with follow ws],[icmt-dec-09 line-info follow-ws],
[-- ';' -S -i ';'],
[; Comment line
;:directive=1
first line
; comment
second line
;:directive=2
;
;:directive=3
last line
],
[; 2
;:directive=1
first line
; 5
second line
;:directive=2
; 8
;:directive=3
last line
])
dnl -------------------------------------------------------------------
m4_define([FILTER_MODE],[encode])
INLINECOM([encode],[icmt08],[],
INLINECOM([encode],[icmt-enc-00],[],
[C'est dans dix ans je m'en irai
J'entends le loup et le renard chanter
J'entends le loup, le renard et la belette
......@@ -188,7 +220,7 @@ J'entends le loup et le renard chanter],
;J'entends le loup, le renard et la belette
;J'entends le loup et le renard chanter])
INLINECOM([encode multichar; add ws],[icmt09],[-- NB: -S],
INLINECOM([encode multichar; add ws],[icmt-enc-01],[-- NB: -S],
[Tri martolod yaouank
O voned da voyagi
Gant'el oant bet kaset
......