Commit 04474c29 04474c2918cc831e953dc37ae1d90cac170cd5b0 by Sergey Poznyakoff

Improve the inline-comment filter.

* libmailutils/filter/inline-comment.c: Recognize multi-char
comment sequences.
Implement encode mode.
* libmailutils/tests/inline-comment.at: Test the new modes.

* mail/escape.c (quote0): Re-implement using inline-comment
encode mode.
* mail/mailvar.c (mailvar_tab): Fix a typo.
1 parent 88009983
......@@ -14,19 +14,31 @@
You should have received a copy of the GNU General Public License
along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
/* This filter removes all inline comments from the input.
An inline comment begins with a comment starter character at the beginning
of a line and ends with the newline character.
/* In decode mode, this filter removes all inline comments from the input.
An inline comment begins with a comment character sequence at the beginning
of a line and ends with the next newline character.
Default comment starter is semicolon. Another comment starter can
The default comment sequence is a semicolon. Another sequence can
be given as the first argument to the filter creation routine.
Limitations:
1. Comment starter must be a single character. This should be
fixed in future.
2. Comment starter must appear at the beginning of a line. This is
hard (if not downright impossible) to fix, because the filter
has no information about lexical structure of the input stream.
The following options modify this behavior:
-r Remove empty lines, i.e. the ones that contain only whitespace
characters.
-s Squeeze whitespace. Each sequence of two or more whitespace
characters encountered on input is replaced by a single space
character on output.
-S A "following whitespace mode". A comment sequence is recognized
only if followed by a whitespace character. The character itself
is retained on output.
In encode mode, this filter adds a comment sequence at the beginning
of each line.
The following options apply to this mode:
-S A "following whitespace mode". A single space character is
output after each inserted comment sequence.
*/
#ifdef HAVE_CONFIG_H
......@@ -44,31 +56,41 @@ enum ilcmt_state
ilcmt_newline,
ilcmt_copy,
ilcmt_comment,
ilcmt_partial,
ilcmt_comment_ws,
ilcmt_ws,
ilcmt_rollback
ilcmt_rollback_ws,
ilcmt_rollback_com
};
#define ILCMT_REMOVE_EMPTY_LINES 0x01
#define ILCMT_SQUEEZE_WS 0x02
#define ILCMT_REMOVE_EMPTY_LINES 0x01 /* Remove empty (whitespace-only) lines */
#define ILCMT_SQUEEZE_WS 0x02 /* Replace a series of whitespace
characters with a single space */
#define ILCMT_FOLLOW_WS 0x04 /* In decode mode: comment sequence is
recognized only if followed by a
whitespace character.
In encode mode: output a space after
each comment prefix. */
struct ilcmt_data
{
enum ilcmt_state state;
int cstart;
int flags;
char *buf;
size_t size;
size_t level;
size_t replay;
enum ilcmt_state state; /* Current state */
char *comment; /* Comment sequence ... */
size_t length; /* and its length */
int flags; /* ILCMT_ flags */
char *buf; /* Rollback buffer ... */
size_t size; /* and its size */
size_t level; /* In ilcmt_partial and ilcmt_rollback_com
states: number of bytes matched in the comment
sequence.
In ilcmt_initial, ilcmt_newline, and
ilcmt_rollback_ws states: number of characters
stored in buf. */
size_t replay; /* Index of the next-to-be-replayed character
in buf (if state==ilcmt_rollback_ws) or
comment (if state==ilcmt_rollback_com) */
};
enum ilcmt_action
{
action_echo,
action_noecho,
action_error
};
#define ILCMT_BUF_INIT 80
#define ILCMT_BUF_INCR 16
......@@ -94,85 +116,182 @@ ilcmt_save (struct ilcmt_data *pd, int c)
return 0;
}
static enum ilcmt_action
new_ilcmt_state (struct ilcmt_data *pd, int c)
static enum mu_filter_result
_ilcmt_decoder (void *xd, enum mu_filter_command cmd,
struct mu_filter_io *iobuf)
{
switch (pd->state)
struct ilcmt_data *pd = xd;
const unsigned char *iptr, *iend;
char *optr, *oend;
switch (cmd)
{
case ilcmt_initial:
case ilcmt_newline:
if (c == pd->cstart)
{
pd->state = ilcmt_comment;
return action_noecho;
}
else if (c == '\n')
{
if (pd->flags & ILCMT_REMOVE_EMPTY_LINES)
return action_noecho;
}
else if (mu_isspace (c))
case mu_filter_init:
pd->state = ilcmt_initial;
return mu_filter_ok;
case mu_filter_done:
free (pd->comment);
free (pd->buf);
return mu_filter_ok;
default:
break;
}
iptr = (const unsigned char *) iobuf->input;
iend = iptr + iobuf->isize;
optr = iobuf->output;
oend = optr + iobuf->osize;
while (iptr < iend && optr < oend)
{
switch (pd->state)
{
if (pd->flags & ILCMT_REMOVE_EMPTY_LINES)
case ilcmt_initial:
case ilcmt_newline:
if (*iptr == *pd->comment)
{
pd->state = ilcmt_ws;
pd->level = 0;
if (!(pd->flags & ILCMT_SQUEEZE_WS))
iptr++;
pd->level = 1;
pd->state = ilcmt_partial;
}
else if (*iptr == '\n')
{
if (pd->flags & ILCMT_REMOVE_EMPTY_LINES)
{
if (ilcmt_save (pd, c))
return action_error;
iptr++;
continue;
}
return action_noecho;
else
*optr++ = *iptr++;
}
}
pd->state = ilcmt_copy;
break;
else if (mu_isspace (*iptr))
{
if (pd->flags & ILCMT_REMOVE_EMPTY_LINES)
{
pd->state = ilcmt_ws;
pd->level = 0;
if (!(pd->flags & ILCMT_SQUEEZE_WS))
{
if (ilcmt_save (pd, *iptr))
return mu_filter_failure;
}
iptr++;
}
else
{
*optr++ = *iptr++;
pd->state = ilcmt_copy;
}
}
else
{
*optr++ = *iptr++;
pd->state = ilcmt_copy;
}
break;
case ilcmt_partial:
if (pd->level == pd->length)
{
if (pd->flags & ILCMT_FOLLOW_WS)
pd->state = ilcmt_comment_ws;
else
pd->state = ilcmt_comment;
}
else if (*iptr == pd->comment[pd->level])
{
iptr++;
pd->level++;
}
else
{
/* Begin restoring */
pd->replay = 0;
pd->state = ilcmt_rollback_com;
}
break;
case ilcmt_ws:
if (c == '\n')
pd->state = ilcmt_newline;
else if (mu_isspace (c))
{
if (!(pd->flags & ILCMT_SQUEEZE_WS))
case ilcmt_comment_ws:
if (mu_isspace (*iptr))
{
if (ilcmt_save (pd, c))
return action_error;
iptr++;
pd->state = ilcmt_comment;
}
}
else
{
pd->replay = 0;
pd->state = ilcmt_rollback;
}
return action_noecho;
else
{
/* Begin restoring */
pd->replay = 0;
pd->state = ilcmt_rollback_com;
}
break;
case ilcmt_ws:
if (*iptr == '\n')
{
iptr++;
pd->state = ilcmt_newline;
}
else if (mu_isspace (*iptr))
{
if (!(pd->flags & ILCMT_SQUEEZE_WS))
{
if (ilcmt_save (pd, *iptr))
return mu_filter_failure;
}
iptr++;
}
else
{
pd->replay = 0;
pd->state = ilcmt_rollback_ws;
}
break;
case ilcmt_copy:
if (c == '\n')
pd->state = ilcmt_newline;
break;
case ilcmt_comment:
if (c == '\n')
pd->state = ilcmt_newline;
return action_noecho;
default:
/* should not happen */
break;
case ilcmt_copy:
if ((*optr++ = *iptr++) == '\n')
pd->state = ilcmt_newline;
break;
case ilcmt_comment:
if (*iptr++ == '\n')
pd->state = ilcmt_newline;
break;
case ilcmt_rollback_com:
*optr++ = pd->comment[pd->replay++];
if (pd->replay == pd->level)
pd->state = ilcmt_copy;
break;
case ilcmt_rollback_ws:
if (pd->flags & ILCMT_SQUEEZE_WS)
{
*optr++ = ' ';
pd->state = ilcmt_copy;
}
else
{
*optr++ = pd->buf[pd->replay++];
if (pd->replay == pd->level)
pd->state = ilcmt_copy;
}
}
}
return action_echo;
iobuf->isize = iptr - (const unsigned char *) iobuf->input;
iobuf->osize = optr - iobuf->output;
return mu_filter_ok;
}
static enum mu_filter_result
_ilcmt_decoder (void *xd, enum mu_filter_command cmd,
_ilcmt_encoder (void *xd,
enum mu_filter_command cmd,
struct mu_filter_io *iobuf)
{
struct ilcmt_data *pd = xd;
size_t i, j;
const unsigned char *iptr;
size_t isize;
char *optr;
size_t osize;
const unsigned char *iptr, *iend;
char *optr, *oend;
switch (cmd)
{
......@@ -181,6 +300,8 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
return mu_filter_ok;
case mu_filter_done:
free (pd->comment);
free (pd->buf);
return mu_filter_ok;
default:
......@@ -188,80 +309,62 @@ _ilcmt_decoder (void *xd, enum mu_filter_command cmd,
}
iptr = (const unsigned char *) iobuf->input;
isize = iobuf->isize;
iend = iptr + iobuf->isize;
optr = iobuf->output;
osize = iobuf->osize;
oend = optr + iobuf->osize;
i = j = 0;
if (pd->state == ilcmt_rollback)
while (iptr < iend && optr < oend)
{
if (pd->flags & ILCMT_SQUEEZE_WS)
switch (pd->state)
{
if (j == osize)
{
iobuf->osize = 1;
return mu_filter_moreoutput;
}
optr[j++] = ' ';
case ilcmt_initial:
case ilcmt_newline:
pd->replay = 0;
pd->state = ilcmt_rollback_com;
case ilcmt_rollback_com:
*optr++ = pd->comment[pd->replay++];
if (pd->replay == pd->length)
pd->state = (pd->flags & ILCMT_FOLLOW_WS) ?
ilcmt_ws : ilcmt_copy;
break;
case ilcmt_ws:
*optr++ = ' ';
pd->state = ilcmt_copy;
case ilcmt_copy:
if ((*optr++ = *iptr++) == '\n')
pd->state = ilcmt_newline;
break;
default:
/* The remaining states are not used by the encoder, so: */
abort ();
}
else
while (j < osize)
{
if (pd->replay == pd->level)
{
pd->state = ilcmt_copy;
break;
}
optr[j++] = pd->buf[pd->replay++];
}
if (pd->state == ilcmt_copy)
{
/* Clear the buffer state. */
pd->level = pd->replay = 0;
}
}
for (; i < isize && j < osize; i++)
{
unsigned char c = *iptr++;
enum ilcmt_action action = new_ilcmt_state (pd, c);
if (action == action_echo)
optr[j++] = c;
else if (action == action_noecho)
{
if (pd->state == ilcmt_rollback)
break;
}
else
return mu_filter_failure;
}
iobuf->isize = i;
iobuf->osize = j;
iobuf->isize = iptr - (const unsigned char *) iobuf->input;
iobuf->osize = optr - iobuf->output;
return mu_filter_ok;
}
static int
alloc_state (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv)
{
struct ilcmt_data *pd = malloc (sizeof (*pd));
int i;
const char *comment = ";";
if (!pd)
return ENOMEM;
pd->cstart = ';';
pd->flags = 0;
pd->buf = NULL;
pd->size = pd->level = pd->replay = 0;
for (i = 1; i < argc; i++)
{
if (argv[i][1] == 0)
pd->cstart = argv[i][0];
else if (argv[i][0] == '-')
if (argv[i][0] == '-')
{
switch (argv[i][1])
{
......@@ -273,13 +376,26 @@ alloc_state (void **pret, int mode MU_ARG_UNUSED, int argc, const char **argv)
pd->flags |= ILCMT_SQUEEZE_WS;
break;
case 'S':
pd->flags |= ILCMT_FOLLOW_WS;
break;
default:
free (pd);
return MU_ERR_PARSE;
}
}
else
comment = argv[i];
}
pd->comment = strdup (comment);
if (!pd->comment)
{
free (pd);
return ENOMEM;
}
pd->length = strlen (comment);
*pret = pd;
return 0;
......@@ -289,7 +405,7 @@ static struct _mu_filter_record _inline_comment_filter = {
"INLINE-COMMENT",
0,
alloc_state,
NULL,
_ilcmt_encoder,
_ilcmt_decoder
};
......
......@@ -14,12 +14,14 @@
# You should have received a copy of the GNU General Public License
# along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>.
m4_pushdef([FILTER_MODE],[decode])
dnl -------------------------------------------------------------------
dnl INLINECOM(NAME, [KW = `'], [OPTS = `'], [TEXT], [OUTPUT = `'])
dnl -------------------------------------------------------------------
m4_pushdef([INLINECOM],[
AT_SETUP([inline-comment: $1])
AT_KEYWORDS([filter inline-comment icmt $2])
AT_KEYWORDS([filter FILTER_MODE inline-comment icmt $2])
sed 's/\$.*//' > input <<EOT
$4
EOT
......@@ -27,10 +29,10 @@ sed 's/\$.*//' > expout <<EOT
$5
EOT
AT_CHECK([fltst inline-comment decode read $3 < input],
AT_CHECK([fltst inline-comment FILTER_MODE read $3 < input],
[0],
[expout])
AT_CHECK([fltst inline-comment decode write $3 < input],
AT_CHECK([fltst inline-comment FILTER_MODE write $3 < input],
[0],
[expout])
AT_CLEANUP
......@@ -108,6 +110,74 @@ text 4
text 5[]dnl
])
INLINECOM([multichar comment starter],[icmt05],[-- rem],
[Text line 1
rem remark text
Text line 2
remark text (sic!)
Text line 3
],
[Text line 1
Text line 2
Text line 3
])
INLINECOM([multichar comment starter; follow ws; follow ws mode],[icmt06],
[-- rem -S],
[Text line 1
rem remark text
Text line 2
remark text (sic!)
Text line 3
],
[Text line 1
Text line 2
remark text (sic!)
Text line 3
])
INLINECOM([multichar; squeeze; remove empty],[icmt06],[-- rem -S -r],
[rem comment 1
text 1
rem comment 2
rem comment 3
text 2
text 3
$
text 4
text 5
],
[text 1
text 2
text 3
text 4
text 5[]dnl
])
m4_define([FILTER_MODE],[encode])
INLINECOM([encode],[icmt07],[],
[C'est dans dix ans je m'en irai
J'entends le loup et le renard chanter
J'entends le loup, le renard et la belette
J'entends le loup et le renard chanter],
[;C'est dans dix ans je m'en irai
;J'entends le loup et le renard chanter
;J'entends le loup, le renard et la belette
;J'entends le loup et le renard chanter])
INLINECOM([encode multichar; add ws],[icmt07],[-- NB: -S],
[Tri martolod yaouank
O voned da voyagi
Gant'el oant bet kaset
Betek an douar nevez],
[NB: Tri martolod yaouank
NB: O voned da voyagi
NB: Gant'el oant bet kaset
NB: Betek an douar nevez])
m4_popdef([INLINECOM])
m4_popdef([FILTER_MODE])
# End of inline-comment.at
......
......@@ -447,12 +447,35 @@ quote0 (msgset_t *mspec, mu_message_t mesg, void *data)
size_t size = 0;
size_t n = 0;
char *prefix = "\t";
mu_stream_t outstr, flt;
char *argv[3];
fprintf (stdout, _("Interpolating: %lu\n"),
(unsigned long) mspec->msg_part[0]);
mailvar_get (&prefix, "indentprefix", mailvar_type_string, 0);
fflush (ofile);
rc = mu_stdio_stream_create (&outstr, fileno (ofile), MU_STREAM_WRITE);
if (rc)
{
mu_diag_funcall (MU_DIAG_ERROR, "mu_stdio_stream_create", NULL, rc);
return rc;
}
argv[0] = "INLINE-COMMENT";
argv[1] = prefix;
argv[2] = NULL;
rc = mu_filter_create_args (&flt, outstr, "INLINE-COMMENT",
2, argv,
MU_FILTER_ENCODE,
MU_STREAM_WRITE);
mu_stream_unref (outstr);
if (rc)
{
mu_diag_funcall (MU_DIAG_ERROR, "mu_filter_create_args", NULL, rc);
return rc;
}
if (*(int*)data)
{
size_t i, num = 0;
......@@ -468,20 +491,15 @@ quote0 (msgset_t *mspec, mu_message_t mesg, void *data)
{
const char *value;
fprintf (ofile, "%s%s: ", prefix, sptr);
mu_stream_printf (flt, "%s: ", sptr);
if (mu_header_sget_value (hdr, sptr, &value) == 0)
{
for (; *value; value++)
{
fputc (*value, ofile);
if (*value == '\n')
fprintf (ofile, "%s", prefix);
}
fputc ('\n', ofile);
mu_stream_write (flt, value, strlen (value), NULL);
mu_stream_write (flt, "\n", 1, NULL);
}
}
}
fprintf (ofile, "%s\n", prefix);
mu_stream_write (flt, "\n", 1, NULL);
mu_message_get_body (mesg, &body);
rc = mu_body_get_streamref (body, &stream);
}
......@@ -494,11 +512,11 @@ quote0 (msgset_t *mspec, mu_message_t mesg, void *data)
return rc;
}
/* FIXME: Use mu_stream_copy? */
while (mu_stream_getline (stream, &buffer, &size, &n) == 0 && n != 0)
fprintf (ofile, "%s%s", prefix, buffer);
free (buffer);
mu_stream_copy (flt, stream, 0, NULL);
mu_stream_destroy (&stream);
mu_stream_destroy (&flt);
return 0;
}
......
......@@ -217,7 +217,7 @@ struct mailvar_symbol mailvar_tab[] =
set_replyregex },
{ { "save", },
MAILVAR_TYPEMASK (mailvar_type_boolean),
N_("stored aborted messages in the user's dead.file") },
N_("store aborted messages in the user's dead.file") },
{ { "screen", },
MAILVAR_TYPEMASK (mailvar_type_number),
N_("number of lines on terminal screen"),
......