Commit 1e6dc073 1e6dc073c8b89d740c7eeba747965dfb76be5f2a by Sergey Poznyakoff

Implement RFC 5228, 2.4.2.4 (Encoding Characters Using "encoded-character")

* libmu_sieve/encoded.c: New file.
* libmu_sieve/strexp.c: New file.
* libmu_sieve/Makefile.am: Add new files.
* libmu_sieve/require.c (mu_sieve_require): Understand "encoded-character".
* libmu_sieve/sieve-priv.h (mu_i_sv_interp_t): New typedef.
(mu_i_sv_expand_encoded_char): New proto.
(mu_sieve_require_encoded_character): New proto.
* libmu_sieve/sieve.l (string): Rewrite via line_.* functions.  This
fixes memory leaks on mu_sieve_machine_destroy.
(line_finish): Expand ${} sequences before returning.
(mu_sieve_require_encoded_character): New function.
* sieve/tests/enc-char.at: New file.
* sieve/tests/Makefile.am: Add enc-char.at
* sieve/tests/testsuite.at: Likewise.
1 parent 05df35d0
......@@ -123,10 +123,10 @@ void mu_sieve_debug_init (void);
/* Memory allocation functions */
void *mu_sieve_alloc (size_t size);
void *mu_sieve_palloc (mu_list_t * pool, size_t size);
void *mu_sieve_prealloc (mu_list_t * pool, void *ptr, size_t size);
void mu_sieve_pfree (mu_list_t * pool, void *ptr);
char *mu_sieve_pstrdup (mu_list_t * pool, const char *str);
void *mu_sieve_palloc (mu_list_t *pool, size_t size);
void *mu_sieve_prealloc (mu_list_t *pool, void *ptr, size_t size);
void mu_sieve_pfree (mu_list_t *pool, void *ptr);
char *mu_sieve_pstrdup (mu_list_t *pool, const char *str);
void *mu_sieve_malloc (mu_sieve_machine_t mach, size_t size);
char *mu_sieve_mstrdup (mu_sieve_machine_t mach, const char *str);
......@@ -134,7 +134,7 @@ void *mu_sieve_mrealloc (mu_sieve_machine_t mach, void *ptr, size_t size);
void mu_sieve_mfree (mu_sieve_machine_t mach, void *ptr);
mu_sieve_value_t *mu_sieve_value_create (mu_sieve_data_type type, void *data);
void mu_sieve_slist_destroy (mu_list_t * plist);
void mu_sieve_slist_destroy (mu_list_t *plist);
/* Symbol space functions */
mu_sieve_register_t *mu_sieve_test_lookup (mu_sieve_machine_t mach,
......@@ -148,8 +148,8 @@ int mu_sieve_register_test_ext (mu_sieve_machine_t mach,
mu_sieve_tag_group_t *tags, int required);
int mu_sieve_register_test (mu_sieve_machine_t mach,
const char *name, mu_sieve_handler_t handler,
mu_sieve_data_type * arg_types,
mu_sieve_tag_group_t * tags, int required);
mu_sieve_data_type *arg_types,
mu_sieve_tag_group_t *tags, int required);
int mu_sieve_register_action_ext (mu_sieve_machine_t mach,
const char *name, mu_sieve_handler_t handler,
......@@ -158,8 +158,8 @@ int mu_sieve_register_action_ext (mu_sieve_machine_t mach,
mu_sieve_tag_group_t *tags, int required);
int mu_sieve_register_action (mu_sieve_machine_t mach,
const char *name, mu_sieve_handler_t handler,
mu_sieve_data_type * arg_types,
mu_sieve_tag_group_t * tags, int required);
mu_sieve_data_type *arg_types,
mu_sieve_tag_group_t *tags, int required);
int mu_sieve_register_comparator (mu_sieve_machine_t mach, const char *name,
int required, mu_sieve_comparator_t is,
mu_sieve_comparator_t contains,
......@@ -177,8 +177,8 @@ mu_sieve_comparator_t mu_sieve_comparator_lookup (mu_sieve_machine_t mach,
mu_sieve_comparator_t mu_sieve_get_comparator (mu_sieve_machine_t mach,
mu_list_t tags);
int mu_sieve_str_to_relcmp (const char *str, mu_sieve_relcmp_t * test,
mu_sieve_relcmpn_t * stest);
int mu_sieve_str_to_relcmp (const char *str, mu_sieve_relcmp_t *test,
mu_sieve_relcmpn_t *stest);
mu_sieve_relcmp_t mu_sieve_get_relcmp (mu_sieve_machine_t mach,
mu_list_t tags);
......@@ -195,12 +195,12 @@ int mu_sieve_match_part_checker (mu_sieve_machine_t mach,
mu_list_t args);
/* Operations in value lists */
mu_sieve_value_t *mu_sieve_value_get (mu_list_t vlist, size_t index);
int mu_sieve_vlist_do (mu_sieve_value_t * val, mu_list_action_t ac,
int mu_sieve_vlist_do (mu_sieve_value_t *val, mu_list_action_t ac,
void *data);
int mu_sieve_vlist_compare (mu_sieve_value_t * a, mu_sieve_value_t * b,
int mu_sieve_vlist_compare (mu_sieve_value_t *a, mu_sieve_value_t *b,
mu_sieve_comparator_t comp,
mu_sieve_relcmp_t test, mu_sieve_retrieve_t ac,
void *data, size_t * count);
void *data, size_t *count);
/* Functions to create and destroy sieve machine */
int mu_sieve_machine_init (mu_sieve_machine_t *mach);
......@@ -208,7 +208,7 @@ int mu_sieve_machine_dup (mu_sieve_machine_t const in,
mu_sieve_machine_t *out);
int mu_sieve_machine_inherit (mu_sieve_machine_t const in,
mu_sieve_machine_t *out);
void mu_sieve_machine_destroy (mu_sieve_machine_t * pmach);
void mu_sieve_machine_destroy (mu_sieve_machine_t *pmach);
int mu_sieve_machine_add_destructor (mu_sieve_machine_t mach,
mu_sieve_destructor_t destr, void *ptr);
......
......@@ -30,6 +30,7 @@ libmu_sieve_la_SOURCES = \
actions.c\
conf.c\
comparator.c\
encoded.c\
load.c\
prog.c\
register.c\
......@@ -39,6 +40,7 @@ libmu_sieve_la_SOURCES = \
sieve-gram.c\
sieve-gram.h\
sieve-lex.c\
strexp.c\
tests.c\
util.c
libmu_sieve_la_LIBADD = ${MU_LIB_MAILUTILS} @LTDL_LIB@
......
/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 2016 Free Software Foundation, Inc.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General
Public License along with this library. If not, see
<http://www.gnu.org/licenses/>. */
/* The encoded-character extension for Sieve (RFC 5228, 2.4.2.4) */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <sieve-priv.h>
#include <mailutils/cctype.h>
/* Type of a conversion routine.  It is called with STR pointing at the
   first character of an encoded item and LEN giving the number of bytes
   available.  The routine appends the decoded bytes to POOL and stores
   the number of input bytes it consumed in *NCONS.  Returns 0 on success
   or a non-zero error code. */
typedef int (*convfun) (char const *str, size_t len, size_t *ncons, mu_opool_t pool);
static int hexconv (char const *str, size_t len, size_t *ncons, mu_opool_t pool);
static int uniconv (char const *str, size_t len, size_t *ncons, mu_opool_t pool);
/* Binds an encoding prefix to the routine that decodes its items. */
struct convertor
{
/* Prefix, matched case-insensitively; the trailing colon is not included */
char const *pfx;
/* Length of pfx in bytes */
size_t len;
/* Conversion routine for the items that follow the prefix */
convfun fun;
};
/* Known encodings.  The table is terminated by an entry with NULL pfx. */
static struct convertor conv[] = {
{ "hex", 3, hexconv },
{ "unicode", 7, uniconv },
{ NULL }
};
/* Look up the conversion routine for the encoding prefix at *PSTR.

   *PSTR points at *PLEN bytes of text.  If the text starts with one of
   the prefixes listed in conv (compared case-insensitively) immediately
   followed by a colon, advance *PSTR past the colon, reduce *PLEN
   accordingly and return the matching routine.  Otherwise return NULL
   and leave both values untouched. */
static convfun
findconv (char const **pstr, size_t *plen)
{
  char const *text = *pstr;
  size_t avail = *plen;
  struct convertor *ent = conv;

  while (ent->pfx != NULL)
    {
      /* Number of bytes taken by the prefix and the colon after it */
      size_t skip = ent->len + 1;

      if (avail >= skip
          && text[ent->len] == ':'
          && strncasecmp (text, ent->pfx, ent->len) == 0)
        {
          *pstr = text + skip;
          *plen = avail - skip;
          return ent->fun;
        }
      ent++;
    }
  return NULL;
}
/* Expand the body of an encoded-character reference (RFC 5228, 2.4.2.4).

   INPUT points at the text found between "${" and "}" and LEN is its
   length; the text need not be null-terminated.  It must begin with one
   of the prefixes known to findconv, followed by a colon and by one or
   more hexadecimal items, each optionally surrounded by blanks.

   On success, returns 0 and stores in *EXP a null-terminated string
   obtained from malloc; the caller is responsible for freeing it.
   Returns MU_ERR_NOENT if INPUT does not start with a known prefix,
   EILSEQ if the sequence is malformed or contains no items at all, and
   another non-zero code on allocation failure.  *EXP is modified only
   when 0 is returned.  DATA is not used. */
int
mu_i_sv_expand_encoded_char (char const *input, size_t len,
                             char **exp, void *data)
{
  int rc;
  convfun fn;
  mu_opool_t pool;
  size_t nitems = 0;            /* Number of items converted so far */

  (void) data;

  fn = findconv (&input, &len);
  if (!fn)
    return MU_ERR_NOENT;

  rc = mu_opool_create (&pool, MU_OPOOL_DEFAULT);
  if (rc)
    return rc;

  while (rc == 0 && len > 0)
    {
      if (mu_isblank (*input))
        {
          ++input;
          --len;
        }
      else if (mu_isxdigit (*input))
        {
          size_t n;

          rc = fn (input, len, &n, pool);
          if (rc == 0)
            {
              input += n;
              len -= n;
              nitems++;
            }
        }
      else
        rc = EILSEQ;
    }

  /* The grammar requires at least one item after the prefix.  Without
     this check "${hex:}" would be silently replaced by an empty string
     instead of being left as is. */
  if (rc == 0 && nitems == 0)
    rc = EILSEQ;

  if (rc == 0)
    {
      size_t size;
      /* NOTE(review): mu_opool_finish is assumed to return NULL when it
         fails to coalesce the collected data -- confirm against opool. */
      char *p = mu_opool_finish (pool, &size);
      char *res = p ? malloc (size + 1) : NULL;

      if (!res)
        /* Do not rely on errno here: were it 0, the caller would take
           the call for a success and use an unset *EXP. */
        rc = ENOMEM;
      else
        {
          memcpy (res, p, size);
          res[size] = 0;
          *exp = res;
        }
    }
  mu_opool_destroy (&pool);
  return rc;
}
/* Conversion routine for the "hex" encoding.

   Convert the pair of hexadecimal digits at STR into a single octet and
   append it to POOL.  LEN is the number of bytes available at STR.
   On success, store 2 in *NCONS and return 0.  Return EILSEQ if fewer
   than two bytes are available or if either of them is not a hex digit.
   If the octet cannot be appended to POOL, return the error code
   reported by mu_opool_append_char.  *NCONS is set only on success. */
static int
hexconv (char const *str, size_t len, size_t *ncons, mu_opool_t pool)
{
  unsigned char octet;
  int rc;

  if (len < 2 || !mu_isxdigit (str[0]) || !mu_isxdigit (str[1]))
    return EILSEQ;

  /* Compute in an unsigned type so that values above 0x7f do not
     depend on the signedness of plain char. */
  octet = (unsigned char) ((mu_hex2ul (str[0]) << 4) + mu_hex2ul (str[1]));

  rc = mu_opool_append_char (pool, (char) octet);
  if (rc)
    return rc;

  *ncons = 2;
  return 0;
}
/* Append to POOL the UTF-8 representation of the character WC.

   Sequences of up to six bytes are produced, so every value up to
   0x7fffffff is accepted.  Returns 0 on success, EILSEQ if WC is
   greater than 0x7fffffff, or the error code reported by
   mu_opool_append if the bytes could not be stored. */
static int
utf8_wctomb (unsigned int wc, mu_opool_t pool)
{
  /* Marker bits of the first byte, indexed by the sequence length */
  static unsigned char const lead[] = {
    0, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
  };
  unsigned char r[6];
  int count;
  int i;

  /* FIXME: This implementation allows for full UTF-8 range.  RFC 5228
     states on page 10, that "It is an error for a script to use a
     hexadecimal value that isn't in either the range 0 to D7FF or the
     range E000 to 10FFFF".  I'm not sure that this limitation should be
     honored */
  if (wc < 0x80)
    count = 1;
  else if (wc < 0x800)
    count = 2;
  else if (wc < 0x10000)
    count = 3;
  else if (wc < 0x200000)
    count = 4;
  else if (wc < 0x4000000)
    count = 5;
  else if (wc <= 0x7fffffff)
    count = 6;
  else
    return EILSEQ;

  /* Fill the continuation bytes from last to first; each of them
     carries the next six low-order bits of the value. */
  for (i = count - 1; i > 0; i--)
    {
      r[i] = 0x80 | (wc & 0x3f);
      wc >>= 6;
    }
  /* What remains of the value fits in the free bits of the first byte */
  r[0] = lead[count] | wc;

  return mu_opool_append (pool, r, count);
}
/* Conversion routine for the "unicode" encoding.

   Read the run of hexadecimal digits at the start of STR (LEN bytes are
   available), treat it as a character code and append the UTF-8
   representation of that code to POOL.  On return *NCONS holds the
   number of digits consumed.

   Returns 0 on success.  Returns EILSEQ if STR does not start with a
   hex digit, if the scan reaches a thirteenth byte, or if the value
   does not fit in the range accepted by utf8_wctomb.  Otherwise returns
   whatever utf8_wctomb reports. */
static int
uniconv (char const *str, size_t len, size_t *ncons, mu_opool_t pool)
{
  unsigned int wc = 0;
  size_t i;

  for (i = 0; i < len; i++)
    {
      if (i >= 12)
        return EILSEQ;
      if (!mu_isxdigit (str[i]))
        break;
      /* Adding one more digit would push the value past 0x7fffffff.
         Reject it here: otherwise the shift below would silently drop
         the high-order bits and a bogus character would be produced. */
      if (wc > (0x7fffffffu >> 4))
        return EILSEQ;
      wc = (wc << 4) + mu_hex2ul (str[i]);
    }

  /* Consuming nothing would stall a caller that advances by *NCONS */
  if (i == 0)
    return EILSEQ;

  *ncons = i;
  return utf8_wctomb (wc, pool);
}
......@@ -68,6 +68,11 @@ mu_sieve_require (mu_sieve_machine_t mach, mu_list_t slist)
reqfn = mu_sieve_require_relational;
text = "";
}
else if (strcmp (name, "encoded-character") == 0) /* RFC 5228, 2.4.2.4 */
{
reqfn = mu_sieve_require_encoded_character;
text = "";
}
else
{
reqfn = mu_sieve_require_action;
......
......@@ -204,3 +204,14 @@ void mu_i_sv_trace (mu_sieve_machine_t mach, const char *what,
void mu_i_sv_argf (mu_stream_t str, mu_list_t list);
void mu_i_sv_valf (mu_stream_t str, mu_sieve_value_t *val);
/* Type of a reference interpreter used by mu_i_sv_string_expand.  The
   arguments are: a pointer to the text found between "${" and "}" (not
   null-terminated), the length of that text, the location where to store
   the replacement string, and the data pointer supplied by the caller.
   A return of 0 means that a replacement has been stored; the expander
   releases it with free.  On any other return the reference is left in
   the string unchanged. */
typedef int (*mu_i_sv_interp_t) (char const *, size_t, char **, void *);
/* Expand "${...}" references in the null-terminated string INPUT, calling
   INTERP with DATA for each of them.  On success returns 0 and stores the
   malloc'ed result in *RET. */
int mu_i_sv_string_expand (char const *input,
mu_i_sv_interp_t interp, void *data, char **ret);
/* Interpreter for the references defined by the encoded-character
   extension ("hex:" and "unicode:" prefixes). */
int mu_i_sv_expand_encoded_char (char const *input, size_t len, char **exp, void *data);
/* Handler for require "encoded-character": enables expansion of encoded
   characters in the strings read by the lexer. */
int mu_sieve_require_encoded_character (mu_sieve_machine_t mach,
const char *name);
......
......@@ -529,11 +529,11 @@ number ()
}
static int
string ()
string (void)
{
yylval.string = mu_sieve_malloc (mu_sieve_machine, yyleng - 1);
memcpy (yylval.string, yytext + 1, yyleng - 2);
yylval.string[yyleng - 2] = 0;
line_begin ();
line_add (yytext + 1, yyleng - 2);
line_finish ();
return STRING;
}
......@@ -615,13 +615,6 @@ multiline_begin (void)
}
static void
line_finish (void)
{
mu_opool_append_char (mu_sieve_machine->string_pool, 0);
yylval.string = mu_opool_finish (mu_sieve_machine->string_pool, NULL);
}
static void
multiline_finish (void)
{
line_finish ();
......@@ -648,3 +641,56 @@ str_unescape (char *text, size_t len)
str[len - 1] = 0;
return str;
}
/* Indices of the slots in the interpreter table below */
enum
{
/* Slot filled in by mu_sieve_require_encoded_character */
interp_encoded_character,
/* Last slot.  NOTE(review): nothing in the visible code installs an
   interpreter here; presumably reserved for the variables extension --
   confirm. */
interp_variable
};
/* Number of slots in the interpreter table */
#define MAXINTERP (interp_variable+1)
/* Interpreters applied by line_finish to each collected string, in index
   order.  Slots that are NULL are skipped. */
mu_i_sv_interp_t interpreter[MAXINTERP];
static void
line_finish (void)
{
int i;
char *str;
mu_opool_append_char (mu_sieve_machine->string_pool, 0);
str = mu_opool_finish (mu_sieve_machine->string_pool, NULL);
for (i = 0; i < MAXINTERP; i++)
{
if (interpreter[i])
{
char *exp;
int rc = mu_i_sv_string_expand (str, interpreter[i], NULL, &exp);
if (rc == 0)
{
mu_opool_free (mu_sieve_machine->string_pool, str);
mu_opool_appendz (mu_sieve_machine->string_pool, exp);
mu_opool_append_char (mu_sieve_machine->string_pool, 0);
free (exp);
str = mu_opool_finish (mu_sieve_machine->string_pool, NULL);
}
else if (rc == MU_ERR_CANCELED)
continue;
else
{
mu_diag_at_locus (MU_LOG_ERROR, &mu_sieve_locus,
_("error expandind string: %s"),
mu_strerror (rc));
break;
}
}
}
yylval.string = str;
}
/* Handler for require "encoded-character".

   Installs mu_i_sv_expand_encoded_char in the interpreter table, so
   that line_finish applies it to the strings collected afterwards.
   Neither MACH nor NAME is used.  Always returns 0. */
int
mu_sieve_require_encoded_character (mu_sieve_machine_t mach,
                                    const char *name)
{
  mu_i_sv_interp_t *slot = &interpreter[interp_encoded_character];

  (void) mach;
  (void) name;
  *slot = mu_i_sv_expand_encoded_char;
  return 0;
}
......
/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 2016 Free Software Foundation, Inc.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General
Public License along with this library. If not, see
<http://www.gnu.org/licenses/>. */
/* String expander */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <sieve-priv.h>
/* Kinds of segments the result of an expansion is assembled from */
enum segm_type
{
segm_copy, /* reference to a fragment of the source string */
segm_repl /* replacement string supplied by the interpreter */
};
/* A piece of the expanded string.  For segm_copy segments beg and end
   are offsets in the source string and end is inclusive, so the
   fragment is end - beg + 1 bytes long. */
struct string_segment
{
enum segm_type type; /* Segment type */
size_t beg; /* Beginning of the source string fragment */
size_t end; /* End of it */
char *repl; /* Replacement, if type == segm_repl */
};
/* State of a single run of the string expander */
struct stringbuf
{
char const *string; /* Source string */
size_t length; /* Length of the source */
size_t pos; /* Offset of the current character */
mu_list_t seglist; /* List of segments */
jmp_buf errbuf; /* Return location on failure */
char *expansion; /* Expanded string */
char *endptr; /* Used when assembling expansion */
/* Function called for each reference found in the source string */
mu_i_sv_interp_t interp;
/* Opaque pointer passed to interp as its last argument */
void *data;
};
/* Allocate a segment of the given TYPE and append it to the segment
   list of BUF.  The segment starts at offset BEG of the source string
   and ends at the character preceding the current position.  Its
   replacement pointer is initialized to NULL.

   Returns the new segment.  Does not return on failure: control is
   transferred to BUF->errbuf with ENOMEM if the segment cannot be
   allocated, or with the code reported by mu_list_append if it cannot
   be added to the list. */
static struct string_segment *
segment_alloc (struct stringbuf *buf, size_t beg, enum segm_type type)
{
  int rc;
  struct string_segment *segm;

  segm = malloc (sizeof *segm);
  if (!segm)
    longjmp (buf->errbuf, ENOMEM);
  segm->type = type;
  segm->beg = beg;
  /* If the current position is 0, this wraps around to the maximum
     size_t value, so that end - beg + 1 yields 0 for the resulting
     empty segment. */
  segm->end = buf->pos - 1;
  segm->repl = NULL;
  rc = mu_list_append (buf->seglist, segm);
  if (rc)
    {
      /* The list did not take the segment, so nobody else will free it */
      free (segm);
      longjmp (buf->errbuf, rc);
    }
  return segm;
}
/* Destroy-item callback for the segment list.  Releases the segment
   pointed to by DATA, together with its replacement string if the
   segment is of the segm_repl type. */
static void
segm_free (void *data)
{
  struct string_segment *seg = data;

  switch (seg->type)
    {
    case segm_repl:
      free (seg->repl);
      break;

    default:
      /* Other segments only refer to the source string */
      break;
    }
  free (seg);
}
/* Process the next fragment of the source string, starting at the
   current position of BUF.

   A fragment consists of a run of verbatim text optionally followed by
   a "${...}" reference.  A segm_copy segment is always created for the
   verbatim text, even if it is empty.  If a reference follows, its
   content is passed to the interpreter.  If the interpreter returns 0,
   a segm_repl segment holding the replacement is added.  Otherwise the
   verbatim segment is extended to cover the reference.  On return the
   current position points past the processed text.

   Segments are created with segment_alloc, which does not return on
   failure. */
static void
string_next_fragment (struct stringbuf *buf)
{
size_t beg;
struct string_segment *segm;
char *exp;
beg = buf->pos;
/* Skip verbatim text up to the nearest "${" or the end of the string */
while (buf->pos < buf->length)
{
if (buf->string[buf->pos] == '$' && buf->pos + 1 < buf->length
&& buf->string[buf->pos + 1] == '{')
break;
buf->pos++;
}
segm = segment_alloc (buf, beg, segm_copy);
if (buf->pos == buf->length)
return;
/* From now on beg is the offset of the '$' that opens the reference */
beg = buf->pos;
buf->pos += 2;
/* Look for closing brace */
while (buf->pos < buf->length)
{
if (buf->string[buf->pos] == '$' && buf->pos + 1 < buf->length
&& buf->string[buf->pos + 1] == '{')
{
/* Found a nested reference.  Everything before it becomes part of
   the verbatim segment and the scan restarts from the inner "${".
   The increment here, together with the one at the end of the loop
   body, skips both characters of it. */
segm->end = buf->pos - 1;
beg = buf->pos;
buf->pos++;
}
else if (buf->string[buf->pos] == '}')
break;
buf->pos++;
}
if (buf->pos == buf->length)
{
/* No closing brace: the rest of the string is verbatim text */
segm->end = buf->pos - 1;
return;
}
/* Pass the text between "${" and "}" to the interpreter.  On success
   the replacement becomes the property of the new segment and is freed
   by segm_free. */
if (buf->interp (buf->string + beg + 2, buf->pos - beg - 2, &exp,
buf->data) == 0)
{
segm = segment_alloc (buf, beg, segm_repl);
segm->repl = exp;
}
else
/* Not expanded: keep the reference, including the closing brace, as
   part of the verbatim segment */
segm->end = buf->pos;
/* Skip the closing brace */
buf->pos++;
}
/* Data collected by update_len while traversing the segment list */
struct segm_stat
{
/* Replaced by the end offset of a segm_copy segment whose beginning
   equals the current value.  string_assemble starts it at 0 and compares
   the result with the offset of the last character of the source. */
size_t end;
/* Total length of the text contributed by all segments */
size_t len;
};
/* List iterator callback.  ITEM is a segment and DATA points to a
   struct segm_stat.  Adds the length of the text the segment contributes
   to the running total.  For a segm_copy segment that begins at the
   offset recorded in the end member, that member is moved to the end of
   the segment.  Always returns 0. */
static int
update_len (void *item, void *data)
{
  struct string_segment *seg = item;
  struct segm_stat *stat = data;

  if (seg->type == segm_copy)
    {
      if (stat->end == seg->beg)
        stat->end = seg->end;
      stat->len += seg->end - seg->beg + 1;
    }
  else if (seg->type == segm_repl)
    stat->len += strlen (seg->repl);

  return 0;
}
/* List iterator callback.  ITEM is a segment and DATA points to the
   struct stringbuf being assembled.  Copies the text the segment
   contributes to the location pointed to by the endptr member and
   advances endptr past it.  A segment of an unknown type contributes
   nothing.  Always returns 0. */
static int
append_segm (void *item, void *data)
{
  struct string_segment *segm = item;
  struct stringbuf *buf = data;
  char const *src;
  size_t len;

  switch (segm->type)
    {
    case segm_copy:
      src = buf->string + segm->beg;
      len = segm->end - segm->beg + 1;
      break;

    case segm_repl:
      src = segm->repl;
      len = strlen (src);
      break;

    default:
      /* Without this case the length would be read uninitialized.
         Nothing has been counted for such a segment by update_len,
         so nothing is copied either. */
      return 0;
    }

  memcpy (buf->endptr, src, len);
  buf->endptr += len;
  return 0;
}
/* Split the source string of BUF into segments, by processing its
   fragments one by one until the current position reaches the end of
   the string. */
static void
string_split (struct stringbuf *buf)
{
  for (;;)
    {
      if (buf->pos >= buf->length)
        break;
      string_next_fragment (buf);
    }
}
static int
string_assemble (struct stringbuf *buf)
{
struct segm_stat st;
st.len = 0;
st.end = 0;
mu_list_foreach (buf->seglist, update_len, &st);
if (st.end == buf->length - 1)
return MU_ERR_CANCELED;
buf->expansion = malloc (st.len + 1);
if (!buf->expansion)
longjmp (buf->errbuf, ENOMEM);
buf->endptr = buf->expansion;
mu_list_foreach (buf->seglist, append_segm, buf);
*buf->endptr = 0;
return 0;
}
/* Expand "${...}" references in the null-terminated string INPUT.

   INTERP is called for each reference with the text between the braces,
   the length of that text, the location for the replacement and DATA.

   On success returns 0 and stores the malloc'ed result in *RET.
   Returns MU_ERR_CANCELED if string_assemble decides that there is
   nothing to assemble.  Otherwise returns the error code of the
   operation that failed.  *RET is modified only when 0 is returned. */
int
mu_i_sv_string_expand (char const *input,
                       mu_i_sv_interp_t interp, void *data, char **ret)
{
  struct stringbuf sb;
  int status;

  status = mu_list_create (&sb.seglist);
  if (status != 0)
    return status;
  /* Let the list dispose of the segments when it is destroyed */
  mu_list_set_destroy_item (sb.seglist, segm_free);

  sb.string = input;
  sb.length = strlen (input);
  sb.pos = 0;
  sb.interp = interp;
  sb.data = data;
  sb.expansion = NULL;
  sb.endptr = NULL;

  /* The helpers bail out with longjmp, passing the error code */
  status = setjmp (sb.errbuf);
  if (status == 0)
    {
      string_split (&sb);
      status = string_assemble (&sb);
      if (status == 0)
        *ret = sb.expansion;
    }

  mu_list_destroy (&sb.seglist);
  return status;
}
......@@ -47,6 +47,7 @@ TESTSUITE_AT = \
anyof.at\
delheader.at\
compile.at\
enc-char.at\
envelope.at\
exists.at\
ext.at\
......
# This file is part of GNU Mailutils. -*- Autotest -*-
# Copyright (C) 2016 Free Software Foundation, Inc.
#
# GNU Mailutils is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 3, or (at
# your option) any later version.
#
# GNU Mailutils is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>.
AT_SETUP([encoded-character])
AT_KEYWORDS([encoded-character enc-char])
AT_CHECK([
AT_DATA([prog],[[require ["reject", "encoded-character"];
reject "$${hex:40}";
reject "${hex: 40 }";
reject "${HEX: 40}";
reject "${hex:40";
reject "${hex:400}";
reject "${hex:4${hex:30}}";
reject "${unicode:40}";
reject "${ unicode:40}";
reject "${UNICODE:40}";
reject "${UnICoDE:0000040}";
reject "${Unicode:40}";
reject "${Unicode:Cool}";
reject "Now ${hex: 69 73 20}the${unicode:20 74}${hex:69 6d}e";
reject "Unbalanced ${hex: 73 65 71 uence";
reject "Nested ${hex: 73 65 71 ${hex: 75 65 6E}}ce";
reject "Invalid ${hex: 73 RE}";
]])
sieve MUT_SIEVE_CMDLINE MUT_SIEVE_OPTIONS -D prog | grep ACTION
],
[0],
[ 9: ACTION: reject "$@"
16: ACTION: reject "@"
23: ACTION: reject "@"
30: ACTION: reject "${hex:40"
37: ACTION: reject "${hex:400}"
44: ACTION: reject "${hex:40}"
51: ACTION: reject "@"
58: ACTION: reject "${ unicode:40}"
65: ACTION: reject "@"
72: ACTION: reject "@"
79: ACTION: reject "@"
86: ACTION: reject "${Unicode:Cool}"
93: ACTION: reject "Now is the time"
100: ACTION: reject "Unbalanced ${hex: 73 65 71 uence"
107: ACTION: reject "Nested ${hex: 73 65 71 uen}ce"
114: ACTION: reject "Invalid ${hex: 73 RE}"
])
AT_CLEANUP
......@@ -113,6 +113,7 @@ m4_include([version.at])
AT_BANNER(Compilation)
m4_include([compile.at])
m4_include([enc-char.at])
AT_BANNER(Actions)
m4_include([false.at])
......