Commit df608ed0 df608ed0cfb5f0db57d1ff97eb208ceb46f06e17 by Sergey Poznyakoff

New API for converting globbing patterns to extended POSIX regex

* include/mailutils/opool.h (mu_nonlocal_jmp_t): New type.
(mu_opool_setjmp,mu_opool_clrjmp): New functions.
(mu_opool_setup_nonlocal_jump): New macro.
* libmailutils/base/opool.c (_mu_opool)<jmp>: New field.
(alloc_bucket): Do a non-local jump on out of memory condition,
if jmp is not NULL.
(mu_opool_setjmp,mu_opool_clrjmp): New functions.

* libmailutils/base/glob.c: New file.
* libmailutils/base/Makefile.am: Add glob.c

* include/mailutils/glob.h: New file.
* include/mailutils/mailutils.h: Include glob.h

* libmailutils/tests/globtest.c: New file.
* libmailutils/tests/globtest.at: New test.
* libmailutils/tests/Makefile.am: Add new files.
* libmailutils/tests/testsuite.at: Include new test.
1 parent f8a0fd0f
......@@ -32,6 +32,7 @@
#include <mailutils/error.h>
#include <mailutils/filter.h>
#include <mailutils/folder.h>
#include <mailutils/glob.h>
#include <mailutils/header.h>
#include <mailutils/iterator.h>
#include <mailutils/kwd.h>
......
......@@ -19,6 +19,7 @@
#define _MAILUTILS_OPOOL_H
#include <mailutils/types.h>
#include <setjmp.h>
#ifndef MU_OPOOL_BUCKET_SIZE
# define MU_OPOOL_BUCKET_SIZE 1024
......@@ -33,6 +34,27 @@ int mu_opool_create (mu_opool_t *pret, int flags);
int mu_opool_set_bucket_size (mu_opool_t opool, size_t size);
int mu_opool_get_bucket_size (mu_opool_t opool, size_t *psize);
/* An entry in a pool's stack of non-local exit points.  The entry is
   owned by the caller; the pool only keeps a pointer to it. */
struct mu_nonlocal_jmp
{
  jmp_buf buf;                  /* Execution context saved by setjmp */
  struct mu_nonlocal_jmp *next; /* Entry that was registered before this
                                   one; filled in by mu_opool_setjmp */
};
typedef struct mu_nonlocal_jmp mu_nonlocal_jmp_t;
/* Register ERR as the current non-local exit point of OPOOL.  Passing
   NULL is the same as calling mu_opool_clrjmp. */
void mu_opool_setjmp (mu_opool_t opool, mu_nonlocal_jmp_t *err);
/* Unregister the most recently registered exit point of OPOOL, if any,
   making the one registered before it current again. */
void mu_opool_clrjmp (mu_opool_t opool);
/* Save the current execution context in JB (a mu_nonlocal_jmp_t lvalue)
   and register it as the non-local exit point of pool P.

   If control later comes back through JB, the entry is unregistered
   again and the enclosing function returns the value that was passed to
   longjmp.  Unregistering matters: JB normally lives on the stack of the
   function that is about to return, so leaving it registered would make
   the pool point into a dead stack frame.

   The macro expands to code containing a `return' statement, so it can
   be used only in functions that return int. */
#define mu_opool_setup_nonlocal_jump(p,jb)      \
  do                                            \
    {                                           \
      int mu_jmp_rc_ = setjmp ((jb).buf);       \
      if (mu_jmp_rc_)                           \
        {                                       \
          mu_opool_clrjmp (p);                  \
          return mu_jmp_rc_;                    \
        }                                       \
      mu_opool_setjmp (p, &(jb));               \
    }                                           \
  while (0)
/* Merge all data from *SRC into *DST. If the latter is NULL, create
it. On success, free *SRC and initialize it with NULL. */
int mu_opool_union (mu_opool_t *dst, mu_opool_t *src);
......
......@@ -34,6 +34,7 @@ libbase_la_SOURCES = \
getcwd.c\
getmaxfd.c\
getpass.c\
glob.c\
hostname.c\
iterator.c\
kwd.c\
......
/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 2007, 2009-2012, 2014-2016 Free Software Foundation,
Inc.
GNU Mailutils is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Mailutils is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
#if HAVE_CONFIG_H
# include <config.h>
#endif
#include <mailutils/opool.h>
#include <mailutils/error.h>
#include <mailutils/errno.h>
#include <mailutils/glob.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
/* Translate the bracket expression that starts at STR and append the
   result to POOL.  STR must point to the opening '['.

   If a matching ']' is found, the expression is copied to POOL with a
   leading '!' replaced by '^' and each '[' inside it prefixed with a
   backslash; *ENDP is set to the character that follows the closing
   bracket.

   If there is no closing bracket, the '[' is emitted as an escaped
   literal and *ENDP is set to the character that follows it. */
static void
parse_character_class (unsigned char const *str, mu_opool_t pool,
                       unsigned char const **endp)
{
  unsigned char const *cur;

  cur = str + 1;
  /* A leading '!' negates the class.  A ']' in the first position is a
     member of the class, not its terminator. */
  if (*cur == '!')
    cur++;
  if (*cur == ']')
    cur++;

  /* Look for the closing bracket. */
  while (*cur && *cur != ']')
    {
      int c = *cur++;

      if (c == '\\')
        {
          /* Skip the escaped character, but never step over the
             terminating NUL: a pattern ending in "[...\" would
             otherwise make the scan run past the end of the string. */
          if (*cur)
            cur++;
        }
      else if (c >= 0xc2)
        {
          /* Lead byte of a multibyte UTF-8 sequence: skip its
             continuation bytes so that none of them is mistaken for
             a special character. */
          size_t len;

          if (c < 0xe0)
            len = 1;
          else if (c < 0xf0)
            len = 2;
          else if (c < 0xf8)
            len = 3;
          else
            /* Invalid UTF-8 sequence; skip. */
            continue;

          while (len-- && *cur)
            cur++;
        }
    }

  if (*cur == ']')
    {
      /* Valid character class */
      mu_opool_append_char (pool, *str);
      str++;
      if (*str == '!')
        {
          mu_opool_append_char (pool, '^');
          str++;
        }
      while (str < cur)
        {
          if (*str == '[')
            mu_opool_append_char (pool, '\\');
          mu_opool_append_char (pool, *str);
          str++;
        }
      mu_opool_append_char (pool, ']');
      *endp = cur + 1;
    }
  else
    {
      /* No closing bracket: the '[' stands for itself. */
      mu_opool_append_char (pool, '\\');
      mu_opool_append_char (pool, *str);
      str++;
      *endp = str;
    }
}
/* Convert the globbing PATTERN to an extended regular expression and
   append it to POOL.  Neither the anchors ('^', '$') nor the
   terminating NUL are added.

   FLAGS is a bitwise OR of:
     MU_GLOBF_SUB       enclose the translation of each '?' and '*' in
                        parentheses, so that the text it matched can be
                        retrieved as a subexpression;
     MU_GLOBF_COLLAPSE  treat a run of asterisks as a single one.  This
                        is implied if MU_GLOBF_SUB is not set.  With
                        MU_GLOBF_SUB alone, each extra asterisk in a run
                        produces an empty group.

   Return 0 on success.  If POOL runs out of memory, the function
   returns the value passed to longjmp by the pool. */
int
mu_glob_to_regex_opool (char const *pattern, mu_opool_t pool, int flags)
{
  unsigned char const *str = (unsigned char const *) pattern;
  mu_nonlocal_jmp_t jmp;

  if (!(flags & MU_GLOBF_SUB))
    flags |= MU_GLOBF_COLLAPSE;

  mu_opool_setup_nonlocal_jump (pool, jmp);

  while (*str)
    {
      int c = *str++;

      if (c < 0x80)
        {
          switch (c)
            {
            case '\\':
              /* A backslash escapes '?', '*' and '['.  Anywhere else
                 it stands for itself. */
              mu_opool_append_char (pool, '\\');
              if (*str && strchr ("?*[", *str))
                {
                  mu_opool_append_char (pool, *str);
                  str++;
                }
              else
                mu_opool_append_char (pool, '\\');
              break;

            case '?':
              if (flags & MU_GLOBF_SUB)
                mu_opool_append_char (pool, '(');
              mu_opool_append_char (pool, '.');
              if (flags & MU_GLOBF_SUB)
                mu_opool_append_char (pool, ')');
              break;

            case '*':
              if (flags & MU_GLOBF_COLLAPSE)
                {
                  while (*str == '*')
                    str++;
                }

              if (flags & MU_GLOBF_SUB)
                {
                  /* Any asterisks still left in the run get an empty
                     group each, so that group numbers keep matching
                     the positions of wildcards in the pattern. */
                  while (*str == '*')
                    {
                      mu_opool_append (pool, "()", 2);
                      str++;
                    }
                  mu_opool_append_char (pool, '(');
                  mu_opool_append (pool, ".*", 2);
                  mu_opool_append_char (pool, ')');
                }
              else
                mu_opool_append (pool, ".*", 2);
              break;

            case '[':
              parse_character_class (str - 1, pool, &str);
              break;

            /* Characters that are special in extended regular
               expressions, but not in globbing patterns.  The '+' is
               one of them: left alone it would act as a quantifier. */
            case '(':
            case ')':
            case '{':
            case '}':
            case '^':
            case '$':
            case ']':
            case '|':
            case '.':
            case '+':
              mu_opool_append_char (pool, '\\');
              mu_opool_append_char (pool, c);
              break;

            default:
              mu_opool_append_char (pool, c);
            }
        }
      else
        {
          /* Non-ASCII byte: copy it verbatim.  If it is a lead byte of
             a UTF-8 sequence, copy the continuation bytes as well so
             that none of them is interpreted on its own. */
          mu_opool_append_char (pool, c);
          if (c >= 0xc2)
            {
              size_t len;

              if (c < 0xe0)
                len = 1;
              else if (c < 0xf0)
                len = 2;
              else if (c < 0xf8)
                len = 3;
              else
                /* Invalid UTF-8 sequence; skip. */
                continue;

              for (; len-- && *str; str++)
                mu_opool_append_char (pool, *str);
            }
        }
    }

  mu_opool_clrjmp (pool);
  return 0;
}
int
mu_glob_to_regex (char **rxstr, char const *pattern, int flags)
{
mu_opool_t pool;
int rc;
mu_nonlocal_jmp_t jmp;
rc = mu_opool_create (&pool, MU_OPOOL_DEFAULT);
if (rc)
return rc;
mu_opool_setup_nonlocal_jump (pool, jmp);
mu_opool_append_char (pool, '^');
rc = mu_glob_to_regex_opool (pattern, pool, flags);
if (rc == 0)
{
mu_opool_append_char (pool, '$');
mu_opool_append_char (pool, 0);
*rxstr = mu_opool_detach (pool, NULL);
}
mu_opool_clrjmp (pool);
mu_opool_destroy (&pool);
return rc;
}
/* Compile the globbing PATTERN into the regular expression RX.

   FLAGS is a bitwise OR of MU_GLOBF_* values.  MU_GLOBF_ICASE requests
   case-insensitive matching (REG_ICASE).  Unless MU_GLOBF_SUB is given,
   the expression is compiled with REG_NOSUB.  The flags are also passed
   to mu_glob_to_regex.

   Return 0 on success.  Otherwise return either the error code from
   mu_glob_to_regex, or the code returned by regcomp.  In the latter
   case a diagnostic is issued via mu_error, since a pattern that
   converts successfully is expected to compile. */
int
mu_glob_compile (regex_t *rx, char const *pattern, int flags)
{
  char *str;
  int rc;
  int rxflags;

  rc = mu_glob_to_regex (&str, pattern, flags);
  if (rc)
    return rc;

  rxflags = REG_EXTENDED;
  if (flags & MU_GLOBF_ICASE)
    rxflags |= REG_ICASE;
  if (!(flags & MU_GLOBF_SUB))
    rxflags |= REG_NOSUB;

  rc = regcomp (rx, str, rxflags);
  if (rc)
    {
      /* Called with a zero-sized buffer, regerror reports how much
         space the message needs. */
      size_t size = regerror (rc, rx, NULL, 0);
      char *errbuf = malloc (size + 1);

      if (errbuf)
        {
          regerror (rc, rx, errbuf, size);
          /* RC is a regcomp code, not a Mailutils one: print the text
             obtained from regerror rather than mu_strerror (rc). */
          mu_error ("INTERNAL ERROR: can't compile regular expression \"%s\": %s",
                    str, errbuf);
        }
      else
        mu_error ("INTERNAL ERROR: can't compile regular expression \"%s\"",
                  str);
      mu_error ("INTERNAL ERROR: expression compiled from globbing pattern: %s",
                pattern);
      free (errbuf);
    }
  free (str);

  return rc;
}
......@@ -53,6 +53,7 @@ struct _mu_opool
int flags; /* Flag bits */
size_t bucket_size; /* Default bucket size */
size_t itr_count; /* Number of iterators created for this pool */
mu_nonlocal_jmp_t *jmp; /* Buffer for non-local exit */
union mu_opool_bucket *bkt_head, *bkt_tail;
union mu_opool_bucket *bkt_fini; /* List of finished objects */
};
......@@ -65,6 +66,8 @@ alloc_bucket (struct _mu_opool *opool, size_t size)
{
if (opool->flags & MU_OPOOL_ENOMEMABRT)
mu_alloc_die ();
if (opool->jmp)
longjmp (opool->jmp->buf, ENOMEM);
}
else
{
......@@ -122,10 +125,30 @@ mu_opool_create (mu_opool_t *pret, int flags)
x->bucket_size = MU_OPOOL_BUCKET_SIZE;
x->itr_count = 0;
x->bkt_head = x->bkt_tail = x->bkt_fini = NULL;
x->jmp = NULL;
*pret = x;
return 0;
}
/* Make JMP the current non-local exit point of OPOOL, remembering the
   previous one in JMP->next.  A NULL JMP removes the current exit point
   instead. */
void
mu_opool_setjmp (mu_opool_t opool, mu_nonlocal_jmp_t *jmp)
{
  if (!jmp)
    {
      mu_opool_clrjmp (opool);
      return;
    }

  /* Push JMP on top of the stack of exit points. */
  jmp->next = opool->jmp;
  opool->jmp = jmp;
}
/* Remove the current non-local exit point of OPOOL, reinstating the one
   that was registered before it.  Do nothing if none is registered. */
void
mu_opool_clrjmp (mu_opool_t opool)
{
  mu_nonlocal_jmp_t *top = opool->jmp;

  if (top)
    opool->jmp = top->next;
}
int
mu_opool_set_bucket_size (mu_opool_t opool, size_t size)
{
......
......@@ -49,6 +49,7 @@ noinst_PROGRAMS = \
fsaf\
fsaftomod\
fsfolder\
globtest\
imapio\
listop\
mailcap\
......@@ -94,6 +95,7 @@ TESTSUITE_AT = \
fsfolder02.at\
hdrflt.at\
htmlent.at\
globtest.at\
imapio.at\
inline-comment.at\
linecon.at\
......
# This file is part of GNU Mailutils. -*- Autotest -*-
# Copyright (C) 2016 Free Software Foundation, Inc.
#
# GNU Mailutils is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 3, or (at
# your option) any later version.
#
# GNU Mailutils is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>.
AT_BANNER([Globbing patterns])
dnl ------------------------------------------------------------------
dnl GLOBCOMP (pattern, regex, [FLAGS])
dnl Declare a test case that runs globtest on PATTERN and expects it to
dnl exit with status 0, printing REGEX enclosed between the anchors
dnl on a line by itself.
dnl Each optional FLAG is one of the words sub, icase or collapse; these
dnl are passed to globtest as the options -s, -i and -c, respectively.
dnl The title of the test is PATTERN followed by the flags.
m4_pushdef([GLOBCOMP],
[AT_SETUP([[$1][]m4_foreach([flag],[m4_shift(m4_shift($@))],[ flag])])
AT_KEYWORDS([glob])
AT_CHECK([globtest m4_foreach([flag],[m4_shift(m4_shift($@))],[dnl
m4_if(flag,[sub],[ -s],[icase],[ -i],[collapse],[ -c])]) '[$1]'],[0],[[^$2$]
])
AT_CLEANUP
])
dnl ------------------------------------------------------------------
dnl Literal text and single wildcards.
GLOBCOMP(abab, abab)
GLOBCOMP(a*c, a.*c)
GLOBCOMP(a*c?d, a(.*)c(.)d, sub)
dnl Runs of asterisks.  They are collapsed by default and when collapse
dnl is requested.  With sub alone, each extra asterisk gives an empty group.
GLOBCOMP(a***c, a.*c)
GLOBCOMP(a***c, a()()(.*)c, sub)
GLOBCOMP(a***c, a(.*)c, sub, collapse)
dnl Characters special to regular expressions are escaped.
GLOBCOMP([{$|a$$], [\{\$\|a\$\$])
dnl Bracket expressions.  The quadrigraphs used below stand for the
dnl opening and the closing square bracket.
GLOBCOMP([a[0-9A-Z]c], [a[0-9A-Z]c])
GLOBCOMP([a[!a-z]c], [a[^a-z]c])
GLOBCOMP([a[!]z@:>@], [a[^]z@:>@])
GLOBCOMP([a@<:@cde], [a\@<:@cde])
GLOBCOMP([a[@<:@ba]], [a[\@<:@ba]])
dnl Dot and trailing backslash.
GLOBCOMP([*.c], [.*\.c])
GLOBCOMP([a\],[a\\])
m4_popdef([GLOBCOMP])
\ No newline at end of file
#include <config.h>
#include <mailutils/mailutils.h>
/* globtest [-ics] [--] PATTERN [WORD...]

   Without WORDs, convert PATTERN to a regular expression and print it.

   With WORDs, compile PATTERN and match each WORD against it, printing
   "WORD: OK" or "WORD: NO".  If -s is given, each successful match is
   followed by the list of matched subexpressions, one per line.

   Options:
     -i   ignore case (MU_GLOBF_ICASE)
     -s   produce subexpressions (MU_GLOBF_SUB)
     -c   collapse runs of asterisks (MU_GLOBF_COLLAPSE)
     --   end of options
*/
int
main (int argc, char **argv)
{
  char *pattern = NULL;
  int flags = 0;
  int rc;
  int i;

  mu_set_program_name (argv[0]);
  for (i = 1; i < argc; i++)
    {
      char *a = argv[i];

      if (strcmp (a, "-i") == 0)
        flags |= MU_GLOBF_ICASE;
      else if (strcmp (a, "-s") == 0)
        flags |= MU_GLOBF_SUB;
      else if (strcmp (a, "-c") == 0)
        flags |= MU_GLOBF_COLLAPSE;
      else if (strcmp (a, "--") == 0)
        {
          i++;
          break;
        }
      else if (*a != '-')
        break;
      else
        {
          mu_error ("unknown option %s", a);
          return 1;
        }
    }

  if (i == argc)
    {
      mu_printf ("usage: %s [-ics] PATTERN [WORD...]\n", mu_program_name);
      return 1;
    }

  pattern = argv[i++];

  if (i == argc)
    {
      /* No words given: show the result of the conversion. */
      char *regstr;

      rc = mu_glob_to_regex (&regstr, pattern, flags);
      if (rc)
        {
          mu_error ("convert: %s", mu_strerror (rc));
          return 1;
        }
      mu_printf ("%s\n", regstr);
      free (regstr);
    }
  else
    {
      regex_t regex;
      size_t nmatch = 0;
      regmatch_t *matches = NULL;

      rc = mu_glob_compile (&regex, pattern, flags);
      if (rc)
        {
          /* NOTE(review): RC may also be a regcomp code, for which
             mu_strerror is not meant; mu_glob_compile has already
             reported the details in that case. */
          mu_error ("compile: %s", mu_strerror (rc));
          return 1;
        }

      if (flags & MU_GLOBF_SUB)
        {
          nmatch = regex.re_nsub + 1;
          matches = mu_calloc (nmatch, sizeof matches[0]);
        }

      for (; i < argc; i++)
        {
          char *a = argv[i];

          rc = regexec (&regex, a, nmatch, matches, 0);
          mu_printf ("%s: %s\n", a, rc == 0 ? "OK" : "NO");

          /* The match array is meaningful only after a successful
             match. */
          if (rc == 0 && (flags & MU_GLOBF_SUB))
            {
              size_t j;

              for (j = 0; j < nmatch; j++)
                {
                  char const *start = a;
                  int len = 0;

                  /* A subexpression that did not take part in the
                     match has its offsets set to -1; show it as an
                     empty string. */
                  if (matches[j].rm_so != -1)
                    {
                      start = a + matches[j].rm_so;
                      len = (int) (matches[j].rm_eo - matches[j].rm_so);
                    }
                  /* Use the same output stream as above, so that the
                     lines come out in order. */
                  mu_printf ("%02d: %.*s\n", (int) j, len, start);
                }
            }
        }

      free (matches);
      regfree (&regex);
    }
  return 0;
}
......@@ -180,3 +180,5 @@ m4_include([modtofsaf.at])
m4_include([mimehdr.at])
m4_include([msgset.at])
m4_include([globtest.at])
......