Commit 7a77cc6f 7a77cc6f80ae220c6adee5c862b53c43ed515270 by Sergey Poznyakoff

New string functions

* libmailutils/string/strcount.c (mu_str_count): Take an array of
(ASCII) characters to count occurrences of.  Optionally store individual
counts in an array passed as the 3rd argument.
* include/mailutils/cstr.h (mu_str_count): Change proto.
(mu_c_str_escape, mu_c_str_escape_trans)
(mu_c_str_unescape_inplace, mu_c_str_unescape)
(mu_c_str_unescape_trans): New protos.
* libmailutils/string/cstrescape.c: New file.
* libmailutils/string/cstrunescape.c: New file.
* libmailutils/string/Makefile.am: Add new files.
1 parent 8571d58b
......@@ -32,7 +32,7 @@ main (int argc, char **argv)
}
if (!mu_file_name_is_safe (argv[0])
|| (argv[0][0] == '/' && mu_str_count (argv[0], '/') < 2))
|| (argv[0][0] == '/' && mu_str_count (argv[0], "/", NULL) < 2))
{
mu_error ("unsafe file name");
return 1;
......
......@@ -36,13 +36,13 @@ main (int argc, char **argv)
}
if (!mu_file_name_is_safe (argv[0])
|| (argv[0][0] == '/' && mu_str_count (argv[0], '/') < 2))
|| (argv[0][0] == '/' && mu_str_count (argv[0], "/", NULL) < 2))
{
mu_error ("%s: unsafe file name", argv[0]);
return 1;
}
if (!mu_file_name_is_safe (argv[1])
|| (argv[1][0] == '/' && mu_str_count (argv[1], '/') < 2))
|| (argv[1][0] == '/' && mu_str_count (argv[1], "/", NULL) < 2))
{
mu_error ("%sunsafe file name", argv[0]);
return 1;
......
......@@ -46,7 +46,18 @@ char *mu_str_stripws (char *string);
int mu_string_split (const char *string, char *delim, mu_list_t list);
size_t mu_str_count (char const *str, int chr);
size_t mu_str_count (char const *str, char const *chr, size_t *cnt);
int mu_c_str_escape (char const *str, char const *chr, char const *xtab,
char **ret_str);
int mu_c_str_escape_trans (char const *str, char const *trans, char **ret_str);
int mu_c_str_unescape_inplace (char *str, char const *chr, char const *xtab);
int mu_c_str_unescape (char const *str, char const *chr, char const *xtab,
char **ret_str);
int mu_c_str_unescape_trans (char const *str, char const *trans,
char **ret_str);
#ifdef __cplusplus
}
......
......@@ -21,6 +21,8 @@ libstring_la_SOURCES = \
cpystr.c\
cstrcasecmp.c\
cstrcasestr.c\
cstrescape.c\
cstrunescape.c\
cstrlower.c\
cstrupper.c\
hexstr.c\
......
/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 2016 Free Software Foundation, Inc.
GNU Mailutils is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Mailutils is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <mailutils/cstr.h>
#include <mailutils/errno.h>
/* Examines STR for occurrences of characters from CHR.  Returns in RET_STR
   a malloc'ed string where each occurrence of CHR[i] is replaced by a
   backslash, followed by XTAB[i].

   If STR is NULL, *RET_STR is set to NULL and 0 is returned.
   If CHR is NULL, RET_STR contains a malloc'ed copy of STR (XTAB is
   ignored).
   If XTAB is NULL, XTAB = CHR is assumed.

   Callers are advised to include backslash into both CHR and XTAB.

   Returns 0 on success, or an error code:
     MU_ERR_OUT_PTR_NULL  RET_STR is NULL;
     EINVAL               CHR and XTAB have different lengths;
     ENOMEM               memory allocation failed.
   On error *RET_STR is not modified.  On success the caller owns the
   returned string and must free(3) it.

   Example:

   Escape each occurrence of backslash and double quote:

     mu_c_str_escape (str, "\\\"", NULL, &ret_str)
*/
int
mu_c_str_escape (char const *str, char const *chr, char const *xtab,
                 char **ret_str)
{
  char *newstr;
  char const *s;
  size_t len, n;

  if (!ret_str)
    return MU_ERR_OUT_PTR_NULL;

  if (!str)
    {
      *ret_str = NULL;
      return 0;
    }

  if (!chr)
    {
      newstr = strdup (str);
      if (!newstr)
        return ENOMEM;
      *ret_str = newstr;
      return 0;
    }

  if (xtab)
    {
      if (strlen (xtab) != strlen (chr))
        return EINVAL;
    }
  else
    xtab = chr;

  /* Count the bytes that will be escaped using exactly the same test
     as the copy loop below.  Sizing the buffer with a different notion
     of "match" (e.g. one that skips bytes after a UTF-8 lead byte)
     would under-allocate on malformed input and overflow the buffer. */
  n = 0;
  for (s = str; *s; s++)
    if (strchr (chr, *s))
      n++;
  len = (size_t) (s - str);

  /* Each escaped byte grows by one (the backslash); +1 for the NUL. */
  newstr = malloc (len + n + 1);
  if (!newstr)
    return ENOMEM;
  *ret_str = newstr;

  for (s = str; *s; s++)
    {
      char const *p = strchr (chr, *s);
      if (p)
        {
          *newstr++ = '\\';
          *newstr++ = xtab[p - chr];
        }
      else
        *newstr++ = *s;
    }
  *newstr = 0;
  return 0;
}
/* Escape certain characters in STR, returning the newly allocated result
   in RET_STR.

   TRANS is a translation table made of character pairs, so its length
   must be even.  For each even index i:

     TRANS[i+1] - the character to be escaped;
     TRANS[i]   - the character that follows the backslash in its
                  escape sequence.

   That is, every TRANS[i+1] found in STR becomes backslash + TRANS[i].
   A NULL TRANS is passed on to mu_c_str_escape as a NULL character set.

   Returns 0 on success, EINVAL if TRANS has odd length, the value of
   errno if the temporary table cannot be allocated, or whatever
   mu_c_str_escape returns.

   E.g., to escape control characters, backslash and double-quote using
   C convention:

     mu_c_str_escape_trans (str, "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v", &ret)

   See also mu_wordsplit_c_escape_tab in wordsplit.c
*/
int
mu_c_str_escape_trans (char const *str, char const *trans, char **ret_str)
{
  char *tables = NULL;          /* single allocation holding both tables */
  char const *escapees = NULL;  /* characters to be escaped */
  char const *codes = NULL;     /* matching escape-sequence characters */
  int status;

  if (trans)
    {
      size_t len = strlen (trans);
      size_t npairs = len / 2;
      size_t k;
      char *from, *to;

      if (len & 1)
        return EINVAL;

      /* Two NUL-terminated strings of NPAIRS characters each. */
      tables = malloc (len + 2);
      if (!tables)
        return errno;

      from = tables;
      to = tables + npairs + 1;
      for (k = 0; k < npairs; k++)
        {
          to[k] = trans[2 * k];
          from[k] = trans[2 * k + 1];
        }
      from[npairs] = '\0';
      to[npairs] = '\0';

      escapees = from;
      codes = to;
    }

  status = mu_c_str_escape (str, escapees, codes, ret_str);
  free (tables);
  return status;
}
/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 2016 Free Software Foundation, Inc.
GNU Mailutils is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Mailutils is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <mailutils/cstr.h>
#include <mailutils/errno.h>
/* Copy the string IN to OUT, collapsing every two-character sequence
   "backslash, XTAB[i]" into the single character CHR[i].  A backslash
   that is followed by a character not listed in XTAB, or that is the
   last character of IN, is copied unchanged.

   OUT must be large enough to accommodate the translated string (at
   worst, the same length as IN, plus the terminating NUL).  IN == OUT is
   allowed: the write position never gets ahead of the read position.

   Both XTAB and CHR must be non-NULL and must contain the same number of
   elements.
*/
static void
c_str_unescape (char const *in, char *out, char const *chr, char const *xtab)
{
  char const *src = in;
  char *dst = out;

  while (*src)
    {
      if (src[0] == '\\' && src[1])
        {
          char const *hit = strchr (xtab, src[1]);
          if (hit)
            {
              /* Known escape: emit its translation, skip both bytes. */
              *dst++ = chr[hit - xtab];
              src += 2;
              continue;
            }
        }
      *dst++ = *src++;
    }
  *dst = 0;
}
/* Rewrite STR in place, replacing each occurrence of a backslash followed
   by a character XTAB[i] with the single character CHR[i].

   If only one of XTAB and CHR is NULL, the other one is used for both.
   If both are NULL, STR is left unchanged.
   STR == NULL is OK (nothing is done).

   Returns 0 on success, and EINVAL if both CHR and XTAB are given but
   their lengths differ.
*/
int
mu_c_str_unescape_inplace (char *str, char const *chr, char const *xtab)
{
  if (str == NULL)
    return 0;

  /* No translation tables at all: nothing to unescape. */
  if (chr == NULL && xtab == NULL)
    return 0;

  if (chr != NULL && xtab != NULL)
    {
      if (strlen (xtab) != strlen (chr))
        return EINVAL;
    }
  else if (chr == NULL)
    chr = xtab;
  else
    xtab = chr;

  c_str_unescape (str, str, chr, xtab);
  return 0;
}
/* A counterpart of mu_c_str_escape.  Creates an allocated (using malloc(3))
   copy of STR, where each occurrence of \ followed by a character XTAB[i]
   is replaced with the single character CHR[i].

   Either XTAB or CHR can be NULL, in which case XTAB=CHR is assumed.
   If both XTAB and CHR are NULL, *RET_STR will contain an exact malloc'ed
   copy of STR.

   STR == NULL is OK: in that case *RET_STR will also be NULL.

   Returns 0 on success, EINVAL if lengths of CHR and XTAB differ,
   MU_ERR_OUT_PTR_NULL if RET_STR is NULL, and ENOMEM if memory allocation
   failed.  On error *RET_STR is not modified.  On success the caller owns
   the returned string and must free(3) it.
*/
int
mu_c_str_unescape (char const *str, char const *chr, char const *xtab,
                   char **ret_str)
{
  char *newstr;
  size_t i, size;

  if (!ret_str)
    return MU_ERR_OUT_PTR_NULL;

  if (!str)
    {
      *ret_str = NULL;
      return 0;
    }

  if (!xtab)
    {
      if (chr)
        xtab = chr;
      else
        {
          /* No translation tables: hand back a plain copy and stop
             here.  Continuing would pass NULL tables to strchr and
             leak this copy when *RET_STR is overwritten below. */
          newstr = strdup (str);
          if (!newstr)
            return ENOMEM;
          *ret_str = newstr;
          return 0;
        }
    }
  else if (!chr)
    chr = xtab;
  else if (strlen (chr) != strlen (xtab))
    return EINVAL;

  /* Compute the length of the result: every recognized escape sequence
     (two input characters) contributes one output character. */
  size = 0;
  for (i = 0; str[i]; i++)
    {
      if (str[i] == '\\' && str[i + 1] && strchr (xtab, str[i + 1]))
        i++;
      size++;
    }

  newstr = malloc (size + 1);
  if (!newstr)
    return ENOMEM;
  *ret_str = newstr;
  c_str_unescape (str, newstr, chr, xtab);
  return 0;
}
/* A counterpart of mu_c_str_escape_trans.

   Creates an allocated (using malloc(3)) copy of STR, where each
   occurrence of \ followed by TRANS[i] is replaced by TRANS[i+1] (i even).
   TRANS therefore must have an even length.  A NULL TRANS is passed on
   to mu_c_str_unescape as NULL translation tables.

   Returns 0 on success, EINVAL if TRANS has odd length, the value of
   errno if the temporary table cannot be allocated, or whatever
   mu_c_str_unescape returns.

   See also mu_wordsplit_c_escape_tab in wordsplit.c
*/
int
mu_c_str_unescape_trans (char const *str, char const *trans, char **ret_str)
{
  char *buf = NULL;        /* backing storage for both tables */
  char *plain = NULL;      /* unescaped characters (TRANS[i+1]) */
  char *escaped = NULL;    /* characters following the backslash (TRANS[i]) */
  int status;

  if (trans != NULL)
    {
      size_t total = strlen (trans);
      size_t half = total / 2;
      size_t k;
      char const *pair;

      if (total % 2 != 0)
        return EINVAL;

      /* Room for two strings of HALF characters, each with its NUL. */
      buf = malloc (total + 2);
      if (buf == NULL)
        return errno;

      plain = buf;
      escaped = buf + half + 1;

      /* Walk TRANS one pair at a time; its length is known to be even. */
      for (pair = trans, k = 0; *pair; pair += 2, k++)
        {
          plain[k] = pair[1];
          escaped[k] = pair[0];
        }
      plain[k] = 0;
      escaped[k] = 0;
    }

  status = mu_c_str_unescape (str, plain, escaped, ret_str);
  free (buf);
  return status;
}
......@@ -16,29 +16,45 @@
#include <config.h>
#include <limits.h>
#include <string.h>
#include <mailutils/util.h>
/* Return the number of occurrences of the ASCII character CHR in the
UTF-8 string STR. */
/* Count number of occurrences of each ASCII character from CHR in the
UTF-8 string STR. Unless CNT is NULL, fill it with the counts for
each character (so that CNT[i] contains number of occurrences of
CHR[i]). Return total number of occurrences. */
size_t
mu_str_count (char const *str, int chr)
mu_str_count (char const *str, char const *chr, size_t *cnt)
{
unsigned char c;
size_t count = 0;
int consume = 0;
size_t count = 0;
if (!str || chr < 0 || chr > UCHAR_MAX)
if (!str || !chr)
return 0;
if (cnt)
{
int i;
for (i = 0; chr[i]; i++)
cnt[i] = 0;
}
while ((c = *str++) != 0)
{
if (consume)
consume--;
else if (c < 0xc0)
{
if (c == chr)
char *p = strchr (chr, c);
if (p)
{
if (cnt)
cnt[p - chr]++;
count++;
}
}
else if (c & 0xc0)
consume = 1;
else if (c & 0xe0)
......