Commit 7a77cc6f 7a77cc6f80ae220c6adee5c862b53c43ed515270 by Sergey Poznyakoff

New string functions

* libmailutils/string/strcount.c (mu_str_count): Take an array of
(ASCII) characters to count occurrences of.  Optionally store individual
counts in an array passed as the 3rd argument.
* include/mailutils/cstr.h (mu_str_count): Change proto.
(mu_c_str_escape, mu_c_str_escape_trans)
(mu_c_str_unescape_inplace, mu_c_str_unescape)
(mu_c_str_unescape_trans): New protos.
* libmailutils/string/cstrescape.c: New file.
* libmailutils/string/cstrunescape.c: New file.
* libmailutils/string/Makefile.am: Add new files.
1 parent 8571d58b
...@@ -32,7 +32,7 @@ main (int argc, char **argv) ...@@ -32,7 +32,7 @@ main (int argc, char **argv)
32 } 32 }
33 33
34 if (!mu_file_name_is_safe (argv[0]) 34 if (!mu_file_name_is_safe (argv[0])
35 || (argv[0][0] == '/' && mu_str_count (argv[0], '/') < 2)) 35 || (argv[0][0] == '/' && mu_str_count (argv[0], "/", NULL) < 2))
36 { 36 {
37 mu_error ("unsafe file name"); 37 mu_error ("unsafe file name");
38 return 1; 38 return 1;
......
...@@ -36,13 +36,13 @@ main (int argc, char **argv) ...@@ -36,13 +36,13 @@ main (int argc, char **argv)
36 } 36 }
37 37
38 if (!mu_file_name_is_safe (argv[0]) 38 if (!mu_file_name_is_safe (argv[0])
39 || (argv[0][0] == '/' && mu_str_count (argv[0], '/') < 2)) 39 || (argv[0][0] == '/' && mu_str_count (argv[0], "/", NULL) < 2))
40 { 40 {
41 mu_error ("%s: unsafe file name", argv[0]); 41 mu_error ("%s: unsafe file name", argv[0]);
42 return 1; 42 return 1;
43 } 43 }
44 if (!mu_file_name_is_safe (argv[1]) 44 if (!mu_file_name_is_safe (argv[1])
45 || (argv[1][0] == '/' && mu_str_count (argv[1], '/') < 2)) 45 || (argv[1][0] == '/' && mu_str_count (argv[1], "/", NULL) < 2))
46 { 46 {
47 mu_error ("%sunsafe file name", argv[0]); 47 mu_error ("%sunsafe file name", argv[0]);
48 return 1; 48 return 1;
......
...@@ -46,7 +46,18 @@ char *mu_str_stripws (char *string); ...@@ -46,7 +46,18 @@ char *mu_str_stripws (char *string);
46 46
47 int mu_string_split (const char *string, char *delim, mu_list_t list); 47 int mu_string_split (const char *string, char *delim, mu_list_t list);
48 48
49 size_t mu_str_count (char const *str, int chr); 49 size_t mu_str_count (char const *str, char const *chr, size_t *cnt);
50
51 int mu_c_str_escape (char const *str, char const *chr, char const *xtab,
52 char **ret_str);
53 int mu_c_str_escape_trans (char const *str, char const *trans, char **ret_str);
54
55 int mu_c_str_unescape_inplace (char *str, char const *chr, char const *xtab);
56 int mu_c_str_unescape (char const *str, char const *chr, char const *xtab,
57 char **ret_str);
58 int mu_c_str_unescape_trans (char const *str, char const *trans,
59 char **ret_str);
60
50 61
51 #ifdef __cplusplus 62 #ifdef __cplusplus
52 } 63 }
......
...@@ -21,6 +21,8 @@ libstring_la_SOURCES = \ ...@@ -21,6 +21,8 @@ libstring_la_SOURCES = \
21 cpystr.c\ 21 cpystr.c\
22 cstrcasecmp.c\ 22 cstrcasecmp.c\
23 cstrcasestr.c\ 23 cstrcasestr.c\
24 cstrescape.c\
25 cstrunescape.c\
24 cstrlower.c\ 26 cstrlower.c\
25 cstrupper.c\ 27 cstrupper.c\
26 hexstr.c\ 28 hexstr.c\
......
1 /* GNU Mailutils -- a suite of utilities for electronic mail
2 Copyright (C) 2016 Free Software Foundation, Inc.
3
4 GNU Mailutils is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 GNU Mailutils is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <mailutils/cstr.h>
21 #include <mailutils/errno.h>
22
23 /* Examines STR for occurrences of characters from CHR. Returns in RET_STR
24 a malloc'ed string where each occurrence of CHR[i] is replaced by a
25 backslash, followed by XTAB[i].
26
27 If CHR is NULL, RET_STR contains a malloc'ed copy of STR (XTAB is
28 ignored).
29
30 If XTAB is NULL, XTAB = CHR is assumed.
31
32 Callers are advised to include backslash into both CHR and XTAB.
33
34 Returns 0 on success, error code if an error occurred.
35
36 Example:
37
38 Escape each occurrence of backslash and double quote:
39
40 mu_c_str_escape (str, "\\\"", NULL, &ret_str)
41 */
42 int
43 mu_c_str_escape (char const *str, char const *chr, char const *xtab,
44 char **ret_str)
45 {
46 char *newstr;
47 size_t n;
48 int c;
49
50 if (!ret_str)
51 return MU_ERR_OUT_PTR_NULL;
52
53 if (!str)
54 {
55 *ret_str = NULL;
56 return 0;
57 }
58
59 if (!chr)
60 {
61 newstr = strdup (str);
62 if (!newstr)
63 return errno;
64 *ret_str = newstr;
65 return 0;
66 }
67
68 n = strlen (chr);
69
70 if (xtab)
71 {
72 if (strlen (xtab) != n)
73 return EINVAL;
74 }
75 else
76 xtab = chr;
77
78 n = mu_str_count (str, chr, NULL);
79
80 newstr = malloc (strlen (str) + n + 1);
81 if (!newstr)
82 return errno;
83 *ret_str = newstr;
84
85 if (n == 0)
86 {
87 strcpy (newstr, str);
88 return 0;
89 }
90
91 while ((c = *str++) != 0)
92 {
93 char *p = strchr (chr, c);
94
95 if (p)
96 {
97 *newstr++ = '\\';
98 *newstr++ = xtab[p - chr];
99 }
100 else
101 *newstr++ = c;
102 }
103 *newstr = 0;
104
105 return 0;
106 }
107
/* Escape certain characters in STR and return the newly allocated result
   in *RET_STR.

   The set of escapable characters is described by TRANS, a string made of
   an even number of characters, taken in pairs:

     TRANS[i]   - character to use in the escape sequence for TRANS[i+1]
     TRANS[i+1] - character to be escaped

   Each TRANS[i+1] found in STR is replaced by backslash + TRANS[i].

   If TRANS is NULL, no translation tables are built and NULL tables are
   handed to mu_c_str_escape.

   Returns 0 on success, EINVAL if the length of TRANS is odd, the value
   of errno if the temporary tables cannot be allocated, or whatever
   mu_c_str_escape returns.

   E.g., to escape control characters, backslash and double-quote using
   C convention:

     mu_c_str_escape_trans (str, "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v", &ret)

   See also mu_wordsplit_c_escape_tab in wordsplit.c
*/
int
mu_c_str_escape_trans (char const *str, char const *trans, char **ret_str)
{
  char *plain = NULL;   /* characters to be escaped */
  char *esc = NULL;     /* matching escape letters; lives in the same buffer */
  int status;

  if (trans)
    {
      size_t len = strlen (trans);
      size_t half = len / 2;
      size_t k;

      if (len & 1)
        return EINVAL;

      /* One allocation holds both NUL-terminated tables back to back. */
      plain = malloc (len + 2);
      if (!plain)
        return errno;
      esc = plain + half + 1;

      for (k = 0; k < half; k++)
        {
          plain[k] = trans[2 * k + 1];
          esc[k] = trans[2 * k];
        }
      plain[half] = 0;
      esc[half] = 0;
    }

  status = mu_c_str_escape (str, plain, esc, ret_str);

  free (plain);

  return status;
}
161
162
1 /* GNU Mailutils -- a suite of utilities for electronic mail
2 Copyright (C) 2016 Free Software Foundation, Inc.
3
4 GNU Mailutils is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 GNU Mailutils is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <mailutils/cstr.h>
21 #include <mailutils/errno.h>
22
/* Copy characters from IN to OUT, replacing each occurrence of a backslash
   followed by a character XTAB[i] with the single character CHR[i].

   A backslash followed by a character that is not in XTAB, or a backslash
   at the very end of IN, is copied unchanged.

   OUT should be large enough to accommodate the translated string (same
   length as IN, in the worst case).  It is OK if IN==OUT: the write
   position never gets ahead of the read position.

   Both XTAB and CHR must not be NULL and must contain the same number of
   elements.
*/
static void
c_str_unescape (char const *in, char *out, char const *chr, char const *xtab)
{
  char const *src = in;
  char *dst = out;

  while (*src)
    {
      char ch = *src++;

      /* Only a backslash that is followed by something can start an
         escape sequence. */
      if (ch == '\\' && *src)
        {
          char const *hit = strchr (xtab, *src);

          if (hit)
            {
              ch = chr[hit - xtab];
              src++;
            }
        }
      *dst++ = ch;
    }
  *dst = 0;
}
63
/* Modify STR in place, replacing each occurrence of \ followed by a
   character XTAB[i] with CHR[i].

   Either XTAB or CHR can be NULL, in which case XTAB=CHR is assumed.

   If both XTAB and CHR are NULL, STR is left unchanged.

   STR == NULL is OK; nothing is done in that case.

   Returns 0 on success, and EINVAL if the lengths of CHR and XTAB differ.
*/
int
mu_c_str_unescape_inplace (char *str, char const *chr, char const *xtab)
{
  if (str == NULL)
    return 0;

  /* No translation tables at all: nothing to do. */
  if (chr == NULL && xtab == NULL)
    return 0;

  if (chr == NULL)
    chr = xtab;
  else if (xtab == NULL)
    xtab = chr;
  else if (strlen (chr) != strlen (xtab))
    return EINVAL;

  c_str_unescape (str, str, chr, xtab);
  return 0;
}
94
95 /* A counterpart of mu_c_str_escape. Creates an allocated (using malloc(3))
96 copy of STR, where each occurrence of \ followed by a charater XTAB[i]
97 is replaced with single character CHR[i].
98
99 Either XTAB or CHR can be NULL, in which case XTAB=CHR is assumed.
100
101 If both XTAB and CHR are NULL, the result is *RET_STR will contain exact
102 malloc'ed copy of STR.
103
104 STR == NULL is OK: in that case *RET_STR will also be NULL.
105
106 Returns 0 success, EINVAL if lengths of CHR and XTAB differ,
107 MU_ERR_OUT_PTR_NULL if RET_STR is NULL, and ENOMEM if memory allocation
108 failed.
109 */
110 int
111 mu_c_str_unescape (char const *str, char const *chr, char const *xtab,
112 char **ret_str)
113 {
114 char *newstr;
115 size_t i, size;
116
117 if (!ret_str)
118 return MU_ERR_OUT_PTR_NULL;
119
120 if (!str)
121 {
122 *ret_str = NULL;
123 return 0;
124 }
125
126 if (!xtab)
127 {
128 if (chr)
129 xtab = chr;
130 else
131 {
132 char *p = strdup (str);
133 if (!p)
134 return errno;
135 *ret_str = p;
136 }
137 }
138 else if (!chr)
139 chr = xtab;
140 else if (strlen (chr) != strlen (xtab))
141 return EINVAL;
142
143 size = 0;
144 for (i = 0; str[i]; i++)
145 {
146 if (str[i] == '\\' && str[i + 1] && strchr (xtab, str[i + 1]))
147 i++;
148 size++;
149 }
150
151 newstr = malloc (size + 1);
152 if (!newstr)
153 return errno;
154 *ret_str = newstr;
155
156 c_str_unescape (str, newstr, chr, xtab);
157
158 return 0;
159 }
160
/* A counterpart of mu_c_str_escape_trans.

   Creates an allocated (using malloc(3)) copy of STR, where each occurrence
   of \ followed by TRANS[i] is replaced by TRANS[i+1].  TRANS is read as a
   sequence of such character pairs and therefore must have even length.

   If TRANS is NULL, no translation tables are built and NULL tables are
   handed to mu_c_str_unescape.

   Returns 0 on success, EINVAL if the length of TRANS is odd, the value
   of errno if the temporary tables cannot be allocated, or whatever
   mu_c_str_unescape returns.

   See also mu_wordsplit_c_escape_tab in wordsplit.c
*/
int
mu_c_str_unescape_trans (char const *str, char const *trans, char **ret_str)
{
  char *plain = NULL;   /* replacement characters (TRANS[i+1]) */
  char *seq = NULL;     /* escape letters (TRANS[i]); shares the buffer */
  int result;

  if (trans != NULL)
    {
      size_t total = strlen (trans);
      char const *pair;
      char *p, *s;

      if (total % 2 != 0)
        return EINVAL;

      /* Both NUL-terminated tables are carved out of one allocation. */
      plain = malloc (total + 2);
      if (plain == NULL)
        return errno;
      seq = plain + total / 2 + 1;

      p = plain;
      s = seq;
      /* TOTAL is even, so stepping by two lands exactly on the NUL. */
      for (pair = trans; *pair; pair += 2)
        {
          *p++ = pair[1];
          *s++ = pair[0];
        }
      *p = 0;
      *s = 0;
    }

  result = mu_c_str_unescape (str, plain, seq, ret_str);

  free (plain);

  return result;
}
204
...@@ -16,28 +16,44 @@ ...@@ -16,28 +16,44 @@
16 16
17 #include <config.h> 17 #include <config.h>
18 #include <limits.h> 18 #include <limits.h>
19 #include <string.h>
19 #include <mailutils/util.h> 20 #include <mailutils/util.h>
20 21
21 /* Return the number of occurrences of the ASCII character CHR in the 22 /* Count number of occurrences of each ASCII character from CHR in the
22 UTF-8 string STR. */ 23 UTF-8 string STR. Unless CNT is NULL, fill it with the counts for
24 each character (so that CNT[i] contains number of occurrences of
25 CHR[i]). Return total number of occurrences. */
23 size_t 26 size_t
24 mu_str_count (char const *str, int chr) 27 mu_str_count (char const *str, char const *chr, size_t *cnt)
25 { 28 {
26 unsigned char c; 29 unsigned char c;
27 size_t count = 0;
28 int consume = 0; 30 int consume = 0;
29 31 size_t count = 0;
30 if (!str || chr < 0 || chr > UCHAR_MAX) 32
33 if (!str || !chr)
31 return 0; 34 return 0;
32 35
36 if (cnt)
37 {
38 int i;
39
40 for (i = 0; chr[i]; i++)
41 cnt[i] = 0;
42 }
43
33 while ((c = *str++) != 0) 44 while ((c = *str++) != 0)
34 { 45 {
35 if (consume) 46 if (consume)
36 consume--; 47 consume--;
37 else if (c < 0xc0) 48 else if (c < 0xc0)
38 { 49 {
39 if (c == chr) 50 char *p = strchr (chr, c);
40 count++; 51 if (p)
52 {
53 if (cnt)
54 cnt[p - chr]++;
55 count++;
56 }
41 } 57 }
42 else if (c & 0xc0) 58 else if (c & 0xc0)
43 consume = 1; 59 consume = 1;
......