New string functions
* libmailutils/string/strcount.c (mu_str_count): Take an array of (ASCII) characters to count occurrences of. Optionally store individual counts in an array passed as the 3rd argument. * include/mailutils/cstr.h (mu_str_count): Change proto. (mu_c_str_escape, mu_c_str_escape_trans) (mu_c_str_unescape_inplace, mu_c_str_unescape) (mu_c_str_unescape_trans): New protos. * libmailutils/string/cstrescape.c: New file. * libmailutils/string/cstrunescape.c: New file. * libmailutils/string/Makefile.am: Add new files.
Showing
7 changed files
with
407 additions
and
12 deletions
... | @@ -32,7 +32,7 @@ main (int argc, char **argv) | ... | @@ -32,7 +32,7 @@ main (int argc, char **argv) |
32 | } | 32 | } |
33 | 33 | ||
34 | if (!mu_file_name_is_safe (argv[0]) | 34 | if (!mu_file_name_is_safe (argv[0]) |
35 | || (argv[0][0] == '/' && mu_str_count (argv[0], '/') < 2)) | 35 | || (argv[0][0] == '/' && mu_str_count (argv[0], "/", NULL) < 2)) |
36 | { | 36 | { |
37 | mu_error ("unsafe file name"); | 37 | mu_error ("unsafe file name"); |
38 | return 1; | 38 | return 1; | ... | ... |
... | @@ -36,13 +36,13 @@ main (int argc, char **argv) | ... | @@ -36,13 +36,13 @@ main (int argc, char **argv) |
36 | } | 36 | } |
37 | 37 | ||
38 | if (!mu_file_name_is_safe (argv[0]) | 38 | if (!mu_file_name_is_safe (argv[0]) |
39 | || (argv[0][0] == '/' && mu_str_count (argv[0], '/') < 2)) | 39 | || (argv[0][0] == '/' && mu_str_count (argv[0], "/", NULL) < 2)) |
40 | { | 40 | { |
41 | mu_error ("%s: unsafe file name", argv[0]); | 41 | mu_error ("%s: unsafe file name", argv[0]); |
42 | return 1; | 42 | return 1; |
43 | } | 43 | } |
44 | if (!mu_file_name_is_safe (argv[1]) | 44 | if (!mu_file_name_is_safe (argv[1]) |
45 | || (argv[1][0] == '/' && mu_str_count (argv[1], '/') < 2)) | 45 | || (argv[1][0] == '/' && mu_str_count (argv[1], "/", NULL) < 2)) |
46 | { | 46 | { |
47 | mu_error ("%sunsafe file name", argv[0]); | 47 | mu_error ("%sunsafe file name", argv[0]); |
48 | return 1; | 48 | return 1; | ... | ... |
... | @@ -46,7 +46,18 @@ char *mu_str_stripws (char *string); | ... | @@ -46,7 +46,18 @@ char *mu_str_stripws (char *string); |
46 | 46 | ||
47 | int mu_string_split (const char *string, char *delim, mu_list_t list); | 47 | int mu_string_split (const char *string, char *delim, mu_list_t list); |
48 | 48 | ||
49 | size_t mu_str_count (char const *str, int chr); | 49 | size_t mu_str_count (char const *str, char const *chr, size_t *cnt); |
50 | |||
51 | int mu_c_str_escape (char const *str, char const *chr, char const *xtab, | ||
52 | char **ret_str); | ||
53 | int mu_c_str_escape_trans (char const *str, char const *trans, char **ret_str); | ||
54 | |||
55 | int mu_c_str_unescape_inplace (char *str, char const *chr, char const *xtab); | ||
56 | int mu_c_str_unescape (char const *str, char const *chr, char const *xtab, | ||
57 | char **ret_str); | ||
58 | int mu_c_str_unescape_trans (char const *str, char const *trans, | ||
59 | char **ret_str); | ||
60 | |||
50 | 61 | ||
51 | #ifdef __cplusplus | 62 | #ifdef __cplusplus |
52 | } | 63 | } | ... | ... |
... | @@ -21,6 +21,8 @@ libstring_la_SOURCES = \ | ... | @@ -21,6 +21,8 @@ libstring_la_SOURCES = \ |
21 | cpystr.c\ | 21 | cpystr.c\ |
22 | cstrcasecmp.c\ | 22 | cstrcasecmp.c\ |
23 | cstrcasestr.c\ | 23 | cstrcasestr.c\ |
24 | cstrescape.c\ | ||
25 | cstrunescape.c\ | ||
24 | cstrlower.c\ | 26 | cstrlower.c\ |
25 | cstrupper.c\ | 27 | cstrupper.c\ |
26 | hexstr.c\ | 28 | hexstr.c\ | ... | ... |
libmailutils/string/cstrescape.c
0 → 100644
1 | /* GNU Mailutils -- a suite of utilities for electronic mail | ||
2 | Copyright (C) 2016 Free Software Foundation, Inc. | ||
3 | |||
4 | GNU Mailutils is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License as published by | ||
6 | the Free Software Foundation; either version 3, or (at your option) | ||
7 | any later version. | ||
8 | |||
9 | GNU Mailutils is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License | ||
15 | along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */ | ||
16 | |||
17 | #include <config.h> | ||
18 | #include <stdlib.h> | ||
19 | #include <string.h> | ||
20 | #include <mailutils/cstr.h> | ||
21 | #include <mailutils/errno.h> | ||
22 | |||
23 | /* Examines STR for occurrences of characters from CHR. Returns in RET_STR | ||
24 | a malloc'ed string where each occurrence of CHR[i] is replaced by a | ||
25 | backslash, followed by XTAB[i]. | ||
26 | |||
27 | If CHR is NULL, RET_STR contains a malloc'ed copy of STR (XTAB is | ||
28 | ignored). | ||
29 | |||
30 | If XTAB is NULL, XTAB = CHR is assumed. | ||
31 | |||
32 | Callers are advised to include backslash into both CHR and XTAB. | ||
33 | |||
34 | Returns 0 on success, error code if an error occurred. | ||
35 | |||
36 | Example: | ||
37 | |||
38 | Escape each occurrence of backslash and double quote: | ||
39 | |||
40 | mu_c_str_escape (str, "\\\"", NULL, &ret_str) | ||
41 | */ | ||
42 | int | ||
43 | mu_c_str_escape (char const *str, char const *chr, char const *xtab, | ||
44 | char **ret_str) | ||
45 | { | ||
46 | char *newstr; | ||
47 | size_t n; | ||
48 | int c; | ||
49 | |||
50 | if (!ret_str) | ||
51 | return MU_ERR_OUT_PTR_NULL; | ||
52 | |||
53 | if (!str) | ||
54 | { | ||
55 | *ret_str = NULL; | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | if (!chr) | ||
60 | { | ||
61 | newstr = strdup (str); | ||
62 | if (!newstr) | ||
63 | return errno; | ||
64 | *ret_str = newstr; | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | n = strlen (chr); | ||
69 | |||
70 | if (xtab) | ||
71 | { | ||
72 | if (strlen (xtab) != n) | ||
73 | return EINVAL; | ||
74 | } | ||
75 | else | ||
76 | xtab = chr; | ||
77 | |||
78 | n = mu_str_count (str, chr, NULL); | ||
79 | |||
80 | newstr = malloc (strlen (str) + n + 1); | ||
81 | if (!newstr) | ||
82 | return errno; | ||
83 | *ret_str = newstr; | ||
84 | |||
85 | if (n == 0) | ||
86 | { | ||
87 | strcpy (newstr, str); | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | while ((c = *str++) != 0) | ||
92 | { | ||
93 | char *p = strchr (chr, c); | ||
94 | |||
95 | if (p) | ||
96 | { | ||
97 | *newstr++ = '\\'; | ||
98 | *newstr++ = xtab[p - chr]; | ||
99 | } | ||
100 | else | ||
101 | *newstr++ = c; | ||
102 | } | ||
103 | *newstr = 0; | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
/* Escape selected characters in STR and hand back a newly allocated
   result in RET_STR.

   TRANS is a string of character pairs, so its length must be even
   (EINVAL is returned otherwise).  Within each pair:

     TRANS[i]   - character that follows the backslash in the output
     TRANS[i+1] - character in STR that gets escaped

   That is, each TRANS[i+1] found in STR becomes backslash + TRANS[i].
   A NULL TRANS is passed on as "no translation tables".

   Returns 0 on success, or an error code if an error occurred.

   E.g., to escape control characters, backslash and double-quote using
   C convention:

     mu_c_str_escape_trans (str, "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v", &ret)

   See also mu_wordsplit_c_escape_tab in wordsplit.c
*/
int
mu_c_str_escape_trans (char const *str, char const *trans, char **ret_str)
{
  char *tables = NULL;        /* one allocation holding both tables */
  char const *raw = NULL;     /* characters to be escaped */
  char const *cooked = NULL;  /* their escape-sequence letters */
  int rc;

  if (trans)
    {
      size_t len = strlen (trans);
      size_t half = len / 2;
      size_t k;
      char *rawbuf, *cookedbuf;

      if (len % 2 != 0)
        return EINVAL;

      /* Two NUL-terminated strings of HALF characters each. */
      tables = malloc (len + 2);
      if (!tables)
        return errno;

      rawbuf = tables;
      cookedbuf = tables + half + 1;
      for (k = 0; k < half; k++)
        {
          cookedbuf[k] = trans[2 * k];
          rawbuf[k] = trans[2 * k + 1];
        }
      rawbuf[half] = 0;
      cookedbuf[half] = 0;

      raw = rawbuf;
      cooked = cookedbuf;
    }

  rc = mu_c_str_escape (str, raw, cooked, ret_str);

  free (tables);

  return rc;
}
161 | |||
162 |
libmailutils/string/cstrunescape.c
0 → 100644
1 | /* GNU Mailutils -- a suite of utilities for electronic mail | ||
2 | Copyright (C) 2016 Free Software Foundation, Inc. | ||
3 | |||
4 | GNU Mailutils is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License as published by | ||
6 | the Free Software Foundation; either version 3, or (at your option) | ||
7 | any later version. | ||
8 | |||
9 | GNU Mailutils is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License | ||
15 | along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */ | ||
16 | |||
17 | #include <config.h> | ||
18 | #include <stdlib.h> | ||
19 | #include <string.h> | ||
20 | #include <mailutils/cstr.h> | ||
21 | #include <mailutils/errno.h> | ||
22 | |||
/* Copy the NUL-terminated string IN to OUT, replacing every backslash
   that is followed by a character XTAB[i] with the single character
   CHR[i].  A backslash followed by any other character, or a trailing
   backslash, is copied unchanged.

   OUT must be large enough to accommodate the translated string (same
   length as IN, in the worst case).  IN == OUT is allowed: the write
   position never gets ahead of the read position.

   Both XTAB and CHR must not be NULL and must contain the same number of
   elements.
*/
static void
c_str_unescape (char const *in, char *out, char const *chr, char const *xtab)
{
  char const *src = in;
  char *dst = out;

  while (*src)
    {
      char c = *src++;

      if (c == '\\' && *src)
        {
          char const *hit = strchr (xtab, *src);

          if (hit)
            {
              /* Known escape: emit the translation, drop both bytes. */
              c = chr[hit - xtab];
              src++;
            }
        }
      *dst++ = c;
    }
  *dst = 0;
}
63 | |||
/* Rewrite STR in place so that each backslash followed by a character
   XTAB[i] is replaced with CHR[i].

   Either XTAB or CHR can be NULL, in which case XTAB=CHR is assumed.

   If both XTAB and CHR are NULL, STR is left unchanged.

   STR == NULL is OK (nothing is done).

   Returns 0 on success, and EINVAL if lengths of CHR and XTAB differ.
*/
int
mu_c_str_unescape_inplace (char *str, char const *chr, char const *xtab)
{
  if (str == NULL)
    return 0;

  /* No tables at all: nothing to translate. */
  if (chr == NULL && xtab == NULL)
    return 0;

  if (xtab == NULL)
    xtab = chr;
  else if (chr == NULL)
    chr = xtab;
  else if (strlen (xtab) != strlen (chr))
    return EINVAL;

  c_str_unescape (str, str, chr, xtab);
  return 0;
}
94 | |||
95 | /* A counterpart of mu_c_str_escape. Creates an allocated (using malloc(3)) | ||
96 | copy of STR, where each occurrence of \ followed by a charater XTAB[i] | ||
97 | is replaced with single character CHR[i]. | ||
98 | |||
99 | Either XTAB or CHR can be NULL, in which case XTAB=CHR is assumed. | ||
100 | |||
101 | If both XTAB and CHR are NULL, the result is *RET_STR will contain exact | ||
102 | malloc'ed copy of STR. | ||
103 | |||
104 | STR == NULL is OK: in that case *RET_STR will also be NULL. | ||
105 | |||
106 | Returns 0 success, EINVAL if lengths of CHR and XTAB differ, | ||
107 | MU_ERR_OUT_PTR_NULL if RET_STR is NULL, and ENOMEM if memory allocation | ||
108 | failed. | ||
109 | */ | ||
110 | int | ||
111 | mu_c_str_unescape (char const *str, char const *chr, char const *xtab, | ||
112 | char **ret_str) | ||
113 | { | ||
114 | char *newstr; | ||
115 | size_t i, size; | ||
116 | |||
117 | if (!ret_str) | ||
118 | return MU_ERR_OUT_PTR_NULL; | ||
119 | |||
120 | if (!str) | ||
121 | { | ||
122 | *ret_str = NULL; | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | if (!xtab) | ||
127 | { | ||
128 | if (chr) | ||
129 | xtab = chr; | ||
130 | else | ||
131 | { | ||
132 | char *p = strdup (str); | ||
133 | if (!p) | ||
134 | return errno; | ||
135 | *ret_str = p; | ||
136 | } | ||
137 | } | ||
138 | else if (!chr) | ||
139 | chr = xtab; | ||
140 | else if (strlen (chr) != strlen (xtab)) | ||
141 | return EINVAL; | ||
142 | |||
143 | size = 0; | ||
144 | for (i = 0; str[i]; i++) | ||
145 | { | ||
146 | if (str[i] == '\\' && str[i + 1] && strchr (xtab, str[i + 1])) | ||
147 | i++; | ||
148 | size++; | ||
149 | } | ||
150 | |||
151 | newstr = malloc (size + 1); | ||
152 | if (!newstr) | ||
153 | return errno; | ||
154 | *ret_str = newstr; | ||
155 | |||
156 | c_str_unescape (str, newstr, chr, xtab); | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
/* A counterpart of mu_c_str_escape_trans.

   Creates an allocated (using malloc(3)) copy of STR, where each occurrence
   of \ followed by TRANS[i] is replaced by TRANS[i+1].  TRANS is read as
   a sequence of such pairs, so its length must be even (EINVAL is
   returned otherwise).  A NULL TRANS is passed on as "no translation
   tables".

   Returns 0 on success, or error code if an error occurred.

   See also mu_wordsplit_c_escape_tab in wordsplit.c
*/
int
mu_c_str_unescape_trans (char const *str, char const *trans, char **ret_str)
{
  char *tables = NULL;         /* one allocation holding both tables */
  char const *plain = NULL;    /* replacement characters */
  char const *letters = NULL;  /* characters expected after backslash */
  int rc;

  if (trans)
    {
      size_t len = strlen (trans);
      size_t half = len / 2;
      size_t k;
      char *plainbuf, *letterbuf;

      if (len % 2 != 0)
        return EINVAL;

      /* Two NUL-terminated strings of HALF characters each. */
      tables = malloc (len + 2);
      if (!tables)
        return errno;

      plainbuf = tables;
      letterbuf = tables + half + 1;
      for (k = 0; k < half; k++)
        {
          letterbuf[k] = trans[2 * k];
          plainbuf[k] = trans[2 * k + 1];
        }
      plainbuf[half] = 0;
      letterbuf[half] = 0;

      plain = plainbuf;
      letters = letterbuf;
    }

  rc = mu_c_str_unescape (str, plain, letters, ret_str);

  free (tables);

  return rc;
}
204 |
... | @@ -16,28 +16,44 @@ | ... | @@ -16,28 +16,44 @@ |
16 | 16 | ||
17 | #include <config.h> | 17 | #include <config.h> |
18 | #include <limits.h> | 18 | #include <limits.h> |
19 | #include <string.h> | ||
19 | #include <mailutils/util.h> | 20 | #include <mailutils/util.h> |
20 | 21 | ||
21 | /* Return the number of occurrences of the ASCII character CHR in the | 22 | /* Count number of occurrences of each ASCII character from CHR in the |
22 | UTF-8 string STR. */ | 23 | UTF-8 string STR. Unless CNT is NULL, fill it with the counts for |
24 | each character (so that CNT[i] contains number of occurrences of | ||
25 | CHR[i]). Return total number of occurrences. */ | ||
23 | size_t | 26 | size_t |
24 | mu_str_count (char const *str, int chr) | 27 | mu_str_count (char const *str, char const *chr, size_t *cnt) |
25 | { | 28 | { |
26 | unsigned char c; | 29 | unsigned char c; |
27 | size_t count = 0; | ||
28 | int consume = 0; | 30 | int consume = 0; |
29 | 31 | size_t count = 0; | |
30 | if (!str || chr < 0 || chr > UCHAR_MAX) | 32 | |
33 | if (!str || !chr) | ||
31 | return 0; | 34 | return 0; |
32 | 35 | ||
36 | if (cnt) | ||
37 | { | ||
38 | int i; | ||
39 | |||
40 | for (i = 0; chr[i]; i++) | ||
41 | cnt[i] = 0; | ||
42 | } | ||
43 | |||
33 | while ((c = *str++) != 0) | 44 | while ((c = *str++) != 0) |
34 | { | 45 | { |
35 | if (consume) | 46 | if (consume) |
36 | consume--; | 47 | consume--; |
37 | else if (c < 0xc0) | 48 | else if (c < 0xc0) |
38 | { | 49 | { |
39 | if (c == chr) | 50 | char *p = strchr (chr, c); |
40 | count++; | 51 | if (p) |
52 | { | ||
53 | if (cnt) | ||
54 | cnt[p - chr]++; | ||
55 | count++; | ||
56 | } | ||
41 | } | 57 | } |
42 | else if (c & 0xc0) | 58 | else if (c & 0xc0) |
43 | consume = 1; | 59 | consume = 1; | ... | ... |
-
Please register or sign in to post a comment