Improve wordsplit.
* include/mailutils/wordsplit.h (mu_wordsplit) <ws_escape>: New member.
(MU_WRDSF_ESCAPE): New flag.
(mu_wordsplit_general_unquote_copy): New function.
* libmailutils/string/wordsplit.c: Remove empty nodes after whitespace trimming. If MU_WRDSF_ESCAPE is set, remove only backslashes appearing in front of a character in ws_escape.
* libmailutils/tests/wordsplit.at: Test new features.
Showing 3 changed files with 66 additions and 12 deletions
... | @@ -28,6 +28,7 @@ struct mu_wordsplit | ... | @@ -28,6 +28,7 @@ struct mu_wordsplit |
28 | int ws_flags; | 28 | int ws_flags; |
29 | const char *ws_delim; | 29 | const char *ws_delim; |
30 | const char *ws_comment; | 30 | const char *ws_comment; |
31 | const char *ws_escape; | ||
31 | void (*ws_alloc_die) (struct mu_wordsplit *wsp); | 32 | void (*ws_alloc_die) (struct mu_wordsplit *wsp); |
32 | void (*ws_error) (const char *, ...) | 33 | void (*ws_error) (const char *, ...) |
33 | __attribute__ ((__format__ (__printf__, 1, 2))); | 34 | __attribute__ ((__format__ (__printf__, 1, 2))); |
... | @@ -110,6 +111,9 @@ struct mu_wordsplit | ... | @@ -110,6 +111,9 @@ struct mu_wordsplit |
110 | stored in the element that follows its name. */ | 111 | stored in the element that follows its name. */ |
111 | #define MU_WRDSF_ENV_KV 0x4000000 | 112 | #define MU_WRDSF_ENV_KV 0x4000000 |
112 | 113 | ||
114 | /* ws_escape is set */ | ||
115 | #define MU_WRDSF_ESCAPE 0x8000000 | ||
116 | |||
113 | #define MU_WRDSF_DEFFLAGS \ | 117 | #define MU_WRDSF_DEFFLAGS \ |
114 | (MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | \ | 118 | (MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | \ |
115 | MU_WRDSF_QUOTE | MU_WRDSF_SQUEEZE_DELIMS | MU_WRDSF_CESCAPES) | 119 | MU_WRDSF_QUOTE | MU_WRDSF_SQUEEZE_DELIMS | MU_WRDSF_CESCAPES) |
... | @@ -132,6 +136,8 @@ int mu_wordsplit_c_unquote_char (int c); | ... | @@ -132,6 +136,8 @@ int mu_wordsplit_c_unquote_char (int c); |
132 | int mu_wordsplit_c_quote_char (int c); | 136 | int mu_wordsplit_c_quote_char (int c); |
133 | size_t mu_wordsplit_c_quoted_length (const char *str, int quote_hex, | 137 | size_t mu_wordsplit_c_quoted_length (const char *str, int quote_hex, |
134 | int *quote); | 138 | int *quote); |
139 | void mu_wordsplit_general_unquote_copy (char *dst, const char *src, size_t n, | ||
140 | const char *escapable); | ||
135 | void mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n); | 141 | void mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n); |
136 | void mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n); | 142 | void mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n); |
137 | void mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex); | 143 | void mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex); | ... | ... |
... | @@ -500,7 +500,12 @@ wsnode_quoteremoval (struct mu_wordsplit *wsp) | ... | @@ -500,7 +500,12 @@ wsnode_quoteremoval (struct mu_wordsplit *wsp) |
500 | p->v.word = newstr; | 500 | p->v.word = newstr; |
501 | p->flags |= _WSNF_WORD; | 501 | p->flags |= _WSNF_WORD; |
502 | } | 502 | } |
503 | uqfn (p->v.word, str, slen); | 503 | |
504 | if (wsp->ws_flags & MU_WRDSF_ESCAPE) | ||
505 | mu_wordsplit_general_unquote_copy (p->v.word, str, slen, | ||
506 | wsp->ws_escape); | ||
507 | else | ||
508 | uqfn (p->v.word, str, slen); | ||
504 | } | 509 | } |
505 | } | 510 | } |
506 | return 0; | 511 | return 0; |
... | @@ -906,32 +911,39 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node) | ... | @@ -906,32 +911,39 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node) |
906 | return 0; | 911 | return 0; |
907 | } | 912 | } |
908 | 913 | ||
909 | static int | 914 | /* Remove NULL lists */ |
910 | mu_wordsplit_varexp (struct mu_wordsplit *wsp) | 915 | static void |
916 | wsnode_nullelim (struct mu_wordsplit *wsp) | ||
911 | { | 917 | { |
912 | struct mu_wordsplit_node *p; | 918 | struct mu_wordsplit_node *p; |
913 | 919 | ||
914 | for (p = wsp->ws_head; p;) | 920 | for (p = wsp->ws_head; p;) |
915 | { | 921 | { |
916 | struct mu_wordsplit_node *next = p->next; | 922 | struct mu_wordsplit_node *next = p->next; |
917 | if (!(p->flags & _WSNF_NOEXPAND)) | 923 | if (p->flags & _WSNF_NULL) |
918 | if (node_expand_vars (wsp, p)) | 924 | { |
919 | return 1; | 925 | wsnode_remove (wsp, p); |
926 | wsnode_free (p); | ||
927 | } | ||
920 | p = next; | 928 | p = next; |
921 | } | 929 | } |
930 | } | ||
931 | |||
932 | static int | ||
933 | mu_wordsplit_varexp (struct mu_wordsplit *wsp) | ||
934 | { | ||
935 | struct mu_wordsplit_node *p; | ||
922 | 936 | ||
923 | /* Remove NULL lists */ | ||
924 | for (p = wsp->ws_head; p;) | 937 | for (p = wsp->ws_head; p;) |
925 | { | 938 | { |
926 | struct mu_wordsplit_node *next = p->next; | 939 | struct mu_wordsplit_node *next = p->next; |
927 | if (p->flags & _WSNF_NULL) | 940 | if (!(p->flags & _WSNF_NOEXPAND)) |
928 | { | 941 | if (node_expand_vars (wsp, p)) |
929 | wsnode_remove (wsp, p); | 942 | return 1; |
930 | wsnode_free (p); | ||
931 | } | ||
932 | p = next; | 943 | p = next; |
933 | } | 944 | } |
934 | 945 | ||
946 | wsnode_nullelim (wsp); | ||
935 | return 0; | 947 | return 0; |
936 | } | 948 | } |
937 | 949 | ||
... | @@ -959,7 +971,11 @@ mu_wordsplit_trimws (struct mu_wordsplit *wsp) | ... | @@ -959,7 +971,11 @@ mu_wordsplit_trimws (struct mu_wordsplit *wsp) |
959 | for (n = p->v.segm.end; n > p->v.segm.beg && ISWS (wsp->ws_input[n-1]); | 971 | for (n = p->v.segm.end; n > p->v.segm.beg && ISWS (wsp->ws_input[n-1]); |
960 | n--); | 972 | n--); |
961 | p->v.segm.end = n; | 973 | p->v.segm.end = n; |
974 | if (p->v.segm.beg == p->v.segm.end) | ||
975 | p->flags |= _WSNF_NULL; | ||
962 | } | 976 | } |
977 | |||
978 | wsnode_nullelim (wsp); | ||
963 | } | 979 | } |
964 | 980 | ||
965 | static int | 981 | static int |
... | @@ -1217,6 +1233,21 @@ mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote) | ... | @@ -1217,6 +1233,21 @@ mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote) |
1217 | } | 1233 | } |
1218 | 1234 | ||
/* Copy the first N bytes of SRC to DST, dropping each backslash that is
   immediately followed (within those N bytes) by a character listed in
   ESCAPABLE.  Any other backslash is copied verbatim.  DST is always
   NUL-terminated; it must have room for at least N + 1 bytes. */
void
mu_wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
                                   const char *escapable)
{
  size_t i;

  for (i = 0; i < n;)
    {
      /* Inspect src[i + 1] only while it still lies inside the N-byte
         span.  A backslash in the last position has nothing to escape
         and is kept as is; looking past it would read outside the span
         and, because strchr also matches the terminating NUL, could
         silently drop the backslash. */
      if (src[i] == '\\' && i + 1 < n && strchr (escapable, src[i + 1]))
        i++;
      *dst++ = src[i++];
    }
  *dst = 0;
}
1249 | |||
1250 | void | ||
1220 | mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n) | 1251 | mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n) |
1221 | { | 1252 | { |
1222 | int i; | 1253 | int i; | ... | ... |
... | @@ -313,6 +313,15 @@ TESTWSP([C escapes off],[],[-cescapes], | ... | @@ -313,6 +313,15 @@ TESTWSP([C escapes off],[],[-cescapes], |
313 | 3: newnline | 313 | 3: newnline |
314 | ]) | 314 | ]) |
315 | 315 | ||
316 | TESTWSP([ws elimination],[],[delim ' ()' ws return_delims], | ||
317 | [( list items )], | ||
318 | [NF: 4 | ||
319 | 0: ( | ||
320 | 1: list | ||
321 | 2: items | ||
322 | 3: ) | ||
323 | ]) | ||
324 | |||
316 | TESTWSP([empty quotes],[],[delim : ws return_delims], | 325 | TESTWSP([empty quotes],[],[delim : ws return_delims], |
317 | [t=""], | 326 | [t=""], |
318 | [NF: 1 | 327 | [NF: 1 |
... | @@ -338,4 +347,12 @@ TESTWSP([suppress ws trimming within quotes],[], | ... | @@ -338,4 +347,12 @@ TESTWSP([suppress ws trimming within quotes],[], |
338 | 4: "formatfield=In message %{text}, " | 347 | 4: "formatfield=In message %{text}, " |
339 | ]) | 348 | ]) |
340 | 349 | ||
350 | TESTWSP([unescape],[],[-default novar nocmd quote escape '\"'], | ||
351 | [\Seen "quote \"" "bs \\"], | ||
352 | [NF: 3 | ||
353 | 0: \\Seen | ||
354 | 1: "quote \"" | ||
355 | 2: "bs \\" | ||
356 | ]) | ||
357 | |||
341 | m4_popdef([TESTWSP]) | 358 | m4_popdef([TESTWSP]) | ... | ... |
-
Please register or sign in to post a comment