Commit 2ec525ac 2ec525acff5a090dcba8a0c7c166bbe745340902 by Sergey Poznyakoff

Improve wordsplit.

* include/mailutils/wordsplit.h (mu_wordsplit) <ws_escape>: New member.
(MU_WRDSF_ESCAPE): New flag.
(mu_wordsplit_general_unquote_copy): New function.
* libmailutils/string/wordsplit.c: Remove empty nodes after whitespace
trimming.  If MU_WRDSF_ESCAPE is set, remove only backslashes appearing
in front of a character in ws_escape.
* libmailutils/tests/wordsplit.at: Test new features.
1 parent b26b2120
...@@ -28,6 +28,7 @@ struct mu_wordsplit ...@@ -28,6 +28,7 @@ struct mu_wordsplit
28 int ws_flags; 28 int ws_flags;
29 const char *ws_delim; 29 const char *ws_delim;
30 const char *ws_comment; 30 const char *ws_comment;
31 const char *ws_escape;
31 void (*ws_alloc_die) (struct mu_wordsplit *wsp); 32 void (*ws_alloc_die) (struct mu_wordsplit *wsp);
32 void (*ws_error) (const char *, ...) 33 void (*ws_error) (const char *, ...)
33 __attribute__ ((__format__ (__printf__, 1, 2))); 34 __attribute__ ((__format__ (__printf__, 1, 2)));
...@@ -110,6 +111,9 @@ struct mu_wordsplit ...@@ -110,6 +111,9 @@ struct mu_wordsplit
110 stored in the element that follows its name. */ 111 stored in the element that follows its name. */
111 #define MU_WRDSF_ENV_KV 0x4000000 112 #define MU_WRDSF_ENV_KV 0x4000000
112 113
114 /* ws_escape is set */
115 #define MU_WRDSF_ESCAPE 0x8000000
116
113 #define MU_WRDSF_DEFFLAGS \ 117 #define MU_WRDSF_DEFFLAGS \
114 (MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | \ 118 (MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | \
115 MU_WRDSF_QUOTE | MU_WRDSF_SQUEEZE_DELIMS | MU_WRDSF_CESCAPES) 119 MU_WRDSF_QUOTE | MU_WRDSF_SQUEEZE_DELIMS | MU_WRDSF_CESCAPES)
...@@ -132,6 +136,8 @@ int mu_wordsplit_c_unquote_char (int c); ...@@ -132,6 +136,8 @@ int mu_wordsplit_c_unquote_char (int c);
132 int mu_wordsplit_c_quote_char (int c); 136 int mu_wordsplit_c_quote_char (int c);
133 size_t mu_wordsplit_c_quoted_length (const char *str, int quote_hex, 137 size_t mu_wordsplit_c_quoted_length (const char *str, int quote_hex,
134 int *quote); 138 int *quote);
139 void mu_wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
140 const char *escapable);
135 void mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n); 141 void mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
136 void mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n); 142 void mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
137 void mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex); 143 void mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
......
...@@ -500,7 +500,12 @@ wsnode_quoteremoval (struct mu_wordsplit *wsp) ...@@ -500,7 +500,12 @@ wsnode_quoteremoval (struct mu_wordsplit *wsp)
500 p->v.word = newstr; 500 p->v.word = newstr;
501 p->flags |= _WSNF_WORD; 501 p->flags |= _WSNF_WORD;
502 } 502 }
503 uqfn (p->v.word, str, slen); 503
504 if (wsp->ws_flags & MU_WRDSF_ESCAPE)
505 mu_wordsplit_general_unquote_copy (p->v.word, str, slen,
506 wsp->ws_escape);
507 else
508 uqfn (p->v.word, str, slen);
504 } 509 }
505 } 510 }
506 return 0; 511 return 0;
...@@ -906,32 +911,39 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node) ...@@ -906,32 +911,39 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
906 return 0; 911 return 0;
907 } 912 }
908 913
909 static int 914 /* Remove nodes flagged _WSNF_NULL from the node list */
910 mu_wordsplit_varexp (struct mu_wordsplit *wsp) 915 static void
916 wsnode_nullelim (struct mu_wordsplit *wsp)
911 { 917 {
912 struct mu_wordsplit_node *p; 918 struct mu_wordsplit_node *p;
913 919
914 for (p = wsp->ws_head; p;) 920 for (p = wsp->ws_head; p;)
915 { 921 {
916 struct mu_wordsplit_node *next = p->next; 922 struct mu_wordsplit_node *next = p->next;
917 if (!(p->flags & _WSNF_NOEXPAND)) 923 if (p->flags & _WSNF_NULL)
918 if (node_expand_vars (wsp, p)) 924 {
919 return 1; 925 wsnode_remove (wsp, p);
926 wsnode_free (p);
927 }
920 p = next; 928 p = next;
921 } 929 }
930 }
931
932 static int
933 mu_wordsplit_varexp (struct mu_wordsplit *wsp)
934 {
935 struct mu_wordsplit_node *p;
922 936
923 /* Remove NULL lists */
924 for (p = wsp->ws_head; p;) 937 for (p = wsp->ws_head; p;)
925 { 938 {
926 struct mu_wordsplit_node *next = p->next; 939 struct mu_wordsplit_node *next = p->next;
927 if (p->flags & _WSNF_NULL) 940 if (!(p->flags & _WSNF_NOEXPAND))
928 { 941 if (node_expand_vars (wsp, p))
929 wsnode_remove (wsp, p); 942 return 1;
930 wsnode_free (p);
931 }
932 p = next; 943 p = next;
933 } 944 }
934 945
946 wsnode_nullelim (wsp);
935 return 0; 947 return 0;
936 } 948 }
937 949
...@@ -959,7 +971,11 @@ mu_wordsplit_trimws (struct mu_wordsplit *wsp) ...@@ -959,7 +971,11 @@ mu_wordsplit_trimws (struct mu_wordsplit *wsp)
959 for (n = p->v.segm.end; n > p->v.segm.beg && ISWS (wsp->ws_input[n-1]); 971 for (n = p->v.segm.end; n > p->v.segm.beg && ISWS (wsp->ws_input[n-1]);
960 n--); 972 n--);
961 p->v.segm.end = n; 973 p->v.segm.end = n;
974 if (p->v.segm.beg == p->v.segm.end)
975 p->flags |= _WSNF_NULL;
962 } 976 }
977
978 wsnode_nullelim (wsp);
963 } 979 }
964 980
965 static int 981 static int
...@@ -1217,6 +1233,21 @@ mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote) ...@@ -1217,6 +1233,21 @@ mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
1217 } 1233 }
1218 1234
/* Copy the N-byte segment starting at SRC to DST, dropping each backslash
   that immediately precedes a character listed in ESCAPABLE.  Any other
   backslash is copied verbatim.  DST is always NUL-terminated; it must
   have room for at least N + 1 bytes.

   A backslash in the last position of the segment is copied as is: the
   byte at SRC[N] lies outside the segment and is never examined.
   A NULL ESCAPABLE is treated as an empty set.  */
void
mu_wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
                                   const char *escapable)
{
  size_t i = 0;

  while (i < n)
    {
      /* Skip the backslash only if the escaped character is still inside
         the segment.  The explicit NUL test is needed because strchr
         matches the terminating null byte of ESCAPABLE as well.  */
      if (src[i] == '\\'
          && i + 1 < n
          && src[i + 1] != '\0'
          && escapable
          && strchr (escapable, src[i + 1]))
        i++;
      *dst++ = src[i++];
    }
  *dst = '\0';
}
1249
1250 void
1220 mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n) 1251 mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
1221 { 1252 {
1222 int i; 1253 int i;
......
...@@ -313,6 +313,15 @@ TESTWSP([C escapes off],[],[-cescapes], ...@@ -313,6 +313,15 @@ TESTWSP([C escapes off],[],[-cescapes],
313 3: newnline 313 3: newnline
314 ]) 314 ])
315 315
316 TESTWSP([ws elimination],[],[delim ' ()' ws return_delims],
317 [( list items )],
318 [NF: 4
319 0: (
320 1: list
321 2: items
322 3: )
323 ])
324
316 TESTWSP([empty quotes],[],[delim : ws return_delims], 325 TESTWSP([empty quotes],[],[delim : ws return_delims],
317 [t=""], 326 [t=""],
318 [NF: 1 327 [NF: 1
...@@ -338,4 +347,12 @@ TESTWSP([suppress ws trimming within quotes],[], ...@@ -338,4 +347,12 @@ TESTWSP([suppress ws trimming within quotes],[],
338 4: "formatfield=In message %{text}, " 347 4: "formatfield=In message %{text}, "
339 ]) 348 ])
340 349
350 TESTWSP([unescape],[],[-default novar nocmd quote escape '\"'],
351 [\Seen "quote \"" "bs \\"],
352 [NF: 3
353 0: \\Seen
354 1: "quote \""
355 2: "bs \\"
356 ])
357
341 m4_popdef([TESTWSP]) 358 m4_popdef([TESTWSP])
......