Commit 9316bc63 9316bc63c12f6caa0303bb0c89a5831a90e523b4 by Sergey Poznyakoff

wordsplit: add incremental mode.

* include/mailutils/wordsplit.h (MU_WRDSF_INCREMENTAL): New flag.
(MU_WRDSE_NOINPUT): New error code.
* libmailutils/string/wordsplit.c (mu_wordsplit_init0): New function.
(mu_wordsplit_init): Call mu_wordsplit_init0.
(expvar): Use MU_WRDSF_WS instead of MU_WRDSF_SQUEEZE_DELIMS in
call to subordinate mu_wordsplit.
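(skip_delim): If both MU_WRDSF_RETURN_DELIMS and MU_WRDSF_SQUEEZE_DELIMS
are given, squeeze only runs of the same delimiter character.
(scan_word): In incremental mode, return _MU_WRDS_EOF after each word.
(wordsplit_process_list): New function, split out of mu_wordsplit_len.
(mu_wordsplit_len): In incremental mode, a NULL input continues with
the rest of the previous input; return MU_WRDSE_NOINPUT when it is exhausted.
(mu_wordsplit): Accept NULL input.
(mu_wordsplit_perror, _mu_wordsplit_errstr): Handle MU_WRDSE_NOINPUT.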
* libmailutils/tests/wordsplit.at: Add tests for incremental mode.
* libmailutils/tests/wsp.c: Support incremental mode.
1 parent 45ecec0d
...@@ -46,7 +46,7 @@ struct mu_wordsplit ...@@ -46,7 +46,7 @@ struct mu_wordsplit
46 struct mu_wordsplit_node *ws_head, *ws_tail; 46 struct mu_wordsplit_node *ws_head, *ws_tail;
47 }; 47 };
48 48
49 /* Wordsplit flags. Only 3 bits of a 32-bit word remain unused. 49 /* Wordsplit flags. Only 2 bits of a 32-bit word remain unused.
50 It is getting crowded... */ 50 It is getting crowded... */
51 /* Append the words found to the array resulting from a previous 51 /* Append the words found to the array resulting from a previous
52 call. */ 52 call. */
...@@ -120,6 +120,9 @@ struct mu_wordsplit ...@@ -120,6 +120,9 @@ struct mu_wordsplit
120 /* ws_escape is set */ 120 /* ws_escape is set */
121 #define MU_WRDSF_ESCAPE 0x10000000 121 #define MU_WRDSF_ESCAPE 0x10000000
122 122
123 /* Incremental mode */
124 #define MU_WRDSF_INCREMENTAL 0x20000000
125
123 #define MU_WRDSF_DEFFLAGS \ 126 #define MU_WRDSF_DEFFLAGS \
124 (MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | \ 127 (MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | \
125 MU_WRDSF_QUOTE | MU_WRDSF_SQUEEZE_DELIMS | MU_WRDSF_CESCAPES) 128 MU_WRDSF_QUOTE | MU_WRDSF_SQUEEZE_DELIMS | MU_WRDSF_CESCAPES)
...@@ -131,6 +134,7 @@ struct mu_wordsplit ...@@ -131,6 +134,7 @@ struct mu_wordsplit
131 #define MU_WRDSE_USAGE 4 134 #define MU_WRDSE_USAGE 4
132 #define MU_WRDSE_CBRACE 5 135 #define MU_WRDSE_CBRACE 5
133 #define MU_WRDSE_UNDEF 6 136 #define MU_WRDSE_UNDEF 6
137 #define MU_WRDSE_NOINPUT 7
134 138
135 int mu_wordsplit (const char *s, struct mu_wordsplit *p, int flags); 139 int mu_wordsplit (const char *s, struct mu_wordsplit *p, int flags);
136 int mu_wordsplit_len (const char *s, size_t len, 140 int mu_wordsplit_len (const char *s, size_t len,
......
...@@ -85,6 +85,25 @@ _wsplt_nomem (struct mu_wordsplit *wsp) ...@@ -85,6 +85,25 @@ _wsplt_nomem (struct mu_wordsplit *wsp)
85 return wsp->ws_errno; 85 return wsp->ws_errno;
86 } 86 }
87 87
88 static void
89 mu_wordsplit_init0 (struct mu_wordsplit *wsp)
90 {
91 if (wsp->ws_flags & MU_WRDSF_REUSE)
92 {
93 if (!(wsp->ws_flags & MU_WRDSF_APPEND))
94 mu_wordsplit_free_words (wsp);
95 }
96 else
97 {
98 wsp->ws_wordv = NULL;
99 wsp->ws_wordc = 0;
100 wsp->ws_wordn = 0;
101 }
102
103 wsp->ws_errno = 0;
104 wsp->ws_head = wsp->ws_tail = NULL;
105 }
106
88 static int 107 static int
89 mu_wordsplit_init (struct mu_wordsplit *wsp, const char *input, size_t len, 108 mu_wordsplit_init (struct mu_wordsplit *wsp, const char *input, size_t len,
90 int flags) 109 int flags)
...@@ -140,24 +159,13 @@ mu_wordsplit_init (struct mu_wordsplit *wsp, const char *input, size_t len, ...@@ -140,24 +159,13 @@ mu_wordsplit_init (struct mu_wordsplit *wsp, const char *input, size_t len,
140 if (!(wsp->ws_flags & MU_WRDSF_COMMENT)) 159 if (!(wsp->ws_flags & MU_WRDSF_COMMENT))
141 wsp->ws_comment = NULL; 160 wsp->ws_comment = NULL;
142 161
143 if (wsp->ws_flags & MU_WRDSF_REUSE)
144 {
145 if (!(wsp->ws_flags & MU_WRDSF_APPEND))
146 mu_wordsplit_free_words (wsp);
147 }
148 else
149 {
150 wsp->ws_wordv = NULL;
151 wsp->ws_wordc = 0;
152 wsp->ws_wordn = 0;
153 }
154
155 if (!(wsp->ws_flags & MU_WRDSF_CLOSURE)) 162 if (!(wsp->ws_flags & MU_WRDSF_CLOSURE))
156 wsp->ws_closure = NULL; 163 wsp->ws_closure = NULL;
157 164
158 wsp->ws_endp = 0; 165 wsp->ws_endp = 0;
159 wsp->ws_errno = 0; 166
160 wsp->ws_head = wsp->ws_tail = NULL; 167 mu_wordsplit_init0 (wsp);
168
161 return 0; 169 return 0;
162 } 170 }
163 171
...@@ -815,7 +823,7 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len, ...@@ -815,7 +823,7 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len,
815 ws.ws_delim = wsp->ws_delim; 823 ws.ws_delim = wsp->ws_delim;
816 if (mu_wordsplit (value, &ws, 824 if (mu_wordsplit (value, &ws,
817 MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | 825 MU_WRDSF_NOVAR | MU_WRDSF_NOCMD |
818 MU_WRDSF_DELIM | MU_WRDSF_SQUEEZE_DELIMS)) 826 MU_WRDSF_DELIM | MU_WRDSF_WS))
819 { 827 {
820 mu_wordsplit_free (&ws); 828 mu_wordsplit_free (&ws);
821 return 1; 829 return 1;
...@@ -1017,10 +1025,20 @@ skip_delim (struct mu_wordsplit *wsp) ...@@ -1017,10 +1025,20 @@ skip_delim (struct mu_wordsplit *wsp)
1017 size_t start = wsp->ws_endp; 1025 size_t start = wsp->ws_endp;
1018 if (wsp->ws_flags & MU_WRDSF_SQUEEZE_DELIMS) 1026 if (wsp->ws_flags & MU_WRDSF_SQUEEZE_DELIMS)
1019 { 1027 {
1028 if ((wsp->ws_flags & MU_WRDSF_RETURN_DELIMS) &&
1029 ISDELIM (wsp, wsp->ws_input[start]))
1030 {
1031 int delim = wsp->ws_input[start];
1020 do 1032 do
1021 start++; 1033 start++;
1022 while (start < wsp->ws_len 1034 while (start < wsp->ws_len && delim == wsp->ws_input[start]);
1023 && ISDELIM (wsp, wsp->ws_input[start])); 1035 }
1036 else
1037 {
1038 do
1039 start++;
1040 while (start < wsp->ws_len && ISDELIM (wsp, wsp->ws_input[start]));
1041 }
1024 start--; 1042 start--;
1025 } 1043 }
1026 1044
...@@ -1151,7 +1169,8 @@ scan_word (struct mu_wordsplit *wsp, size_t start) ...@@ -1151,7 +1169,8 @@ scan_word (struct mu_wordsplit *wsp, size_t start)
1151 if (mu_wordsplit_add_segm (wsp, start, i, flags)) 1169 if (mu_wordsplit_add_segm (wsp, start, i, flags))
1152 return _MU_WRDS_ERR; 1170 return _MU_WRDS_ERR;
1153 wsp->ws_endp = i; 1171 wsp->ws_endp = i;
1154 1172 if (wsp->ws_flags & MU_WRDSF_INCREMENTAL)
1173 return _MU_WRDS_EOF;
1155 return _MU_WRDS_OK; 1174 return _MU_WRDS_OK;
1156 } 1175 }
1157 1176
...@@ -1366,33 +1385,26 @@ mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex) ...@@ -1366,33 +1385,26 @@ mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
1366 } 1385 }
1367 } 1386 }
1368 1387
1369 int 1388 static int
1370 mu_wordsplit_len (const char *command, size_t len, struct mu_wordsplit *wsp, 1389 wordsplit_process_list (struct mu_wordsplit *wsp, size_t start)
1371 int flags)
1372 { 1390 {
1373 int rc;
1374 size_t start = 0;
1375
1376 rc = mu_wordsplit_init (wsp, command, len, flags);
1377 if (rc)
1378 return rc;
1379
1380 if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
1381 wsp->ws_debug ("Input:%.*s;", (int)len, command);
1382
1383 if (wsp->ws_flags & MU_WRDSF_NOSPLIT) 1391 if (wsp->ws_flags & MU_WRDSF_NOSPLIT)
1384 { 1392 {
1385 /* Treat entire input as a quoted argument */ 1393 /* Treat entire input as a quoted argument */
1386 if (mu_wordsplit_add_segm (wsp, 0, len, _WSNF_QUOTE)) 1394 if (mu_wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE))
1387 return wsp->ws_errno; 1395 return wsp->ws_errno;
1388 } 1396 }
1389 else 1397 else
1390 { 1398 {
1399 int rc;
1400
1391 while ((rc = scan_word (wsp, start)) == _MU_WRDS_OK) 1401 while ((rc = scan_word (wsp, start)) == _MU_WRDS_OK)
1392 start = skip_delim (wsp); 1402 start = skip_delim (wsp);
1393 /* Make sure tail element is not joinable */ 1403 /* Make sure tail element is not joinable */
1394 if (wsp->ws_tail) 1404 if (wsp->ws_tail)
1395 wsp->ws_tail->flags &= ~_WSNF_JOIN; 1405 wsp->ws_tail->flags &= ~_WSNF_JOIN;
1406 if (rc == _MU_WRDS_ERR)
1407 return wsp->ws_errno;
1396 } 1408 }
1397 1409
1398 if (wsp->ws_flags & MU_WRDSF_SHOWDBG) 1410 if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
...@@ -1400,11 +1412,6 @@ mu_wordsplit_len (const char *command, size_t len, struct mu_wordsplit *wsp, ...@@ -1400,11 +1412,6 @@ mu_wordsplit_len (const char *command, size_t len, struct mu_wordsplit *wsp,
1400 wsp->ws_debug ("Initial list:"); 1412 wsp->ws_debug ("Initial list:");
1401 mu_wordsplit_dump_nodes (wsp); 1413 mu_wordsplit_dump_nodes (wsp);
1402 } 1414 }
1403 if (rc)
1404 {
1405 mu_wordsplit_free_nodes (wsp);
1406 return wsp->ws_errno;
1407 }
1408 1415
1409 if (wsp->ws_flags & MU_WRDSF_WS) 1416 if (wsp->ws_flags & MU_WRDSF_WS)
1410 { 1417 {
...@@ -1450,10 +1457,75 @@ mu_wordsplit_len (const char *command, size_t len, struct mu_wordsplit *wsp, ...@@ -1450,10 +1457,75 @@ mu_wordsplit_len (const char *command, size_t len, struct mu_wordsplit *wsp,
1450 wsp->ws_debug ("Coalesced list:"); 1457 wsp->ws_debug ("Coalesced list:");
1451 mu_wordsplit_dump_nodes (wsp); 1458 mu_wordsplit_dump_nodes (wsp);
1452 } 1459 }
1453
1454 mu_wordsplit_finish (wsp);
1455 } 1460 }
1456 while (0); 1461 while (0);
1462 return wsp->ws_errno;
1463 }
1464
1465 int
1466 mu_wordsplit_len (const char *command, size_t length, struct mu_wordsplit *wsp,
1467 int flags)
1468 {
1469 int rc;
1470 size_t start;
1471 const char *cmdptr;
1472 size_t cmdlen;
1473
1474 if (!command)
1475 {
1476 if (!(flags & MU_WRDSF_INCREMENTAL))
1477 return EINVAL;
1478
1479 start = skip_delim (wsp);
1480 if (wsp->ws_endp == wsp->ws_len)
1481 {
1482 wsp->ws_errno = MU_WRDSE_NOINPUT;
1483 if (wsp->ws_flags & MU_WRDSF_SHOWERR)
1484 mu_wordsplit_perror (wsp);
1485 return wsp->ws_errno;
1486 }
1487
1488 cmdptr = wsp->ws_input + wsp->ws_endp;
1489 cmdlen = wsp->ws_len - wsp->ws_endp;
1490 wsp->ws_flags |= MU_WRDSF_REUSE;
1491 mu_wordsplit_init0 (wsp);
1492 }
1493 else
1494 {
1495 cmdptr = command;
1496 cmdlen = length;
1497 start = 0;
1498 rc = mu_wordsplit_init (wsp, cmdptr, cmdlen, flags);
1499 if (rc)
1500 return rc;
1501 }
1502
1503 if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
1504 wsp->ws_debug ("Input:%.*s;", (int)cmdlen, cmdptr);
1505
1506 rc = wordsplit_process_list (wsp, start);
1507 if (rc == 0 && (flags & MU_WRDSF_INCREMENTAL))
1508 {
1509 while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
1510 {
1511 start = skip_delim (wsp);
1512 if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
1513 {
1514 cmdptr = wsp->ws_input + wsp->ws_endp;
1515 cmdlen = wsp->ws_len - wsp->ws_endp;
1516 wsp->ws_debug ("Restart:%.*s;", (int)cmdlen, cmdptr);
1517 }
1518 rc = wordsplit_process_list (wsp, start);
1519 if (rc)
1520 break;
1521 }
1522 }
1523 if (rc)
1524 {
1525 mu_wordsplit_free_nodes (wsp);
1526 return rc;
1527 }
1528 mu_wordsplit_finish (wsp);
1457 mu_wordsplit_free_nodes (wsp); 1529 mu_wordsplit_free_nodes (wsp);
1458 return wsp->ws_errno; 1530 return wsp->ws_errno;
1459 } 1531 }
...@@ -1461,7 +1533,7 @@ mu_wordsplit_len (const char *command, size_t len, struct mu_wordsplit *wsp, ...@@ -1461,7 +1533,7 @@ mu_wordsplit_len (const char *command, size_t len, struct mu_wordsplit *wsp,
1461 int 1533 int
1462 mu_wordsplit (const char *command, struct mu_wordsplit *ws, int flags) 1534 mu_wordsplit (const char *command, struct mu_wordsplit *ws, int flags)
1463 { 1535 {
1464 return mu_wordsplit_len (command, strlen (command), ws, flags); 1536 return mu_wordsplit_len (command, command ? strlen (command) : 0, ws, flags);
1465 } 1537 }
1466 1538
1467 void 1539 void
...@@ -1523,6 +1595,10 @@ mu_wordsplit_perror (struct mu_wordsplit *wsp) ...@@ -1523,6 +1595,10 @@ mu_wordsplit_perror (struct mu_wordsplit *wsp)
1523 wsp->ws_error (_("undefined variable")); 1595 wsp->ws_error (_("undefined variable"));
1524 break; 1596 break;
1525 1597
1598 case MU_WRDSE_NOINPUT:
1599 wsp->ws_error (_("input exhausted"));
1600 break;
1601
1526 default: 1602 default:
1527 wsp->ws_error (_("unknown error")); 1603 wsp->ws_error (_("unknown error"));
1528 } 1604 }
...@@ -1532,10 +1608,11 @@ const char *_mu_wordsplit_errstr[] = { ...@@ -1532,10 +1608,11 @@ const char *_mu_wordsplit_errstr[] = {
1532 N_("no error"), 1608 N_("no error"),
1533 N_("missing closing quote"), 1609 N_("missing closing quote"),
1534 N_("memory exhausted"), 1610 N_("memory exhausted"),
1535 N_("variable expansion and command substitution " "are not yet supported"), 1611 N_("command substitution is not yet supported"),
1536 N_("invalid mu_wordsplit usage"), 1612 N_("invalid mu_wordsplit usage"),
1537 N_("unbalanced curly brace"), 1613 N_("unbalanced curly brace"),
1538 N_("undefined variable") 1614 N_("undefined variable"),
1615 N_("input exhausted")
1539 }; 1616 };
1540 int _mu_wordsplit_nerrs = 1617 int _mu_wordsplit_nerrs =
1541 sizeof (_mu_wordsplit_errstr) / sizeof (_mu_wordsplit_errstr[0]); 1618 sizeof (_mu_wordsplit_errstr) / sizeof (_mu_wordsplit_errstr[0]);
......
...@@ -373,4 +373,52 @@ TESTWSP([squote],[],[-default novar nocmd squote], ...@@ -373,4 +373,52 @@ TESTWSP([squote],[],[-default novar nocmd squote],
373 3: it 373 3: it
374 ]) 374 ])
375 375
376 TESTWSP([incremental],[],[incremental],
377 [incremental "input test" line
378
379
380 ],
381 [NF: 1
382 0: incremental
383 NF: 1
384 0: "input test"
385 NF: 1
386 0: line
387 ],
388 [input exhausted
389 ])
390
391 TESTWSP([incremental append],[],[incremental append],
392 [incremental "input test" line
393
394
395 ],
396 [NF: 1
397 0: incremental
398 NF: 2
399 0: incremental
400 1: "input test"
401 NF: 3
402 0: incremental
403 1: "input test"
404 2: line
405 ],
406 [input exhausted
407 ])
408
409 TESTWSP([incremental ws],[],[return_delims -squeeze_delims incremental ws],
410 [a list test
411
412
413 ],
414 [NF: 1
415 0: a
416 NF: 1
417 0: list
418 NF: 1
419 0: test
420 ],
421 [input exhausted
422 ])
423
376 m4_popdef([TESTWSP]) 424 m4_popdef([TESTWSP])
......
...@@ -49,6 +49,7 @@ struct mu_kwd bool_keytab[] = { ...@@ -49,6 +49,7 @@ struct mu_kwd bool_keytab[] = {
49 { "cescapes", MU_WRDSF_CESCAPES }, 49 { "cescapes", MU_WRDSF_CESCAPES },
50 { "default", MU_WRDSF_DEFFLAGS }, 50 { "default", MU_WRDSF_DEFFLAGS },
51 { "env_kv", MU_WRDSF_ENV_KV }, 51 { "env_kv", MU_WRDSF_ENV_KV },
52 { "incremental", MU_WRDSF_INCREMENTAL },
52 { NULL, 0 } 53 { NULL, 0 }
53 }; 54 };
54 55
...@@ -164,7 +165,7 @@ make_env_kv () ...@@ -164,7 +165,7 @@ make_env_kv ()
164 int 165 int
165 main (int argc, char **argv) 166 main (int argc, char **argv)
166 { 167 {
167 char buf[1024]; 168 char buf[1024], *ptr;
168 int i, offarg = 0; 169 int i, offarg = 0;
169 int trimnl_option = 0; 170 int trimnl_option = 0;
170 int plaintext_option = 0; 171 int plaintext_option = 0;
...@@ -172,6 +173,7 @@ main (int argc, char **argv) ...@@ -172,6 +173,7 @@ main (int argc, char **argv)
172 MU_WRDSF_ENOMEMABRT | 173 MU_WRDSF_ENOMEMABRT |
173 MU_WRDSF_ENV | MU_WRDSF_SHOWERR; 174 MU_WRDSF_ENV | MU_WRDSF_SHOWERR;
174 struct mu_wordsplit ws; 175 struct mu_wordsplit ws;
176 int next_call = 0;
175 177
176 for (i = 1; i < argc; i++) 178 for (i = 1; i < argc; i++)
177 { 179 {
...@@ -297,14 +299,38 @@ main (int argc, char **argv) ...@@ -297,14 +299,38 @@ main (int argc, char **argv)
297 else 299 else
298 ws.ws_env = (const char **) environ; 300 ws.ws_env = (const char **) environ;
299 301
300 while (fgets (buf, sizeof (buf), stdin)) 302 if (wsflags & MU_WRDSF_INCREMENTAL)
303 trimnl_option = 1;
304
305 next_call = 0;
306 while ((ptr = fgets (buf, sizeof (buf), stdin)))
301 { 307 {
302 int rc; 308 int rc;
303 size_t i; 309 size_t i;
304 310
305 if (trimnl_option) 311 if (trimnl_option)
306 mu_rtrim_cset (buf, "\n"); 312 mu_rtrim_cset (ptr, "\n");
307 rc = mu_wordsplit (buf, &ws, wsflags); 313
314 if (wsflags & MU_WRDSF_INCREMENTAL)
315 {
316 if (next_call)
317 {
318 if (*ptr == 0)
319 ptr = NULL;
320 else
321 free ((void*)ws.ws_input);
322 }
323 else
324 next_call = 1;
325 if (ptr)
326 {
327 ptr = strdup (ptr);
328 if (!ptr)
329 abort ();
330 }
331 }
332
333 rc = mu_wordsplit (ptr, &ws, wsflags);
308 if (rc) 334 if (rc)
309 { 335 {
310 if (!(wsflags & MU_WRDSF_SHOWERR)) 336 if (!(wsflags & MU_WRDSF_SHOWERR))
......