Commit f160ca75 f160ca75991d5bf994afe3cb5bc549b113b14bbd by Sergey Poznyakoff

Port wordsplit from grecs 20616b88

The updated version supports tilde and pathname expansion, command
substitution and standard shell-like replacement constructs in
variable substitution, such as ${X:-V} etc.

* include/mailutils/wordsplit.h: Update.
* libmailutils/string/wordsplit.c: Update.
* libmailutils/tests/wsp.c: Update.
* libmailutils/tests/wordsplit.at: Update unescape test.

* libmailutils/imapio/create.c (mu_imapio_create): Initialize ws_escape
array.
* libmailutils/mime/mimehdr.c (_mime_header_parse): Likewise.
* libmailutils/tests/modmesg.c: Use mu_wordsplit with MU_WRDSF_NOSPLIT
to run expansions on the string.
* mu/shell.c (shell_prompt): Likewise.
1 parent cd2126be
/* wordsplit - a word splitter
Copyright (C) 2009, 2010 Sergey Poznyakoff
Copyright (C) 2009-2015 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
......@@ -23,42 +23,99 @@
extern "C" {
#endif
struct mu_wordsplit
typedef struct mu_wordsplit mu_wordsplit_t;
/* Structure used to direct the splitting. Members marked with [Input]
can be defined before calling mu_wordsplit(), those marked with [Output]
provide return values when the function returns. If neither mark is
used, the member is internal and must not be used by the caller.
In the comments below, the
identifiers in parentheses indicate bits that must be set (or unset, if
starting with !) in the ws_flags to initialize or use the given member.
If not redefined explicitly, most of them are set to some reasonable
default value upon entry to mu_wordsplit(). */
struct mu_wordsplit
{
size_t ws_wordc;
char **ws_wordv;
size_t ws_offs;
size_t ws_wordn;
int ws_flags;
const char *ws_delim;
const char *ws_comment;
const char *ws_escape;
void (*ws_alloc_die) (struct mu_wordsplit *wsp);
size_t ws_wordc; /* [Output] Number of words in ws_wordv. */
char **ws_wordv; /* [Output] Array of parsed out words. */
size_t ws_offs; /* [Input] (MU_WRDSF_DOOFFS) Number of initial
elements in ws_wordv to fill with NULLs. */
size_t ws_wordn; /* Number of elements ws_wordv can accommodate. */
int ws_flags; /* [Input] Flags passed to mu_wordsplit. */
int ws_options; /* [Input] (MU_WRDSF_OPTIONS)
Additional options (MU_WRDSO_* bits). */
const char *ws_delim; /* [Input] (MU_WRDSF_DELIM) Word delimiters. */
const char *ws_comment; /* [Input] (MU_WRDSF_COMMENT) Comment characters. */
const char *ws_escape[2]; /* [Input] (MU_WRDSF_ESCAPE) Characters to be escaped
with backslash. */
void (*ws_alloc_die) (mu_wordsplit_t *wsp);
/* [Input] (MU_WRDSF_ALLOC_DIE) Function called when
out of memory. Must not return. */
void (*ws_error) (const char *, ...)
__attribute__ ((__format__ (__printf__, 1, 2)));
__attribute__ ((__format__ (__printf__, 1, 2)));
/* [Input] (MU_WRDSF_ERROR) Function used for error
reporting */
void (*ws_debug) (const char *, ...)
__attribute__ ((__format__ (__printf__, 1, 2)));
const char **ws_env;
const char *(*ws_getvar) (const char *, size_t, void *);
void *ws_closure;
__attribute__ ((__format__ (__printf__, 1, 2)));
/* [Input] (MU_WRDSF_DEBUG) Function used for debug
output. */
const char **ws_env; /* [Input] (MU_WRDSF_ENV, !MU_WRDSF_NOVAR) Array of
environment variables. */
char **ws_envbuf;
size_t ws_envidx;
size_t ws_envsiz;
const char *ws_input;
size_t ws_len;
size_t ws_endp;
int ws_errno;
int (*ws_getvar) (char **ret, const char *var, size_t len, void *clos);
/* [Input] (MU_WRDSF_GETVAR, !MU_WRDSF_NOVAR) Looks up
the name VAR (LEN bytes long) in the table of
variables and, if found, returns the value of that
variable in the memory location pointed to by RET.
Returns MU_WRDSE_OK (0) on success, and an error
code (see the MU_WRDSE_* defines below) on error.
User-specific errors can be returned by storing the
error diagnostic string in RET and returning
MU_WRDSE_USERERR.
Whatever is stored in RET, it must be allocated
using malloc(3). */
void *ws_closure; /* [Input] (MU_WRDSF_CLOSURE) Passed as the CLOS
argument to ws_getvar and ws_command. */
int (*ws_command) (char **ret, const char *cmd, size_t len, char **argv,
void *clos);
/* [Input] (!MU_WRDSF_NOCMD) Returns in the memory
location pointed to by RET the expansion of
the command CMD (LEN bytes long). If the
MU_WRDSO_ARGV option is set in ws_options, ARGV
contains CMD split out to words. Otherwise ARGV
is NULL.
See ws_getvar for a discussion of possible
return values. */
const char *ws_input; /* Input string (the S argument to mu_wordsplit). */
size_t ws_len; /* Length of ws_input. */
size_t ws_endp; /* Points past the last processed byte in
ws_input. */
int ws_errno; /* [Output] Error code, if an error occurred. */
char *ws_usererr; /* Points to textual description of
the error, if ws_errno is WRDSE_USERERR. Must
be allocated with malloc(3). */
struct mu_wordsplit_node *ws_head, *ws_tail;
/* Doubly-linked list of parsed out nodes. */
int ws_lvl; /* Invocation nesting level. */
};
/* Wordsplit flags. Only 2 bits of a 32-bit word remain unused.
It is getting crowded... */
/* Initial size for ws_env, if allocated automatically */
#define MU_WORDSPLIT_ENV_INIT 16
/* Mu_Wordsplit flags. */
/* Append the words found to the array resulting from a previous
call. */
#define MU_WRDSF_APPEND 0x00000001
/* Insert we_offs initial NULLs in the array ws_wordv.
/* Insert ws_offs initial NULLs in the array ws_wordv.
(These are not counted in the returned ws_wordc.) */
#define MU_WRDSF_DOOFFS 0x00000002
/* Don't do command substitution. Reserved for future use. */
/* Don't do command substitution. */
#define MU_WRDSF_NOCMD 0x00000004
/* The parameter p resulted from a previous call to
mu_wordsplit(), and mu_wordsplit_free() was not called. Reuse the
......@@ -66,10 +123,8 @@ struct mu_wordsplit
#define MU_WRDSF_REUSE 0x00000008
/* Print errors */
#define MU_WRDSF_SHOWERR 0x00000010
/* Consider it an error if an undefined shell variable
is expanded. */
/* Consider it an error if an undefined variable is expanded. */
#define MU_WRDSF_UNDEF 0x00000020
/* Don't do variable expansion. */
#define MU_WRDSF_NOVAR 0x00000040
/* Abort on ENOMEM error */
......@@ -80,7 +135,7 @@ struct mu_wordsplit
#define MU_WRDSF_SQUOTE 0x00000200
/* Handle double quotes */
#define MU_WRDSF_DQUOTE 0x00000400
/* Handle quotes and escape directives */
/* Handle single and double quotes */
#define MU_WRDSF_QUOTE (MU_WRDSF_SQUOTE|MU_WRDSF_DQUOTE)
/* Replace each input sequence of repeated delimiters with a single
delimiter */
......@@ -108,59 +163,92 @@ struct mu_wordsplit
/* Don't split input into words. Useful for side effects. */
#define MU_WRDSF_NOSPLIT 0x00400000
/* Keep undefined variables in place, instead of expanding them to
empty string */
empty strings. */
#define MU_WRDSF_KEEPUNDEF 0x00800000
/* Warn about undefined variables */
#define MU_WRDSF_WARNUNDEF 0x01000000
/* Handle C escapes */
#define MU_WRDSF_CESCAPES 0x02000000
/* ws_closure is set */
#define MU_WRDSF_CLOSURE 0x04000000
/* ws_env is a Key/Value environment, i.e. the value of a variable is
stored in the element that follows its name. */
#define MU_WRDSF_ENV_KV 0x08000000
/* ws_escape is set */
#define MU_WRDSF_ESCAPE 0x10000000
/* Incremental mode */
#define MU_WRDSF_INCREMENTAL 0x20000000
/* Perform pathname and tilde expansion */
#define MU_WRDSF_PATHEXPAND 0x40000000
/* ws_options is initialized */
#define MU_WRDSF_OPTIONS 0x80000000
#define MU_WRDSF_DEFFLAGS \
(MU_WRDSF_NOVAR | MU_WRDSF_NOCMD | \
MU_WRDSF_QUOTE | MU_WRDSF_SQUEEZE_DELIMS | MU_WRDSF_CESCAPES)
#define MU_WRDSE_EOF 0
/* Remove the word that produces empty string after path expansion */
#define MU_WRDSO_NULLGLOB 0x00000001
/* Print error message if path expansion produces empty string */
#define MU_WRDSO_FAILGLOB 0x00000002
/* Allow a leading period to be matched by metacharacters. */
#define MU_WRDSO_DOTGLOB 0x00000004
/* ws_command needs argv parameter */
#define MU_WRDSO_ARGV 0x00000008
/* Keep backslash in unrecognized escape sequences in words */
#define MU_WRDSO_BSKEEP_WORD 0x00000010
/* Handle octal escapes in words */
#define MU_WRDSO_OESC_WORD 0x00000020
/* Handle hex escapes in words */
#define MU_WRDSO_XESC_WORD 0x00000040
/* Keep backslash in unrecognized escape sequences in quoted strings */
#define MU_WRDSO_BSKEEP_QUOTE 0x00000100
/* Handle octal escapes in quoted strings */
#define MU_WRDSO_OESC_QUOTE 0x00000200
/* Handle hex escapes in quoted strings */
#define MU_WRDSO_XESC_QUOTE 0x00000400
#define MU_WRDSO_BSKEEP MU_WRDSO_BSKEEP_WORD
#define MU_WRDSO_OESC MU_WRDSO_OESC_WORD
#define MU_WRDSO_XESC MU_WRDSO_XESC_WORD
/* Set escape option F in WS for words (Q==0) or quoted strings (Q==1) */
#define MU_WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q)))
/* Test WS for escape option F for words (Q==0) or quoted strings (Q==1) */
#define MU_WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q)))
#define MU_WRDSE_OK 0
#define MU_WRDSE_EOF MU_WRDSE_OK
#define MU_WRDSE_QUOTE 1
#define MU_WRDSE_NOSPACE 2
#define MU_WRDSE_NOSUPP 3
#define MU_WRDSE_USAGE 4
#define MU_WRDSE_CBRACE 5
#define MU_WRDSE_UNDEF 6
#define MU_WRDSE_NOINPUT 7
int mu_wordsplit (const char *s, struct mu_wordsplit *p, int flags);
int mu_wordsplit_len (const char *s, size_t len,
struct mu_wordsplit *p, int flags);
void mu_wordsplit_free (struct mu_wordsplit *p);
void mu_wordsplit_free_words (struct mu_wordsplit *ws);
#define MU_WRDSE_USAGE 3
#define MU_WRDSE_CBRACE 4
#define MU_WRDSE_UNDEF 5
#define MU_WRDSE_NOINPUT 6
#define MU_WRDSE_PAREN 7
#define MU_WRDSE_GLOBERR 8
#define MU_WRDSE_USERERR 9
int mu_wordsplit (const char *s, mu_wordsplit_t *ws, int flags);
int mu_wordsplit_len (const char *s, size_t len, mu_wordsplit_t *ws, int flags);
void mu_wordsplit_free (mu_wordsplit_t *ws);
void mu_wordsplit_free_words (mu_wordsplit_t *ws);
void mu_wordsplit_free_envbuf (mu_wordsplit_t *ws);
void mu_wordsplit_getwords (mu_wordsplit_t *ws, int *wordc, char ***wordv);
int mu_wordsplit_c_unquote_char (int c);
int mu_wordsplit_c_quote_char (int c);
size_t mu_wordsplit_c_quoted_length (const char *str, int quote_hex,
int *quote);
void mu_wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
const char *escapable);
void mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
void mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
size_t mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote);
void mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
void mu_wordsplit_perror (struct mu_wordsplit *ws);
const char *mu_wordsplit_strerror (struct mu_wordsplit *ws);
void mu_wordsplit_perror (mu_wordsplit_t *ws);
const char *mu_wordsplit_strerror (mu_wordsplit_t *ws);
void mu_wordsplit_clearerr (mu_wordsplit_t *ws);
#ifdef __cplusplus
}
#endif
#endif
......
......@@ -29,7 +29,10 @@ mu_imapio_create (mu_imapio_t *iop, mu_stream_t str, int server)
io->_imap_stream = str;
mu_stream_ref (str);
io->_imap_ws.ws_delim = " \t()[]";
io->_imap_ws.ws_escape = "\\\"";
io->_imap_ws.ws_escape[0] = NULL;
io->_imap_ws.ws_escape[1] = "\\\\\"\"";
MU_WRDSO_ESC_SET (&io->_imap_ws, 0, MU_WRDSO_BSKEEP);
MU_WRDSO_ESC_SET (&io->_imap_ws, 1, MU_WRDSO_BSKEEP);
io->_imap_ws_flags = MU_WRDSF_DELIM |
MU_WRDSF_ESCAPE |
MU_WRDSF_NOVAR |
......@@ -37,7 +40,8 @@ mu_imapio_create (mu_imapio_t *iop, mu_stream_t str, int server)
MU_WRDSF_DQUOTE |
MU_WRDSF_RETURN_DELIMS |
MU_WRDSF_WS |
MU_WRDSF_APPEND;
MU_WRDSF_APPEND |
MU_WRDSF_OPTIONS;
io->_imap_server = server;
*iop = io;
return 0;
......
......@@ -293,12 +293,14 @@ _mime_header_parse (const char *text, char **pvalue,
size_t i;
ws.ws_delim = " \t\r\n;";
ws.ws_escape = "\\\"";
ws.ws_escape[0] = ws.ws_escape[1] = "\\\\\"\"";
MU_WRDSO_ESC_SET (&ws, 0, MU_WRDSO_BSKEEP);
MU_WRDSO_ESC_SET (&ws, 1, MU_WRDSO_BSKEEP);
if (mu_wordsplit (text, &ws,
MU_WRDSF_DELIM | MU_WRDSF_ESCAPE |
MU_WRDSF_NOVAR | MU_WRDSF_NOCMD |
MU_WRDSF_DQUOTE | MU_WRDSF_SQUEEZE_DELIMS |
MU_WRDSF_RETURN_DELIMS | MU_WRDSF_WS))
MU_WRDSF_RETURN_DELIMS | MU_WRDSF_WS | MU_WRDSF_OPTIONS))
{
mu_debug (MU_DEBCAT_MIME, MU_DEBUG_ERROR,
(_("wordsplit: %s"), mu_wordsplit_strerror (&ws)));
......
/* wordsplit - a word splitter
Copyright (C) 2009, 2010 Sergey Poznyakoff
Copyright (C) 2009-2015 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
......@@ -31,6 +31,8 @@
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <pwd.h>
#include <glob.h>
#include <mailutils/nls.h>
#include <mailutils/wordsplit.h>
......@@ -47,6 +49,9 @@
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
#define ISVARBEG(c) (ISALPHA(c) || c == '_')
#define ISVARCHR(c) (ISALNUM(c) || c == '_')
#define ALLOC_INIT 128
#define ALLOC_INCR 128
......@@ -71,6 +76,15 @@ _wsplt_error (const char *fmt, ...)
static void mu_wordsplit_free_nodes (struct mu_wordsplit *);
/* Record error code EC in WSP and hand it back to the caller.
   If the MU_WRDSF_SHOWERR flag is set in WSP, the error is also reported
   via mu_wordsplit_perror().  Returns EC, so the call can be used
   directly in a return statement. */
static int
_wsplt_seterr (struct mu_wordsplit *wsp, int ec)
{
  int const report = (wsp->ws_flags & MU_WRDSF_SHOWERR) != 0;

  wsp->ws_errno = ec;
  if (report)
    {
      mu_wordsplit_perror (wsp);
    }
  return ec;
}
static int
_wsplt_nomem (struct mu_wordsplit *wsp)
{
errno = ENOMEM;
......@@ -85,6 +99,62 @@ _wsplt_nomem (struct mu_wordsplit *wsp)
return wsp->ws_errno;
}
static int mu_wordsplit_run (const char *command, size_t length,
struct mu_wordsplit *wsp,
int flags, int lvl);
/* Run a nested split of the LEN bytes at STR, using WSS as the child
   structure and WSP as the parent it inherits its settings from.

   The child always gets the parent's delimiters, debug/error callbacks,
   out-of-memory handler and ws_options.  Unless MU_WRDSF_NOVAR is set in
   FLAGS it also gets the parent's environment and ws_getvar; unless
   MU_WRDSF_NOCMD is set it gets ws_command; and if either kind of
   expansion is enabled it gets ws_closure.  The matching "member is set"
   flag bits are copied from the parent's ws_flags.

   The split runs one nesting level deeper than the parent (ws_lvl + 1).
   Returns whatever mu_wordsplit_run() returns. */
static int
_wsplt_subsplit (struct mu_wordsplit *wsp, struct mu_wordsplit *wss,
                 char const *str, int len,
                 int flags)
{
  int const expand_vars = (flags & MU_WRDSF_NOVAR) == 0;
  int const expand_cmds = (flags & MU_WRDSF_NOCMD) == 0;
  /* Bits of the parent's ws_flags that the child inherits. */
  int inherit = MU_WRDSF_SHOWDBG | MU_WRDSF_SHOWERR | MU_WRDSF_OPTIONS;

  /* Unconditionally inherited members. */
  wss->ws_delim = wsp->ws_delim;
  wss->ws_debug = wsp->ws_debug;
  wss->ws_error = wsp->ws_error;
  wss->ws_alloc_die = wsp->ws_alloc_die;
  wss->ws_options = wsp->ws_options;

  if (expand_vars)
    {
      wss->ws_env = wsp->ws_env;
      wss->ws_getvar = wsp->ws_getvar;
      inherit |= MU_WRDSF_ENV | MU_WRDSF_ENV_KV | MU_WRDSF_GETVAR;
    }

  if (expand_cmds)
    wss->ws_command = wsp->ws_command;

  /* The closure is needed by both ws_getvar and ws_command. */
  if (expand_vars || expand_cmds)
    {
      wss->ws_closure = wsp->ws_closure;
      inherit |= MU_WRDSF_CLOSURE;
    }

  flags |= MU_WRDSF_DELIM | MU_WRDSF_ALLOC_DIE | MU_WRDSF_ERROR
           | MU_WRDSF_DEBUG | (wsp->ws_flags & inherit);

  return mu_wordsplit_run (str, len, wss, flags, wsp->ws_lvl + 1);
}
/* Propagate the error state of the nested split WSS to its parent WSP.

   Any user error text currently held by WSP is freed first.  If WSS
   carries a user error (MU_WRDSE_USERERR), ownership of its ws_usererr
   string moves to WSP and WSS is reset (ws_errno = MU_WRDSE_EOF,
   ws_usererr = NULL), so the string has a single owner. */
static void
_wsplt_seterr_sub (struct mu_wordsplit *wsp, struct mu_wordsplit *wss)
{
  int const sub_errno = wss->ws_errno;

  if (wsp->ws_errno == MU_WRDSE_USERERR)
    free (wsp->ws_usererr);

  wsp->ws_errno = sub_errno;
  if (sub_errno != MU_WRDSE_USERERR)
    return;

  /* Hand the diagnostic string over to the parent. */
  wsp->ws_usererr = wss->ws_usererr;
  wss->ws_usererr = NULL;
  wss->ws_errno = MU_WRDSE_EOF;
}
static void
mu_wordsplit_init0 (struct mu_wordsplit *wsp)
{
......@@ -92,6 +162,7 @@ mu_wordsplit_init0 (struct mu_wordsplit *wsp)
{
if (!(wsp->ws_flags & MU_WRDSF_APPEND))
mu_wordsplit_free_words (wsp);
mu_wordsplit_clearerr (wsp);
}
else
{
......@@ -102,11 +173,13 @@ mu_wordsplit_init0 (struct mu_wordsplit *wsp)
wsp->ws_errno = 0;
wsp->ws_head = wsp->ws_tail = NULL;
}
}
char mu_wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
static int
mu_wordsplit_init (struct mu_wordsplit *wsp, const char *input, size_t len,
int flags)
int flags)
{
wsp->ws_flags = flags;
......@@ -115,23 +188,21 @@ mu_wordsplit_init (struct mu_wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & MU_WRDSF_ERROR))
wsp->ws_error = _wsplt_error;
if (!(wsp->ws_flags & MU_WRDSF_NOVAR)
&& !(wsp->ws_flags & (MU_WRDSF_ENV | MU_WRDSF_GETVAR)))
if (!(wsp->ws_flags & MU_WRDSF_NOVAR))
{
errno = EINVAL;
wsp->ws_errno = MU_WRDSE_USAGE;
if (wsp->ws_flags & MU_WRDSF_SHOWERR)
mu_wordsplit_perror (wsp);
return wsp->ws_errno;
/* These will be initialized on first variable assignment */
wsp->ws_envidx = wsp->ws_envsiz = 0;
wsp->ws_envbuf = NULL;
}
if (!(wsp->ws_flags & MU_WRDSF_NOCMD))
{
errno = EINVAL;
wsp->ws_errno = MU_WRDSE_NOSUPP;
if (wsp->ws_flags & MU_WRDSF_SHOWERR)
mu_wordsplit_perror (wsp);
return wsp->ws_errno;
if (!wsp->ws_command)
{
_wsplt_seterr (wsp, MU_WRDSE_USAGE);
errno = EINVAL;
return wsp->ws_errno;
}
}
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
......@@ -162,8 +233,35 @@ mu_wordsplit_init (struct mu_wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & MU_WRDSF_CLOSURE))
wsp->ws_closure = NULL;
wsp->ws_endp = 0;
if (!(wsp->ws_flags & MU_WRDSF_OPTIONS))
wsp->ws_options = 0;
if (wsp->ws_flags & MU_WRDSF_ESCAPE)
{
if (!wsp->ws_escape[0])
wsp->ws_escape[0] = "";
if (!wsp->ws_escape[1])
wsp->ws_escape[1] = "";
}
else
{
if (wsp->ws_flags & MU_WRDSF_CESCAPES)
{
wsp->ws_escape[0] = mu_wordsplit_c_escape_tab;
wsp->ws_escape[1] = mu_wordsplit_c_escape_tab;
wsp->ws_options |= MU_WRDSO_OESC_QUOTE | MU_WRDSO_OESC_WORD
| MU_WRDSO_XESC_QUOTE | MU_WRDSO_XESC_WORD;
}
else
{
wsp->ws_escape[0] = "";
wsp->ws_escape[1] = "\\\\\"\"";
wsp->ws_options |= MU_WRDSO_BSKEEP_QUOTE;
}
}
wsp->ws_endp = 0;
mu_wordsplit_init0 (wsp);
return 0;
......@@ -202,14 +300,15 @@ alloc_space (struct mu_wordsplit *wsp, size_t count)
/* Node state flags */
#define _WSNF_NULL 0x01 /* null node (a noop) */
#define _WSNF_NULL 0x01 /* null node (a noop) */
#define _WSNF_WORD 0x02 /* node contains word in v.word */
#define _WSNF_QUOTE 0x04 /* text is quoted */
#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
#define _WSNF_JOIN 0x10 /* node must be joined with the next node */
#define _WSNF_SEXP 0x20 /* is a sed expression */
#define _WSNF_DELIM 0x40 /* node is a delimiter */
#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
mu_wordsplit_add_segm must add the
segment even if it is empty */
......@@ -232,7 +331,7 @@ struct mu_wordsplit_node
static const char *
wsnode_flagstr (int flags)
{
static char retbuf[6];
static char retbuf[7];
char *p = retbuf;
if (flags & _WSNF_WORD)
......@@ -257,6 +356,10 @@ wsnode_flagstr (int flags)
*p++ = 's';
else
*p++ = '-';
if (flags & _WSNF_DELIM)
*p++ = 'd';
else
*p++ = '-';
*p = 0;
return retbuf;
}
......@@ -374,8 +477,7 @@ wsnode_insert (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node,
}
static int
mu_wordsplit_add_segm (struct mu_wordsplit *wsp, size_t beg, size_t end,
int flg)
mu_wordsplit_add_segm (struct mu_wordsplit *wsp, size_t beg, size_t end, int flg)
{
struct mu_wordsplit_node *node;
int rc;
......@@ -385,7 +487,7 @@ mu_wordsplit_add_segm (struct mu_wordsplit *wsp, size_t beg, size_t end,
rc = wsnode_new (wsp, &node);
if (rc)
return rc;
node->flags = flg & ~(_WSNF_WORD|_WSNF_EMPTYOK);
node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
node->v.segm.beg = beg;
node->v.segm.end = end;
wsnode_append (wsp, node);
......@@ -415,12 +517,14 @@ mu_wordsplit_dump_nodes (struct mu_wordsplit *wsp)
for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
{
if (p->flags & _WSNF_WORD)
wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
else
wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags),
(int)(p->v.segm.end - p->v.segm.beg),
(int) (p->v.segm.end - p->v.segm.beg),
wsp->ws_input + p->v.segm.beg);
}
}
......@@ -437,7 +541,8 @@ coalesce_segment (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
{
len += wsnode_len (p);
}
len += wsnode_len (p);
if (p)
len += wsnode_len (p);
end = p;
buf = malloc (len + 1);
......@@ -456,6 +561,7 @@ coalesce_segment (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
cur += slen;
if (p != node)
{
node->flags |= p->flags & _WSNF_QUOTE;
wsnode_remove (wsp, p);
stop = p == end;
wsnode_free (p);
......@@ -475,13 +581,15 @@ coalesce_segment (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
return 0;
}
static void mu_wordsplit_string_unquote_copy (struct mu_wordsplit *ws,
int inquote,
char *dst, const char *src,
size_t n);
static int
wsnode_quoteremoval (struct mu_wordsplit *wsp)
{
struct mu_wordsplit_node *p;
void (*uqfn) (char *, const char *, size_t) =
(wsp->ws_flags & MU_WRDSF_CESCAPES) ?
mu_wordsplit_c_unquote_copy : mu_wordsplit_sh_unquote_copy;
for (p = wsp->ws_head; p; p = p->next)
{
......@@ -509,11 +617,8 @@ wsnode_quoteremoval (struct mu_wordsplit *wsp)
p->flags |= _WSNF_WORD;
}
if (wsp->ws_flags & MU_WRDSF_ESCAPE)
mu_wordsplit_general_unquote_copy (p->v.word, str, slen,
wsp->ws_escape);
else
uqfn (p->v.word, str, slen);
mu_wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
p->v.word, str, slen);
}
}
return 0;
......@@ -606,10 +711,10 @@ node_split_prefix (struct mu_wordsplit *wsp,
}
static int
find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
char *paren)
{
enum
{ st_init, st_squote, st_dquote } state = st_init;
enum { st_init, st_squote, st_dquote } state = st_init;
size_t level = 1;
for (; i < len; i++)
......@@ -619,18 +724,23 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
case st_init:
switch (str[i])
{
case '{':
level++;
break;
case '}':
if (--level == 0)
default:
if (str[i] == paren[0])
{
level++;
break;
}
else if (str[i] == paren[1])
{
*poff = i;
return 0;
if (--level == 0)
{
*poff = i;
return 0;
}
break;
}
break;
case '"':
state = st_dquote;
break;
......@@ -657,13 +767,14 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
return 1;
}
static const char *
mu_wordsplit_find_env (struct mu_wordsplit *wsp, const char *name, size_t len)
static int
mu_wordsplit_find_env (struct mu_wordsplit *wsp, const char *name, size_t len,
char const **ret)
{
size_t i;
if (!(wsp->ws_flags & MU_WRDSF_ENV))
return NULL;
return MU_WRDSE_UNDEF;
if (wsp->ws_flags & MU_WRDSF_ENV_KV)
{
......@@ -672,29 +783,139 @@ mu_wordsplit_find_env (struct mu_wordsplit *wsp, const char *name, size_t len)
{
size_t elen = strlen (wsp->ws_env[i]);
if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
return wsp->ws_env[i + 1];
{
*ret = wsp->ws_env[i + 1];
return MU_WRDSE_OK;
}
/* Skip the value. Break the loop if it is NULL. */
i++;
if (wsp->ws_env[i] == NULL)
break;
}
}
else
else if (wsp->ws_env)
{
/* Usual (A=B) environment. */
for (i = 0; wsp->ws_env[i]; i++)
{
size_t j;
const char *var = wsp->ws_env[i];
for (j = 0; j < len; j++)
if (name[j] != var[j])
break;
if (j == len && var[j] == '=')
return var + j + 1;
{
*ret = var + j + 1;
return MU_WRDSE_OK;
}
}
}
return MU_WRDSE_UNDEF;
}
/* Append the assignment NAME=VALUE to the environment of WSP.

   NAME is NAMELEN bytes long and need not be NUL-terminated.  VALUE is a
   NUL-terminated string.  Both are copied; the caller keeps ownership of
   its arguments.

   On the first assignment the environment is moved to a private
   heap-allocated table (ws_envbuf): if MU_WRDSF_ENV is set, the entries
   of the caller-supplied ws_env are duplicated into it; otherwise an empty
   table of MU_WORDSPLIT_ENV_INIT slots is created and MU_WRDSF_ENV is
   turned on.  Later assignments double the table when it fills up.

   The entry is stored as two elements (name, value) if MU_WRDSF_ENV_KV
   is set, and as a single "NAME=VALUE" string otherwise.  The table is
   always left NULL-terminated, and ws_envidx is left indexing the
   terminator so that the next assignment overwrites it.

   Returns MU_WRDSE_OK on success, or the value of _wsplt_nomem() if
   memory is exhausted.  On failure the environment is left unchanged. */
static int
wsplt_assign_var (struct mu_wordsplit *wsp, const char *name, size_t namelen,
                  char *value)
{
  int n = (wsp->ws_flags & MU_WRDSF_ENV_KV) ? 2 : 1;
  char *v;

  /* Room is needed for N new elements plus the terminating NULL. */
  if (wsp->ws_envidx + n >= wsp->ws_envsiz)
    {
      size_t sz;
      char **newenv;

      if (!wsp->ws_envbuf)
        {
          if (wsp->ws_flags & MU_WRDSF_ENV)
            {
              /* Take a private copy of the caller's environment. */
              size_t i = 0, j;

              if (wsp->ws_env)
                {
                  for (; wsp->ws_env[i]; i++)
                    ;
                }

              sz = i + n + 1;
              newenv = calloc (sz, sizeof (newenv[0]));
              if (!newenv)
                return _wsplt_nomem (wsp);

              for (j = 0; j < i; j++)
                {
                  newenv[j] = strdup (wsp->ws_env[j]);
                  if (!newenv[j])
                    {
                      /* Release the copies made so far and the table
                         itself; J may be 0 here. */
                      while (j > 0)
                        free (newenv[--j]);
                      free (newenv);
                      return _wsplt_nomem (wsp);
                    }
                }
              newenv[j] = NULL;

              wsp->ws_envbuf = newenv;
              wsp->ws_envidx = i;
              wsp->ws_envsiz = sz;
              wsp->ws_env = (const char **) wsp->ws_envbuf;
            }
          else
            {
              newenv = calloc (MU_WORDSPLIT_ENV_INIT, sizeof (newenv[0]));
              if (!newenv)
                return _wsplt_nomem (wsp);
              wsp->ws_envbuf = newenv;
              wsp->ws_envidx = 0;
              wsp->ws_envsiz = MU_WORDSPLIT_ENV_INIT;
              wsp->ws_env = (const char **) wsp->ws_envbuf;
              wsp->ws_flags |= MU_WRDSF_ENV;
            }
        }
      else
        {
          /* Commit the new size only after realloc has succeeded, so
             that a failure leaves ws_envsiz matching the real buffer. */
          if (wsp->ws_envsiz > (size_t) -1 / 2 / sizeof (newenv[0]))
            return _wsplt_nomem (wsp);
          sz = wsp->ws_envsiz * 2;
          newenv = realloc (wsp->ws_envbuf, sz * sizeof (newenv[0]));
          if (!newenv)
            return _wsplt_nomem (wsp);
          wsp->ws_envbuf = newenv;
          wsp->ws_envsiz = sz;
          wsp->ws_env = (const char **) wsp->ws_envbuf;
        }
    }

  if (wsp->ws_flags & MU_WRDSF_ENV_KV)
    {
      /* A key-value pair environment */
      char *p = malloc (namelen + 1);
      if (!p)
        return _wsplt_nomem (wsp);
      memcpy (p, name, namelen);
      p[namelen] = 0;

      v = strdup (value);
      if (!v)
        {
          free (p);
          return _wsplt_nomem (wsp);
        }
      wsp->ws_env[wsp->ws_envidx++] = p;
      wsp->ws_env[wsp->ws_envidx++] = v;
    }
  else
    {
      /* Usual (A=B) environment. */
      size_t vlen = strlen (value);

      v = malloc (namelen + vlen + 2);
      if (!v)
        return _wsplt_nomem (wsp);
      memcpy (v, name, namelen);
      v[namelen] = '=';
      memcpy (v + namelen + 1, value, vlen + 1);
      wsp->ws_env[wsp->ws_envidx++] = v;
    }

  /* Terminate the table without advancing ws_envidx: the next assignment
     must overwrite this NULL, otherwise it would be stored past the
     terminator and never be found by lookups. */
  wsp->ws_env[wsp->ws_envidx] = NULL;
  return MU_WRDSE_OK;
}
static int
......@@ -703,15 +924,17 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len,
{
size_t i = 0;
const char *defstr = NULL;
const char *value;
char *value;
const char *vptr;
struct mu_wordsplit_node *newnode;
const char *start = str - 1;
if (ISALPHA (str[0]) || str[0] == '_')
int rc;
struct mu_wordsplit ws;
if (ISVARBEG (str[0]))
{
for (i = 1; i < len; i++)
if (!(ISALNUM (str[i]) || str[i] == '_'))
if (!ISVARCHR (str[i]))
break;
*pend = str + i - 1;
}
......@@ -720,30 +943,36 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len,
str++;
len--;
for (i = 1; i < len; i++)
if (str[i] == '}' || str[i] == ':')
break;
if (str[i] == ':')
{
size_t j;
defstr = str + i + 1;
if (find_closing_cbrace (str, i + 1, len, &j))
if (str[i] == ':')
{
wsp->ws_errno = MU_WRDSE_CBRACE;
return 1;
size_t j;
defstr = str + i + 1;
if (find_closing_paren (str, i + 1, len, &j, "{}"))
return _wsplt_seterr (wsp, MU_WRDSE_CBRACE);
*pend = str + j;
break;
}
else if (str[i] == '}')
{
defstr = NULL;
*pend = str + i;
break;
}
else if (strchr ("-+?=", str[i]))
{
size_t j;
defstr = str + i;
if (find_closing_paren (str, i, len, &j, "{}"))
return _wsplt_seterr (wsp, MU_WRDSE_CBRACE);
*pend = str + j;
break;
}
*pend = str + j;
}
else if (str[i] == '}')
{
defstr = NULL;
*pend = str + i;
}
else
{
wsp->ws_errno = MU_WRDSE_CBRACE;
return 1;
}
if (i == len)
return _wsplt_seterr (wsp, MU_WRDSE_CBRACE);
}
else
{
......@@ -767,32 +996,135 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len,
i - its length
defstr - default replacement str */
vptr = mu_wordsplit_find_env (wsp, str, i);
if (vptr)
if (defstr && strchr("-+?=", defstr[0]) == 0)
{
value = strdup (vptr);
if (!value)
return _wsplt_nomem (wsp);
rc = MU_WRDSE_UNDEF;
defstr = NULL;
}
else if (wsp->ws_flags & MU_WRDSF_GETVAR)
value = wsp->ws_getvar (str, i, wsp->ws_closure);
else if (wsp->ws_flags & MU_WRDSF_UNDEF)
else
{
wsp->ws_errno = MU_WRDSE_UNDEF;
if (wsp->ws_flags & MU_WRDSF_SHOWERR)
mu_wordsplit_perror (wsp);
return 1;
rc = mu_wordsplit_find_env (wsp, str, i, &vptr);
if (rc == MU_WRDSE_OK)
{
value = strdup (vptr);
if (!value)
rc = MU_WRDSE_NOSPACE;
}
else if (wsp->ws_flags & MU_WRDSF_GETVAR)
rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
else
rc = MU_WRDSE_UNDEF;
if (rc == MU_WRDSE_OK && value[0] == 0 && defstr && defstr[-1] == ':')
{
free (value);
rc = MU_WRDSE_UNDEF;
}
}
else
switch (rc)
{
if (wsp->ws_flags & MU_WRDSF_WARNUNDEF)
wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str);
if (wsp->ws_flags & MU_WRDSF_KEEPUNDEF)
value = NULL;
case MU_WRDSE_OK:
if (defstr && *defstr == '+')
{
size_t size = *pend - ++defstr;
rc = _wsplt_subsplit (wsp, &ws, defstr, size,
MU_WRDSF_NOSPLIT | MU_WRDSF_WS | MU_WRDSF_QUOTE |
(wsp->ws_flags &
(MU_WRDSF_NOVAR | MU_WRDSF_NOCMD)));
if (rc)
return rc;
free (value);
value = ws.ws_wordv[0];
ws.ws_wordv[0] = NULL;
mu_wordsplit_free (&ws);
}
break;
case MU_WRDSE_UNDEF:
if (defstr)
{
size_t size;
if (*defstr == '-' || *defstr == '=')
{
size = *pend - ++defstr;
rc = _wsplt_subsplit (wsp, &ws, defstr, size,
MU_WRDSF_NOSPLIT | MU_WRDSF_WS | MU_WRDSF_QUOTE |
(wsp->ws_flags &
(MU_WRDSF_NOVAR | MU_WRDSF_NOCMD)));
if (rc)
return rc;
value = ws.ws_wordv[0];
ws.ws_wordv[0] = NULL;
mu_wordsplit_free (&ws);
if (defstr[-1] == '=')
wsplt_assign_var (wsp, str, i, value);
}
else
{
if (*defstr == '?')
{
size = *pend - ++defstr;
if (size == 0)
wsp->ws_error (_("%.*s: variable null or not set"),
(int) i, str);
else
{
rc = _wsplt_subsplit (wsp, &ws, defstr, size,
MU_WRDSF_NOSPLIT | MU_WRDSF_WS |
MU_WRDSF_QUOTE |
(wsp->ws_flags &
(MU_WRDSF_NOVAR | MU_WRDSF_NOCMD)));
if (rc == 0)
wsp->ws_error ("%.*s: %s",
(int) i, str, ws.ws_wordv[0]);
else
wsp->ws_error (_("%.*s: %.*s"),
(int) i, str, (int) size, defstr);
mu_wordsplit_free (&ws);
}
}
value = NULL;
}
}
else if (wsp->ws_flags & MU_WRDSF_UNDEF)
{
_wsplt_seterr (wsp, MU_WRDSE_UNDEF);
return 1;
}
else
value = "";
{
if (wsp->ws_flags & MU_WRDSF_WARNUNDEF)
wsp->ws_error (_("warning: undefined variable `%.*s'"),
(int) i, str);
if (wsp->ws_flags & MU_WRDSF_KEEPUNDEF)
value = NULL;
else
{
value = strdup ("");
if (!value)
return _wsplt_nomem (wsp);
}
}
break;
case MU_WRDSE_NOSPACE:
return _wsplt_nomem (wsp);
case MU_WRDSE_USERERR:
if (wsp->ws_errno == MU_WRDSE_USERERR)
free (wsp->ws_usererr);
wsp->ws_usererr = value;
/* fall through */
default:
_wsplt_seterr (wsp, rc);
return 1;
}
/* FIXME: handle defstr */
if (value)
{
if (flg & _WSNF_QUOTE)
......@@ -802,12 +1134,11 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len,
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
newnode->v.word = strdup (value);
if (!newnode->v.word)
return _wsplt_nomem (wsp);
newnode->v.word = value;
}
else if (*value == 0)
{
free (value);
/* Empty string is a special case */
if (wsnode_new (wsp, &newnode))
return 1;
......@@ -818,13 +1149,15 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len,
else
{
struct mu_wordsplit ws;
int i;
ws.ws_delim = wsp->ws_delim;
if (mu_wordsplit (value, &ws,
MU_WRDSF_NOVAR | MU_WRDSF_NOCMD |
MU_WRDSF_DELIM | MU_WRDSF_WS))
int i, rc;
rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
MU_WRDSF_NOVAR | MU_WRDSF_NOCMD |
MU_WRDSF_QUOTE);
free (value);
if (rc)
{
_wsplt_seterr_sub (wsp, &ws);
mu_wordsplit_free (&ws);
return 1;
}
......@@ -871,7 +1204,19 @@ expvar (struct mu_wordsplit *wsp, const char *str, size_t len,
}
static int
node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
begin_var_p (int c)
{
return c == '{' || ISVARBEG (c);
}
static int
node_expand (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node,
int (*beg_p) (int),
int (*ws_exp_fn) (struct mu_wordsplit *wsp,
const char *str, size_t len,
struct mu_wordsplit_node **ptail,
const char **pend,
int flg))
{
const char *str = wsnode_ptr (wsp, node);
size_t slen = wsnode_len (node);
......@@ -887,7 +1232,7 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
p++;
continue;
}
if (*p == '$')
if (*p == '$' && beg_p (p[1]))
{
size_t n = p - str;
......@@ -896,8 +1241,8 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
return 1;
p++;
if (expvar (wsp, p, slen - n, &tail, &p,
node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
if (ws_exp_fn (wsp, p, slen - n, &tail, &p,
node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
return 1;
off += p - str + 1;
str = p + 1;
......@@ -908,7 +1253,7 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
if (tail != node)
tail->flags |= _WSNF_JOIN;
if (node_split_prefix (wsp, &tail, node, off, p - str,
node->flags & _WSNF_JOIN))
node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
return 1;
}
if (tail != node)
......@@ -918,8 +1263,8 @@ node_expand_vars (struct mu_wordsplit *wsp, struct mu_wordsplit_node *node)
}
return 0;
}
/* Remove NULL lists */
/* Remove NULL nodes from the list */
static void
wsnode_nullelim (struct mu_wordsplit *wsp)
{
......@@ -928,6 +1273,8 @@ wsnode_nullelim (struct mu_wordsplit *wsp)
for (p = wsp->ws_head; p;)
{
struct mu_wordsplit_node *next = p->next;
if (p->flags & _WSNF_DELIM && p->prev)
p->prev->flags &= ~_WSNF_JOIN;
if (p->flags & _WSNF_NULL)
{
wsnode_remove (wsp, p);
......@@ -946,7 +1293,7 @@ mu_wordsplit_varexp (struct mu_wordsplit *wsp)
{
struct mu_wordsplit_node *next = p->next;
if (!(p->flags & _WSNF_NOEXPAND))
if (node_expand_vars (wsp, p))
if (node_expand (wsp, p, begin_var_p, expvar))
return 1;
p = next;
}
......@@ -955,72 +1302,408 @@ mu_wordsplit_varexp (struct mu_wordsplit *wsp)
return 0;
}
/* Strip off any leading and trailing whitespace. This function is called
right after the initial scanning, therefore it assumes that every
node in the list is a text reference node. */
static void
mu_wordsplit_trimws (struct mu_wordsplit *wsp)
static int
begin_cmd_p (int c)
{
struct mu_wordsplit_node *p;
for (p = wsp->ws_head; p; p = p->next)
{
size_t n;
if (p->flags & _WSNF_QUOTE)
continue;
/* Skip leading whitespace: */
for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
n++)
;
p->v.segm.beg = n;
/* Trim trailing whitespace */
for (n = p->v.segm.end; n > p->v.segm.beg && ISWS (wsp->ws_input[n-1]);
n--);
p->v.segm.end = n;
if (p->v.segm.beg == p->v.segm.end)
p->flags |= _WSNF_NULL;
}
wsnode_nullelim (wsp);
return c == '(';
}
static int
skip_sed_expr (const char *command, size_t i, size_t len)
expcmd (struct mu_wordsplit *wsp, const char *str, size_t len,
struct mu_wordsplit_node **ptail, const char **pend, int flg)
{
int state;
int rc;
size_t j;
char *value;
struct mu_wordsplit_node *newnode;
str++;
len--;
do
if (find_closing_paren (str, 0, len, &j, "()"))
{
int delim;
_wsplt_seterr (wsp, MU_WRDSE_PAREN);
return 1;
}
if (command[i] == ';')
i++;
if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])))
break;
*pend = str + j;
if (wsp->ws_options & MU_WRDSO_ARGV)
{
struct mu_wordsplit ws;
delim = command[++i];
state = 1;
for (i++; i < len; i++)
rc = _wsplt_subsplit (wsp, &ws, str, j,
MU_WRDSF_NOVAR | MU_WRDSF_NOCMD |
MU_WRDSF_WS | MU_WRDSF_QUOTE);
if (rc)
{
if (state == 3)
{
if (command[i] == delim || !ISALNUM (command[i]))
break;
}
else if (command[i] == '\\')
i++;
else if (command[i] == delim)
state++;
_wsplt_seterr_sub (wsp, &ws);
mu_wordsplit_free (&ws);
return 1;
}
rc = wsp->ws_command (&value, str, j, ws.ws_wordv, wsp->ws_closure);
mu_wordsplit_free (&ws);
}
while (state == 3 && i < len && command[i] == ';');
return i;
}
static size_t
skip_delim (struct mu_wordsplit *wsp)
else
rc = wsp->ws_command (&value, str, j, NULL, wsp->ws_closure);
if (rc == MU_WRDSE_NOSPACE)
return _wsplt_nomem (wsp);
else if (rc)
{
if (rc == MU_WRDSE_USERERR)
{
if (wsp->ws_errno == MU_WRDSE_USERERR)
free (wsp->ws_usererr);
wsp->ws_usererr = value;
}
_wsplt_seterr (wsp, rc);
return 1;
}
if (value)
{
if (flg & _WSNF_QUOTE)
{
if (wsnode_new (wsp, &newnode))
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
newnode->v.word = value;
}
else if (*value == 0)
{
free (value);
/* Empty string is a special case */
if (wsnode_new (wsp, &newnode))
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_NULL;
}
else
{
struct mu_wordsplit ws;
int i, rc;
rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
MU_WRDSF_NOVAR | MU_WRDSF_NOCMD |
MU_WRDSF_WS | MU_WRDSF_QUOTE);
free (value);
if (rc)
{
_wsplt_seterr_sub (wsp, &ws);
mu_wordsplit_free (&ws);
return 1;
}
for (i = 0; i < ws.ws_wordc; i++)
{
if (wsnode_new (wsp, &newnode))
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_WORD |
_WSNF_NOEXPAND |
(i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
newnode->v.word = strdup (ws.ws_wordv[i]);
if (!newnode->v.word)
return _wsplt_nomem (wsp);
}
mu_wordsplit_free (&ws);
}
}
else
{
if (wsnode_new (wsp, &newnode))
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_NULL;
}
return 0;
}
/* Run command substitution over every node of WSP's list that is not
   marked _WSNF_NOEXPAND, then remove the nodes flagged as null.
   Returns 0 on success, or 1 as soon as node_expand reports failure. */
static int
mu_wordsplit_cmdexp (struct mu_wordsplit *wsp)
{
  struct mu_wordsplit_node *cur = wsp->ws_head;

  while (cur)
    {
      /* Fetch the successor up front, before node_expand is given the
         chance to touch the list around CUR. */
      struct mu_wordsplit_node *succ = cur->next;

      if ((cur->flags & _WSNF_NOEXPAND) == 0
	  && node_expand (wsp, cur, begin_cmd_p, expcmd) != 0)
	return 1;

      cur = succ;
    }

  wsnode_nullelim (wsp);
  return 0;
}
/* Trim whitespace at the outer edges of each word.  The pass runs right
   after the initial scanning, so every node in the list is assumed to
   be a text reference (segment) node.

   Leading whitespace is removed from the first node of a word, trailing
   whitespace from the last node of the chain linked by _WSNF_JOIN.
   Quoted nodes are never trimmed.  A node left empty by trailing
   trimming is flagged _WSNF_NULL and removed by wsnode_nullelim.
   Always returns 0. */
static int
mu_wordsplit_trimws (struct mu_wordsplit *wsp)
{
  struct mu_wordsplit_node *node = wsp->ws_head;

  while (node)
    {
      size_t pos;

      if ((node->flags & _WSNF_QUOTE) == 0)
	{
	  /* Move the segment start past leading whitespace. */
	  pos = node->v.segm.beg;
	  while (pos < node->v.segm.end && ISWS (wsp->ws_input[pos]))
	    pos++;
	  node->v.segm.beg = pos;
	}

      /* Follow the chain of joined nodes up to its last member. */
      while (node->next && (node->flags & _WSNF_JOIN))
	node = node->next;

      if ((node->flags & _WSNF_QUOTE) == 0)
	{
	  /* Pull the segment end back over trailing whitespace. */
	  pos = node->v.segm.end;
	  while (pos > node->v.segm.beg && ISWS (wsp->ws_input[pos - 1]))
	    pos--;
	  node->v.segm.end = pos;

	  if (node->v.segm.beg == node->v.segm.end)
	    node->flags |= _WSNF_NULL;
	}

      node = node->next;
    }

  wsnode_nullelim (wsp);
  return 0;
}
/* Perform tilde expansion on the unquoted nodes of WSP's list.

   A node whose text starts with `~' and contains a `/' gets the part
   before the slash replaced by a home directory: that of the current
   user (getpwuid) for a bare `~', or that of the named user (getpwnam)
   for `~NAME'.  The slash and everything after it are preserved.
   Nodes without a slash, empty nodes, and nodes naming a user that
   cannot be looked up are left untouched.

   Returns 0 on success, or the value of _wsplt_nomem if memory cannot
   be allocated. */
static int
mu_wordsplit_tildexpand (struct mu_wordsplit *wsp)
{
  struct mu_wordsplit_node *p;
  char *uname = NULL;		/* scratch buffer for user names */
  size_t usize = 0;		/* allocated size of uname */

  for (p = wsp->ws_head; p; p = p->next)
    {
      const char *str;
      size_t slen;		/* length of the node text */
      size_t slash;		/* index of the first '/' in str */
      size_t ulen;		/* length of the user name after '~' */
      size_t dlen;		/* length of the home directory */
      size_t taillen;		/* length of the text from '/' to the end */
      struct passwd *pw;
      char *newstr;

      if (p->flags & _WSNF_QUOTE)
	continue;

      str = wsnode_ptr (wsp, p);
      slen = wsnode_len (p);
      /* Check the length first: for an empty node str[0] is not part
	 of the node, and the size arithmetic below would underflow. */
      if (slen == 0 || str[0] != '~')
	continue;

      for (slash = 1; slash < slen && str[slash] != '/'; slash++)
	;
      if (slash == slen)
	continue;

      ulen = slash - 1;
      if (ulen > 0)
	{
	  if (ulen + 1 > usize)
	    {
	      char *tmp = realloc (uname, ulen + 1);
	      if (!tmp)
		{
		  free (uname);
		  return _wsplt_nomem (wsp);
		}
	      uname = tmp;
	      usize = ulen + 1;
	    }
	  memcpy (uname, str + 1, ulen);
	  uname[ulen] = 0;
	  pw = getpwnam (uname);
	}
      else
	pw = getpwuid (getuid ());

      if (!pw)
	continue;

      dlen = strlen (pw->pw_dir);
      /* Copy from the slash itself in both the `~/' and `~NAME/' cases,
	 so the directory separator is never lost. */
      taillen = slen - slash;
      newstr = malloc (dlen + taillen + 1);
      if (!newstr)
	{
	  free (uname);
	  return _wsplt_nomem (wsp);
	}

      memcpy (newstr, pw->pw_dir, dlen);
      memcpy (newstr + dlen, str + slash, taillen);
      newstr[dlen + taillen] = 0;

      /* str may point into p->v.word, so release the old word only
	 after the copy above is complete. */
      if (p->flags & _WSNF_WORD)
	free (p->v.word);
      p->v.word = newstr;
      p->flags |= _WSNF_WORD;
    }
  free (uname);
  return 0;
}
/* Return 1 if any of the first L bytes of S is a globbing
   metacharacter (`*', `?' or `['), and 0 otherwise.  A non-positive L
   yields 0.  Zero bytes within the span are not metacharacters. */
static int
isglob (const char *s, int l)
{
  for (; l > 0; l--, s++)
    {
      /* strchr also matches the terminating NUL of its first argument,
	 so a zero byte has to be excluded explicitly. */
      if (*s != '\0' && strchr ("*?[", *s) != NULL)
	return 1;
    }
  return 0;
}
/* Perform pathname expansion on the unquoted nodes of WSP's list.

   Each node whose text contains a globbing metacharacter is passed to
   glob().  On a match the node is replaced by one new quoted word node
   per matching path name.  If nothing matches, the node is removed
   when MU_WRDSO_NULLGLOB is set, an MU_WRDSE_USERERR error is raised
   when MU_WRDSO_FAILGLOB is set, and the node is kept verbatim
   otherwise.  MU_WRDSO_DOTGLOB maps to GLOB_PERIOD where available.

   Returns 0 on success and a non-zero value on error.  All temporary
   storage (the pattern buffer and the glob results) is released on
   every return path. */
static int
mu_wordsplit_pathexpand (struct mu_wordsplit *wsp)
{
  struct mu_wordsplit_node *p, *next;
  char *pattern = NULL;		/* NUL-terminated copy of the node text */
  size_t patsize = 0;		/* allocated size of pattern */
  size_t slen;
  int flags = 0;

#ifdef GLOB_PERIOD
  if (wsp->ws_options & MU_WRDSO_DOTGLOB)
    flags = GLOB_PERIOD;
#endif

  for (p = wsp->ws_head; p; p = next)
    {
      const char *str;

      /* P may be removed below, so remember its successor now. */
      next = p->next;

      if (p->flags & _WSNF_QUOTE)
	continue;

      str = wsnode_ptr (wsp, p);
      slen = wsnode_len (p);

      if (isglob (str, slen))
	{
	  size_t i;
	  glob_t g;
	  struct mu_wordsplit_node *prev;

	  if (slen + 1 > patsize)
	    {
	      char *newpat = realloc (pattern, slen + 1);
	      if (!newpat)
		{
		  /* realloc leaves the old block intact on failure */
		  free (pattern);
		  return _wsplt_nomem (wsp);
		}
	      pattern = newpat;
	      patsize = slen + 1;
	    }
	  memcpy (pattern, str, slen);
	  pattern[slen] = 0;

	  switch (glob (pattern, flags, NULL, &g))
	    {
	    case 0:
	      break;

	    case GLOB_NOSPACE:
	      free (pattern);
	      return _wsplt_nomem (wsp);

	    case GLOB_NOMATCH:
	      if (wsp->ws_options & MU_WRDSO_NULLGLOB)
		{
		  wsnode_remove (wsp, p);
		  wsnode_free (p);
		}
	      else if (wsp->ws_options & MU_WRDSO_FAILGLOB)
		{
		  char buf[128];
		  if (wsp->ws_errno == MU_WRDSE_USERERR)
		    free (wsp->ws_usererr);
		  snprintf (buf, sizeof (buf), _("no files match pattern %s"),
			    pattern);
		  free (pattern);
		  wsp->ws_usererr = strdup (buf);
		  if (!wsp->ws_usererr)
		    return _wsplt_nomem (wsp);
		  else
		    return _wsplt_seterr (wsp, MU_WRDSE_USERERR);
		}
	      continue;

	    default:
	      free (pattern);
	      return _wsplt_seterr (wsp, MU_WRDSE_GLOBERR);
	    }

	  prev = p;
	  for (i = 0; i < g.gl_pathc; i++)
	    {
	      struct mu_wordsplit_node *newnode;
	      char *newstr;

	      /* Copy the path before creating the node, so that a
		 failure never leaves a half-initialized node behind. */
	      newstr = strdup (g.gl_pathv[i]);
	      if (!newstr)
		{
		  globfree (&g);
		  free (pattern);
		  return _wsplt_nomem (wsp);
		}
	      if (wsnode_new (wsp, &newnode))
		{
		  free (newstr);
		  globfree (&g);
		  free (pattern);
		  return 1;
		}
	      newnode->v.word = newstr;
	      newnode->flags |= _WSNF_WORD|_WSNF_QUOTE;
	      wsnode_insert (wsp, newnode, prev, 0);
	      prev = newnode;
	    }
	  globfree (&g);

	  wsnode_remove (wsp, p);
	  wsnode_free (p);
	}
    }
  free (pattern);
  return 0;
}
/* Skip a sequence of sed-like substitute expressions in COMMAND,
   starting at index I; LEN is the length of COMMAND.

   An expression starts with `s' followed by a punctuation character,
   which serves as its delimiter.  A backslash skips the character that
   follows it.  After the third delimiter any run of alphanumeric
   characters is consumed.  Expressions may be chained with `;'.

   Returns the index at which scanning stopped. */
static int
skip_sed_expr (const char *command, size_t i, size_t len)
{
  for (;;)
    {
      int sep;			/* delimiter of the current expression */
      int nsep = 1;		/* delimiters seen, the opening one included */

      if (command[i] == ';')
	i++;
      if (command[i] != 's' || i + 3 >= len || !ISPUNCT (command[i + 1]))
	break;

      sep = command[++i];
      i++;
      while (i < len)
	{
	  if (nsep == 3)
	    {
	      /* Past the closing delimiter: consume trailing
		 alphanumerics only. */
	      if (command[i] == sep || !ISALNUM (command[i]))
		break;
	    }
	  else if (command[i] == '\\')
	    i++;
	  else if (command[i] == sep)
	    nsep++;
	  i++;
	}

      /* Go on only if a complete expression is followed by `;'. */
      if (!(nsep == 3 && i < len && command[i] == ';'))
	break;
    }
  return i;
}
static size_t
skip_delim (struct mu_wordsplit *wsp)
{
size_t start = wsp->ws_endp;
if (wsp->ws_flags & MU_WRDSF_SQUEEZE_DELIMS)
......@@ -1065,7 +1748,7 @@ scan_qstring (struct mu_wordsplit *wsp, size_t start, size_t * end)
j++;
if (j < len && command[j] == q)
{
int flags = _WSNF_QUOTE|_WSNF_EMPTYOK;
int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
if (q == '\'')
flags |= _WSNF_NOEXPAND;
if (mu_wordsplit_add_segm (wsp, start + 1, j, flags))
......@@ -1075,9 +1758,7 @@ scan_qstring (struct mu_wordsplit *wsp, size_t start, size_t * end)
else
{
wsp->ws_endp = start;
wsp->ws_errno = MU_WRDSE_QUOTE;
if (wsp->ws_flags & MU_WRDSF_SHOWERR)
mu_wordsplit_perror (wsp);
_wsplt_seterr (wsp, MU_WRDSE_QUOTE);
return _MU_WRDS_ERR;
}
return 0;
......@@ -1147,6 +1828,18 @@ scan_word (struct mu_wordsplit *wsp, size_t start)
}
}
if (command[i] == '$')
{
if (!(wsp->ws_flags & MU_WRDSF_NOVAR)
&& command[i+1] == '{'
&& find_closing_paren (command, i + 2, len, &i, "{}") == 0)
continue;
if (!(wsp->ws_flags & MU_WRDSF_NOCMD)
&& command[i+1] == '('
&& find_closing_paren (command, i + 2, len, &i, "()") == 0)
continue;
}
if (ISDELIM (wsp, command[i]))
break;
else
......@@ -1156,6 +1849,7 @@ scan_word (struct mu_wordsplit *wsp, size_t start)
else if (wsp->ws_flags & MU_WRDSF_RETURN_DELIMS)
{
i++;
flags |= _WSNF_DELIM;
}
else if (!(wsp->ws_flags & MU_WRDSF_SQUEEZE_DELIMS))
flags |= _WSNF_EMPTYOK;
......@@ -1170,35 +1864,6 @@ scan_word (struct mu_wordsplit *wsp, size_t start)
return _MU_WRDS_OK;
}
/* Escape translation table, stored as pairs: the letter that follows a
   backslash, then the character it stands for. */
static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\tv\v";

/* Return the character denoted by escape letter C (e.g. 'n' yields a
   newline).  If C is not listed in the table, return C itself. */
int
mu_wordsplit_c_unquote_char (int c)
{
  size_t k = 0;

  while (quote_transtab[k])
    {
      if (quote_transtab[k] == c)
	return quote_transtab[k + 1];
      k += 2;
    }
  return c;
}
/* Inverse of mu_wordsplit_c_unquote_char: return the escape letter that
   denotes character C, or -1 if quote_transtab has no entry for it.
   The table is searched from its last pair towards the first. */
int
mu_wordsplit_c_quote_char (int c)
{
  size_t k;

  /* sizeof counts the terminating NUL, so K starts one past the last
     pair; K - 1 indexes the character and K - 2 its escape letter. */
  for (k = sizeof (quote_transtab) - 1; k >= 2; k -= 2)
    {
      if (quote_transtab[k - 1] == c)
	return quote_transtab[k - 2];
    }
  return -1;
}
#define to_num(c) \
(ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
......@@ -1228,7 +1893,7 @@ mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
{
if (strchr (" \"", *str))
*quote = 1;
if (*str == ' ')
len++;
else if (*str == '"')
......@@ -1239,7 +1904,7 @@ mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
len += 3;
else
{
if (mu_wordsplit_c_quote_char (*str) != -1)
if (mu_wordsplit_c_quote_char (*str))
len += 2;
else
len += 4;
......@@ -1248,47 +1913,56 @@ mu_wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
return len;
}
void
mu_wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
const char *escapable)
int
wsplt_unquote_char (const char *transtab, int c)
{
int i;
for (i = 0; i < n;)
while (*transtab && transtab[1])
{
if (src[i] == '\\' && i < n && strchr (escapable, src[i+1]))
i++;
*dst++ = src[i++];
if (*transtab++ == c)
return *transtab;
++transtab;
}
*dst = 0;
return 0;
}
void
mu_wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
int
wsplt_quote_char (const char *transtab, int c)
{
int i;
for (i = 0; i < n;)
for (; *transtab && transtab[1]; transtab += 2)
{
if (src[i] == '\\')
i++;
*dst++ = src[i++];
if (transtab[1] == c)
return *transtab;
}
*dst = 0;
return 0;
}
/* Translate escape letter C to the character it denotes, using the
   C escape table mu_wordsplit_c_escape_tab.  The result is whatever
   wsplt_unquote_char returns for that table. */
int
mu_wordsplit_c_unquote_char (int c)
{
  const char *tab = mu_wordsplit_c_escape_tab;

  return wsplt_unquote_char (tab, c);
}
/* Translate character C to its escape letter, using the C escape
   table mu_wordsplit_c_escape_tab.  The result is whatever
   wsplt_quote_char returns for that table. */
int
mu_wordsplit_c_quote_char (int c)
{
  const char *tab = mu_wordsplit_c_escape_tab;

  return wsplt_quote_char (tab, c);
}
void
mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
mu_wordsplit_string_unquote_copy (struct mu_wordsplit *ws, int inquote,
char *dst, const char *src, size_t n)
{
int i = 0;
int c;
inquote = !!inquote;
while (i < n)
{
if (src[i] == '\\')
{
++i;
if (src[i] == 'x' || src[i] == 'X')
if (MU_WRDSO_ESC_TEST (ws, inquote, MU_WRDSO_XESC)
&& (src[i] == 'x' || src[i] == 'X'))
{
if (n - i < 2)
{
......@@ -1311,7 +1985,8 @@ mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
else if (MU_WRDSO_ESC_TEST (ws, inquote, MU_WRDSO_OESC)
&& (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
{
if (n - i < 1)
{
......@@ -1333,8 +2008,17 @@ mu_wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
{
*dst++ = c;
++i;
}
else
*dst++ = mu_wordsplit_c_unquote_char (src[i++]);
{
if (MU_WRDSO_ESC_TEST (ws, inquote, MU_WRDSO_BSKEEP))
*dst++ = '\\';
*dst++ = src[i++];
}
}
else
*dst++ = src[i++];
......@@ -1368,7 +2052,7 @@ mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
int c = mu_wordsplit_c_quote_char (*src);
*dst++ = '\\';
if (c != -1)
if (c)
*dst++ = c;
else
{
......@@ -1381,9 +2065,37 @@ mu_wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
}
}
/* Describes one stage of the expansion pipeline that
   mu_wordsplit_process_list runs over the node list. */
struct exptab
{
/* Stage name; also the end-of-table marker when NULL.  Printed in the
   debugging output after the stage function has run. */
char *descr;
/* Bit mask tested against ws_flags to decide whether the stage runs. */
int flag;
/* Bitwise OR of EXPOPT_ values modifying how the stage is selected
   and run. */
int opt;
/* Stage function, or NULL if the stage has none.  A non-zero return
   stops the pipeline. */
int (*expansion) (struct mu_wordsplit *wsp);
};
#define EXPOPT_NEG 0x01
#define EXPOPT_COALESCE 0x02
/* Expansion stages, listed in the order in which they are applied.

   A stage runs if its flag is set in ws_flags or, when EXPOPT_NEG is
   given, if the flag is NOT set.  A zero flag combined with EXPOPT_NEG
   therefore means "always run".  EXPOPT_COALESCE makes the list be
   coalesced before the stage function is called; an entry with a NULL
   function only coalesces.  Note that tilde expansion is enabled by
   the same flag as path expansion (MU_WRDSF_PATHEXPAND).  The entry
   with a NULL description terminates the table. */
static struct exptab exptab[] = {
{ N_("WS trimming"), MU_WRDSF_WS, 0, mu_wordsplit_trimws },
{ N_("tilde expansion"), MU_WRDSF_PATHEXPAND, 0, mu_wordsplit_tildexpand },
{ N_("variable expansion"), MU_WRDSF_NOVAR, EXPOPT_NEG,
mu_wordsplit_varexp },
{ N_("quote removal"), 0, EXPOPT_NEG,
wsnode_quoteremoval },
{ N_("command substitution"), MU_WRDSF_NOCMD, EXPOPT_NEG|EXPOPT_COALESCE,
mu_wordsplit_cmdexp },
{ N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
NULL },
{ N_("path expansion"), MU_WRDSF_PATHEXPAND, 0, mu_wordsplit_pathexpand },
{ NULL }
};
static int
wordsplit_process_list (struct mu_wordsplit *wsp, size_t start)
mu_wordsplit_process_list (struct mu_wordsplit *wsp, size_t start)
{
struct exptab *p;
if (wsp->ws_flags & MU_WRDSF_NOSPLIT)
{
/* Treat entire input as a quoted argument */
......@@ -1393,7 +2105,7 @@ wordsplit_process_list (struct mu_wordsplit *wsp, size_t start)
else
{
int rc;
while ((rc = scan_word (wsp, start)) == _MU_WRDS_OK)
start = skip_delim (wsp);
/* Make sure tail element is not joinable */
......@@ -1405,62 +2117,44 @@ wordsplit_process_list (struct mu_wordsplit *wsp, size_t start)
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
{
wsp->ws_debug ("Initial list:");
wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _("Initial list:"));
mu_wordsplit_dump_nodes (wsp);
}
if (wsp->ws_flags & MU_WRDSF_WS)
{
/* Trim leading and trailing whitespace */
mu_wordsplit_trimws (wsp);
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
{
wsp->ws_debug ("After WS trimming:");
mu_wordsplit_dump_nodes (wsp);
}
}
/* Expand variables (FIXME: & commands) */
if (!(wsp->ws_flags & MU_WRDSF_NOVAR))
{
if (mu_wordsplit_varexp (wsp))
{
mu_wordsplit_free_nodes (wsp);
return wsp->ws_errno;
}
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
{
wsp->ws_debug ("Expanded list:");
mu_wordsplit_dump_nodes (wsp);
}
}
do
for (p = exptab; p->descr; p++)
{
if (wsnode_quoteremoval (wsp))
break;
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
if ((p->opt & EXPOPT_NEG)
? !(wsp->ws_flags & p->flag) : (wsp->ws_flags & p->flag))
{
wsp->ws_debug ("After quote removal:");
mu_wordsplit_dump_nodes (wsp);
}
if (wsnode_coalesce (wsp))
break;
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
{
wsp->ws_debug ("Coalesced list:");
mu_wordsplit_dump_nodes (wsp);
if (p->opt & EXPOPT_COALESCE)
{
if (wsnode_coalesce (wsp))
break;
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
{
wsp->ws_debug ("(%02d) %s", wsp->ws_lvl,
_("Coalesced list:"));
mu_wordsplit_dump_nodes (wsp);
}
}
if (p->expansion)
{
if (p->expansion (wsp))
break;
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
{
wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _(p->descr));
mu_wordsplit_dump_nodes (wsp);
}
}
}
}
while (0);
return wsp->ws_errno;
}
}
int
mu_wordsplit_len (const char *command, size_t length, struct mu_wordsplit *wsp,
int flags)
static int
mu_wordsplit_run (const char *command, size_t length, struct mu_wordsplit *wsp,
int flags, int lvl)
{
int rc;
size_t start;
......@@ -1474,13 +2168,8 @@ mu_wordsplit_len (const char *command, size_t length, struct mu_wordsplit *wsp,
start = skip_delim (wsp);
if (wsp->ws_endp == wsp->ws_len)
{
wsp->ws_errno = MU_WRDSE_NOINPUT;
if (wsp->ws_flags & MU_WRDSF_SHOWERR)
mu_wordsplit_perror (wsp);
return wsp->ws_errno;
}
return _wsplt_seterr (wsp, MU_WRDSE_NOINPUT);
cmdptr = wsp->ws_input + wsp->ws_endp;
cmdlen = wsp->ws_len - wsp->ws_endp;
wsp->ws_flags |= MU_WRDSF_REUSE;
......@@ -1494,12 +2183,13 @@ mu_wordsplit_len (const char *command, size_t length, struct mu_wordsplit *wsp,
rc = mu_wordsplit_init (wsp, cmdptr, cmdlen, flags);
if (rc)
return rc;
wsp->ws_lvl = lvl;
}
if (wsp->ws_flags & MU_WRDSF_SHOWDBG)
wsp->ws_debug ("Input:%.*s;", (int)cmdlen, cmdptr);
wsp->ws_debug (_("(%02d) Input:%.*s;"), wsp->ws_lvl, (int) cmdlen, cmdptr);
rc = wordsplit_process_list (wsp, start);
rc = mu_wordsplit_process_list (wsp, start);
if (rc == 0 && (flags & MU_WRDSF_INCREMENTAL))
{
while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
......@@ -1509,9 +2199,10 @@ mu_wordsplit_len (const char *command, size_t length, struct mu_wordsplit *wsp,
{
cmdptr = wsp->ws_input + wsp->ws_endp;
cmdlen = wsp->ws_len - wsp->ws_endp;
wsp->ws_debug ("Restart:%.*s;", (int)cmdlen, cmdptr);
wsp->ws_debug (_("(%02d) Restart:%.*s;"),
wsp->ws_lvl, (int) cmdlen, cmdptr);
}
rc = wordsplit_process_list (wsp, start);
rc = mu_wordsplit_process_list (wsp, start);
if (rc)
break;
}
......@@ -1527,6 +2218,13 @@ mu_wordsplit_len (const char *command, size_t length, struct mu_wordsplit *wsp,
}
/* Split the first LENGTH bytes of COMMAND into words according to
   FLAGS, storing the result in WSP.  This is the public entry point;
   it runs the splitter at nesting level 0 and returns its result. */
int
mu_wordsplit_len (const char *command, size_t length, struct mu_wordsplit *wsp,
		  int flags)
{
  int rc = mu_wordsplit_run (command, length, wsp, flags, 0);

  return rc;
}
int
mu_wordsplit (const char *command, struct mu_wordsplit *ws, int flags)
{
return mu_wordsplit_len (command, command ? strlen (command) : 0, ws, flags);
......@@ -1550,65 +2248,62 @@ mu_wordsplit_free_words (struct mu_wordsplit *ws)
}
/* Release the NULL-terminated array WS->ws_envbuf together with the
   strings it holds, and reset the associated counters.  Does nothing
   if MU_WRDSF_NOCMD is set in ws_flags or if there is no buffer. */
void
mu_wordsplit_free_envbuf (struct mu_wordsplit *ws)
{
  size_t n = 0;

  if ((ws->ws_flags & MU_WRDSF_NOCMD) || !ws->ws_envbuf)
    return;

  while (ws->ws_envbuf[n])
    {
      free (ws->ws_envbuf[n]);
      n++;
    }
  free (ws->ws_envbuf);

  ws->ws_envbuf = NULL;
  ws->ws_envsiz = 0;
  ws->ws_envidx = 0;
}
/* Reset the error state of WS to MU_WRDSE_OK.  The user error message
   is freed only if the current error code is MU_WRDSE_USERERR; the
   ws_usererr pointer is cleared in either case. */
void
mu_wordsplit_clearerr (struct mu_wordsplit *ws)
{
  int had_user_message = (ws->ws_errno == MU_WRDSE_USERERR);

  ws->ws_errno = MU_WRDSE_OK;
  if (had_user_message)
    free (ws->ws_usererr);
  ws->ws_usererr = NULL;
}
/* Release the storage held by WS: the words, the word vector itself
   and the environment buffer.  ws_wordv is left set to NULL. */
void
mu_wordsplit_free (struct mu_wordsplit *ws)
{
  char **vector;

  mu_wordsplit_free_words (ws);

  /* Detach the vector from the structure before releasing it. */
  vector = ws->ws_wordv;
  ws->ws_wordv = NULL;
  free (vector);

  mu_wordsplit_free_envbuf (ws);
}
void
mu_wordsplit_perror (struct mu_wordsplit *wsp)
mu_wordsplit_getwords (struct mu_wordsplit *ws, int *wordc, char ***wordv)
{
switch (wsp->ws_errno)
{
case MU_WRDSE_EOF:
wsp->ws_error (_("no error"));
break;
case MU_WRDSE_QUOTE:
wsp->ws_error (_("missing closing %c (start near #%lu)"),
wsp->ws_input[wsp->ws_endp],
(unsigned long) wsp->ws_endp);
break;
case MU_WRDSE_NOSPACE:
wsp->ws_error (_("memory exhausted"));
break;
case MU_WRDSE_NOSUPP:
wsp->ws_error (_("command substitution is not yet supported"));
case MU_WRDSE_USAGE:
wsp->ws_error (_("invalid mu_wordsplit usage"));
break;
case MU_WRDSE_CBRACE:
wsp->ws_error (_("unbalanced curly brace"));
break;
case MU_WRDSE_UNDEF:
wsp->ws_error (_("undefined variable"));
break;
case MU_WRDSE_NOINPUT:
wsp->ws_error (_("input exhausted"));
break;
default:
wsp->ws_error (_("unknown error"));
}
char **p = realloc (ws->ws_wordv,
(ws->ws_wordc + 1) * sizeof (ws->ws_wordv[0]));
*wordv = p ? p : ws->ws_wordv;
*wordc = ws->ws_wordc;
ws->ws_wordv = NULL;
ws->ws_wordc = 0;
ws->ws_wordn = 0;
}
const char *_mu_wordsplit_errstr[] = {
N_("no error"),
N_("missing closing quote"),
N_("memory exhausted"),
N_("command substitution is not yet supported"),
N_("invalid mu_wordsplit usage"),
N_("invalid wordsplit usage"),
N_("unbalanced curly brace"),
N_("undefined variable"),
N_("input exhausted")
N_("input exhausted"),
N_("unbalanced parenthesis"),
N_("globbing error")
};
int _mu_wordsplit_nerrs =
sizeof (_mu_wordsplit_errstr) / sizeof (_mu_wordsplit_errstr[0]);
......@@ -1616,7 +2311,26 @@ int _mu_wordsplit_nerrs =
const char *
mu_wordsplit_strerror (struct mu_wordsplit *ws)
{
if (ws->ws_errno == MU_WRDSE_USERERR)
return ws->ws_usererr;
if (ws->ws_errno < _mu_wordsplit_nerrs)
return _mu_wordsplit_errstr[ws->ws_errno];
return N_("unknown error");
}
/* Report the error recorded in WSP through its ws_error callback.

   A missing closing quote gets a dedicated message naming the
   offending character and its position in the input.  Every other
   error is reported using the text from mu_wordsplit_strerror. */
void
mu_wordsplit_perror (struct mu_wordsplit *wsp)
{
  switch (wsp->ws_errno)
    {
    case MU_WRDSE_QUOTE:
      wsp->ws_error (_("missing closing %c (start near #%lu)"),
		     wsp->ws_input[wsp->ws_endp],
		     (unsigned long) wsp->ws_endp);
      break;

    default:
      /* The message may be user-supplied text (MU_WRDSE_USERERR) and
	 can contain `%', so it must never be used as the format. */
      wsp->ws_error ("%s", mu_wordsplit_strerror (wsp));
    }
}
......
......@@ -32,7 +32,6 @@ main (int argc, char **argv)
mu_stream_t stream = NULL;
mu_header_t hdr;
mu_body_t body;
char *buf = NULL;
mu_set_program_name (argv[0]);
......@@ -78,15 +77,20 @@ main (int argc, char **argv)
}
else if (strcmp (argv[i], "-t") == 0)
{
size_t len;
mu_wordsplit_t ws;
i++;
assert (argv[i] != NULL);
len = strlen (argv[i]);
buf = realloc (buf, len + 1);
mu_wordsplit_c_unquote_copy (buf, argv[i], len);
assert (buf != NULL);
assert (mu_stream_write (stream, buf,
strlen (buf), NULL) == 0);
if (mu_wordsplit (argv[i], &ws,
MU_WRDSF_NOSPLIT | MU_WRDSF_DEFFLAGS))
{
mu_error ("mu_wordsplit: %s", mu_wordsplit_strerror (&ws));
exit (1);
}
else
assert (mu_stream_write (stream, ws.ws_wordv[0],
strlen (ws.ws_wordv[0]), NULL) == 0);
mu_wordsplit_free (&ws);
}
else
mu_error ("ignoring unknown argument %s", argv[i]);
......
......@@ -347,7 +347,7 @@ TESTWSP([suppress ws trimming within quotes],[],
4: "formatfield=In message %{text}, "
])
TESTWSP([unescape],[],[-default novar nocmd quote escape '\"'],
TESTWSP([unescape],[],[-default novar nocmd quote escape :+:'\\""'],
[\Seen "quote \"" "bs \\"],
[NF: 3
0: \\Seen
......
......@@ -22,6 +22,7 @@
#include <stdio.h>
#include <string.h>
#include <mailutils/wordsplit.h>
#include <mailutils/alloc.h>
#include <mailutils/kwd.h>
#include <mailutils/errno.h>
#include <mailutils/error.h>
......@@ -29,6 +30,8 @@
extern char **environ;
char *progname;
struct mu_kwd bool_keytab[] = {
{ "append", MU_WRDSF_APPEND },
/*{ "reuse", MU_WRDSF_REUSE },*/
......@@ -50,6 +53,14 @@ struct mu_kwd bool_keytab[] = {
{ "default", MU_WRDSF_DEFFLAGS },
{ "env_kv", MU_WRDSF_ENV_KV },
{ "incremental", MU_WRDSF_INCREMENTAL },
{ "pathexpand", MU_WRDSF_PATHEXPAND },
{ NULL, 0 }
};
/* Command line keywords that map to MU_WRDSO_ option bits.  main()
   looks arguments up in this table to set or clear the bit in
   ws.ws_options, and help() lists the names.  The entry with a NULL
   name terminates the table. */
struct mu_kwd opt_keytab[] = {
{ "nullglob", MU_WRDSO_NULLGLOB },
{ "failglob", MU_WRDSO_FAILGLOB },
{ "dotglob", MU_WRDSO_DOTGLOB },
{ NULL, 0 }
};
......@@ -65,18 +76,27 @@ help ()
{
size_t i;
printf ("usage: wsp [options]\n");
printf ("usage: %s [options] [VAR=VALUE...]\n", progname);
printf ("options are:\n");
printf (" [-]trimnl\n");
printf (" [-]plaintext\n");
printf (" -env\n");
printf (" env sys|none|null\n");
putchar ('\n');
for (i = 0; bool_keytab[i].name; i++)
printf (" [-]%s\n", bool_keytab[i].name);
putchar ('\n');
for (i = 0; string_keytab[i].name; i++)
{
printf (" -%s\n", bool_keytab[i].name);
printf (" %s ARG\n", bool_keytab[i].name);
printf (" -%s\n", string_keytab[i].name);
printf (" %s ARG\n", string_keytab[i].name);
}
printf (" escape-word ARG\n");
printf (" escape-quote ARG\n");
putchar ('\n');
for (i = 0; opt_keytab[i].name; i++)
{
printf (" [-]%s\n", opt_keytab[i].name);
}
putchar ('\n');
printf (" -dooffs\n");
......@@ -104,12 +124,7 @@ print_qword (const char *word, int plaintext)
if (size >= qlen)
{
qlen = size + 1;
qbuf = realloc (qbuf, qlen);
if (!qbuf)
{
mu_error ("not enough memory");
abort ();
}
qbuf = mu_realloc (qbuf, qlen);
}
mu_wordsplit_c_quote_copy (qbuf, word, 0);
qbuf[size] = 0;
......@@ -131,50 +146,179 @@ make_env_kv ()
;
size = (i - 1) * 2 + 1;
newenv = calloc (size, sizeof (newenv[0]));
if (!newenv)
{
mu_error ("not enough memory");
exit (1);
}
newenv = mu_calloc (size, sizeof (newenv[0]));
for (i = j = 0; environ[i]; i++)
{
size_t len = strcspn (environ[i], "=");
char *p = malloc (len+1);
if (!p)
{
mu_error ("not enough memory");
exit (1);
}
char *p = mu_alloc (len+1);
memcpy (p, environ[i], len);
p[len] = 0;
newenv[j++] = p;
p = strdup (environ[i] + len + 1);
if (!p)
{
mu_error ("not enough memory");
exit (1);
}
p = mu_strdup (environ[i] + len + 1);
newenv[j++] = p;
}
newenv[j] = NULL;
return newenv;
}
/* Variable lookup callback.  DATA is a NULL-terminated array of
   "NAME=VALUE" strings.  The variable name is the VLEN bytes at VPTR.

   On success store a freshly allocated copy of the value in *RET and
   return MU_WRDSE_OK.  Return MU_WRDSE_UNDEF if no entry has that
   name, and MU_WRDSE_NOSPACE if the copy cannot be allocated. */
static int
wsp_getvar (char **ret, const char *vptr, size_t vlen, void *data)
{
  char **entry;

  for (entry = data; *entry; entry++)
    {
      char *copy;

      /* The name must match in length as well as in content. */
      if (strcspn (*entry, "=") != vlen)
	continue;
      if (memcmp (*entry, vptr, vlen) != 0)
	continue;

      copy = strdup (*entry + vlen + 1);
      if (copy == NULL)
	return MU_WRDSE_NOSPACE;
      *ret = copy;
      return MU_WRDSE_OK;
    }
  return MU_WRDSE_UNDEF;
}
/* Command substitution callback: run the first LEN bytes of STR via
   popen() and collect the command's standard output.

   Newlines in the output are replaced by spaces and a single trailing
   newline is dropped.  On success *RET receives the allocated output
   (NULL if the command printed nothing) and MU_WRDSE_OK is returned.
   If the command cannot be started, *RET receives an allocated error
   message and MU_WRDSE_USERERR is returned.  MU_WRDSE_NOSPACE is
   returned when memory is exhausted.  ARGV and CLOSURE are unused. */
static int
wsp_runcmd (char **ret, const char *str, size_t len, char **argv, void *closure)
{
  FILE *fp;
  char *cmd;
  int c, lastc = 0;
  char *buffer = NULL;
  size_t bufsize = 0;
  size_t buflen = 0;

  cmd = malloc (len + 1);
  if (!cmd)
    return MU_WRDSE_NOSPACE;
  memcpy (cmd, str, len);
  cmd[len] = 0;

  fp = popen (cmd, "r");
  if (!fp)
    {
      int ec = errno;		/* save before any call can clobber it */
      size_t size = 0;
      int rc;

      /* Clear the caller's pointer, not the local RET: mu_asprintf
	 needs a valid char ** to store the message in. */
      *ret = NULL;
      rc = mu_asprintf (ret, &size, "can't run %s: %s",
			cmd, strerror (ec));
      free (cmd);
      return rc ? MU_WRDSE_NOSPACE : MU_WRDSE_USERERR;
    }

  while ((c = fgetc (fp)) != EOF)
    {
      lastc = c;
      if (c == '\n')
	c = ' ';
      /* Always keep one spare byte for the terminating NUL. */
      if (buflen + 1 >= bufsize)
	{
	  size_t newsize = bufsize ? bufsize * 2 : 80;
	  char *p = realloc (buffer, newsize);
	  if (!p)
	    {
	      free (buffer);
	      free (cmd);
	      pclose (fp);
	      return MU_WRDSE_NOSPACE;
	    }
	  buffer = p;
	  bufsize = newsize;
	}
      buffer[buflen++] = c;
    }

  if (buffer)
    {
      if (lastc == '\n')
	--buflen;
      buffer[buflen] = 0;
    }

  pclose (fp);
  free (cmd);

  *ret = buffer;
  return MU_WRDSE_OK;
}
/* Selects the environment that main() passes to the splitter. */
enum env_type
{
/* Leave MU_WRDSF_ENV and ws_env untouched. */
env_none,
/* Set MU_WRDSF_ENV with a NULL ws_env. */
env_null,
/* Set MU_WRDSF_ENV and use the process environment, converted by
   make_env_kv when MU_WRDSF_ENV_KV is set. */
env_sys
};
/* Maps the argument of the "env" command line option to an
   enum env_type value.  The entry with a NULL name terminates the
   table. */
struct mu_kwd env_keytab[] = {
{ "none", env_none },
{ "null", env_null },
{ "sys", env_sys },
{ NULL }
};
/* Install STR as escape table number Q of WS.

   STR may begin with a flag section of the form `:FLAGS:', where each
   flag character selects an option for table Q: `+' is
   MU_WRDSO_BSKEEP, `0' is MU_WRDSO_OESC and `x' is MU_WRDSO_XESC.
   If such a section is present, MU_WRDSF_OPTIONS is added to *WSFLAGS
   and the table starts right after the closing colon.  Any other
   character in the flag section, including the end of the string,
   prints a diagnostic and aborts the program. */
static void
set_escape_string (mu_wordsplit_t *ws, int *wsflags, int q, const char *str)
{
  if (*str == ':')
    {
      for (str++; *str != ':'; str++)
	{
	  int bit;

	  if (*str == '+')
	    bit = MU_WRDSO_BSKEEP;
	  else if (*str == '0')
	    bit = MU_WRDSO_OESC;
	  else if (*str == 'x')
	    bit = MU_WRDSO_XESC;
	  else
	    {
	      fprintf (stderr, "%s: invalid escape flag near %s\n",
		       progname, str);
	      abort ();
	    }
	  MU_WRDSO_ESC_SET (ws, q, bit);
	}
      /* Step over the closing colon. */
      str++;
      *wsflags |= MU_WRDSF_OPTIONS;
    }
  ws->ws_escape[q] = str;
}
int
main (int argc, char **argv)
{
char buf[1024], *ptr;
char buf[1024], *ptr, *saved_ptr;
int i, offarg = 0;
int trimnl_option = 0;
int plaintext_option = 0;
int wsflags = (MU_WRDSF_DEFFLAGS & ~MU_WRDSF_NOVAR) |
MU_WRDSF_ENOMEMABRT |
MU_WRDSF_ENV | MU_WRDSF_SHOWERR;
struct mu_wordsplit ws;
MU_WRDSF_SHOWERR;
mu_wordsplit_t ws;
int next_call = 0;
char *fenvbase[128];
size_t fenvidx = 0;
size_t fenvmax = sizeof (fenvbase) / sizeof (fenvbase[0]);
int use_env = env_sys;
progname = argv[0];
ws.ws_options = 0;
for (i = 1; i < argc; i++)
{
char *opt = argv[i];
......@@ -212,7 +356,31 @@ main (int argc, char **argv)
plaintext_option = !negate;
continue;
}
if (strcmp (opt, "env") == 0)
{
if (negate)
use_env = env_none;
else
{
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing argument for env\n",
progname);
exit (1);
}
if (mu_kwd_xlat_name (env_keytab, argv[i], &use_env))
{
fprintf (stderr, "%s: invalid argument for env\n",
progname);
exit (1);
}
}
continue;
}
if (mu_kwd_xlat_name (bool_keytab, opt, &flag) == 0)
{
if (negate)
......@@ -231,7 +399,8 @@ main (int argc, char **argv)
i++;
if (i == argc)
{
mu_error ("%s missing argument", opt);
fprintf (stderr, "%s: missing argument for %s\n",
progname, opt);
exit (1);
}
......@@ -246,7 +415,8 @@ main (int argc, char **argv)
break;
case MU_WRDSF_ESCAPE:
ws.ws_escape = argv[i];
set_escape_string (&ws, &wsflags, 0, argv[i]);
set_escape_string (&ws, &wsflags, 1, argv[i]);
break;
}
......@@ -255,6 +425,27 @@ main (int argc, char **argv)
continue;
}
if (strcmp (opt, "escape-word") == 0
|| strcmp (opt, "escape-quote") == 0)
{
int q = opt[7] == 'q';
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing argument for %s\n",
progname, opt);
exit (1);
}
if (!(wsflags & MU_WRDSF_ESCAPE))
{
wsflags |= MU_WRDSF_ESCAPE;
ws.ws_escape[!q] = NULL;
}
set_escape_string (&ws, &wsflags, q, argv[i]);
continue;
}
if (strcmp (opt, "dooffs") == 0)
{
if (negate)
......@@ -267,20 +458,23 @@ main (int argc, char **argv)
if (i == argc)
{
mu_error ("%s missing arguments", opt);
fprintf (stderr, "%s: missing arguments for %s\n",
progname, opt);
exit (1);
}
ws.ws_offs = strtoul (argv[i], &p, 10);
if (*p)
{
mu_error ("invalid number: %s", argv[i]);
fprintf (stderr, "%s: invalid number: %s\n",
progname, argv[i]);
exit (1);
}
i++;
if (i + ws.ws_offs > argc)
{
mu_error ("%s: not enough arguments", opt);
fprintf (stderr, "%s: not enough arguments for %s\n",
progname, opt);
exit (1);
}
offarg = i;
......@@ -290,15 +484,65 @@ main (int argc, char **argv)
continue;
}
mu_error ("%s: unrecognized argument", opt);
if (mu_kwd_xlat_name (opt_keytab, opt, &flag) == 0)
{
wsflags |= MU_WRDSF_OPTIONS;
if (negate)
ws.ws_options &= ~flag;
else
ws.ws_options |= flag;
continue;
}
if (strchr (opt, '='))
{
if (fenvidx < fenvmax - 1)
{
fenvbase[fenvidx++] = opt;
continue;
}
else
{
fprintf (stderr, "%s: environment too big\n", progname);
exit (1);
}
}
fprintf (stderr, "%s: unrecognized argument: %s\n",
progname, opt);
exit (1);
}
if (wsflags & MU_WRDSF_ENV_KV)
ws.ws_env = (const char **) make_env_kv ();
else
ws.ws_env = (const char **) environ;
if (fenvidx)
{
fenvbase[fenvidx] = NULL;
wsflags |= MU_WRDSF_GETVAR | MU_WRDSF_CLOSURE;
ws.ws_getvar = wsp_getvar;
ws.ws_closure = fenvbase;
}
switch (use_env)
{
case env_null:
wsflags |= MU_WRDSF_ENV;
ws.ws_env = NULL;
break;
case env_none:
break;
case env_sys:
wsflags |= MU_WRDSF_ENV;
if (wsflags & MU_WRDSF_ENV_KV)
ws.ws_env = (const char **) make_env_kv ();
else
ws.ws_env = (const char **) environ;
break;
}
if (!(wsflags & MU_WRDSF_NOCMD))
ws.ws_command = wsp_runcmd;
if (wsflags & MU_WRDSF_INCREMENTAL)
trimnl_option = 1;
......@@ -309,7 +553,11 @@ main (int argc, char **argv)
size_t i;
if (trimnl_option)
mu_rtrim_cset (ptr, "\n");
{
size_t len = strlen (ptr);
if (len && ptr[len-1] == '\n')
ptr[len-1] = 0;
}
if (wsflags & MU_WRDSF_INCREMENTAL)
{
......@@ -318,16 +566,12 @@ main (int argc, char **argv)
if (*ptr == 0)
ptr = NULL;
else
free ((void*)ws.ws_input);
free (saved_ptr);
}
else
next_call = 1;
if (ptr)
{
ptr = strdup (ptr);
if (!ptr)
abort ();
}
ptr = saved_ptr = mu_strdup (ptr);
}
rc = mu_wordsplit (ptr, &ws, wsflags);
......@@ -344,8 +588,8 @@ main (int argc, char **argv)
ws.ws_wordv[i] = argv[offarg + i];
offarg = 0;
}
wsflags |= MU_WRDSF_REUSE;
wsflags |= MU_WRDSF_REUSE | (ws.ws_flags & MU_WRDSF_ENV);
printf ("NF: %lu", (unsigned long) ws.ws_wordc);
if (wsflags & MU_WRDSF_DOOFFS)
printf (" (%lu)", (unsigned long) ws.ws_offs);
......
......@@ -252,12 +252,16 @@ shell_help (int argc, char **argv)
static int
shell_prompt (int argc, char **argv)
{
size_t size;
mu_wordsplit_t ws;
free (mutool_shell_prompt);
size = strlen (argv[1]);
mutool_shell_prompt = mu_alloc (size + 1);
mu_wordsplit_c_unquote_copy (mutool_shell_prompt, argv[1], size);
if (mu_wordsplit (argv[1], &ws, MU_WRDSF_NOSPLIT | MU_WRDSF_DEFFLAGS))
mu_error ("mu_wordsplit: %s", mu_wordsplit_strerror (&ws));
else
{
free (mutool_shell_prompt);
mutool_shell_prompt = mu_strdup (ws.ws_wordv[0]);
}
mu_wordsplit_free (&ws);
return 0;
}
......@@ -521,7 +525,7 @@ execute_line (char *line)
int status = 0;
ws.ws_comment = "#";
ws.ws_escape = "\\\"";
ws.ws_escape[0] = ws.ws_escape[1] = "\\\\\"\"";
rc = mu_wordsplit (line, &ws,
MU_WRDSF_DEFFLAGS|MU_WRDSF_COMMENT|MU_WRDSF_ESCAPE|
MU_WRDSF_INCREMENTAL|MU_WRDSF_APPEND);
......