/* argcv.c - simple functions for parsing input based on whitespace
   Copyright (C) 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2010 Free
   Software Foundation, Inc.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General
   Public License along with this library.  If not, see 
   <http://www.gnu.org/licenses/>. */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <ctype.h>
#include <errno.h>
#define MU_ARCGV_DEPRECATED
#include <mailutils/argcv.h>

/* Keep mailutils namespace clean */
#define argcv_get            mu_argcv_get 
#define argcv_get_n          mu_argcv_get_n 
#define argcv_get_np         mu_argcv_get_np 
#define argcv_unquote_char   mu_argcv_unquote_char
#define argcv_quote_char     mu_argcv_quote_char  
#define argcv_quoted_length  mu_argcv_quoted_length
#define argcv_unquote_copy   mu_argcv_unquote_copy 
#define argcv_quote_copy     mu_argcv_quote_copy     

/*
 * The argcv_get family of functions takes a string and splits it into
 * several strings, breaking at whitespace (space, tab, newline) and at
 * any character listed in delim
 * command is the string to split
 * the number of strings is placed into argc
 * the split strings are put into argv
 * returns 0 on success, nonzero on failure
 */

/* True if C is one of the whitespace characters that separate tokens:
   space, horizontal tab or newline. */
#define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n')
/* True if C occurs in the NUL-terminated string DELIM.  Because strchr
   also matches the terminating NUL, this is true for C == '\0' whatever
   DELIM contains. */
#define isdelim(c,delim) (strchr(delim,(c))!=NULL)

/* State shared between successive calls to argcv_scan. */
struct argcv_info
{
  int len;              /* number of characters of command to scan */
  const char *command;  /* the string being split */
  const char *delim;    /* characters that act as token delimiters */
  const char *comment;  /* characters that start a comment when they
			   begin a token */
  int flags;            /* scan options; tested against
			   MU_ARGCV_RETURN_DELIMS */
  
  int start;            /* index of the first character of the last token */
  int end;              /* index of the last character of the last token */
  int save;             /* index at which the next scan resumes */
  int finish_pos;       /* index just past the last token, or the start of
			   the last comment; argcv_get_np reports it
			   through its endp argument */
};

static void
init_argcv_info (struct argcv_info *ap, int flags,
		 int len, const char *command, const char *delim,
		 const char *comment)
{
  memset (ap, 0, sizeof *ap);
  ap->len = len;
  ap->command = command;
  ap->delim = delim;
  ap->comment = comment;
  ap->flags = flags;
}

/* Find the next token in AP->command, starting at index AP->save.

   When a token is found, AP->start and AP->end are set to the indexes of
   its first and last characters, AP->save and AP->finish_pos are set to
   AP->end + 1, and AP->save is returned (a value not greater than
   AP->len).  When no token is left, the return value is greater than
   AP->len.

   Tokens are separated by whitespace (see isws).  A character from
   AP->delim also ends a token; runs of delimiters are skipped unless
   MU_ARGCV_RETURN_DELIMS is set in AP->flags, in which case a delimiter
   is returned as a one-character token.  A backslash keeps the character
   that follows it inside the token, and a '...' or "..." section whose
   closing quote lies within AP->len is consumed as a whole.  A token
   that starts with a character from AP->comment is discarded together
   with the rest of its line; AP->finish_pos is then set to the index
   where the comment began.

   No character at index AP->len or beyond is examined. */
static int
argcv_scan (struct argcv_info *ap)
{
  int i = 0;
  int len = ap->len;
  const char *command = ap->command;
  const char *delim = ap->delim;
  const char *comment = ap->comment;
  
  for (;;)
    {
      i = ap->save;

      if (i >= len)
	return i + 1;

      /* Skip initial whitespace */
      while (i < len && isws (command[i]))
	i++;
      ap->start = i;

      if (i >= len)
	{
	  /* Only whitespace was left.  Stop here instead of looking at
	     command[len], which is outside the scanned range.  The
	     bookkeeping below is what the scan produced when command[len]
	     was the terminating NUL (which isdelim always matches). */
	  if (ap->flags & MU_ARGCV_RETURN_DELIMS)
	    {
	      ap->end = i;
	      ap->save = ap->finish_pos = i + 1;
	    }
	  else
	    ap->save = i;
	  return i + 1;
	}

      if (!isdelim (command[i], delim))
	{
	  while (i < len)
	    {
	      if (command[i] == '\\')
		{
		  /* A backslash as the very last character stands alone */
		  if (++i == len)
		    break;
		  i++; /* skip the escaped character */
		  continue;
		}
	      
	      if (command[i] == '\'' || command[i] == '"')
		{
		  /* Look for the matching quote; a backslash inside the
		     quoted section protects the character after it. */
		  int j;
		  for (j = i + 1; j < len && command[j] != command[i]; j++)
		    if (command[j] == '\\')
		      j++;
		  if (j < len)
		    i = j + 1;   /* step over the whole quoted section */
		  else
		    i++;         /* unmatched quote: an ordinary character */
		}
	      else if (isws (command[i]) || isdelim (command[i], delim))
		break;
	      else
		i++;
	    }
	  /* I is one past the token; step back onto its last character */
	  i--;
	}
      else if (!(ap->flags & MU_ARGCV_RETURN_DELIMS))
	{
	  /* Delimiters are not wanted: skip the run and rescan */
	  while (i < len && isdelim (command[i], delim))
	    i++;
	  ap->save = i;
	  continue;
	}
      

      ap->end = i;
      ap->save = ap->finish_pos = i + 1;

      /* If we have a token, and it starts with a comment character, skip
         to the newline and restart the token search. */
      if (ap->save <= len)
	{
	  if (strchr (comment, command[ap->start]) != NULL)
	    {
	      ap->finish_pos = ap->start;
	      i = ap->save;
	      while (i < len && command[i] != '\n')
		i++;

	      ap->save = i;
	      continue;
	    }
	}
      break;
    }
  return ap->save;
}

/* Escape translation table, stored as consecutive pairs: first the
   letter that follows a backslash in the quoted form, then the character
   that the escape stands for.  It is only ever read. */
static const char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";

/* Return the character denoted by the escape sequence backslash-C.
   If C is not a known escape letter, C itself is returned. */
int
argcv_unquote_char (int c)
{
  const char *p;

  for (p = quote_transtab; *p; p += 2)
    {
      if (*p == c)
	return p[1];
    }
  return c;
}

/* Return the letter that, preceded by a backslash, represents the
   character C, or -1 if C has no such escape. */
int
argcv_quote_char (int c)
{
  size_t i;

  /* Walk the pairs from the last to the first.  I is the index just past
     the current pair, so the loop never forms a pointer or index before
     the start of the table. */
  for (i = sizeof (quote_transtab) - 1; i >= 2; i -= 2)
    {
      if (quote_transtab[i - 1] == c)
	return quote_transtab[i - 2];
    }
  return -1;
}
  
/* Numeric value of the ASCII digit C: 0-9 for decimal digits, 10-15 for
   the letters a-f in either case, and 255 for anything else. */
#define to_num(c) \
  (isdigit (c) ? (c) - '0' : (isxdigit (c) ? toupper (c) - 'A' + 10 : 255))

/* Convert at most CNT leading characters of SRC as a number in the given
   BASE.  Conversion stops at the first character that is not an ASCII
   digit valid in BASE.  The value is stored in *PVAL (0 if nothing was
   converted); the return value is the number of characters consumed. */
static int
xtonum (int *pval, const char *src, int base, int cnt)
{
  const unsigned char *cur = (const unsigned char *) src;
  int consumed = 0;
  int acc = 0;

  while (consumed < cnt)
    {
      int digit = cur[consumed];

      /* Bytes outside ASCII are never digits */
      if (digit > 127)
	break;
      digit = to_num (digit);
      if (digit >= base)
	break;
      acc = acc * base + digit;
      consumed++;
    }

  *pval = acc;
  return consumed;
}

/* Return the number of bytes argcv_quote_copy writes for STR.  *QUOTE is
   set to 1 if STR contains a space or a double quote, and to 0
   otherwise.

   Per character: a double quote counts 2, a character with a
   single-letter escape counts 2, any other printable character
   (including space) counts 1, and everything else counts 4 (a backslash
   and three octal digits).

   NOTE(review): the character is handed to isprint as a plain char, as
   in argcv_quote_copy.  The two functions must classify characters
   identically, so change both together or not at all. */
size_t
argcv_quoted_length (const char *str, int *quote)
{
  const char *p;
  size_t total = 0;

  *quote = 0;
  for (p = str; *p != '\0'; p++)
    {
      char ch = *p;

      if (ch == ' ' || ch == '"')
	{
	  *quote = 1;
	  total += (ch == '"') ? 2 : 1;
	  continue;
	}

      if (ch != '\t' && ch != '\\' && isprint (ch))
	total += 1;
      else if (argcv_quote_char (ch) != -1)
	total += 2;
      else
	total += 4;
    }
  return total;
}

/* Copy the first N characters of SRC to DST, removing quoting, and
   terminate DST with a NUL.  DST must have room for N + 1 bytes; the
   result is never longer than N characters.

   - A ' or " that has a matching closing quote within the N characters
     opens a quoted section; the opening and closing quotes are dropped.
     A quote without a match is copied as is.
   - Backslash followed by x or X and one or two hexadecimal digits, or
     by one to three octal digits, yields the character with that code.
   - Backslash followed by a letter known to argcv_unquote_char yields
     the corresponding control character; before any other character
     the backslash is dropped.
   - A backslash that is the last of the N characters is dropped.

   Only SRC[0] .. SRC[N-1] are examined. */
void
argcv_unquote_copy (char *dst, const char *src, size_t n)
{
  size_t i = 0;
  int c;
  int expect_delim = 0;

  while (i < n)
    {
      switch (src[i])
	{
	case '\'':
	case '"':
	  if (!expect_delim)
	    {
	      size_t j;

	      /* Search for the matching quote, staying inside the N
		 characters.  A backslash protects the next character. */
	      for (j = i + 1; j < n && src[j] && src[j] != src[i]; j++)
		if (src[j] == '\\')
		  j++;
	      if (j < n && src[j])
		expect_delim = src[i++];
	      else
		*dst++ = src[i++];
	    }
	  else if (expect_delim == src[i])
	    /* NOTE(review): expect_delim is not reset here, so after the
	       first quoted section every later occurrence of this quote
	       character is dropped and the other quote character is
	       copied literally.  Kept as is; confirm whether intended. */
	    ++i;
	  else
	    *dst++ = src[i++];
	  break;

	case '\\':
	  ++i;
	  if (i >= n)
	    /* Trailing backslash: there is nothing left to escape */
	    break;
	  if (src[i] == 'x' || src[i] == 'X')
	    {
	      /* Characters available after the 'x' */
	      size_t avail = n - i - 1;
	      int off = 0;

	      if (avail > 0)
		off = xtonum (&c, src + i + 1, 16,
			      (int) (avail < 2 ? avail : 2));
	      if (off == 0)
		{
		  /* No hex digit follows: keep the sequence verbatim */
		  *dst++ = '\\';
		  *dst++ = src[i++];
		}
	      else
		{
		  *dst++ = c;
		  i += off + 1;
		}
	    }
	  else if ((unsigned char)src[i] < 128 && isdigit (src[i]))
	    {
	      /* Characters available from the first digit on */
	      size_t avail = n - i;
	      int off = xtonum (&c, src + i, 8,
				(int) (avail < 3 ? avail : 3));

	      if (off == 0)
		{
		  /* 8 or 9: not octal, keep the sequence verbatim */
		  *dst++ = '\\';
		  *dst++ = src[i++];
		}
	      else
		{
		  *dst++ = c;
		  i += off;
		}
	    }
	  else
	    *dst++ = argcv_unquote_char (src[i++]);
	  break;

	default:
	  *dst++ = src[i++];
	}
    }
  *dst = 0;
}

/* Write the quoted form of SRC to DST.  A double quote is preceded by a
   backslash; a printable character other than tab and backslash is
   copied unchanged; a character with a single-letter escape (see
   argcv_quote_char) becomes backslash plus that letter; any other
   character becomes a backslash followed by its three-digit octal code.

   No terminating NUL is written.  DST must have room for the number of
   bytes reported by argcv_quoted_length for the same string.

   NOTE(review): the character is handed to isprint as a plain char, as
   in argcv_quoted_length.  The two functions must classify characters
   identically, so change both together or not at all. */
void
argcv_quote_copy (char *dst, const char *src)
{
  char *out = dst;
  const char *in;

  for (in = src; *in != '\0'; in++)
    {
      int letter;

      if (*in == '"')
	{
	  *out++ = '\\';
	  *out++ = '"';
	  continue;
	}

      if (*in != '\t' && *in != '\\' && isprint (*in))
	{
	  *out++ = *in;
	  continue;
	}

      letter = argcv_quote_char (*in);
      *out++ = '\\';
      if (letter != -1)
	*out++ = letter;
      else
	{
	  /* Three octal digits, most significant first */
	  unsigned int code = *(const unsigned char *) in;

	  *out++ = '0' + ((code >> 6) & 7);
	  *out++ = '0' + ((code >> 3) & 7);
	  *out++ = '0' + (code & 7);
	}
    }
}

int
argcv_get_np (const char *command, int len,
	      const char *delim, const char *cmnt,
	      int flags,
	      int *pargc, char ***pargv, char **endp)
{
  int i = 0;
  struct argcv_info info;
  int argc;
  char **argv;
  
  if (!delim)
    delim = "";
  if (!cmnt)
    cmnt = "";

  init_argcv_info (&info, flags, len, command, delim, cmnt);

  /* Count number of arguments */
  argc = 0;
  while (argcv_scan (&info) <= len)
    argc++;

  argv = calloc ((argc + 1), sizeof (char *));
  if (argv == NULL)
    return ENOMEM;
  
  i = 0;
  info.save = 0;
  for (i = 0; i < argc; i++)
    {
      int n;
      int unquote;
      
      argcv_scan (&info);

      if ((command[info.start] == '"' || command[info.end] == '\'')
	  && command[info.end] == command[info.start])
	{
	  if (info.start < info.end)
	    {
	      info.start++;
	      info.end--;
	    }
	  unquote = 0;
	}
      else
	unquote = 1;
      
      n = info.end - info.start + 1;
      argv[i] = calloc (n + 1,  sizeof (char));
      if (argv[i] == NULL)
	{
	  mu_argcv_free (i, argv);
	  return ENOMEM;
	}
      if (unquote)
	argcv_unquote_copy (argv[i], &command[info.start], n);
      else
	memcpy (argv[i], &command[info.start], n);
      argv[i][n] = 0;
    }
  argv[i] = NULL;

  *pargc = argc;
  *pargv = argv;
  if (endp)
    *endp = (char*) (command + info.finish_pos);
  return 0;
}

/* Split the first LEN characters of COMMAND into tokens, returning each
   delimiter from DELIM as a token of its own (MU_ARGCV_RETURN_DELIMS).
   Arguments and return value are those of argcv_get_np, without the
   flags and end-pointer arguments. */
int
argcv_get_n (const char *command, int len, const char *delim, const char *cmnt,
	     int *pargc, char ***pargv)
{
  int rc;

  rc = argcv_get_np (command, len, delim, cmnt, MU_ARGCV_RETURN_DELIMS,
		     pargc, pargv, NULL);
  return rc;
}

/* Split the NUL-terminated string COMMAND into tokens.  This is
   argcv_get_n applied to the whole string; see there for the meaning of
   DELIM, CMNT, ARGC and ARGV and for the return value. */
int
argcv_get (const char *command, const char *delim, const char *cmnt,
	   int *argc, char ***argv)
{
  size_t cmdlen = strlen (command);

  return argcv_get_n (command, cmdlen, delim, cmnt, argc, argv);
}