New parser from Sam.

Alain Magloire
Commit 9c5a3d47 ... 9c5a3d47e072d74af7e4b2a82c09277245638e24 authored 2001-04-07 02:46:47 +0000 by Alain Magloire
Showing 2 changed files with 879 additions and 646 deletions
mailbox/include/address0.h
mailbox/parse822.c
--- a/mailbox/include/address0.h
View file @9c5a3d4
+++ b/mailbox/include/address0.h
View file @9c5a3d4
@@ -38,7 +38,7 @@ extern "C" {
 /*
 * The data-structure representing an RFC822 MAILBOX. It may be
- * one MAILBOX in a list of them, as found in an ADDRESS list or
+ * one MAILBOX or a list of them, as found in an ADDRESS or
 * a MAILBOX list (as found in a GROUP).
 *
 * Capitalized names are from RFC 822, section 6.1 (Address Syntax).
@@ -62,7 +62,8 @@ struct _address
  char *route;
  	/* the optional ROUTE in the ROUTE-ADDR form of MAILBOX */
-//  size_t num; -- didn't appear to be used anywhere...
+/*  size_t num; this didn't appear to be used anywhere... so I commented
+ it out, is that ok? -sam */
  struct _address *next;
 };
--- a/mailbox/parse822.c
View file @9c5a3d4
+++ b/mailbox/parse822.c
View file @9c5a3d4
@@ -15,6 +15,41 @@
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+/* vi:sw=4:ts=8 */
+/*
+Things to (maybe) do:
+  - groups used to return the number of addresses, now it returns
+    success... but doesn't create an _address for 'foo:;'. Should
+    it create one with just a personal?
+x - C comments only.
+x - no C++ reserved words.
+x - fix is_digit() to be like the other is functions
+  - what should return codes be, possible errors are:
+     . no mem (ENOMEM)
+     . function wasn't called correctly, usually a missing argument (EINVAL)
+     . invalid syntax found during parsing (ENOENT)
+     All functions should return ==0 on success, and errno on failure.
+x - const-correct the APIs
+x - "new = (char*) realloc()", cast not needed
+x - mailbox_t* nuked in favor of address_t
+x - fix handful of memory leaks detected by Alain
+  - test for memory leaks, so I don't have to rely on Alain's sharp eyes
+  - fix the realloc, try a struct _string { char* b, size_t sz };
+x - where does parse822.h go?
+  - parse field names and bodies
+  - parse dates (pull from Mail++)
+  - parse Received: field
+x - check RFC again, can groups be nested? No!
+  - should we do best effort parsing, so parsing "sam@localhost, foo@"
+    gets one address, or just say it is or it isn't in RFC format?
+*/
 #ifdef HAVE_CONFIG_H
 # include <config.h>
 #endif
@@ -27,904 +62,1101 @@
 #include "address0.h"
-int GetDigits(char** p, char* e, int min, int max, int* digits);
+#include <mailutils/parse822.h>
-int GetSpecial(char** p, char* e, char c);
-int GetComment(char** p, char* e, char** comment);
+#ifdef EOK
-int GetAtom(char** p, char* e, char** atom);
+# undef EOK
-int GetQuotedPair(char** p, char* e, char** qpair);
+#endif
-int GetQuotedString(char** p, char* e, char** qstr);
-int GetWord(char** p, char* e, char** word);
-int GetPhrase(char** p, char* e, char** phrase);
-int GetAddressList(address_t* a, char* s);
-int GetMailBox(char** p, char* e, address_t* a);
-int GetGroup(char** p, char* e, address_t* a);
-int GetAddress(char** p, char* e, address_t* a);
-int GetRouteAddr(char** p, char* e, address_t* a);
-int GetRoute(char** p, char* e, char** route);
-int GetAddrSpec(char** p, char* e, address_t* a);
-int GetLocalPart(char** p, char* e, char** local_part);
-int GetDomain(char** p, char* e, char** domain);
-int GetSubDomain(char** p, char* e, char** sub_domain);
-int GetDomainRef(char** p, char* e, char** domain_ref);
-int GetDomainLiteral(char** p, char* e, char** domain_literal);
-// Some convenience functions for dealing with dynamically re-sized
-// strings.
-int StrAppendN(char** to, char* from, size_t n)
-{
-	size_t l = 0;
-	/* if not to, then silently discard data */
-	if(!to) {
-		return 1;
-	}
-	if(*to) {
+#define EOK	0
-		char* new;
+#define EPARSE	ENOENT
-		l = strlen(*to);
+/*
+ * Some convenience functions for dealing with dynamically re-sized
+ * strings.
+ */
-		new = (char*) realloc(*to, l + n + 1);
+static int str_append_n(char** to, const char* from, size_t n)
+{
+    size_t l = 0;
-		if(!new) {
+    /* if not to, then silently discard data */
-			return 0;
+    if(!to) {
-		}
+	return EOK;
+    }
-		*to = new;
-	} else {
+    if(*to) {
-		*to = (char*) malloc(n + 1);
+	char* bigger;
+	l = strlen(*to);
+	bigger = realloc(*to, l + n + 1);
+	if(!bigger) {
+	    return ENOMEM;
 	}
+	*to = bigger;
+    } else {
+	*to = malloc(n + 1);
+    }
-	strncpy(&to[0][l], from, n);
+    strncpy(&to[0][l], from, n);
-	/* strncpy is lame, nul terminate our buffer */
+    /* strncpy is lame, nul terminate our buffer */
-	to[0][l + n] = 0;
+    to[0][l + n] = 0;
-	return 1;
+    return EOK;
 }
+static int str_append(char** to, const char* from)
-int StrAppend(char** to, char* from)
 {
-	return StrAppendN(to, from, strlen(from));
+    return str_append_n(to, from, strlen(from));
 }
+static int str_append_char(char** to, char c)
-int StrAppendChar(char** to, char c)
 {
-	return StrAppendN(to, &c, 1);
+    return str_append_n(to, &c, 1);
 }
+static int str_append_range(char** to, const char* b, const char* e)
-int StrAppendRange(char** to, char* b, char* e)
 {
-	return StrAppendN(to, b, e - b);
+    return str_append_n(to, b, e - b);
 }
+static void str_free(char** s)
-void StrFree(char** s)
 {
-	if(s && *s) {
+    if(s && *s) {
-		free(*s);
+	free(*s);
-		*s = 0;
+	*s = 0;
-	}
+    }
 }
-//
+/*
-// MRfc822Tokenizer
+ * Character Classification - could be rewritten in a C library
-//
+ * independent way, my system's C library matches the RFC
+ * definitions. I don't know if that's guaranteed.
-//
+ *
-// Character Classification - could be rewritten in a C library
+ * Note that all return values are:
-// independent way, my system's C library matches the RFC
+ *   1 -> TRUE
-// definitions, but I don't know if that's guaranteed.
+ *   0 -> FALSE
-//
+ * This may appear different from the 0 == success return
-int IsCHAR(char c)
+ * values of the other functions, but I was getting lost in
+ * boolean arithmetic.
+ */
+int parse822_is_char(char c)
 {
-	return isascii(c);
+    return isascii(c);
 }
-int IsDIGIT(char** p, char* e)
+int parse822_is_digit(char c)
 {
-	// DIGIT = <any ASCII decimal digit>
+    /* digit = <any ASCII decimal digit> */
-	if(*p == e) {
+    return isdigit(c);
-		return 0;
-	}
-	return isdigit(**p);
 }
-int IsCTL(char c)
+int parse822_is_ctl(char c)
 {
-	return iscntrl(c) || c == 127 /* DEL */;
+    return iscntrl(c) || c == 127 /* DEL */;
 }
-int IsSPACE(char c)
+int parse822_is_space(char c)
 {
-	return c == ' ';
+    return c == ' ';
 }
-int IsHTAB(char c)
+int parse822_is_htab(char c)
 {
-	return c == '\t';
+    return c == '\t';
 }
-int IsLWSPChar(char c)
+int parse822_is_lwsp_char(char c)
 {
-	return IsSPACE(c) || IsHTAB(c);
+    return parse822_is_space(c) || parse822_is_htab(c);
 }
-int IsSpecial(char c)
+int parse822_is_special(char c)
 {
-	return strchr("()<>@,;:\\\".[]", c) ? 1 : 0;
+    return strchr("()<>@,;:\\\".[]", c) ? 1 : 0;
 }
-int IsAtomChar(char c)
+int parse822_is_atom_char(char c)
 {
-	return IsCHAR(c) && !IsSpecial(c) && !IsSPACE(c) && !IsCTL(c);
+    return
+	parse822_is_char(c) &&
+	!parse822_is_special(c) &&
+	!parse822_is_space(c) &&
+	!parse822_is_ctl(c)
+	;
 }
-int IsQText(char c)
+int parse822_is_q_text(char c)
 {
-	return IsCHAR(c)
+    return
-		&& c != '"'
+	parse822_is_char(c)
-		&& c != '\\'
+	&& c != '"'
-		&& c != '\r';
+	&& c != '\\'
+	&& c != '\r'
+	;
 }
-int IsDText(char c)
+int parse822_is_d_text(char c)
 {
-	return IsCHAR(c)
+    return
-		&& c != '['
+	parse822_is_char(c)
-		&& c != ']'
+	&& c != '['
-		&& c != '\\'
+	&& c != ']'
-		&& c != '\r';
+	&& c != '\\'
+	&& c != '\r'
+	;
 }
-int IsSmtpQ(char c)
+/*
+ * SMTP's version of qtext, called <q> in the RFC 821 syntax,
+ * also excludes <LF>.
+ */
+int parse822_is_smtp_q(char c)
 {
-	return IsQText(c)
+    return parse822_is_q_text(c)
-		&& c != '\n';
+	&& c != '\n';
 }
-//
+/*
-// Lexical Analysis - these tokens are all from RFC822,
+ * Lexical Analysis - these tokens are all from RFC822,
-// section 3.3, Lexical Tokens, though not all tokens are
+ * section 3.3, Lexical Tokens, though not all tokens are
-// implemented.
+ * implemented. The names match those used in the extended
-//
+ * BNF of the RFC where possible.
+ */
-int SkipWs(char** p, char* e)
+int parse822_skip_ws(const char** p, const char* e)
 {
-	int ws = 0;
+    while((*p != e) && parse822_is_lwsp_char(**p)) {
+	*p += 1;
-	while((*p != e) && IsLWSPChar(**p)) {
+    }
-		++ws;
+    return EOK;
-		*p += 1;
-	}
-	return ws;
 }
-int SkipComments(char** p, char* e)
+int parse822_skip_comments(const char** p, const char* e)
 {
-	int comments;
+    int status;
-	while(GetComment(p, e, 0))
+    while((status = parse822_comment(p, e, 0)) == EOK)
-		comments++;
+	;
-	return comments++;
+    return EOK;
 }
-int GetDigits(char** p, char* e,
+int parse822_digits(const char** p, const char* e,
-		int min, int max, int* digits)
+	int min, int max, int* digits)
 {
-	char* save = *p;
+    const char* save = *p;
-	int i = 0;
+    int i = 0;
-	assert(digits);
+    assert(digits);
-	*digits = 0;
+    *digits = 0;
-	while(IsDIGIT(p, e)) {
+    while(*p < e && parse822_is_digit(**p)) {
-		*digits = *digits * 10 + **p - '0';
+	*digits = *digits * 10 + **p - '0';
-		*p += 1;
+	*p += 1;
-		++i;
+	++i;
-		if(max != 0 && i == max) {
+	if(max != 0 && i == max) {
-			break;
+	    break;
-		}
-	}
-	if(i < min) {
-		*p = save;
-		return 0;
 	}
+    }
+    if(i < min) {
+	*p = save;
+	return EPARSE;
+    }
-	return 1;
+    return EOK;
 }
-int GetSpecial(char** p, char* e, char c)
+int parse822_special(const char** p, const char* e, char c)
 {
-	SkipWs(p, e); // not comments, they start with a special...
+    parse822_skip_ws(p, e); /* not comments, they start with a special... */
-	if((*p != e) && **p == c) {
+    if((*p != e) && **p == c) {
-		*p += 1;
+	*p += 1;
-		return 1;
+	return EOK;
-	}
+    }
-	return 0;
+    return EPARSE;
 }
-int GetComment(char** p, char* e, char** comment)
+int parse822_comment(const char** p, const char* e, char** comment)
 {
-	// comment = "(" *(ctext / quoted-pair / comment) ")"
+    /* comment = "(" *(ctext / quoted-pair / comment) ")"
-	// ctext = <any CHAR except "(", ")", "\", & CR, including LWSP>
+     * ctext = <any char except "(", ")", "\", & CR, including lwsp>
+     */
-	if(!GetSpecial(p, e, '(')) {
+    const char* save = *p;
-		return 0;
+    int rc;
-	}
+    if((rc = parse822_special(p, e, '('))) {
-	while(*p != e) {
+	return rc;
-		char c = **p;
+    }
-		if(c == ')') {
+    while(*p != e) {
-			*p += 1;
+	char c = **p;
-			return 1; // found end-of-comment
-		} else if(c == '(') {
+	if(c == ')') {
-			GetComment(p, e, comment);
+	    *p += 1;
-		} else if(c == '\\') {
+	    return EOK; /* found end-of-comment */
-			GetQuotedPair(p, e, comment);
+	} else if(c == '(') {
-		} else if(c == '\r') {
+	    rc = parse822_comment(p, e, comment);
-			// invalid character...
+	} else if(c == '\\') {
-			*p += 1;
+	    rc = parse822_quoted_pair(p, e, comment);
-		} else if(IsCHAR(c)) {
+	} else if(c == '\r') {
-			StrAppendChar(comment, c);
+	    /* invalid character... */
-			*p += 1;
+	    *p += 1;
-		} else {
+	} else if(parse822_is_char(c)) {
-			// invalid character... should I append it?
+	    rc = str_append_char(comment, c);
-			*p += 1;
+	    *p += 1;
-		}
+	} else {
+	    /* invalid character... */
+	    *p += 1;
 	}
-	return 0; // end-of-comment not found
+	if(rc != EOK)
+	    break;
+    }
+    if(*p == e) {
+	rc = EPARSE; /* end-of-comment not found */
+    }
+    *p = save;
+    assert(rc != EOK);
+    return rc;
 }
-int GetAtom(char** p, char* e, char** atom)
+int parse822_atom(const char** p, const char* e, char** atom)
 {
-	// atom = 1*<an atom char>
+    /* atom = 1*<an atom char> */
-	int ok = 0;
+    const char* save = *p;
+    int rc = EPARSE;
-	SkipComments(p, e);
+    parse822_skip_comments(p, e);
-	while((*p != e) && IsAtomChar(**p))
+    save = *p;
-	{
-		++ok;
+    while((*p != e) && parse822_is_atom_char(**p))
-		StrAppendChar(atom, **p);
+    {
-		*p += 1;
+	rc = str_append_char(atom, **p);
+	*p += 1;
+	if(rc != EOK) {
+	    *p = save;
+	    break;
 	}
-	return ok;
+    }
+    return rc;
 }
-int GetQuotedPair(char** p, char* e, char** qpair)
+int parse822_quoted_pair(const char** p, const char* e, char** qpair)
 {
-	// quoted-pair = "\" CHAR
+    /* quoted-pair = "\" char */
-	/* need TWO characters to be available */
+    int rc;
-	if((e - *p) < 2)
-		return 0;
-	if(**p != '\\')
+    /* need TWO characters to be available */
-		return 0;
+    if((e - *p) < 2)
+	return EPARSE;
-	*p += 1;
+    if(**p != '\\')
-	if(*p == e)
+	return EPARSE;
-		return 0;
-	StrAppendChar(qpair, **p);
+    if((rc = str_append_char(qpair, *(*p + 1))))
+	return rc;
-	*p += 1;
+    *p += 2;
+    return EOK;
+}
-	return 1;
+int parse822_quoted_string(const char** p, const char* e, char** qstr)
-}
+{
-int GetQuotedString(char** p, char* e, char** qstr)
+    /* quoted-string = <"> *(qtext/quoted-pair) <">
-{
+     * qtext = char except <">, "\", & CR, including lwsp-char
-	// quoted-string = <"> *(qtext/quoted-pair) <">
+     */
-	// qtext = CHAR except <">, "\", & CR, including LWSP-char
+    const char* save = *p;
-	SkipComments(p, e);
+    int rc;
-	if(!GetSpecial(p, e, '"'))
+    parse822_skip_comments(p, e);
-		return 0;
+    save = *p;
-	while(*p != e)
-	{
+    if((rc = parse822_special(p, e, '"')))
-		char c = **p;
+	return rc;
-		if(c == '"') {
+    while(*p != e)
-			*p += 1;
+    {
-			return 1; // found end-of-qstr
+	char c = **p;
-		} else if(c == '\\') {
-			GetQuotedPair(p, e, qstr);
+	if(c == '"') {
-		} else if(c == '\r') {
+	    *p += 1;
-			// invalid character...
+	    return EOK; /* found end-of-qstr */
-			*p += 1;
+	} else if(c == '\\') {
-		} else if(IsCHAR(c)) {
+	    rc = parse822_quoted_pair(p, e, qstr);
-			StrAppendChar(qstr, c);
+	} else if(c == '\r') {
-			*p += 1;
+	    /* invalid character... */
-		} else {
+	    *p += 1;
-			// invalid character...
+	} else if(parse822_is_char(c)) {
-			*p += 1;
+	    rc = str_append_char(qstr, c);
-		}
+	    *p += 1;
+	} else {
+	    /* invalid character... */
+	    *p += 1;
+	}
+	if(rc) {
+	    *p = save;
+	    return rc;
 	}
-	return 0; // end-of-qstr not found
+    }
+    *p = save;
+    return EPARSE; /* end-of-qstr not found */
 }
-int GetWord(char** p, char* e, char** word)
+int parse822_word(const char** p, const char* e, char** word)
 {
-	// word = atom / quoted-string
+    /* word = atom / quoted-string */
+    const char* save = *p;
+    int rc = EOK;
-	char* save = *p;
+    parse822_skip_comments(p, e);
-	SkipComments(p, e);
+    save = *p;
-	{
+    {
-		char* qstr = 0;
+	char* qstr = 0;
-		if(GetQuotedString(p, e, &qstr)) {
+	if((rc = parse822_quoted_string(p, e, &qstr)) == EOK) {
-			StrAppend(word, qstr);
+	    rc = str_append(word, qstr);
-			StrFree(&qstr);
+	    str_free(&qstr);
-			return 1;
+	    if(rc != EOK)
-		}
+		*p = save;
+	    return rc;
 	}
+    }
-	*p = save;
+    if(rc != EPARSE) {
-		// Necessary because the quoted string could have found
+	/* it's fatal */
-		// a partial string (invalid syntax). Thus reset, the atom
+	return rc;
-		// will fail to if the syntax is invalid.
+    }
-	{
+    /* Necessary because the quoted string could have found
-		char* atom = 0;
+     * a partial string (invalid syntax). Thus reset, the atom
-		if(GetAtom(p, e, &atom)) {
+     * will fail too if the syntax is invalid.
-			StrAppend(word, atom);
+     */
-			StrFree(&atom);
+    {
+	char* atom = 0;
+	if(parse822_atom(p, e, &atom) == EOK) {
+	    rc = str_append(word, atom);
+	    str_free(&atom);
+	    if(rc != EOK)
+		*p = save;
-			return 1;
+	    return rc;
-		}
 	}
-	*p = save;
+    }
-	return 0;
+    return EPARSE;
 }
-int GetPhrase(char** p, char* e, char** phrase)
+int parse822_phrase(const char** p, const char* e, char** phrase)
 {
-	// phrase = 1*word
+    /* phrase = 1*word */
-	if(!GetWord(p, e, phrase)) {
+    const char* save = *p;
-		return 0;
+    int rc;
-	}
-	// ok, got the 1 word, now append all the others we can
-	{
-		char* word = 0;
-		while(GetWord(p, e, &word)) {
-			StrAppendChar(phrase, ' ');
-			StrAppend(phrase, word);
-			*word = 0;
-		}
-	}
-	return 1;
-}
-// this is all a bit of a hack....
+    if((rc = parse822_word(p, e, phrase)))
-typedef struct _address mailbox_t;
+	return rc;
-mailbox_t* new_mb(void) {
+    /* ok, got the 1 word, now append all the others we can */
-	return (mailbox_t*) calloc(1, sizeof(mailbox_t));
+    {
-}
+	char* word = 0;
-mailbox_t* fill_mb(char* comments, char* personal, char* local, char* domain)
+	while((rc = parse822_word(p, e, &word)) == EOK) {
-{
+	    rc = str_append_char(phrase, ' ');
-	mailbox_t* mb = new_mb();
-	if(!mb) {
+	    if(rc == EOK)
-		return 0;
+		rc = str_append(phrase, word);
-	}
-	mb->comments = comments;
+	    str_free(&word);
-	mb->personal = personal;
-	/* this is wrong, local must be quoted */
+	    if(rc != EOK)
-	StrAppend(&mb->email, local);
+		break;
-	StrAppend(&mb->email, "@");
+	}
-	StrAppend(&mb->email, domain);
+	if(rc == EPARSE)
+	    rc = EOK; /* its not an error to find no more words */
+    }
+    if(rc)
+	*p = save;
-	mb->local_part = local;
+    return rc;
-	mb->domain = domain;
+}
-	return mb;
+static address_t new_mb(void) {
+    return calloc(1, sizeof(struct _address));
 }
-int address_create0 (address_t* a, const char* s)
+static int fill_mb(
+	address_t* a,
+	char* comments, char* personal, char* local, char* domain)
 {
-	// a must exist, and can't already have been initialized
+    int rc = EOK;
-	int status = 0;
+    *a = new_mb();
+    if(!*a) {
+	return ENOMEM;
+    }
+    (*a)->comments = comments;
+    (*a)->personal = personal;
+    /* this is wrong, local must be quoted */
+    do {
+	/* loop exists only to break out of */
+	if((rc = str_append(&(*a)->email, local)))
+	    break;
+	if((rc = str_append(&(*a)->email, "@")))
+	    break;
+	if((rc = str_append(&(*a)->email, domain)))
+	    break;
+    } while(0);
+    (*a)->local_part = local;
+    (*a)->domain = domain;
+    if(rc != EOK) {
+	/* note that the arguments have NOT been freed, we only own
+	 * them on success. */
+	free(*a);
+    }
+    return rc;
+}
-	if(!a || *a) {
+int address_create0 (address_t* a, const char* s)
-		return EINVAL;
+{
+    /* 'a' must exist, and can't already have been initialized
+     */
+    int status = 0;
+    if(!a || *a) {
+	return EINVAL;
+    }
+    status = parse822_address_list(a, (char*) s);
+    if(status == EOK) {
+	if(!*a) {
+	    /* there was a group that got parsed correctly, but had
+	     * no addresses...
+	     */
+	    return EPARSE;
 	}
+	(*a)->addr = strdup(s);
-	status = GetAddressList(a, (char*) s);
+	if(!(*a)->addr) {
-	if(status > 0) {
+	    address_destroy(a);
-		(*a)->addr = strdup(s);
+	    return ENOMEM;
-		if(!(*a)->addr) {
-			address_destroy(a);
-			return ENOMEM;
-		}
 	}
+    }
-	return 0;
+    return status;
 }
+int parse822_address_list(address_t* a, const char* s)
-int GetAddressList(mailbox_t** a, char* s)
 {
-	// address-list = #(address)
+    /* address-list = #(address) */
-	char** p = &s;
+    const char** p = &s;
-	char*  e = &s[strlen(s)];
+    const char*  e = &s[strlen(s)];
-		/* need to make the parsing api const-correct */
+    int rc = EOK;
-	int ok = 0;
+    address_t* n = a; /* the next address we'll be creating */
-	mailbox_t** an = a; /* the next address we'll be creating */
-	if(!GetAddress(p, e, an))
+    if((rc = parse822_address(p, e, n)))
-		return 0;
+	return rc;
+    parse822_skip_comments(p, e);
+    while(*p < e)
+    {
 	/* An address can contain a group, so an entire
 	 * list of addresses may have been appended, or no
 	 * addresses at all. Walk to the end. 
 	 */
-	while(*an) {
+	while(*n) {
-		++ok;
+	    n = &(*n)->next;
-		an = &(*an)->next;
+	}
+	/* Remember that ',,a@b' is a valid list! So, we must find
+	 * the <,>, but the address after it can be empty.
+	 */
+	if((rc = parse822_special(p, e, ','))) {
+	     break;
 	}
+	parse822_skip_comments(p, e);
+	rc = parse822_address(p, e, n);
-	SkipComments(p, e);
+	if(rc == EOK || rc == EPARSE) {
+	    /* that's cool, it may be a <,>, we'll find out if it isn't
-	while(GetSpecial(p, e, ','))
+	     * at the top of the loop
-	{
+	     */
-		// Remember that 'a,,b' is a valid list!
+	    rc = EOK;
-		if(GetAddress(p, e, an)) {
+	} else {
-			while(*an) {
+	    /* anything else is a fatal error, break out */
-				++ok;
+	    break;
-				an = &(*an)->next;
-			}
-		}
 	}
-	// A little problem here in that we return the number of
+	parse822_skip_comments(p, e);
-	// addresses found, but if there was trailing garbage
+    }
-	// in the text, then we'll just be ignoring that.
-	return ok;
+    if(rc) {
-}
+	address_destroy(a);
-int GetAddress(char**p, char* e, mailbox_t** a)
+    }
-{
-	// address = mailbox / group
-	return
+    return rc;
-		GetMailBox(p, e, a) ||
-		GetGroup(p, e, a);
 }
-int GetGroup(char**p, char* e, mailbox_t** a)
+int parse822_address(const char** p, const char* e, address_t* a)
 {
-	// group = phrase ":" [#mailbox] ";"
+    /* address = mailbox / group */
-	char* save = *p;
+    int rc;
+    if((rc = parse822_mail_box(p, e, a)) == EPARSE)
+    	rc = parse822_group(p, e, a);
-	SkipComments(p, e);
+    return rc;
+}
-	if(!GetPhrase(p, e, 0)) {
+int parse822_group(const char** p, const char* e, address_t* a)
-		return 0;
+{
-	}
+    /* group = phrase ":" [#mailbox] ";" */
-	SkipComments(p, e);
+    const char* save = *p;
+    address_t* asave = a; /* so we can destroy these if parsing fails */
+    int rc;
-	if(!GetSpecial(p, e, ':')) {
+    parse822_skip_comments(p, e);
-		*p = save;
-		return 0;
-	}
-	SkipComments(p, e);
+    *p = save;
-	if(GetMailBox(p, e, a)) {
+    if((rc = parse822_phrase(p, e, 0))) {
-		a = &(*a)->next;
+	return rc;
+    }
-		/* see if there are more */
+    parse822_skip_comments(p, e);
-		SkipComments(p, e);
-		while(GetSpecial(p, e, ',')) {
-			SkipComments(p, e);
-			/* Rembmeber that a,,b is a valid list! */
+    if((rc = parse822_special(p, e, ':'))) {
-			if(GetMailBox(p, e, a)) {
+	*p = save;
-				a = &(*a)->next;
+	return rc;
-			}
+    }
-		}
+    /* Basically, on each loop, we may find a mailbox, but we must find
+     * a comma after the mailbox, otherwise we've popped off the end
+     * of the list.
+     */
+    for(;;) {
+	parse822_skip_comments(p, e);
+	/* it's ok not to be a mailbox, but other errors are fatal */
+	rc = parse822_mail_box(p, e, a);
+	if(rc == EOK) {
+	    a = &(*a)->next;
+	    parse822_skip_comments(p, e);
+	} else if(rc != EPARSE) {
+	    break;
 	}
-	if(!GetSpecial(p, e, ';')) {
+	if((rc = parse822_special(p, e, ','))) {
-		*p = save;
+	    /* the commas aren't optional */
-		return 0;
+	    break;
 	}
+    }
+    if(rc == EPARSE) {
+	rc = EOK; /* ok, as long as we find the ";" next */
+    }
-	return 1;
+    if(rc || (rc = parse822_special(p, e, ';'))) {
-}
+	*p = save;
-int GetMailBox(char** p, char* e, mailbox_t** a)
-{
-	// mailbox = addr-spec "(" comment ")" / [phrase] route-addr
-	//
-	//  Note: we parse the ancient comment on the right since
-	//    it's such "common practice". :-(
-	//  Note: phrase is called display-name in drums.
-	//  Note: phrase is optional in drums, though not in RFC 822.
-	// -> addr-spec
-	if(GetAddrSpec(p, e, a)) {
-		char* comment = 0;
-		SkipWs(p, e);
+	address_destroy(asave);
+    }
-		if(GetComment(p, e, &comment)) {
+    return rc;
-			// yuck.
+}
-			(*a)->personal = comment;
-		}
-		return 1;
+int parse822_mail_box(const char** p, const char* e, address_t* a)
+{
+    /* mailbox = addr-spec [ "(" comment ")" ] / [phrase] route-addr
+     *
+     *	Note: we parse the ancient comment on the right since
+     *	  it's such "common practice". :-(
+     *	Note: phrase is called display-name in drums.
+     *	Note: phrase is optional in drums, though not in RFC 822.
+     */
+    const char* save = *p;
+    int rc;
+    /* -> addr-spec */
+    if((rc = parse822_addr_spec(p, e, a)) == EOK) {
+	/*int rc = EOK; */
+	parse822_skip_ws(p, e);
+	/* yuck. */
+	if((rc = parse822_comment(p, e, &(*a)->personal)) == EPARSE) {
+	    rc = EOK;
+	    /* cool if there's no comment, */
+	}
+	/* but if something else is wrong, destroy the address */
+	if(rc) {
+	    address_destroy(a);
+	    *p = save;
 	}
-	// -> phrase route-addr
+	return rc;
-	{
+    }
-		char* save = *p;
+    if(rc != EPARSE) {
-		char* phrase = 0;
+	*p = save;
+	return rc;
+    }
+    /* -> phrase route-addr */
+    {
+	char* phrase = 0;
+	/*int rc; */
-		GetPhrase(p, e, &phrase);
+	rc = parse822_phrase(p, e, &phrase);
-		if(!GetRouteAddr(p, e, a)) {
+	if(rc != EPARSE && rc != EOK) {
-			*p = save;
+	    return rc;
-			return 0;
+	}
-		}
-		/* add the phrase */
+	if((rc = parse822_route_addr(p, e, a))) {
-		(*a)->personal = phrase;
+	    *p = save;
+	    str_free(&phrase);
+	    return rc;
 	}
-	return 1;
+	/* add the phrase */
+	(*a)->personal = phrase;
+    }
+    return EOK;
 }
-int GetRouteAddr(char** p, char* e, mailbox_t ** a)
+int parse822_route_addr(const char** p, const char* e, address_t* a)
 {
-	// route-addr = "<" [route] addr-spec ">"
+    /* route-addr = "<" [route] addr-spec ">" */
-	char* save = *p;
+    const char* save = *p;
-	char* route = 0;
+    char* route = 0;
+    int rc;
-	SkipComments(p, e);
+    parse822_skip_comments(p, e);
+    if((rc = parse822_special(p, e, '<'))) {
+	*p = save;
-	if(!GetSpecial(p, e, '<')) {
+	return rc;
-		*p = save;
+    }
-		return 0;
+    parse822_route(p, e, &route);
-	}
-	GetRoute(p, e, &route);
+    if((rc = parse822_addr_spec(p, e, a))) {
+	*p = save;
-	if(!GetAddrSpec(p, e, a)) {
+	str_free(&route);
-		*p = save;
-		StrFree(&route);
+	return rc;
+    }
-		return 0;
+    (*a)->route = route; /* now we don't have to free our local */
-	}
-	(*a)->route = route; /* now we don't have to free our local */
+    parse822_skip_comments(p, e);
-	SkipComments(p, e);
+    if((rc = parse822_special(p, e, '>'))) {
+	*p = save;
-	if(!GetSpecial(p, e, '>')) {
-		*p = save;
-		address_destroy(a);
+	address_destroy(a);
-		return 0;
+	return rc;
-	}
+    }
-	return 1;
+    return EOK;
 }
-int GetRoute(char** p, char* e, char** route)
+int parse822_route(const char** p, const char* e, char** route)
 {
-	// route = 1#("@" domain ) ":"
+    /* route = 1#("@" domain ) ":" */
-	// 
-	// Note: I don't hav a way of returning the route, so toss it for now.
-	char* accumulator = 0;
+    const char* save = *p;
+    char* accumulator = 0;
+    int rc = EOK;
-	for(;;) {
+    for(;;) {
-		SkipComments(p, e);
+	parse822_skip_comments(p, e);
-		if(!GetSpecial(p, e, '@')) {
+	if((rc = parse822_special(p, e, '@'))) {
-			// it's not a route
+	    break;
-			return 0;
+	}
-		}
-		StrAppend(&accumulator, "@");
+	if((rc = str_append(&accumulator, "@"))) {
+	    break;
+	}
-		SkipComments(p, e);
+	parse822_skip_comments(p, e);
-		if(!GetDomain(p, e, &accumulator)) {
+	if((rc = parse822_domain(p, e, &accumulator))) {
-			// it looked like a route, but there's no domain!
+	    /* it looked like a route, but there's no domain! */
-			return 0;
+	    break;
-		}
+	}
-		SkipComments(p, e);
+	parse822_skip_comments(p, e);
-		if(!GetSpecial(p, e, ',')) {
+	if((rc = parse822_special(p, e, ',')) == EPARSE) {
-			// there's no more routes
+	    /* no more routes, but we got one so it's ok */
-			break;
+	    rc = EOK;
-		}
+	    break;
-		StrAppend(&accumulator, ", ");
+	}
+	if((rc = str_append(&accumulator, ", "))) {
+	    break;
 	}
+    }
-	SkipComments(p, e);
+    parse822_skip_comments(p, e);
-	if(!GetSpecial(p, e, ':')) {
+    if(!rc) {
-		return 0;
+	rc = parse822_special(p, e, ':');
-	}
+    }
-	StrAppend(route, accumulator);
+    if(!rc) {
-	StrFree(&accumulator);
+	rc = str_append(route, accumulator);
+    }
+    if(rc) {
+	str_free(&accumulator);
+	*p = save;
+    }
-	return 1;
+    return rc;
 }
-int GetAddrSpec(char** p, char* e, mailbox_t ** a)
+int parse822_addr_spec(const char** p, const char* e, address_t* a)
 {
-	// addr-spec = local-part "@" domain
+    /* addr-spec = local-part "@" domain */
-	char* save = *p;
+    const char* save = *p;
-	char* local_part = 0;
+    char* local_part = 0;
-	char* domain = 0;
+    char* domain = 0;
+    int rc;
-	if(!GetLocalPart(p, e, &local_part))
+    rc = parse822_local_part(p, e, &local_part);
-		return 0;
-	SkipComments(p, e);
+    parse822_skip_comments(p, e);
-	if(!GetSpecial(p, e, '@')) {
+    if(!rc) {
-		*p = save;
+	rc = parse822_special(p, e, '@');
-		return 0;
+    }
-	}
-	if(!GetDomain(p, e, &domain)) {
+    if(!rc) {
-		*p = save;
+	rc = parse822_domain(p, e, &domain);
-		return 0;
+    }
-	}
-	*a = fill_mb(0, 0, local_part, domain);
+    if(!rc) {
+	rc = fill_mb(a, 0, 0, local_part, domain);
+    }
-	return 1;
+    if(rc) {
+	*p = save;
+	str_free(&local_part);
+	str_free(&domain);
+    }
+    return rc;
 }
-int GetLocalPart(char** p, char* e, char** local_part)
-{
-	// local-part = word *("." word)
-	// Note: rewrite as ->  word ["." local-part]
-	char* save = *p;
-	SkipComments(p, e);
+int parse822_local_part(const char** p, const char* e, char** local_part)
+{
-	if(!GetWord(p, e, local_part)) {
+    /* local-part = word *("." word)
-		*p = save;
+     *
-		return 0;
+     * Note: rewrite as ->  word ["." local-part]
-	}
+     */
-	// we've got a local-part, but keep looking for more
-	save = *p;
+    const char* save = *p;
+    const char* save2 = *p;
+    int rc;
-	SkipComments(p, e);
+    parse822_skip_comments(p, e);
-	if(!GetSpecial(p, e, '.')) {
+    if((rc = parse822_word(p, e, local_part))) {
-		*p = save;
+	*p = save;
-		return 1;
+	return rc;
-	}
+    }
-	{
+    /* We've got a local-part, but keep looking for more. */
-		char* more = 0;
+    parse822_skip_comments(p, e);
-		if(!GetLocalPart(p, e, &more)) {
-			*p = save;
+    /* If we get a parse error, we roll back to save2, but if
-			return 1;
+     * something else failed, we have to roll back to save.
-		}
+     */
-		// append more
+    save2 = *p;
-		StrAppend(local_part, ".");
-		StrAppend(local_part, more);
+    rc = parse822_special(p, e, '.');
-		StrFree(&more);
+    if(!rc) {
+	char* more = 0;
+	if((rc = parse822_local_part(p, e, &more)) == EOK) {
+	    /* append more */
+	    if((rc = str_append(local_part, ".")) == EOK) {
+		rc = str_append(local_part, more);
+	    }
+	    str_free(&more);
 	}
+    }
-	return 1;
+    if(rc == EPARSE) {
+	/* if we didn't get more ("." word) pairs, that's ok */
+	*p = save2;
+	rc = EOK;
+    }
+    if(rc) {
+	/* if anything else failed, that's real */
+	*p = save;
+	str_free(local_part);
+    }
+    return rc;
 }
-int GetDomain(char** p, char* e, char** domain)
+int parse822_domain(const char** p, const char* e, char** domain)
 {
-	// domain = sub-domain *("." sub-domain)
+    /* domain = sub-domain *("." sub-domain)
+     *
+     * Note: rewrite as -> sub-domain ["." domain]
+     */
-	// Note: rewrite as -> sub-domain ("." domain)
+    const char* save = *p;
+    const char* save2 = 0;
+    int rc;
-	char* save = 0;
+    parse822_skip_comments(p, e);
-	if(!GetSubDomain(p, e, domain))
+    if((rc = parse822_sub_domain(p, e, domain))) {
-		return 0;
+	*p = save;
+	return rc;
+    }
-	// we've got the 1, keep looking for more
-	save = *p;
+    /* We save before skipping comments to preserve the comment
+     * at the end of a domain, the addr-spec may want to abuse it
+     * for a personal name.
+     */
+    save2 = *p;
-	SkipComments(p, e);
+    /* we've got the 1, keep looking for more */
-	if(!GetSpecial(p, e, '.')) {
+    parse822_skip_comments(p, e);
-		// we do this to preserve the comment at the end of a
-		// domain, the addr-spec may want to abuse it for a
-		// personal name.
-		*p = save;
-		return 1;
-	}
-	{
-		char* more = 0;
-		if(!GetDomain(p, e, &more)) {
-			*p = save;
-			return 1;
-		}
-		StrAppend(domain, ".");
-		StrAppend(domain, more);
-		StrFree(&more);
-	}
-	return 1;
+    rc = parse822_special(p, e, '.');
-}
-int GetSubDomain(char** p, char* e, char** sub_domain)
-{
-	// sub-domain = domain-ref / domain-literal
-	//   Note: domain-literal isn't supported yet.
-	return
+    if(!rc) {
-		GetDomainRef(p, e, sub_domain) ||
+	char* more = 0;
-		GetDomainLiteral(p, e, sub_domain);
+	if((rc = parse822_domain(p, e, &more)) == EOK) {
+	    if((rc = str_append(domain, ".")) == EOK) {
+		rc = str_append(domain, more);
+	    }
+	    str_free(&more);
+	}
+    }
+    if(rc == EPARSE) {
+	/* we didn't parse more ("." sub-domain) pairs, that's ok */
+	*p = save2;
+	rc = EOK;
+    }
+    if(rc) {
+	/* something else failed, roll it all back */
+	*p = save;
+	str_free(domain);
+    }
+    return rc;
 }
-int GetDomainRef(char** p, char* e, char** domain_ref)
-{
-	// domain-ref = atom
-	return GetAtom(p, e, domain_ref);
+int parse822_sub_domain(const char** p, const char* e, char** sub_domain)
-}
-int GetDText(char** p, char* e, char** dtext)
 {
-	// dtext = 1*dtext
+    /* sub-domain = domain-ref / domain-literal
+     *
+     * The domain-ref form is tried first. Only when it returns EPARSE
+     * is the domain-literal form attempted; any other result from
+     * parse822_domain_ref() (success or a different error) is returned
+     * unchanged.
+     */
-	// Note: dtext is only defined as a character class in
-	//  RFC822, but this definition is more useful for
-	//  slurping domain literals.
-	char* start = *p;
+    int rc;
+    if((rc = parse822_domain_ref(p, e, sub_domain)) == EPARSE)
+    	rc = parse822_domain_literal(p, e, sub_domain);
-	while(*p < e && IsDText(**p)) {
+    return rc;
-		*p += 1;
+}
-	}
-	if(start == *p) {
-		return 0;
-	}
-	StrAppendRange(dtext, start, *p);
+int parse822_domain_ref(const char** p, const char* e, char** domain_ref)
+{
+    /* domain-ref = atom
+     *
+     * Thin wrapper: forwards all three arguments to parse822_atom()
+     * and returns its result unchanged.
+     */
-	return 1;
+    return parse822_atom(p, e, domain_ref);
 }
-int GetDomainLiteral(char** p, char* e, char** domain_literal)
+int parse822_d_text(const char** p, const char* e, char** dtext)
 {
-	// domain-literal = "[" *(dtext / quoted-pair) "]"
+    /* d-text = 1*dtext
+     *
+     * Note: dtext is only defined as a character class in
+     *	RFC822, but this definition is more useful for
+     *	slurping domain literals.
+     *
+     * Advances *p over the longest run of characters (stopping at e)
+     * accepted by parse822_is_d_text(). Returns EPARSE if the run is
+     * empty. Otherwise the run is appended through dtext with
+     * str_append_range(); if that call fails, *p is put back at the
+     * start of the run and the failure code is returned.
+     */
-	char* save = *p;
+    const char* start = *p;
+    int rc = EOK;
-	char* literal = 0;
+    while(*p < e && parse822_is_d_text(**p)) {
+	*p += 1;
+    }
-	if(!GetSpecial(p, e, '[')) {
+    if(start == *p) { /* nothing matched */
-		return 0;
+	return EPARSE;
-	}
+    }
-	StrAppendChar(&literal, '[');
-	while(GetDText(p, e, &literal) || GetQuotedPair(p, e, &literal)) {
+    if((rc = str_append_range(dtext, start, *p))) {
-		/* Eat all of this we can get! */
+	*p = start;
-	}
+    }
-	if(!GetSpecial(p, e, ']')) {
-		*p = save;
-		return 0;
+    return rc;
-	}
+}
-	StrAppendChar(&literal, ']');
-	StrAppend(domain_literal, literal);
+int parse822_domain_literal(const char** p, const char* e, char** domain_literal)
+{
+    /* domain-literal = "[" *(dtext / quoted-pair) "]"
+     *
+     * The literal, brackets included, is assembled in a local buffer
+     * and appended through domain_literal only after the closing "]"
+     * has been matched. If anything fails once the opening "[" has
+     * been matched, *p is reset to its entry value. The local buffer
+     * is released with str_free() before returning from that point on.
+     *
+     * NOTE(review): in the loop below, a failure other than EPARSE
+     * from parse822_d_text() is overwritten by the result of
+     * parse822_quoted_pair(), so such an error can be lost. Confirm
+     * whether that is intended.
+     */
-	StrFree(&literal);
+    const char* save = *p;
+    char* literal = 0;
+    int rc;
-	return 1;
+    if((rc = parse822_special(p, e, '['))) {
+	return rc;
+    }
+    if((rc = str_append_char(&literal, '['))) {
+	*p = save;
+	return rc;
+    }
+    while(
+	    (rc = parse822_d_text(p, e, &literal)) == EOK ||
+	    (rc = parse822_quoted_pair(p, e, &literal)) == EOK
+	) {
+	/* Eat all of this we can get! */
+    }
+    if(rc == EPARSE) { /* no more dtext / quoted-pair: normal loop exit */
+	rc = EOK;
+    }
+    if(!rc) {
+	rc = parse822_special(p, e, ']');
+    }
+    if(!rc) {
+	rc = str_append_char(&literal, ']');
+    }
+    if(!rc) {
+	rc = str_append(domain_literal, literal);
+    }
+    str_free(&literal);
+    if(rc) {
+	*p = save;
+    }
+    return rc;
 }
 #if 0
-int GetFieldName(char** p, char* e, char** fieldname)
+int parse822_field_name(const char** p, const char* e, char** fieldname)
 {
-	// field-name = 1*<any CHAR, excluding CTLS, SPACE, and ":"> ":"
+    /* field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> ":"
+     *
+     * Disabled code (inside #if 0). The body still uses the Ptr and
+     * Rope types and the old 1 == success / 0 == failure return values.
+     *
+     * NOTE(review): parse822_special() is tested with "!" below, but
+     * the enabled callers in this file treat a zero return from it as
+     * success, so the test would need inverting if this code is ever
+     * re-enabled.
+     */
-	Ptr save = p;
+    Ptr save = p;
-	Rope fn;
+    Rope fn;
-	while(*p != e) {
+    while(*p != e) {
-		char c = *p;
+	char c = *p;
-		if(!IsCHAR(c))
+	if(!parse822_is_char(c))
-			break;
+	    break;
-		if(IsCTL(c))
+	if(parse822_is_ctl(c))
-			break;
+	    break;
-		if(IsSPACE(c))
+	if(parse822_is_space(c))
-			break;
+	    break;
-		if(c == ':')
+	if(c == ':')
-			break;
+	    break;
-		fn.append(c);
+	fn.append(c);
-		*p += 1;
+	*p += 1;
-	}
+    }
-	// must be at least one char in the field name
+    /* must be at least one char in the field name */
-	if(fn.empty()) {
+    if(fn.empty()) {
-		p = save;
+	p = save;
-		return 0;
+	return 0;
-	}
+    }
-	SkipComments(p, e);
+    parse822_skip_comments(p, e);
-	if(!GetSpecial(p, e, ':')) {
+    if(!parse822_special(p, e, ':')) {
-		p = save;
+	p = save;
-		return 0;
+	return 0;
-	}
+    }
-	fieldname = fn;
+    fieldname = fn;
-	return 1;
+    return 1;
 }
-int GetFieldBody(char** p, char* e, Rope& fieldbody)
+int parse822_field_body(const char** p, const char* e, Rope& fieldbody)
 {
-	// field-body = *text [CRLF LWSP-char field-body]
+    /* field-body = *text [CRLF lwsp-char field-body]
+     *
+     * Disabled code (inside #if 0). The Rope& parameter and the
+     * fb.append() calls are C++-style constructs left over from the
+     * earlier version of this parser.
+     *
+     * Each pass of the loop collects the text up to the next CR LF
+     * pair (or up to e), steps over the CR LF, and keeps going only
+     * while the following line begins with a space or a tab, i.e. is
+     * a continuation line.
+     */
-	Ptr save = p;
+    Ptr save = p;
-	Rope fb;
+    Rope fb;
-	for(;;)
+    for(;;)
-	{
+    {
-		Ptr eol = p;
+	Ptr eol = p;
-		while(eol != e) {
+	while(eol != e) {
-			char c = *eol;
+	    char c = *eol;
-			if(eol[0] == '\r' && (eol+1) != e && eol[1] == '\n')
+	    if(eol[0] == '\r' && (eol+1) != e && eol[1] == '\n')
-				break;
+		break;
-			++eol;
+	    ++eol;
-		}
+	}
-		fb.append(p, eol);
+	fb.append(p, eol);
-		p = eol;
+	p = eol;
-		if(eol == e)
+	if(eol == e)
-			break; // no more, so we're done
+	    break; /* no more, so we're done */
-		assert(p[0] == '\r');
+	assert(p[0] == '\r');
-		assert(p[1] == '\n');
+	assert(p[1] == '\n');
-		p += 2;
+	p += 2;
-		if(*p == e)
+	if(*p == e)
-			break; // no more, so we're done
+	    break; /* no more, so we're done */
-		// check if next line is a continuation line
+	/* check if next line is a continuation line */
-		if(*p != ' ' && *p != '\t')
+	if(*p != ' ' && *p != '\t')
-			break;
+	    break;
-	}
+    }
-	fieldbody = fb;
+    fieldbody = fb;
-	return 1;
+    return 1;
 }
 #endif