New parser from Sam.
Showing
2 changed files
with
879 additions
and
646 deletions
... | @@ -38,7 +38,7 @@ extern "C" { | ... | @@ -38,7 +38,7 @@ extern "C" { |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * The data-structure representing an RFC822 MAILBOX. It may be | 40 | * The data-structure representing an RFC822 MAILBOX. It may be |
41 | * one MAILBOX in a list of them, as found in an ADDRESS list or | 41 | * one MAILBOX or a list of them, as found in an ADDRESS or |
42 | * a MAILBOX list (as found in a GROUP). | 42 | * a MAILBOX list (as found in a GROUP). |
43 | * | 43 | * |
44 | * Capitalized names are from RFC 822, section 6.1 (Address Syntax). | 44 | * Capitalized names are from RFC 822, section 6.1 (Address Syntax). |
... | @@ -62,7 +62,8 @@ struct _address | ... | @@ -62,7 +62,8 @@ struct _address |
62 | char *route; | 62 | char *route; |
63 | /* the optional ROUTE in the ROUTE-ADDR form of MAILBOX */ | 63 | /* the optional ROUTE in the ROUTE-ADDR form of MAILBOX */ |
64 | 64 | ||
65 | // size_t num; -- didn't appear to be used anywhere... | 65 | /* size_t num; this didn't appear to be used anywhere... so I commented |
66 | it out, is that ok? -sam */ | ||
66 | 67 | ||
67 | struct _address *next; | 68 | struct _address *next; |
68 | }; | 69 | }; | ... | ... |
... | @@ -15,6 +15,41 @@ | ... | @@ -15,6 +15,41 @@ |
15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | 16 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ |
17 | 17 | ||
18 | /* vi:sw=4:ts=8 */ | ||
19 | /* | ||
20 | Things to (maybe) do: | ||
21 | |||
22 | - groups used to return the number of addresses, now it returns | ||
23 | success... but doesn't create an _address for 'foo:;'. Should | ||
24 | it create one with just a personal? | ||
25 | |||
26 | x - C comments only. | ||
27 | x - no C++ reserved words. | ||
28 | x - fix is_digit() to be like the other is functions | ||
29 | |||
30 | - what should return codes be, possible errors are: | ||
31 | . no mem (ENOMEM) | ||
32 | . function wasn't called correctly, usually a missing argument (EINVAL) | ||
33 | . invalid syntax found during parsing (ENOENT) | ||
34 | |||
35 | All functions should return ==0 on success, and errno on failure. | ||
36 | |||
37 | x - const-correct the APIs | ||
38 | x - "new = (char*) realloc()", cast not needed | ||
39 | x - mailbox_t* nuked in favor of address_t | ||
40 | x - fix handful of memory leaks detected by Alain | ||
41 | |||
42 | - test for memory leaks, so I don't have to rely on Alain's sharp eyes | ||
43 | - fix the realloc, try a struct _string { char* b, size_t sz }; | ||
44 | x - where does parse822.h go? | ||
45 | - parse field names and bodies | ||
46 | - parse dates (pull from Mail++) | ||
47 | - parse Received: field | ||
48 | x - check RFC again, can groups be nested? No! | ||
49 | - should we do best effort parsing, so parsing "sam@localhost, foo@" | ||
50 | gets one address, or just say it is or it isn't in RFC format? | ||
51 | */ | ||
52 | |||
18 | #ifdef HAVE_CONFIG_H | 53 | #ifdef HAVE_CONFIG_H |
19 | # include <config.h> | 54 | # include <config.h> |
20 | #endif | 55 | #endif |
... | @@ -27,904 +62,1101 @@ | ... | @@ -27,904 +62,1101 @@ |
27 | 62 | ||
28 | #include "address0.h" | 63 | #include "address0.h" |
29 | 64 | ||
30 | int GetDigits(char** p, char* e, int min, int max, int* digits); | 65 | #include <mailutils/parse822.h> |
31 | int GetSpecial(char** p, char* e, char c); | 66 | |
32 | int GetComment(char** p, char* e, char** comment); | 67 | #ifdef EOK |
33 | int GetAtom(char** p, char* e, char** atom); | 68 | # undef EOK |
34 | int GetQuotedPair(char** p, char* e, char** qpair); | 69 | #endif |
35 | int GetQuotedString(char** p, char* e, char** qstr); | ||
36 | int GetWord(char** p, char* e, char** word); | ||
37 | int GetPhrase(char** p, char* e, char** phrase); | ||
38 | |||
39 | int GetAddressList(address_t* a, char* s); | ||
40 | int GetMailBox(char** p, char* e, address_t* a); | ||
41 | int GetGroup(char** p, char* e, address_t* a); | ||
42 | int GetAddress(char** p, char* e, address_t* a); | ||
43 | int GetRouteAddr(char** p, char* e, address_t* a); | ||
44 | int GetRoute(char** p, char* e, char** route); | ||
45 | int GetAddrSpec(char** p, char* e, address_t* a); | ||
46 | int GetLocalPart(char** p, char* e, char** local_part); | ||
47 | int GetDomain(char** p, char* e, char** domain); | ||
48 | int GetSubDomain(char** p, char* e, char** sub_domain); | ||
49 | int GetDomainRef(char** p, char* e, char** domain_ref); | ||
50 | int GetDomainLiteral(char** p, char* e, char** domain_literal); | ||
51 | |||
52 | // Some convenience functions for dealing with dynamically re-sized | ||
53 | // strings. | ||
54 | |||
55 | int StrAppendN(char** to, char* from, size_t n) | ||
56 | { | ||
57 | size_t l = 0; | ||
58 | |||
59 | /* if not to, then silently discard data */ | ||
60 | if(!to) { | ||
61 | return 1; | ||
62 | } | ||
63 | 70 | ||
64 | if(*to) { | 71 | #define EOK 0 |
65 | char* new; | 72 | #define EPARSE ENOENT |
66 | 73 | ||
67 | l = strlen(*to); | 74 | /* |
75 | * Some convenience functions for dealing with dynamically re-sized | ||
76 | * strings. | ||
77 | */ | ||
68 | 78 | ||
69 | new = (char*) realloc(*to, l + n + 1); | 79 | static int str_append_n(char** to, const char* from, size_t n) |
80 | { | ||
81 | size_t l = 0; | ||
70 | 82 | ||
71 | if(!new) { | 83 | /* if not to, then silently discard data */ |
72 | return 0; | 84 | if(!to) { |
73 | } | 85 | return EOK; |
74 | 86 | } | |
75 | *to = new; | 87 | |
76 | } else { | 88 | if(*to) { |
77 | *to = (char*) malloc(n + 1); | 89 | char* bigger; |
90 | |||
91 | l = strlen(*to); | ||
92 | |||
93 | bigger = realloc(*to, l + n + 1); | ||
94 | |||
95 | if(!bigger) { | ||
96 | return ENOMEM; | ||
78 | } | 97 | } |
98 | |||
99 | *to = bigger; | ||
100 | } else { | ||
101 | *to = malloc(n + 1); | ||
102 | } | ||
79 | 103 | ||
80 | strncpy(&to[0][l], from, n); | 104 | strncpy(&to[0][l], from, n); |
81 | 105 | ||
82 | /* strncpy is lame, nul terminate our buffer */ | 106 | /* strncpy is lame, nul terminate our buffer */ |
83 | 107 | ||
84 | to[0][l + n] = 0; | 108 | to[0][l + n] = 0; |
85 | 109 | ||
86 | return 1; | 110 | return EOK; |
87 | } | 111 | } |
88 | 112 | static int str_append(char** to, const char* from) | |
89 | int StrAppend(char** to, char* from) | ||
90 | { | 113 | { |
91 | return StrAppendN(to, from, strlen(from)); | 114 | return str_append_n(to, from, strlen(from)); |
92 | } | 115 | } |
93 | 116 | static int str_append_char(char** to, char c) | |
94 | int StrAppendChar(char** to, char c) | ||
95 | { | 117 | { |
96 | return StrAppendN(to, &c, 1); | 118 | return str_append_n(to, &c, 1); |
97 | } | 119 | } |
98 | 120 | static int str_append_range(char** to, const char* b, const char* e) | |
99 | int StrAppendRange(char** to, char* b, char* e) | ||
100 | { | 121 | { |
101 | return StrAppendN(to, b, e - b); | 122 | return str_append_n(to, b, e - b); |
102 | } | 123 | } |
103 | 124 | static void str_free(char** s) | |
104 | void StrFree(char** s) | ||
105 | { | 125 | { |
106 | if(s && *s) { | 126 | if(s && *s) { |
107 | free(*s); | 127 | free(*s); |
108 | *s = 0; | 128 | *s = 0; |
109 | } | 129 | } |
110 | } | 130 | } |
111 | 131 | ||
112 | // | 132 | /* |
113 | // MRfc822Tokenizer | 133 | * Character Classification - could be rewritten in a C library |
114 | // | 134 | * independent way, my system's C library matches the RFC |
115 | 135 | * definitions. I don't know if that's guaranteed. | |
116 | // | 136 | * |
117 | // Character Classification - could be rewritten in a C library | 137 | * Note that all return values are: |
118 | // independent way, my system's C library matches the RFC | 138 | * 1 -> TRUE |
119 | // definitions, but I don't know if that's guaranteed. | 139 | * 0 -> FALSE |
120 | // | 140 | * This may be appear different than the 0 == success return |
121 | int IsCHAR(char c) | 141 | * values of the other functions, but I was getting lost in |
142 | * boolean arithmetic. | ||
143 | */ | ||
144 | int parse822_is_char(char c) | ||
122 | { | 145 | { |
123 | return isascii(c); | 146 | return isascii(c); |
124 | } | 147 | } |
125 | int IsDIGIT(char** p, char* e) | 148 | int parse822_is_digit(char c) |
126 | { | 149 | { |
127 | // DIGIT = <any ASCII decimal digit> | 150 | /* digit = <any ASCII decimal digit> */ |
128 | 151 | ||
129 | if(*p == e) { | 152 | return isdigit(c); |
130 | return 0; | ||
131 | } | ||
132 | |||
133 | return isdigit(**p); | ||
134 | } | 153 | } |
135 | int IsCTL(char c) | 154 | int parse822_is_ctl(char c) |
136 | { | 155 | { |
137 | return iscntrl(c) || c == 127 /* DEL */; | 156 | return iscntrl(c) || c == 127 /* DEL */; |
138 | } | 157 | } |
139 | int IsSPACE(char c) | 158 | int parse822_is_space(char c) |
140 | { | 159 | { |
141 | return c == ' '; | 160 | return c == ' '; |
142 | } | 161 | } |
143 | int IsHTAB(char c) | 162 | int parse822_is_htab(char c) |
144 | { | 163 | { |
145 | return c == '\t'; | 164 | return c == '\t'; |
146 | } | 165 | } |
147 | int IsLWSPChar(char c) | 166 | int parse822_is_lwsp_char(char c) |
148 | { | 167 | { |
149 | return IsSPACE(c) || IsHTAB(c); | 168 | return parse822_is_space(c) || parse822_is_htab(c); |
150 | } | 169 | } |
151 | int IsSpecial(char c) | 170 | int parse822_is_special(char c) |
152 | { | 171 | { |
153 | return strchr("()<>@,;:\\\".[]", c) ? 1 : 0; | 172 | return strchr("()<>@,;:\\\".[]", c) ? 1 : 0; |
154 | } | 173 | } |
155 | int IsAtomChar(char c) | 174 | int parse822_is_atom_char(char c) |
156 | { | 175 | { |
157 | return IsCHAR(c) && !IsSpecial(c) && !IsSPACE(c) && !IsCTL(c); | 176 | return |
177 | parse822_is_char(c) && | ||
178 | !parse822_is_special(c) && | ||
179 | !parse822_is_space(c) && | ||
180 | !parse822_is_ctl(c) | ||
181 | ; | ||
158 | } | 182 | } |
159 | int IsQText(char c) | 183 | int parse822_is_q_text(char c) |
160 | { | 184 | { |
161 | return IsCHAR(c) | 185 | return |
162 | && c != '"' | 186 | parse822_is_char(c) |
163 | && c != '\\' | 187 | && c != '"' |
164 | && c != '\r'; | 188 | && c != '\\' |
189 | && c != '\r' | ||
190 | ; | ||
165 | } | 191 | } |
166 | int IsDText(char c) | 192 | int parse822_is_d_text(char c) |
167 | { | 193 | { |
168 | return IsCHAR(c) | 194 | return |
169 | && c != '[' | 195 | parse822_is_char(c) |
170 | && c != ']' | 196 | && c != '[' |
171 | && c != '\\' | 197 | && c != ']' |
172 | && c != '\r'; | 198 | && c != '\\' |
199 | && c != '\r' | ||
200 | ; | ||
173 | } | 201 | } |
174 | int IsSmtpQ(char c) | 202 | /* |
203 | * SMTP's version of qtext, called <q> in the RFC 821 syntax, | ||
204 | * also excludes <LF>. | ||
205 | */ | ||
206 | int parse822_is_smtp_q(char c) | ||
175 | { | 207 | { |
176 | return IsQText(c) | 208 | return parse822_is_q_text(c) |
177 | && c != '\n'; | 209 | && c != '\n'; |
178 | } | 210 | } |
179 | 211 | ||
180 | // | 212 | /* |
181 | // Lexical Analysis - these tokens are all from RFC822, | 213 | * Lexical Analysis - these tokens are all from RFC822, |
182 | // section 3.3, Lexical Tokens, though not all tokens are | 214 | * section 3.3, Lexical Tokens, though not all tokens are |
// implemented. | 215 | * implemented. The names match those used in the extended |
184 | // | 216 | * BNF of the RFC where possible. |
217 | */ | ||
185 | 218 | ||
186 | int SkipWs(char** p, char* e) | 219 | int parse822_skip_ws(const char** p, const char* e) |
187 | { | 220 | { |
188 | int ws = 0; | 221 | while((*p != e) && parse822_is_lwsp_char(**p)) { |
189 | 222 | *p += 1; | |
190 | while((*p != e) && IsLWSPChar(**p)) { | 223 | } |
191 | ++ws; | 224 | return EOK; |
192 | *p += 1; | ||
193 | } | ||
194 | |||
195 | return ws; | ||
196 | } | 225 | } |
197 | 226 | ||
198 | int SkipComments(char** p, char* e) | 227 | int parse822_skip_comments(const char** p, const char* e) |
199 | { | 228 | { |
200 | int comments; | 229 | int status; |
201 | 230 | ||
202 | while(GetComment(p, e, 0)) | 231 | while((status = parse822_comment(p, e, 0)) == EOK) |
203 | comments++; | 232 | ; |
204 | 233 | ||
205 | return comments++; | 234 | return EOK; |
206 | } | 235 | } |
207 | 236 | ||
208 | int GetDigits(char** p, char* e, | 237 | int parse822_digits(const char** p, const char* e, |
209 | int min, int max, int* digits) | 238 | int min, int max, int* digits) |
210 | { | 239 | { |
211 | char* save = *p; | 240 | const char* save = *p; |
212 | 241 | ||
213 | int i = 0; | 242 | int i = 0; |
214 | 243 | ||
215 | assert(digits); | 244 | assert(digits); |
216 | 245 | ||
217 | *digits = 0; | 246 | *digits = 0; |
218 | 247 | ||
219 | while(IsDIGIT(p, e)) { | 248 | while(*p < e && parse822_is_digit(**p)) { |
220 | *digits = *digits * 10 + **p - '0'; | 249 | *digits = *digits * 10 + **p - '0'; |
221 | *p += 1; | 250 | *p += 1; |
222 | ++i; | 251 | ++i; |
223 | if(max != 0 && i == max) { | 252 | if(max != 0 && i == max) { |
224 | break; | 253 | break; |
225 | } | ||
226 | } | ||
227 | if(i < min) { | ||
228 | *p = save; | ||
229 | return 0; | ||
230 | } | 254 | } |
255 | } | ||
256 | if(i < min) { | ||
257 | *p = save; | ||
258 | return EPARSE; | ||
259 | } | ||
231 | 260 | ||
232 | return 1; | 261 | return EOK; |
233 | } | 262 | } |
234 | 263 | ||
235 | int GetSpecial(char** p, char* e, char c) | 264 | int parse822_special(const char** p, const char* e, char c) |
236 | { | 265 | { |
237 | SkipWs(p, e); // not comments, they start with a special... | 266 | parse822_skip_ws(p, e); /* not comments, they start with a special... */ |
238 | 267 | ||
239 | if((*p != e) && **p == c) { | 268 | if((*p != e) && **p == c) { |
240 | *p += 1; | 269 | *p += 1; |
241 | return 1; | 270 | return EOK; |
242 | } | 271 | } |
243 | return 0; | 272 | return EPARSE; |
244 | } | 273 | } |
245 | 274 | ||
246 | int GetComment(char** p, char* e, char** comment) | 275 | int parse822_comment(const char** p, const char* e, char** comment) |
247 | { | 276 | { |
248 | // comment = "(" *(ctext / quoted-pair / comment) ")" | 277 | /* comment = "(" *(ctext / quoted-pair / comment) ")" |
249 | // ctext = <any CHAR except "(", ")", "\", & CR, including LWSP> | 278 | * ctext = <any char except "(", ")", "\", & CR, including lwsp> |
250 | 279 | */ | |
251 | if(!GetSpecial(p, e, '(')) { | 280 | const char* save = *p; |
252 | return 0; | 281 | int rc; |
253 | } | 282 | |
254 | 283 | if((rc = parse822_special(p, e, '('))) { | |
255 | while(*p != e) { | 284 | return rc; |
256 | char c = **p; | 285 | } |
257 | 286 | ||
258 | if(c == ')') { | 287 | while(*p != e) { |
259 | *p += 1; | 288 | char c = **p; |
260 | return 1; // found end-of-comment | 289 | |
261 | } else if(c == '(') { | 290 | if(c == ')') { |
262 | GetComment(p, e, comment); | 291 | *p += 1; |
263 | } else if(c == '\\') { | 292 | return EOK; /* found end-of-comment */ |
264 | GetQuotedPair(p, e, comment); | 293 | } else if(c == '(') { |
265 | } else if(c == '\r') { | 294 | rc = parse822_comment(p, e, comment); |
266 | // invalid character... | 295 | } else if(c == '\\') { |
267 | *p += 1; | 296 | rc = parse822_quoted_pair(p, e, comment); |
268 | } else if(IsCHAR(c)) { | 297 | } else if(c == '\r') { |
269 | StrAppendChar(comment, c); | 298 | /* invalid character... */ |
270 | *p += 1; | 299 | *p += 1; |
271 | } else { | 300 | } else if(parse822_is_char(c)) { |
272 | // invalid character... should I append it? | 301 | rc = str_append_char(comment, c); |
273 | *p += 1; | 302 | *p += 1; |
274 | } | 303 | } else { |
304 | /* invalid character... */ | ||
305 | *p += 1; | ||
275 | } | 306 | } |
276 | return 0; // end-of-comment not found | 307 | if(rc != EOK) |
308 | break; | ||
309 | } | ||
310 | |||
311 | if(*p == e) { | ||
312 | rc = EPARSE; /* end-of-comment not found */ | ||
313 | } | ||
314 | |||
315 | *p = save; | ||
316 | |||
317 | assert(rc != EOK); | ||
318 | |||
319 | return rc; | ||
277 | } | 320 | } |
278 | 321 | ||
279 | int GetAtom(char** p, char* e, char** atom) | 322 | int parse822_atom(const char** p, const char* e, char** atom) |
280 | { | 323 | { |
281 | // atom = 1*<an atom char> | 324 | /* atom = 1*<an atom char> */ |
282 | 325 | ||
283 | int ok = 0; | 326 | const char* save = *p; |
327 | int rc = EPARSE; | ||
284 | 328 | ||
285 | SkipComments(p, e); | 329 | parse822_skip_comments(p, e); |
286 | 330 | ||
287 | while((*p != e) && IsAtomChar(**p)) | 331 | save = *p; |
288 | { | 332 | |
289 | ++ok; | 333 | while((*p != e) && parse822_is_atom_char(**p)) |
290 | StrAppendChar(atom, **p); | 334 | { |
291 | *p += 1; | 335 | rc = str_append_char(atom, **p); |
336 | *p += 1; | ||
337 | if(rc != EOK) { | ||
338 | *p = save; | ||
339 | break; | ||
292 | } | 340 | } |
293 | return ok; | 341 | } |
342 | return rc; | ||
294 | } | 343 | } |
295 | int GetQuotedPair(char** p, char* e, char** qpair) | 344 | |
345 | int parse822_quoted_pair(const char** p, const char* e, char** qpair) | ||
296 | { | 346 | { |
297 | // quoted-pair = "\" CHAR | 347 | /* quoted-pair = "\" char */ |
298 | 348 | ||
299 | /* need TWO characters to be available */ | 349 | int rc; |
300 | if((e - *p) < 2) | ||
301 | return 0; | ||
302 | 350 | ||
303 | if(**p != '\\') | 351 | /* need TWO characters to be available */ |
304 | return 0; | 352 | if((e - *p) < 2) |
353 | return EPARSE; | ||
305 | 354 | ||
306 | *p += 1; | 355 | if(**p != '\\') |
307 | if(*p == e) | 356 | return EPARSE; |
308 | return 0; | ||
309 | 357 | ||
310 | StrAppendChar(qpair, **p); | 358 | if((rc = str_append_char(qpair, *(*p + 1)))) |
359 | return rc; | ||
311 | 360 | ||
312 | *p += 1; | 361 | *p += 2; |
362 | |||
363 | return EOK; | ||
364 | } | ||
313 | 365 | ||
314 | return 1; | 366 | int parse822_quoted_string(const char** p, const char* e, char** qstr) |
315 | } | 367 | { |
316 | int GetQuotedString(char** p, char* e, char** qstr) | 368 | /* quoted-string = <"> *(qtext/quoted-pair) <"> |
317 | { | 369 | * qtext = char except <">, "\", & CR, including lwsp-char |
318 | // quoted-string = <"> *(qtext/quoted-pair) <"> | 370 | */ |
319 | // qtext = CHAR except <">, "\", & CR, including LWSP-char | 371 | |
320 | 372 | const char* save = *p; | |
321 | SkipComments(p, e); | 373 | int rc; |
322 | 374 | ||
323 | if(!GetSpecial(p, e, '"')) | 375 | parse822_skip_comments(p, e); |
324 | return 0; | 376 | |
325 | 377 | save = *p; | |
326 | while(*p != e) | 378 | |
327 | { | 379 | if((rc = parse822_special(p, e, '"'))) |
328 | char c = **p; | 380 | return rc; |
329 | 381 | ||
330 | if(c == '"') { | 382 | while(*p != e) |
331 | *p += 1; | 383 | { |
332 | return 1; // found end-of-qstr | 384 | char c = **p; |
333 | } else if(c == '\\') { | 385 | |
334 | GetQuotedPair(p, e, qstr); | 386 | if(c == '"') { |
335 | } else if(c == '\r') { | 387 | *p += 1; |
336 | // invalid character... | 388 | return EOK; /* found end-of-qstr */ |
337 | *p += 1; | 389 | } else if(c == '\\') { |
338 | } else if(IsCHAR(c)) { | 390 | rc = parse822_quoted_pair(p, e, qstr); |
339 | StrAppendChar(qstr, c); | 391 | } else if(c == '\r') { |
340 | *p += 1; | 392 | /* invalid character... */ |
341 | } else { | 393 | *p += 1; |
342 | // invalid character... | 394 | } else if(parse822_is_char(c)) { |
343 | *p += 1; | 395 | rc = str_append_char(qstr, c); |
344 | } | 396 | *p += 1; |
397 | } else { | ||
398 | /* invalid character... */ | ||
399 | *p += 1; | ||
400 | } | ||
401 | if(rc) { | ||
402 | *p = save; | ||
403 | return rc; | ||
345 | } | 404 | } |
346 | return 0; // end-of-qstr not found | 405 | } |
406 | *p = save; | ||
407 | |||
408 | return EPARSE; /* end-of-qstr not found */ | ||
347 | } | 409 | } |
348 | int GetWord(char** p, char* e, char** word) | 410 | |
411 | int parse822_word(const char** p, const char* e, char** word) | ||
349 | { | 412 | { |
350 | // word = atom / quoted-string | 413 | /* word = atom / quoted-string */ |
414 | |||
415 | const char* save = *p; | ||
416 | int rc = EOK; | ||
351 | 417 | ||
352 | char* save = *p; | 418 | parse822_skip_comments(p, e); |
353 | 419 | ||
354 | SkipComments(p, e); | 420 | save = *p; |
355 | 421 | ||
356 | { | 422 | { |
357 | char* qstr = 0; | 423 | char* qstr = 0; |
358 | if(GetQuotedString(p, e, &qstr)) { | 424 | if((rc = parse822_quoted_string(p, e, &qstr)) == EOK) { |
359 | StrAppend(word, qstr); | 425 | rc = str_append(word, qstr); |
360 | 426 | ||
361 | StrFree(&qstr); | 427 | str_free(&qstr); |
362 | 428 | ||
363 | return 1; | 429 | if(rc != EOK) |
364 | } | 430 | *p = save; |
431 | |||
432 | return rc; | ||
365 | } | 433 | } |
434 | } | ||
366 | 435 | ||
367 | *p = save; | 436 | if(rc != EPARSE) { |
368 | // Necessary because the quoted string could have found | 437 | /* it's fatal */ |
369 | // a partial string (invalid syntax). Thus reset, the atom | 438 | return rc; |
370 | // will fail to if the syntax is invalid. | 439 | } |
371 | 440 | ||
372 | { | 441 | /* Necessary because the quoted string could have found |
373 | char* atom = 0; | 442 | * a partial string (invalid syntax). Thus reset, the atom |
374 | if(GetAtom(p, e, &atom)) { | 443 | * will fail too if the syntax is invalid. |
375 | StrAppend(word, atom); | 444 | */ |
376 | 445 | ||
377 | StrFree(&atom); | 446 | { |
447 | char* atom = 0; | ||
448 | if(parse822_atom(p, e, &atom) == EOK) { | ||
449 | rc = str_append(word, atom); | ||
450 | |||
451 | str_free(&atom); | ||
452 | |||
453 | if(rc != EOK) | ||
454 | *p = save; | ||
378 | 455 | ||
379 | return 1; | 456 | return rc; |
380 | } | ||
381 | } | 457 | } |
382 | *p = save; | 458 | } |
383 | 459 | ||
384 | return 0; | 460 | return EPARSE; |
385 | } | 461 | } |
386 | int GetPhrase(char** p, char* e, char** phrase) | 462 | |
463 | int parse822_phrase(const char** p, const char* e, char** phrase) | ||
387 | { | 464 | { |
388 | // phrase = 1*word | 465 | /* phrase = 1*word */ |
389 | 466 | ||
390 | if(!GetWord(p, e, phrase)) { | 467 | const char* save = *p; |
391 | return 0; | 468 | int rc; |
392 | } | ||
393 | // ok, got the 1 word, now append all the others we can | ||
394 | { | ||
395 | char* word = 0; | ||
396 | while(GetWord(p, e, &word)) { | ||
397 | StrAppendChar(phrase, ' '); | ||
398 | StrAppend(phrase, word); | ||
399 | *word = 0; | ||
400 | } | ||
401 | } | ||
402 | return 1; | ||
403 | } | ||
404 | 469 | ||
405 | // this is all a bit of a hack.... | 470 | if((rc = parse822_word(p, e, phrase))) |
406 | typedef struct _address mailbox_t; | 471 | return rc; |
407 | 472 | ||
408 | mailbox_t* new_mb(void) { | 473 | /* ok, got the 1 word, now append all the others we can */ |
409 | return (mailbox_t*) calloc(1, sizeof(mailbox_t)); | 474 | { |
410 | } | 475 | char* word = 0; |
411 | 476 | ||
412 | mailbox_t* fill_mb(char* comments, char* personal, char* local, char* domain) | 477 | while((rc = parse822_word(p, e, &word)) == EOK) { |
413 | { | 478 | rc = str_append_char(phrase, ' '); |
414 | mailbox_t* mb = new_mb(); | ||
415 | 479 | ||
416 | if(!mb) { | 480 | if(rc == EOK) |
417 | return 0; | 481 | rc = str_append(phrase, word); |
418 | } | ||
419 | 482 | ||
420 | mb->comments = comments; | 483 | str_free(&word); |
421 | mb->personal = personal; | ||
422 | 484 | ||
423 | /* this is wrong, local must be quoted */ | 485 | if(rc != EOK) |
424 | StrAppend(&mb->email, local); | 486 | break; |
425 | StrAppend(&mb->email, "@"); | 487 | } |
426 | StrAppend(&mb->email, domain); | 488 | if(rc == EPARSE) |
489 | rc = EOK; /* its not an error to find no more words */ | ||
490 | } | ||
491 | if(rc) | ||
492 | *p = save; | ||
427 | 493 | ||
428 | mb->local_part = local; | 494 | return rc; |
429 | mb->domain = domain; | 495 | } |
430 | 496 | ||
431 | return mb; | 497 | static address_t new_mb(void) { |
498 | return calloc(1, sizeof(struct _address)); | ||
432 | } | 499 | } |
433 | 500 | ||
434 | int address_create0 (address_t* a, const char* s) | 501 | static int fill_mb( |
502 | address_t* a, | ||
503 | char* comments, char* personal, char* local, char* domain) | ||
435 | { | 504 | { |
436 | // a must exist, and can't already have been initialized | 505 | int rc = EOK; |
437 | int status = 0; | 506 | |
507 | *a = new_mb(); | ||
508 | |||
509 | if(!*a) { | ||
510 | return ENOMEM; | ||
511 | } | ||
512 | |||
513 | (*a)->comments = comments; | ||
514 | (*a)->personal = personal; | ||
515 | |||
516 | /* this is wrong, local must be quoted */ | ||
517 | do { | ||
518 | /* loop exists only to break out of */ | ||
519 | if((rc = str_append(&(*a)->email, local))) | ||
520 | break; | ||
521 | if((rc = str_append(&(*a)->email, "@"))) | ||
522 | break; | ||
523 | if((rc = str_append(&(*a)->email, domain))) | ||
524 | break; | ||
525 | } while(0); | ||
526 | |||
527 | (*a)->local_part = local; | ||
528 | (*a)->domain = domain; | ||
529 | |||
530 | if(rc != EOK) { | ||
531 | /* note that the arguments have NOT been freed, we only own | ||
532 | * them on success. */ | ||
533 | free(*a); | ||
534 | } | ||
535 | |||
536 | return rc; | ||
537 | } | ||
438 | 538 | ||
439 | if(!a || *a) { | 539 | int address_create0 (address_t* a, const char* s) |
440 | return EINVAL; | 540 | { |
541 | /* 'a' must exist, and can't already have been initialized | ||
542 | */ | ||
543 | |||
544 | int status = 0; | ||
545 | |||
546 | if(!a || *a) { | ||
547 | return EINVAL; | ||
548 | } | ||
549 | |||
550 | status = parse822_address_list(a, (char*) s); | ||
551 | |||
552 | if(status == EOK) { | ||
553 | if(!*a) { | ||
554 | /* there was a group that got parsed correctly, but had | ||
555 | * no addresses... | ||
556 | */ | ||
557 | return EPARSE; | ||
441 | } | 558 | } |
442 | 559 | (*a)->addr = strdup(s); | |
443 | status = GetAddressList(a, (char*) s); | 560 | |
444 | 561 | if(!(*a)->addr) { | |
445 | if(status > 0) { | 562 | address_destroy(a); |
446 | (*a)->addr = strdup(s); | 563 | return ENOMEM; |
447 | if(!(*a)->addr) { | ||
448 | address_destroy(a); | ||
449 | return ENOMEM; | ||
450 | } | ||
451 | } | 564 | } |
565 | } | ||
452 | 566 | ||
453 | return 0; | 567 | return status; |
454 | } | 568 | } |
455 | 569 | ||
456 | 570 | int parse822_address_list(address_t* a, const char* s) | |
457 | int GetAddressList(mailbox_t** a, char* s) | ||
458 | { | 571 | { |
459 | // address-list = #(address) | 572 | /* address-list = #(address) */ |
460 | 573 | ||
461 | char** p = &s; | 574 | const char** p = &s; |
462 | char* e = &s[strlen(s)]; | 575 | const char* e = &s[strlen(s)]; |
463 | /* need to make the parsing api const-correct */ | 576 | int rc = EOK; |
464 | int ok = 0; | 577 | address_t* n = a; /* the next address we'll be creating */ |
465 | mailbox_t** an = a; /* the next address we'll be creating */ | ||
466 | 578 | ||
467 | if(!GetAddress(p, e, an)) | 579 | if((rc = parse822_address(p, e, n))) |
468 | return 0; | 580 | return rc; |
469 | 581 | ||
582 | parse822_skip_comments(p, e); | ||
583 | |||
584 | while(*p < e) | ||
585 | { | ||
470 | /* An address can contain a group, so an entire | 586 | /* An address can contain a group, so an entire |
471 | * list of addresses may have been appended, or no | 587 | * list of addresses may have been appended, or no |
472 | * addresses at all. Walk to the end. | 588 | * addresses at all. Walk to the end. |
473 | */ | 589 | */ |
474 | while(*an) { | 590 | while(*n) { |
475 | ++ok; | 591 | n = &(*n)->next; |
476 | an = &(*an)->next; | 592 | } |
593 | |||
594 | /* Remember that ',,a@b' is a valid list! So, we must find | ||
595 | * the <,>, but the address after it can be empty. | ||
596 | */ | ||
597 | if((rc = parse822_special(p, e, ','))) { | ||
598 | break; | ||
477 | } | 599 | } |
600 | parse822_skip_comments(p, e); | ||
601 | |||
602 | rc = parse822_address(p, e, n); | ||
478 | 603 | ||
479 | SkipComments(p, e); | 604 | if(rc == EOK || rc == EPARSE) { |
480 | 605 | /* that's cool, it may be a <,>, we'll find out if it isn't | |
481 | while(GetSpecial(p, e, ',')) | 606 | * at the top of the loop |
482 | { | 607 | */ |
483 | // Remember that 'a,,b' is a valid list! | 608 | rc = EOK; |
484 | if(GetAddress(p, e, an)) { | 609 | } else { |
485 | while(*an) { | 610 | /* anything else is a fatal error, break out */ |
486 | ++ok; | 611 | break; |
487 | an = &(*an)->next; | ||
488 | } | ||
489 | } | ||
490 | } | 612 | } |
491 | 613 | ||
492 | // A little problem here in that we return the number of | 614 | parse822_skip_comments(p, e); |
493 | // addresses found, but if there was trailing garbage | 615 | } |
494 | // in the text, then we'll just be ignoring that. | ||
495 | 616 | ||
496 | return ok; | 617 | if(rc) { |
497 | } | 618 | address_destroy(a); |
498 | int GetAddress(char**p, char* e, mailbox_t** a) | 619 | } |
499 | { | ||
500 | // address = mailbox / group | ||
501 | 620 | ||
502 | return | 621 | return rc; |
503 | GetMailBox(p, e, a) || | ||
504 | GetGroup(p, e, a); | ||
505 | } | 622 | } |
506 | int GetGroup(char**p, char* e, mailbox_t** a) | 623 | |
624 | int parse822_address(const char** p, const char* e, address_t* a) | ||
507 | { | 625 | { |
508 | // group = phrase ":" [#mailbox] ";" | 626 | /* address = mailbox / group */ |
509 | 627 | ||
510 | char* save = *p; | 628 | int rc; |
629 | |||
630 | if((rc = parse822_mail_box(p, e, a)) == EPARSE) | ||
631 | rc = parse822_group(p, e, a); | ||
511 | 632 | ||
512 | SkipComments(p, e); | 633 | return rc; |
634 | } | ||
513 | 635 | ||
514 | if(!GetPhrase(p, e, 0)) { | 636 | int parse822_group(const char** p, const char* e, address_t* a) |
515 | return 0; | 637 | { |
516 | } | 638 | /* group = phrase ":" [#mailbox] ";" */ |
517 | 639 | ||
518 | SkipComments(p, e); | 640 | const char* save = *p; |
641 | address_t* asave = a; /* so we can destroy these if parsing fails */ | ||
642 | int rc; | ||
519 | 643 | ||
520 | if(!GetSpecial(p, e, ':')) { | 644 | parse822_skip_comments(p, e); |
521 | *p = save; | ||
522 | return 0; | ||
523 | } | ||
524 | 645 | ||
525 | SkipComments(p, e); | 646 | *p = save; |
526 | 647 | ||
527 | if(GetMailBox(p, e, a)) { | 648 | if((rc = parse822_phrase(p, e, 0))) { |
528 | a = &(*a)->next; | 649 | return rc; |
650 | } | ||
529 | 651 | ||
530 | /* see if there are more */ | 652 | parse822_skip_comments(p, e); |
531 | SkipComments(p, e); | ||
532 | |||
533 | while(GetSpecial(p, e, ',')) { | ||
534 | SkipComments(p, e); | ||
535 | 653 | ||
536 | /* Rembmeber that a,,b is a valid list! */ | 654 | if((rc = parse822_special(p, e, ':'))) { |
537 | if(GetMailBox(p, e, a)) { | 655 | *p = save; |
538 | a = &(*a)->next; | 656 | return rc; |
539 | } | 657 | } |
540 | } | 658 | |
659 | /* Basically, on each loop, we may find a mailbox, but we must find | ||
660 | * a comma after the mailbox, otherwise we've popped off the end | ||
661 | * of the list. | ||
662 | */ | ||
663 | for(;;) { | ||
664 | parse822_skip_comments(p, e); | ||
665 | |||
666 | /* it's ok not be a mailbox, but other errors are fatal */ | ||
667 | rc = parse822_mail_box(p, e, a); | ||
668 | if(rc == EOK) { | ||
669 | a = &(*a)->next; | ||
670 | |||
671 | parse822_skip_comments(p, e); | ||
672 | } else if(rc != EPARSE) { | ||
673 | break; | ||
541 | } | 674 | } |
542 | 675 | ||
543 | if(!GetSpecial(p, e, ';')) { | 676 | if((rc = parse822_special(p, e, ','))) { |
544 | *p = save; | 677 | /* the commas aren't optional */ |
545 | return 0; | 678 | break; |
546 | } | 679 | } |
680 | } | ||
681 | if(rc == EPARSE) { | ||
682 | rc = EOK; /* ok, as long as we find the ";" next */ | ||
683 | } | ||
547 | 684 | ||
548 | return 1; | 685 | if(rc || (rc = parse822_special(p, e, ';'))) { |
549 | } | 686 | *p = save; |
550 | int GetMailBox(char** p, char* e, mailbox_t** a) | ||
551 | { | ||
552 | // mailbox = addr-spec "(" comment ")" / [phrase] route-addr | ||
553 | // | ||
554 | // Note: we parse the ancient comment on the right since | ||
555 | // it's such "common practice". :-( | ||
556 | // Note: phrase is called display-name in drums. | ||
557 | // Note: phrase is optional in drums, though not in RFC 822. | ||
558 | |||
559 | // -> addr-spec | ||
560 | if(GetAddrSpec(p, e, a)) { | ||
561 | char* comment = 0; | ||
562 | 687 | ||
563 | SkipWs(p, e); | 688 | address_destroy(asave); |
689 | } | ||
564 | 690 | ||
565 | if(GetComment(p, e, &comment)) { | 691 | return rc; |
566 | // yuck. | 692 | } |
567 | (*a)->personal = comment; | ||
568 | } | ||
569 | 693 | ||
570 | return 1; | 694 | int parse822_mail_box(const char** p, const char* e, address_t* a) |
695 | { | ||
696 | /* mailbox = addr-spec [ "(" comment ")" ] / [phrase] route-addr | ||
697 | * | ||
698 | * Note: we parse the ancient comment on the right since | ||
699 | * it's such "common practice". :-( | ||
700 | * Note: phrase is called display-name in drums. | ||
701 | * Note: phrase is optional in drums, though not in RFC 822. | ||
702 | */ | ||
703 | const char* save = *p; | ||
704 | int rc; | ||
705 | |||
706 | /* -> addr-spec */ | ||
707 | if((rc = parse822_addr_spec(p, e, a)) == EOK) { | ||
708 | /*int rc = EOK; */ | ||
709 | |||
710 | parse822_skip_ws(p, e); | ||
711 | |||
712 | /* yuck. */ | ||
713 | if((rc = parse822_comment(p, e, &(*a)->personal)) == EPARSE) { | ||
714 | rc = EOK; | ||
715 | /* cool if there's no comment, */ | ||
716 | } | ||
717 | /* but if something else is wrong, destroy the address */ | ||
718 | if(rc) { | ||
719 | address_destroy(a); | ||
720 | *p = save; | ||
571 | } | 721 | } |
572 | 722 | ||
573 | // -> phrase route-addr | 723 | return rc; |
574 | { | 724 | } |
575 | char* save = *p; | 725 | if(rc != EPARSE) { |
576 | char* phrase = 0; | 726 | *p = save; |
727 | return rc; | ||
728 | } | ||
729 | |||
730 | /* -> phrase route-addr */ | ||
731 | { | ||
732 | char* phrase = 0; | ||
733 | /*int rc; */ | ||
577 | 734 | ||
578 | GetPhrase(p, e, &phrase); | 735 | rc = parse822_phrase(p, e, &phrase); |
579 | 736 | ||
580 | if(!GetRouteAddr(p, e, a)) { | 737 | if(rc != EPARSE && rc != EOK) { |
581 | *p = save; | 738 | return rc; |
582 | return 0; | 739 | } |
583 | } | ||
584 | 740 | ||
585 | /* add the phrase */ | 741 | if((rc = parse822_route_addr(p, e, a))) { |
586 | (*a)->personal = phrase; | 742 | *p = save; |
743 | str_free(&phrase); | ||
744 | return rc; | ||
587 | } | 745 | } |
588 | 746 | ||
589 | return 1; | 747 | /* add the phrase */ |
748 | (*a)->personal = phrase; | ||
749 | } | ||
750 | |||
751 | return EOK; | ||
590 | } | 752 | } |
591 | int GetRouteAddr(char** p, char* e, mailbox_t ** a) | 753 | |
754 | int parse822_route_addr(const char** p, const char* e, address_t* a) | ||
592 | { | 755 | { |
593 | // route-addr = "<" [route] addr-spec ">" | 756 | /* route-addr = "<" [route] addr-spec ">" */ |
594 | 757 | ||
595 | char* save = *p; | 758 | const char* save = *p; |
596 | char* route = 0; | 759 | char* route = 0; |
760 | int rc; | ||
597 | 761 | ||
598 | SkipComments(p, e); | 762 | parse822_skip_comments(p, e); |
599 | 763 | ||
764 | if((rc = parse822_special(p, e, '<'))) { | ||
765 | *p = save; | ||
600 | 766 | ||
601 | if(!GetSpecial(p, e, '<')) { | 767 | return rc; |
602 | *p = save; | 768 | } |
603 | 769 | ||
604 | return 0; | 770 | parse822_route(p, e, &route); |
605 | } | ||
606 | 771 | ||
607 | GetRoute(p, e, &route); | 772 | if((rc = parse822_addr_spec(p, e, a))) { |
773 | *p = save; | ||
608 | 774 | ||
609 | if(!GetAddrSpec(p, e, a)) { | 775 | str_free(&route); |
610 | *p = save; | ||
611 | 776 | ||
612 | StrFree(&route); | 777 | return rc; |
778 | } | ||
613 | 779 | ||
614 | return 0; | 780 | (*a)->route = route; /* now we don't have to free our local */ |
615 | } | ||
616 | 781 | ||
617 | (*a)->route = route; /* now we don't have to free our local */ | 782 | parse822_skip_comments(p, e); |
618 | 783 | ||
619 | SkipComments(p, e); | 784 | if((rc = parse822_special(p, e, '>'))) { |
620 | 785 | *p = save; | |
621 | if(!GetSpecial(p, e, '>')) { | ||
622 | *p = save; | ||
623 | 786 | ||
624 | address_destroy(a); | 787 | address_destroy(a); |
625 | 788 | ||
626 | return 0; | 789 | return rc; |
627 | } | 790 | } |
628 | 791 | ||
629 | return 1; | 792 | return EOK; |
630 | } | 793 | } |
631 | 794 | ||
632 | int GetRoute(char** p, char* e, char** route) | 795 | int parse822_route(const char** p, const char* e, char** route) |
633 | { | 796 | { |
634 | // route = 1#("@" domain ) ":" | 797 | /* route = 1#("@" domain ) ":" */ |
635 | // | ||
636 | // Note: I don't hav a way of returning the route, so toss it for now. | ||
637 | 798 | ||
638 | char* accumulator = 0; | 799 | const char* save = *p; |
800 | char* accumulator = 0; | ||
801 | int rc = EOK; | ||
639 | 802 | ||
640 | for(;;) { | 803 | for(;;) { |
641 | SkipComments(p, e); | 804 | parse822_skip_comments(p, e); |
642 | 805 | ||
643 | if(!GetSpecial(p, e, '@')) { | 806 | if((rc = parse822_special(p, e, '@'))) { |
644 | // it's not a route | 807 | break; |
645 | return 0; | 808 | } |
646 | } | ||
647 | 809 | ||
648 | StrAppend(&accumulator, "@"); | 810 | if((rc = str_append(&accumulator, "@"))) { |
811 | break; | ||
812 | } | ||
649 | 813 | ||
650 | SkipComments(p, e); | 814 | parse822_skip_comments(p, e); |
651 | 815 | ||
652 | if(!GetDomain(p, e, &accumulator)) { | 816 | if((rc = parse822_domain(p, e, &accumulator))) { |
653 | // it looked like a route, but there's no domain! | 817 | /* it looked like a route, but there's no domain! */ |
654 | return 0; | 818 | break; |
655 | } | 819 | } |
656 | 820 | ||
657 | SkipComments(p, e); | 821 | parse822_skip_comments(p, e); |
658 | 822 | ||
659 | if(!GetSpecial(p, e, ',')) { | 823 | if((rc = parse822_special(p, e, ',')) == EPARSE) { |
660 | // there's no more routes | 824 | /* no more routes, but we got one so its ok */ |
661 | break; | 825 | rc = EOK; |
662 | } | 826 | break; |
663 | StrAppend(&accumulator, ", "); | 827 | } |
828 | if((rc = str_append(&accumulator, ", "))) { | ||
829 | break; | ||
664 | } | 830 | } |
831 | } | ||
665 | 832 | ||
666 | SkipComments(p, e); | 833 | parse822_skip_comments(p, e); |
667 | 834 | ||
668 | if(!GetSpecial(p, e, ':')) { | 835 | if(!rc) { |
669 | return 0; | 836 | rc = parse822_special(p, e, ':'); |
670 | } | 837 | } |
671 | 838 | ||
672 | StrAppend(route, accumulator); | 839 | if(!rc) { |
673 | StrFree(&accumulator); | 840 | rc = str_append(route, accumulator); |
841 | } | ||
842 | if(rc) { | ||
843 | str_free(&accumulator); | ||
844 | *p = save; | ||
845 | } | ||
674 | 846 | ||
675 | return 1; | 847 | return rc; |
676 | } | 848 | } |
677 | int GetAddrSpec(char** p, char* e, mailbox_t ** a) | 849 | |
850 | int parse822_addr_spec(const char** p, const char* e, address_t* a) | ||
678 | { | 851 | { |
679 | // addr-spec = local-part "@" domain | 852 | /* addr-spec = local-part "@" domain */ |
680 | 853 | ||
681 | char* save = *p; | 854 | const char* save = *p; |
682 | char* local_part = 0; | 855 | char* local_part = 0; |
683 | char* domain = 0; | 856 | char* domain = 0; |
857 | int rc; | ||
684 | 858 | ||
685 | if(!GetLocalPart(p, e, &local_part)) | 859 | rc = parse822_local_part(p, e, &local_part); |
686 | return 0; | ||
687 | 860 | ||
688 | SkipComments(p, e); | 861 | parse822_skip_comments(p, e); |
689 | 862 | ||
690 | if(!GetSpecial(p, e, '@')) { | 863 | if(!rc) { |
691 | *p = save; | 864 | rc = parse822_special(p, e, '@'); |
692 | return 0; | 865 | } |
693 | } | ||
694 | 866 | ||
695 | if(!GetDomain(p, e, &domain)) { | 867 | if(!rc) { |
696 | *p = save; | 868 | rc = parse822_domain(p, e, &domain); |
697 | return 0; | 869 | } |
698 | } | ||
699 | 870 | ||
700 | *a = fill_mb(0, 0, local_part, domain); | 871 | if(!rc) { |
872 | rc = fill_mb(a, 0, 0, local_part, domain); | ||
873 | } | ||
701 | 874 | ||
702 | return 1; | 875 | if(rc) { |
876 | *p = save; | ||
877 | str_free(&local_part); | ||
878 | str_free(&domain); | ||
879 | } | ||
880 | return rc; | ||
703 | } | 881 | } |
704 | int GetLocalPart(char** p, char* e, char** local_part) | ||
705 | { | ||
706 | // local-part = word *("." word) | ||
707 | |||
708 | // Note: rewrite as -> word ["." local-part] | ||
709 | |||
710 | char* save = *p; | ||
711 | 882 | ||
712 | SkipComments(p, e); | 883 | int parse822_local_part(const char** p, const char* e, char** local_part) |
713 | 884 | { | |
714 | if(!GetWord(p, e, local_part)) { | 885 | /* local-part = word *("." word) |
715 | *p = save; | 886 | * |
716 | return 0; | 887 | * Note: rewrite as -> word ["." local-part] |
717 | } | 888 | */ |
718 | // we've got a local-part, but keep looking for more | ||
719 | 889 | ||
720 | save = *p; | 890 | const char* save = *p; |
891 | const char* save2 = *p; | ||
892 | int rc; | ||
721 | 893 | ||
722 | SkipComments(p, e); | 894 | parse822_skip_comments(p, e); |
723 | 895 | ||
724 | if(!GetSpecial(p, e, '.')) { | 896 | if((rc = parse822_word(p, e, local_part))) { |
725 | *p = save; | 897 | *p = save; |
726 | return 1; | 898 | return rc; |
727 | } | 899 | } |
728 | { | 900 | /* We've got a local-part, but keep looking for more. */ |
729 | char* more = 0; | 901 | |
730 | 902 | parse822_skip_comments(p, e); | |
731 | if(!GetLocalPart(p, e, &more)) { | 903 | |
732 | *p = save; | 904 | /* If we get a parse error, we roll back to save2, but if |
733 | return 1; | 905 | * something else failed, we have to roll back to save. |
734 | } | 906 | */ |
735 | // append more | 907 | save2 = *p; |
736 | StrAppend(local_part, "."); | 908 | |
737 | StrAppend(local_part, more); | 909 | rc = parse822_special(p, e, '.'); |
738 | StrFree(&more); | 910 | |
911 | if(!rc) { | ||
912 | char* more = 0; | ||
913 | if((rc = parse822_local_part(p, e, &more)) == EOK) { | ||
914 | /* append more */ | ||
915 | if((rc = str_append(local_part, ".")) == EOK) { | ||
916 | rc = str_append(local_part, more); | ||
917 | } | ||
918 | str_free(&more); | ||
739 | } | 919 | } |
740 | 920 | } | |
741 | return 1; | 921 | |
922 | if(rc == EPARSE) { | ||
923 | /* if we didn't get more ("." word) pairs, that's ok */ | ||
924 | *p = save2; | ||
925 | rc = EOK; | ||
926 | } | ||
927 | if(rc) { | ||
928 | /* if anything else failed, that's real */ | ||
929 | *p = save; | ||
930 | str_free(local_part); | ||
931 | } | ||
932 | return rc; | ||
742 | } | 933 | } |
743 | int GetDomain(char** p, char* e, char** domain) | 934 | |
935 | int parse822_domain(const char** p, const char* e, char** domain) | ||
744 | { | 936 | { |
745 | // domain = sub-domain *("." sub-domain) | 937 | /* domain = sub-domain *("." sub-domain) |
938 | * | ||
939 | * Note: rewrite as -> sub-domain ("." domain) | ||
940 | */ | ||
746 | 941 | ||
747 | // Note: rewrite as -> sub-domain ("." domain) | 942 | const char* save = *p; |
943 | const char* save2 = 0; | ||
944 | int rc; | ||
748 | 945 | ||
749 | char* save = 0; | 946 | parse822_skip_comments(p, e); |
750 | 947 | ||
751 | if(!GetSubDomain(p, e, domain)) | 948 | if((rc = parse822_sub_domain(p, e, domain))) { |
752 | return 0; | 949 | *p = save; |
950 | return rc; | ||
951 | } | ||
753 | 952 | ||
754 | // we've got the 1, keep looking for more | ||
755 | 953 | ||
756 | save = *p; | 954 | /* We save before skipping comments to preserve the comment |
955 | * at the end of a domain, the addr-spec may want to abuse it | ||
956 | * for a personal name. | ||
957 | */ | ||
958 | save2 = *p; | ||
757 | 959 | ||
758 | SkipComments(p, e); | 960 | /* we've got the 1, keep looking for more */ |
759 | 961 | ||
760 | if(!GetSpecial(p, e, '.')) { | 962 | parse822_skip_comments(p, e); |
761 | // we do this to preserve the comment at the end of a | ||
762 | // domain, the addr-spec may want to abuse it for a | ||
763 | // personal name. | ||
764 | *p = save; | ||
765 | return 1; | ||
766 | } | ||
767 | { | ||
768 | char* more = 0; | ||
769 | if(!GetDomain(p, e, &more)) { | ||
770 | *p = save; | ||
771 | return 1; | ||
772 | } | ||
773 | |||
774 | StrAppend(domain, "."); | ||
775 | StrAppend(domain, more); | ||
776 | StrFree(&more); | ||
777 | } | ||
778 | 963 | ||
779 | return 1; | 964 | rc = parse822_special(p, e, '.'); |
780 | } | ||
781 | int GetSubDomain(char** p, char* e, char** sub_domain) | ||
782 | { | ||
783 | // sub-domain = domain-ref / domain-literal | ||
784 | // Note: domain-literal isn't supported yet. | ||
785 | 965 | ||
786 | return | 966 | if(!rc) { |
787 | GetDomainRef(p, e, sub_domain) || | 967 | char* more = 0; |
788 | GetDomainLiteral(p, e, sub_domain); | 968 | if((rc = parse822_domain(p, e, &more)) == EOK) { |
969 | if((rc = str_append(domain, ".")) == EOK) { | ||
970 | rc = str_append(domain, more); | ||
971 | } | ||
972 | str_free(&more); | ||
973 | } | ||
974 | } | ||
975 | if(rc == EPARSE) { | ||
976 | /* we didn't parse more ("." sub-domain) pairs, that's ok */ | ||
977 | *p = save2; | ||
978 | rc = EOK; | ||
979 | } | ||
980 | |||
981 | if(rc) { | ||
982 | /* something else failed, roll it all back */ | ||
983 | *p = save; | ||
984 | str_free(domain); | ||
985 | } | ||
986 | return rc; | ||
789 | } | 987 | } |
790 | int GetDomainRef(char** p, char* e, char** domain_ref) | ||
791 | { | ||
792 | // domain-ref = atom | ||
793 | 988 | ||
794 | return GetAtom(p, e, domain_ref); | 989 | int parse822_sub_domain(const char** p, const char* e, char** sub_domain) |
795 | } | ||
796 | int GetDText(char** p, char* e, char** dtext) | ||
797 | { | 990 | { |
798 | // dtext = 1*dtext | 991 | /* sub-domain = domain-ref / domain-literal |
799 | 992 | */ | |
800 | // Note: dtext is only defined as a character class in | ||
801 | // RFC822, but this definition is more useful for | ||
802 | // slurping domain literals. | ||
803 | 993 | ||
804 | char* start = *p; | 994 | int rc; |
995 | |||
996 | if((rc = parse822_domain_ref(p, e, sub_domain)) == EPARSE) | ||
997 | rc = parse822_domain_literal(p, e, sub_domain); | ||
805 | 998 | ||
806 | while(*p < e && IsDText(**p)) { | 999 | return rc; |
807 | *p += 1; | 1000 | } |
808 | } | ||
809 | |||
810 | if(start == *p) { | ||
811 | return 0; | ||
812 | } | ||
813 | 1001 | ||
814 | StrAppendRange(dtext, start, *p); | 1002 | int parse822_domain_ref(const char** p, const char* e, char** domain_ref) |
1003 | { | ||
1004 | /* domain-ref = atom */ | ||
815 | 1005 | ||
816 | return 1; | 1006 | return parse822_atom(p, e, domain_ref); |
817 | } | 1007 | } |
818 | 1008 | ||
819 | int GetDomainLiteral(char** p, char* e, char** domain_literal) | 1009 | int parse822_d_text(const char** p, const char* e, char** dtext) |
820 | { | 1010 | { |
821 | // domain-literal = "[" *(dtext / quoted-pair) "]" | 1011 | /* d-text = 1*dtext |
1012 | * | ||
1013 | * Note: dtext is only defined as a character class in | ||
1014 | * RFC822, but this definition is more useful for | ||
1015 | * slurping domain literals. | ||
1016 | */ | ||
822 | 1017 | ||
823 | char* save = *p; | 1018 | const char* start = *p; |
1019 | int rc = EOK; | ||
824 | 1020 | ||
825 | char* literal = 0; | 1021 | while(*p < e && parse822_is_d_text(**p)) { |
1022 | *p += 1; | ||
1023 | } | ||
826 | 1024 | ||
827 | if(!GetSpecial(p, e, '[')) { | 1025 | if(start == *p) { |
828 | return 0; | 1026 | return EPARSE; |
829 | } | 1027 | } |
830 | StrAppendChar(&literal, '['); | ||
831 | 1028 | ||
832 | while(GetDText(p, e, &literal) || GetQuotedPair(p, e, &literal)) { | 1029 | if((rc = str_append_range(dtext, start, *p))) { |
833 | /* Eat all of this we can get! */ | 1030 | *p = start; |
834 | } | 1031 | } |
835 | if(!GetSpecial(p, e, ']')) { | ||
836 | *p = save; | ||
837 | 1032 | ||
838 | return 0; | 1033 | return rc; |
839 | } | 1034 | } |
840 | StrAppendChar(&literal, ']'); | ||
841 | 1035 | ||
842 | StrAppend(domain_literal, literal); | 1036 | int parse822_domain_literal(const char** p, const char* e, char** domain_literal) |
1037 | { | ||
1038 | /* domain-literal = "[" *(dtext / quoted-pair) "]" */ | ||
843 | 1039 | ||
844 | StrFree(&literal); | 1040 | const char* save = *p; |
1041 | char* literal = 0; | ||
1042 | int rc; | ||
845 | 1043 | ||
846 | return 1; | 1044 | if((rc = parse822_special(p, e, '['))) { |
1045 | return rc; | ||
1046 | } | ||
1047 | if((rc = str_append_char(&literal, '['))) { | ||
1048 | *p = save; | ||
1049 | return rc; | ||
1050 | } | ||
1051 | |||
1052 | while( | ||
1053 | (rc = parse822_d_text(p, e, &literal)) == EOK || | ||
1054 | (rc = parse822_quoted_pair(p, e, &literal)) == EOK | ||
1055 | ) { | ||
1056 | /* Eat all of this we can get! */ | ||
1057 | } | ||
1058 | if(rc == EPARSE) { | ||
1059 | rc = EOK; | ||
1060 | } | ||
1061 | if(!rc) { | ||
1062 | rc = parse822_special(p, e, ']'); | ||
1063 | } | ||
1064 | if(!rc) { | ||
1065 | rc = str_append_char(&literal, ']'); | ||
1066 | } | ||
1067 | if(!rc) { | ||
1068 | rc = str_append(domain_literal, literal); | ||
1069 | } | ||
1070 | |||
1071 | str_free(&literal); | ||
1072 | |||
1073 | if(rc) { | ||
1074 | *p = save; | ||
1075 | } | ||
1076 | return rc; | ||
847 | } | 1077 | } |
848 | 1078 | ||
849 | #if 0 | 1079 | #if 0 |
850 | int GetFieldName(char** p, char* e, char** fieldname) | 1080 | int parse822_field_name(const char** p, const char* e, char** fieldname) |
851 | { | 1081 | { |
852 | // field-name = 1*<any CHAR, excluding CTLS, SPACE, and ":"> ":" | 1082 | /* field-name = 1*<any char, excluding ctlS, space, and ":"> ":" */ |
853 | 1083 | ||
854 | Ptr save = p; | 1084 | Ptr save = p; |
855 | 1085 | ||
856 | Rope fn; | 1086 | Rope fn; |
857 | 1087 | ||
858 | while(*p != e) { | 1088 | while(*p != e) { |
859 | char c = *p; | 1089 | char c = *p; |
860 | 1090 | ||
861 | if(!IsCHAR(c)) | 1091 | if(!parse822_is_char(c)) |
862 | break; | 1092 | break; |
863 | 1093 | ||
864 | if(IsCTL(c)) | 1094 | if(parse822_is_ctl(c)) |
865 | break; | 1095 | break; |
866 | if(IsSPACE(c)) | 1096 | if(parse822_is_space(c)) |
867 | break; | 1097 | break; |
868 | if(c == ':') | 1098 | if(c == ':') |
869 | break; | 1099 | break; |
870 | 1100 | ||
871 | fn.append(c); | 1101 | fn.append(c); |
872 | *p += 1; | 1102 | *p += 1; |
873 | } | 1103 | } |
874 | // must be at least one char in the field name | 1104 | /* must be at least one char in the field name */ |
875 | if(fn.empty()) { | 1105 | if(fn.empty()) { |
876 | p = save; | 1106 | p = save; |
877 | return 0; | 1107 | return 0; |
878 | } | 1108 | } |
879 | SkipComments(p, e); | 1109 | parse822_skip_comments(p, e); |
880 | 1110 | ||
881 | if(!GetSpecial(p, e, ':')) { | 1111 | if(!parse822_special(p, e, ':')) { |
882 | p = save; | 1112 | p = save; |
883 | return 0; | 1113 | return 0; |
884 | } | 1114 | } |
885 | 1115 | ||
886 | fieldname = fn; | 1116 | fieldname = fn; |
887 | 1117 | ||
888 | return 1; | 1118 | return 1; |
889 | } | 1119 | } |
890 | int GetFieldBody(char** p, char* e, Rope& fieldbody) | 1120 | |
1121 | int parse822_field_body(const char** p, const char* e, Rope& fieldbody) | ||
891 | { | 1122 | { |
892 | // field-body = *text [CRLF LWSP-char field-body] | 1123 | /* field-body = *text [CRLF lwsp-char field-body] */ |
893 | 1124 | ||
894 | Ptr save = p; | 1125 | Ptr save = p; |
895 | 1126 | ||
896 | Rope fb; | 1127 | Rope fb; |
897 | 1128 | ||
898 | for(;;) | 1129 | for(;;) |
899 | { | 1130 | { |
900 | Ptr eol = p; | 1131 | Ptr eol = p; |
901 | while(eol != e) { | 1132 | while(eol != e) { |
902 | char c = *eol; | 1133 | char c = *eol; |
903 | if(eol[0] == '\r' && (eol+1) != e && eol[1] == '\n') | 1134 | if(eol[0] == '\r' && (eol+1) != e && eol[1] == '\n') |
904 | break; | 1135 | break; |
905 | ++eol; | 1136 | ++eol; |
906 | } | 1137 | } |
907 | fb.append(p, eol); | 1138 | fb.append(p, eol); |
908 | p = eol; | 1139 | p = eol; |
909 | if(eol == e) | 1140 | if(eol == e) |
910 | break; // no more, so we're done | 1141 | break; /* no more, so we're done */ |
911 | 1142 | ||
912 | assert(p[0] == '\r'); | 1143 | assert(p[0] == '\r'); |
913 | assert(p[1] == '\n'); | 1144 | assert(p[1] == '\n'); |
914 | 1145 | ||
915 | p += 2; | 1146 | p += 2; |
916 | 1147 | ||
917 | if(*p == e) | 1148 | if(*p == e) |
918 | break; // no more, so we're done | 1149 | break; /* no more, so we're done */ |
919 | 1150 | ||
920 | // check if next line is a continuation line | 1151 | /* check if next line is a continuation line */ |
921 | if(*p != ' ' && *p != '\t') | 1152 | if(*p != ' ' && *p != '\t') |
922 | break; | 1153 | break; |
923 | } | 1154 | } |
924 | 1155 | ||
925 | fieldbody = fb; | 1156 | fieldbody = fb; |
926 | 1157 | ||
927 | return 1; | 1158 | return 1; |
928 | } | 1159 | } |
1160 | |||
929 | #endif | 1161 | #endif |
930 | 1162 | ... | ... |
-
Please register or sign in to post a comment