Commit 631205fb 631205fb8afbcd33c8eaeb722700d7b976879cbf by Sam Roberts

Added a function to do generalized URL parsing.

1 parent d5cb4d08
...@@ -19,24 +19,42 @@ ...@@ -19,24 +19,42 @@
19 # include <config.h> 19 # include <config.h>
20 #endif 20 #endif
21 21
22 #include <string.h> 22 #include <ctype.h>
23 #include <stdlib.h>
24 #include <errno.h> 23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #ifdef HAVE_STRINGS_H 26 #ifdef HAVE_STRINGS_H
26 # include <strings.h> 27 # include <strings.h>
27 #endif 28 #endif
28 29
30 #include <mailutils/mutil.h>
29 #include <mailutils/registrar.h> 31 #include <mailutils/registrar.h>
32
30 #include <misc.h> 33 #include <misc.h>
31 #include <url0.h> 34 #include <url0.h>
32 35
36 #ifndef EPARSE
37 # define EPARSE ENOENT
38 #endif
39
40 /*
41 TODO: implement functions to create a url and encode it properly.
42 */
43
44 static int url_parse0(url_t, char*);
45
33 int 46 int
34 url_create (url_t *purl, const char *name) 47 url_create (url_t * purl, const char *name)
35 { 48 {
36 url_t url = calloc(1, sizeof (*url)); 49 url_t url = calloc (1, sizeof (*url));
37 if (url == NULL) 50 if (url == NULL)
38 return ENOMEM; 51 return ENOMEM;
39 url->name = strdup (name); 52 url->name = strdup (name);
53 if (url->name == NULL)
54 {
55 free (url);
56 return ENOMEM;
57 }
40 *purl = url; 58 *purl = url;
41 return 0; 59 return 0;
42 } 60 }
...@@ -81,6 +99,250 @@ url_destroy (url_t *purl) ...@@ -81,6 +99,250 @@ url_destroy (url_t *purl)
81 } 99 }
82 } 100 }
83 101
/* Decode the %XX escapes of S (see RFC 1738, section 2.2).

   Returns a newly allocated, NUL-terminated string that the caller
   must free(), or NULL if S is NULL or memory cannot be allocated.
   Characters other than '%' are copied unchanged.  A '%' and the (at
   most two) characters that follow it are replaced by one byte: the
   value mu_hexstr2ul() stores for those characters, truncated to char.

   Malformed escapes are not reported as errors: whatever value
   mu_hexstr2ul() leaves in the accumulator (0 if it stores nothing) is
   emitted and the escape characters are skipped.
   NOTE(review): an escape that decodes to 0 (e.g. "%00") puts a NUL in
   the middle of the result, so C-string users see a truncated value --
   confirm whether callers care.  */
char *
url_decode (const char *s)
{
  const char *eos;
  char *d;
  size_t i = 0;

  if (s == NULL)
    return NULL;

  /* The decoded text is never longer than the input, so a buffer the
     size of the input is always large enough.  */
  d = strdup (s);
  if (d == NULL)
    return NULL;

  eos = s + strlen (s);

  while (s < eos)
    {
      if (*s != '%')
	{
	  d[i++] = *s++;
	}
      else
	{
	  unsigned long ul = 0;
	  size_t avail;

	  s++;			/* step over the '%' */

	  /* Never look at, or advance past, more characters than are
	     left: a '%' at the very end of the string is followed by
	     fewer than two escape characters.  */
	  avail = (size_t) (eos - s);
	  if (avail > 2)
	    avail = 2;

	  /* The return value is deliberately ignored: a bad escape just
	     yields whatever ended up in UL.  */
	  if (avail > 0)
	    mu_hexstr2ul (&ul, s, avail);

	  s += avail;

	  d[i++] = (char) ul;
	}
    }

  d[i] = 0;

  return d;
}
142
143 int
144 url_parse (url_t url)
145 {
146 int err = 0;
147 char *n = NULL;
148 struct _url u = { 0, };
149
150 if (!url || !url->name)
151 return EINVAL;
152
153 /* can't have been parsed already */
154 if(url->scheme || url->user || url->passwd || url->auth ||
155 url->host || url->path || url->query)
156 return EINVAL;
157
158 n = strdup (url->name);
159
160 if (!n)
161 return ENOMEM;
162
163 err = url_parse0 (&u, n);
164
165 if (!err)
166 {
167 /* Dup the strings we found. We wouldn't have to do this
168 if we did a single alloc of the source url name, and
169 kept it around. It's also a good time to do hex decoding,
170 though.
171 */
172
173
174 #define UALLOC(X) \
175 if(u.X && (url->X = url_decode(u.X)) == 0) { \
176 err = ENOMEM; \
177 goto CLEANUP; \
178 }
179
180 UALLOC (scheme)
181 UALLOC (user)
182 UALLOC (passwd)
183 UALLOC (auth)
184 UALLOC (host)
185 UALLOC (path)
186 UALLOC (query)
187
188 #undef UALLOC
189
190 url->port = u.port;
191 }
192
193 CLEANUP:
194 free (n);
195
196 if (err)
197 {
198 #define UFREE(X) if(X) { free(X); X = 0; }
199
200 UFREE(url->scheme)
201 UFREE(url->user)
202 UFREE(url->passwd)
203 UFREE(url->auth)
204 UFREE(url->host)
205 UFREE(url->path)
206 UFREE(url->query)
207
208 #undef UFREE
209 }
210
211 return err;
212 }
213
214 /*
215
216 Syntax, condensed from RFC 1738, and extended with the ;auth=
217 of RFC 2384 (for POP) and RFC 2192 (for IMAP):
218
219 url =
220 scheme ":" = "//"
221
222 [ user [ ( ":" password ) | ( ";auth=" auth ) ] "@" ]
223
224 host [ ":" port ]
225
226 [ ( "/" urlpath ) | ( "?" query ) ]
227
228 All hell will break loose in this parser if the user/pass/auth
229 portion is missing, and the urlpath has any @ or : characters
230 in it. A imap mailbox, say, named after the email address of
231 the person the mail is from:
232
233 imap://imap.uniserve.com/alain@qnx.com
234
235 Is this required to be % quoted, though? I hope so!
236
237 */
238
239 static int
240 url_parse0 (url_t u, char *name)
241 {
242 char *p; /* pointer into name */
243
244 /* reject the obvious */
245 if (name == NULL)
246 return EINVAL;
247
248 /* Parse out the SCHEME. */
249 p = strchr (name, ':');
250 if (p == NULL)
251 {
252 return EPARSE;
253 }
254
255 *p++ = 0;
256
257 u->scheme = name;
258
259 /* RFC 1738, section 2.1, lower the scheme case */
260 for ( ; name < p; name++)
261 *name = tolower(*name);
262
263 name = p;
264
265 if (strncmp (name, "//", 2) != 0)
266 return EPARSE;
267
268 name += 2;
269
270 /* Split into LHS and RHS of the '@', and then parse each side. */
271 u->host = strchr (name, '@');
272 if (u->host == NULL)
273 u->host = name;
274 else
275 {
276 /* Parse the LHS into an identification/authentication pair. */
277 *u->host++ = 0;
278
279 u->user = name;
280
281 /* Try to split the user into a:
282 <user>:<password>
283 or
284 <user>;AUTH=<auth>
285 */
286
287 for (; *name; name++)
288 {
289 if (*name == ';')
290 {
291 /* Make sure it's the auth token. */
292 if (strncasecmp (name + 1, "auth=", 5) == 0)
293 {
294 *name++ = 0;
295
296 name += 5;
297
298 u->auth = name;
299
300 break;
301 }
302 }
303 if (*name == ':')
304 {
305 *name++ = 0;
306 u->passwd = name;
307 break;
308 }
309 }
310 }
311
312 /* Parse the host and port from the RHS. */
313 p = strchr (u->host, ':');
314
315 if (p)
316 {
317 *p++ = 0;
318
319 u->port = strtol (p, &p, 10);
320
321 /* Check for garbage after the port: we should be on the start
322 of a path, a query, or at the end of the string. */
323 if (*p && strcspn (p, "/?") != 0)
324 return EPARSE;
325 }
326 else
327 p = u->host + strcspn (u->host, "/?");
328
329 /* Either way, if we're not at a nul, we're at a path or query. */
330 if (*p == '?')
331 {
332 /* found a query */
333 *p++ = 0;
334 u->query = p;
335 }
336 if (*p == '/')
337 {
338 /* found a path */
339 *p++ = 0;
340 u->path = p;
341 }
342
343 return 0;
344 }
345
84 int 346 int
85 url_get_scheme (const url_t url, char *scheme, size_t len, size_t *n) 347 url_get_scheme (const url_t url, char *scheme, size_t len, size_t *n)
86 { 348 {
......