Added a function to do generalized URL parsing.
Showing
1 changed file
with
266 additions
and
4 deletions
... | @@ -19,24 +19,42 @@ | ... | @@ -19,24 +19,42 @@ |
19 | # include <config.h> | 19 | # include <config.h> |
20 | #endif | 20 | #endif |
21 | 21 | ||
22 | #include <string.h> | 22 | #include <ctype.h> |
23 | #include <stdlib.h> | ||
24 | #include <errno.h> | 23 | #include <errno.h> |
24 | #include <stdlib.h> | ||
25 | #include <string.h> | ||
25 | #ifdef HAVE_STRINGS_H | 26 | #ifdef HAVE_STRINGS_H |
26 | # include <strings.h> | 27 | # include <strings.h> |
27 | #endif | 28 | #endif |
28 | 29 | ||
30 | #include <mailutils/mutil.h> | ||
29 | #include <mailutils/registrar.h> | 31 | #include <mailutils/registrar.h> |
32 | |||
30 | #include <misc.h> | 33 | #include <misc.h> |
31 | #include <url0.h> | 34 | #include <url0.h> |
32 | 35 | ||
36 | #ifndef EPARSE | ||
37 | # define EPARSE ENOENT | ||
38 | #endif | ||
39 | |||
40 | /* | ||
41 | TODO: implement functions to create a url and encode it properly. | ||
42 | */ | ||
43 | |||
44 | static int url_parse0(url_t, char*); | ||
45 | |||
33 | int | 46 | int |
34 | url_create (url_t *purl, const char *name) | 47 | url_create (url_t * purl, const char *name) |
35 | { | 48 | { |
36 | url_t url = calloc(1, sizeof (*url)); | 49 | url_t url = calloc (1, sizeof (*url)); |
37 | if (url == NULL) | 50 | if (url == NULL) |
38 | return ENOMEM; | 51 | return ENOMEM; |
39 | url->name = strdup (name); | 52 | url->name = strdup (name); |
53 | if (url->name == NULL) | ||
54 | { | ||
55 | free (url); | ||
56 | return ENOMEM; | ||
57 | } | ||
40 | *purl = url; | 58 | *purl = url; |
41 | return 0; | 59 | return 0; |
42 | } | 60 | } |
... | @@ -81,6 +99,250 @@ url_destroy (url_t *purl) | ... | @@ -81,6 +99,250 @@ url_destroy (url_t *purl) |
81 | } | 99 | } |
82 | } | 100 | } |
83 | 101 | ||
/* Value of the hexadecimal digit C.  The caller must already have
   checked C with isxdigit.  */
static int
url_hexval (int c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  return c - 'A' + 10;
}

/* From RFC 1738, section 2.2.

   Return a newly allocated copy of S in which every "%XX" escape
   (XX being two hexadecimal digits) is replaced by the byte it
   encodes.  A '%' that is not followed by two hexadecimal digits,
   including one that sits in the last two characters of S, is
   malformed; it is copied through literally rather than read past
   the end of the string.

   Returns NULL if S is NULL or if memory cannot be allocated.  The
   caller owns the result and must free it.  Note that "%00" decodes
   to a NUL byte, which ends the result early when it is used as a C
   string.  */
char *
url_decode (const char *s)
{
  size_t len;
  size_t in;			/* read index into s */
  size_t out;			/* write index into d */
  char *d;

  if (s == NULL)
    return NULL;

  len = strlen (s);

  /* Decoding never makes the string longer, so len + 1 bytes is
     always enough.  */
  d = malloc (len + 1);
  if (d == NULL)
    return NULL;

  for (in = 0, out = 0; in < len; out++)
    {
      if (s[in] == '%'
	  && len - in > 2
	  && isxdigit ((unsigned char) s[in + 1])
	  && isxdigit ((unsigned char) s[in + 2]))
	{
	  d[out] = (char) (url_hexval ((unsigned char) s[in + 1]) * 16
			   + url_hexval ((unsigned char) s[in + 2]));
	  in += 3;
	}
      else
	{
	  d[out] = s[in];
	  in++;
	}
    }

  d[out] = 0;

  return d;
}
142 | |||
143 | int | ||
144 | url_parse (url_t url) | ||
145 | { | ||
146 | int err = 0; | ||
147 | char *n = NULL; | ||
148 | struct _url u = { 0, }; | ||
149 | |||
150 | if (!url || !url->name) | ||
151 | return EINVAL; | ||
152 | |||
153 | /* can't have been parsed already */ | ||
154 | if(url->scheme || url->user || url->passwd || url->auth || | ||
155 | url->host || url->path || url->query) | ||
156 | return EINVAL; | ||
157 | |||
158 | n = strdup (url->name); | ||
159 | |||
160 | if (!n) | ||
161 | return ENOMEM; | ||
162 | |||
163 | err = url_parse0 (&u, n); | ||
164 | |||
165 | if (!err) | ||
166 | { | ||
167 | /* Dup the strings we found. We wouldn't have to do this | ||
168 | if we did a single alloc of the source url name, and | ||
169 | kept it around. It's also a good time to do hex decoding, | ||
170 | though. | ||
171 | */ | ||
172 | |||
173 | |||
174 | #define UALLOC(X) \ | ||
175 | if(u.X && (url->X = url_decode(u.X)) == 0) { \ | ||
176 | err = ENOMEM; \ | ||
177 | goto CLEANUP; \ | ||
178 | } | ||
179 | |||
180 | UALLOC (scheme) | ||
181 | UALLOC (user) | ||
182 | UALLOC (passwd) | ||
183 | UALLOC (auth) | ||
184 | UALLOC (host) | ||
185 | UALLOC (path) | ||
186 | UALLOC (query) | ||
187 | |||
188 | #undef UALLOC | ||
189 | |||
190 | url->port = u.port; | ||
191 | } | ||
192 | |||
193 | CLEANUP: | ||
194 | free (n); | ||
195 | |||
196 | if (err) | ||
197 | { | ||
198 | #define UFREE(X) if(X) { free(X); X = 0; } | ||
199 | |||
200 | UFREE(url->scheme) | ||
201 | UFREE(url->user) | ||
202 | UFREE(url->passwd) | ||
203 | UFREE(url->auth) | ||
204 | UFREE(url->host) | ||
205 | UFREE(url->path) | ||
206 | UFREE(url->query) | ||
207 | |||
208 | #undef UFREE | ||
209 | } | ||
210 | |||
211 | return err; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | |||
216 | Syntax, condensed from RFC 1738, and extended with the ;auth= | ||
217 | of RFC 2384 (for POP) and RFC 2192 (for IMAP): | ||
218 | |||
219 | url = | ||
220 | scheme ":" = "//" | ||
221 | |||
222 | [ user [ ( ":" password ) | ( ";auth=" auth ) ] "@" ] | ||
223 | |||
224 | host [ ":" port ] | ||
225 | |||
226 | [ ( "/" urlpath ) | ( "?" query ) ] | ||
227 | |||
228 | All hell will break loose in this parser if the user/pass/auth | ||
229 | portion is missing, and the urlpath has any @ or : characters | ||
230 | in it. A imap mailbox, say, named after the email address of | ||
231 | the person the mail is from: | ||
232 | |||
233 | imap://imap.uniserve.com/alain@qnx.com | ||
234 | |||
235 | Is this required to be % quoted, though? I hope so! | ||
236 | |||
237 | */ | ||
238 | |||
239 | static int | ||
240 | url_parse0 (url_t u, char *name) | ||
241 | { | ||
242 | char *p; /* pointer into name */ | ||
243 | |||
244 | /* reject the obvious */ | ||
245 | if (name == NULL) | ||
246 | return EINVAL; | ||
247 | |||
248 | /* Parse out the SCHEME. */ | ||
249 | p = strchr (name, ':'); | ||
250 | if (p == NULL) | ||
251 | { | ||
252 | return EPARSE; | ||
253 | } | ||
254 | |||
255 | *p++ = 0; | ||
256 | |||
257 | u->scheme = name; | ||
258 | |||
259 | /* RFC 1738, section 2.1, lower the scheme case */ | ||
260 | for ( ; name < p; name++) | ||
261 | *name = tolower(*name); | ||
262 | |||
263 | name = p; | ||
264 | |||
265 | if (strncmp (name, "//", 2) != 0) | ||
266 | return EPARSE; | ||
267 | |||
268 | name += 2; | ||
269 | |||
270 | /* Split into LHS and RHS of the '@', and then parse each side. */ | ||
271 | u->host = strchr (name, '@'); | ||
272 | if (u->host == NULL) | ||
273 | u->host = name; | ||
274 | else | ||
275 | { | ||
276 | /* Parse the LHS into an identification/authentication pair. */ | ||
277 | *u->host++ = 0; | ||
278 | |||
279 | u->user = name; | ||
280 | |||
281 | /* Try to split the user into a: | ||
282 | <user>:<password> | ||
283 | or | ||
284 | <user>;AUTH=<auth> | ||
285 | */ | ||
286 | |||
287 | for (; *name; name++) | ||
288 | { | ||
289 | if (*name == ';') | ||
290 | { | ||
291 | /* Make sure it's the auth token. */ | ||
292 | if (strncasecmp (name + 1, "auth=", 5) == 0) | ||
293 | { | ||
294 | *name++ = 0; | ||
295 | |||
296 | name += 5; | ||
297 | |||
298 | u->auth = name; | ||
299 | |||
300 | break; | ||
301 | } | ||
302 | } | ||
303 | if (*name == ':') | ||
304 | { | ||
305 | *name++ = 0; | ||
306 | u->passwd = name; | ||
307 | break; | ||
308 | } | ||
309 | } | ||
310 | } | ||
311 | |||
312 | /* Parse the host and port from the RHS. */ | ||
313 | p = strchr (u->host, ':'); | ||
314 | |||
315 | if (p) | ||
316 | { | ||
317 | *p++ = 0; | ||
318 | |||
319 | u->port = strtol (p, &p, 10); | ||
320 | |||
321 | /* Check for garbage after the port: we should be on the start | ||
322 | of a path, a query, or at the end of the string. */ | ||
323 | if (*p && strcspn (p, "/?") != 0) | ||
324 | return EPARSE; | ||
325 | } | ||
326 | else | ||
327 | p = u->host + strcspn (u->host, "/?"); | ||
328 | |||
329 | /* Either way, if we're not at a nul, we're at a path or query. */ | ||
330 | if (*p == '?') | ||
331 | { | ||
332 | /* found a query */ | ||
333 | *p++ = 0; | ||
334 | u->query = p; | ||
335 | } | ||
336 | if (*p == '/') | ||
337 | { | ||
338 | /* found a path */ | ||
339 | *p++ = 0; | ||
340 | u->path = p; | ||
341 | } | ||
342 | |||
343 | return 0; | ||
344 | } | ||
345 | |||
84 | int | 346 | int |
85 | url_get_scheme (const url_t url, char *scheme, size_t len, size_t *n) | 347 | url_get_scheme (const url_t url, char *scheme, size_t len, size_t *n) |
86 | { | 348 | { | ... | ... |
-
Please register or sign in to post a comment