Annotation of XML/uri.c, revision 1.24
1.1 daniel 1: /**
2: * uri.c: set of generic URI related routines
3: *
1.2 daniel 4: * Reference: RFC 2396
5: *
6: * See Copyright for the status of this software.
7: *
8: * Daniel.Veillard@w3.org
1.1 daniel 9: */
10:
11: #ifdef WIN32
12: #define INCLUDE_WINSOCK
13: #include "win32config.h"
14: #else
15: #include "config.h"
16: #endif
17:
18: #include <stdio.h>
19: #include <string.h>
20:
1.10 daniel 21: #include <libxml/xmlmemory.h>
22: #include <libxml/uri.h>
1.3 daniel 23:
/*
 * Character classes from RFC 2396.
 *
 * Macros taking `x` test a single character value; macros taking `p`
 * test the character(s) found at pointer `p`.  Arguments are evaluated
 * more than once, so they must be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | ... | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | ... | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
    ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
    ((x) == '+') || ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The tests short-circuit, so (p)[1] is read only after a '%' and
 * (p)[2] only after a hex digit at (p)[1].
 */
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \
    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
    ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) || \
    ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) || \
    ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \
    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
    ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \
    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
    ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * IS_SCHEME matches the characters allowed after the first one.
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \
    ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \
    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \
    ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
    ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \
    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
    ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
    (IS_RESERVED(*(p))))

/*
 * Skip to the next character, stepping over a whole "%XX" sequence at
 * once.  NEXT does not validate the two characters after a '%'; it is
 * meant to be used after one of the pointer tests above accepted the
 * current position.  The argument is parenthesized so that lvalue
 * expressions such as NEXT(*pp) advance the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
1.1 daniel 160:
1.11 daniel 161: /*
1.9 daniel 162: * Productions from the spec.
1.1 daniel 163: *
1.9 daniel 164: * authority = server | reg_name
1.4 daniel 165: * reg_name = 1*( unreserved | escaped | "$" | "," |
166: * ";" | ":" | "@" | "&" | "=" | "+" )
1.9 daniel 167: *
168: * path = [ abs_path | opaque_part ]
169: */
1.1 daniel 170:
1.4 daniel 171: /**
172: * xmlCreateURI:
173: *
174: * Simply creates an empty xmlURI
175: *
176: * Returns the new structure or NULL in case of error
177: */
178: xmlURIPtr
179: xmlCreateURI(void) {
180: xmlURIPtr ret;
181:
182: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
183: if (ret == NULL) {
184: fprintf(stderr, "xmlCreateURI: out of memory\n");
185: return(NULL);
186: }
187: memset(ret, 0, sizeof(xmlURI));
188: return(ret);
189: }
1.1 daniel 190:
1.4 daniel 191: /**
1.7 daniel 192: * xmlSaveUri:
193: * @uri: pointer to an xmlURI
194: *
195: * Save the URI as an escaped string
196: *
197: * Returns a new string (to be deallocated by caller)
198: */
199: xmlChar *
200: xmlSaveUri(xmlURIPtr uri) {
201: xmlChar *ret = NULL;
202: const char *p;
203: int len;
204: int max;
205:
206: if (uri == NULL) return(NULL);
207:
208:
209: max = 80;
1.14 veillard 210: ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
1.7 daniel 211: if (ret == NULL) {
212: fprintf(stderr, "xmlSaveUri: out of memory\n");
213: return(NULL);
214: }
215: len = 0;
216:
217: if (uri->scheme != NULL) {
218: p = uri->scheme;
219: while (*p != 0) {
220: if (len >= max) {
221: max *= 2;
1.14 veillard 222: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 223: if (ret == NULL) {
224: fprintf(stderr, "xmlSaveUri: out of memory\n");
225: return(NULL);
226: }
227: }
228: ret[len++] = *p++;
229: }
230: if (len >= max) {
231: max *= 2;
1.14 veillard 232: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 233: if (ret == NULL) {
234: fprintf(stderr, "xmlSaveUri: out of memory\n");
235: return(NULL);
236: }
237: }
238: ret[len++] = ':';
239: }
240: if (uri->opaque != NULL) {
241: p = uri->opaque;
242: while (*p != 0) {
243: if (len + 3 >= max) {
244: max *= 2;
1.14 veillard 245: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 246: if (ret == NULL) {
247: fprintf(stderr, "xmlSaveUri: out of memory\n");
248: return(NULL);
249: }
250: }
251: if ((IS_UNRESERVED(*(p))) ||
252: ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
253: ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
254: ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
255: ret[len++] = *p++;
256: else {
1.21 veillard 257: int val = *(unsigned char *)p++;
258: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 259: ret[len++] = '%';
1.21 veillard 260: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
261: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 262: }
263: }
264: if (len >= max) {
265: max *= 2;
1.14 veillard 266: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 267: if (ret == NULL) {
268: fprintf(stderr, "xmlSaveUri: out of memory\n");
269: return(NULL);
270: }
271: }
272: ret[len++] = 0;
273: } else {
1.9 daniel 274: if (uri->server != NULL) {
275: if (len + 3 >= max) {
276: max *= 2;
1.14 veillard 277: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.9 daniel 278: if (ret == NULL) {
279: fprintf(stderr, "xmlSaveUri: out of memory\n");
280: return(NULL);
281: }
282: }
283: ret[len++] = '/';
284: ret[len++] = '/';
285: if (uri->user != NULL) {
286: p = uri->user;
287: while (*p != 0) {
288: if (len + 3 >= max) {
289: max *= 2;
1.14 veillard 290: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.9 daniel 291: if (ret == NULL) {
292: fprintf(stderr, "xmlSaveUri: out of memory\n");
293: return(NULL);
294: }
295: }
296: if ((IS_UNRESERVED(*(p))) ||
297: ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||
298: ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
299: ((*(p) == ',')))
300: ret[len++] = *p++;
301: else {
1.21 veillard 302: int val = *(unsigned char *)p++;
303: int hi = val / 0x10, lo = val % 0x10;
1.9 daniel 304: ret[len++] = '%';
1.21 veillard 305: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
306: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.9 daniel 307: }
308: }
309: if (len + 3 >= max) {
310: max *= 2;
1.14 veillard 311: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.9 daniel 312: if (ret == NULL) {
313: fprintf(stderr, "xmlSaveUri: out of memory\n");
314: return(NULL);
315: }
316: }
317: ret[len++] = '@';
318: }
319: p = uri->server;
320: while (*p != 0) {
321: if (len >= max) {
322: max *= 2;
1.14 veillard 323: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.9 daniel 324: if (ret == NULL) {
325: fprintf(stderr, "xmlSaveUri: out of memory\n");
326: return(NULL);
327: }
328: }
329: ret[len++] = *p++;
330: }
331: if (uri->port > 0) {
332: if (len + 10 >= max) {
333: max *= 2;
1.14 veillard 334: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.9 daniel 335: if (ret == NULL) {
336: fprintf(stderr, "xmlSaveUri: out of memory\n");
337: return(NULL);
338: }
339: }
340: len += sprintf((char *) &ret[len], ":%d", uri->port);
341: }
342: } else if (uri->authority != NULL) {
1.7 daniel 343: if (len + 3 >= max) {
344: max *= 2;
1.14 veillard 345: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 346: if (ret == NULL) {
347: fprintf(stderr, "xmlSaveUri: out of memory\n");
348: return(NULL);
349: }
350: }
351: ret[len++] = '/';
352: ret[len++] = '/';
353: p = uri->authority;
354: while (*p != 0) {
355: if (len + 3 >= max) {
356: max *= 2;
1.14 veillard 357: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 358: if (ret == NULL) {
359: fprintf(stderr, "xmlSaveUri: out of memory\n");
360: return(NULL);
361: }
362: }
363: if ((IS_UNRESERVED(*(p))) ||
364: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
365: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
366: ((*(p) == '=')) || ((*(p) == '+')))
367: ret[len++] = *p++;
368: else {
1.21 veillard 369: int val = *(unsigned char *)p++;
370: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 371: ret[len++] = '%';
1.21 veillard 372: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
373: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 374: }
375: }
376: }
377: if (uri->path != NULL) {
378: p = uri->path;
379: while (*p != 0) {
380: if (len + 3 >= max) {
381: max *= 2;
1.14 veillard 382: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 383: if (ret == NULL) {
384: fprintf(stderr, "xmlSaveUri: out of memory\n");
385: return(NULL);
386: }
387: }
388: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
389: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
390: ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
391: ((*(p) == ',')))
392: ret[len++] = *p++;
393: else {
1.21 veillard 394: int val = *(unsigned char *)p++;
395: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 396: ret[len++] = '%';
1.21 veillard 397: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
398: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 399: }
400: }
401: }
402: if (uri->query != NULL) {
403: if (len + 3 >= max) {
404: max *= 2;
1.14 veillard 405: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 406: if (ret == NULL) {
407: fprintf(stderr, "xmlSaveUri: out of memory\n");
408: return(NULL);
409: }
410: }
411: ret[len++] = '?';
412: p = uri->query;
413: while (*p != 0) {
414: if (len + 3 >= max) {
415: max *= 2;
1.14 veillard 416: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 417: if (ret == NULL) {
418: fprintf(stderr, "xmlSaveUri: out of memory\n");
419: return(NULL);
420: }
421: }
422: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
423: ret[len++] = *p++;
424: else {
1.21 veillard 425: int val = *(unsigned char *)p++;
426: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 427: ret[len++] = '%';
1.21 veillard 428: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
429: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 430: }
431: }
432: }
433: if (uri->fragment != NULL) {
434: if (len + 3 >= max) {
435: max *= 2;
1.14 veillard 436: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 437: if (ret == NULL) {
438: fprintf(stderr, "xmlSaveUri: out of memory\n");
439: return(NULL);
440: }
441: }
442: ret[len++] = '#';
443: p = uri->fragment;
444: while (*p != 0) {
445: if (len + 3 >= max) {
446: max *= 2;
1.14 veillard 447: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 448: if (ret == NULL) {
449: fprintf(stderr, "xmlSaveUri: out of memory\n");
450: return(NULL);
451: }
452: }
453: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
454: ret[len++] = *p++;
455: else {
1.21 veillard 456: int val = *(unsigned char *)p++;
457: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 458: ret[len++] = '%';
1.21 veillard 459: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
460: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 461: }
462: }
463: }
464: if (len >= max) {
465: max *= 2;
1.14 veillard 466: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 467: if (ret == NULL) {
468: fprintf(stderr, "xmlSaveUri: out of memory\n");
469: return(NULL);
470: }
471: }
472: ret[len++] = 0;
473: }
474: return(ret);
475: }
476:
477: /**
1.5 daniel 478: * xmlPrintURI:
479: * @stream: a FILE* for the output
480: * @uri: pointer to an xmlURI
481: *
482: * Prints the URI in the stream @steam.
483: */
484: void
485: xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1.7 daniel 486: xmlChar *out;
1.5 daniel 487:
1.7 daniel 488: out = xmlSaveUri(uri);
489: if (out != NULL) {
490: fprintf(stream, "%s", out);
491: xmlFree(out);
1.5 daniel 492: }
493: }
494:
495: /**
1.4 daniel 496: * xmlCleanURI:
497: * @uri: pointer to an xmlURI
498: *
499: * Make sure the xmlURI struct is free of content
500: */
501: void
502: xmlCleanURI(xmlURIPtr uri) {
503: if (uri == NULL) return;
504:
505: if (uri->scheme != NULL) xmlFree(uri->scheme);
506: uri->scheme = NULL;
507: if (uri->server != NULL) xmlFree(uri->server);
508: uri->server = NULL;
1.9 daniel 509: if (uri->user != NULL) xmlFree(uri->user);
510: uri->user = NULL;
1.4 daniel 511: if (uri->path != NULL) xmlFree(uri->path);
512: uri->path = NULL;
513: if (uri->fragment != NULL) xmlFree(uri->fragment);
514: uri->fragment = NULL;
515: if (uri->opaque != NULL) xmlFree(uri->opaque);
516: uri->opaque = NULL;
1.5 daniel 517: if (uri->authority != NULL) xmlFree(uri->authority);
518: uri->authority = NULL;
519: if (uri->query != NULL) xmlFree(uri->query);
520: uri->query = NULL;
1.4 daniel 521: }
522:
523: /**
524: * xmlFreeURI:
525: * @uri: pointer to an xmlURI
526: *
527: * Free up the xmlURI struct
528: */
529: void
530: xmlFreeURI(xmlURIPtr uri) {
531: if (uri == NULL) return;
532:
533: if (uri->scheme != NULL) xmlFree(uri->scheme);
534: if (uri->server != NULL) xmlFree(uri->server);
1.9 daniel 535: if (uri->user != NULL) xmlFree(uri->user);
1.4 daniel 536: if (uri->path != NULL) xmlFree(uri->path);
537: if (uri->fragment != NULL) xmlFree(uri->fragment);
538: if (uri->opaque != NULL) xmlFree(uri->opaque);
1.5 daniel 539: if (uri->authority != NULL) xmlFree(uri->authority);
540: if (uri->query != NULL) xmlFree(uri->query);
1.4 daniel 541: memset(uri, -1, sizeof(xmlURI));
542: xmlFree(uri);
543: }
544:
/*
 * Value of the hexadecimal digit @c, or -1 if @c is not one.
 */
static int
xmlURIHexValue(int c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return(c - 'a' + 10);
    if ((c >= 'A') && (c <= 'F'))
        return(c - 'A' + 10);
    return(-1);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine.  Each "%XX" sequence lying entirely inside the
 * first @len bytes is replaced by the single byte it encodes (direct
 * unsigned char translation, no encoding).  A '%' that is not followed
 * by two hex digits within those bytes is copied unchanged, so the
 * routine never reads past the requested range and never emits an
 * undefined byte.
 *
 * When @target is given it must have room for @len + 1 bytes; otherwise
 * a buffer of that size is allocated.  The result is 0 terminated.
 *
 * Returns a copy of the string, but unescaped (@target when provided),
 *         or NULL if @str is NULL, the range is empty or memory ran out.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len <= 0) return(NULL);

    if (target == NULL) {
        ret = (char *) xmlMalloc(len + 1);
        if (ret == NULL) {
            fprintf(stderr, "xmlURIUnescapeString: out of memory\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while (len > 0) {
        int hi = -1, lo = -1;

        /* in[1] and in[2] are only looked at when they are in range */
        if ((len >= 3) && (*in == '%') &&
            ((hi = xmlURIHexValue(in[1])) >= 0) &&
            ((lo = xmlURIHexValue(in[2])) >= 0)) {
            *out++ = (char) (hi * 16 + lo);
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1.1 daniel 603:
604:
1.4 daniel 605: /**
606: * xmlParseURIFragment:
607: * @uri: pointer to an URI structure
608: * @str: pointer to the string to analyze
609: *
610: * Parse an URI fragment string and fills in the appropriate fields
611: * of the @uri structure.
612: *
613: * fragment = *uric
614: *
615: * Returns 0 or the error code
616: */
617: int
618: xmlParseURIFragment(xmlURIPtr uri, const char **str) {
619: const char *cur = *str;
620:
621: if (str == NULL) return(-1);
622:
623: while (IS_URIC(cur)) NEXT(cur);
624: if (uri != NULL) {
625: if (uri->fragment != NULL) xmlFree(uri->fragment);
1.9 daniel 626: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1.4 daniel 627: }
628: *str = cur;
629: return(0);
630: }
631:
632: /**
1.5 daniel 633: * xmlParseURIQuery:
634: * @uri: pointer to an URI structure
635: * @str: pointer to the string to analyze
636: *
637: * Parse the query part of an URI
638: *
639: * query = *uric
640: *
641: * Returns 0 or the error code
642: */
643: int
644: xmlParseURIQuery(xmlURIPtr uri, const char **str) {
645: const char *cur = *str;
646:
647: if (str == NULL) return(-1);
648:
649: while (IS_URIC(cur)) NEXT(cur);
650: if (uri != NULL) {
651: if (uri->query != NULL) xmlFree(uri->query);
1.9 daniel 652: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1.5 daniel 653: }
654: *str = cur;
655: return(0);
656: }
657:
658: /**
1.4 daniel 659: * xmlParseURIScheme:
660: * @uri: pointer to an URI structure
661: * @str: pointer to the string to analyze
662: *
663: * Parse an URI scheme
664: *
665: * scheme = alpha *( alpha | digit | "+" | "-" | "." )
666: *
667: * Returns 0 or the error code
668: */
669: int
670: xmlParseURIScheme(xmlURIPtr uri, const char **str) {
671: const char *cur;
672:
673: if (str == NULL)
674: return(-1);
675:
676: cur = *str;
677: if (!IS_ALPHA(*cur))
678: return(2);
679: cur++;
680: while (IS_SCHEME(*cur)) cur++;
681: if (uri != NULL) {
682: if (uri->scheme != NULL) xmlFree(uri->scheme);
1.9 daniel 683: uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); /* !!! strndup */
1.4 daniel 684: }
685: *str = cur;
686: return(0);
687: }
688:
689: /**
690: * xmlParseURIOpaquePart:
691: * @uri: pointer to an URI structure
692: * @str: pointer to the string to analyze
693: *
694: * Parse an URI opaque part
695: *
696: * opaque_part = uric_no_slash *uric
697: *
698: * Returns 0 or the error code
699: */
700: int
701: xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
702: const char *cur;
703:
704: if (str == NULL)
705: return(-1);
706:
707: cur = *str;
708: if (!IS_URIC_NO_SLASH(cur)) {
709: return(3);
710: }
711: NEXT(cur);
712: while (IS_URIC(cur)) NEXT(cur);
713: if (uri != NULL) {
1.5 daniel 714: if (uri->opaque != NULL) xmlFree(uri->opaque);
1.9 daniel 715: uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1.4 daniel 716: }
717: *str = cur;
718: return(0);
719: }
720:
721: /**
1.9 daniel 722: * xmlParseURIServer:
723: * @uri: pointer to an URI structure
724: * @str: pointer to the string to analyze
725: *
726: * Parse a server subpart of an URI, it's a finer grain analysis
727: * of the authority part.
728: *
729: * server = [ [ userinfo "@" ] hostport ]
730: * userinfo = *( unreserved | escaped |
731: * ";" | ":" | "&" | "=" | "+" | "$" | "," )
732: * hostport = host [ ":" port ]
733: * host = hostname | IPv4address
734: * hostname = *( domainlabel "." ) toplabel [ "." ]
735: * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
736: * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
737: * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
738: * port = *digit
739: *
740: * Returns 0 or the error code
741: */
742: int
743: xmlParseURIServer(xmlURIPtr uri, const char **str) {
744: const char *cur;
745: const char *host, *tmp;
746:
747: if (str == NULL)
748: return(-1);
749:
750: cur = *str;
751:
752: /*
753: * is there an userinfo ?
754: */
755: while (IS_USERINFO(cur)) NEXT(cur);
756: if (*cur == '@') {
757: if (uri != NULL) {
758: if (uri->user != NULL) xmlFree(uri->user);
759: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
760: }
761: cur++;
762: } else {
763: if (uri != NULL) {
764: if (uri->user != NULL) xmlFree(uri->user);
765: uri->user = NULL;
766: }
767: cur = *str;
768: }
769: /*
770: * host part of hostport can derive either an IPV4 address
771: * or an unresolved name. Check the IP first, it easier to detect
772: * errors if wrong one
773: */
774: host = cur;
775: if (IS_DIGIT(*cur)) {
776: while(IS_DIGIT(*cur)) cur++;
777: if (*cur != '.')
778: goto host_name;
779: cur++;
780: if (!IS_DIGIT(*cur))
781: goto host_name;
782: while(IS_DIGIT(*cur)) cur++;
783: if (*cur != '.')
784: goto host_name;
785: cur++;
786: if (!IS_DIGIT(*cur))
787: goto host_name;
788: while(IS_DIGIT(*cur)) cur++;
789: if (*cur != '.')
790: goto host_name;
791: cur++;
792: if (!IS_DIGIT(*cur))
793: goto host_name;
794: while(IS_DIGIT(*cur)) cur++;
795: if (uri != NULL) {
796: if (uri->authority != NULL) xmlFree(uri->authority);
797: uri->authority = NULL;
798: if (uri->server != NULL) xmlFree(uri->server);
799: uri->server = xmlURIUnescapeString(host, cur - host, NULL);
800: }
801: goto host_done;
802: }
803: host_name:
804: /*
805: * the hostname production as-is is a parser nightmare.
806: * simplify it to
807: * hostname = *( domainlabel "." ) domainlabel [ "." ]
808: * and just make sure the last label starts with a non numeric char.
809: */
810: if (!IS_ALPHANUM(*cur))
811: return(6);
812: while (IS_ALPHANUM(*cur)) {
813: while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
814: if (*cur == '.')
815: cur++;
816: }
817: tmp = cur;
818: tmp--;
819: while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
820: tmp++;
821: if (!IS_ALPHA(*tmp))
822: return(7);
823: if (uri != NULL) {
824: if (uri->authority != NULL) xmlFree(uri->authority);
825: uri->authority = NULL;
826: if (uri->server != NULL) xmlFree(uri->server);
827: uri->server = xmlURIUnescapeString(host, cur - host, NULL);
828: }
829:
830: host_done:
831:
832: /*
833: * finish by checking for a port presence.
834: */
835: if (*cur == ':') {
836: cur++;
837: if (IS_DIGIT(*cur)) {
838: if (uri != NULL)
839: uri->port = 0;
840: while (IS_DIGIT(*cur)) {
841: if (uri != NULL)
842: uri->port = uri->port * 10 + (*cur - '0');
843: cur++;
844: }
845: }
846: }
847: *str = cur;
848: return(0);
849: }
850:
851: /**
1.6 daniel 852: * xmlParseURIRelSegment:
853: * @uri: pointer to an URI structure
854: * @str: pointer to the string to analyze
855: *
856: * Parse an URI relative segment
857: *
858: * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
859: * "+" | "$" | "," )
860: *
861: * Returns 0 or the error code
862: */
863: int
864: xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
865: const char *cur;
866:
867: if (str == NULL)
868: return(-1);
869:
870: cur = *str;
871: if (!IS_SEGMENT(cur)) {
872: return(3);
873: }
874: NEXT(cur);
875: while (IS_SEGMENT(cur)) NEXT(cur);
876: if (uri != NULL) {
877: if (uri->path != NULL) xmlFree(uri->path);
1.9 daniel 878: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1.6 daniel 879: }
880: *str = cur;
881: return(0);
882: }
883:
884: /**
1.4 daniel 885: * xmlParseURIPathSegments:
886: * @uri: pointer to an URI structure
887: * @str: pointer to the string to analyze
888: * @slash: should we add a leading slash
889: *
890: * Parse an URI set of path segments
891: *
892: * path_segments = segment *( "/" segment )
893: * segment = *pchar *( ";" param )
894: * param = *pchar
895: *
896: * Returns 0 or the error code
897: */
898: int
899: xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
900: const char *cur;
901:
902: if (str == NULL)
903: return(-1);
904:
905: cur = *str;
906:
907: do {
908: while (IS_PCHAR(cur)) NEXT(cur);
909: if (*cur == ';') {
910: cur++;
911: while (IS_PCHAR(cur)) NEXT(cur);
912: }
913: if (*cur != '/') break;
914: cur++;
915: } while (1);
916: if (uri != NULL) {
1.5 daniel 917: int len, len2 = 0;
918: char *path;
1.4 daniel 919:
920: /*
921: * Concat the set of path segments to the current path
922: */
1.5 daniel 923: len = cur - *str;
924: if (slash)
925: len++;
926:
1.4 daniel 927: if (uri->path != NULL) {
1.5 daniel 928: len2 = strlen(uri->path);
929: len += len2;
930: }
931: path = (char *) xmlMalloc(len + 1);
932: if (path == NULL) {
933: fprintf(stderr, "xmlParseURIPathSegments: out of memory\n");
934: *str = cur;
935: return(-1);
1.4 daniel 936: }
1.5 daniel 937: if (uri->path != NULL)
938: memcpy(path, uri->path, len2);
1.6 daniel 939: if (slash) {
940: path[len2] = '/';
941: len2++;
942: }
1.16 veillard 943: path[len2] = 0;
944: if (cur - *str > 0)
945: xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1.5 daniel 946: if (uri->path != NULL)
947: xmlFree(uri->path);
948: uri->path = path;
949: }
950: *str = cur;
951: return(0);
952: }
953:
954: /**
955: * xmlParseURIAuthority:
956: * @uri: pointer to an URI structure
957: * @str: pointer to the string to analyze
958: *
959: * Parse the authority part of an URI.
960: *
961: * authority = server | reg_name
962: * server = [ [ userinfo "@" ] hostport ]
963: * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
964: * "@" | "&" | "=" | "+" )
965: *
966: * Note : this is completely ambiguous since reg_name is allowed to
967: * use the full set of chars in use by server:
968: *
969: * 3.2.1. Registry-based Naming Authority
970: *
971: * The structure of a registry-based naming authority is specific
972: * to the URI scheme, but constrained to the allowed characters
973: * for an authority component.
974: *
975: * Returns 0 or the error code
976: */
977: int
978: xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
979: const char *cur;
1.9 daniel 980: int ret;
1.5 daniel 981:
982: if (str == NULL)
983: return(-1);
984:
985: cur = *str;
1.9 daniel 986:
987: /*
988: * try first to parse it as a server string.
989: */
990: ret = xmlParseURIServer(uri, str);
991: if (ret == 0)
992: return(0);
993:
994: /*
995: * failed, fallback to reg_name
996: */
1.5 daniel 997: if (!IS_REG_NAME(cur)) {
998: return(5);
999: }
1000: NEXT(cur);
1001: while (IS_REG_NAME(cur)) NEXT(cur);
1002: if (uri != NULL) {
1.9 daniel 1003: if (uri->server != NULL) xmlFree(uri->server);
1004: uri->server = NULL;
1005: if (uri->user != NULL) xmlFree(uri->user);
1006: uri->user = NULL;
1.5 daniel 1007: if (uri->authority != NULL) xmlFree(uri->authority);
1.9 daniel 1008: uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1.4 daniel 1009: }
1010: *str = cur;
1011: return(0);
1012: }
1013:
1014: /**
1015: * xmlParseURIHierPart:
1016: * @uri: pointer to an URI structure
1017: * @str: pointer to the string to analyze
1018: *
1019: * Parse an URI hirarchical part
1020: *
1021: * hier_part = ( net_path | abs_path ) [ "?" query ]
1022: * abs_path = "/" path_segments
1023: * net_path = "//" authority [ abs_path ]
1024: *
1025: * Returns 0 or the error code
1026: */
1027: int
1028: xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1029: int ret;
1030: const char *cur;
1031:
1032: if (str == NULL)
1033: return(-1);
1034:
1035: cur = *str;
1036:
1037: if ((cur[0] == '/') && (cur[1] == '/')) {
1038: cur += 2;
1039: ret = xmlParseURIAuthority(uri, &cur);
1.5 daniel 1040: if (ret != 0)
1.4 daniel 1041: return(ret);
1042: if (cur[0] == '/') {
1043: cur++;
1044: ret = xmlParseURIPathSegments(uri, &cur, 1);
1045: }
1046: } else if (cur[0] == '/') {
1047: cur++;
1048: ret = xmlParseURIPathSegments(uri, &cur, 1);
1049: } else {
1050: return(4);
1051: }
1052: if (ret != 0)
1053: return(ret);
1054: if (*cur == '?') {
1055: cur++;
1056: ret = xmlParseURIQuery(uri, &cur);
1057: if (ret != 0)
1058: return(ret);
1059: }
1060: *str = cur;
1061: return(0);
1062: }
1063:
1064: /**
1065: * xmlParseAbsoluteURI:
1066: * @uri: pointer to an URI structure
1067: * @str: pointer to the string to analyze
1068: *
1069: * Parse an URI reference string and fills in the appropriate fields
1070: * of the @uri structure
1071: *
1072: * absoluteURI = scheme ":" ( hier_part | opaque_part )
1073: *
1074: * Returns 0 or the error code
1075: */
1076: int
1077: xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1078: int ret;
1079:
1080: if (str == NULL)
1081: return(-1);
1082:
1083: ret = xmlParseURIScheme(uri, str);
1084: if (ret != 0) return(ret);
1085: if (**str != ':')
1086: return(1);
1087: (*str)++;
1088: if (**str == '/')
1089: return(xmlParseURIHierPart(uri, str));
1090: return(xmlParseURIOpaquePart(uri, str));
1091: }
1092:
1093: /**
1.5 daniel 1094: * xmlParseRelativeURI:
1095: * @uri: pointer to an URI structure
1096: * @str: pointer to the string to analyze
1097: *
1098: * Parse an relative URI string and fills in the appropriate fields
1099: * of the @uri structure
1100: *
1101: * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1.6 daniel 1102: * abs_path = "/" path_segments
1103: * net_path = "//" authority [ abs_path ]
1104: * rel_path = rel_segment [ abs_path ]
1.5 daniel 1105: *
1106: * Returns 0 or the error code
1107: */
1108: int
1109: xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1110: int ret = 0;
1.6 daniel 1111: const char *cur;
1.5 daniel 1112:
1113: if (str == NULL)
1114: return(-1);
1115:
1.6 daniel 1116: cur = *str;
1117: if ((cur[0] == '/') && (cur[1] == '/')) {
1118: cur += 2;
1119: ret = xmlParseURIAuthority(uri, &cur);
1120: if (ret != 0)
1121: return(ret);
1122: if (cur[0] == '/') {
1123: cur++;
1124: ret = xmlParseURIPathSegments(uri, &cur, 1);
1125: }
1126: } else if (cur[0] == '/') {
1127: cur++;
1128: ret = xmlParseURIPathSegments(uri, &cur, 1);
1.17 veillard 1129: } else if (cur[0] != '#' && cur[0] != '?') {
1.6 daniel 1130: ret = xmlParseURIRelSegment(uri, &cur);
1131: if (ret != 0)
1132: return(ret);
1133: if (cur[0] == '/') {
1134: cur++;
1135: ret = xmlParseURIPathSegments(uri, &cur, 1);
1136: }
1137: }
1138: if (ret != 0)
1139: return(ret);
1140: if (*cur == '?') {
1141: cur++;
1142: ret = xmlParseURIQuery(uri, &cur);
1143: if (ret != 0)
1144: return(ret);
1145: }
1146: *str = cur;
1.5 daniel 1147: return(ret);
1148: }
1149:
1150: /**
1.4 daniel 1151: * xmlParseURIReference:
1152: * @uri: pointer to an URI structure
1153: * @str: the string to analyze
1154: *
1155: * Parse an URI reference string and fills in the appropriate fields
1156: * of the @uri structure
1157: *
1158: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1159: *
1160: * Returns 0 or the error code
1161: */
1162: int
1163: xmlParseURIReference(xmlURIPtr uri, const char *str) {
1164: int ret;
1165: const char *tmp = str;
1166:
1167: if (str == NULL)
1168: return(-1);
1169: xmlCleanURI(uri);
1170:
1171: /*
1172: * Try first to parse aboslute refs, then fallback to relative if
1173: * it fails.
1.1 daniel 1174: */
1.4 daniel 1175: ret = xmlParseAbsoluteURI(uri, &str);
1176: if (ret != 0) {
1177: xmlCleanURI(uri);
1178: str = tmp;
1.5 daniel 1179: ret = xmlParseRelativeURI(uri, &str);
1.4 daniel 1180: }
1181: if (ret != 0) {
1182: xmlCleanURI(uri);
1183: return(ret);
1184: }
1185:
1186: if (*str == '#') {
1187: str++;
1188: ret = xmlParseURIFragment(uri, &str);
1189: if (ret != 0) return(ret);
1190: }
1191: if (*str != 0) {
1192: xmlCleanURI(uri);
1193: return(1);
1194: }
1195: return(0);
1196: }
1.2 daniel 1197:
1198: /**
1.12 daniel 1199: * xmlParseURI:
1200: * @str: the URI string to analyze
1201: *
1202: * Parse an URI
1203: *
1204: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1205: *
1206: * Returns a newly build xmlURIPtr or NULL in case of error
1207: */
1208: xmlURIPtr
1209: xmlParseURI(const char *str) {
1210: xmlURIPtr uri;
1211: int ret;
1212:
1213: if (str == NULL)
1214: return(NULL);
1215: uri = xmlCreateURI();
1216: if (uri != NULL) {
1217: ret = xmlParseURIReference(uri, str);
1218: if (ret) {
1219: xmlFreeURI(uri);
1220: return(NULL);
1221: }
1222: }
1223: return(uri);
1224: }
1225:
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the normalization steps c) to g) of RFC 2396 section 5.2
 * step 6 to a path string. Normalization occurs directly on the
 * string, no new allocation is done; the result is never longer than
 * the input.
 *
 * Everything up to and including the first '/' is left untouched by
 * steps c) to f); a string without any '/' is returned unchanged.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    int cur, out;
    int prevIsDotDot;

    if (path == NULL)
        return(-1);

    /* Locate the first '/'; processing starts right after it. */
    cur = 0;
    while ((path[cur] != 0) && (path[cur] != '/'))
        cur++;
    if (path[cur] == 0)
        return(0);
    cur++;
    out = cur;

    /*
     * First pass, one segment per iteration. The invariant out <= cur
     * holds throughout, so writes never pass the original terminator.
     */
    while (path[cur] != 0) {
        /*
         * c) All occurrences of "./", where "." is a complete path
         *    segment, are removed from the buffer string.
         */
        if ((path[cur] == '.') && (path[cur + 1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path
         *    segment, that "." is removed.
         */
        if ((path[cur] == '.') && (path[cur + 1] == 0))
            break;

        /* copy the segment and its trailing '/', if any */
        while ((path[cur] != 0) && (path[cur] != '/'))
            path[out++] = path[cur++];
        if (path[cur] == '/')
            path[out++] = path[cur++];
    }
    /*
     * Always re-terminate: when the last thing consumed was a '/' or a
     * skipped "./", the compacted string is shorter than the original
     * and stale characters would otherwise remain visible.
     */
    path[out] = 0;

    /* Second pass: restart right after the first '/'. */
    cur = 0;
    while ((path[cur] != 0) && (path[cur] != '/'))
        cur++;
    if (path[cur] == 0)
        return(0);
    cur++;
    out = cur;

    while (path[cur] != 0) {
        if ((path[cur] == '/') && (path[cur + 1] == '.') &&
            (path[cur + 2] == '.')) {
            /*
             * Is the segment just written to the output (the one
             * ending at out - 1) itself ".."? Such a segment must not
             * be cancelled by a following "..".
             */
            prevIsDotDot = (out >= 2) &&
                           (path[out - 1] == '.') &&
                           (path[out - 2] == '.') &&
                           ((out == 2) || (path[out - 3] == '/'));

            /*
             * e) All occurrences of "<segment>/../", where <segment>
             *    is a complete path segment not equal to "..", are
             *    removed from the buffer string.
             */
            if ((path[cur + 3] == '/') && (cur > 1) && (out > 1) &&
                !prevIsDotDot) {
                cur += 3;       /* now on the '/' that follows ".." */
                out--;
                /* rewind onto the '/' preceding <segment> */
                while ((out > 0) && (path[out] != '/'))
                    out--;
                continue;
            }

            /*
             * f) If the buffer string ends with "<segment>/..", where
             *    <segment> is a complete path segment not equal to
             *    "..", that "<segment>/.." is removed. The input is
             *    exhausted at this point, so stop scanning.
             */
            if ((path[cur + 3] == 0) && (out > 0) && !prevIsDotDot) {
                out--;
                /* rewind to just after the '/' preceding <segment> */
                while ((out > 0) && (path[out - 1] != '/'))
                    out--;
                break;
            }
        }

        path[out++] = path[cur++];
    }
    path[out] = 0;

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path,
     *    by removing them from the resolved path, or by avoiding
     *    traversal of the reference.
     *
     * We discard them from the final path. Only complete ".." segments
     * are dropped: a name that merely starts with ".." is kept.
     */
    cur = 0;
    while ((path[cur] == '/') && (path[cur + 1] == '.') &&
           (path[cur + 2] == '.') &&
           ((path[cur + 3] == '/') || (path[cur + 3] == 0)))
        cur += 3;
    if (cur != 0) {
        out = 0;
        while (path[cur] != 0)
            path[out++] = path[cur++];
        path[out] = 0;
    }
    return(0);
}
1356:
1357: /**
1.2 daniel 1358: * xmlBuildURI:
1359: * @URI: the URI instance found in the document
1.4 daniel 1360: * @base: the base value
1.2 daniel 1361: *
1362: * Computes he final URI of the reference done by checking that
1.4 daniel 1363: * the given URI is valid, and building the final URI using the
1.6 daniel 1364: * base URI. This is processed according to section 5.2 of the
1365: * RFC 2396
1366: *
1367: * 5.2. Resolving Relative References to Absolute Form
1.2 daniel 1368: *
1.7 daniel 1369: * Returns a new URI string (to be freed by the caller) or NULL in case
1370: * of error.
1.2 daniel 1371: */
1372: xmlChar *
1.4 daniel 1373: xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1.7 daniel 1374: xmlChar *val = NULL;
1.24 ! veillard 1375: int ret, ret2, len, index, cur, out;
1.7 daniel 1376: xmlURIPtr ref = NULL;
1377: xmlURIPtr bas = NULL;
1378: xmlURIPtr res = NULL;
1379:
1380: /*
1381: * 1) The URI reference is parsed into the potential four components and
1382: * fragment identifier, as described in Section 4.3.
1.18 veillard 1383: *
1384: * NOTE that a completely empty URI is treated by modern browsers
1385: * as a reference to "." rather than as a synonym for the current
1386: * URI. Should we do that here?
1.7 daniel 1387: */
1.24 ! veillard 1388: if (URI == NULL)
! 1389: ret = -1;
! 1390: else {
! 1391: ref = xmlCreateURI();
! 1392: if (ref == NULL)
! 1393: goto done;
! 1394: if (*URI)
! 1395: ret = xmlParseURIReference(ref, (const char *) URI);
! 1396: else
! 1397: ret = -1;
! 1398: }
! 1399: if (base == NULL)
! 1400: ret2 = -1;
! 1401: else {
! 1402: bas = xmlCreateURI();
! 1403: if (bas == NULL)
1.17 veillard 1404: goto done;
1.24 ! veillard 1405: ret2 = xmlParseURIReference(bas, (const char *) base);
1.17 veillard 1406: }
1.24 ! veillard 1407: if ((ret != 0) && (ret2 != 0))
1.8 daniel 1408: goto done;
1.24 ! veillard 1409: if (ret != 0) {
! 1410: /*
! 1411: * the base fragment must be ignored
! 1412: */
! 1413: if (bas->fragment != NULL) {
! 1414: xmlFree(bas->fragment);
! 1415: bas->fragment = NULL;
! 1416: }
! 1417: val = xmlSaveUri(bas);
1.7 daniel 1418: goto done;
1.24 ! veillard 1419: }
! 1420: if (ret2 != 0) {
! 1421: val = xmlSaveUri(ref);
! 1422: goto done;
! 1423: }
! 1424:
1.7 daniel 1425:
1426: /*
1427: * 2) If the path component is empty and the scheme, authority, and
1428: * query components are undefined, then it is a reference to the
1.18 veillard 1429: * current document and we are done. Otherwise, the reference URI's
1430: * query and fragment components are defined as found (or not found)
1431: * within the URI reference and not inherited from the base URI.
1.17 veillard 1432: *
1.18 veillard 1433: * NOTE that in modern browsers, the parsing differs from the above
1434: * in the following aspect: the query component is allowed to be
1435: * defined while still treating this as a reference to the current
1436: * document.
1.7 daniel 1437: */
1438: res = xmlCreateURI();
1439: if (res == NULL)
1440: goto done;
1441: if ((ref->scheme == NULL) && (ref->path == NULL) &&
1.18 veillard 1442: ((ref->authority == NULL) && (ref->server == NULL))) {
1443: if (bas->scheme != NULL)
1444: res->scheme = xmlMemStrdup(bas->scheme);
1445: if (bas->authority != NULL)
1446: res->authority = xmlMemStrdup(bas->authority);
1447: else if (bas->server != NULL) {
1448: res->server = xmlMemStrdup(bas->server);
1449: if (bas->user != NULL)
1450: res->user = xmlMemStrdup(bas->user);
1451: res->port = bas->port;
1452: }
1453: if (bas->path != NULL)
1454: res->path = xmlMemStrdup(bas->path);
1455: if (ref->query != NULL)
1456: res->query = xmlMemStrdup(ref->query);
1457: else if (bas->query != NULL)
1458: res->query = xmlMemStrdup(bas->query);
1459: if (ref->fragment != NULL)
1460: res->fragment = xmlMemStrdup(ref->fragment);
1461: goto step_7;
1.7 daniel 1462: }
1.17 veillard 1463:
1.18 veillard 1464: if (ref->query != NULL)
1465: res->query = xmlMemStrdup(ref->query);
1466: if (ref->fragment != NULL)
1467: res->fragment = xmlMemStrdup(ref->fragment);
1.7 daniel 1468:
1469: /*
1470: * 3) If the scheme component is defined, indicating that the reference
1471: * starts with a scheme name, then the reference is interpreted as an
1472: * absolute URI and we are done. Otherwise, the reference URI's
1473: * scheme is inherited from the base URI's scheme component.
1474: */
1475: if (ref->scheme != NULL) {
1476: val = xmlSaveUri(ref);
1477: goto done;
1478: }
1.13 daniel 1479: if (bas->scheme != NULL)
1480: res->scheme = xmlMemStrdup(bas->scheme);
1.7 daniel 1481:
1482: /*
1483: * 4) If the authority component is defined, then the reference is a
1484: * network-path and we skip to step 7. Otherwise, the reference
1485: * URI's authority is inherited from the base URI's authority
1486: * component, which will also be undefined if the URI scheme does not
1487: * use an authority component.
1488: */
1.9 daniel 1489: if ((ref->authority != NULL) || (ref->server != NULL)) {
1490: if (ref->authority != NULL)
1491: res->authority = xmlMemStrdup(ref->authority);
1492: else {
1493: res->server = xmlMemStrdup(ref->server);
1494: if (ref->user != NULL)
1495: res->user = xmlMemStrdup(ref->user);
1496: res->port = ref->port;
1497: }
1.8 daniel 1498: if (ref->path != NULL)
1499: res->path = xmlMemStrdup(ref->path);
1.7 daniel 1500: goto step_7;
1.8 daniel 1501: }
1.7 daniel 1502: if (bas->authority != NULL)
1503: res->authority = xmlMemStrdup(bas->authority);
1.9 daniel 1504: else if (bas->server != NULL) {
1505: res->server = xmlMemStrdup(bas->server);
1506: if (bas->user != NULL)
1507: res->user = xmlMemStrdup(bas->user);
1508: res->port = bas->port;
1509: }
1.7 daniel 1510:
1511: /*
1512: * 5) If the path component begins with a slash character ("/"), then
1513: * the reference is an absolute-path and we skip to step 7.
1.18 veillard 1514: */
1515: if ((ref->path != NULL) && (ref->path[0] == '/')) {
1.8 daniel 1516: res->path = xmlMemStrdup(ref->path);
1.7 daniel 1517: goto step_7;
1.8 daniel 1518: }
1.7 daniel 1519:
1520:
1521: /*
1522: * 6) If this step is reached, then we are resolving a relative-path
1523: * reference. The relative path needs to be merged with the base
1524: * URI's path. Although there are many ways to do this, we will
1525: * describe a simple method using a separate string buffer.
1.8 daniel 1526: *
1527: * Allocate a buffer large enough for the result string.
1.7 daniel 1528: */
1529: len = 2; /* extra / and 0 */
1530: if (ref->path != NULL)
1531: len += strlen(ref->path);
1532: if (bas->path != NULL)
1533: len += strlen(bas->path);
1.8 daniel 1534: res->path = (char *) xmlMalloc(len);
1535: if (res->path == NULL) {
1536: fprintf(stderr, "xmlBuildURI: out of memory\n");
1537: goto done;
1538: }
1539: res->path[0] = 0;
1540:
1541: /*
1542: * a) All but the last segment of the base URI's path component is
1543: * copied to the buffer. In other words, any characters after the
1544: * last (right-most) slash character, if any, are excluded.
1545: */
1546: cur = 0;
1547: out = 0;
1548: if (bas->path != NULL) {
1549: while (bas->path[cur] != 0) {
1550: while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1551: cur++;
1552: if (bas->path[cur] == 0)
1553: break;
1554:
1555: cur++;
1556: while (out < cur) {
1557: res->path[out] = bas->path[out];
1558: out++;
1559: }
1560: }
1561: }
1562: res->path[out] = 0;
1563:
1564: /*
1565: * b) The reference's path component is appended to the buffer
1566: * string.
1567: */
1.22 veillard 1568: if (ref->path != NULL && ref->path[0] != 0) {
1.8 daniel 1569: index = 0;
1.15 veillard 1570: /*
1571: * Ensure the path includes a '/'
1572: */
1.23 veillard 1573: if ((out == 0) && (bas->server != NULL))
1.15 veillard 1574: res->path[out++] = '/';
1.8 daniel 1575: while (ref->path[index] != 0) {
1576: res->path[out++] = ref->path[index++];
1577: }
1578: }
1579: res->path[out] = 0;
1580:
1581: /*
1582: * Steps c) to h) are really path normalization steps
1583: */
1584: xmlNormalizeURIPath(res->path);
1585:
1.7 daniel 1586: step_7:
1587:
1.8 daniel 1588: /*
1589: * 7) The resulting URI components, including any inherited from the
1590: * base URI, are recombined to give the absolute form of the URI
1591: * reference.
1592: */
1593: val = xmlSaveUri(res);
1594:
1.7 daniel 1595: done:
1596: if (ref != NULL)
1597: xmlFreeURI(ref);
1.19 veillard 1598: if (bas != NULL)
1.7 daniel 1599: xmlFreeURI(bas);
1600: if (res != NULL)
1601: xmlFreeURI(res);
1602: return(val);
1.2 daniel 1603: }
1.5 daniel 1604:
1605:
Webmaster