Annotation of XML/uri.c, revision 1.28
1.1 daniel 1: /**
2: * uri.c: set of generic URI related routines
3: *
1.2 daniel 4: * Reference: RFC 2396
5: *
6: * See Copyright for the status of this software.
7: *
8: * Daniel.Veillard@w3.org
1.1 daniel 9: */
10:
11: #ifdef WIN32
12: #define INCLUDE_WINSOCK
13: #include "win32config.h"
14: #else
15: #include "config.h"
16: #endif
17:
#include <limits.h>
#include <stdio.h>
#include <string.h>
20:
1.10 daniel 21: #include <libxml/xmlmemory.h>
22: #include <libxml/uri.h>
1.27 veillard 23: #include <libxml/xmlerror.h>
1.3 daniel 24:
/*
 * Basic character classes of RFC 2396.
 *
 * Each macro takes a single character. The argument may be evaluated
 * several times, so it must be free of side effects.
 */

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * lowalpha = "a" | "b" | "c" | ... | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | ... | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_UPALPHA(x) || IS_LOWALPHA(x))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_DIGIT(x) || IS_ALPHA(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) (IS_DIGIT(x) ||					\
	(('A' <= (x)) && ((x) <= 'F')) ||				\
	(('a' <= (x)) && ((x) <= 'f')))
1.1 daniel 65:
/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 *
 * The argument is expected to be a char; the explicit test against 0
 * keeps the string terminator from matching.
 */
#define IS_MARK(x) (((x) != 0) && (strchr("-_.!~*'()", (x)) != NULL))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x) (((x) != 0) && (strchr(";/?:@&=+$,", (x)) != NULL))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_MARK(x) || IS_ALPHANUM(x))

/*
 * escaped = "%" hex hex
 *
 * Takes a pointer to the candidate sequence; the second and third
 * bytes are only looked at when the preceding ones matched.
 */
#define IS_ESCAPED(p) (((p)[0] == '%') &&				\
	IS_HEX((p)[1]) && IS_HEX((p)[2]))
95:
/*
 * The macros below (except IS_SCHEME) take a pointer into the string
 * being parsed, because an escaped sequence spans three bytes.
 * The explicit test against 0 keeps the string terminator from
 * matching the character sets.
 */

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||	\
	((*(p) != 0) && (strchr(";?:@&=+$,", *(p)) != NULL)))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
	((*(p) != 0) && (strchr(":@&=+$,", *(p)) != NULL)))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
	((*(p) != 0) && (strchr(";@&=+$,", *(p)) != NULL)))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * Takes a character, not a pointer.
 */
#define IS_SCHEME(x) (IS_ALPHANUM(x) ||					\
	((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
	((*(p) != 0) && (strchr("$,;:@&=+", *(p)) != NULL)))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
	((*(p) != 0) && (strchr(";:&=+$,", *(p)) != NULL)))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)) ||		\
	IS_ESCAPED(p))
155:
/*
 * Skip to next pointer char, handle escaped sequences
 *
 * Advances the pointer lvalue @p by three bytes when it points at a '%'
 * and by one byte otherwise. It does not verify that two more bytes
 * follow the '%': callers are expected to have checked the sequence
 * first (e.g. with IS_ESCAPED()).
 *
 * The argument is fully parenthesized so that expressions such as
 * NEXT(*pp) advance the designated pointer and not pp itself.
 * @p is evaluated more than once.
 */

#define NEXT(p) ((*(p) == '%')? (p) += 3 : (p)++)
1.1 daniel 161:
1.11 daniel 162: /*
1.9 daniel 163: * Productions from the spec.
1.1 daniel 164: *
1.9 daniel 165: * authority = server | reg_name
1.4 daniel 166: * reg_name = 1*( unreserved | escaped | "$" | "," |
167: * ";" | ":" | "@" | "&" | "=" | "+" )
1.9 daniel 168: *
169: * path = [ abs_path | opaque_part ]
170: */
1.1 daniel 171:
1.4 daniel 172: /**
173: * xmlCreateURI:
174: *
175: * Simply creates an empty xmlURI
176: *
177: * Returns the new structure or NULL in case of error
178: */
179: xmlURIPtr
180: xmlCreateURI(void) {
181: xmlURIPtr ret;
182:
183: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
184: if (ret == NULL) {
1.27 veillard 185: xmlGenericError(xmlGenericErrorContext,
186: "xmlCreateURI: out of memory\n");
1.4 daniel 187: return(NULL);
188: }
189: memset(ret, 0, sizeof(xmlURI));
190: return(ret);
191: }
1.1 daniel 192:
1.4 daniel 193: /**
1.7 daniel 194: * xmlSaveUri:
195: * @uri: pointer to an xmlURI
196: *
197: * Save the URI as an escaped string
198: *
199: * Returns a new string (to be deallocated by caller)
200: */
201: xmlChar *
202: xmlSaveUri(xmlURIPtr uri) {
203: xmlChar *ret = NULL;
204: const char *p;
205: int len;
206: int max;
207:
208: if (uri == NULL) return(NULL);
209:
210:
211: max = 80;
1.14 veillard 212: ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
1.7 daniel 213: if (ret == NULL) {
1.27 veillard 214: xmlGenericError(xmlGenericErrorContext,
215: "xmlSaveUri: out of memory\n");
1.7 daniel 216: return(NULL);
217: }
218: len = 0;
219:
220: if (uri->scheme != NULL) {
221: p = uri->scheme;
222: while (*p != 0) {
223: if (len >= max) {
224: max *= 2;
1.14 veillard 225: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 226: if (ret == NULL) {
1.27 veillard 227: xmlGenericError(xmlGenericErrorContext,
228: "xmlSaveUri: out of memory\n");
1.7 daniel 229: return(NULL);
230: }
231: }
232: ret[len++] = *p++;
233: }
234: if (len >= max) {
235: max *= 2;
1.14 veillard 236: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 237: if (ret == NULL) {
1.27 veillard 238: xmlGenericError(xmlGenericErrorContext,
239: "xmlSaveUri: out of memory\n");
1.7 daniel 240: return(NULL);
241: }
242: }
243: ret[len++] = ':';
244: }
245: if (uri->opaque != NULL) {
246: p = uri->opaque;
247: while (*p != 0) {
248: if (len + 3 >= max) {
249: max *= 2;
1.14 veillard 250: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 251: if (ret == NULL) {
1.27 veillard 252: xmlGenericError(xmlGenericErrorContext,
253: "xmlSaveUri: out of memory\n");
1.7 daniel 254: return(NULL);
255: }
256: }
257: if ((IS_UNRESERVED(*(p))) ||
258: ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
259: ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
260: ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
261: ret[len++] = *p++;
262: else {
1.21 veillard 263: int val = *(unsigned char *)p++;
264: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 265: ret[len++] = '%';
1.21 veillard 266: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
267: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 268: }
269: }
270: if (len >= max) {
271: max *= 2;
1.14 veillard 272: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 273: if (ret == NULL) {
1.27 veillard 274: xmlGenericError(xmlGenericErrorContext,
275: "xmlSaveUri: out of memory\n");
1.7 daniel 276: return(NULL);
277: }
278: }
279: ret[len++] = 0;
280: } else {
1.9 daniel 281: if (uri->server != NULL) {
282: if (len + 3 >= max) {
283: max *= 2;
1.14 veillard 284: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.9 daniel 285: if (ret == NULL) {
1.27 veillard 286: xmlGenericError(xmlGenericErrorContext,
287: "xmlSaveUri: out of memory\n");
1.9 daniel 288: return(NULL);
289: }
290: }
291: ret[len++] = '/';
292: ret[len++] = '/';
293: if (uri->user != NULL) {
294: p = uri->user;
295: while (*p != 0) {
296: if (len + 3 >= max) {
297: max *= 2;
1.27 veillard 298: ret = (xmlChar *) xmlRealloc(ret,
299: (max + 1) * sizeof(xmlChar));
1.9 daniel 300: if (ret == NULL) {
1.27 veillard 301: xmlGenericError(xmlGenericErrorContext,
302: "xmlSaveUri: out of memory\n");
1.9 daniel 303: return(NULL);
304: }
305: }
306: if ((IS_UNRESERVED(*(p))) ||
1.27 veillard 307: ((*(p) == ';')) || ((*(p) == ':')) ||
308: ((*(p) == '&')) || ((*(p) == '=')) ||
309: ((*(p) == '+')) || ((*(p) == '$')) ||
1.9 daniel 310: ((*(p) == ',')))
311: ret[len++] = *p++;
312: else {
1.21 veillard 313: int val = *(unsigned char *)p++;
314: int hi = val / 0x10, lo = val % 0x10;
1.9 daniel 315: ret[len++] = '%';
1.21 veillard 316: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
317: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.9 daniel 318: }
319: }
320: if (len + 3 >= max) {
321: max *= 2;
1.27 veillard 322: ret = (xmlChar *) xmlRealloc(ret,
323: (max + 1) * sizeof(xmlChar));
1.9 daniel 324: if (ret == NULL) {
1.27 veillard 325: xmlGenericError(xmlGenericErrorContext,
326: "xmlSaveUri: out of memory\n");
1.9 daniel 327: return(NULL);
328: }
329: }
330: ret[len++] = '@';
331: }
332: p = uri->server;
333: while (*p != 0) {
334: if (len >= max) {
335: max *= 2;
1.27 veillard 336: ret = (xmlChar *) xmlRealloc(ret,
337: (max + 1) * sizeof(xmlChar));
1.9 daniel 338: if (ret == NULL) {
1.27 veillard 339: xmlGenericError(xmlGenericErrorContext,
340: "xmlSaveUri: out of memory\n");
1.9 daniel 341: return(NULL);
342: }
343: }
344: ret[len++] = *p++;
345: }
346: if (uri->port > 0) {
347: if (len + 10 >= max) {
348: max *= 2;
1.27 veillard 349: ret = (xmlChar *) xmlRealloc(ret,
350: (max + 1) * sizeof(xmlChar));
1.9 daniel 351: if (ret == NULL) {
1.27 veillard 352: xmlGenericError(xmlGenericErrorContext,
353: "xmlSaveUri: out of memory\n");
1.9 daniel 354: return(NULL);
355: }
356: }
357: len += sprintf((char *) &ret[len], ":%d", uri->port);
358: }
359: } else if (uri->authority != NULL) {
1.7 daniel 360: if (len + 3 >= max) {
361: max *= 2;
1.27 veillard 362: ret = (xmlChar *) xmlRealloc(ret,
363: (max + 1) * sizeof(xmlChar));
1.7 daniel 364: if (ret == NULL) {
1.27 veillard 365: xmlGenericError(xmlGenericErrorContext,
366: "xmlSaveUri: out of memory\n");
1.7 daniel 367: return(NULL);
368: }
369: }
370: ret[len++] = '/';
371: ret[len++] = '/';
372: p = uri->authority;
373: while (*p != 0) {
374: if (len + 3 >= max) {
375: max *= 2;
1.27 veillard 376: ret = (xmlChar *) xmlRealloc(ret,
377: (max + 1) * sizeof(xmlChar));
1.7 daniel 378: if (ret == NULL) {
1.27 veillard 379: xmlGenericError(xmlGenericErrorContext,
380: "xmlSaveUri: out of memory\n");
1.7 daniel 381: return(NULL);
382: }
383: }
384: if ((IS_UNRESERVED(*(p))) ||
385: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
386: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
387: ((*(p) == '=')) || ((*(p) == '+')))
388: ret[len++] = *p++;
389: else {
1.21 veillard 390: int val = *(unsigned char *)p++;
391: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 392: ret[len++] = '%';
1.21 veillard 393: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
394: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 395: }
396: }
1.26 veillard 397: } else if (uri->scheme != NULL) {
1.25 veillard 398: if (len + 3 >= max) {
399: max *= 2;
1.27 veillard 400: ret = (xmlChar *) xmlRealloc(ret,
401: (max + 1) * sizeof(xmlChar));
1.25 veillard 402: if (ret == NULL) {
1.27 veillard 403: xmlGenericError(xmlGenericErrorContext,
404: "xmlSaveUri: out of memory\n");
1.25 veillard 405: return(NULL);
406: }
407: }
408: ret[len++] = '/';
409: ret[len++] = '/';
1.7 daniel 410: }
411: if (uri->path != NULL) {
412: p = uri->path;
413: while (*p != 0) {
414: if (len + 3 >= max) {
415: max *= 2;
1.27 veillard 416: ret = (xmlChar *) xmlRealloc(ret,
417: (max + 1) * sizeof(xmlChar));
1.7 daniel 418: if (ret == NULL) {
1.27 veillard 419: xmlGenericError(xmlGenericErrorContext,
420: "xmlSaveUri: out of memory\n");
1.7 daniel 421: return(NULL);
422: }
423: }
424: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
425: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
426: ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
427: ((*(p) == ',')))
428: ret[len++] = *p++;
429: else {
1.21 veillard 430: int val = *(unsigned char *)p++;
431: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 432: ret[len++] = '%';
1.21 veillard 433: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
434: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 435: }
436: }
437: }
438: if (uri->query != NULL) {
439: if (len + 3 >= max) {
440: max *= 2;
1.27 veillard 441: ret = (xmlChar *) xmlRealloc(ret,
442: (max + 1) * sizeof(xmlChar));
1.7 daniel 443: if (ret == NULL) {
1.27 veillard 444: xmlGenericError(xmlGenericErrorContext,
445: "xmlSaveUri: out of memory\n");
1.7 daniel 446: return(NULL);
447: }
448: }
449: ret[len++] = '?';
450: p = uri->query;
451: while (*p != 0) {
452: if (len + 3 >= max) {
453: max *= 2;
1.27 veillard 454: ret = (xmlChar *) xmlRealloc(ret,
455: (max + 1) * sizeof(xmlChar));
1.7 daniel 456: if (ret == NULL) {
1.27 veillard 457: xmlGenericError(xmlGenericErrorContext,
458: "xmlSaveUri: out of memory\n");
1.7 daniel 459: return(NULL);
460: }
461: }
462: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
463: ret[len++] = *p++;
464: else {
1.21 veillard 465: int val = *(unsigned char *)p++;
466: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 467: ret[len++] = '%';
1.21 veillard 468: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
469: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 470: }
471: }
472: }
473: if (uri->fragment != NULL) {
474: if (len + 3 >= max) {
475: max *= 2;
1.27 veillard 476: ret = (xmlChar *) xmlRealloc(ret,
477: (max + 1) * sizeof(xmlChar));
1.7 daniel 478: if (ret == NULL) {
1.27 veillard 479: xmlGenericError(xmlGenericErrorContext,
480: "xmlSaveUri: out of memory\n");
1.7 daniel 481: return(NULL);
482: }
483: }
484: ret[len++] = '#';
485: p = uri->fragment;
486: while (*p != 0) {
487: if (len + 3 >= max) {
488: max *= 2;
1.27 veillard 489: ret = (xmlChar *) xmlRealloc(ret,
490: (max + 1) * sizeof(xmlChar));
1.7 daniel 491: if (ret == NULL) {
1.27 veillard 492: xmlGenericError(xmlGenericErrorContext,
493: "xmlSaveUri: out of memory\n");
1.7 daniel 494: return(NULL);
495: }
496: }
497: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
498: ret[len++] = *p++;
499: else {
1.21 veillard 500: int val = *(unsigned char *)p++;
501: int hi = val / 0x10, lo = val % 0x10;
1.7 daniel 502: ret[len++] = '%';
1.21 veillard 503: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
504: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1.7 daniel 505: }
506: }
507: }
508: if (len >= max) {
509: max *= 2;
1.14 veillard 510: ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1.7 daniel 511: if (ret == NULL) {
1.27 veillard 512: xmlGenericError(xmlGenericErrorContext,
513: "xmlSaveUri: out of memory\n");
1.7 daniel 514: return(NULL);
515: }
516: }
517: ret[len++] = 0;
518: }
519: return(ret);
520: }
521:
522: /**
1.5 daniel 523: * xmlPrintURI:
524: * @stream: a FILE* for the output
525: * @uri: pointer to an xmlURI
526: *
527: * Prints the URI in the stream @steam.
528: */
529: void
530: xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1.7 daniel 531: xmlChar *out;
1.5 daniel 532:
1.7 daniel 533: out = xmlSaveUri(uri);
534: if (out != NULL) {
535: fprintf(stream, "%s", out);
536: xmlFree(out);
1.5 daniel 537: }
538: }
539:
540: /**
1.4 daniel 541: * xmlCleanURI:
542: * @uri: pointer to an xmlURI
543: *
544: * Make sure the xmlURI struct is free of content
545: */
546: void
547: xmlCleanURI(xmlURIPtr uri) {
548: if (uri == NULL) return;
549:
550: if (uri->scheme != NULL) xmlFree(uri->scheme);
551: uri->scheme = NULL;
552: if (uri->server != NULL) xmlFree(uri->server);
553: uri->server = NULL;
1.9 daniel 554: if (uri->user != NULL) xmlFree(uri->user);
555: uri->user = NULL;
1.4 daniel 556: if (uri->path != NULL) xmlFree(uri->path);
557: uri->path = NULL;
558: if (uri->fragment != NULL) xmlFree(uri->fragment);
559: uri->fragment = NULL;
560: if (uri->opaque != NULL) xmlFree(uri->opaque);
561: uri->opaque = NULL;
1.5 daniel 562: if (uri->authority != NULL) xmlFree(uri->authority);
563: uri->authority = NULL;
564: if (uri->query != NULL) xmlFree(uri->query);
565: uri->query = NULL;
1.4 daniel 566: }
567:
568: /**
569: * xmlFreeURI:
570: * @uri: pointer to an xmlURI
571: *
572: * Free up the xmlURI struct
573: */
574: void
575: xmlFreeURI(xmlURIPtr uri) {
576: if (uri == NULL) return;
577:
578: if (uri->scheme != NULL) xmlFree(uri->scheme);
579: if (uri->server != NULL) xmlFree(uri->server);
1.9 daniel 580: if (uri->user != NULL) xmlFree(uri->user);
1.4 daniel 581: if (uri->path != NULL) xmlFree(uri->path);
582: if (uri->fragment != NULL) xmlFree(uri->fragment);
583: if (uri->opaque != NULL) xmlFree(uri->opaque);
1.5 daniel 584: if (uri->authority != NULL) xmlFree(uri->authority);
585: if (uri->query != NULL) xmlFree(uri->query);
1.4 daniel 586: memset(uri, -1, sizeof(xmlURI));
587: xmlFree(uri);
588: }
589:
590: /**
1.9 daniel 591: * xmlURIUnescapeString:
1.4 daniel 592: * @str: the string to unescape
593: * @len: the lenght in bytes to unescape (or <= 0 to indicate full string)
594: * @target: optionnal destination buffer
595: *
596: * Unescaping routine, does not do validity checks !
1.7 daniel 597: * Output is direct unsigned char translation of %XX values (no encoding)
1.4 daniel 598: *
599: * Returns an copy of the string, but unescaped
600: */
601: char *
1.9 daniel 602: xmlURIUnescapeString(const char *str, int len, char *target) {
1.4 daniel 603: char *ret, *out;
604: const char *in;
605:
606: if (str == NULL)
607: return(NULL);
608: if (len <= 0) len = strlen(str);
1.5 daniel 609: if (len <= 0) return(NULL);
1.4 daniel 610:
611: if (target == NULL) {
612: ret = (char *) xmlMalloc(len + 1);
613: if (ret == NULL) {
1.27 veillard 614: xmlGenericError(xmlGenericErrorContext,
615: "xmlURIUnescapeString: out of memory\n");
1.4 daniel 616: return(NULL);
617: }
618: } else
619: ret = target;
620: in = str;
621: out = ret;
1.5 daniel 622: while(len > 0) {
1.4 daniel 623: if (*in == '%') {
624: in++;
625: if ((*in >= '0') && (*in <= '9'))
626: *out = (*in - '0');
627: else if ((*in >= 'a') && (*in <= 'f'))
628: *out = (*in - 'a') + 10;
629: else if ((*in >= 'A') && (*in <= 'F'))
630: *out = (*in - 'A') + 10;
631: in++;
632: if ((*in >= '0') && (*in <= '9'))
633: *out = *out * 16 + (*in - '0');
634: else if ((*in >= 'a') && (*in <= 'f'))
635: *out = *out * 16 + (*in - 'a') + 10;
636: else if ((*in >= 'A') && (*in <= 'F'))
637: *out = *out * 16 + (*in - 'A') + 10;
638: in++;
639: len -= 3;
1.7 daniel 640: out++;
1.4 daniel 641: } else {
642: *out++ = *in++;
643: len--;
644: }
645: }
646: *out = 0;
647: return(ret);
648: }
1.1 daniel 649:
650:
1.4 daniel 651: /**
652: * xmlParseURIFragment:
653: * @uri: pointer to an URI structure
654: * @str: pointer to the string to analyze
655: *
656: * Parse an URI fragment string and fills in the appropriate fields
657: * of the @uri structure.
658: *
659: * fragment = *uric
660: *
661: * Returns 0 or the error code
662: */
663: int
664: xmlParseURIFragment(xmlURIPtr uri, const char **str) {
665: const char *cur = *str;
666:
667: if (str == NULL) return(-1);
668:
669: while (IS_URIC(cur)) NEXT(cur);
670: if (uri != NULL) {
671: if (uri->fragment != NULL) xmlFree(uri->fragment);
1.9 daniel 672: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1.4 daniel 673: }
674: *str = cur;
675: return(0);
676: }
677:
678: /**
1.5 daniel 679: * xmlParseURIQuery:
680: * @uri: pointer to an URI structure
681: * @str: pointer to the string to analyze
682: *
683: * Parse the query part of an URI
684: *
685: * query = *uric
686: *
687: * Returns 0 or the error code
688: */
689: int
690: xmlParseURIQuery(xmlURIPtr uri, const char **str) {
691: const char *cur = *str;
692:
693: if (str == NULL) return(-1);
694:
695: while (IS_URIC(cur)) NEXT(cur);
696: if (uri != NULL) {
697: if (uri->query != NULL) xmlFree(uri->query);
1.9 daniel 698: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1.5 daniel 699: }
700: *str = cur;
701: return(0);
702: }
703:
704: /**
1.4 daniel 705: * xmlParseURIScheme:
706: * @uri: pointer to an URI structure
707: * @str: pointer to the string to analyze
708: *
709: * Parse an URI scheme
710: *
711: * scheme = alpha *( alpha | digit | "+" | "-" | "." )
712: *
713: * Returns 0 or the error code
714: */
715: int
716: xmlParseURIScheme(xmlURIPtr uri, const char **str) {
717: const char *cur;
718:
719: if (str == NULL)
720: return(-1);
721:
722: cur = *str;
723: if (!IS_ALPHA(*cur))
724: return(2);
725: cur++;
726: while (IS_SCHEME(*cur)) cur++;
727: if (uri != NULL) {
728: if (uri->scheme != NULL) xmlFree(uri->scheme);
1.27 veillard 729: /* !!! strndup */
730: uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1.4 daniel 731: }
732: *str = cur;
733: return(0);
734: }
735:
736: /**
737: * xmlParseURIOpaquePart:
738: * @uri: pointer to an URI structure
739: * @str: pointer to the string to analyze
740: *
741: * Parse an URI opaque part
742: *
743: * opaque_part = uric_no_slash *uric
744: *
745: * Returns 0 or the error code
746: */
747: int
748: xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
749: const char *cur;
750:
751: if (str == NULL)
752: return(-1);
753:
754: cur = *str;
755: if (!IS_URIC_NO_SLASH(cur)) {
756: return(3);
757: }
758: NEXT(cur);
759: while (IS_URIC(cur)) NEXT(cur);
760: if (uri != NULL) {
1.5 daniel 761: if (uri->opaque != NULL) xmlFree(uri->opaque);
1.9 daniel 762: uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1.4 daniel 763: }
764: *str = cur;
765: return(0);
766: }
767:
768: /**
1.9 daniel 769: * xmlParseURIServer:
770: * @uri: pointer to an URI structure
771: * @str: pointer to the string to analyze
772: *
773: * Parse a server subpart of an URI, it's a finer grain analysis
774: * of the authority part.
775: *
776: * server = [ [ userinfo "@" ] hostport ]
777: * userinfo = *( unreserved | escaped |
778: * ";" | ":" | "&" | "=" | "+" | "$" | "," )
779: * hostport = host [ ":" port ]
780: * host = hostname | IPv4address
781: * hostname = *( domainlabel "." ) toplabel [ "." ]
782: * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
783: * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
784: * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
785: * port = *digit
786: *
787: * Returns 0 or the error code
788: */
789: int
790: xmlParseURIServer(xmlURIPtr uri, const char **str) {
791: const char *cur;
792: const char *host, *tmp;
793:
794: if (str == NULL)
795: return(-1);
796:
797: cur = *str;
798:
799: /*
800: * is there an userinfo ?
801: */
802: while (IS_USERINFO(cur)) NEXT(cur);
803: if (*cur == '@') {
804: if (uri != NULL) {
805: if (uri->user != NULL) xmlFree(uri->user);
806: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
807: }
808: cur++;
809: } else {
810: if (uri != NULL) {
811: if (uri->user != NULL) xmlFree(uri->user);
812: uri->user = NULL;
813: }
814: cur = *str;
815: }
816: /*
1.25 veillard 817: * This can be empty in the case where there is no server
818: */
819: host = cur;
820: if (*cur == '/') {
821: if (uri != NULL) {
822: if (uri->authority != NULL) xmlFree(uri->authority);
823: uri->authority = NULL;
824: if (uri->server != NULL) xmlFree(uri->server);
825: uri->server = NULL;
826: uri->port = 0;
827: }
828: return(0);
829: }
830: /*
1.9 daniel 831: * host part of hostport can derive either an IPV4 address
832: * or an unresolved name. Check the IP first, it easier to detect
833: * errors if wrong one
834: */
835: if (IS_DIGIT(*cur)) {
836: while(IS_DIGIT(*cur)) cur++;
837: if (*cur != '.')
838: goto host_name;
839: cur++;
840: if (!IS_DIGIT(*cur))
841: goto host_name;
842: while(IS_DIGIT(*cur)) cur++;
843: if (*cur != '.')
844: goto host_name;
845: cur++;
846: if (!IS_DIGIT(*cur))
847: goto host_name;
848: while(IS_DIGIT(*cur)) cur++;
849: if (*cur != '.')
850: goto host_name;
851: cur++;
852: if (!IS_DIGIT(*cur))
853: goto host_name;
854: while(IS_DIGIT(*cur)) cur++;
855: if (uri != NULL) {
856: if (uri->authority != NULL) xmlFree(uri->authority);
857: uri->authority = NULL;
858: if (uri->server != NULL) xmlFree(uri->server);
859: uri->server = xmlURIUnescapeString(host, cur - host, NULL);
860: }
861: goto host_done;
862: }
863: host_name:
864: /*
865: * the hostname production as-is is a parser nightmare.
866: * simplify it to
867: * hostname = *( domainlabel "." ) domainlabel [ "." ]
868: * and just make sure the last label starts with a non numeric char.
869: */
870: if (!IS_ALPHANUM(*cur))
871: return(6);
872: while (IS_ALPHANUM(*cur)) {
873: while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
874: if (*cur == '.')
875: cur++;
876: }
877: tmp = cur;
878: tmp--;
879: while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
880: tmp++;
881: if (!IS_ALPHA(*tmp))
882: return(7);
883: if (uri != NULL) {
884: if (uri->authority != NULL) xmlFree(uri->authority);
885: uri->authority = NULL;
886: if (uri->server != NULL) xmlFree(uri->server);
887: uri->server = xmlURIUnescapeString(host, cur - host, NULL);
888: }
889:
890: host_done:
891:
892: /*
893: * finish by checking for a port presence.
894: */
895: if (*cur == ':') {
896: cur++;
897: if (IS_DIGIT(*cur)) {
898: if (uri != NULL)
899: uri->port = 0;
900: while (IS_DIGIT(*cur)) {
901: if (uri != NULL)
902: uri->port = uri->port * 10 + (*cur - '0');
903: cur++;
904: }
905: }
906: }
907: *str = cur;
908: return(0);
909: }
910:
911: /**
1.6 daniel 912: * xmlParseURIRelSegment:
913: * @uri: pointer to an URI structure
914: * @str: pointer to the string to analyze
915: *
916: * Parse an URI relative segment
917: *
918: * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
919: * "+" | "$" | "," )
920: *
921: * Returns 0 or the error code
922: */
923: int
924: xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
925: const char *cur;
926:
927: if (str == NULL)
928: return(-1);
929:
930: cur = *str;
931: if (!IS_SEGMENT(cur)) {
932: return(3);
933: }
934: NEXT(cur);
935: while (IS_SEGMENT(cur)) NEXT(cur);
936: if (uri != NULL) {
937: if (uri->path != NULL) xmlFree(uri->path);
1.9 daniel 938: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1.6 daniel 939: }
940: *str = cur;
941: return(0);
942: }
943:
944: /**
1.4 daniel 945: * xmlParseURIPathSegments:
946: * @uri: pointer to an URI structure
947: * @str: pointer to the string to analyze
948: * @slash: should we add a leading slash
949: *
950: * Parse an URI set of path segments
951: *
952: * path_segments = segment *( "/" segment )
953: * segment = *pchar *( ";" param )
954: * param = *pchar
955: *
956: * Returns 0 or the error code
957: */
958: int
959: xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
960: const char *cur;
961:
962: if (str == NULL)
963: return(-1);
964:
965: cur = *str;
966:
967: do {
968: while (IS_PCHAR(cur)) NEXT(cur);
969: if (*cur == ';') {
970: cur++;
971: while (IS_PCHAR(cur)) NEXT(cur);
972: }
973: if (*cur != '/') break;
974: cur++;
975: } while (1);
976: if (uri != NULL) {
1.5 daniel 977: int len, len2 = 0;
978: char *path;
1.4 daniel 979:
980: /*
981: * Concat the set of path segments to the current path
982: */
1.5 daniel 983: len = cur - *str;
984: if (slash)
985: len++;
986:
1.4 daniel 987: if (uri->path != NULL) {
1.5 daniel 988: len2 = strlen(uri->path);
989: len += len2;
990: }
991: path = (char *) xmlMalloc(len + 1);
992: if (path == NULL) {
1.27 veillard 993: xmlGenericError(xmlGenericErrorContext,
994: "xmlParseURIPathSegments: out of memory\n");
1.5 daniel 995: *str = cur;
996: return(-1);
1.4 daniel 997: }
1.5 daniel 998: if (uri->path != NULL)
999: memcpy(path, uri->path, len2);
1.6 daniel 1000: if (slash) {
1001: path[len2] = '/';
1002: len2++;
1003: }
1.16 veillard 1004: path[len2] = 0;
1005: if (cur - *str > 0)
1006: xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1.5 daniel 1007: if (uri->path != NULL)
1008: xmlFree(uri->path);
1009: uri->path = path;
1010: }
1011: *str = cur;
1012: return(0);
1013: }
1014:
1015: /**
1016: * xmlParseURIAuthority:
1017: * @uri: pointer to an URI structure
1018: * @str: pointer to the string to analyze
1019: *
1020: * Parse the authority part of an URI.
1021: *
1022: * authority = server | reg_name
1023: * server = [ [ userinfo "@" ] hostport ]
1024: * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1025: * "@" | "&" | "=" | "+" )
1026: *
1027: * Note : this is completely ambiguous since reg_name is allowed to
1028: * use the full set of chars in use by server:
1029: *
1030: * 3.2.1. Registry-based Naming Authority
1031: *
1032: * The structure of a registry-based naming authority is specific
1033: * to the URI scheme, but constrained to the allowed characters
1034: * for an authority component.
1035: *
1036: * Returns 0 or the error code
1037: */
1038: int
1039: xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1040: const char *cur;
1.9 daniel 1041: int ret;
1.5 daniel 1042:
1043: if (str == NULL)
1044: return(-1);
1045:
1046: cur = *str;
1.9 daniel 1047:
1048: /*
1049: * try first to parse it as a server string.
1050: */
1051: ret = xmlParseURIServer(uri, str);
1052: if (ret == 0)
1053: return(0);
1054:
1055: /*
1056: * failed, fallback to reg_name
1057: */
1.5 daniel 1058: if (!IS_REG_NAME(cur)) {
1059: return(5);
1060: }
1061: NEXT(cur);
1062: while (IS_REG_NAME(cur)) NEXT(cur);
1063: if (uri != NULL) {
1.9 daniel 1064: if (uri->server != NULL) xmlFree(uri->server);
1065: uri->server = NULL;
1066: if (uri->user != NULL) xmlFree(uri->user);
1067: uri->user = NULL;
1.5 daniel 1068: if (uri->authority != NULL) xmlFree(uri->authority);
1.9 daniel 1069: uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1.4 daniel 1070: }
1071: *str = cur;
1072: return(0);
1073: }
1074:
1075: /**
1076: * xmlParseURIHierPart:
1077: * @uri: pointer to an URI structure
1078: * @str: pointer to the string to analyze
1079: *
1080: * Parse an URI hirarchical part
1081: *
1082: * hier_part = ( net_path | abs_path ) [ "?" query ]
1083: * abs_path = "/" path_segments
1084: * net_path = "//" authority [ abs_path ]
1085: *
1086: * Returns 0 or the error code
1087: */
1088: int
1089: xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1090: int ret;
1091: const char *cur;
1092:
1093: if (str == NULL)
1094: return(-1);
1095:
1096: cur = *str;
1097:
1098: if ((cur[0] == '/') && (cur[1] == '/')) {
1099: cur += 2;
1100: ret = xmlParseURIAuthority(uri, &cur);
1.5 daniel 1101: if (ret != 0)
1.4 daniel 1102: return(ret);
1103: if (cur[0] == '/') {
1104: cur++;
1105: ret = xmlParseURIPathSegments(uri, &cur, 1);
1106: }
1107: } else if (cur[0] == '/') {
1108: cur++;
1109: ret = xmlParseURIPathSegments(uri, &cur, 1);
1110: } else {
1111: return(4);
1112: }
1113: if (ret != 0)
1114: return(ret);
1115: if (*cur == '?') {
1116: cur++;
1117: ret = xmlParseURIQuery(uri, &cur);
1118: if (ret != 0)
1119: return(ret);
1120: }
1121: *str = cur;
1122: return(0);
1123: }
1124:
1125: /**
1126: * xmlParseAbsoluteURI:
1127: * @uri: pointer to an URI structure
1128: * @str: pointer to the string to analyze
1129: *
1130: * Parse an URI reference string and fills in the appropriate fields
1131: * of the @uri structure
1132: *
1133: * absoluteURI = scheme ":" ( hier_part | opaque_part )
1134: *
1135: * Returns 0 or the error code
1136: */
1137: int
1138: xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1139: int ret;
1140:
1141: if (str == NULL)
1142: return(-1);
1143:
1144: ret = xmlParseURIScheme(uri, str);
1145: if (ret != 0) return(ret);
1146: if (**str != ':')
1147: return(1);
1148: (*str)++;
1149: if (**str == '/')
1150: return(xmlParseURIHierPart(uri, str));
1151: return(xmlParseURIOpaquePart(uri, str));
1152: }
1153:
1154: /**
1.5 daniel 1155: * xmlParseRelativeURI:
1156: * @uri: pointer to an URI structure
1157: * @str: pointer to the string to analyze
1158: *
1159: * Parse an relative URI string and fills in the appropriate fields
1160: * of the @uri structure
1161: *
1162: * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1.6 daniel 1163: * abs_path = "/" path_segments
1164: * net_path = "//" authority [ abs_path ]
1165: * rel_path = rel_segment [ abs_path ]
1.5 daniel 1166: *
1167: * Returns 0 or the error code
1168: */
1169: int
1170: xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1171: int ret = 0;
1.6 daniel 1172: const char *cur;
1.5 daniel 1173:
1174: if (str == NULL)
1175: return(-1);
1176:
1.6 daniel 1177: cur = *str;
1178: if ((cur[0] == '/') && (cur[1] == '/')) {
1179: cur += 2;
1180: ret = xmlParseURIAuthority(uri, &cur);
1181: if (ret != 0)
1182: return(ret);
1183: if (cur[0] == '/') {
1184: cur++;
1185: ret = xmlParseURIPathSegments(uri, &cur, 1);
1186: }
1187: } else if (cur[0] == '/') {
1188: cur++;
1189: ret = xmlParseURIPathSegments(uri, &cur, 1);
1.17 veillard 1190: } else if (cur[0] != '#' && cur[0] != '?') {
1.6 daniel 1191: ret = xmlParseURIRelSegment(uri, &cur);
1192: if (ret != 0)
1193: return(ret);
1194: if (cur[0] == '/') {
1195: cur++;
1196: ret = xmlParseURIPathSegments(uri, &cur, 1);
1197: }
1198: }
1199: if (ret != 0)
1200: return(ret);
1201: if (*cur == '?') {
1202: cur++;
1203: ret = xmlParseURIQuery(uri, &cur);
1204: if (ret != 0)
1205: return(ret);
1206: }
1207: *str = cur;
1.5 daniel 1208: return(ret);
1209: }
1210:
1211: /**
1.4 daniel 1212: * xmlParseURIReference:
1213: * @uri: pointer to an URI structure
1214: * @str: the string to analyze
1215: *
1216: * Parse an URI reference string and fills in the appropriate fields
1217: * of the @uri structure
1218: *
1219: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1220: *
1221: * Returns 0 or the error code
1222: */
1223: int
1224: xmlParseURIReference(xmlURIPtr uri, const char *str) {
1225: int ret;
1226: const char *tmp = str;
1227:
1228: if (str == NULL)
1229: return(-1);
1230: xmlCleanURI(uri);
1231:
1232: /*
1233: * Try first to parse aboslute refs, then fallback to relative if
1234: * it fails.
1.1 daniel 1235: */
1.4 daniel 1236: ret = xmlParseAbsoluteURI(uri, &str);
1237: if (ret != 0) {
1238: xmlCleanURI(uri);
1239: str = tmp;
1.5 daniel 1240: ret = xmlParseRelativeURI(uri, &str);
1.4 daniel 1241: }
1242: if (ret != 0) {
1243: xmlCleanURI(uri);
1244: return(ret);
1245: }
1246:
1247: if (*str == '#') {
1248: str++;
1249: ret = xmlParseURIFragment(uri, &str);
1250: if (ret != 0) return(ret);
1251: }
1252: if (*str != 0) {
1253: xmlCleanURI(uri);
1254: return(1);
1255: }
1256: return(0);
1257: }
1.2 daniel 1258:
1259: /**
1.12 daniel 1260: * xmlParseURI:
1261: * @str: the URI string to analyze
1262: *
1263: * Parse an URI
1264: *
1265: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1266: *
1267: * Returns a newly build xmlURIPtr or NULL in case of error
1268: */
1269: xmlURIPtr
1270: xmlParseURI(const char *str) {
1271: xmlURIPtr uri;
1272: int ret;
1273:
1274: if (str == NULL)
1275: return(NULL);
1276: uri = xmlCreateURI();
1277: if (uri != NULL) {
1278: ret = xmlParseURIReference(uri, str);
1279: if (ret) {
1280: xmlFreeURI(uri);
1281: return(NULL);
1282: }
1283: }
1284: return(uri);
1285: }
1286:
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the normalization steps c) to g) of RFC 2396 section 5.2
 * (step 6) to a path string: "." segments are dropped, "<segment>/.."
 * pairs are collapsed and leading ".." segments are discarded.
 * Normalization occurs directly on the string, no new allocation is done.
 * Everything up to and including the first '/' is left untouched; a path
 * without any '/' is returned unchanged.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    int cur, out;

    if (path == NULL)
        return(-1);

    cur = 0;
    while ((path[cur] != 0) && (path[cur] != '/')) cur++;
    if (path[cur] == 0)
        return(0);

    /* we are positioned at the beginning of the first segment */
    cur++;
    out = cur;

    /*
     * First pass: steps c) and d). Each iteration starts at the
     * beginning of a segment; cur reads, out writes, out <= cur.
     */
    while (path[cur] != 0) {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((path[cur] == '.') && (path[cur + 1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((path[cur] == '.') && (path[cur + 1] == 0))
            break;

        /* keep the segment and the '/' following it, if any */
        while ((path[cur] != 0) && (path[cur] != '/'))
            path[out++] = path[cur++];
        if (path[cur] == 0)
            break;
        path[out++] = path[cur++];
    }
    /*
     * Always terminate the compacted string: when a "./" was dropped and
     * the input ended on a '/', the stale tail would otherwise survive.
     */
    path[out] = 0;

    /*
     * Second pass: steps e) and f).
     */
    cur = 0;
    while ((path[cur] != 0) && (path[cur] != '/')) cur++;
    if (path[cur] == 0)
        return(0);
    /* we are positioned at the beginning of the first segment */
    cur++;
    out = cur;

    while (path[cur] != 0) {
        /*
         * e) All occurrences of "<segment>/../", where <segment> is a
         *    complete path segment not equal to "..", are removed from the
         *    buffer string.  Removal of these path segments is performed
         *    iteratively, removing the leftmost matching pattern on each
         *    iteration, until no matching pattern remains.
         *
         * NOTE(review): the "not equal to '..'" test looks at path[out],
         * i.e. the next write slot, rather than at the last characters
         * already written; kept as is, to be confirmed.
         */
        if ((cur > 1) && (out > 1) &&
            (path[cur] == '/') && (path[cur + 1] == '.') &&
            (path[cur + 2] == '.') && (path[cur + 3] == '/') &&
            ((path[out] != '.') || (path[out - 1] != '.') ||
             (path[out - 2] != '/'))) {
            cur += 3;
            out--;
            while ((out > 0) && (path[out] != '/')) { out--; }
            path[out] = 0;
            continue;
        }

        /*
         * f) If the buffer string ends with "<segment>/..", where <segment>
         *    is a complete path segment not equal to "..", that
         *    "<segment>/.." is removed.
         *
         * This is the end of the input, so stop here instead of stepping
         * past the terminator; out is never moved below 0.
         */
        if ((path[cur] == '/') && (path[cur + 1] == '.') &&
            (path[cur + 2] == '.') && (path[cur + 3] == 0) &&
            ((out < 2) || (path[out] != '.') || (path[out - 1] != '.') ||
             (path[out - 2] != '/'))) {
            if (out > 0)
                out--;
            while ((out > 0) && (path[out - 1] != '/')) { out--; }
            break;
        }

        path[out++] = path[cur++];
    }
    path[out] = 0;

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path. Only complete ".." segments
     * count: a segment merely starting with ".." is kept.
     */
    cur = 0;
    while ((path[cur] == '/') && (path[cur + 1] == '.') &&
           (path[cur + 2] == '.') &&
           ((path[cur + 3] == '/') || (path[cur + 3] == 0)))
        cur += 3;
    if (cur != 0) {
        out = 0;
        while (path[cur] != 0) path[out++] = path[cur++];
        path[out] = 0;
    }
    return(0);
}
1417:
1418: /**
1.2 daniel 1419: * xmlBuildURI:
1420: * @URI: the URI instance found in the document
1.4 daniel 1421: * @base: the base value
1.2 daniel 1422: *
1423: * Computes he final URI of the reference done by checking that
1.4 daniel 1424: * the given URI is valid, and building the final URI using the
1.6 daniel 1425: * base URI. This is processed according to section 5.2 of the
1426: * RFC 2396
1427: *
1428: * 5.2. Resolving Relative References to Absolute Form
1.2 daniel 1429: *
1.7 daniel 1430: * Returns a new URI string (to be freed by the caller) or NULL in case
1431: * of error.
1.2 daniel 1432: */
1433: xmlChar *
1.4 daniel 1434: xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1.7 daniel 1435: xmlChar *val = NULL;
1.28 ! veillard 1436: int ret, len, index, cur, out;
1.7 daniel 1437: xmlURIPtr ref = NULL;
1438: xmlURIPtr bas = NULL;
1439: xmlURIPtr res = NULL;
1440:
1441: /*
1442: * 1) The URI reference is parsed into the potential four components and
1443: * fragment identifier, as described in Section 4.3.
1.18 veillard 1444: *
1445: * NOTE that a completely empty URI is treated by modern browsers
1446: * as a reference to "." rather than as a synonym for the current
1447: * URI. Should we do that here?
1.7 daniel 1448: */
1.24 veillard 1449: if (URI == NULL)
1450: ret = -1;
1451: else {
1.28 ! veillard 1452: if (*URI) {
! 1453: ref = xmlCreateURI();
! 1454: if (ref == NULL)
! 1455: goto done;
1.24 veillard 1456: ret = xmlParseURIReference(ref, (const char *) URI);
1.28 ! veillard 1457: }
1.24 veillard 1458: else
1.28 ! veillard 1459: ret = 0;
1.24 veillard 1460: }
1.28 ! veillard 1461: if (ret != 0)
! 1462: goto done;
1.24 veillard 1463: if (base == NULL)
1.28 ! veillard 1464: ret = -1;
1.24 veillard 1465: else {
1466: bas = xmlCreateURI();
1467: if (bas == NULL)
1.17 veillard 1468: goto done;
1.28 ! veillard 1469: ret = xmlParseURIReference(bas, (const char *) base);
1.17 veillard 1470: }
1.28 ! veillard 1471: if (ret != 0) {
! 1472: if (ref)
! 1473: val = xmlSaveUri(ref);
1.8 daniel 1474: goto done;
1.28 ! veillard 1475: }
! 1476: if (ref == NULL) {
1.24 veillard 1477: /*
1478: * the base fragment must be ignored
1479: */
1480: if (bas->fragment != NULL) {
1481: xmlFree(bas->fragment);
1482: bas->fragment = NULL;
1483: }
1484: val = xmlSaveUri(bas);
1.7 daniel 1485: goto done;
1.24 veillard 1486: }
1.7 daniel 1487:
1488: /*
1489: * 2) If the path component is empty and the scheme, authority, and
1490: * query components are undefined, then it is a reference to the
1.18 veillard 1491: * current document and we are done. Otherwise, the reference URI's
1492: * query and fragment components are defined as found (or not found)
1493: * within the URI reference and not inherited from the base URI.
1.17 veillard 1494: *
1.18 veillard 1495: * NOTE that in modern browsers, the parsing differs from the above
1496: * in the following aspect: the query component is allowed to be
1497: * defined while still treating this as a reference to the current
1498: * document.
1.7 daniel 1499: */
1500: res = xmlCreateURI();
1501: if (res == NULL)
1502: goto done;
1503: if ((ref->scheme == NULL) && (ref->path == NULL) &&
1.18 veillard 1504: ((ref->authority == NULL) && (ref->server == NULL))) {
1505: if (bas->scheme != NULL)
1506: res->scheme = xmlMemStrdup(bas->scheme);
1507: if (bas->authority != NULL)
1508: res->authority = xmlMemStrdup(bas->authority);
1509: else if (bas->server != NULL) {
1510: res->server = xmlMemStrdup(bas->server);
1511: if (bas->user != NULL)
1512: res->user = xmlMemStrdup(bas->user);
1513: res->port = bas->port;
1514: }
1515: if (bas->path != NULL)
1516: res->path = xmlMemStrdup(bas->path);
1517: if (ref->query != NULL)
1518: res->query = xmlMemStrdup(ref->query);
1519: else if (bas->query != NULL)
1520: res->query = xmlMemStrdup(bas->query);
1521: if (ref->fragment != NULL)
1522: res->fragment = xmlMemStrdup(ref->fragment);
1523: goto step_7;
1.7 daniel 1524: }
1.17 veillard 1525:
1.18 veillard 1526: if (ref->query != NULL)
1527: res->query = xmlMemStrdup(ref->query);
1528: if (ref->fragment != NULL)
1529: res->fragment = xmlMemStrdup(ref->fragment);
1.7 daniel 1530:
1531: /*
1532: * 3) If the scheme component is defined, indicating that the reference
1533: * starts with a scheme name, then the reference is interpreted as an
1534: * absolute URI and we are done. Otherwise, the reference URI's
1535: * scheme is inherited from the base URI's scheme component.
1536: */
1537: if (ref->scheme != NULL) {
1538: val = xmlSaveUri(ref);
1539: goto done;
1540: }
1.13 daniel 1541: if (bas->scheme != NULL)
1542: res->scheme = xmlMemStrdup(bas->scheme);
1.7 daniel 1543:
1544: /*
1545: * 4) If the authority component is defined, then the reference is a
1546: * network-path and we skip to step 7. Otherwise, the reference
1547: * URI's authority is inherited from the base URI's authority
1548: * component, which will also be undefined if the URI scheme does not
1549: * use an authority component.
1550: */
1.9 daniel 1551: if ((ref->authority != NULL) || (ref->server != NULL)) {
1552: if (ref->authority != NULL)
1553: res->authority = xmlMemStrdup(ref->authority);
1554: else {
1555: res->server = xmlMemStrdup(ref->server);
1556: if (ref->user != NULL)
1557: res->user = xmlMemStrdup(ref->user);
1558: res->port = ref->port;
1559: }
1.8 daniel 1560: if (ref->path != NULL)
1561: res->path = xmlMemStrdup(ref->path);
1.7 daniel 1562: goto step_7;
1.8 daniel 1563: }
1.7 daniel 1564: if (bas->authority != NULL)
1565: res->authority = xmlMemStrdup(bas->authority);
1.9 daniel 1566: else if (bas->server != NULL) {
1567: res->server = xmlMemStrdup(bas->server);
1568: if (bas->user != NULL)
1569: res->user = xmlMemStrdup(bas->user);
1570: res->port = bas->port;
1571: }
1.7 daniel 1572:
1573: /*
1574: * 5) If the path component begins with a slash character ("/"), then
1575: * the reference is an absolute-path and we skip to step 7.
1.18 veillard 1576: */
1577: if ((ref->path != NULL) && (ref->path[0] == '/')) {
1.8 daniel 1578: res->path = xmlMemStrdup(ref->path);
1.7 daniel 1579: goto step_7;
1.8 daniel 1580: }
1.7 daniel 1581:
1582:
1583: /*
1584: * 6) If this step is reached, then we are resolving a relative-path
1585: * reference. The relative path needs to be merged with the base
1586: * URI's path. Although there are many ways to do this, we will
1587: * describe a simple method using a separate string buffer.
1.8 daniel 1588: *
1589: * Allocate a buffer large enough for the result string.
1.7 daniel 1590: */
1591: len = 2; /* extra / and 0 */
1592: if (ref->path != NULL)
1593: len += strlen(ref->path);
1594: if (bas->path != NULL)
1595: len += strlen(bas->path);
1.8 daniel 1596: res->path = (char *) xmlMalloc(len);
1597: if (res->path == NULL) {
1.27 veillard 1598: xmlGenericError(xmlGenericErrorContext,
1599: "xmlBuildURI: out of memory\n");
1.8 daniel 1600: goto done;
1601: }
1602: res->path[0] = 0;
1603:
1604: /*
1605: * a) All but the last segment of the base URI's path component is
1606: * copied to the buffer. In other words, any characters after the
1607: * last (right-most) slash character, if any, are excluded.
1608: */
1609: cur = 0;
1610: out = 0;
1611: if (bas->path != NULL) {
1612: while (bas->path[cur] != 0) {
1613: while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1614: cur++;
1615: if (bas->path[cur] == 0)
1616: break;
1617:
1618: cur++;
1619: while (out < cur) {
1620: res->path[out] = bas->path[out];
1621: out++;
1622: }
1623: }
1624: }
1625: res->path[out] = 0;
1626:
1627: /*
1628: * b) The reference's path component is appended to the buffer
1629: * string.
1630: */
1.22 veillard 1631: if (ref->path != NULL && ref->path[0] != 0) {
1.8 daniel 1632: index = 0;
1.15 veillard 1633: /*
1634: * Ensure the path includes a '/'
1635: */
1.23 veillard 1636: if ((out == 0) && (bas->server != NULL))
1.15 veillard 1637: res->path[out++] = '/';
1.8 daniel 1638: while (ref->path[index] != 0) {
1639: res->path[out++] = ref->path[index++];
1640: }
1641: }
1642: res->path[out] = 0;
1643:
1644: /*
1645: * Steps c) to h) are really path normalization steps
1646: */
1647: xmlNormalizeURIPath(res->path);
1648:
1.7 daniel 1649: step_7:
1650:
1.8 daniel 1651: /*
1652: * 7) The resulting URI components, including any inherited from the
1653: * base URI, are recombined to give the absolute form of the URI
1654: * reference.
1655: */
1656: val = xmlSaveUri(res);
1657:
1.7 daniel 1658: done:
1659: if (ref != NULL)
1660: xmlFreeURI(ref);
1.19 veillard 1661: if (bas != NULL)
1.7 daniel 1662: xmlFreeURI(bas);
1663: if (res != NULL)
1664: xmlFreeURI(res);
1665: return(val);
1.2 daniel 1666: }
1.5 daniel 1667:
1668:
Webmaster