Annotation of XML/nanohttp.c, revision 1.4
1.1 daniel 1: /*
2: * nanohttp.c: minimalist HTTP implementation to fetch external subsets.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.4 ! daniel 9: #ifndef WIN32
! 10: #include "config.h"
! 11: #endif
! 12:
1.1 daniel 13: #include <stdio.h>
14: #include <string.h>
1.4 ! daniel 15:
! 16: #ifdef HAVE_STDLIB_H
1.1 daniel 17: #include <stdlib.h>
1.4 ! daniel 18: #endif
! 19: #ifdef HAVE_UNISTD_H
1.1 daniel 20: #include <unistd.h>
1.4 ! daniel 21: #endif
! 22: #ifdef HAVE_SYS_SOCKET_H
1.1 daniel 23: #include <sys/socket.h>
1.4 ! daniel 24: #endif
! 25: #ifdef HAVE_NETINET_IN_H
1.1 daniel 26: #include <netinet/in.h>
1.4 ! daniel 27: #endif
! 28: #ifdef HAVE_ARPA_INET_H
1.1 daniel 29: #include <arpa/inet.h>
1.4 ! daniel 30: #endif
! 31: #ifdef HAVE_NETDB_H
1.1 daniel 32: #include <netdb.h>
1.4 ! daniel 33: #endif
! 34: #ifdef HAVE_FCNTL_H
1.1 daniel 35: #include <fcntl.h>
1.4 ! daniel 36: #endif
! 37: #ifdef HAVE_ERRNO_H
1.1 daniel 38: #include <errno.h>
1.4 ! daniel 39: #endif
! 40: #ifdef HAVE_SYS_TIME_H
1.1 daniel 41: #include <sys/time.h>
1.4 ! daniel 42: #endif
! 43: #ifdef HAVE_SYS_SELECT_H
1.1 daniel 44: #include <sys/select.h>
1.4 ! daniel 45: #endif
1.1 daniel 46:
/* Upper bound on the number of HTTP redirects followed for one request. */
47: #define XML_NANO_HTTP_MAX_REDIR 10
48:
/*
 * Number of bytes requested from the socket by each read(); the receive
 * code also uses it as the threshold for compacting and for growing the
 * input buffer.
 */
49: #define XML_NANO_HTTP_CHUNK 4096
50:
/*
 * Values stored in xmlNanoHTTPCtxt.state. WRITE and READ are tested with
 * a bitwise AND by the send and receive routines; NONE is the value set
 * while a context is being freed.
 */
51: #define XML_NANO_HTTP_CLOSED 0
52: #define XML_NANO_HTTP_WRITE 1
53: #define XML_NANO_HTTP_READ 2
54: #define XML_NANO_HTTP_NONE 4
55:
/*
 * xmlNanoHTTPCtxt:
 *
 * State of one HTTP request: the pieces of the parsed URL, the socket,
 * the request text being sent, the growable receive buffer with its
 * cursors, and the fields extracted from the response headers.
 * The string members and the two buffers (out, in) are heap allocated
 * and released by xmlNanoHTTPFreeCtxt().
 */
56: typedef struct xmlNanoHTTPCtxt {
57: char *protocol; /* the protocol name */
58: char *hostname; /* the host name */
59: int port; /* the port */
60: char *path; /* the path within the URL */
61: int fd; /* the file descriptor for the socket */
62: int state; /* WRITE / READ / CLOSED */
63: char *out; /* buffer sent (zero terminated) */
64: char *outptr; /* index within the buffer sent */
65: char *in; /* the receiving buffer */
66: char *content; /* the start of the content */
67: char *inptr; /* the next byte to read from network */
68: char *inrptr; /* the next byte to give back to the client */
69: int inlen; /* len of the input buffer */
70: int last; /* return code for last operation */
71: int returnValue; /* the protocol return value */
72: char *contentType; /* the MIME type for the input */
73: char *location; /* the new URL in case of redirect */
74: } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
75:
76: static void xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
77: const char *cur = URL;
78: char buf[4096];
79: int index = 0;
80: int port = 0;
81:
82: if (ctxt->protocol != NULL) {
83: free(ctxt->protocol);
84: ctxt->protocol = NULL;
85: }
86: if (ctxt->hostname != NULL) {
87: free(ctxt->hostname);
88: ctxt->hostname = NULL;
89: }
90: if (ctxt->path != NULL) {
91: free(ctxt->path);
92: ctxt->path = NULL;
93: }
94: buf[index] = 0;
95: while (*cur != 0) {
96: if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
97: buf[index] = 0;
98: ctxt->protocol = strdup(buf);
99: index = 0;
100: cur += 3;
101: break;
102: }
103: buf[index++] = *cur++;
104: }
105: if (*cur == 0) return;
106:
107: buf[index] = 0;
108: while (1) {
109: if (cur[0] == ':') {
110: buf[index] = 0;
111: ctxt->hostname = strdup(buf);
112: index = 0;
113: cur += 1;
114: while ((*cur >= '0') && (*cur <= '9')) {
115: port *= 10;
116: port += *cur - '0';
117: cur++;
118: }
119: if (port != 0) ctxt->port = port;
120: while ((cur[0] != '/') && (*cur != 0))
121: cur++;
122: break;
123: }
124: if ((*cur == '/') || (*cur == 0)) {
125: buf[index] = 0;
126: ctxt->hostname = strdup(buf);
127: index = 0;
128: break;
129: }
130: buf[index++] = *cur++;
131: }
132: if (*cur == 0)
133: ctxt->path = strdup("/");
134: else
135: ctxt->path = strdup(cur);
136: }
137:
138: static xmlNanoHTTPCtxtPtr xmlNanoHTTPNewCtxt(const char *URL) {
139: xmlNanoHTTPCtxtPtr ret;
140:
141: ret = (xmlNanoHTTPCtxtPtr) malloc(sizeof(xmlNanoHTTPCtxt));
142: if (ret == NULL) return(NULL);
143:
144: memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
145: ret->port = 80;
146: ret->returnValue = 0;
147:
148: xmlNanoHTTPScanURL(ret, URL);
149:
150: return(ret);
151: }
152:
153: static void xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
154: if (ctxt->hostname != NULL) free(ctxt->hostname);
155: if (ctxt->protocol != NULL) free(ctxt->protocol);
156: if (ctxt->path != NULL) free(ctxt->path);
157: if (ctxt->out != NULL) free(ctxt->out);
158: if (ctxt->in != NULL) free(ctxt->in);
159: if (ctxt->contentType != NULL) free(ctxt->contentType);
160: if (ctxt->location != NULL) free(ctxt->location);
161: ctxt->state = XML_NANO_HTTP_NONE;
162: if (ctxt->fd >= 0) close(ctxt->fd);
163: ctxt->fd = -1;
164: free(ctxt);
165: }
166:
167: static void xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
168: if (ctxt->state & XML_NANO_HTTP_WRITE)
169: ctxt->last = write(ctxt->fd, ctxt->outptr, strlen(ctxt->outptr));
170: }
171:
172: static int xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
173: fd_set rfd;
174: struct timeval tv;
175:
176:
177: while (ctxt->state & XML_NANO_HTTP_READ) {
178: if (ctxt->in == NULL) {
179: ctxt->in = (char *) malloc(65000 * sizeof(char));
180: if (ctxt->in == NULL) {
181: ctxt->last = -1;
182: return(-1);
183: }
184: ctxt->inlen = 65000;
185: ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
186: }
187: if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
188: int delta = ctxt->inrptr - ctxt->in;
189: int len = ctxt->inptr - ctxt->inrptr;
190:
191: memmove(ctxt->in, ctxt->inrptr, len);
192: ctxt->inrptr -= delta;
193: ctxt->content -= delta;
194: ctxt->inptr -= delta;
195: }
196: if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
197: int d_inptr = ctxt->inptr - ctxt->in;
198: int d_content = ctxt->content - ctxt->in;
199: int d_inrptr = ctxt->inrptr - ctxt->in;
200:
201: ctxt->inlen *= 2;
202: ctxt->in = (char *) realloc(ctxt->in, ctxt->inlen);
203: if (ctxt->in == NULL) {
204: ctxt->last = -1;
205: return(-1);
206: }
207: ctxt->inptr = ctxt->in + d_inptr;
208: ctxt->content = ctxt->in + d_content;
209: ctxt->inrptr = ctxt->in + d_inrptr;
210: }
211: ctxt->last = read(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK);
212: if (ctxt->last > 0) {
213: ctxt->inptr += ctxt->last;
214: return(ctxt->last);
215: }
216: if (ctxt->last == 0) {
217: return(0);
218: }
219: #ifdef EWOULDBLOCK
220: if ((ctxt->last == -1) && (errno != EWOULDBLOCK)) {
221: return 0;
222: }
223: #endif
224: tv.tv_sec=10;
225: tv.tv_usec=0;
226: FD_ZERO(&rfd);
227: FD_SET(ctxt->fd, &rfd);
228:
229: if(select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
230: return 0;
231: }
232: return(0);
233: }
234:
235: char *xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
236: static char buf[4096];
237: char *bp=buf;
238:
239: while(bp - buf < 4095) {
240: if(ctxt->inrptr == ctxt->inptr) {
241: if (xmlNanoHTTPRecv(ctxt) == 0) {
242: if (bp == buf)
243: return NULL;
244: else
245: *bp = 0;
246: return buf;
247: }
248: }
249: *bp = *ctxt->inrptr++;
250: if(*bp == '\n') {
251: *bp = 0;
252: return buf;
253: }
254: if(*bp != '\r')
255: bp++;
256: }
257: buf[4095] = 0;
258: return(buf);
259: }
260:
261:
262: static void xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
263: const char *cur = line;
264:
265: if (line == NULL) return;
266:
267: if (!strncmp(line, "HTTP/", 5)) {
268: int version = 0;
269: int ret = 0;
270:
271: cur += 5;
272: while ((*cur >= '0') && (*cur <= '9')) {
273: version *= 10;
274: version += *cur - '0';
275: cur++;
276: }
277: if (*cur == '.') {
278: cur++;
279: if ((*cur >= '0') && (*cur <= '9')) {
280: version *= 10;
281: version += *cur - '0';
282: cur++;
283: }
284: while ((*cur >= '0') && (*cur <= '9'))
285: cur++;
286: } else
287: version *= 10;
288: if ((*cur != ' ') && (*cur != '\t')) return;
289: while ((*cur == ' ') || (*cur == '\t')) cur++;
290: if ((*cur < '0') || (*cur > '9')) return;
291: while ((*cur >= '0') && (*cur <= '9')) {
292: ret *= 10;
293: ret += *cur - '0';
294: cur++;
295: }
296: if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
297: ctxt->returnValue = ret;
298: } else if (!strncmp(line, "Content-Type:", 13)) {
299: cur += 13;
300: while ((*cur == ' ') || (*cur == '\t')) cur++;
301: if (ctxt->contentType != NULL)
302: free(ctxt->contentType);
303: ctxt->contentType = strdup(cur);
304: } else if (!strncmp(line, "ContentType:", 12)) {
305: cur += 12;
306: if (ctxt->contentType != NULL) return;
307: while ((*cur == ' ') || (*cur == '\t')) cur++;
308: ctxt->contentType = strdup(cur);
309: } else if (!strncmp(line, "content-type:", 13)) {
310: cur += 13;
311: if (ctxt->contentType != NULL) return;
312: while ((*cur == ' ') || (*cur == '\t')) cur++;
313: ctxt->contentType = strdup(cur);
314: } else if (!strncmp(line, "contenttype:", 12)) {
315: cur += 12;
316: if (ctxt->contentType != NULL) return;
317: while ((*cur == ' ') || (*cur == '\t')) cur++;
318: ctxt->contentType = strdup(cur);
319: } else if (!strncmp(line, "Location:", 9)) {
320: cur += 9;
321: while ((*cur == ' ') || (*cur == '\t')) cur++;
322: if (ctxt->location != NULL)
323: free(ctxt->location);
324: ctxt->location = strdup(cur);
325: } else if (!strncmp(line, "location:", 9)) {
326: cur += 9;
327: if (ctxt->location != NULL) return;
328: while ((*cur == ' ') || (*cur == '\t')) cur++;
329: ctxt->location = strdup(cur);
330: }
331: }
332:
/*
 * xmlNanoHTTPConnectAttempt:
 * @ia:   the IPv4 address to connect to
 * @port: the TCP port, in host byte order
 *
 * Creates a TCP socket, switches it to non-blocking mode, starts the
 * connection and waits up to 60 seconds for it to complete.
 *
 * select() reports the socket as writable both when the connection has
 * been established and when it has failed, so the pending socket error
 * is queried afterwards; without that check a refused connection would
 * be handed back as a usable socket.
 *
 * Returns the connected (non-blocking) socket, or -1 on failure.
 */
static int xmlNanoHTTPConnectAttempt(struct in_addr ia, int port)
{
    int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in sin;
    fd_set wfd;
    struct timeval tv;
    int status;

    if (s == -1) {
        perror("socket");
        return(-1);
    }

#ifdef _WINSOCKAPI_
    {
        u_long one = 1;

        status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
    }
#else /* _WINSOCKAPI_ */
#if defined(VMS)
    {
        int enable = 1;

        status = IOCTL(s, FIONBIO, &enable);
    }
#else /* VMS */
    if ((status = fcntl(s, F_GETFL, 0)) != -1) {
#ifdef O_NONBLOCK
        status |= O_NONBLOCK;
#else /* O_NONBLOCK */
#ifdef F_NDELAY
        status |= F_NDELAY;
#endif /* F_NDELAY */
#endif /* !O_NONBLOCK */
        status = fcntl(s, F_SETFL, status);
    }
#endif /* !VMS */
#endif /* !_WINSOCKAPI_ */

    /* checked on every platform, not only in the fcntl() branch */
    if (status < 0) {
        perror("nonblocking");
        close(s);
        return(-1);
    }

#if defined(FD_SETSIZE) && !defined(_WINSOCKAPI_)
    /* FD_SET() below must not be given a descriptor beyond the set size */
    if (s >= FD_SETSIZE) {
        close(s);
        return(-1);
    }
#endif

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr = ia;
    sin.sin_port = htons(port);

    if ((connect(s, (struct sockaddr *) &sin, sizeof(sin)) == -1) &&
        (errno != EINPROGRESS)) {
        perror("connect");
        close(s);
        return(-1);
    }

    tv.tv_sec = 60;		/* We use 60 second timeouts for now */
    tv.tv_usec = 0;

    FD_ZERO(&wfd);
    FD_SET(s, &wfd);

    status = select(s + 1, NULL, &wfd, NULL, &tv);
    if (status == 0) {
        /* Time out */
        close(s);
        return(-1);
    }
    if (status == -1) {
        perror("select");
        close(s);
        return(-1);
    }

#if defined(SO_ERROR) && !defined(_WINSOCKAPI_) && !defined(VMS)
    {
        int err = 0;
        socklen_t len = sizeof(err);

        if ((getsockopt(s, SOL_SOCKET, SO_ERROR, (void *) &err, &len) < 0) ||
            (err != 0)) {
            if (err != 0) errno = err;
            perror("connect");
            close(s);
            return(-1);
        }
    }
#endif

    return(s);
}
413:
/*
 * xmlNanoHTTPConnectHost:
 * @host: the host name to resolve
 * @port: the TCP port, in host byte order
 *
 * Resolves @host and tries each of its addresses in turn until one
 * connection attempt succeeds. Only IPv4 results are usable, because the
 * connection routine takes a struct in_addr; the address family and
 * length of the lookup result are checked before any address is copied.
 *
 * Returns the connected socket, or -1 (after printing a message on
 * stderr) if the name cannot be resolved or no address can be reached.
 */
int xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    int i;
    int s;

    if (host == NULL)
        return(-1);

    h = gethostbyname(host);
    if (h == NULL) {
        fprintf(stderr,"unable to resolve '%s'.\n", host);
        return(-1);
    }

    if ((h->h_addrtype == AF_INET) &&
        (h->h_length == (int) sizeof(struct in_addr))) {
        for (i = 0; h->h_addr_list[i]; i++) {
            struct in_addr ia;

            memcpy(&ia, h->h_addr_list[i], sizeof(ia));
            s = xmlNanoHTTPConnectAttempt(ia, port);
            if (s != -1)
                return(s);
        }
    }

    fprintf(stderr, "unable to connect to '%s'.\n", host);
    return(-1);
}
439:
440: int xmlNanoHTTPOldFetch(const char *URL, const char *filename,
441: char **contentType) {
442: xmlNanoHTTPCtxtPtr ctxt;
443: char buf[4096];
444: int ret;
445: int fd;
446: char *p;
447: int head;
448: int nbRedirects = 0;
449: char *redirURL = NULL;
450:
451: retry:
452: if (redirURL == NULL)
453: ctxt = xmlNanoHTTPNewCtxt(URL);
454: else
455: ctxt = xmlNanoHTTPNewCtxt(redirURL);
456:
457: if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
458: xmlNanoHTTPFreeCtxt(ctxt);
459: if (redirURL != NULL) free(redirURL);
460: return(-1);
461: }
462: if (ctxt->hostname == NULL) {
463: xmlNanoHTTPFreeCtxt(ctxt);
464: if (redirURL != NULL) free(redirURL);
465: return(-1);
466: }
467: ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
468: if (ret < 0) {
469: xmlNanoHTTPFreeCtxt(ctxt);
470: if (redirURL != NULL) free(redirURL);
471: return(-1);
472: }
473: ctxt->fd = ret;
474: snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
475: ctxt->path, ctxt->hostname);
476: ctxt->outptr = ctxt->out = strdup(buf);
477: ctxt->state = XML_NANO_HTTP_WRITE;
478: xmlNanoHTTPSend(ctxt);
479: ctxt->state = XML_NANO_HTTP_READ;
480: head = 1;
481:
482: while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
483: if (head && (*p == 0)) {
484: head = 0;
485: ctxt->content = ctxt->inrptr;
486: break;
487: }
488: xmlNanoHTTPScanAnswer(ctxt, p);
489: if (p != NULL) printf("%s\n", p);
490: }
491: while (xmlNanoHTTPRecv(ctxt)) ;
492:
493: if (!strcmp(filename, "-"))
494: fd = 0;
495: else {
496: fd = open(filename, O_CREAT | O_WRONLY);
497: if (fd < 0) {
498: xmlNanoHTTPFreeCtxt(ctxt);
499: if (redirURL != NULL) free(redirURL);
500: return(-1);
501: }
502: }
503:
504: printf("Code %d, content-type '%s'\n\n",
505: ctxt->returnValue, ctxt->contentType);
506: if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
507: (ctxt->returnValue < 400)) {
508: printf("Redirect to: %s\n", ctxt->location);
509: if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
510: nbRedirects++;
511: if (redirURL != NULL) free(redirURL);
512: redirURL = strdup(ctxt->location);
513: xmlNanoHTTPFreeCtxt(ctxt);
514: goto retry;
515: }
516: }
517:
518: write(fd, ctxt->content, ctxt->inptr - ctxt->content);
519: xmlNanoHTTPFreeCtxt(ctxt);
520: if (redirURL != NULL) free(redirURL);
521: return(0);
522: }
523:
524: void *
525: xmlNanoHTTPOpen(const char *URL, char **contentType) {
526: xmlNanoHTTPCtxtPtr ctxt;
527: char buf[4096];
528: int ret;
529: char *p;
530: int head;
531: int nbRedirects = 0;
532: char *redirURL = NULL;
533:
534: retry:
535: if (redirURL == NULL)
536: ctxt = xmlNanoHTTPNewCtxt(URL);
537: else {
538: ctxt = xmlNanoHTTPNewCtxt(redirURL);
539: free(redirURL);
540: redirURL = NULL;
541: }
542:
543: if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
544: xmlNanoHTTPFreeCtxt(ctxt);
545: if (redirURL != NULL) free(redirURL);
546: return(NULL);
547: }
548: if (ctxt->hostname == NULL) {
549: xmlNanoHTTPFreeCtxt(ctxt);
550: return(NULL);
551: }
552: ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
553: if (ret < 0) {
554: xmlNanoHTTPFreeCtxt(ctxt);
555: return(NULL);
556: }
557: ctxt->fd = ret;
558: snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
559: ctxt->path, ctxt->hostname);
560: ctxt->outptr = ctxt->out = strdup(buf);
561: ctxt->state = XML_NANO_HTTP_WRITE;
562: xmlNanoHTTPSend(ctxt);
563: ctxt->state = XML_NANO_HTTP_READ;
564: head = 1;
565:
566: while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
567: if (head && (*p == 0)) {
568: head = 0;
569: ctxt->content = ctxt->inrptr;
570: break;
571: }
572: xmlNanoHTTPScanAnswer(ctxt, p);
573:
574: if (p != NULL) printf("%s\n", p);
575: }
576:
577: if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
578: (ctxt->returnValue < 400)) {
579: printf("Redirect to: %s\n", ctxt->location);
580: while (xmlNanoHTTPRecv(ctxt)) ;
581: if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
582: nbRedirects++;
583: redirURL = strdup(ctxt->location);
584: xmlNanoHTTPFreeCtxt(ctxt);
585: goto retry;
586: }
587: xmlNanoHTTPFreeCtxt(ctxt);
588: return(NULL);
589:
590: }
591:
592: printf("Code %d, content-type '%s'\n\n",
593: ctxt->returnValue, ctxt->contentType);
594:
595: return((void *) ctxt);
596: }
597:
598: int
599: xmlNanoHTTPRead(void *ctx, void *dest, int len) {
600: xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
601:
602: if (ctx == NULL) return(-1);
603: if (dest == NULL) return(-1);
604: if (len <= 0) return(0);
605:
606: while (ctxt->inptr - ctxt->inrptr < len) {
607: if (xmlNanoHTTPRecv(ctxt) == 0) break;
608: }
609: if (ctxt->inptr - ctxt->inrptr < len)
610: len = ctxt->inptr - ctxt->inrptr;
611: memcpy(dest, ctxt->inrptr, len);
612: ctxt->inrptr += len;
613: return(len);
614: }
615:
616: void
617: xmlNanoHTTPClose(void *ctx) {
618: xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
619:
620: if (ctx == NULL) return;
621:
622: xmlNanoHTTPFreeCtxt(ctxt);
623: }
624:
/*
 * xmlNanoHTTPFetch:
 * @URL:         the http:// URL to fetch
 * @filename:    the file to save the body to, or "-" for standard output
 * @contentType: passed through to xmlNanoHTTPOpen()
 *
 * Fetches @URL and streams the response body to @filename in chunks of
 * at most 4096 bytes. The output file is created with mode 0644,
 * truncated, and closed before returning.
 *
 * Returns 0 on success, -1 if the URL cannot be opened or the output
 * cannot be opened or written.
 */
int xmlNanoHTTPFetch(const char *URL, const char *filename,
                     char **contentType) {
    void *ctxt;
    char buf[4096];
    int toStdout;
    int fd;
    int len;
    int result = 0;

    if (filename == NULL) return(-1);

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = !strcmp(filename, "-");
    if (toStdout) {
        fd = 1;		/* standard output */
    } else {
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    while ((result == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int done = 0;

        /* write() may accept less than requested */
        while (done < len) {
            int n = write(fd, buf + done, len - done);

            if (n <= 0) {
                result = -1;
                break;
            }
            done += n;
        }
    }

    if (!toStdout && (close(fd) < 0))
        result = -1;

    xmlNanoHTTPClose(ctxt);
    return(result);
}
652:
#ifdef STANDALONE
/*
 * Stand-alone test driver: fetches argv[1] into argv[2], or to standard
 * output when no file name is given. Without arguments it prints a
 * usage message. The exit status is 1 when the fetch fails, 0 otherwise.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;
    int status = 0;

    if ((argc > 1) && (argv[1] != NULL)) {
        const char *target = "-";

        if ((argc > 2) && (argv[2] != NULL))
            target = argv[2];
        if (xmlNanoHTTPFetch(argv[1], target, &contentType) < 0)
            status = 1;
        /* still NULL unless the fetch stored a content type */
        free(contentType);
    } else {
        const char *prog = "nanohttp";

        if ((argc > 0) && (argv[0] != NULL))
            prog = argv[0];
        printf("%s: minimal HTTP GET implementation\n", prog);
        printf("\tusage %s [ URL [ filename ] ]\n", prog);
    }
    return(status);
}
#endif /* STANDALONE */
Webmaster