/*
 * nanohttp.c: minimalist HTTP implementation to fetch external subsets.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */

#ifndef WIN32
#include "config.h"
#endif

#include <stdio.h>
#include <string.h>

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif

#define XML_NANO_HTTP_MAX_REDIR 10
#define XML_NANO_HTTP_CHUNK     4096

/* Values for xmlNanoHTTPCtxt.state; WRITE and READ are tested as bit flags. */
#define XML_NANO_HTTP_CLOSED    0
#define XML_NANO_HTTP_WRITE     1
#define XML_NANO_HTTP_READ      2
#define XML_NANO_HTTP_NONE      4

typedef struct xmlNanoHTTPCtxt {
    char *protocol;     /* the protocol name */
    char *hostname;     /* the host name */
    int port;           /* the port */
    char *path;         /* the path within the URL */
    int fd;             /* the file descriptor for the socket, -1 if none */
    int state;          /* WRITE / READ / CLOSED */
    char *out;          /* buffer sent (zero terminated) */
    char *outptr;       /* index within the buffer sent */
    char *in;           /* the receiving buffer */
    char *content;      /* the start of the content */
    char *inptr;        /* the next byte to read from network */
    char *inrptr;       /* the next byte to give back to the client */
    int inlen;          /* len of the input buffer */
    int last;           /* return code for last operation */
    int returnValue;    /* the protocol return value */
    char *contentType;  /* the MIME type for the input */
    char *location;     /* the new URL in case of redirect */
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;

/*
 * xmlNanoHTTPScanURL:
 * @ctxt:  the context to fill
 * @URL:   a URL of the form protocol://host[:port][/path]
 *
 * Splits @URL into ctxt->protocol, ctxt->hostname, ctxt->port and
 * ctxt->path, releasing any previous values first.  ctxt->port is only
 * overwritten when the URL carries a non-zero port.  A component that
 * cannot be parsed (no "://", over-long scheme or host, port above 65535)
 * leaves the corresponding field(s) NULL so that callers reject the URL.
 */
static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
    const char *cur = URL;
    char buf[4096];
    size_t index = 0;
    int port = 0;

    free(ctxt->protocol);
    ctxt->protocol = NULL;
    free(ctxt->hostname);
    ctxt->hostname = NULL;
    free(ctxt->path);
    ctxt->path = NULL;

    if (URL == NULL)
        return;

    /* Scheme: everything up to "://". */
    while (*cur != 0) {
        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
            buf[index] = 0;
            ctxt->protocol = strdup(buf);
            index = 0;
            cur += 3;
            break;
        }
        /* Keep one byte for the terminator; refuse over-long schemes. */
        if (index >= sizeof(buf) - 1)
            return;
        buf[index++] = *cur++;
    }
    if (*cur == 0)
        return;

    /* Host name, optionally followed by ":port". */
    while (1) {
        if (cur[0] == ':') {
            buf[index] = 0;
            ctxt->hostname = strdup(buf);
            cur += 1;
            while ((*cur >= '0') && (*cur <= '9')) {
                /* Stop accumulating once out of range: no int overflow. */
                if (port <= 65535)
                    port = port * 10 + (*cur - '0');
                cur++;
            }
            if (port > 65535) {
                /* Not a valid TCP port: make the URL unusable. */
                free(ctxt->hostname);
                ctxt->hostname = NULL;
                return;
            }
            if (port != 0)
                ctxt->port = port;
            while ((cur[0] != '/') && (*cur != 0))
                cur++;
            break;
        }
        if ((*cur == '/') || (*cur == 0)) {
            buf[index] = 0;
            ctxt->hostname = strdup(buf);
            break;
        }
        /* Refuse over-long host names instead of overflowing buf. */
        if (index >= sizeof(buf) - 1)
            return;
        buf[index++] = *cur++;
    }

    if (*cur == 0)
        ctxt->path = strdup("/");
    else
        ctxt->path = strdup(cur);
}

/*
 * xmlNanoHTTPNewCtxt:
 * @URL:  the URL to parse into the new context
 *
 * Allocates a zeroed context with the default port 80 and no socket
 * (fd == -1), then parses @URL into it.
 *
 * Returns the new context or NULL if the allocation failed.
 */
static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char *URL) {
    xmlNanoHTTPCtxtPtr ret;

    ret = (xmlNanoHTTPCtxtPtr) malloc(sizeof(xmlNanoHTTPCtxt));
    if (ret == NULL)
        return(NULL);

    memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
    ret->port = 80;
    ret->returnValue = 0;
    /* 0 is a valid descriptor (stdin); -1 marks "no socket yet". */
    ret->fd = -1;

    xmlNanoHTTPScanURL(ret, URL);

    return(ret);
}

/*
 * xmlNanoHTTPFreeCtxt:
 * @ctxt:  the context to release (may be NULL)
 *
 * Frees every buffer owned by the context, closes its socket if one was
 * opened, and frees the context itself.
 */
static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
    if (ctxt == NULL)
        return;
    free(ctxt->hostname);
    free(ctxt->protocol);
    free(ctxt->path);
    free(ctxt->out);
    free(ctxt->in);
    free(ctxt->contentType);
    free(ctxt->location);
    ctxt->state = XML_NANO_HTTP_NONE;
    if (ctxt->fd >= 0)
        close(ctxt->fd);
    ctxt->fd = -1;
    free(ctxt);
}

/*
 * xmlNanoHTTPSend:
 * @ctxt:  the context, in WRITE state, with the request in ctxt->outptr
 *
 * Writes the pending request to the socket.  The socket is non-blocking,
 * so short writes are resumed and EWOULDBLOCK waits (up to 10 seconds)
 * for the socket to become writable.  ctxt->outptr is advanced past the
 * bytes sent; it points at the terminating zero when everything went out.
 * ctxt->last holds the result of the last write().
 */
static void
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
    if (!(ctxt->state & XML_NANO_HTTP_WRITE) || (ctxt->outptr == NULL))
        return;

    while (*ctxt->outptr != 0) {
        ssize_t n = write(ctxt->fd, ctxt->outptr, strlen(ctxt->outptr));

        ctxt->last = (int) n;
        if (n > 0) {
            ctxt->outptr += n;
            continue;
        }
        if (n == 0)
            return;
        if (errno == EINTR)
            continue;
#ifdef EWOULDBLOCK
        if (errno == EWOULDBLOCK) {
            fd_set wfd;
            struct timeval tv;

            tv.tv_sec = 10;
            tv.tv_usec = 0;
            FD_ZERO(&wfd);
            FD_SET(ctxt->fd, &wfd);
            if (select(ctxt->fd + 1, NULL, &wfd, NULL, &tv) < 1)
                return;
            continue;
        }
#endif
        return;
    }
}

/*
 * xmlNanoHTTPRecv:
 * @ctxt:  the context, in READ state
 *
 * Reads up to XML_NANO_HTTP_CHUNK bytes from the socket into the input
 * buffer, allocating, compacting or doubling the buffer as needed.
 * When the socket would block, waits up to 10 seconds for data.
 *
 * Returns the number of bytes read (> 0), 0 on end of stream, timeout,
 * read error or when the context is not in READ state, and -1 if the
 * buffer could not be allocated or grown (the existing buffer, if any,
 * is kept intact in that case).
 */
static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
    fd_set rfd;
    struct timeval tv;

    while (ctxt->state & XML_NANO_HTTP_READ) {
        if (ctxt->in == NULL) {
            ctxt->in = (char *) malloc(65000 * sizeof(char));
            if (ctxt->in == NULL) {
                ctxt->last = -1;
                return(-1);
            }
            ctxt->inlen = 65000;
            ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
        }
        /* Drop bytes already handed to the client once a chunk is used. */
        if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
            int delta = ctxt->inrptr - ctxt->in;
            int len = ctxt->inptr - ctxt->inrptr;

            memmove(ctxt->in, ctxt->inrptr, len);
            /* Bytes before inrptr are gone: never let content precede in. */
            if (ctxt->content >= ctxt->inrptr)
                ctxt->content -= delta;
            else
                ctxt->content = ctxt->in;
            ctxt->inrptr -= delta;
            ctxt->inptr -= delta;
        }
        /* Make sure a full chunk fits after inptr. */
        if (ctxt->inlen - (ctxt->inptr - ctxt->in) < XML_NANO_HTTP_CHUNK) {
            int d_inptr = ctxt->inptr - ctxt->in;
            int d_content = ctxt->content - ctxt->in;
            int d_inrptr = ctxt->inrptr - ctxt->in;
            char *tmp;

            /* Keep the old buffer valid if realloc() fails. */
            tmp = (char *) realloc(ctxt->in, (size_t) ctxt->inlen * 2);
            if (tmp == NULL) {
                ctxt->last = -1;
                return(-1);
            }
            ctxt->in = tmp;
            ctxt->inlen *= 2;
            ctxt->inptr = ctxt->in + d_inptr;
            ctxt->content = ctxt->in + d_content;
            ctxt->inrptr = ctxt->in + d_inrptr;
        }
        ctxt->last = read(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK);
        if (ctxt->last > 0) {
            ctxt->inptr += ctxt->last;
            return(ctxt->last);
        }
        if (ctxt->last == 0) {
            return(0);
        }
#ifdef EWOULDBLOCK
        if ((ctxt->last == -1) && (errno != EWOULDBLOCK)) {
            return(0);
        }
#endif
        tv.tv_sec = 10;
        tv.tv_usec = 0;
        FD_ZERO(&rfd);
        FD_SET(ctxt->fd, &rfd);

        if (select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
            return(0);
    }
    return(0);
}

/*
 * xmlNanoHTTPReadLine:
 * @ctxt:  the context, in READ state
 *
 * Extracts the next line from the input, fetching more data from the
 * network when the buffer is exhausted.  Carriage returns are dropped
 * and the line feed is replaced by the terminating zero.  Lines longer
 * than 4095 bytes are returned in pieces.
 *
 * Returns a pointer to an internal static buffer (overwritten by the
 * next call, so not reentrant), or NULL when no more data is available.
 */
char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
    static char buf[4096];
    char *bp = buf;

    while (bp - buf < 4095) {
        if (ctxt->inrptr == ctxt->inptr) {
            /* <= 0: an allocation failure (-1) must also stop the line. */
            if (xmlNanoHTTPRecv(ctxt) <= 0) {
                if (bp == buf)
                    return(NULL);
                *bp = 0;
                return(buf);
            }
        }
        *bp = *ctxt->inrptr++;
        if (*bp == '\n') {
            *bp = 0;
            return(buf);
        }
        if (*bp != '\r')
            bp++;
    }
    buf[4095] = 0;
    return(buf);
}

/*
 * xmlNanoHTTPHeaderValue:
 * @line:  a header line
 * @name:  the header name including the trailing ':'
 *
 * Compares the start of @line with @name, ignoring ASCII case.
 *
 * Returns a pointer to the header value (leading blanks skipped) if the
 * line starts with @name, NULL otherwise.
 */
static const char *
xmlNanoHTTPHeaderValue(const char *line, const char *name) {
    while (*name != 0) {
        char a = *line;
        char b = *name;

        if ((a >= 'A') && (a <= 'Z'))
            a += 'a' - 'A';
        if ((b >= 'A') && (b <= 'Z'))
            b += 'a' - 'A';
        if (a != b)
            return(NULL);
        line++;
        name++;
    }
    while ((*line == ' ') || (*line == '\t'))
        line++;
    return(line);
}

/*
 * xmlNanoHTTPReplaceString:
 * @slot:   the string field to update
 * @value:  the new value
 *
 * Stores a copy of @value in *@slot, freeing the previous string.  The
 * previous string is kept if the copy cannot be allocated.
 */
static void
xmlNanoHTTPReplaceString(char **slot, const char *value) {
    char *copy = strdup(value);

    if (copy == NULL)
        return;
    free(*slot);
    *slot = copy;
}

/*
 * xmlNanoHTTPScanAnswer:
 * @ctxt:  the context receiving the parsed values
 * @line:  one line of the response header (may be NULL)
 *
 * Parses one header line.  A "HTTP/x.y code" status line sets
 * ctxt->returnValue; "Content-Type:" sets ctxt->contentType;
 * "ContentType:" sets it only when no content type is known yet;
 * "Location:" sets ctxt->location.  Header names are matched without
 * regard to case.  Any other line is ignored.
 */
static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
    const char *cur = line;
    const char *value;

    if (line == NULL)
        return;

    if (!strncmp(line, "HTTP/", 5)) {
        int ret = 0;

        /* Skip the "major[.minor]" version; only its syntax matters. */
        cur += 5;
        while ((*cur >= '0') && (*cur <= '9'))
            cur++;
        if (*cur == '.') {
            cur++;
            while ((*cur >= '0') && (*cur <= '9'))
                cur++;
        }
        if ((*cur != ' ') && (*cur != '\t'))
            return;
        while ((*cur == ' ') || (*cur == '\t'))
            cur++;
        if ((*cur < '0') || (*cur > '9'))
            return;
        while ((*cur >= '0') && (*cur <= '9')) {
            ret = ret * 10 + (*cur - '0');
            /* Status codes have three digits; also avoids int overflow. */
            if (ret > 999)
                return;
            cur++;
        }
        if ((*cur != 0) && (*cur != ' ') && (*cur != '\t'))
            return;
        ctxt->returnValue = ret;
    } else if ((value = xmlNanoHTTPHeaderValue(line, "Content-Type:")) != NULL) {
        xmlNanoHTTPReplaceString(&ctxt->contentType, value);
    } else if ((value = xmlNanoHTTPHeaderValue(line, "ContentType:")) != NULL) {
        /* Non-standard spelling: only a fallback. */
        if (ctxt->contentType == NULL)
            ctxt->contentType = strdup(value);
    } else if ((value = xmlNanoHTTPHeaderValue(line, "Location:")) != NULL) {
        xmlNanoHTTPReplaceString(&ctxt->location, value);
    }
}

/*
 * xmlNanoHTTPConnectAttempt:
 * @ia:    the IPv4 address to connect to
 * @port:  the TCP port, in host byte order
 *
 * Creates a non-blocking TCP socket and connects it, waiting up to
 * 60 seconds for the connection to complete.
 *
 * Returns the connected socket or -1 on failure.
 */
static int
xmlNanoHTTPConnectAttempt(struct in_addr ia, int port)
{
    int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in sin;
    fd_set wfd;
    struct timeval tv;
    int status;

    if (s == -1) {
        perror("socket");
        return(-1);
    }

#ifdef _WINSOCKAPI_
    {
        u_long one = 1;

        status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
    }
#else /* _WINSOCKAPI_ */
#if defined(VMS)
    {
        int enable = 1;

        status = IOCTL(s, FIONBIO, &enable);
    }
#else /* VMS */
    if ((status = fcntl(s, F_GETFL, 0)) != -1) {
#ifdef O_NONBLOCK
        status |= O_NONBLOCK;
#else /* O_NONBLOCK */
#ifdef F_NDELAY
        status |= F_NDELAY;
#endif /* F_NDELAY */
#endif /* !O_NONBLOCK */
        status = fcntl(s, F_SETFL, status);
    }
    if (status < 0) {
        perror("nonblocking");
        close(s);
        return(-1);
    }
#endif /* !VMS */
#endif /* !_WINSOCKAPI_ */

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr = ia;
    sin.sin_port = htons(port);

    if ((connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1) &&
        (errno != EINPROGRESS)) {
        perror("connect");
        close(s);
        return(-1);
    }

    tv.tv_sec = 60;     /* We use 60 second timeouts for now */
    tv.tv_usec = 0;

    FD_ZERO(&wfd);
    FD_SET(s, &wfd);

    switch (select(s + 1, NULL, &wfd, NULL, &tv)) {
        case 0:
            /* Time out */
            close(s);
            return(-1);
        case -1:
            /* Ermm.. ?? */
            perror("select");
            close(s);
            return(-1);
        default:
            break;
    }

#if !defined(_WINSOCKAPI_) && !defined(VMS) && defined(SO_ERROR)
    {
        /*
         * A non-blocking connect() that failed also makes the socket
         * writable; the real outcome is reported through SO_ERROR.
         */
        int err = 0;
        socklen_t len = sizeof(err);

        if ((getsockopt(s, SOL_SOCKET, SO_ERROR, (void *) &err, &len) < 0) ||
            (err != 0)) {
            close(s);
            return(-1);
        }
    }
#endif

    return(s);
}

/*
 * xmlNanoHTTPConnectHost:
 * @host:  the host name
 * @port:  the TCP port
 *
 * Resolves @host and tries each of its IPv4 addresses in turn.
 * Failures are reported on stderr.
 *
 * Returns the connected socket or -1 if no address could be reached.
 */
int
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    int i;
    int s;

    h = gethostbyname(host);
    if (h == NULL) {
        fprintf(stderr, "unable to resolve '%s'.\n", host);
        return(-1);
    }

    /* Only IPv4 entries can be copied into a struct in_addr. */
    if ((h->h_addrtype == AF_INET) &&
        (h->h_length == (int) sizeof(struct in_addr))) {
        for (i = 0; h->h_addr_list[i]; i++) {
            struct in_addr ia;

            memcpy(&ia, h->h_addr_list[i], sizeof(ia));
            s = xmlNanoHTTPConnectAttempt(ia, port);
            if (s != -1)
                return(s);
        }
    }

    fprintf(stderr, "unable to connect to '%s'.\n", host);
    return(-1);
}

/*
 * xmlNanoHTTPStartRequest:
 * @ctxt:  a context freshly built from a URL
 *
 * Checks that the URL is a usable http:// URL, connects to the host,
 * sends a "GET path HTTP/1.0" request and parses the response header.
 * On success ctxt->content and ctxt->inrptr point at the first byte of
 * the body received so far.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
xmlNanoHTTPStartRequest(xmlNanoHTTPCtxtPtr ctxt) {
    char buf[4096];
    char *p;
    int n;
    int s;

    if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http")))
        return(-1);
    if ((ctxt->hostname == NULL) || (ctxt->path == NULL))
        return(-1);

    s = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
    if (s < 0)
        return(-1);
    ctxt->fd = s;

    n = snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
                 ctxt->path, ctxt->hostname);
    /* A truncated request would lack its terminating blank line. */
    if ((n < 0) || ((size_t) n >= sizeof(buf)))
        return(-1);

    free(ctxt->out);
    ctxt->outptr = ctxt->out = strdup(buf);
    if (ctxt->out == NULL)
        return(-1);

    ctxt->state = XML_NANO_HTTP_WRITE;
    xmlNanoHTTPSend(ctxt);
    if (*ctxt->outptr != 0)
        return(-1);
    ctxt->state = XML_NANO_HTTP_READ;

    while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
        if (*p == 0) {
            /* Blank line: end of the header, the body starts here. */
            ctxt->content = ctxt->inrptr;
            break;
        }
        xmlNanoHTTPScanAnswer(ctxt, p);
#ifdef DEBUG_HTTP
        fprintf(stderr, "%s\n", p);
#endif
    }
    return(0);
}

/*
 * xmlNanoHTTPIsRedirect:
 * @ctxt:  a context whose response header has been parsed
 *
 * Returns 1 if the server answered with a 3xx code and a Location
 * header, 0 otherwise.
 */
static int
xmlNanoHTTPIsRedirect(xmlNanoHTTPCtxtPtr ctxt) {
    return((ctxt->location != NULL) &&
           (ctxt->returnValue >= 300) && (ctxt->returnValue < 400));
}

/*
 * xmlNanoHTTPOpenOutput:
 * @filename:  the file to write, or "-" for the standard output
 *
 * Returns a writable descriptor, or -1 if the file cannot be opened.
 * Regular files are created with mode 0644 (subject to the umask) and
 * truncated so no stale data remains after the document.
 */
static int
xmlNanoHTTPOpenOutput(const char *filename) {
    if (!strcmp(filename, "-"))
        return(1);      /* standard output */
    return(open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0644));
}

/*
 * xmlNanoHTTPCloseOutput:
 * @filename:  the name passed to xmlNanoHTTPOpenOutput()
 * @fd:        the descriptor it returned
 *
 * Closes @fd unless it is the standard output.
 */
static void
xmlNanoHTTPCloseOutput(const char *filename, int fd) {
    if (strcmp(filename, "-"))
        close(fd);
}

/*
 * xmlNanoHTTPWriteAll:
 * @fd:    the output descriptor
 * @data:  the bytes to write
 * @len:   the number of bytes
 *
 * Writes the whole buffer, resuming after short writes and EINTR.
 *
 * Returns 0 on success, -1 on error.
 */
static int
xmlNanoHTTPWriteAll(int fd, const char *data, size_t len) {
    while (len > 0) {
        ssize_t n = write(fd, data, len);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            return(-1);
        }
        if (n == 0)
            return(-1);
        data += n;
        len -= (size_t) n;
    }
    return(0);
}

/*
 * xmlNanoHTTPOldFetch:
 * @URL:          the http:// URL to fetch
 * @filename:     the file receiving the body, or "-" for standard output
 * @contentType:  currently unused; never written to
 *
 * Fetches @URL, following up to XML_NANO_HTTP_MAX_REDIR redirections,
 * reads the whole answer in memory and then saves the body to @filename.
 *
 * Returns 0 on success, -1 on failure (bad URL, connection failure,
 * too many redirections, output file or write error).
 */
int
xmlNanoHTTPOldFetch(const char *URL, const char *filename,
                    char **contentType) {
    xmlNanoHTTPCtxtPtr ctxt;
    int fd;
    int ret = 0;
    int nbRedirects = 0;
    char *redirURL = NULL;

    (void) contentType;

    if ((URL == NULL) || (filename == NULL))
        return(-1);

    for (;;) {
        ctxt = xmlNanoHTTPNewCtxt((redirURL != NULL) ? redirURL : URL);
        free(redirURL);
        redirURL = NULL;
        if (ctxt == NULL)
            return(-1);

        if (xmlNanoHTTPStartRequest(ctxt) < 0) {
            xmlNanoHTTPFreeCtxt(ctxt);
            return(-1);
        }

        /* Pull the rest of the answer into the buffer. */
        while (xmlNanoHTTPRecv(ctxt) > 0)
            ;

#ifdef DEBUG_HTTP
        fprintf(stderr, "Code %d, content-type '%s'\n\n", ctxt->returnValue,
                (ctxt->contentType != NULL) ? ctxt->contentType : "");
#endif

        if (!xmlNanoHTTPIsRedirect(ctxt))
            break;

#ifdef DEBUG_HTTP
        fprintf(stderr, "Redirect to: %s\n", ctxt->location);
#endif
        if (nbRedirects >= XML_NANO_HTTP_MAX_REDIR) {
            xmlNanoHTTPFreeCtxt(ctxt);
            return(-1);
        }
        nbRedirects++;
        redirURL = strdup(ctxt->location);
        xmlNanoHTTPFreeCtxt(ctxt);
        if (redirURL == NULL)
            return(-1);
    }

    /* The output is only opened once the final answer is known. */
    fd = xmlNanoHTTPOpenOutput(filename);
    if (fd < 0) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(-1);
    }

    if ((ctxt->content != NULL) && (ctxt->inptr > ctxt->content))
        ret = xmlNanoHTTPWriteAll(fd, ctxt->content,
                                  (size_t) (ctxt->inptr - ctxt->content));

    xmlNanoHTTPCloseOutput(filename, fd);
    xmlNanoHTTPFreeCtxt(ctxt);
    return(ret);
}

/*
 * xmlNanoHTTPOpen:
 * @URL:          the http:// URL to fetch
 * @contentType:  currently unused; never written to
 *
 * Connects to the server, sends a GET request and parses the response
 * header, following up to XML_NANO_HTTP_MAX_REDIR redirections.
 *
 * Returns an opaque context to use with xmlNanoHTTPRead() and to release
 * with xmlNanoHTTPClose(), or NULL on failure.
 */
void *
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    xmlNanoHTTPCtxtPtr ctxt;
    int nbRedirects = 0;
    char *redirURL = NULL;

    (void) contentType;

    if (URL == NULL)
        return(NULL);

    for (;;) {
        ctxt = xmlNanoHTTPNewCtxt((redirURL != NULL) ? redirURL : URL);
        free(redirURL);
        redirURL = NULL;
        if (ctxt == NULL)
            return(NULL);

        if (xmlNanoHTTPStartRequest(ctxt) < 0) {
            xmlNanoHTTPFreeCtxt(ctxt);
            return(NULL);
        }

        if (!xmlNanoHTTPIsRedirect(ctxt))
            break;

#ifdef DEBUG_HTTP
        fprintf(stderr, "Redirect to: %s\n", ctxt->location);
#endif
        /* Drain the body of the redirection answer. */
        while (xmlNanoHTTPRecv(ctxt) > 0)
            ;
        if (nbRedirects >= XML_NANO_HTTP_MAX_REDIR) {
            xmlNanoHTTPFreeCtxt(ctxt);
            return(NULL);
        }
        nbRedirects++;
        redirURL = strdup(ctxt->location);
        xmlNanoHTTPFreeCtxt(ctxt);
        if (redirURL == NULL)
            return(NULL);
    }

#ifdef DEBUG_HTTP
    fprintf(stderr, "Code %d, content-type '%s'\n\n", ctxt->returnValue,
            (ctxt->contentType != NULL) ? ctxt->contentType : "");
#endif

    return((void *) ctxt);
}

/*
 * xmlNanoHTTPRead:
 * @ctx:   the context returned by xmlNanoHTTPOpen()
 * @dest:  the destination buffer
 * @len:   the size of @dest
 *
 * Copies up to @len bytes of the answer body into @dest, reading from
 * the network until @len bytes are available or the stream ends.
 *
 * Returns the number of bytes copied, 0 at the end of the stream or
 * when @len <= 0, and -1 if @ctx or @dest is NULL.
 */
int
xmlNanoHTTPRead(void *ctx, void *dest, int len) {
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;

    if (ctx == NULL)
        return(-1);
    if (dest == NULL)
        return(-1);
    if (len <= 0)
        return(0);

    while (ctxt->inptr - ctxt->inrptr < len) {
        /* <= 0: stop on end of stream and on allocation failure (-1). */
        if (xmlNanoHTTPRecv(ctxt) <= 0)
            break;
    }
    if (ctxt->inptr - ctxt->inrptr < len)
        len = ctxt->inptr - ctxt->inrptr;
    if (len > 0) {
        memcpy(dest, ctxt->inrptr, len);
        ctxt->inrptr += len;
    }
    return(len);
}

/*
 * xmlNanoHTTPClose:
 * @ctx:  the context returned by xmlNanoHTTPOpen() (may be NULL)
 *
 * Closes the connection and releases the context.
 */
void
xmlNanoHTTPClose(void *ctx) {
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;

    if (ctx == NULL)
        return;

    xmlNanoHTTPFreeCtxt(ctxt);
}

/*
 * xmlNanoHTTPFetch:
 * @URL:          the http:// URL to fetch
 * @filename:     the file receiving the body, or "-" for standard output
 * @contentType:  passed to xmlNanoHTTPOpen(); currently never written to
 *
 * Fetches @URL and streams the body to @filename.
 *
 * Returns 0 on success, -1 on failure (fetch, output file or write
 * error).
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int len;
    int ret = 0;

    if (filename == NULL)
        return(-1);

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL)
        return(-1);

    fd = xmlNanoHTTPOpenOutput(filename);
    if (fd < 0) {
        xmlNanoHTTPClose(ctxt);
        return(-1);
    }

    while ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0) {
        if (xmlNanoHTTPWriteAll(fd, buf, (size_t) len) < 0) {
            ret = -1;
            break;
        }
    }

    xmlNanoHTTPCloseOutput(filename, fd);
    xmlNanoHTTPClose(ctxt);
    return(ret);
}

#ifdef STANDALONE
int main(int argc, char **argv) {
    char *contentType = NULL;

    if (argv[1] != NULL) {
        if (argv[2] != NULL)
            xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
        else
            xmlNanoHTTPFetch(argv[1], "-", &contentType);
    } else {
        printf("%s: minimal HTTP GET implementation\n", argv[0]);
        printf("\tusage %s [ URL [ filename ] ]\n", argv[0]);
    }
    return(0);
}
#endif /* STANDALONE */