/*
* nanohttp.c: minimalist HTTP implementation to fetch external subsets.
*
* See Copyright for the status of this software.
*
* Daniel.Veillard@w3.org
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/select.h>
/* Maximum number of HTTP redirections followed for one request. */
#define XML_NANO_HTTP_MAX_REDIR 10
/*
 * Number of bytes requested from the socket by each read(); also used as
 * the threshold for compacting and for growing the receive buffer.
 */
#define XML_NANO_HTTP_CHUNK 4096
/*
 * Values stored in the "state" field of the context. WRITE and READ are
 * tested with a bitwise AND before sending and receiving respectively.
 */
#define XML_NANO_HTTP_CLOSED 0
#define XML_NANO_HTTP_WRITE 1
#define XML_NANO_HTTP_READ 2
#define XML_NANO_HTTP_NONE 4
typedef struct xmlNanoHTTPCtxt {
char *protocol; /* the protocol name */
char *hostname; /* the host name */
int port; /* the port */
char *path; /* the path within the URL */
int fd; /* the file descriptor for the socket */
int state; /* WRITE / READ / CLOSED */
char *out; /* buffer sent (zero terminated) */
char *outptr; /* index within the buffer sent */
char *in; /* the receiving buffer */
char *content; /* the start of the content */
char *inptr; /* the next byte to read from network */
char *inrptr; /* the next byte to give back to the client */
int inlen; /* len of the input buffer */
int last; /* return code for last operation */
int returnValue; /* the protocol return value */
char *contentType; /* the MIME type for the input */
char *location; /* the new URL in case of redirect */
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
/*
 * xmlNanoHTTPDupRange: return a freshly allocated, zero terminated copy
 * of the bytes in [start, end), or NULL if the allocation fails.
 */
static char *xmlNanoHTTPDupRange(const char *start, const char *end) {
    size_t len = (size_t) (end - start);
    char *ret = (char *) malloc(len + 1);

    if (ret == NULL) return(NULL);
    memcpy(ret, start, len);
    ret[len] = 0;
    return(ret);
}

/*
 * xmlNanoHTTPScanURL: split an URL of the form
 *     protocol://hostname[:port][/path]
 * into the protocol, hostname, port and path fields of the context.
 *
 * ctxt: the context to fill; previous protocol/hostname/path are freed
 * URL:  the URL to parse
 *
 * On return:
 *  - protocol is NULL if the URL contains no "://";
 *  - hostname and path are NULL if nothing follows "://";
 *  - port is only modified when the URL carries a value in 1..65535;
 *  - path is "/" when the URL stops after the host (or port).
 * Any field may also be NULL if its allocation failed.
 *
 * The components are copied straight from the URL into buffers of the
 * exact size, so there is no limit on the length of the URL.
 */
static void xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
    const char *cur = URL;
    const char *start;

    if (ctxt == NULL) return;

    free(ctxt->protocol);
    ctxt->protocol = NULL;
    free(ctxt->hostname);
    ctxt->hostname = NULL;
    free(ctxt->path);
    ctxt->path = NULL;

    if (URL == NULL) return;

    /* Everything before the first "://" is the protocol name. */
    while (*cur != 0) {
        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
            ctxt->protocol = xmlNanoHTTPDupRange(URL, cur);
            cur += 3;
            break;
        }
        cur++;
    }
    if (*cur == 0) return;

    /* The host name runs up to the first ':' or '/', or the end. */
    start = cur;
    while ((*cur != 0) && (*cur != ':') && (*cur != '/'))
        cur++;
    ctxt->hostname = xmlNanoHTTPDupRange(start, cur);

    if (*cur == ':') {
        int port = 0;

        cur++;
        while ((*cur >= '0') && (*cur <= '9')) {
            /* Stop accumulating once out of range to avoid overflow. */
            if (port <= 65535)
                port = port * 10 + (*cur - '0');
            cur++;
        }
        if ((port > 0) && (port <= 65535))
            ctxt->port = port;
        /* Ignore any garbage between the port and the path. */
        while ((*cur != '/') && (*cur != 0))
            cur++;
    }

    if (*cur == 0)
        ctxt->path = strdup("/");
    else
        ctxt->path = strdup(cur);
}
/*
 * xmlNanoHTTPNewCtxt: allocate a new HTTP context and initialize it from
 * the given URL.
 *
 * URL: the URL to parse into the context
 *
 * Returns the new context, or NULL if the allocation failed. The port
 * defaults to 80. The socket descriptor starts at -1 so that freeing a
 * context which was never connected does not close descriptor 0.
 */
static xmlNanoHTTPCtxtPtr xmlNanoHTTPNewCtxt(const char *URL) {
    xmlNanoHTTPCtxtPtr ret;

    ret = (xmlNanoHTTPCtxtPtr) malloc(sizeof(xmlNanoHTTPCtxt));
    if (ret == NULL) return(NULL);
    memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
    ret->port = 80;
    ret->returnValue = 0;
    ret->fd = -1; /* no socket yet; 0 would be a valid descriptor */
    xmlNanoHTTPScanURL(ret, URL);
    return(ret);
}
/*
 * xmlNanoHTTPFreeCtxt: release every string and buffer owned by the
 * context, close its socket if the descriptor is not negative, then
 * free the context itself.
 *
 * ctxt: the context to destroy (must not be NULL)
 */
static void xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
    char *owned[7];
    int i;

    owned[0] = ctxt->hostname;
    owned[1] = ctxt->protocol;
    owned[2] = ctxt->path;
    owned[3] = ctxt->out;
    owned[4] = ctxt->in;
    owned[5] = ctxt->contentType;
    owned[6] = ctxt->location;
    /* free(NULL) is a no-op, so unset members need no special case. */
    for (i = 0; i < 7; i++)
        free(owned[i]);

    ctxt->state = XML_NANO_HTTP_NONE;
    if (!(ctxt->fd < 0))
        close(ctxt->fd);
    ctxt->fd = -1;
    free(ctxt);
}
/*
 * xmlNanoHTTPSend: send the pending part of the request buffer.
 *
 * ctxt: the context; nothing is done unless its state has the
 *       XML_NANO_HTTP_WRITE bit set and outptr is not NULL
 *
 * The whole zero terminated string at outptr is written: short writes
 * are continued, EINTR is retried, and when the socket reports
 * EAGAIN/EWOULDBLOCK the function waits up to 10 seconds for it to
 * become writable again. outptr is advanced past the bytes sent.
 * ctxt->last receives the number of bytes sent, or -1 on failure.
 */
static void xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
    size_t remaining;
    int total = 0;

    if ((ctxt == NULL) || (ctxt->outptr == NULL)) return;
    if (!(ctxt->state & XML_NANO_HTTP_WRITE)) return;

    remaining = strlen(ctxt->outptr);
    while (remaining > 0) {
        ssize_t n = write(ctxt->fd, ctxt->outptr, remaining);

        if (n > 0) {
            ctxt->outptr += n;
            remaining -= (size_t) n;
            total += (int) n;
            continue;
        }
        if ((n < 0) && (errno == EINTR))
            continue;
        if ((n < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK))) {
            fd_set wfd;
            struct timeval tv;

            tv.tv_sec = 10;
            tv.tv_usec = 0;
            FD_ZERO(&wfd);
            FD_SET(ctxt->fd, &wfd);
            if (select(ctxt->fd + 1, NULL, &wfd, NULL, &tv) > 0)
                continue;
        }
        /* Hard error, timeout, or a write() that made no progress. */
        ctxt->last = -1;
        return;
    }
    ctxt->last = total;
}
/*
 * xmlNanoHTTPRecv: read one more chunk of the response into the input
 * buffer.
 *
 * ctxt: the context; nothing is read unless its state has the
 *       XML_NANO_HTTP_READ bit set
 *
 * The buffer is allocated on first use, compacted once more than
 * XML_NANO_HTTP_CHUNK consumed bytes sit at its front, and doubled when
 * fewer than XML_NANO_HTTP_CHUNK bytes of room remain. If the socket has
 * no data yet, the function waits up to 10 seconds for some to arrive.
 *
 * Returns the number of bytes read (> 0), 0 on end of stream, read
 * error, timeout or when not in READ state, and -1 if the buffer cannot
 * be allocated or grown. On a failed growth the existing buffer and its
 * content are left intact.
 */
static int xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
    fd_set rfd;
    struct timeval tv;

    while (ctxt->state & XML_NANO_HTTP_READ) {
        if (ctxt->in == NULL) {
            ctxt->in = (char *) malloc(65000 * sizeof(char));
            if (ctxt->in == NULL) {
                ctxt->last = -1;
                return(-1);
            }
            ctxt->inlen = 65000;
            ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
        }
        if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
            /* Drop the bytes already handed to the client. */
            int delta = ctxt->inrptr - ctxt->in;
            int len = ctxt->inptr - ctxt->inrptr;

            memmove(ctxt->in, ctxt->inrptr, len);
            /*
             * If content lies in the discarded region, pin it to the
             * buffer start instead of moving it before the allocation.
             */
            if (ctxt->content < ctxt->inrptr)
                ctxt->content = ctxt->in;
            else
                ctxt->content -= delta;
            ctxt->inrptr -= delta;
            ctxt->inptr -= delta;
        }
        if (ctxt->inlen - (int) (ctxt->inptr - ctxt->in) <
            XML_NANO_HTTP_CHUNK) {
            int d_inptr = ctxt->inptr - ctxt->in;
            int d_content = ctxt->content - ctxt->in;
            int d_inrptr = ctxt->inrptr - ctxt->in;
            int newlen = ctxt->inlen * 2;
            char *tmp;

            /* Use a temporary so a failure keeps the old buffer. */
            tmp = (char *) realloc(ctxt->in, newlen);
            if (tmp == NULL) {
                ctxt->last = -1;
                return(-1);
            }
            ctxt->in = tmp;
            ctxt->inlen = newlen;
            ctxt->inptr = ctxt->in + d_inptr;
            ctxt->content = ctxt->in + d_content;
            ctxt->inrptr = ctxt->in + d_inrptr;
        }
        ctxt->last = read(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK);
        if (ctxt->last > 0) {
            ctxt->inptr += ctxt->last;
            return(ctxt->last);
        }
        if (ctxt->last == 0) {
            return(0);
        }
#ifdef EWOULDBLOCK
        if ((ctxt->last == -1) && (errno != EWOULDBLOCK)) {
            return 0;
        }
#endif
        /* Nothing available yet: wait for the socket to be readable. */
        tv.tv_sec=10;
        tv.tv_usec=0;
        FD_ZERO(&rfd);
        FD_SET(ctxt->fd, &rfd);
        if(select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
            return 0;
    }
    return(0);
}
/*
 * xmlNanoHTTPReadLine: read the next line of the response.
 *
 * ctxt: the context to read from
 *
 * Bytes are taken from the input buffer, which is refilled from the
 * network when empty. '\r' characters are dropped and the terminating
 * '\n' is not stored. Lines longer than the internal buffer are cut at
 * 4095 characters; the rest is returned by the following calls.
 *
 * Returns a pointer to a static, zero terminated buffer that is
 * overwritten by the next call, or NULL if no data at all could be read
 * (end of stream, timeout, or receive failure).
 */
char *xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
    static char buf[4096];
    char *bp = buf;

    if (ctxt == NULL) return(NULL);

    while (bp - buf < (int) sizeof(buf) - 1) {
        if (ctxt->inrptr == ctxt->inptr) {
            /*
             * A negative value means the receive buffer could not be
             * allocated or grown: treat it as end of input rather than
             * reading through an invalid cursor.
             */
            if (xmlNanoHTTPRecv(ctxt) <= 0) {
                if (bp == buf)
                    return(NULL);
                *bp = 0;
                return(buf);
            }
        }
        *bp = *ctxt->inrptr++;
        if (*bp == '\n') {
            *bp = 0;
            return(buf);
        }
        if (*bp != '\r')
            bp++;
    }
    buf[sizeof(buf) - 1] = 0;
    return(buf);
}
/*
 * xmlNanoHTTPScanAnswer: extract information from one response header
 * line and store it in the context.
 *
 * ctxt: the context to update
 * line: the header line, or NULL (ignored)
 *
 * Recognized lines:
 *  - "HTTP/<version> <code>...": the numeric code goes to returnValue;
 *  - "Content-Type:" replaces contentType; "ContentType:",
 *    "content-type:" and "contenttype:" only set it when still unset;
 *  - "Location:" replaces location; "location:" only sets it when
 *    still unset.
 * Leading blanks of header values are skipped. Other lines are ignored.
 */
static void xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
    static const struct {
        const char *name;   /* header prefix, matched case-sensitively */
        size_t len;         /* length of the prefix */
        int isLocation;     /* 1: fills location, 0: fills contentType */
        int replace;        /* 1: overrides a value already stored */
    } headers[] = {
        { "Content-Type:", 13, 0, 1 },
        { "ContentType:",  12, 0, 0 },
        { "content-type:", 13, 0, 0 },
        { "contenttype:",  12, 0, 0 },
        { "Location:",      9, 1, 1 },
        { "location:",      9, 1, 0 }
    };
    const char *p;
    size_t i;

    if (line == NULL) return;

    if (strncmp(line, "HTTP/", 5) == 0) {
        int code = 0;

        /* Skip the version number: digits, optionally ".digits". */
        p = line + 5;
        while ((*p >= '0') && (*p <= '9')) p++;
        if (*p == '.') {
            p++;
            while ((*p >= '0') && (*p <= '9')) p++;
        }

        /* At least one blank must separate version and status code. */
        if ((*p != ' ') && (*p != '\t')) return;
        while ((*p == ' ') || (*p == '\t')) p++;

        if ((*p < '0') || (*p > '9')) return;
        while ((*p >= '0') && (*p <= '9')) {
            code *= 10;
            code += *p - '0';
            p++;
        }
        /* The code must be followed by a blank or the end of line. */
        if ((*p != 0) && (*p != ' ') && (*p != '\t')) return;
        ctxt->returnValue = code;
        return;
    }

    for (i = 0; i < sizeof(headers) / sizeof(headers[0]); i++) {
        char **slot;

        if (strncmp(line, headers[i].name, headers[i].len) != 0)
            continue;

        slot = headers[i].isLocation ? &ctxt->location : &ctxt->contentType;
        if (!headers[i].replace && (*slot != NULL))
            return;

        p = line + headers[i].len;
        while ((*p == ' ') || (*p == '\t')) p++;
        free(*slot);
        *slot = strdup(p);
        return;
    }
}
/*
 * xmlNanoHTTPConnectAttempt: try to open a TCP connection to one IPv4
 * address.
 *
 * ia:   the address to connect to
 * port: the TCP port, in host byte order
 *
 * The socket is switched to non-blocking mode and the connection is
 * given 60 seconds to complete. Once the socket is reported writable,
 * SO_ERROR is checked, because a refused or unreachable connection is
 * signalled as writable too.
 *
 * Returns the connected, non-blocking socket, or -1 on failure.
 */
static int xmlNanoHTTPConnectAttempt(struct in_addr ia, int port)
{
    int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in sin;
    fd_set wfd;
    struct timeval tv;
    int status;
    int soerr = 0;
    socklen_t soerrlen = sizeof(soerr);

    if (s == -1) {
        perror("socket");
        return(-1);
    }
    /* FD_SET() is undefined for descriptors beyond FD_SETSIZE. */
    if (s >= FD_SETSIZE) {
        close(s);
        return(-1);
    }

    /* Add O_NONBLOCK to the current flags instead of replacing them. */
    status = fcntl(s, F_GETFL, 0);
    if ((status == -1) || (fcntl(s, F_SETFL, status | O_NONBLOCK) == -1)) {
        perror("nonblocking");
        close(s);
        return(-1);
    }

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr = ia;
    sin.sin_port = htons(port);

    if ((connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1) &&
        (errno != EINPROGRESS)) {
        perror("connect");
        close(s);
        return(-1);
    }

    tv.tv_sec = 60; /* We use 60 second timeouts for now */
    tv.tv_usec = 0;

    FD_ZERO(&wfd);
    FD_SET(s, &wfd);

    switch (select(s + 1, NULL, &wfd, NULL, &tv)) {
        case 0:
            /* Time out */
            close(s);
            return(-1);
        case -1:
            /* select() itself failed */
            perror("select");
            close(s);
            return(-1);
        default:
            break;
    }

    /* Writable does not mean connected: fetch the real outcome. */
    if (getsockopt(s, SOL_SOCKET, SO_ERROR, &soerr, &soerrlen) == -1) {
        perror("getsockopt");
        close(s);
        return(-1);
    }
    if (soerr != 0) {
        errno = soerr;
        perror("connect");
        close(s);
        return(-1);
    }

    return s;
}
/*
 * xmlNanoHTTPConnectHost: resolve a host name and connect to the first
 * of its addresses that accepts the connection.
 *
 * host: the host name to resolve
 * port: the TCP port, in host byte order
 *
 * Only IPv4 results are used, since the connection helper takes a
 * struct in_addr. A message is printed on stderr when the name cannot
 * be resolved or when no address could be connected.
 *
 * Returns the connected socket, or -1 on failure.
 */
int xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    int i;
    int s;

    if (host == NULL)
        return(-1);

    h = gethostbyname(host);
    if (h == NULL) {
        fprintf(stderr,"unable to resolve '%s'.\n", host);
        return(-1);
    }

    /* Never copy an address that is not exactly an IPv4 address. */
    if ((h->h_addrtype == AF_INET) &&
        (h->h_length == (int) sizeof(struct in_addr))) {
        for (i = 0; h->h_addr_list[i]; i++) {
            struct in_addr ia;

            memcpy(&ia, h->h_addr_list[i], sizeof(ia));
            s = xmlNanoHTTPConnectAttempt(ia, port);
            if (s != -1)
                return s;
        }
    }

    fprintf(stderr, "unable to connect to '%s'.\n", host);
    return(-1);
}
/*
 * xmlNanoHTTPOldFetch: fetch an http:// URL with a GET request and save
 * the body of the answer.
 *
 * URL:         the URL to fetch
 * filename:    the file to write, or "-" for the standard output
 * contentType: currently unused by this function
 *
 * The whole answer is read into memory before being written. Up to
 * XML_NANO_HTTP_MAX_REDIR redirections (3xx answer with a Location
 * header) are followed; once the limit is reached the body of the last
 * answer is saved. The header lines and a summary are printed on stdout.
 *
 * The output file is opened only once the final answer is known, is
 * created with mode 0644, truncated, and closed before returning.
 *
 * Returns 0 on success, -1 if the URL is not a usable http URL, the
 * connection or the request failed, or the output could not be written.
 */
int xmlNanoHTTPOldFetch(const char *URL, const char *filename,
                        char **contentType) {
    xmlNanoHTTPCtxtPtr ctxt = NULL;
    char buf[4096];
    int ret = -1;
    int sock;
    int len;
    int fd;
    char *p;
    long remaining;
    int nbRedirects = 0;
    char *redirURL = NULL;

    (void) contentType;
    if ((URL == NULL) || (filename == NULL)) return(-1);

retry:
    ctxt = xmlNanoHTTPNewCtxt((redirURL != NULL) ? redirURL : URL);
    if (ctxt == NULL) goto done;
    if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http") != 0) ||
        (ctxt->hostname == NULL) || (ctxt->path == NULL))
        goto done;

    sock = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
    if (sock < 0) goto done;
    ctxt->fd = sock;

    len = snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
                   ctxt->path, ctxt->hostname);
    /* A truncated request would lack its terminating blank line. */
    if ((len < 0) || ((size_t) len >= sizeof(buf))) goto done;
    ctxt->outptr = ctxt->out = strdup(buf);
    if (ctxt->out == NULL) goto done;

    ctxt->state = XML_NANO_HTTP_WRITE;
    xmlNanoHTTPSend(ctxt);
    if (ctxt->last < 0) goto done;
    ctxt->state = XML_NANO_HTTP_READ;

    /* Parse the header lines up to the empty line ending them. */
    while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
        if (*p == 0) {
            ctxt->content = ctxt->inrptr;
            break;
        }
        xmlNanoHTTPScanAnswer(ctxt, p);
        printf("%s\n", p);
    }
    /* Read the rest of the answer; stop on end of stream or error. */
    while (xmlNanoHTTPRecv(ctxt) > 0) ;

    printf("Code %d, content-type '%s'\n\n", ctxt->returnValue,
           (ctxt->contentType != NULL) ? ctxt->contentType : "(null)");

    if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
        (ctxt->returnValue < 400)) {
        printf("Redirect to: %s\n", ctxt->location);
        if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
            char *next = strdup(ctxt->location);

            if (next == NULL) goto done;
            nbRedirects++;
            free(redirURL);
            redirURL = next;
            xmlNanoHTTPFreeCtxt(ctxt);
            ctxt = NULL;
            goto retry;
        }
    }

    /* Keep the stdio output above ahead of the raw write() below. */
    fflush(stdout);
    if (!strcmp(filename, "-"))
        fd = STDOUT_FILENO;
    else {
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0644);
        if (fd < 0) goto done;
    }

    ret = 0;
    p = ctxt->content;
    remaining = (p != NULL) ? (long) (ctxt->inptr - p) : 0;
    while (remaining > 0) {
        ssize_t n = write(fd, p, (size_t) remaining);

        if ((n < 0) && (errno == EINTR))
            continue;
        if (n <= 0) {
            ret = -1;
            break;
        }
        p += n;
        remaining -= (long) n;
    }
    if ((fd != STDOUT_FILENO) && (close(fd) != 0))
        ret = -1;

done:
    if (ctxt != NULL) xmlNanoHTTPFreeCtxt(ctxt);
    free(redirURL);
    return(ret);
}
/*
 * xmlNanoHTTPOpen: open a connection for an http:// URL, send a GET
 * request and parse the header of the answer.
 *
 * URL:         the URL to fetch
 * contentType: currently unused by this function
 *
 * Up to XML_NANO_HTTP_MAX_REDIR redirections (3xx answer with a
 * Location header) are followed. The header lines and a summary are
 * printed on stdout.
 *
 * Returns an opaque context positioned at the start of the body, to be
 * used with xmlNanoHTTPRead() and released with xmlNanoHTTPClose(), or
 * NULL if the URL is not a usable http URL, the connection or the
 * request failed, memory ran out, or there were too many redirections.
 */
void *
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    xmlNanoHTTPCtxtPtr ctxt;
    char buf[4096];
    int ret;
    char *p;
    int nbRedirects = 0;
    char *redirURL = NULL;

    (void) contentType;
    if (URL == NULL) return(NULL);

retry:
    ctxt = xmlNanoHTTPNewCtxt((redirURL != NULL) ? redirURL : URL);
    free(redirURL);
    redirURL = NULL;
    if (ctxt == NULL) return(NULL);

    if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http") != 0) ||
        (ctxt->hostname == NULL) || (ctxt->path == NULL)) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }

    ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
    if (ret < 0) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }
    ctxt->fd = ret;

    ret = snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
                   ctxt->path, ctxt->hostname);
    /* A truncated request would lack its terminating blank line. */
    if ((ret < 0) || ((size_t) ret >= sizeof(buf))) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }
    ctxt->outptr = ctxt->out = strdup(buf);
    if (ctxt->out == NULL) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }

    ctxt->state = XML_NANO_HTTP_WRITE;
    xmlNanoHTTPSend(ctxt);
    if (ctxt->last < 0) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }
    ctxt->state = XML_NANO_HTTP_READ;

    /* Parse the header lines up to the empty line ending them. */
    while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
        if (*p == 0) {
            ctxt->content = ctxt->inrptr;
            break;
        }
        xmlNanoHTTPScanAnswer(ctxt, p);
        printf("%s\n", p);
    }

    if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
        (ctxt->returnValue < 400)) {
        printf("Redirect to: %s\n", ctxt->location);
        /* Discard the body; stop on end of stream or error. */
        while (xmlNanoHTTPRecv(ctxt) > 0) ;
        if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
            nbRedirects++;
            redirURL = strdup(ctxt->location);
            xmlNanoHTTPFreeCtxt(ctxt);
            /* Without a copy of the target we cannot follow it. */
            if (redirURL == NULL) return(NULL);
            goto retry;
        }
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }

    printf("Code %d, content-type '%s'\n\n", ctxt->returnValue,
           (ctxt->contentType != NULL) ? ctxt->contentType : "(null)");

    return((void *) ctxt);
}
/*
 * xmlNanoHTTPRead: copy up to len bytes of the answer into dest.
 *
 * ctx:  the context returned by xmlNanoHTTPOpen()
 * dest: the destination buffer
 * len:  the maximum number of bytes to copy
 *
 * Data is received from the network until len bytes are available or
 * the stream ends, times out or fails; whatever is available is then
 * copied and consumed.
 *
 * Returns the number of bytes copied, 0 when no more data is available
 * or len is not positive, and -1 if ctx or dest is NULL.
 */
int
xmlNanoHTTPRead(void *ctx, void *dest, int len) {
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
    int avail;

    if (ctx == NULL) return(-1);
    if (dest == NULL) return(-1);
    if (len <= 0) return(0);

    while (ctxt->inptr - ctxt->inrptr < len) {
        /* Stop on end of stream (0) as well as on failure (< 0). */
        if (xmlNanoHTTPRecv(ctxt) <= 0) break;
    }

    /* The input buffer may never have been allocated. */
    if (ctxt->inrptr == NULL) return(0);

    avail = ctxt->inptr - ctxt->inrptr;
    if (avail < len)
        len = avail;
    if (len > 0) {
        memcpy(dest, ctxt->inrptr, len);
        ctxt->inrptr += len;
    }
    return(len);
}
void
xmlNanoHTTPClose(void *ctx) {
xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
if (ctx == NULL) return;
xmlNanoHTTPFreeCtxt(ctxt);
}
/*
 * xmlNanoHTTPFetch: fetch an http:// URL with a GET request and save
 * the body of the answer.
 *
 * URL:         the URL to fetch
 * filename:    the file to write, or "-" for the standard output
 * contentType: passed through to xmlNanoHTTPOpen()
 *
 * The output file is created with mode 0644, truncated, and closed
 * before returning. Short writes are continued until each block read
 * from the network is entirely saved.
 *
 * Returns 0 on success, -1 if the URL could not be opened or the output
 * could not be opened or written.
 */
int xmlNanoHTTPFetch(const char *URL, const char *filename,
                     char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int len;
    int ret = 0;

    if (filename == NULL) return(-1);

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    if (!strcmp(filename, "-")) {
        /* Keep pending stdio output ahead of the raw write() below. */
        fflush(stdout);
        fd = STDOUT_FILENO;
    } else {
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    while ((ret == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int off = 0;

        while (off < len) {
            ssize_t n = write(fd, buf + off, (size_t) (len - off));

            if ((n < 0) && (errno == EINTR))
                continue;
            if (n <= 0) {
                ret = -1;
                break;
            }
            off += (int) n;
        }
    }

    if ((fd != STDOUT_FILENO) && (close(fd) != 0))
        ret = -1;
    xmlNanoHTTPClose(ctxt);
    return(ret);
}
#ifdef STANDALONE
/*
 * Standalone driver: fetch the URL given as first argument into the
 * file given as second argument, or to "-" when there is none. Without
 * any argument a usage message is printed. Always exits with status 0.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;
    const char *url = argv[1];

    if (url == NULL) {
        printf("%s: minimal HTTP GET implementation\n", argv[0]);
        printf("\tusage %s [ URL [ filename ] ]\n", argv[0]);
        return(0);
    }

    xmlNanoHTTPFetch(url, (argv[2] != NULL) ? argv[2] : "-", &contentType);
    return(0);
}
#endif /* STANDALONE */
/* Webmaster */