File:  [Public] / XML / nanohttp.c
Revision 1.1: download - view: text, annotated - select for diffs
Fri Sep 17 10:21:04 1999 UTC (24 years, 8 months ago) by daniel
Branches: MAIN
CVS tags: HEAD
Added a minimal HTTP implementation to fetch external resources, Daniel.

/*
 * nanohttp.c: minimalist HTTP implementation to fetch external subsets.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
 
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <fcntl.h> 
#include <errno.h>
#include <sys/time.h>
#include <sys/select.h>

#define XML_NANO_HTTP_MAX_REDIR	10

#define XML_NANO_HTTP_CHUNK	4096

#define XML_NANO_HTTP_CLOSED	0
#define XML_NANO_HTTP_WRITE	1
#define XML_NANO_HTTP_READ	2
#define XML_NANO_HTTP_NONE	4

typedef struct xmlNanoHTTPCtxt {
    char *protocol;	/* the protocol name */
    char *hostname;	/* the host name */
    int port;		/* the port */
    char *path;		/* the path within the URL */
    int fd;		/* the file descriptor for the socket */
    int state;		/* WRITE / READ / CLOSED */
    char *out;		/* buffer sent (zero terminated) */
    char *outptr;	/* index within the buffer sent */
    char *in;		/* the receiving buffer */
    char *content;	/* the start of the content */
    char *inptr;	/* the next byte to read from network */
    char *inrptr;	/* the next byte to give back to the client */
    int inlen;		/* len of the input buffer */
    int last;		/* return code for last operation */
    int returnValue;	/* the protocol return value */
    char *contentType;	/* the MIME type for the input */
    char *location;	/* the new URL in case of redirect */
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;

static void xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
    const char *cur = URL;
    char buf[4096];
    int index = 0;
    int port = 0;

    if (ctxt->protocol != NULL) { 
        free(ctxt->protocol);
	ctxt->protocol = NULL;
    }
    if (ctxt->hostname != NULL) { 
        free(ctxt->hostname);
	ctxt->hostname = NULL;
    }
    if (ctxt->path != NULL) { 
        free(ctxt->path);
	ctxt->path = NULL;
    }
    buf[index] = 0;
    while (*cur != 0) {
        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
	    buf[index] = 0;
	    ctxt->protocol = strdup(buf);
	    index = 0;
            cur += 3;
	    break;
	}
	buf[index++] = *cur++;
    }
    if (*cur == 0) return;

    buf[index] = 0;
    while (1) {
        if (cur[0] == ':') {
	    buf[index] = 0;
	    ctxt->hostname = strdup(buf);
	    index = 0;
	    cur += 1;
	    while ((*cur >= '0') && (*cur <= '9')) {
	        port *= 10;
		port += *cur - '0';
		cur++;
	    }
	    if (port != 0) ctxt->port = port;
	    while ((cur[0] != '/') && (*cur != 0)) 
	        cur++;
	    break;
	}
        if ((*cur == '/') || (*cur == 0)) {
	    buf[index] = 0;
	    ctxt->hostname = strdup(buf);
	    index = 0;
	    break;
	}
	buf[index++] = *cur++;
    }
    if (*cur == 0) 
        ctxt->path = strdup("/");
    else
	ctxt->path = strdup(cur);
}

static xmlNanoHTTPCtxtPtr xmlNanoHTTPNewCtxt(const char *URL) {
    xmlNanoHTTPCtxtPtr ret;

    ret = (xmlNanoHTTPCtxtPtr) malloc(sizeof(xmlNanoHTTPCtxt));
    if (ret == NULL) return(NULL);

    memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
    ret->port = 80;
    ret->returnValue = 0;

    xmlNanoHTTPScanURL(ret, URL);

    return(ret);
}

static void xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
    if (ctxt->hostname != NULL) free(ctxt->hostname);
    if (ctxt->protocol != NULL) free(ctxt->protocol);
    if (ctxt->path != NULL) free(ctxt->path);
    if (ctxt->out != NULL) free(ctxt->out);
    if (ctxt->in != NULL) free(ctxt->in);
    if (ctxt->contentType != NULL) free(ctxt->contentType);
    if (ctxt->location != NULL) free(ctxt->location);
    ctxt->state = XML_NANO_HTTP_NONE;
    if (ctxt->fd >= 0) close(ctxt->fd);
    ctxt->fd = -1;
    free(ctxt);
}

static void xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
    if (ctxt->state & XML_NANO_HTTP_WRITE)
	ctxt->last = write(ctxt->fd, ctxt->outptr, strlen(ctxt->outptr));
}

static int xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
    fd_set rfd;
    struct timeval tv;


    while (ctxt->state & XML_NANO_HTTP_READ) {
	if (ctxt->in == NULL) {
	    ctxt->in = (char *) malloc(65000 * sizeof(char));
	    if (ctxt->in == NULL) {
	        ctxt->last = -1;
		return(-1);
	    }
	    ctxt->inlen = 65000;
	    ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
	}
	if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
	    int delta = ctxt->inrptr - ctxt->in;
	    int len = ctxt->inptr - ctxt->inrptr;
	    
	    memmove(ctxt->in, ctxt->inrptr, len);
	    ctxt->inrptr -= delta;
	    ctxt->content -= delta;
	    ctxt->inptr -= delta;
	}
        if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
	    int d_inptr = ctxt->inptr - ctxt->in;
	    int d_content = ctxt->content - ctxt->in;
	    int d_inrptr = ctxt->inrptr - ctxt->in;

	    ctxt->inlen *= 2;
            ctxt->in = (char *) realloc(ctxt->in, ctxt->inlen);
	    if (ctxt->in == NULL) {
	        ctxt->last = -1;
		return(-1);
	    }
            ctxt->inptr = ctxt->in + d_inptr;
            ctxt->content = ctxt->in + d_content;
            ctxt->inrptr = ctxt->in + d_inrptr;
	}
	ctxt->last = read(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK);
	if (ctxt->last > 0) {
	    ctxt->inptr += ctxt->last;
	    return(ctxt->last);
	}
	if (ctxt->last == 0) {
	    return(0);
	}
#ifdef EWOULDBLOCK
	if ((ctxt->last == -1) && (errno != EWOULDBLOCK)) {
	    return 0;
	}
#endif
	tv.tv_sec=10;
	tv.tv_usec=0;
	FD_ZERO(&rfd);
	FD_SET(ctxt->fd, &rfd);
	
	if(select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
		return 0;
    }
    return(0);
}

char *xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
    static char buf[4096];
    char *bp=buf;
    
    while(bp - buf < 4095) {
	if(ctxt->inrptr == ctxt->inptr) {
	    if (xmlNanoHTTPRecv(ctxt) == 0) {
		if (bp == buf)
		    return NULL;
		else
		    *bp = 0;
		return buf;
	    }
	}
	*bp = *ctxt->inrptr++;
	if(*bp == '\n') {
	    *bp = 0;
	    return buf;
	}
	if(*bp != '\r')
	    bp++;
    }
    buf[4095] = 0;
    return(buf);
}

	
static void xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
    const char *cur = line;

    if (line == NULL) return;

    if (!strncmp(line, "HTTP/", 5)) {
        int version = 0;
	int ret = 0;

	cur += 5;
	while ((*cur >= '0') && (*cur <= '9')) {
	    version *= 10;
	    version += *cur - '0';
	    cur++;
	}
	if (*cur == '.') {
	    cur++;
	    if ((*cur >= '0') && (*cur <= '9')) {
		version *= 10;
		version += *cur - '0';
		cur++;
	    }
	    while ((*cur >= '0') && (*cur <= '9'))
		cur++;
	} else
	    version *= 10;
	if ((*cur != ' ') && (*cur != '\t')) return;
	while ((*cur == ' ') || (*cur == '\t')) cur++;
	if ((*cur < '0') || (*cur > '9')) return;
	while ((*cur >= '0') && (*cur <= '9')) {
	    ret *= 10;
	    ret += *cur - '0';
	    cur++;
	}
	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
	ctxt->returnValue = ret;
    } else if (!strncmp(line, "Content-Type:", 13)) {
        cur += 13;
	while ((*cur == ' ') || (*cur == '\t')) cur++;
	if (ctxt->contentType != NULL)
	    free(ctxt->contentType);
	ctxt->contentType = strdup(cur);
    } else if (!strncmp(line, "ContentType:", 12)) {
        cur += 12;
	if (ctxt->contentType != NULL) return;
	while ((*cur == ' ') || (*cur == '\t')) cur++;
	ctxt->contentType = strdup(cur);
    } else if (!strncmp(line, "content-type:", 13)) {
        cur += 13;
	if (ctxt->contentType != NULL) return;
	while ((*cur == ' ') || (*cur == '\t')) cur++;
	ctxt->contentType = strdup(cur);
    } else if (!strncmp(line, "contenttype:", 12)) {
        cur += 12;
	if (ctxt->contentType != NULL) return;
	while ((*cur == ' ') || (*cur == '\t')) cur++;
	ctxt->contentType = strdup(cur);
    } else if (!strncmp(line, "Location:", 9)) {
        cur += 9;
	while ((*cur == ' ') || (*cur == '\t')) cur++;
	if (ctxt->location != NULL)
	    free(ctxt->location);
	ctxt->location = strdup(cur);
    } else if (!strncmp(line, "location:", 9)) {
        cur += 9;
	if (ctxt->location != NULL) return;
	while ((*cur == ' ') || (*cur == '\t')) cur++;
	ctxt->location = strdup(cur);
    }
}

static int xmlNanoHTTPConnectAttempt(struct in_addr ia, int port)
{
    int s=socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in sin;
    fd_set wfd;
    struct timeval tv;
    
    if(s==-1) {
	perror("socket");
	return(-1);
    }
    
    if(fcntl(s, F_SETFL, FNDELAY)==-1) {
	perror("nonblocking");
	close(s);
	return(-1);
    }

    sin.sin_family = AF_INET;	
    sin.sin_addr   = ia;
    sin.sin_port   = htons(port);
    
    if((connect(s, (struct sockaddr *)&sin, sizeof(sin))==-1) &&
       (errno != EINPROGRESS)) {
	perror("connect");
	close(s);
	return(-1);
    }	
    
    tv.tv_sec = 60;		/* We use 60 second timeouts for now */
    tv.tv_usec = 0;
    
    FD_ZERO(&wfd);
    FD_SET(s, &wfd);
    
    switch(select(s+1, NULL, &wfd, NULL, &tv))
    {
	case 0:
	    /* Time out */
	    close(s);
	    return(-1);
	case -1:
	    /* Ermm.. ?? */
	    perror("select");
	    close(s);
	    return(-1);
    }
    
    return s;
}
 
int xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    int i;
    int s;
    
    h=gethostbyname(host);
    if(h==NULL)
    {
	fprintf(stderr,"unable to resolve '%s'.\n", host);
	return(-1);
    }
    
    
    for(i=0; h->h_addr_list[i]; i++)
    {
	struct in_addr ia;
	memcpy(&ia, h->h_addr_list[i],4);
	s = xmlNanoHTTPConnectAttempt(ia, port);
	if(s != -1)
		return s;
    }
    fprintf(stderr, "unable to connect to '%s'.\n", host);
    return(-1);
}

int xmlNanoHTTPOldFetch(const char *URL, const char *filename,
                     char **contentType) {
    xmlNanoHTTPCtxtPtr ctxt;
    char buf[4096];
    int ret;
    int fd;
    char *p;
    int head;
    int nbRedirects = 0;
    char *redirURL = NULL;
    
retry:
    if (redirURL == NULL)
	ctxt = xmlNanoHTTPNewCtxt(URL);
    else
	ctxt = xmlNanoHTTPNewCtxt(redirURL);

    if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
        xmlNanoHTTPFreeCtxt(ctxt);
	if (redirURL != NULL) free(redirURL);
        return(-1);
    }
    if (ctxt->hostname == NULL) {
        xmlNanoHTTPFreeCtxt(ctxt);
	if (redirURL != NULL) free(redirURL);
        return(-1);
    }
    ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
    if (ret < 0) {
        xmlNanoHTTPFreeCtxt(ctxt);
	if (redirURL != NULL) free(redirURL);
        return(-1);
    }
    ctxt->fd = ret;
    snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
	     ctxt->path, ctxt->hostname);
    ctxt->outptr = ctxt->out = strdup(buf);
    ctxt->state = XML_NANO_HTTP_WRITE;
    xmlNanoHTTPSend(ctxt);
    ctxt->state = XML_NANO_HTTP_READ;
    head = 1;

    while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
        if (head && (*p == 0)) {
	    head = 0;
	    ctxt->content = ctxt->inrptr;
	    break;
	}
	xmlNanoHTTPScanAnswer(ctxt, p);
if (p != NULL) printf("%s\n", p);
    }
    while (xmlNanoHTTPRecv(ctxt)) ;

    if (!strcmp(filename, "-")) 
        fd = 0;
    else {
        fd = open(filename, O_CREAT | O_WRONLY);
	if (fd < 0) {
	    xmlNanoHTTPFreeCtxt(ctxt);
	    if (redirURL != NULL) free(redirURL);
	    return(-1);
	}
    }

printf("Code %d, content-type '%s'\n\n",
       ctxt->returnValue, ctxt->contentType);
    if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
        (ctxt->returnValue < 400)) {
printf("Redirect to: %s\n", ctxt->location);
        if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
	    nbRedirects++;
	    if (redirURL != NULL) free(redirURL);
	    redirURL = strdup(ctxt->location);
	    xmlNanoHTTPFreeCtxt(ctxt);
	    goto retry;
	}
    }

    write(fd, ctxt->content, ctxt->inptr - ctxt->content);
    xmlNanoHTTPFreeCtxt(ctxt);
    if (redirURL != NULL) free(redirURL);
    return(0);
}

void *
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    xmlNanoHTTPCtxtPtr ctxt;
    char buf[4096];
    int ret;
    char *p;
    int head;
    int nbRedirects = 0;
    char *redirURL = NULL;
    
retry:
    if (redirURL == NULL)
	ctxt = xmlNanoHTTPNewCtxt(URL);
    else {
	ctxt = xmlNanoHTTPNewCtxt(redirURL);
	free(redirURL);
	redirURL = NULL;
    }

    if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
        xmlNanoHTTPFreeCtxt(ctxt);
	if (redirURL != NULL) free(redirURL);
        return(NULL);
    }
    if (ctxt->hostname == NULL) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }
    ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
    if (ret < 0) {
        xmlNanoHTTPFreeCtxt(ctxt);
        return(NULL);
    }
    ctxt->fd = ret;
    snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
	     ctxt->path, ctxt->hostname);
    ctxt->outptr = ctxt->out = strdup(buf);
    ctxt->state = XML_NANO_HTTP_WRITE;
    xmlNanoHTTPSend(ctxt);
    ctxt->state = XML_NANO_HTTP_READ;
    head = 1;

    while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
        if (head && (*p == 0)) {
	    head = 0;
	    ctxt->content = ctxt->inrptr;
	    break;
	}
	xmlNanoHTTPScanAnswer(ctxt, p);

if (p != NULL) printf("%s\n", p);
    }

    if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
        (ctxt->returnValue < 400)) {
printf("Redirect to: %s\n", ctxt->location);
	while (xmlNanoHTTPRecv(ctxt)) ;
        if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
	    nbRedirects++;
	    redirURL = strdup(ctxt->location);
	    xmlNanoHTTPFreeCtxt(ctxt);
	    goto retry;
	}
	xmlNanoHTTPFreeCtxt(ctxt);
	return(NULL);

    }

printf("Code %d, content-type '%s'\n\n",
       ctxt->returnValue, ctxt->contentType);

    return((void *) ctxt);
}

int
xmlNanoHTTPRead(void *ctx, void *dest, int len) {
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;

    if (ctx == NULL) return(-1);
    if (dest == NULL) return(-1);
    if (len <= 0) return(0);

    while (ctxt->inptr - ctxt->inrptr < len) {
        if (xmlNanoHTTPRecv(ctxt) == 0) break;
    }
    if (ctxt->inptr - ctxt->inrptr < len)
        len = ctxt->inptr - ctxt->inrptr;
    memcpy(dest, ctxt->inrptr, len);
    ctxt->inrptr += len;
    return(len);
}

void
xmlNanoHTTPClose(void *ctx) {
    xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;

    if (ctx == NULL) return;

    xmlNanoHTTPFreeCtxt(ctxt);
}

int xmlNanoHTTPFetch(const char *URL, const char *filename,
                     char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int len;
    
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    if (!strcmp(filename, "-")) 
        fd = 0;
    else {
        fd = open(filename, O_CREAT | O_WRONLY);
	if (fd < 0) {
	    xmlNanoHTTPClose(ctxt);
	    return(-1);
	}
    }

    while ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0) {
	write(fd, buf, len);
    }

    xmlNanoHTTPClose(ctxt);
    return(0);
}

#ifdef STANDALONE
int main(int argc, char **argv) {
    char *contentType = NULL;

    if (argv[1] != NULL) {
	if (argv[2] != NULL) 
	    xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
        else
	    xmlNanoHTTPFetch(argv[1], "-", &contentType);
    } else {
        printf("%s: minimal HTTP GET implementation\n", argv[0]);
        printf("\tusage %s [ URL [ filename ] ]\n", argv[0]);
    }
    return(0);
}
#endif /* STANDALONE */

Webmaster