File:  [Public] / libwww / Library / src / HTMIME.c
Revision 2.21: download - view: text, annotated - select for diffs
Thu May 18 14:46:33 1995 UTC (29 years, 4 months ago) by frystyk
Branches: MAIN
CVS tags: HEAD
New Bind module

/*								       HTMIME.c
**	MIME MESSAGE PARSE
**
**	(c) COPYRIGHT CERN 1994.
**	Please first read the full copyright statement in the file COPYRIGH.
**
**	This is RFC 1341-specific code.
**	The input stream pushed into this parser is assumed to be
**	stripped of CRs, i.e. lines end with LF, not CR LF.
**	(It is easy to change this except for the body part where
**	conversion can be slow.)
**
** History:
**	   Feb 92	Written Tim Berners-Lee, CERN
**	 8 Jul 94  FM	Insulate free() from _free structure element.
**	14 Mar 95  HFN	Now using anchor for storing data. No more `\n',
**			static buffers etc.
*/

/* Library include files */
#include "tcp.h"
#include "HTUtils.h"
#include "HTString.h"
#include "HTFormat.h"
#include "HTChunk.h"
#include "HTFWrite.h"
#include "HTMIME.h"					 /* Implemented here */

/*		MIME Object
**		-----------
*/
typedef enum _MIME_state {
    /* Generic scanner states */
    BEGINNING_OF_LINE,			/* About to read first char of a line */
    CHECK,				/* check against check_pointer */
    UNKNOWN,				/* Unknown header */
    JUNK_LINE,				/* Ignore rest of header */

    /* States used while several header names still share a prefix */
    CONTENT,				/* Intermediate states */
    FIRSTLETTER_D,			/* `d' seen: Date or Derived-From */
    FIRSTLETTER_L,			/* `l' seen: Last-Modified, Link, Location */
    CONTENTLETTER_L,			/* `content-l' seen: -Language or -Length */
    CONTENTLETTER_T,			/* `content-t' seen: -Transfer-Encoding, -Type */

    /* One state per recognised header; entered with the value up next */
    ALLOW,				/* Headers supported */
    AUTHENTICATE,			/* WWW-Authenticate */
    CONTENT_ENCODING,
    CONTENT_LANGUAGE,
    CONTENT_LENGTH,
    CONTENT_TRANSFER_ENCODING,
    CONTENT_TYPE,
    DATE,
    DERIVED_FROM,
    EXPIRES,
    LAST_MODIFIED,
    LINK,				/* Handler treats it as UNKNOWN */
    LOCATION,
    PUBLIC_METHODS,			/* Handler treats it as UNKNOWN */
    RETRY_AFTER,
    TITLE,
    URI_HEADER,				/* Handler forwards to LOCATION */
    VERSION
} MIME_state;

/* Instance data of the MIME header parser stream */
struct _HTStream {
    CONST HTStreamClass *	isa;	/* Method table; set to &HTMIME */
    HTRequest *			request; /* Request whose anchor gets the data */
    HTStream *			target;	/* Stream the body is passed on to */
    HTFormat			target_format; /* Format asked of HTStreamStack */
    HTChunk *			buffer;	/* Header text collected so far */
    HTSocketEOL			EOLstate; /* Line-end state in put_block */
    BOOL			transparent; /* YES once the header is parsed */
};

/* ------------------------------------------------------------------------- */

/*
**	Scan the collected header text line by line.
**
**	This is a FSM parser which is tolerant as it can be of all
**	syntax errors.  It ignores field names it does not understand,
**	and resynchronises on line beginnings.
**
**	On entry `buffer' must be non-empty and its last byte must be '\0';
**	header lines inside it are separated by '\0'.  Parsed values are
**	stored in `anchor' and `request'.  The buffer contents may be
**	modified while fields are extracted with HTNextField().
*/
PRIVATE void scanheaderlines ARGS3(HTChunk *, buffer, HTRequest *, request,
				   HTParentAnchor *, anchor)
{
    MIME_state state = BEGINNING_OF_LINE;
    MIME_state ok_state = UNKNOWN;		  /* got this state if match */
    char *ptr = buffer->data;			    /* Current scan position */
    char *stop = ptr + buffer->size - 1;	  /* The final '\0' terminator */
    char *header = ptr;      				  /* For diagnostics */
    CONST char * check_pointer = "";			   /* checking input */
    char *value;

    /*
    ** Invariant: ptr never moves beyond a '\0' except in JUNK_LINE, which
    ** is the only place that steps from one line to the next.
    */
    while (ptr < stop) {
	switch (state) {
	  case BEGINNING_OF_LINE:
	    header = ptr;
	    switch (TOLOWER(*ptr)) {
	      case 'a':
		check_pointer = "llow";
		ok_state = ALLOW;
		state = CHECK;
		break;

	      case 'c':
		check_pointer = "ontent-";
		ok_state = CONTENT;
		state = CHECK;
		break;

	      case 'd':
		state = FIRSTLETTER_D;
		break;

	      case 'e':
		check_pointer = "xpires";
		ok_state = EXPIRES;
		state = CHECK;
		break;

	      case 'l':
		state = FIRSTLETTER_L;
		break;

	      case 'm':
		check_pointer = "ime-version";
		ok_state = JUNK_LINE;  /* We don't use this but recognize it */
		state = CHECK;
		break;

	      case 'p':
		/*
		** Must leave BEGINNING_OF_LINE here, otherwise the next
		** character would be taken as the start of a new line.
		*/
		check_pointer = "ublic";
		ok_state = PUBLIC_METHODS;
		state = CHECK;
		break;

	      case 'r':
		check_pointer = "etry-after";
		ok_state = RETRY_AFTER;
		state = CHECK;
		break;

	      case 's':
		check_pointer = "erver";
		ok_state = JUNK_LINE;  /* We don't use this but recognize it */
		state = CHECK;
		break;

	      case 't':
		check_pointer = "itle";
		ok_state = TITLE;
		state = CHECK;
		break;

	      case 'u':
		check_pointer = "ri";
		ok_state = URI_HEADER;
		state = CHECK;
		break;

	      case 'v':
		check_pointer = "ersion";
		ok_state = VERSION;
		state = CHECK;
		break;

	      case 'w':
		check_pointer = "ww-authenticate";
		ok_state = AUTHENTICATE;
		state = CHECK;
		break;

	      default:
		state = UNKNOWN;
		break;
	    }
	    if (*ptr) ptr++;		     /* Never step over end of line */
	    break;
	
	  case FIRSTLETTER_D:
	    switch (TOLOWER(*ptr)) {
	      case 'a':
		check_pointer = "te";
		ok_state = DATE;
		state = CHECK;
		break;

	      case 'e':
		check_pointer = "rived-from";
		ok_state = DERIVED_FROM;
		state = CHECK;
		break;

	      default:
		state = UNKNOWN;
		break;
	    }
	    if (*ptr) ptr++;
	    break;

	  case FIRSTLETTER_L:
	    switch (TOLOWER(*ptr)) {
	      case 'a':
		check_pointer = "st-modified";
		ok_state = LAST_MODIFIED;
		state = CHECK;
		break;

	      case 'i':
		check_pointer = "nk";
		ok_state = LINK;
		state = CHECK;
		break;

	      case 'o':
		check_pointer = "cation";
		ok_state = LOCATION;
		state = CHECK;
		break;

	      default:
		state = UNKNOWN;
		break;
	    }
	    if (*ptr) ptr++;
	    break;

	  case CONTENT:
	    switch (TOLOWER(*ptr)) {
	      case 'e':
		check_pointer = "ncoding";
		ok_state = CONTENT_ENCODING;
		state = CHECK;
		break;

	      case 'l':
		state = CONTENTLETTER_L;
		break;

	      case 't':
		state = CONTENTLETTER_T;
		break;

	      default:
		state = UNKNOWN;
		break;
	    }
	    if (*ptr) ptr++;
	    break;

	  case CONTENTLETTER_L:
	    switch (TOLOWER(*ptr)) {
	      case 'a':
		check_pointer = "nguage";
		ok_state = CONTENT_LANGUAGE;
		state = CHECK;
		break;

	      case 'e':
		check_pointer = "ngth";
		ok_state = CONTENT_LENGTH;
		state = CHECK;
		break;

	      default:
		state = UNKNOWN;
		break;
	    }
	    if (*ptr) ptr++;
	    break;

	  case CONTENTLETTER_T:
	    switch (TOLOWER(*ptr)) {
	      case 'r':
		check_pointer = "ansfer-encoding";
		ok_state = CONTENT_TRANSFER_ENCODING;
		state = CHECK;
		break;

	      case 'y':
		check_pointer = "pe";
		ok_state = CONTENT_TYPE;
		state = CHECK;
		break;

	      default:
		state = UNKNOWN;
		break;
	    }
	    if (*ptr) ptr++;
	    break;

	  case CHECK:				     /* Check against string */
	    /*
	    ** Stop at the end of the pattern so that a line ending right
	    ** after the header name is not compared NUL against NUL and
	    ** scanned beyond both strings.
	    */
	    while (*check_pointer && TOLOWER(*ptr) == *check_pointer) {
		ptr++;
		check_pointer++;
	    }
	    if (!*check_pointer) {
		state = ok_state;
		while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
		    ptr++;
	    } else
		state = UNKNOWN;
	    break;

	  case ALLOW:	    
	    while ((value = HTNextField(&ptr)) != NULL) {
		char *lc = value;
		HTMethod new_method;
		while ((*lc = TOUPPER(*lc))) lc++; 
		/*
		** NOTE(review): assumes HTMethod values are disjoint bit
		** flags, so OR-ing keeps a repeated method from corrupting
		** the set - confirm against HTMethod.h
		*/
		if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
		    anchor->methods |= new_method;
	    }
	    if (STREAM_TRACE)
		fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
			anchor->methods);
	    state = JUNK_LINE;
	    break;

	  case AUTHENTICATE:
	    if ((value = HTNextField(&ptr)) != NULL) {
		StrAllocCopy(request->WWWAAScheme, value);

		/* The parsing is done in HTSSUtils.c for the moment */
		if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
	    }
	    state = JUNK_LINE;
	    break;

	  case CONTENT_ENCODING:
	    if ((value = HTNextField(&ptr)) != NULL) {
		char *lc = value;
		while ((*lc = TOLOWER(*lc))) lc++;
		anchor->content_encoding = HTAtom_for(value);
	    }
	    state = JUNK_LINE;
	    break;

	  case CONTENT_LANGUAGE:		 /* @@@ SHOULD BE A LIST @@@ */
	    if ((value = HTNextField(&ptr)) != NULL) {
		char *lc = value;
		while ((*lc = TOLOWER(*lc))) lc++;
		anchor->content_language = HTAtom_for(value);
	    }
	    state = JUNK_LINE;
	    break;

	  case CONTENT_LENGTH:
	    if ((value = HTNextField(&ptr)) != NULL)
		anchor->content_length = atol(value);
	    state = JUNK_LINE;
	    break;

	  case CONTENT_TRANSFER_ENCODING:
	    if ((value = HTNextField(&ptr)) != NULL) {
		char *lc = value;
		while ((*lc = TOLOWER(*lc))) lc++;
		anchor->cte = HTAtom_for(value);
	    }
	    state = JUNK_LINE;
	    break;

	  case CONTENT_TYPE:
	    if ((value = HTNextField(&ptr)) != NULL) {
		char *lc = value;
		while ((*lc = TOLOWER(*lc))) lc++; 
		anchor->content_type = HTAtom_for(value);
		while ((value = HTNextField(&ptr)) != NULL) {	  /* Charset */
		    if (!strcasecomp(value, "charset")) {
			if ((value = HTNextField(&ptr)) != NULL) {
			    lc = value;
			    while ((*lc = TOLOWER(*lc))) lc++;
			    anchor->charset = HTAtom_for(value);
			}
		    } else if (!strcasecomp(value, "level")) {	    /* Level */
			if ((value = HTNextField(&ptr)) != NULL) {
			    lc = value;
			    while ((*lc = TOLOWER(*lc))) lc++;
			    anchor->level = HTAtom_for(value);
			}
		    }
		}
	    }
	    state = JUNK_LINE;
	    break;

	  case DATE:
	    anchor->date = HTParseTime(ptr);
	    state = JUNK_LINE;
	    break;

	  case DERIVED_FROM:
	    if ((value = HTNextField(&ptr)) != NULL)
		StrAllocCopy(anchor->derived_from, value);
	    state = JUNK_LINE;
	    break;

	  case EXPIRES:
	    anchor->expires = HTParseTime(ptr);
	    state = JUNK_LINE;
	    break;

	  case LAST_MODIFIED:
	    anchor->last_modified = HTParseTime(ptr);
	    state = JUNK_LINE;
	    break;

	  case LINK:
	    state = UNKNOWN;				/* @@@@@@@@@@@ */
	    break;

	  case LOCATION:
	    if ((value = HTNextField(&ptr)) != NULL)
		StrAllocCopy(request->redirect, value);
	    state = JUNK_LINE;
	    break;

	  case PUBLIC_METHODS:
	    state = UNKNOWN;				/* @@@@@@@@@@@ */
	    break;

	  case RETRY_AFTER:
	    request->retry_after = HTParseTime(ptr);
	    state = JUNK_LINE;
	    break;

	  case TITLE:	  /* Can't reuse buffer as HTML version might differ */
	    if ((value = HTNextField(&ptr)) != NULL)
		StrAllocCopy(anchor->title, value);
	    state = JUNK_LINE;
	    break;

	  case URI_HEADER:
	    state = LOCATION;			/* @@@ Need extended parsing */
	    break;

	  case VERSION:
	    if ((value = HTNextField(&ptr)) != NULL)
		StrAllocCopy(anchor->version, value);
	    state = JUNK_LINE;
	    break;

	  case UNKNOWN:
	    if (STREAM_TRACE)
		fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
	    HTAnchor_addExtra(anchor, header);

	    /* Fall through */

	  case JUNK_LINE:
	    while (*ptr) ptr++;
	    ptr++;			  /* Step over the line terminator */
	    state = BEGINNING_OF_LINE;
	    break;
	}
    }
}


/*
**	Parse the header collected in me->buffer and set up the body stream.
**
**	Marks the stream transparent, scans the buffered header lines into
**	`anchor' and `request', and then replaces me->target by the stream
**	stack converting anchor->content_type into me->target_format (or by
**	a black hole when no conversion is available).  Finally the anchor
**	is flagged as having its header parsed.
**
**	An empty header (nothing collected) is not scanned but the stream
**	stack is still set up.
*/
PRIVATE void parseheader ARGS3(HTStream *, me, HTRequest *, request,
			       HTParentAnchor *, anchor)
{
    me->transparent = YES;		  /* Pump rest of data right through */

    if (me->buffer->data && me->buffer->size > 0) {
	/*
	** HTMIME_put_block only writes '\0' between lines, so the last
	** line must be terminated here before anything scans for '\0'.
	*/
	HTChunkPutc(me->buffer, '\0');
	scanheaderlines(me->buffer, request, anchor);
    }

    if (STREAM_TRACE)
	fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
		HTAtom_name(anchor->content_type),
		HTAtom_name(me->target_format));
    if ((me->target = HTStreamStack(anchor->content_type,
				    me->target_format, me->target,
				    me->request, YES)) == NULL) {
	if (STREAM_TRACE)
	    fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
	me->target = HTBlackHole();
    }
    anchor->header_parsed = YES;
}


/*
**	Feed a block of data to the MIME parser
**
**	Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
**	Folding is either of CR LWS, LF LWS, CRLF LWS
**
**	While the header is incomplete, bytes are collected in me->buffer
**	with a '\0' written between header lines and a single space in
**	place of a folded line break.  When the end of the header is seen,
**	parseheader() is called; whatever remains of the block is handed to
**	the target stream and its result is returned.  Otherwise HT_OK.
*/
PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
{
    for (; !me->transparent && l-- > 0; b++) {
	char ch = *b;

	switch (me->EOLstate) {
	  case EOL_FCR:					      /* Seen a CR */
	    if (ch == CR) {				    /* End of header */
		parseheader(me, me->request, me->request->anchor);
		continue;
	    }
	    if (ch == LF) {					     /* CRLF */
		me->EOLstate = EOL_FLF;
		continue;
	    }
	    break;

	  case EOL_FLF:				      /* Seen LF or CR LF */
	    if (ch == CR) {				/* LF CR or CR LF CR */
		me->EOLstate = EOL_SCR;
		continue;
	    }
	    if (ch == LF) {				    /* End of header */
		parseheader(me, me->request, me->request->anchor);
		continue;
	    }
	    break;

	  case EOL_SCR:				/* Seen LF CR or CR LF CR */
	    if (ch == CR || ch == LF) {			    /* End of header */
		parseheader(me, me->request, me->request->anchor);
		continue;
	    }
	    break;

	  default:					/* Inside a line */
	    if (ch == CR)
		me->EOLstate = EOL_FCR;
	    else if (ch == LF)
		me->EOLstate = EOL_FLF;			       /* Line found */
	    else
		HTChunkPutc(me->buffer, ch);
	    continue;
	}

	/* A line break was followed by something that is not CR or LF */
	me->EOLstate = EOL_BEGIN;
	if (WHITE(ch)) {				/* Folded line */
	    HTChunkPutc(me->buffer, ' ');
	} else {					/* New line */
	    HTChunkPutc(me->buffer, '\0');
	    HTChunkPutc(me->buffer, ch);
	}
    }

    if (l <= 0)						    /* Nothing left */
	return HT_OK;
    return (*me->target->isa->put_block)(me->target, b, l);
}


/*	Character handling
**	------------------
**	Passes the single character on as a block of length one and
**	returns what HTMIME_put_block() returns.
*/
PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, char, c)
{
    CONST char *single = &c;
    return HTMIME_put_block(me, single, 1);
}


/*	String handling
**	---------------
**	Passes the '\0' terminated string on as a block (terminator not
**	included) and returns what HTMIME_put_block() returns.
*/
PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
{
    int length = (int) strlen(s);
    return HTMIME_put_block(me, s, length);
}


/*	Flush a stream object
**	---------------------
**	Flushes the target stream and returns its result.  A stream without
**	a target has nothing to flush and returns HT_OK; this is the same
**	guard HTMIME_free() and HTMIME_abort() apply to me->target.
*/
PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
{
    if (!me->target)
	return HT_OK;
    return (*me->target->isa->flush)(me->target);
}

/*	Free a stream object
**	--------------------
**	Frees the target stream (when there is one), then the header buffer
**	and the stream object itself.  Returns the result of freeing the
**	target, or HT_OK when there was no target.
*/
PRIVATE int HTMIME_free ARGS1(HTStream *, me)
{
    HTStream *sink = me->target;
    int result = sink ? (*sink->isa->_free)(sink) : HT_OK;

    HTChunkFree(me->buffer);
    free(me);
    return result;
}

/*	End writing
**	-----------
**	Aborts the target stream (when there is one), then releases the
**	header buffer and the stream object itself.  Returns the result of
**	aborting the target, or HT_ERROR when there was no target.
*/
PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
{
    int status = HT_ERROR;
    if (me->target)
	status = (*me->target->isa->abort)(me->target, e);
    HTChunkFree(me->buffer);	   /* Owned by this stream, as in HTMIME_free */
    free(me);
    return status;
}



/*	Structured Object Class
**	-----------------------
**	Method table that HTMIMEConvert() installs as `isa' in every MIME
**	parser stream.  The initializer is positional: a string followed by
**	this module's handlers.  NOTE(review): the order must match the
**	HTStreamClass declaration, which is not visible in this file -
**	presumably name, flush, _free, abort, put_character, put_string,
**	put_block; confirm against the header declaring HTStreamClass.
*/
PRIVATE CONST HTStreamClass HTMIME =
{		
	"MIMEParser",
	HTMIME_flush,
	HTMIME_free,
	HTMIME_abort,
	HTMIME_put_character,
	HTMIME_put_string,
	HTMIME_put_block
}; 


/*	Subclass-specific Methods
**	-------------------------
**	Create a MIME parser stream
**
**	The new stream collects the header in a chunk created with
**	HTChunkCreate(512) and starts in line state EOL_BEGIN.  `request',
**	`output_format' and `output_stream' are stored in the stream;
**	`param' and `input_format' are not used here.  All other fields are
**	zero from calloc().  outofmem() is called when allocation fails.
*/
PUBLIC HTStream* HTMIMEConvert ARGS5(
	HTRequest *,		request,
	void *,			param,
	HTFormat,		input_format,
	HTFormat,		output_format,
	HTStream *,		output_stream)
{
    HTStream *stream = (HTStream *) calloc(1, sizeof(*stream));

    if (stream == NULL)
	outofmem(__FILE__, "HTMIMEConvert");

    stream->isa = &HTMIME;
    stream->EOLstate = EOL_BEGIN;
    stream->buffer = HTChunkCreate(512);

    stream->request = request;
    stream->target_format = output_format;
    stream->target = output_stream;
    return stream;
}

Webmaster