File:  [Public] / libwww / Library / src / HTGuess.c
Revision 2.3: download - view: text, annotated - select for diffs
Wed Apr 27 12:50:48 1994 UTC (30 years, 1 month ago) by luotonen
Branches: MAIN
CVS tags: HEAD
Some small fixes to make proxy server work better + added no-close
option for HTWriter + fixed conflicts in FTP module after Henrik's
rewrite.


/*		STREAM TO GUESS CONTENT-TYPE			HTGuess.c
**		============================
**
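**	This version of the stream object buffers the first SAMPLE_SIZE
**	characters of its input, guesses the content type and content
**	encoding from that sample, records the guess in the request, and
**	then passes all of its input on to a stream stack built for the
**	guessed content type.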
**
*/


#define SAMPLE_SIZE	200	/* Max number of leading chars buffered and examined before guessing */

#include "HTGuess.h"

#include "HTFormat.h"
#include "HTAlert.h"
#include "HTList.h"

/*		Stream Object
**		------------
*/

/*
**	State of one guessing stream.  Input is collected in `buffer' and
**	classified character by character until the guess is made; after
**	that everything is forwarded to `output_stream'.
*/
struct _HTStream {
	CONST HTStreamClass *	isa;		/* Method table (set to HTGuessClass) */

	HTRequest *		req;		/* Request whose content_type/content_encoding get set */
	HTStream *		output_stream;	/* Target stream; NULL until the guess has been made */

	BOOL			discard;	/* YES once no converter was found: drop all input */
	int			cnt;		/* Number of chars sampled into buffer so far */
	int			text_cnt;	/* Tabs and chars in the range 32..127 */
	int			lf_cnt;		/* LF chars */
	int			cr_cnt;		/* CR chars */
	int			pg_cnt;		/* Form feeds (char code 12) */
	int			ctrl_cnt;	/* Other chars below 32 */
	int			high_cnt;	/* Chars that are negative or >= 128 */
	char *			write_ptr;	/* Next free position in buffer */
	char			buffer[ SAMPLE_SIZE + 1 ];	/* Sample, plus room for a terminating 0 */
};


/*	Does the sample look like HTML?
**	-------------------------------
**
**	Only the first '<' in the string is examined.  The text starting
**	there must be one of the tags <HTML>, <HEAD, <TITLE>, <BODY>,
**	<PLAINTEXT> (compared case-insensitively), or have the form <H?>
**	where ? is any single character (e.g. <H1>).
**
** On entry,
**	buf	0-terminated sample text
**
** On exit,
**	returns	YES if the sample is taken to be HTML, NO otherwise
*/
PRIVATE BOOL is_html ARGS1(char *, buf)
{
    char * p = strchr(buf,'<');

    if (!p)
	return NO;

    if (!strncasecomp(p, "<HTML>", 6) ||
	!strncasecomp(p, "<HEAD", 5) ||
	!strncasecomp(p, "<TITLE>", 7) ||
	!strncasecomp(p, "<BODY>", 6) ||
	!strncasecomp(p, "<PLAINTEXT>", 11))
	return YES;

    /*
    ** <H?> form.  p[2] must be checked before p[3] is read: if the string
    ** ends right after "<H", p[3] lies beyond the terminating 0 and may be
    ** outside the buffer altogether.
    */
    if (TOUPPER(p[1]) == 'H' && p[2] && p[3] == '>')
	return YES;

    return NO;
}


/*
** Shorthands for calling the output stream's methods.  They expand to
** code that uses a variable named `me' and dereference me->output_stream,
** so they may only be used once the output stream has been set up.
*/
#define PUT_CHAR(c)	\
    (*me->output_stream->isa->put_character)(me->output_stream,c)
#define PUT_STRING(s)	\
    (*me->output_stream->isa->put_string)(me->output_stream,s)
#define PUT_BLOCK(b,l)	\
    (*me->output_stream->isa->put_block)(me->output_stream,b,l)

/*
** Record the guessed content type / content encoding in the request,
** as the atom for the given name.  These also rely on `me' being in scope.
*/
#define CONTENT_TYPE(t)	\
    me->req->content_type = HTAtom_for(t)
#define CONTENT_ENCODING(t)	\
    me->req->content_encoding = HTAtom_for(t)


/*	Make the guess and start forwarding
**	-----------------------------------
**
**	Classifies the buffered sample as text or binary from the character
**	counts, records the guessed content type and content encoding in the
**	request, builds the output stream stack for the guessed type and
**	flushes the buffered sample into it.
**
** On entry,
**	me	guessing stream with me->cnt sampled chars in me->buffer
**
** On exit,
**	returns	YES if the output stream was set up and the sample flushed;
**		NO  if no stream stack could be built.  In that case an
**		    error is reported on the request and me->discard is
**		    set so that all further input is dropped.
*/
PRIVATE BOOL header_and_flush ARGS1(HTStream *, me)
{
    CTRACE(stderr,"GUESSING.... text=%d newlines=%d ctrl=%d high=%d\n",
	   me->text_cnt, me->lf_cnt, me->ctrl_cnt, me->high_cnt);
    if (me->cnt) {
	/* Divide in floating point; with integer division the result is
	** already truncated and adding 0.5 would not round anything. */
	CTRACE(stderr,
	       "Percentages. text=%d%% newlines=%d%% ctrl=%d%% high=%d%%\n",
	       (int)(100.0*me->text_cnt/me->cnt + 0.5),
	       (int)(100.0*me->lf_cnt  /me->cnt + 0.5),
	       (int)(100.0*me->ctrl_cnt/me->cnt + 0.5),
	       (int)(100.0*me->high_cnt/me->cnt + 0.5));
    }

    /*
    ** Terminate the sample before any string function looks at it.
    ** write_ptr never goes beyond buffer[SAMPLE_SIZE], which is the
    ** extra element reserved for this terminator.
    */
    *me->write_ptr = 0;

    if (!me->ctrl_cnt ||
	me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {

	/* some kind of text */

	if (me->high_cnt > 0)
	    CONTENT_ENCODING("8bit");
	else
	    CONTENT_ENCODING("7bit");

	if (is_html(me->buffer))
	    CONTENT_TYPE("text/html");

	else if (!strncmp(me->buffer, "%!", 2))
	    CONTENT_TYPE("application/postscript");

	else if (strstr(me->buffer, "#define") &&
		 strstr(me->buffer, "_width") &&
		 strstr(me->buffer, "_bits"))
	    CONTENT_TYPE("image/x-xbitmap");

	else
	    CONTENT_TYPE("text/plain");
    }
    else {

	/* binary: look for well-known magic numbers at the start */

	if (!strncmp(me->buffer, "GIF", 3))
	    CONTENT_TYPE("image/gif");

	else if (!strncmp(me->buffer, "\377\330\377\340", 4))
	    CONTENT_TYPE("image/jpeg");

	else if (!strcmp(me->buffer, "MM"))	/* MM followed by a zero */
	    CONTENT_TYPE("image/tiff");

	else if (!strncmp(me->buffer, ".snd", 4))
	    CONTENT_TYPE("audio/basic");

	/* The two compressed formats set only the encoding here; the
	** content type is left for the defaulting below. */
	else if (!strncmp(me->buffer, "\037\235", 2))
	    CONTENT_ENCODING("x-compress");

	else if (!strncmp(me->buffer, "\037\213", 2))
	    CONTENT_ENCODING("x-gzip");

	else
	    CONTENT_TYPE("application/octet-stream");
    }

    /* Defaults for whatever is still unset */
    if (!me->req->content_type)  CONTENT_TYPE("www/unknown");
    if (!me->req->content_encoding)  CONTENT_ENCODING("binary");

    CTRACE(stderr,"Guessed..... %s\n", HTAtom_name(me->req->content_type));
    CTRACE(stderr,"Encoding.... %s\n", HTAtom_name(me->req->content_encoding));

    me->output_stream = HTStreamStack(me->req->content_type, me->req, NO);
    if (!me->output_stream) {
	char buffer[1024];

	/* The precisions bound the two names so that the message always
	** fits in buffer, however long the atom names are. */
	sprintf(buffer, "Guesser: Sorry, can't convert from %.400s to %.400s.",
		HTAtom_name(me->req->content_type),
		HTAtom_name(me->req->output_format));
	CTRACE(stderr, "HTFormat.... %s\n", buffer);
	HTLoadError(me->req, 501, buffer);
	me->discard = YES;	/* Turning into a black hole */
	return NO;
    }

    /* Hand the sample collected so far over to the real stream */
    PUT_BLOCK(me->buffer, me->cnt);
    return YES;
}


/*	Character handling
**	------------------
**
**	After the guess has been made the character goes straight to the
**	output stream.  Before that it is counted in the statistics class
**	it belongs to and appended to the sample; when the sample reaches
**	SAMPLE_SIZE characters the guess is made.  A discarding stream
**	ignores the character.
*/
PRIVATE void HTGuess_put_character ARGS2(HTStream *, me, char, c)
{
    int * bucket;		/* Statistics counter this char belongs to */

    if (me->discard)
	return;

    if (me->output_stream) {
	PUT_CHAR(c);
	return;
    }

    /* The order of the tests matters: the named characters must be
    ** recognized before the general "below 32" control test. */
    if (c < 0)
	bucket = &me->high_cnt;
    else if (c == LF)
	bucket = &me->lf_cnt;
    else if (c == CR)
	bucket = &me->cr_cnt;
    else if (c == 12)
	bucket = &me->pg_cnt;
    else if (c == '\t')
	bucket = &me->text_cnt;
    else if (c < 32)
	bucket = &me->ctrl_cnt;
    else if (c < 128)
	bucket = &me->text_cnt;
    else
	bucket = &me->high_cnt;

    me->cnt++;
    (*bucket)++;

    *me->write_ptr = c;
    me->write_ptr++;

    if (me->cnt >= SAMPLE_SIZE)
	header_and_flush(me);
}

/*	String handling
**	---------------
**
**	Forwards the string to the output stream when there is one;
**	otherwise feeds it to the guesser one character at a time.
**	A discarding stream ignores the string.
*/
PRIVATE void HTGuess_put_string ARGS2(HTStream *, me, CONST char*, s)
{
    CONST char * p;

    if (me->discard)
	return;

    if (me->output_stream) {
	PUT_STRING(s);
	return;
    }

    for (p = s; *p; p++)
	HTGuess_put_character(me, *p);
}

/*	Buffer write
**	------------
**
**	As long as no output stream exists the block is fed to the guesser
**	one character at a time.  Whatever is left of the block once the
**	output stream has been set up is written to it in one piece.
**	A discarding stream ignores the block.
*/
PRIVATE void HTGuess_put_block ARGS3(HTStream *, me, CONST char*, b, int, l)
{
    if (me->discard)
	return;

    for ( ; !me->output_stream && l > 0; b++, l--)
	HTGuess_put_character(me, *b);

    if (l > 0)
	PUT_BLOCK(b,l);
}

/*	Free the stream
**	---------------
**
**	If the input ended before the guess was made, the guess is made
**	now on whatever has been sampled.  The output stream, if there is
**	one, is freed too, and then the guessing stream itself.
*/
PRIVATE void HTGuess_free ARGS1(HTStream *, me)
{
    if (!me->output_stream && !me->discard)
	header_and_flush(me);

    if (me->output_stream) {
	HTStream * sink = me->output_stream;
	(*sink->isa->free)(sink);
    }

    free(me);
}

/*	Abort the stream
**	----------------
**
**	Passes the abort on to the output stream, if there is one, and
**	frees the guessing stream.
**
** On entry,
**	me	guessing stream; must not be used after this call
**	e	error passed on unchanged to the output stream's abort method
*/
PRIVATE void HTGuess_abort ARGS2(HTStream *, me, HTError, e)
{
    /* The method must be given the output stream itself as its object,
    ** not this guessing stream. */
    if (me->output_stream)
	(*me->output_stream->isa->abort)(me->output_stream, e);
    free(me);
}


/*	Guessing stream
**	---------------
*/
/*
** Method table of the guessing stream.  This is a positional initializer,
** so the entries have to stay in the member order of HTStreamClass
** (declared elsewhere).
*/
PRIVATE CONST HTStreamClass HTGuessClass =
{		
	"Guess",			/* Name of the stream class */
	HTGuess_free,			/* Guess if not yet done, free output stream and self */
	HTGuess_abort,			/* Abort output stream and free self */
	HTGuess_put_character,		/* Sample or forward one character */
 	HTGuess_put_string,		/* Sample or forward a 0-terminated string */
	HTGuess_put_block		/* Sample or forward a block of given length */
};



/*	Create a guessing stream
**	------------------------
**
**	Allocates a zero-filled stream object bound to the given request,
**	with an empty sample buffer and no output stream yet.
**
** On entry,
**	req	request that will receive the guessed type and encoding
**
** On exit,
**	returns	the new stream
*/
PUBLIC HTStream * HTGuess_new ARGS1(HTRequest *, req)
{
    HTStream * stream;

    stream = (HTStream *) calloc(1, sizeof(*stream));
    if (!stream)
	outofmem(__FILE__, "HTGuess_new");

    stream->isa       = &HTGuessClass;
    stream->req       = req;
    stream->write_ptr = stream->buffer;	/* Sampling starts at the beginning */

    return stream;
}


Webmaster