Annotation of libwww/Library/src/HTGuess.c, revision 2.12.2.1
2.10 frystyk 1: /* HTGuess.c
2: ** STREAM TO GUESS CONTENT-TYPE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 luotonen 6: **
7: ** This version of the stream object buffers up to SAMPLE_SIZE characters
8: ** of its input, guesses a content type and encoding from that sample,
9: ** records them on the anchor and then passes all data on to a converter stream.
10: **
2.8 duns 11: ** HISTORY:
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
13: **
2.1 luotonen 14: */
15:
2.12 frystyk 16: /* Library include files */
17: #include "tcp.h"
18: #include "HTUtils.h"
19: #include "HTString.h"
2.1 luotonen 20: #include "HTFormat.h"
21: #include "HTAlert.h"
22: #include "HTList.h"
2.12.2.1! frystyk 23: #include "HTFWrite.h"
2.12 frystyk 24: #include "HTGuess.h"
25:
26: #define SAMPLE_SIZE 200 /* Number of chars to look at */
2.1 luotonen 27:
28: /* Stream Object
29: ** ------------
30: */
31:
32: struct _HTStream {
33: CONST HTStreamClass * isa;
34:
35: HTRequest * req;
2.12.2.1! frystyk 36: HTParentAnchor * anchor;
2.9 frystyk 37: HTFormat output_format;
2.1 luotonen 38: HTStream * output_stream;
2.9 frystyk 39: HTStream * target;
2.1 luotonen 40:
2.12.2.1! frystyk 41: BOOL transparent;
2.1 luotonen 42: int cnt;
43: int text_cnt;
44: int lf_cnt;
45: int cr_cnt;
46: int pg_cnt;
47: int ctrl_cnt;
48: int high_cnt;
49: char * write_ptr;
50: char buffer[ SAMPLE_SIZE + 1 ];
51: };
52:
/* Shorthands for writing to the target stream; they expect `me' in scope */
#define PUT_CHAR(c) \
    (*me->target->isa->put_character)(me->target, (c))
#define PUT_STRING(s) \
    (*me->target->isa->put_string)(me->target, (s))
#define PUT_BLOCK(b,l) \
    (*me->target->isa->put_block)(me->target, (b), (l))

/* Record a guessed media type / encoding (given by name) on the anchor */
#define CONTENT_TYPE(t) \
    HTAnchor_setFormat(me->anchor, HTAtom_for(t))
#define CONTENT_ENCODING(t) \
    HTAnchor_setEncoding(me->anchor, HTAtom_for(t))
! 59:
! 60: /* ------------------------------------------------------------------------- */
2.1 luotonen 61:
62: PRIVATE BOOL is_html ARGS1(char *, buf)
63: {
64: char * p = strchr(buf,'<');
65:
66: if (p && (!strncasecomp(p, "<HTML>", 6) ||
67: !strncasecomp(p, "<HEAD", 5) ||
68: !strncasecomp(p, "<TITLE>", 7) ||
69: !strncasecomp(p, "<BODY>", 6) ||
70: !strncasecomp(p, "<PLAINTEXT>", 11) ||
71: (p[0]=='<' && TOUPPER(p[1]) == 'H' && p[3]=='>')))
72: return YES;
73: else
74: return NO;
75: }
76:
2.12.2.1! frystyk 77: PRIVATE int HTGuess_flush ARGS1(HTStream *, me)
! 78: {
! 79: if (!me->transparent) {
! 80: if (PROT_TRACE)
! 81: fprintf(TDEST,"GUESSING.... text=%d newlines=%d ctrl=%d high=%d\n",
! 82: me->text_cnt, me->lf_cnt, me->ctrl_cnt, me->high_cnt);
! 83: if (me->cnt) {
! 84: if (PROT_TRACE) fprintf(TDEST,
! 85: "Percentages. text=%d%% newlines=%d%% ctrl=%d%% high=%d%%\n",
! 86: (int)(100*me->text_cnt/me->cnt + 0.5),
! 87: (int)(100*me->lf_cnt /me->cnt + 0.5),
! 88: (int)(100*me->ctrl_cnt/me->cnt + 0.5),
! 89: (int)(100*me->high_cnt/me->cnt + 0.5));
! 90: }
! 91:
! 92: if (!me->ctrl_cnt ||
! 93: me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {
! 94:
! 95: /* some kind of text */
! 96:
! 97: *me->write_ptr = 0; /* terminate buffer */
! 98:
! 99: if (me->high_cnt > 0)
! 100: CONTENT_ENCODING("8bit");
! 101: else
! 102: CONTENT_ENCODING("7bit");
! 103:
! 104: if (is_html(me->buffer))
! 105: CONTENT_TYPE("text/html");
! 106:
! 107: else if (!strncmp(me->buffer, "%!", 2))
! 108: CONTENT_TYPE("application/postscript");
! 109:
! 110: else if (strstr(me->buffer, "#define") &&
! 111: strstr(me->buffer, "_width") &&
! 112: strstr(me->buffer, "_bits"))
! 113: CONTENT_TYPE("image/x-xbitmap");
! 114:
! 115: else if (!strncmp(me->buffer, "begin ", 6))
! 116: CONTENT_ENCODING("base64");
! 117: else
! 118: CONTENT_TYPE("text/plain");
! 119: }
! 120: else {
! 121: if (!strncmp(me->buffer, "GIF", 3))
! 122: CONTENT_TYPE("image/gif");
2.1 luotonen 123:
2.12.2.1! frystyk 124: else if (!strncmp(me->buffer, "\377\330\377\340", 4))
! 125: CONTENT_TYPE("image/jpeg");
2.1 luotonen 126:
2.12.2.1! frystyk 127: else if (!strcmp(me->buffer, "MM")) /* MM followed by a zero */
! 128: CONTENT_TYPE("image/tiff");
2.1 luotonen 129:
2.12.2.1! frystyk 130: else if (!strncmp(me->buffer, ".snd", 4))
! 131: CONTENT_TYPE("audio/basic");
2.1 luotonen 132:
2.12.2.1! frystyk 133: else if (!strncmp(me->buffer, "\037\235", 2))
! 134: CONTENT_ENCODING("x-compress");
2.1 luotonen 135:
2.12.2.1! frystyk 136: else if (!strncmp(me->buffer, "\037\213", 2))
! 137: CONTENT_ENCODING("x-gzip");
2.1 luotonen 138:
2.12.2.1! frystyk 139: else
! 140: CONTENT_TYPE("application/octet-stream");
! 141: }
! 142:
! 143: if (!me->anchor->content_type) CONTENT_TYPE("www/unknown");
! 144: if (!me->anchor->content_encoding) CONTENT_ENCODING("binary");
! 145:
! 146: if (PROT_TRACE) fprintf(TDEST,"Guessed..... %s\n",
! 147: HTAtom_name(me->anchor->content_type));
! 148: if (PROT_TRACE) fprintf(TDEST,"Encoding.... %s\n",
! 149: HTAtom_name(me->anchor->content_encoding));
! 150: if ((me->target = HTStreamStack(me->anchor->content_type,
! 151: me->output_format, me->output_stream,
! 152: me->req, NO)) == NULL) {
! 153: if (PROT_TRACE)
! 154: fprintf(TDEST, "HTGuess..... Can't convert media type\n");
! 155: me->target = HTBlackHole();
! 156: }
2.1 luotonen 157: }
2.12.2.1! frystyk 158: return PUT_BLOCK(me->buffer, me->cnt);
2.1 luotonen 159: }
160:
161:
2.12.2.1! frystyk 162: PRIVATE int HTGuess_put_block ARGS3(HTStream *, me, CONST char*, b, int, l)
2.1 luotonen 163: {
2.12.2.1! frystyk 164: while (!me->transparent && l-- > 0) {
! 165: int status;
! 166: if (me->target) {
! 167: if ((status = HTGuess_flush(me)) != HT_OK)
! 168: return status;
! 169: } else {
! 170: me->cnt++;
! 171: if (*b == LF)
! 172: me->lf_cnt++;
! 173: else if (*b == CR)
! 174: me->cr_cnt++;
! 175: else if (*b == 12)
! 176: me->pg_cnt++;
! 177: else if (*b =='\t')
! 178: me->text_cnt++;
! 179: else if ((unsigned char)*b < 32)
! 180: me->ctrl_cnt++;
! 181: else if ((unsigned char)*b < 128)
! 182: me->text_cnt++;
! 183: else
! 184: me->high_cnt++;
! 185: *me->write_ptr++ = *b++;
! 186: if (me->cnt >= SAMPLE_SIZE) {
! 187: if ((status = HTGuess_flush(me)) != HT_OK)
! 188: return status;
! 189: else
! 190: me->transparent = YES;
! 191: }
! 192: }
2.1 luotonen 193: }
2.12.2.1! frystyk 194: if (l > 0)
! 195: return PUT_BLOCK(b, l);
! 196: return HT_OK;
2.1 luotonen 197: }
198:
2.12.2.1! frystyk 199: PRIVATE int HTGuess_put_character ARGS2(HTStream *, me, char, c)
2.1 luotonen 200: {
2.12.2.1! frystyk 201: return HTGuess_put_block(me, &c, 1);
2.1 luotonen 202: }
203:
2.12.2.1! frystyk 204: PRIVATE int HTGuess_put_string ARGS2(HTStream *, me, CONST char*, s)
2.1 luotonen 205: {
2.12.2.1! frystyk 206: return HTGuess_put_block(me, s, (int) strlen(s));
2.1 luotonen 207: }
208:
2.9 frystyk 209: PRIVATE int HTGuess_free ARGS1(HTStream *, me)
2.1 luotonen 210: {
2.12.2.1! frystyk 211: int status;
! 212: if (!me->transparent && (status = HTGuess_flush(me)) != HT_OK)
! 213: return status;
! 214: else
! 215: me->transparent = YES;
! 216: if ((status = (*me->target->isa->_free)(me->target)) != HT_OK)
! 217: return status;
2.1 luotonen 218: free(me);
2.12.2.1! frystyk 219: return HT_OK;
2.1 luotonen 220: }
221:
2.9 frystyk 222: PRIVATE int HTGuess_abort ARGS2(HTStream *, me, HTError, e)
2.1 luotonen 223: {
2.9 frystyk 224: if (me->target)
225: (*me->target->isa->abort)(me,e);
2.1 luotonen 226: free(me);
2.12.2.1! frystyk 227: return HT_ERROR;
2.1 luotonen 228: }
229:
230:
231: /* Guessing stream
232: ** ---------------
233: */
234: PRIVATE CONST HTStreamClass HTGuessClass =
235: {
2.12.2.1! frystyk 236: "GuessWhat",
! 237: HTGuess_flush,
2.1 luotonen 238: HTGuess_free,
239: HTGuess_abort,
240: HTGuess_put_character,
241: HTGuess_put_string,
242: HTGuess_put_block
243: };
244:
245:
2.9 frystyk 246: PUBLIC HTStream * HTGuess_new ARGS5(HTRequest *, req,
247: void *, param,
248: HTFormat, input_format,
249: HTFormat, output_format,
250: HTStream *, output_stream)
2.1 luotonen 251: {
2.12.2.1! frystyk 252: HTStream * me = (HTStream *) calloc(1,sizeof(HTStream));
2.1 luotonen 253: if (!me) outofmem(__FILE__, "HTGuess_new");
254:
255: me->isa = &HTGuessClass;
2.9 frystyk 256: me->req = req;
2.12.2.1! frystyk 257: me->anchor = req->anchor;
2.9 frystyk 258: me->output_format = output_format;
259: me->output_stream = output_stream;
2.1 luotonen 260: me->write_ptr = me->buffer;
261: return me;
262: }
Webmaster