Annotation of libwww/Library/src/HTGuess.c, revision 2.11
2.10 frystyk 1: /* HTGuess.c
2: ** STREAM TO GUESS CONTENT-TYPE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 luotonen 6: **
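**	This stream object buffers up to the first SAMPLE_SIZE characters
**	of its input, guesses the content type and content encoding from
**	that sample, and then passes the sample and all further input on
**	to the stream stack built for the guessed type.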
10: **
2.8 duns 11: ** HISTORY:
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
13: **
2.1 luotonen 14: */
15:
2.11 ! roeber 16: #include "sysdep.h"
2.1 luotonen 17:
18: #define SAMPLE_SIZE 200 /* Number of chars to look at */
19:
20: #include "HTGuess.h"
21:
22: #include "HTFormat.h"
23: #include "HTAlert.h"
24: #include "HTList.h"
25:
26: /* Stream Object
27: ** ------------
28: */
29:
30: struct _HTStream {
31: CONST HTStreamClass * isa;
32:
33: HTRequest * req;
2.9 frystyk 34: HTFormat output_format;
2.1 luotonen 35: HTStream * output_stream;
2.9 frystyk 36: HTStream * target;
2.1 luotonen 37:
38: BOOL discard;
39: int cnt;
40: int text_cnt;
41: int lf_cnt;
42: int cr_cnt;
43: int pg_cnt;
44: int ctrl_cnt;
45: int high_cnt;
46: char * write_ptr;
47: char buffer[ SAMPLE_SIZE + 1 ];
48: };
49:
50:
51: PRIVATE BOOL is_html ARGS1(char *, buf)
52: {
53: char * p = strchr(buf,'<');
54:
55: if (p && (!strncasecomp(p, "<HTML>", 6) ||
56: !strncasecomp(p, "<HEAD", 5) ||
57: !strncasecomp(p, "<TITLE>", 7) ||
58: !strncasecomp(p, "<BODY>", 6) ||
59: !strncasecomp(p, "<PLAINTEXT>", 11) ||
60: (p[0]=='<' && TOUPPER(p[1]) == 'H' && p[3]=='>')))
61: return YES;
62: else
63: return NO;
64: }
65:
66:
/*
** Forwarding helpers: hand data to the target stream's methods.
** They expand in terms of a local variable named `me`.
*/
#define PUT_CHAR(c)     ((*me->target->isa->put_character)(me->target, (c)))
#define PUT_STRING(s)   ((*me->target->isa->put_string)(me->target, (s)))
#define PUT_BLOCK(b,l)  ((*me->target->isa->put_block)(me->target, (b), (l)))

/*
** Record the guessed type / encoding in the request as an atom.
*/
#define CONTENT_TYPE(t)     (me->req->content_type = HTAtom_for(t))
#define CONTENT_ENCODING(t) (me->req->content_encoding = HTAtom_for(t))
78:
79:
80: PRIVATE BOOL header_and_flush ARGS1(HTStream *, me)
81: {
82: CTRACE(stderr,"GUESSING.... text=%d newlines=%d ctrl=%d high=%d\n",
83: me->text_cnt, me->lf_cnt, me->ctrl_cnt, me->high_cnt);
2.2 luotonen 84: if (me->cnt) {
85: CTRACE(stderr,
86: "Percentages. text=%d%% newlines=%d%% ctrl=%d%% high=%d%%\n",
87: (int)(100*me->text_cnt/me->cnt + 0.5),
88: (int)(100*me->lf_cnt /me->cnt + 0.5),
89: (int)(100*me->ctrl_cnt/me->cnt + 0.5),
90: (int)(100*me->high_cnt/me->cnt + 0.5));
91: }
2.1 luotonen 92:
93: if (!me->ctrl_cnt ||
94: me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {
95:
96: /* some kind of text */
97:
98: *me->write_ptr = 0; /* terminate buffer */
99:
100: if (me->high_cnt > 0)
101: CONTENT_ENCODING("8bit");
102: else
103: CONTENT_ENCODING("7bit");
104:
105: if (is_html(me->buffer))
106: CONTENT_TYPE("text/html");
107:
108: else if (!strncmp(me->buffer, "%!", 2))
109: CONTENT_TYPE("application/postscript");
110:
111: else if (strstr(me->buffer, "#define") &&
112: strstr(me->buffer, "_width") &&
113: strstr(me->buffer, "_bits"))
114: CONTENT_TYPE("image/x-xbitmap");
115:
116: else
117: CONTENT_TYPE("text/plain");
118: }
119: else {
120: if (!strncmp(me->buffer, "GIF", 3))
121: CONTENT_TYPE("image/gif");
122:
123: else if (!strncmp(me->buffer, "\377\330\377\340", 4))
124: CONTENT_TYPE("image/jpeg");
125:
126: else if (!strcmp(me->buffer, "MM")) /* MM followed by a zero */
127: CONTENT_TYPE("image/tiff");
128:
129: else if (!strncmp(me->buffer, ".snd", 4))
130: CONTENT_TYPE("audio/basic");
131:
132: else if (!strncmp(me->buffer, "\037\235", 2))
133: CONTENT_ENCODING("x-compress");
134:
135: else if (!strncmp(me->buffer, "\037\213", 2))
136: CONTENT_ENCODING("x-gzip");
137:
138: else
139: CONTENT_TYPE("application/octet-stream");
140: }
141:
142: if (!me->req->content_type) CONTENT_TYPE("www/unknown");
143: if (!me->req->content_encoding) CONTENT_ENCODING("binary");
144:
145: CTRACE(stderr,"Guessed..... %s\n", HTAtom_name(me->req->content_type));
146: CTRACE(stderr,"Encoding.... %s\n", HTAtom_name(me->req->content_encoding));
147:
2.9 frystyk 148: me->target = HTStreamStack(me->req->content_type, me->output_format,
149: me->output_stream, me->req, NO);
150: if (!me->target) {
2.1 luotonen 151: me->discard = YES; /* Turning into a black hole */
152: return NO;
153: }
154: else {
155: PUT_BLOCK(me->buffer, me->cnt);
156: return YES;
157: }
158: }
159:
160:
161: PRIVATE void HTGuess_put_character ARGS2(HTStream *, me, char, c)
162: {
163: if (me->discard) return;
2.9 frystyk 164: if (me->target) PUT_CHAR(c);
2.1 luotonen 165: else {
166: me->cnt++;
2.4 luotonen 167: #if 0
2.3 luotonen 168: if (c < 0) me->high_cnt++;
2.4 luotonen 169: else
170: #endif
171: if (c == LF) me->lf_cnt++;
2.1 luotonen 172: else if (c == CR) me->cr_cnt++;
173: else if (c == 12) me->pg_cnt++;
2.2 luotonen 174: else if (c =='\t')me->text_cnt++;
2.4 luotonen 175: else if ((unsigned char)c < 32) me->ctrl_cnt++;
176: else if ((unsigned char)c < 128) me->text_cnt++;
2.1 luotonen 177: else me->high_cnt++;
178: *me->write_ptr++ = c;
179: if (me->cnt >= SAMPLE_SIZE) header_and_flush(me);
180: }
181: }
182:
183: PRIVATE void HTGuess_put_string ARGS2(HTStream *, me, CONST char*, s)
184: {
185: if (me->discard) return;
2.9 frystyk 186: if (me->target) PUT_STRING(s);
2.1 luotonen 187: else {
188: while (*s) {
189: HTGuess_put_character(me,*s);
190: s++;
191: }
192: }
193: }
194:
195: PRIVATE void HTGuess_put_block ARGS3(HTStream *, me, CONST char*, b, int, l)
196: {
197: if (me->discard) return;
2.9 frystyk 198: while (!me->target && l > 0) {
2.1 luotonen 199: HTGuess_put_character(me, *b);
200: b++;
201: l--;
202: }
203: if (l > 0) PUT_BLOCK(b,l);
204: }
205:
2.9 frystyk 206: PRIVATE int HTGuess_free ARGS1(HTStream *, me)
2.1 luotonen 207: {
2.9 frystyk 208: if (!me->discard && !me->target)
2.1 luotonen 209: header_and_flush(me);
2.9 frystyk 210: if (me->target)
211: (*me->target->isa->_free)(me->target);
2.1 luotonen 212: free(me);
2.9 frystyk 213: return 0;
2.1 luotonen 214: }
215:
2.9 frystyk 216: PRIVATE int HTGuess_abort ARGS2(HTStream *, me, HTError, e)
2.1 luotonen 217: {
2.9 frystyk 218: if (me->target)
219: (*me->target->isa->abort)(me,e);
2.1 luotonen 220: free(me);
2.9 frystyk 221: return EOF;
2.1 luotonen 222: }
223:
224:
225: /* Guessing stream
226: ** ---------------
227: */
228: PRIVATE CONST HTStreamClass HTGuessClass =
229: {
230: "Guess",
231: HTGuess_free,
232: HTGuess_abort,
233: HTGuess_put_character,
234: HTGuess_put_string,
235: HTGuess_put_block
236: };
237:
238:
239:
2.9 frystyk 240: PUBLIC HTStream * HTGuess_new ARGS5(HTRequest *, req,
241: void *, param,
242: HTFormat, input_format,
243: HTFormat, output_format,
244: HTStream *, output_stream)
2.1 luotonen 245: {
246: HTStream * me = (HTStream*)calloc(1,sizeof(HTStream));
247: if (!me) outofmem(__FILE__, "HTGuess_new");
248:
249: me->isa = &HTGuessClass;
2.9 frystyk 250: me->req = req;
251: me->output_format = output_format;
252: me->output_stream = output_stream;
2.1 luotonen 253: me->write_ptr = me->buffer;
254: return me;
255: }
2.9 frystyk 256:
2.1 luotonen 257:
Webmaster