Annotation of libwww/Library/src/HTGuess.c, revision 2.10
2.10 ! frystyk 1: /* HTGuess.c
! 2: ** STREAM TO GUESS CONTENT-TYPE
! 3: **
! 4: ** (c) COPYRIGHT CERN 1994.
! 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 luotonen 6: **
7: ** This version of the stream object just writes its input
8: ** to its output, but prepends Content-Type: field and an
9: ** empty line after it.
10: **
2.8 duns 11: ** HISTORY:
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
13: **
2.1 luotonen 14: */
15:
16:
17: #define SAMPLE_SIZE 200 /* Number of chars to look at */
18:
19: #include "HTGuess.h"
20:
21: #include "HTFormat.h"
22: #include "HTAlert.h"
23: #include "HTList.h"
24:
25: /* Stream Object
26: ** ------------
27: */
28:
29: struct _HTStream {
30: CONST HTStreamClass * isa;
31:
32: HTRequest * req;
2.9 frystyk 33: HTFormat output_format;
2.1 luotonen 34: HTStream * output_stream;
2.9 frystyk 35: HTStream * target;
2.1 luotonen 36:
37: BOOL discard;
38: int cnt;
39: int text_cnt;
40: int lf_cnt;
41: int cr_cnt;
42: int pg_cnt;
43: int ctrl_cnt;
44: int high_cnt;
45: char * write_ptr;
46: char buffer[ SAMPLE_SIZE + 1 ];
47: };
48:
49:
50: PRIVATE BOOL is_html ARGS1(char *, buf)
51: {
52: char * p = strchr(buf,'<');
53:
54: if (p && (!strncasecomp(p, "<HTML>", 6) ||
55: !strncasecomp(p, "<HEAD", 5) ||
56: !strncasecomp(p, "<TITLE>", 7) ||
57: !strncasecomp(p, "<BODY>", 6) ||
58: !strncasecomp(p, "<PLAINTEXT>", 11) ||
59: (p[0]=='<' && TOUPPER(p[1]) == 'H' && p[3]=='>')))
60: return YES;
61: else
62: return NO;
63: }
64:
65:
/*
**	Shorthands used by the stream methods below.  They all expect a
**	variable `me' of type HTStream * to be in scope.
*/

/* Forward output to the target stream */
#define PUT_CHAR(c)         ((*me->target->isa->put_character)(me->target, (c)))
#define PUT_STRING(s)       ((*me->target->isa->put_string)(me->target, (s)))
#define PUT_BLOCK(b,l)      ((*me->target->isa->put_block)(me->target, (b), (l)))

/* Record the guess in the request structure */
#define CONTENT_TYPE(t)     (me->req->content_type = HTAtom_for(t))
#define CONTENT_ENCODING(t) (me->req->content_encoding = HTAtom_for(t))
77:
78:
79: PRIVATE BOOL header_and_flush ARGS1(HTStream *, me)
80: {
81: CTRACE(stderr,"GUESSING.... text=%d newlines=%d ctrl=%d high=%d\n",
82: me->text_cnt, me->lf_cnt, me->ctrl_cnt, me->high_cnt);
2.2 luotonen 83: if (me->cnt) {
84: CTRACE(stderr,
85: "Percentages. text=%d%% newlines=%d%% ctrl=%d%% high=%d%%\n",
86: (int)(100*me->text_cnt/me->cnt + 0.5),
87: (int)(100*me->lf_cnt /me->cnt + 0.5),
88: (int)(100*me->ctrl_cnt/me->cnt + 0.5),
89: (int)(100*me->high_cnt/me->cnt + 0.5));
90: }
2.1 luotonen 91:
92: if (!me->ctrl_cnt ||
93: me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {
94:
95: /* some kind of text */
96:
97: *me->write_ptr = 0; /* terminate buffer */
98:
99: if (me->high_cnt > 0)
100: CONTENT_ENCODING("8bit");
101: else
102: CONTENT_ENCODING("7bit");
103:
104: if (is_html(me->buffer))
105: CONTENT_TYPE("text/html");
106:
107: else if (!strncmp(me->buffer, "%!", 2))
108: CONTENT_TYPE("application/postscript");
109:
110: else if (strstr(me->buffer, "#define") &&
111: strstr(me->buffer, "_width") &&
112: strstr(me->buffer, "_bits"))
113: CONTENT_TYPE("image/x-xbitmap");
114:
115: else
116: CONTENT_TYPE("text/plain");
117: }
118: else {
119: if (!strncmp(me->buffer, "GIF", 3))
120: CONTENT_TYPE("image/gif");
121:
122: else if (!strncmp(me->buffer, "\377\330\377\340", 4))
123: CONTENT_TYPE("image/jpeg");
124:
125: else if (!strcmp(me->buffer, "MM")) /* MM followed by a zero */
126: CONTENT_TYPE("image/tiff");
127:
128: else if (!strncmp(me->buffer, ".snd", 4))
129: CONTENT_TYPE("audio/basic");
130:
131: else if (!strncmp(me->buffer, "\037\235", 2))
132: CONTENT_ENCODING("x-compress");
133:
134: else if (!strncmp(me->buffer, "\037\213", 2))
135: CONTENT_ENCODING("x-gzip");
136:
137: else
138: CONTENT_TYPE("application/octet-stream");
139: }
140:
141: if (!me->req->content_type) CONTENT_TYPE("www/unknown");
142: if (!me->req->content_encoding) CONTENT_ENCODING("binary");
143:
144: CTRACE(stderr,"Guessed..... %s\n", HTAtom_name(me->req->content_type));
145: CTRACE(stderr,"Encoding.... %s\n", HTAtom_name(me->req->content_encoding));
146:
2.9 frystyk 147: me->target = HTStreamStack(me->req->content_type, me->output_format,
148: me->output_stream, me->req, NO);
149: if (!me->target) {
2.1 luotonen 150: me->discard = YES; /* Turning into a black hole */
151: return NO;
152: }
153: else {
154: PUT_BLOCK(me->buffer, me->cnt);
155: return YES;
156: }
157: }
158:
159:
160: PRIVATE void HTGuess_put_character ARGS2(HTStream *, me, char, c)
161: {
162: if (me->discard) return;
2.9 frystyk 163: if (me->target) PUT_CHAR(c);
2.1 luotonen 164: else {
165: me->cnt++;
2.4 luotonen 166: #if 0
2.3 luotonen 167: if (c < 0) me->high_cnt++;
2.4 luotonen 168: else
169: #endif
170: if (c == LF) me->lf_cnt++;
2.1 luotonen 171: else if (c == CR) me->cr_cnt++;
172: else if (c == 12) me->pg_cnt++;
2.2 luotonen 173: else if (c =='\t')me->text_cnt++;
2.4 luotonen 174: else if ((unsigned char)c < 32) me->ctrl_cnt++;
175: else if ((unsigned char)c < 128) me->text_cnt++;
2.1 luotonen 176: else me->high_cnt++;
177: *me->write_ptr++ = c;
178: if (me->cnt >= SAMPLE_SIZE) header_and_flush(me);
179: }
180: }
181:
182: PRIVATE void HTGuess_put_string ARGS2(HTStream *, me, CONST char*, s)
183: {
184: if (me->discard) return;
2.9 frystyk 185: if (me->target) PUT_STRING(s);
2.1 luotonen 186: else {
187: while (*s) {
188: HTGuess_put_character(me,*s);
189: s++;
190: }
191: }
192: }
193:
194: PRIVATE void HTGuess_put_block ARGS3(HTStream *, me, CONST char*, b, int, l)
195: {
196: if (me->discard) return;
2.9 frystyk 197: while (!me->target && l > 0) {
2.1 luotonen 198: HTGuess_put_character(me, *b);
199: b++;
200: l--;
201: }
202: if (l > 0) PUT_BLOCK(b,l);
203: }
204:
2.9 frystyk 205: PRIVATE int HTGuess_free ARGS1(HTStream *, me)
2.1 luotonen 206: {
2.9 frystyk 207: if (!me->discard && !me->target)
2.1 luotonen 208: header_and_flush(me);
2.9 frystyk 209: if (me->target)
210: (*me->target->isa->_free)(me->target);
2.1 luotonen 211: free(me);
2.9 frystyk 212: return 0;
2.1 luotonen 213: }
214:
2.9 frystyk 215: PRIVATE int HTGuess_abort ARGS2(HTStream *, me, HTError, e)
2.1 luotonen 216: {
2.9 frystyk 217: if (me->target)
218: (*me->target->isa->abort)(me,e);
2.1 luotonen 219: free(me);
2.9 frystyk 220: return EOF;
2.1 luotonen 221: }
222:
223:
224: /* Guessing stream
225: ** ---------------
226: */
227: PRIVATE CONST HTStreamClass HTGuessClass =
228: {
229: "Guess",
230: HTGuess_free,
231: HTGuess_abort,
232: HTGuess_put_character,
233: HTGuess_put_string,
234: HTGuess_put_block
235: };
236:
237:
238:
2.9 frystyk 239: PUBLIC HTStream * HTGuess_new ARGS5(HTRequest *, req,
240: void *, param,
241: HTFormat, input_format,
242: HTFormat, output_format,
243: HTStream *, output_stream)
2.1 luotonen 244: {
245: HTStream * me = (HTStream*)calloc(1,sizeof(HTStream));
246: if (!me) outofmem(__FILE__, "HTGuess_new");
247:
248: me->isa = &HTGuessClass;
2.9 frystyk 249: me->req = req;
250: me->output_format = output_format;
251: me->output_stream = output_stream;
2.1 luotonen 252: me->write_ptr = me->buffer;
253: return me;
254: }
2.9 frystyk 255:
2.1 luotonen 256:
Webmaster