Annotation of libwww/Library/src/HTGuess.c, revision 2.2
2.1 luotonen 1:
2: /* STREAM TO GUESS CONTENT-TYPE HTGuess.c
3: ** ============================
4: **
5: ** This stream object buffers the first SAMPLE_SIZE characters of its
6: ** input, guesses the content type and encoding from that sample, and
7: ** then passes the data on to a stream built for the guessed type.
8: **
9: */
10:
11:
12: #define SAMPLE_SIZE 200 /* Number of chars buffered and looked at before the type is guessed */
13:
14: #include "HTGuess.h"
15:
16: #include "HTFormat.h"
17: #include "HTAlert.h"
18: #include "HTList.h"
19:
20: /* Stream Object
21: ** ------------
22: ** Sampling state kept by one guessing stream until a type has been chosen. */
23:
24: struct _HTStream {
25: CONST HTStreamClass * isa; /* method table; HTGuess_new sets it to &HTGuessClass */
26:
27: HTRequest * req; /* request whose content_type / content_encoding are filled in */
28: HTStream * output_stream; /* downstream sink; NULL until the guess has been made */
29:
30: BOOL discard; /* YES once no converter could be found: further input is dropped */
31: int cnt; /* characters sampled so far */
32: int text_cnt; /* TABs and characters counted as printable (below 128) */
33: int lf_cnt; /* LF characters */
34: int cr_cnt; /* CR characters */
35: int pg_cnt; /* form feeds (character value 12) */
36: int ctrl_cnt; /* remaining characters that compare below 32 */
37: int high_cnt; /* characters that are neither below 32 nor below 128 */
38: char * write_ptr; /* next free position in buffer */
39: char buffer[ SAMPLE_SIZE + 1 ]; /* the sample; one extra byte for a terminating NUL */
40: };
41:
42:
43: PRIVATE BOOL is_html ARGS1(char *, buf)
44: {
45: char * p = strchr(buf,'<');
46:
47: if (p && (!strncasecomp(p, "<HTML>", 6) ||
48: !strncasecomp(p, "<HEAD", 5) ||
49: !strncasecomp(p, "<TITLE>", 7) ||
50: !strncasecomp(p, "<BODY>", 6) ||
51: !strncasecomp(p, "<PLAINTEXT>", 11) ||
52: (p[0]=='<' && TOUPPER(p[1]) == 'H' && p[3]=='>')))
53: return YES;
54: else
55: return NO;
56: }
57:
58:
/*  Shorthands used by the stream methods
**  -------------------------------------
**
**  All of these expect a local variable `me' of type HTStream *.
**
**  PUT_CHAR, PUT_STRING and PUT_BLOCK call the matching method of
**  me->output_stream, which must be non-NULL when they are used.
**
**  CONTENT_TYPE and CONTENT_ENCODING store the atom for the given name
**  in the request. Their expansions are parenthesised so that each use
**  is a single expression wherever it appears.
*/
#define PUT_CHAR(c) \
        (*me->output_stream->isa->put_character)(me->output_stream, (c))
#define PUT_STRING(s) \
        (*me->output_stream->isa->put_string)(me->output_stream, (s))
#define PUT_BLOCK(b,l) \
        (*me->output_stream->isa->put_block)(me->output_stream, (b), (l))

#define CONTENT_TYPE(t) \
        (me->req->content_type = HTAtom_for(t))
#define CONTENT_ENCODING(t) \
        (me->req->content_encoding = HTAtom_for(t))
70:
71:
72: PRIVATE BOOL header_and_flush ARGS1(HTStream *, me)
73: {
74: CTRACE(stderr,"GUESSING.... text=%d newlines=%d ctrl=%d high=%d\n",
75: me->text_cnt, me->lf_cnt, me->ctrl_cnt, me->high_cnt);
2.2 ! luotonen 76: if (me->cnt) {
! 77: CTRACE(stderr,
! 78: "Percentages. text=%d%% newlines=%d%% ctrl=%d%% high=%d%%\n",
! 79: (int)(100*me->text_cnt/me->cnt + 0.5),
! 80: (int)(100*me->lf_cnt /me->cnt + 0.5),
! 81: (int)(100*me->ctrl_cnt/me->cnt + 0.5),
! 82: (int)(100*me->high_cnt/me->cnt + 0.5));
! 83: }
2.1 luotonen 84:
85: if (!me->ctrl_cnt ||
86: me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {
87:
88: /* some kind of text */
89:
90: *me->write_ptr = 0; /* terminate buffer */
91:
92: if (me->high_cnt > 0)
93: CONTENT_ENCODING("8bit");
94: else
95: CONTENT_ENCODING("7bit");
96:
97: if (is_html(me->buffer))
98: CONTENT_TYPE("text/html");
99:
100: else if (!strncmp(me->buffer, "%!", 2))
101: CONTENT_TYPE("application/postscript");
102:
103: else if (strstr(me->buffer, "#define") &&
104: strstr(me->buffer, "_width") &&
105: strstr(me->buffer, "_bits"))
106: CONTENT_TYPE("image/x-xbitmap");
107:
108: else
109: CONTENT_TYPE("text/plain");
110: }
111: else {
112: if (!strncmp(me->buffer, "GIF", 3))
113: CONTENT_TYPE("image/gif");
114:
115: else if (!strncmp(me->buffer, "\377\330\377\340", 4))
116: CONTENT_TYPE("image/jpeg");
117:
118: else if (!strcmp(me->buffer, "MM")) /* MM followed by a zero */
119: CONTENT_TYPE("image/tiff");
120:
121: else if (!strncmp(me->buffer, ".snd", 4))
122: CONTENT_TYPE("audio/basic");
123:
124: else if (!strncmp(me->buffer, "\037\235", 2))
125: CONTENT_ENCODING("x-compress");
126:
127: else if (!strncmp(me->buffer, "\037\213", 2))
128: CONTENT_ENCODING("x-gzip");
129:
130: else
131: CONTENT_TYPE("application/octet-stream");
132: }
133:
134: if (!me->req->content_type) CONTENT_TYPE("www/unknown");
135: if (!me->req->content_encoding) CONTENT_ENCODING("binary");
136:
137: CTRACE(stderr,"Guessed..... %s\n", HTAtom_name(me->req->content_type));
138: CTRACE(stderr,"Encoding.... %s\n", HTAtom_name(me->req->content_encoding));
139:
140: me->output_stream = HTStreamStack(me->req->content_type, me->req, NO);
141: if (!me->output_stream) {
142: char buffer[1024];
143: sprintf(buffer, "Guesser: Sorry, can't convert from %s to %s.",
144: HTAtom_name(me->req->content_type),
145: HTAtom_name(me->req->output_format));
146: CTRACE(stderr, "HTFormat.... %s\n", buffer);
147: HTLoadError(me->req, 501, buffer);
148: me->discard = YES; /* Turning into a black hole */
149: return NO;
150: }
151: else {
152: PUT_BLOCK(me->buffer, me->cnt);
153: return YES;
154: }
155: }
156:
157:
158: PRIVATE void HTGuess_put_character ARGS2(HTStream *, me, char, c)
159: {
160: if (me->discard) return;
161: if (me->output_stream) PUT_CHAR(c);
162: else {
163: me->cnt++;
164: if (c == LF) me->lf_cnt++;
165: else if (c == CR) me->cr_cnt++;
166: else if (c == 12) me->pg_cnt++;
2.2 ! luotonen 167: else if (c =='\t')me->text_cnt++;
2.1 luotonen 168: else if (c < 32) me->ctrl_cnt++;
169: else if (c < 128) me->text_cnt++;
170: else me->high_cnt++;
171: *me->write_ptr++ = c;
172: if (me->cnt >= SAMPLE_SIZE) header_and_flush(me);
173: }
174: }
175:
176: PRIVATE void HTGuess_put_string ARGS2(HTStream *, me, CONST char*, s)
177: {
178: if (me->discard) return;
179: if (me->output_stream) PUT_STRING(s);
180: else {
181: while (*s) {
182: HTGuess_put_character(me,*s);
183: s++;
184: }
185: }
186: }
187:
188: PRIVATE void HTGuess_put_block ARGS3(HTStream *, me, CONST char*, b, int, l)
189: {
190: if (me->discard) return;
191: while (!me->output_stream && l > 0) {
192: HTGuess_put_character(me, *b);
193: b++;
194: l--;
195: }
196: if (l > 0) PUT_BLOCK(b,l);
197: }
198:
199: PRIVATE void HTGuess_free ARGS1(HTStream *, me)
200: {
201: CTRACE(stderr, " ** DEBUG: HTGuess_free\n");
202: if (!me->discard && !me->output_stream)
203: header_and_flush(me);
204: if (me->output_stream)
205: (*me->output_stream->isa->free)(me->output_stream);
206: free(me);
207: }
208:
209: PRIVATE void HTGuess_abort ARGS2(HTStream *, me, HTError, e)
210: {
211: CTRACE(stderr, " ** DEBUG: HTGuess_abort\n");
212: if (me->output_stream)
213: (*me->output_stream->isa->abort)(me,e);
214: free(me);
215: }
216:
217:
218: /* Guessing stream
219: ** ---------------
220: ** Method table of the guessing stream; HTGuess_new stores its address in isa. */
221: PRIVATE CONST HTStreamClass HTGuessClass =
222: {
223: "Guess", /* presumably the class name -- confirm against HTStreamClass */
224: HTGuess_free, /* makes a pending guess, frees the output stream and this stream */
225: HTGuess_abort, /* calls the output stream's abort method, frees this stream */
226: HTGuess_put_character, /* samples one character, or forwards it */
227: HTGuess_put_string, /* samples a NUL-terminated string, or forwards it */
228: HTGuess_put_block /* samples a counted block, forwards the remainder */
229: };
230:
231:
232:
233: PUBLIC HTStream * HTGuess_new ARGS1(HTRequest *, req)
234: {
235: HTStream * me = (HTStream*)calloc(1,sizeof(HTStream));
236: if (!me) outofmem(__FILE__, "HTGuess_new");
237:
238: CTRACE(stderr, " ** DEBUG: HTGuess_new\n");
239:
240: me->isa = &HTGuessClass;
241: me->req =req;
242: me->write_ptr = me->buffer;
243: return me;
244: }
245:
Webmaster