Annotation of libwww/Library/src/HTFormat.c, revision 1.11
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
/*	Shell command template for displaying a PostScript file (unix only).
**	NOTE(review): both %s presumably receive the same temporary file
**	name -- confirm against the code which expands this template.
*/
#if defined(unix)
#if defined(NeXT)
#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
#else
/* Full pathname would be better! */
#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
#endif
#endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.2 timbl 36: #include "HTMLDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
48:
49: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
50: extern BOOL interactive;
51:
1.10 timbl 52: #ifdef ORIGINAL
1.2 timbl 53: struct _HTStream {
54: CONST HTStreamClass* isa;
55: /* ... */
56: };
1.10 timbl 57: #endif
58:
59: /* this version used by the NetToText stream */
60: struct _HTStream {
61: CONST HTStreamClass * isa;
62: BOOL had_cr;
63: HTStream * sink;
64: };
1.2 timbl 65:
66:
67: /* Presentation methods
68: ** --------------------
69: */
70:
71: PUBLIC HTList * HTPresentations = 0;
72: PUBLIC HTPresentation* default_presentation = 0;
73:
74:
75: /* Define a presentation system command for a content-type
76: ** -------------------------------------------------------
77: */
78: PUBLIC void HTSetPresentation ARGS5(
79: CONST char *, representation,
80: CONST char *, command,
81: float, quality,
82: float, secs,
83: float, secs_per_byte
84: ){
85:
86: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
87: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
88:
89: pres->rep = HTAtom_for(representation);
90: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
91: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
92: pres->quality = quality;
93: pres->secs = secs;
94: pres->secs_per_byte = secs_per_byte;
95: pres->rep = HTAtom_for(representation);
96: pres->command = 0;
97: StrAllocCopy(pres->command, command);
98:
99: if (!HTPresentations) HTPresentations = HTList_new();
100:
101: if (strcmp(representation, "*")==0) {
102: if (default_presentation) free(default_presentation);
103: default_presentation = pres;
104: } else {
105: HTList_addObject(HTPresentations, pres);
106: }
107: }
108:
109:
110: /* Define a built-in function for a content-type
111: ** ---------------------------------------------
112: */
113: PUBLIC void HTSetConversion ARGS6(
114: CONST char *, representation_in,
115: CONST char *, representation_out,
1.6 timbl 116: HTConverter*, converter,
1.2 timbl 117: float, quality,
118: float, secs,
119: float, secs_per_byte
120: ){
1.1 timbl 121:
1.2 timbl 122: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
123: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
124:
125: pres->rep = HTAtom_for(representation_in);
126: pres->rep_out = HTAtom_for(representation_out);
127: pres->converter = converter;
128: pres->command = NULL; /* Fixed */
129: pres->quality = quality;
130: pres->secs = secs;
131: pres->secs_per_byte = secs_per_byte;
132: pres->command = 0;
133:
134: if (!HTPresentations) HTPresentations = HTList_new();
135:
136: if (strcmp(representation_in, "*")==0) {
137: if (default_presentation) free(default_presentation);
138: default_presentation = pres;
139: } else {
140: HTList_addObject(HTPresentations, pres);
141: }
142: }
1.1 timbl 143:
144:
145:
146: /* File buffering
147: ** --------------
148: **
149: ** The input file is read using the macro which can read from
150: ** a socket or a file.
151: ** The input buffer size, if large will give greater efficiency and
152: ** release the server faster, and if small will save space on PCs etc.
153: */
154: #define INPUT_BUFFER_SIZE 4096 /* Tradeoff */
155: PRIVATE char input_buffer[INPUT_BUFFER_SIZE];
156: PRIVATE char * input_pointer;
157: PRIVATE char * input_limit;
158: PRIVATE int input_file_number;
159:
160:
161: /* Set up the buffering
162: **
163: ** These routines are public because they are in fact needed by
164: ** many parsers, and on PCs and Macs we should not duplicate
165: ** the static buffer area.
166: */
167: PUBLIC void HTInitInput ARGS1 (int,file_number)
168: {
169: input_file_number = file_number;
170: input_pointer = input_limit = input_buffer;
171: }
172:
173:
174: PUBLIC char HTGetChararcter NOARGS
175: {
176: char ch;
177: do {
178: if (input_pointer >= input_limit) {
179: int status = NETREAD(
180: input_file_number, input_buffer, INPUT_BUFFER_SIZE);
181: if (status <= 0) {
182: if (status == 0) return (char)EOF;
183: if (TRACE) fprintf(stderr,
184: "HTFormat: File read error %d\n", status);
185: return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
186: }
187: input_pointer = input_buffer;
188: input_limit = input_buffer + status;
189: }
190: ch = *input_pointer++;
191: } while (ch == (char) 13); /* Ignore ASCII carriage return */
192:
193: return FROMASCII(ch);
194: }
195:
196: /* Stream the data to an ouput file as binary
197: */
198: PUBLIC int HTOutputBinary ARGS2( int, input,
199: FILE *, output)
200: {
201: do {
202: int status = NETREAD(
203: input, input_buffer, INPUT_BUFFER_SIZE);
204: if (status <= 0) {
205: if (status == 0) return 0;
206: if (TRACE) fprintf(stderr,
207: "HTFormat: File read error %d\n", status);
208: return 2; /* Error */
209: }
210: fwrite(input_buffer, sizeof(char), status, output);
211: } while (YES);
212: }
213:
214:
1.2 timbl 215: /* Create a filter stack
216: ** ---------------------
217: **
1.7 secret 218: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 219: ** structure is made to hold the destination format while the
220: ** new stack is generated. This is just to pass the out format to
221: ** MIME so far. Storing the format of a stream in the stream might
222: ** be a lot neater.
1.10 timbl 223: **
224: ** The www/source format is special, in that if you can take
225: ** that you can take anything. However, we
1.2 timbl 226: */
227: PUBLIC HTStream * HTStreamStack ARGS4(
1.10 timbl 228: HTFormat, rep_in,
1.2 timbl 229: HTFormat, rep_out,
230: HTStream*, sink,
231: HTParentAnchor*, anchor)
232: {
233: HTAtom * wildcard = HTAtom_for("*");
1.10 timbl 234: HTFormat source = WWW_SOURCE;
1.2 timbl 235: if (TRACE) fprintf(stderr,
236: "HTFormat: Constructing stream stack for %s to %s\n",
1.10 timbl 237: HTAtom_name(rep_in),
1.2 timbl 238: HTAtom_name(rep_out));
239:
240: if (rep_out == WWW_SOURCE ||
1.10 timbl 241: rep_out == rep_in) return sink;
1.2 timbl 242:
243: if (!HTPresentations) HTFormatInit(); /* set up the list */
244:
245: {
246: int n = HTList_count(HTPresentations);
247: int i;
1.10 timbl 248: HTPresentation * pres, *match, *wildcard_match=0,
249: *source_match=0, *source_wildcard_match=0;
1.2 timbl 250: for(i=0; i<n; i++) {
251: pres = HTList_objectAt(HTPresentations, i);
1.10 timbl 252: if (pres->rep == rep_in) {
1.2 timbl 253: if (pres->rep_out == rep_out)
254: return (*pres->converter)(pres, anchor, sink);
255: if (pres->rep_out == wildcard) {
1.10 timbl 256: wildcard_match = pres;
257: }
258: }
259: if (pres->rep == source) {
260: if (pres->rep_out == rep_out)
261: source_match = pres;
262: if (pres->rep_out == wildcard) {
263: source_wildcard_match = pres;
1.2 timbl 264: }
265: }
266: }
1.10 timbl 267:
268: match = wildcard_match ? wildcard_match :
269: source_match ? source_match :
270: source_wildcard_match;
271:
272: if (match) {
1.11 ! timbl 273: HTPresentation temp;
! 274: temp = *match; /* Specific instance */
1.10 timbl 275: temp.rep = rep_in; /* yuk */
276: temp.rep_out = rep_out; /* yuk */
277: return (*match->converter)(&temp, anchor, sink);
278: }
1.2 timbl 279: }
1.10 timbl 280:
1.2 timbl 281:
1.10 timbl 282: #ifdef XMOSAIC_HACK_REMOVED_NOW /* Use above source method instead */
1.3 timbl 283: return sink;
284: #else
1.2 timbl 285: return NULL;
1.3 timbl 286: #endif
1.2 timbl 287: }
288:
289:
290: /* Find the cost of a filter stack
291: ** -------------------------------
292: **
293: ** Must return the cost of the same stack which StreamStack would set up.
294: **
295: ** On entry,
296: ** length The size of the data to be converted
297: */
298: PUBLIC float HTStackValue ARGS4(
1.10 timbl 299: HTFormat, rep_in,
1.2 timbl 300: HTFormat, rep_out,
301: float, initial_value,
302: long int, length)
303: {
304: HTAtom * wildcard = HTAtom_for("*");
305:
306: if (TRACE) fprintf(stderr,
307: "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 308: HTAtom_name(rep_in), initial_value,
1.2 timbl 309: HTAtom_name(rep_out));
310:
311: if (rep_out == WWW_SOURCE ||
1.10 timbl 312: rep_out == rep_in) return 0.0;
1.2 timbl 313:
314: if (!HTPresentations) HTFormatInit(); /* set up the list */
315:
316: {
317: int n = HTList_count(HTPresentations);
318: int i;
319: HTPresentation * pres;
320: for(i=0; i<n; i++) {
321: pres = HTList_objectAt(HTPresentations, i);
1.10 timbl 322: if (pres->rep == rep_in && (
1.2 timbl 323: pres->rep_out == rep_out ||
324: pres->rep_out == wildcard)) {
325: float value = initial_value * pres->quality;
326: if (HTMaxSecs != 0.0)
327: value = value - (length*pres->secs_per_byte + pres->secs)
328: /HTMaxSecs;
329: return value;
330: }
331: }
332: }
333:
334: return -1e30; /* Really bad */
335:
336: }
337:
1.1 timbl 338:
1.2 timbl 339: /* Push data from a socket down a stream
340: ** -------------------------------------
1.1 timbl 341: **
1.2 timbl 342: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 343: ** graphic (or other) objects described by the file.
1.2 timbl 344: **
345: ** The file number given is assumed to be a TELNET stream ie containing
346: ** CRLF at the end of lines which need to be stripped to LF for unix
347: ** when the format is textual.
348: **
1.1 timbl 349: */
1.2 timbl 350: PUBLIC void HTCopy ARGS2(
351: int, file_number,
352: HTStream*, sink)
1.1 timbl 353: {
1.2 timbl 354: HTStreamClass targetClass;
355:
1.5 timbl 356: /* Push the data down the stream
1.2 timbl 357: **
358: */
359: targetClass = *(sink->isa); /* Copy pointers to procedures */
360:
361: /* Push binary from socket down sink
1.10 timbl 362: **
363: ** This operation could be put into a main event loop
1.2 timbl 364: */
365: for(;;) {
366: int status = NETREAD(
367: file_number, input_buffer, INPUT_BUFFER_SIZE);
368: if (status <= 0) {
369: if (status == 0) break;
370: if (TRACE) fprintf(stderr,
371: "HTFormat: Read error, read returns %d\n", status);
372: break;
373: }
1.8 timbl 374:
375: #ifdef NOT_ASCII
376: {
377: char * p;
378: for(p = input_buffer; p < input_buffer+status; p++) {
379: *p = FROMASCII(*p);
380: }
381: }
382: #endif
383:
1.4 timbl 384: (*targetClass.put_block)(sink, input_buffer, status);
1.2 timbl 385: } /* next bufferload */
386:
387: }
388:
1.1 timbl 389:
1.7 secret 390:
391: /* Push data from a file pointer down a stream
392: ** -------------------------------------
393: **
394: ** This routine is responsible for creating and PRESENTING any
395: ** graphic (or other) objects described by the file.
396: **
397: **
398: */
399: PUBLIC void HTFileCopy ARGS2(
400: FILE *, fp,
401: HTStream*, sink)
402: {
403: HTStreamClass targetClass;
404:
405: /* Push the data down the stream
406: **
407: */
408: targetClass = *(sink->isa); /* Copy pointers to procedures */
409:
410: /* Push binary from socket down sink
411: */
412: for(;;) {
413: int status = fread(
414: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
415: if (status == 0) { /* EOF or error */
416: if (ferror(fp) == 0) break;
417: if (TRACE) fprintf(stderr,
418: "HTFormat: Read error, read returns %d\n", ferror(fp));
419: break;
420: }
421: (*targetClass.put_block)(sink, input_buffer, status);
422: } /* next bufferload */
423:
424: }
425:
426:
427:
428:
1.2 timbl 429: /* Push data from a socket down a stream STRIPPING CR
430: ** --------------------------------------------------
431: **
432: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 433: ** graphic (or other) objects described by the socket.
1.2 timbl 434: **
435: ** The file number given is assumed to be a TELNET stream ie containing
436: ** CRLF at the end of lines which need to be stripped to LF for unix
437: ** when the format is textual.
438: **
1.1 timbl 439: */
1.2 timbl 440: PUBLIC void HTCopyNoCR ARGS2(
441: int, file_number,
442: HTStream*, sink)
443: {
444: HTStreamClass targetClass;
1.1 timbl 445:
1.2 timbl 446: /* Push the data, ignoring CRLF, down the stream
447: **
448: */
449: targetClass = *(sink->isa); /* Copy pointers to procedures */
450:
451: /* Push text from telnet socket down sink
452: **
453: ** @@@@@ To push strings could be faster? (especially is we
454: ** cheat and don't ignore CR! :-}
455: */
1.1 timbl 456: HTInitInput(file_number);
1.2 timbl 457: for(;;) {
458: char character;
459: character = HTGetChararcter();
460: if (character == (char)EOF) break;
461: (*targetClass.put_character)(sink, character);
462: }
463: }
1.1 timbl 464:
1.2 timbl 465:
1.7 secret 466:
1.2 timbl 467: /* Parse a socket given format and file number
468: **
469: ** This routine is responsible for creating and PRESENTING any
470: ** graphic (or other) objects described by the file.
471: **
472: ** The file number given is assumed to be a TELNET stream ie containing
473: ** CRLF at the end of lines which need to be stripped to LF for unix
474: ** when the format is textual.
475: **
476: */
477: PUBLIC int HTParseSocket ARGS5(
1.10 timbl 478: HTFormat, rep_in,
1.2 timbl 479: HTFormat, format_out,
480: HTParentAnchor *, anchor,
481: int, file_number,
482: HTStream*, sink)
483: {
484: HTStream * stream;
485: HTStreamClass targetClass;
1.1 timbl 486:
1.10 timbl 487: stream = HTStreamStack(rep_in,
1.2 timbl 488: format_out,
489: sink , anchor);
490:
491: if (!stream) {
492: char buffer[1024]; /* @@@@@@@@ */
493: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.10 timbl 494: HTAtom_name(rep_in), HTAtom_name(format_out));
1.3 timbl 495: if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer);
1.2 timbl 496: return HTLoadError(sink, 501, buffer);
497: }
1.1 timbl 498:
1.3 timbl 499: /* Push the data, ignoring CRLF if necessary, down the stream
500: **
1.2 timbl 501: **
1.3 timbl 502: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 503: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 504: ** The current method smells anyway.
1.2 timbl 505: */
506: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.10 timbl 507: if (rep_in == WWW_BINARY || HTOutputSource
508: || strstr(HTAtom_name(rep_in), "image/")
509: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.2 timbl 510: HTCopy(file_number, stream);
511: } else { /* ascii text with CRLFs :-( */
512: HTCopyNoCR(file_number, stream);
513: }
1.7 secret 514: (*targetClass.free)(stream);
515:
516: return HT_LOADED;
517: }
518:
519:
520:
521: /* Parse a file given format and file pointer
522: **
523: ** This routine is responsible for creating and PRESENTING any
524: ** graphic (or other) objects described by the file.
525: **
526: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 527: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 528: ** when the format is textual.
529: **
530: */
531: PUBLIC int HTParseFile ARGS5(
1.10 timbl 532: HTFormat, rep_in,
1.7 secret 533: HTFormat, format_out,
534: HTParentAnchor *, anchor,
535: FILE *, fp,
536: HTStream*, sink)
537: {
538: HTStream * stream;
539: HTStreamClass targetClass;
540:
1.10 timbl 541: stream = HTStreamStack(rep_in,
1.7 secret 542: format_out,
543: sink , anchor);
544:
545: if (!stream) {
546: char buffer[1024]; /* @@@@@@@@ */
547: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.10 timbl 548: HTAtom_name(rep_in), HTAtom_name(format_out));
1.7 secret 549: if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
550: return HTLoadError(sink, 501, buffer);
551: }
552:
1.9 timbl 553: /* Push the data down the stream
1.7 secret 554: **
555: **
556: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 557: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 558: ** The current method smells anyway.
559: */
560: targetClass = *(stream->isa); /* Copy pointers to procedures */
561: HTFileCopy(fp, stream);
1.2 timbl 562: (*targetClass.free)(stream);
1.1 timbl 563:
1.2 timbl 564: return HT_LOADED;
1.1 timbl 565: }
1.2 timbl 566:
1.10 timbl 567:
568: /* Converter stream: Network Telnet to internal character text
569: ** -----------------------------------------------------------
570: **
571: ** The input is assumed to be in ASCII, with lines delimited
572: ** by (13,10) pairs, These pairs are converted into (CR,LF)
573: ** pairs in the local representation. The (CR,LF) sequence
574: ** when found is changed to a '\n' character, the internal
575: ** C representation of a new line.
576: */
577:
578:
1.11 ! timbl 579: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 580: {
581: char c = FROMASCII(net_char);
582: if (me->had_cr) {
583: if (c==LF) {
584: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
585: me->had_cr = NO;
586: return;
587: } else {
588: me->sink->isa->put_character(me->sink, CR); /* leftover */
589: }
590: }
591: me->had_cr = (c==CR);
592: if (!me->had_cr)
593: me->sink->isa->put_character(me->sink, c); /* normal */
594: }
595:
1.11 ! timbl 596: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 597: {
598: CONST char * p;
599: for(p=s; *p; p++) NetToText_put_character(me, *p);
600: }
601:
1.11 ! timbl 602: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 603: {
604: CONST char * p;
605: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
606: }
607:
608: PRIVATE void NetToText_free ARGS1(HTStream *, me)
609: {
610: me->sink->isa->free(me->sink); /* Close rest of pipe */
611: free(me);
612: }
613:
614: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
615: {
616: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
617: free(me);
618: }
619:
620: /* The class structure
621: */
622: PRIVATE HTStreamClass NetToTextClass = {
623: "NetToText",
624: NetToText_free,
625: NetToText_abort,
626: NetToText_put_character,
627: NetToText_put_string,
628: NetToText_put_block
629: };
630:
631: /* The creation method
632: */
633: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
634: {
635: HTStream* me = (HTStream*)malloc(sizeof(*me));
636: if (me == NULL) outofmem(__FILE__, "NetToText");
637: me->isa = &NetToTextClass;
638:
639: me->had_cr = NO;
640: me->sink = sink;
641: return me;
642: }
1.2 timbl 643:
644:
Webmaster