Annotation of libwww/Library/src/HTFormat.c, revision 1.10
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 ! timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
/*	Limits used when weighing a conversion
**
**	HTMaxSecs is the divisor HTStackValue() uses to turn a converter's
**	estimated time into a quality penalty; HTStackValue() skips the
**	penalty altogether when it is 0.0.
**	HTMaxLength is not referenced in the part of the file visible here.
*/
PUBLIC float HTMaxSecs = 1e10;		/* No effective limit */
PUBLIC float HTMaxLength = 1e10;	/* No effective limit */

/*	Shell command template for presenting PostScript (unix only).
**	Each template has two %s slots: one for the viewer and one for
**	"/bin/rm -f".  NOTE(review): presumably both receive the same
**	temporary file name -- confirm against the code that expands it.
*/
#ifdef unix
#ifdef NeXT
#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
#else
#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
	/* Full pathname would be better! */
#endif
#endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.2 timbl 36: #include "HTMLDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
48:
/*	HTOutputSource: when set, HTParseSocket() copies the socket data
**	through unmodified (HTCopy) instead of stripping carriage returns.
*/
PUBLIC	BOOL HTOutputSource = NO;	/* Flag: shortcut parser to stdout */
/*	Defined in another module; not used in the part of the file
**	visible here.
*/
extern  BOOL interactive;

/*	Generic stream layout, compiled only when ORIGINAL is defined.
**	NOTE(review): the definition further down is unconditional, so
**	defining ORIGINAL would give two definitions of struct _HTStream.
*/
#ifdef ORIGINAL
struct _HTStream {
      CONST HTStreamClass*	isa;
      /* ... */
};
#endif

/* this version used by the NetToText stream */
struct _HTStream {
	CONST HTStreamClass *		isa;	/* method table of this stream */
	BOOL			had_cr;		/* a CR was received and is held back */
	HTStream * 		sink;		/* stream the converted text is passed to */
};
1.2 timbl 65:
66:
/*	Presentation methods
**	--------------------
**
**	HTPresentations is the list of registered converters that
**	HTStreamStack() and HTStackValue() search.  It is created on first
**	registration by HTSetPresentation() / HTSetConversion().
**
**	default_presentation holds the entry registered for the input
**	representation "*"; it is kept out of the list.
*/

PUBLIC  HTList * HTPresentations = 0;
PUBLIC  HTPresentation* default_presentation = 0;
73:
74:
75: /* Define a presentation system command for a content-type
76: ** -------------------------------------------------------
77: */
78: PUBLIC void HTSetPresentation ARGS5(
79: CONST char *, representation,
80: CONST char *, command,
81: float, quality,
82: float, secs,
83: float, secs_per_byte
84: ){
85:
86: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
87: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
88:
89: pres->rep = HTAtom_for(representation);
90: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
91: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
92: pres->quality = quality;
93: pres->secs = secs;
94: pres->secs_per_byte = secs_per_byte;
95: pres->rep = HTAtom_for(representation);
96: pres->command = 0;
97: StrAllocCopy(pres->command, command);
98:
99: if (!HTPresentations) HTPresentations = HTList_new();
100:
101: if (strcmp(representation, "*")==0) {
102: if (default_presentation) free(default_presentation);
103: default_presentation = pres;
104: } else {
105: HTList_addObject(HTPresentations, pres);
106: }
107: }
108:
109:
110: /* Define a built-in function for a content-type
111: ** ---------------------------------------------
112: */
113: PUBLIC void HTSetConversion ARGS6(
114: CONST char *, representation_in,
115: CONST char *, representation_out,
1.6 timbl 116: HTConverter*, converter,
1.2 timbl 117: float, quality,
118: float, secs,
119: float, secs_per_byte
120: ){
1.1 timbl 121:
1.2 timbl 122: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
123: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
124:
125: pres->rep = HTAtom_for(representation_in);
126: pres->rep_out = HTAtom_for(representation_out);
127: pres->converter = converter;
128: pres->command = NULL; /* Fixed */
129: pres->quality = quality;
130: pres->secs = secs;
131: pres->secs_per_byte = secs_per_byte;
132: pres->command = 0;
133:
134: if (!HTPresentations) HTPresentations = HTList_new();
135:
136: if (strcmp(representation_in, "*")==0) {
137: if (default_presentation) free(default_presentation);
138: default_presentation = pres;
139: } else {
140: HTList_addObject(HTPresentations, pres);
141: }
142: }
1.1 timbl 143:
144:
145:
/*	File buffering
**	--------------
**
**	The input file is read using the macro which can read from
**	a socket or a file.
**	The input buffer size, if large will give greater efficiency and
**	release the server faster, and if small will save space on PCs etc.
**
**	The buffer is a single static area shared by every reading routine
**	in this file (HTGetChararcter, HTOutputBinary, HTCopy, HTFileCopy),
**	so only one transfer can use it at a time.
*/
#define INPUT_BUFFER_SIZE 4096		/* Tradeoff */
PRIVATE char input_buffer[INPUT_BUFFER_SIZE];
PRIVATE char * input_pointer;	/* next unread byte, used by HTGetChararcter */
PRIVATE char * input_limit;	/* one past the last valid byte in the buffer */
PRIVATE int input_file_number;	/* descriptor set by HTInitInput */
159:
160:
161: /* Set up the buffering
162: **
163: ** These routines are public because they are in fact needed by
164: ** many parsers, and on PCs and Macs we should not duplicate
165: ** the static buffer area.
166: */
167: PUBLIC void HTInitInput ARGS1 (int,file_number)
168: {
169: input_file_number = file_number;
170: input_pointer = input_limit = input_buffer;
171: }
172:
173:
174: PUBLIC char HTGetChararcter NOARGS
175: {
176: char ch;
177: do {
178: if (input_pointer >= input_limit) {
179: int status = NETREAD(
180: input_file_number, input_buffer, INPUT_BUFFER_SIZE);
181: if (status <= 0) {
182: if (status == 0) return (char)EOF;
183: if (TRACE) fprintf(stderr,
184: "HTFormat: File read error %d\n", status);
185: return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
186: }
187: input_pointer = input_buffer;
188: input_limit = input_buffer + status;
189: }
190: ch = *input_pointer++;
191: } while (ch == (char) 13); /* Ignore ASCII carriage return */
192:
193: return FROMASCII(ch);
194: }
195:
196: /* Stream the data to an ouput file as binary
197: */
198: PUBLIC int HTOutputBinary ARGS2( int, input,
199: FILE *, output)
200: {
201: do {
202: int status = NETREAD(
203: input, input_buffer, INPUT_BUFFER_SIZE);
204: if (status <= 0) {
205: if (status == 0) return 0;
206: if (TRACE) fprintf(stderr,
207: "HTFormat: File read error %d\n", status);
208: return 2; /* Error */
209: }
210: fwrite(input_buffer, sizeof(char), status, output);
211: } while (YES);
212: }
213:
214:
1.2 timbl 215: /* Create a filter stack
216: ** ---------------------
217: **
1.7 secret 218: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 219: ** structure is made to hold the destination format while the
220: ** new stack is generated. This is just to pass the out format to
221: ** MIME so far. Storing the format of a stream in the stream might
222: ** be a lot neater.
1.10 ! timbl 223: **
! 224: ** The www/source format is special, in that if you can take
! 225: ** that you can take anything. However, we
1.2 timbl 226: */
227: PUBLIC HTStream * HTStreamStack ARGS4(
1.10 ! timbl 228: HTFormat, rep_in,
1.2 timbl 229: HTFormat, rep_out,
230: HTStream*, sink,
231: HTParentAnchor*, anchor)
232: {
233: HTAtom * wildcard = HTAtom_for("*");
1.10 ! timbl 234: HTFormat source = WWW_SOURCE;
1.2 timbl 235: if (TRACE) fprintf(stderr,
236: "HTFormat: Constructing stream stack for %s to %s\n",
1.10 ! timbl 237: HTAtom_name(rep_in),
1.2 timbl 238: HTAtom_name(rep_out));
239:
240: if (rep_out == WWW_SOURCE ||
1.10 ! timbl 241: rep_out == rep_in) return sink;
1.2 timbl 242:
243: if (!HTPresentations) HTFormatInit(); /* set up the list */
244:
245: {
246: int n = HTList_count(HTPresentations);
247: int i;
1.10 ! timbl 248: HTPresentation * pres, *match, *wildcard_match=0,
! 249: *source_match=0, *source_wildcard_match=0;
1.2 timbl 250: for(i=0; i<n; i++) {
251: pres = HTList_objectAt(HTPresentations, i);
1.10 ! timbl 252: if (pres->rep == rep_in) {
1.2 timbl 253: if (pres->rep_out == rep_out)
254: return (*pres->converter)(pres, anchor, sink);
255: if (pres->rep_out == wildcard) {
1.10 ! timbl 256: wildcard_match = pres;
! 257: }
! 258: }
! 259: if (pres->rep == source) {
! 260: if (pres->rep_out == rep_out)
! 261: source_match = pres;
! 262: if (pres->rep_out == wildcard) {
! 263: source_wildcard_match = pres;
1.2 timbl 264: }
265: }
266: }
1.10 ! timbl 267:
! 268: match = wildcard_match ? wildcard_match :
! 269: source_match ? source_match :
! 270: source_wildcard_match;
! 271:
! 272: if (match) {
! 273: HTPresentation temp = *match; /* Specific instance */
! 274: temp.rep = rep_in; /* yuk */
! 275: temp.rep_out = rep_out; /* yuk */
! 276: return (*match->converter)(&temp, anchor, sink);
! 277: }
1.2 timbl 278: }
1.10 ! timbl 279:
1.2 timbl 280:
1.10 ! timbl 281: #ifdef XMOSAIC_HACK_REMOVED_NOW /* Use above source method instead */
1.3 timbl 282: return sink;
283: #else
1.2 timbl 284: return NULL;
1.3 timbl 285: #endif
1.2 timbl 286: }
287:
288:
289: /* Find the cost of a filter stack
290: ** -------------------------------
291: **
292: ** Must return the cost of the same stack which StreamStack would set up.
293: **
294: ** On entry,
295: ** length The size of the data to be converted
296: */
297: PUBLIC float HTStackValue ARGS4(
1.10 ! timbl 298: HTFormat, rep_in,
1.2 timbl 299: HTFormat, rep_out,
300: float, initial_value,
301: long int, length)
302: {
303: HTAtom * wildcard = HTAtom_for("*");
304:
305: if (TRACE) fprintf(stderr,
306: "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10 ! timbl 307: HTAtom_name(rep_in), initial_value,
1.2 timbl 308: HTAtom_name(rep_out));
309:
310: if (rep_out == WWW_SOURCE ||
1.10 ! timbl 311: rep_out == rep_in) return 0.0;
1.2 timbl 312:
313: if (!HTPresentations) HTFormatInit(); /* set up the list */
314:
315: {
316: int n = HTList_count(HTPresentations);
317: int i;
318: HTPresentation * pres;
319: for(i=0; i<n; i++) {
320: pres = HTList_objectAt(HTPresentations, i);
1.10 ! timbl 321: if (pres->rep == rep_in && (
1.2 timbl 322: pres->rep_out == rep_out ||
323: pres->rep_out == wildcard)) {
324: float value = initial_value * pres->quality;
325: if (HTMaxSecs != 0.0)
326: value = value - (length*pres->secs_per_byte + pres->secs)
327: /HTMaxSecs;
328: return value;
329: }
330: }
331: }
332:
333: return -1e30; /* Really bad */
334:
335: }
336:
1.1 timbl 337:
1.2 timbl 338: /* Push data from a socket down a stream
339: ** -------------------------------------
1.1 timbl 340: **
1.2 timbl 341: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 342: ** graphic (or other) objects described by the file.
1.2 timbl 343: **
344: ** The file number given is assumed to be a TELNET stream ie containing
345: ** CRLF at the end of lines which need to be stripped to LF for unix
346: ** when the format is textual.
347: **
1.1 timbl 348: */
1.2 timbl 349: PUBLIC void HTCopy ARGS2(
350: int, file_number,
351: HTStream*, sink)
1.1 timbl 352: {
1.2 timbl 353: HTStreamClass targetClass;
354:
1.5 timbl 355: /* Push the data down the stream
1.2 timbl 356: **
357: */
358: targetClass = *(sink->isa); /* Copy pointers to procedures */
359:
360: /* Push binary from socket down sink
1.10 ! timbl 361: **
! 362: ** This operation could be put into a main event loop
1.2 timbl 363: */
364: for(;;) {
365: int status = NETREAD(
366: file_number, input_buffer, INPUT_BUFFER_SIZE);
367: if (status <= 0) {
368: if (status == 0) break;
369: if (TRACE) fprintf(stderr,
370: "HTFormat: Read error, read returns %d\n", status);
371: break;
372: }
1.8 timbl 373:
374: #ifdef NOT_ASCII
375: {
376: char * p;
377: for(p = input_buffer; p < input_buffer+status; p++) {
378: *p = FROMASCII(*p);
379: }
380: }
381: #endif
382:
1.4 timbl 383: (*targetClass.put_block)(sink, input_buffer, status);
1.2 timbl 384: } /* next bufferload */
385:
386: }
387:
1.1 timbl 388:
1.7 secret 389:
390: /* Push data from a file pointer down a stream
391: ** -------------------------------------
392: **
393: ** This routine is responsible for creating and PRESENTING any
394: ** graphic (or other) objects described by the file.
395: **
396: **
397: */
398: PUBLIC void HTFileCopy ARGS2(
399: FILE *, fp,
400: HTStream*, sink)
401: {
402: HTStreamClass targetClass;
403:
404: /* Push the data down the stream
405: **
406: */
407: targetClass = *(sink->isa); /* Copy pointers to procedures */
408:
409: /* Push binary from socket down sink
410: */
411: for(;;) {
412: int status = fread(
413: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
414: if (status == 0) { /* EOF or error */
415: if (ferror(fp) == 0) break;
416: if (TRACE) fprintf(stderr,
417: "HTFormat: Read error, read returns %d\n", ferror(fp));
418: break;
419: }
420: (*targetClass.put_block)(sink, input_buffer, status);
421: } /* next bufferload */
422:
423: }
424:
425:
426:
427:
1.2 timbl 428: /* Push data from a socket down a stream STRIPPING CR
429: ** --------------------------------------------------
430: **
431: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 432: ** graphic (or other) objects described by the socket.
1.2 timbl 433: **
434: ** The file number given is assumed to be a TELNET stream ie containing
435: ** CRLF at the end of lines which need to be stripped to LF for unix
436: ** when the format is textual.
437: **
1.1 timbl 438: */
1.2 timbl 439: PUBLIC void HTCopyNoCR ARGS2(
440: int, file_number,
441: HTStream*, sink)
442: {
443: HTStreamClass targetClass;
1.1 timbl 444:
1.2 timbl 445: /* Push the data, ignoring CRLF, down the stream
446: **
447: */
448: targetClass = *(sink->isa); /* Copy pointers to procedures */
449:
450: /* Push text from telnet socket down sink
451: **
452: ** @@@@@ To push strings could be faster? (especially is we
453: ** cheat and don't ignore CR! :-}
454: */
1.1 timbl 455: HTInitInput(file_number);
1.2 timbl 456: for(;;) {
457: char character;
458: character = HTGetChararcter();
459: if (character == (char)EOF) break;
460: (*targetClass.put_character)(sink, character);
461: }
462: }
1.1 timbl 463:
1.2 timbl 464:
1.7 secret 465:
1.2 timbl 466: /* Parse a socket given format and file number
467: **
468: ** This routine is responsible for creating and PRESENTING any
469: ** graphic (or other) objects described by the file.
470: **
471: ** The file number given is assumed to be a TELNET stream ie containing
472: ** CRLF at the end of lines which need to be stripped to LF for unix
473: ** when the format is textual.
474: **
475: */
476: PUBLIC int HTParseSocket ARGS5(
1.10 ! timbl 477: HTFormat, rep_in,
1.2 timbl 478: HTFormat, format_out,
479: HTParentAnchor *, anchor,
480: int, file_number,
481: HTStream*, sink)
482: {
483: HTStream * stream;
484: HTStreamClass targetClass;
1.1 timbl 485:
1.10 ! timbl 486: stream = HTStreamStack(rep_in,
1.2 timbl 487: format_out,
488: sink , anchor);
489:
490: if (!stream) {
491: char buffer[1024]; /* @@@@@@@@ */
492: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.10 ! timbl 493: HTAtom_name(rep_in), HTAtom_name(format_out));
1.3 timbl 494: if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer);
1.2 timbl 495: return HTLoadError(sink, 501, buffer);
496: }
1.1 timbl 497:
1.3 timbl 498: /* Push the data, ignoring CRLF if necessary, down the stream
499: **
1.2 timbl 500: **
1.3 timbl 501: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 502: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 503: ** The current method smells anyway.
1.2 timbl 504: */
505: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.10 ! timbl 506: if (rep_in == WWW_BINARY || HTOutputSource
! 507: || strstr(HTAtom_name(rep_in), "image/")
! 508: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.2 timbl 509: HTCopy(file_number, stream);
510: } else { /* ascii text with CRLFs :-( */
511: HTCopyNoCR(file_number, stream);
512: }
1.7 secret 513: (*targetClass.free)(stream);
514:
515: return HT_LOADED;
516: }
517:
518:
519:
520: /* Parse a file given format and file pointer
521: **
522: ** This routine is responsible for creating and PRESENTING any
523: ** graphic (or other) objects described by the file.
524: **
525: ** The file number given is assumed to be a TELNET stream ie containing
1.10 ! timbl 526: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 527: ** when the format is textual.
528: **
529: */
530: PUBLIC int HTParseFile ARGS5(
1.10 ! timbl 531: HTFormat, rep_in,
1.7 secret 532: HTFormat, format_out,
533: HTParentAnchor *, anchor,
534: FILE *, fp,
535: HTStream*, sink)
536: {
537: HTStream * stream;
538: HTStreamClass targetClass;
539:
1.10 ! timbl 540: stream = HTStreamStack(rep_in,
1.7 secret 541: format_out,
542: sink , anchor);
543:
544: if (!stream) {
545: char buffer[1024]; /* @@@@@@@@ */
546: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.10 ! timbl 547: HTAtom_name(rep_in), HTAtom_name(format_out));
1.7 secret 548: if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
549: return HTLoadError(sink, 501, buffer);
550: }
551:
1.9 timbl 552: /* Push the data down the stream
1.7 secret 553: **
554: **
555: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 ! timbl 556: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 557: ** The current method smells anyway.
558: */
559: targetClass = *(stream->isa); /* Copy pointers to procedures */
560: HTFileCopy(fp, stream);
1.2 timbl 561: (*targetClass.free)(stream);
1.1 timbl 562:
1.2 timbl 563: return HT_LOADED;
1.1 timbl 564: }
1.2 timbl 565:
1.10 ! timbl 566:
! 567: /* Converter stream: Network Telnet to internal character text
! 568: ** -----------------------------------------------------------
! 569: **
! 570: ** The input is assumed to be in ASCII, with lines delimited
! 571: ** by (13,10) pairs, These pairs are converted into (CR,LF)
! 572: ** pairs in the local representation. The (CR,LF) sequence
! 573: ** when found is changed to a '\n' character, the internal
! 574: ** C representation of a new line.
! 575: */
! 576:
! 577:
! 578: PRIVATE void NetToText_put_character ARGS2(HTStream, * me, char, net_char)
! 579: {
! 580: char c = FROMASCII(net_char);
! 581: if (me->had_cr) {
! 582: if (c==LF) {
! 583: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
! 584: me->had_cr = NO;
! 585: return;
! 586: } else {
! 587: me->sink->isa->put_character(me->sink, CR); /* leftover */
! 588: }
! 589: }
! 590: me->had_cr = (c==CR);
! 591: if (!me->had_cr)
! 592: me->sink->isa->put_character(me->sink, c); /* normal */
! 593: }
! 594:
! 595: PRIVATE void NetToText_put_string ARGS2(HTStream, * me, CONST char *, s)
! 596: {
! 597: CONST char * p;
! 598: for(p=s; *p; p++) NetToText_put_character(me, *p);
! 599: }
! 600:
! 601: PRIVATE void NetToText_put_block ARGS3(HTStream, * me, CONST char*, s, int, l)
! 602: {
! 603: CONST char * p;
! 604: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
! 605: }
! 606:
! 607: PRIVATE void NetToText_free ARGS1(HTStream *, me)
! 608: {
! 609: me->sink->isa->free(me->sink); /* Close rest of pipe */
! 610: free(me);
! 611: }
! 612:
! 613: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
! 614: {
! 615: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
! 616: free(me);
! 617: }
! 618:
/*	The class structure
**
**	Method table shared by every NetToText stream; HTNetToText()
**	stores its address in the isa field of each new stream.  The
**	entries are positional and must follow the field order of
**	HTStreamClass, which is declared outside this file.
*/
PRIVATE HTStreamClass NetToTextClass = {
    "NetToText",		/* name of the class */
    NetToText_free,
    NetToText_abort,
    NetToText_put_character,
    NetToText_put_string,
    NetToText_put_block
};
! 629:
! 630: /* The creation method
! 631: */
! 632: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
! 633: {
! 634: HTStream* me = (HTStream*)malloc(sizeof(*me));
! 635: if (me == NULL) outofmem(__FILE__, "NetToText");
! 636: me->isa = &NetToTextClass;
! 637:
! 638: me->had_cr = NO;
! 639: me->sink = sink;
! 640: return me;
! 641: }
1.2 timbl 642:
643:
Webmaster