Annotation of libwww/Library/src/HTFormat.c, revision 1.12
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* Divisor for the time penalty in HTStackValue; 1e10 means no effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit; not referenced in the code shown in this listing */
21:
22: #ifdef unix /* Shell command template for showing PostScript; only defined when unix is defined */
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n" /* NeXT: open the file, then remove it */
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" /* elsewhere: run ghostview then remove the file, all in the background */
27: /* Full pathname would be better! (ghostview is found through the search path) */
28: #endif /* NeXT */
29: #endif /* unix */
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 ! timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
48:
49: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout; when set, HTParseSocket copies the data with HTCopy (no CR stripping) */
50: extern BOOL interactive; /* Defined elsewhere; not referenced in the code shown in this listing */
51:
1.10 timbl 52: #ifdef ORIGINAL /* Earlier minimal definition; compiled only if ORIGINAL is defined */
1.2 timbl 53: struct _HTStream {
54: CONST HTStreamClass* isa;
55: /* ... */
56: };
1.10 timbl 57: #endif /* ORIGINAL */
58:
59: /* This version is used by the NetToText stream defined at the end of this file */
60: struct _HTStream {
61: CONST HTStreamClass * isa; /* Class of this stream: table of its handler functions */
62: BOOL had_cr; /* YES while a CR has been read but not yet passed on to the sink */
63: HTStream * sink; /* Stream that receives the converted characters */
64: };
1.2 timbl 65:
66:
67: /* Presentation methods
68: ** --------------------
69: */
70:
1.12 ! timbl 71: /* PUBLIC HTList * HTPresentations = 0; */
! 72: /* PUBLIC HTPresentation* default_presentation = 0; */
1.2 timbl 73:
74:
75: /* Define a presentation system command for a content-type
76: ** -------------------------------------------------------
77: ** Allocates an HTPresentation that maps representation to WWW_PRESENT through HTSaveAndExecute, and hands it to HTList_addObject(conversions, ...). */
1.12 ! timbl 78: PUBLIC void HTSetPresentation ARGS6(
! 79: HTList *, conversions, /* list that receives the new entry */
! 80: CONST char *, representation, /* input content-type name; turned into an atom by HTAtom_for */
! 81: CONST char *, command, /* system command string; stored in the entry through StrAllocCopy */
! 82: float, quality, /* stored unchanged; HTStackValue multiplies the initial value by it */
! 83: float, secs, /* stored unchanged; fixed time term in HTStackValue */
! 84: float, secs_per_byte /* stored unchanged; per-byte time term in HTStackValue */
1.2 timbl 85: ){
86:
87: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
88: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation"); /* outofmem is defined elsewhere; presumably it does not return - confirm */
89:
90: pres->rep = HTAtom_for(representation);
91: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
92: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
93: pres->quality = quality;
94: pres->secs = secs;
95: pres->secs_per_byte = secs_per_byte;
96: pres->rep = HTAtom_for(representation); /* NOTE(review): repeats the assignment to pres->rep made above */
97: pres->command = 0; /* start from a null pointer before StrAllocCopy fills it in */
98: StrAllocCopy(pres->command, command);
99:
1.12 ! timbl 100: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 101:
1.12 ! timbl 102: #ifdef OLD_CODE if (strcmp(representation, "*")==0) { /* NOTE(review): the if statement shares the #ifdef line, so it is not compiled as code even when OLD_CODE is defined - confirm intent */
1.2 timbl 103: if (default_presentation) free(default_presentation); /* NOTE(review): the definition of default_presentation is commented out earlier in this file */
104: default_presentation = pres;
1.12 ! timbl 105: } else
! 106: #endif
! 107: HTList_addObject(conversions, pres); /* reached unconditionally when OLD_CODE is not defined */
1.2 timbl 108: }
109:
110:
111: /* Define a built-in function for a content-type
112: ** ---------------------------------------------
113: ** Allocates an HTPresentation that maps representation_in to representation_out through converter, with no command string, and hands it to HTList_addObject(conversions, ...). */
1.12 ! timbl 114: PUBLIC void HTSetConversion ARGS7(
! 115: HTList *, conversions, /* list that receives the new entry */
! 116: CONST char *, representation_in, /* input content-type name; turned into an atom by HTAtom_for */
! 117: CONST char *, representation_out, /* output content-type name; turned into an atom by HTAtom_for */
1.6 timbl 118: HTConverter*, converter, /* function stored in the entry; HTStreamStack calls it to build the stream */
1.12 ! timbl 119: float, quality, /* stored unchanged; HTStackValue multiplies the initial value by it */
! 120: float, secs, /* stored unchanged; fixed time term in HTStackValue */
! 121: float, secs_per_byte /* stored unchanged; per-byte time term in HTStackValue */
1.2 timbl 122: ){
1.1 timbl 123:
1.2 timbl 124: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
125: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation"); /* NOTE(review): the label names HTSetPresentation, not this function */
126:
127: pres->rep = HTAtom_for(representation_in);
128: pres->rep_out = HTAtom_for(representation_out);
129: pres->converter = converter;
130: pres->command = NULL; /* Fixed: a built-in conversion has no command string */
131: pres->quality = quality;
132: pres->secs = secs;
133: pres->secs_per_byte = secs_per_byte;
134: pres->command = 0; /* NOTE(review): repeats the clearing of pres->command done above */
135:
1.12 ! timbl 136: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 137:
1.12 ! timbl 138: #ifdef OLD_CODE
1.2 timbl 139: if (strcmp(representation_in, "*")==0) {
140: if (default_presentation) free(default_presentation); /* NOTE(review): the definition of default_presentation is commented out earlier in this file */
141: default_presentation = pres;
1.12 ! timbl 142: } else
! 143: #endif /* OLD_CODE */
! 144: HTList_addObject(conversions, pres); /* reached unconditionally when OLD_CODE is not defined */
1.2 timbl 145: }
1.1 timbl 146:
147:
148:
149: /* File buffering
150: ** --------------
151: **
152: ** The input file is read using the NETREAD macro, which can read from
153: ** a socket or a file.
154: ** The input buffer size, if large, will give greater efficiency and
155: ** release the server faster, and if small will save space on PCs etc.
156: */
157: #define INPUT_BUFFER_SIZE 4096 /* Tradeoff between speed and space, see above */
158: PRIVATE char input_buffer[INPUT_BUFFER_SIZE]; /* Shared by HTGetChararcter, HTOutputBinary, HTCopy and HTFileCopy */
159: PRIVATE char * input_pointer; /* Next unread character in input_buffer (used by HTGetChararcter) */
160: PRIVATE char * input_limit; /* One past the last valid character in input_buffer */
161: PRIVATE int input_file_number; /* Descriptor HTGetChararcter reads from; set by HTInitInput */
162:
163:
164: /* Set up the buffering
165: **
166: ** These routines are public because they are in fact needed by
167: ** many parsers, and on PCs and Macs we should not duplicate
168: ** the static buffer area. HTInitInput must be called before HTGetChararcter.
169: */
170: PUBLIC void HTInitInput ARGS1 (int,file_number)
171: {
172: input_file_number = file_number; /* remembered for the NETREAD calls in HTGetChararcter */
173: input_pointer = input_limit = input_buffer; /* buffer marked empty, so the first HTGetChararcter call reads */
174: }
175:
176:
177: PUBLIC char HTGetChararcter NOARGS /* Returns the next input character with CRs skipped, or (char)EOF at end of input or on a read error */
178: {
179: char ch;
180: do {
181: if (input_pointer >= input_limit) { /* buffer used up: refill it */
182: int status = NETREAD(
183: input_file_number, input_buffer, INPUT_BUFFER_SIZE);
184: if (status <= 0) {
185: if (status == 0) return (char)EOF; /* zero means end of input */
186: if (TRACE) fprintf(stderr,
187: "HTFormat: File read error %d\n", status);
188: return (char)EOF; /* -1 is returned by UCX at end of HTTP link, so errors are reported like end of input */
189: }
190: input_pointer = input_buffer;
191: input_limit = input_buffer + status; /* a positive status is used as the count of valid bytes */
192: }
193: ch = *input_pointer++;
194: } while (ch == (char) 13); /* Ignore ASCII carriage return */
195:
196: return FROMASCII(ch); /* NOTE(review): a data byte that compares equal to (char)EOF appears indistinguishable from end of input for callers */
197: }
198:
199: /* Stream the data to an output file as binary
200: ** Returns 0 at end of input, 2 on a read error. */
201: PUBLIC int HTOutputBinary ARGS2( int, input,
202: FILE *, output)
203: {
204: do {
205: int status = NETREAD(
206: input, input_buffer, INPUT_BUFFER_SIZE);
207: if (status <= 0) {
208: if (status == 0) return 0; /* end of input */
209: if (TRACE) fprintf(stderr,
210: "HTFormat: File read error %d\n", status);
211: return 2; /* Error */
212: }
213: fwrite(input_buffer, sizeof(char), status, output); /* NOTE(review): the result of fwrite is not checked, so write failures go unreported */
214: } while (YES); /* the loop is left only through the returns above */
215: }
216:
217:
1.2 timbl 218: /* Create a filter stack
219: ** ---------------------
220: **
1.7 secret 221: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 222: ** structure is made to hold the destination format while the
223: ** new stack is generated. This is just to pass the out format to
224: ** MIME so far. Storing the format of a stream in the stream might
225: ** be a lot neater.
1.10 timbl 226: **
227: ** The www/source format is special, in that if you can take
228: ** that you can take anything. However, we
1.2 timbl 229: ** [sentence left unfinished in the original]. NOTE(review): the code below creates no temporary HTPresentation; rep_out is passed straight to the converter. */
1.12 ! timbl 230: PUBLIC HTStream * HTStreamStack ARGS2(
1.10 timbl 231: HTFormat, rep_in, /* format of the data that will be written to the returned stream */
1.12 ! timbl 232: HTRequest *, request) /* supplies output_format, conversions and output_stream; returns NULL when no conversion is found */
1.2 timbl 233: {
1.12 ! timbl 234: HTFormat rep_out = request->output_format; /* Could be a param */
! 235: HTList * conversions = request->conversions; /* Could be a param */
1.2 timbl 236: HTAtom * wildcard = HTAtom_for("*");
1.10 timbl 237: HTFormat source = WWW_SOURCE;
1.2 timbl 238: if (TRACE) fprintf(stderr,
239: "HTFormat: Constructing stream stack for %s to %s\n",
1.10 timbl 240: HTAtom_name(rep_in),
1.2 timbl 241: HTAtom_name(rep_out));
242:
243: if (rep_out == WWW_SOURCE ||
1.12 ! timbl 244: rep_out == rep_in) return request->output_stream; /* no conversion needed: write straight to the output stream */
1.2 timbl 245:
1.12 ! timbl 246: /* if (!HTPresentations) HTFormatInit(); */ /* set up the list */
1.2 timbl 247:
248: {
1.12 ! timbl 249: int n = HTList_count(conversions);
1.2 timbl 250: int i;
1.10 timbl 251: HTPresentation * pres, *match, *wildcard_match=0,
252: *source_match=0, *source_wildcard_match=0; /* fallback candidates; each keeps the last matching entry seen */
1.2 timbl 253: for(i=0; i<n; i++) {
1.12 ! timbl 254: pres = HTList_objectAt(conversions, i);
1.10 timbl 255: if (pres->rep == rep_in) {
1.2 timbl 256: if (pres->rep_out == rep_out) /* exact match: use it at once */
1.12 ! timbl 257: return (*pres->converter)(request, pres->command,
! 258: rep_in, pres->rep_out, request->output_stream);
1.2 timbl 259: if (pres->rep_out == wildcard) {
1.10 timbl 260: wildcard_match = pres; /* rep_in to anything */
261: }
262: }
263: if (pres->rep == source) {
264: if (pres->rep_out == rep_out)
265: source_match = pres; /* www/source to rep_out */
266: if (pres->rep_out == wildcard) {
267: source_wildcard_match = pres; /* www/source to anything */
1.2 timbl 268: }
269: }
270: }
1.10 timbl 271:
272: match = wildcard_match ? wildcard_match : /* order of preference among the fallbacks */
273: source_match ? source_match :
274: source_wildcard_match;
275:
1.12 ! timbl 276: if (match) return (*match->converter)(
! 277: request, match->command, rep_in, rep_out,
! 278: request->output_stream); /* the requested rep_out is passed, not the entry's own rep_out */
1.2 timbl 279: }
1.10 timbl 280:
1.2 timbl 281:
1.10 timbl 282: #ifdef XMOSAIC_HACK_REMOVED_NOW /* Use above source method instead */
1.12 ! timbl 283: return request->output_stream;
1.3 timbl 284: #else
1.2 timbl 285: return NULL; /* nothing found: callers must check for NULL */
1.3 timbl 286: #endif /* XMOSAIC_HACK_REMOVED_NOW */
1.2 timbl 287: }
288:
289:
290: /* Find the cost of a filter stack
291: ** -------------------------------
292: **
293: ** Must return the cost of the same stack which StreamStack would set up.
294: ** NOTE(review): unlike HTStreamStack, the loop below takes the first entry matching rep_in (exact or wildcard output) and never looks at www/source entries - confirm the two still agree.
295: ** On entry,
296: ** length The size of the data to be converted
297: ** On exit, returns 0.0 when no conversion is needed, the value of the first matching entry otherwise, or -1e30 when none matches. */
1.12 ! timbl 298: PUBLIC float HTStackValue ARGS5(
! 299: HTList *, conversions, /* list of HTPresentation entries to search */
1.10 timbl 300: HTFormat, rep_in, /* format of the data to convert */
1.2 timbl 301: HTFormat, rep_out, /* format wanted */
302: float, initial_value, /* value before conversion; scaled by the entry quality */
303: long int, length)
304: {
305: HTAtom * wildcard = HTAtom_for("*");
306:
307: if (TRACE) fprintf(stderr,
308: "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 309: HTAtom_name(rep_in), initial_value,
1.2 timbl 310: HTAtom_name(rep_out));
311:
312: if (rep_out == WWW_SOURCE ||
1.10 timbl 313: rep_out == rep_in) return 0.0; /* no conversion needed */
1.2 timbl 314:
1.12 ! timbl 315: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 316:
317: {
1.12 ! timbl 318: int n = HTList_count(conversions);
1.2 timbl 319: int i;
320: HTPresentation * pres;
321: for(i=0; i<n; i++) {
1.12 ! timbl 322: pres = HTList_objectAt(conversions, i);
1.10 timbl 323: if (pres->rep == rep_in && (
1.2 timbl 324: pres->rep_out == rep_out ||
325: pres->rep_out == wildcard)) {
326: float value = initial_value * pres->quality;
327: if (HTMaxSecs != 0.0) /* guards the division below */
328: value = value - (length*pres->secs_per_byte + pres->secs)
329: /HTMaxSecs; /* subtract the estimated time as a fraction of HTMaxSecs */
330: return value;
331: }
332: }
333: }
334:
335: return -1e30; /* Really bad: no entry matched */
336:
337: }
338:
1.1 timbl 339:
1.2 timbl 340: /* Push data from a socket down a stream
341: ** -------------------------------------
1.1 timbl 342: **
1.2 timbl 343: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 344: ** graphic (or other) objects described by the file.
1.2 timbl 345: **
346: ** The file number given is assumed to be a TELNET stream ie containing
347: ** CRLF at the end of lines which need to be stripped to LF for unix
348: ** when the format is textual.
349: ** NOTE(review): this routine itself strips nothing; each buffer goes to put_block as read (after FROMASCII when NOT_ASCII is defined). The sink is not freed here.
1.1 timbl 350: */
1.2 timbl 351: PUBLIC void HTCopy ARGS2(
352: int, file_number, /* descriptor passed to NETREAD */
353: HTStream*, sink) /* stream that receives the data */
1.1 timbl 354: {
1.2 timbl 355: HTStreamClass targetClass;
356:
1.5 timbl 357: /* Push the data down the stream
1.2 timbl 358: **
359: */
360: targetClass = *(sink->isa); /* Copy pointers to procedures */
361:
362: /* Push binary from socket down sink
1.10 timbl 363: **
364: ** This operation could be put into a main event loop
1.2 timbl 365: */
366: for(;;) {
367: int status = NETREAD(
368: file_number, input_buffer, INPUT_BUFFER_SIZE);
369: if (status <= 0) {
370: if (status == 0) break; /* end of input */
371: if (TRACE) fprintf(stderr,
372: "HTFormat: Read error, read returns %d\n", status);
373: break; /* a read error ends the copy just like end of input */
374: }
1.8 timbl 375:
376: #ifdef NOT_ASCII
377: {
378: char * p;
379: for(p = input_buffer; p < input_buffer+status; p++) {
380: *p = FROMASCII(*p); /* translate the buffer in place */
381: }
382: }
383: #endif /* NOT_ASCII */
384:
1.4 timbl 385: (*targetClass.put_block)(sink, input_buffer, status);
1.2 timbl 386: } /* next bufferload */
387:
388: }
389:
1.1 timbl 390:
1.7 secret 391:
392: /* Push data from a file pointer down a stream
393: ** -------------------------------------------
394: **
395: ** This routine is responsible for creating and PRESENTING any
396: ** graphic (or other) objects described by the file.
397: **
398: ** The data is passed to put_block exactly as fread delivers it. The sink is not freed here.
399: */
400: PUBLIC void HTFileCopy ARGS2(
401: FILE *, fp, /* open file to read from */
402: HTStream*, sink) /* stream that receives the data */
403: {
404: HTStreamClass targetClass;
405:
406: /* Push the data down the stream
407: **
408: */
409: targetClass = *(sink->isa); /* Copy pointers to procedures */
410:
411: /* Push binary from the file down sink
412: */
413: for(;;) {
414: int status = fread(
415: input_buffer, 1, INPUT_BUFFER_SIZE, fp); /* item size 1, so status counts bytes */
416: if (status == 0) { /* EOF or error */
417: if (ferror(fp) == 0) break; /* plain end of file */
418: if (TRACE) fprintf(stderr,
419: "HTFormat: Read error, read returns %d\n", ferror(fp)); /* the number printed is the ferror result, not a byte count */
420: break;
421: }
422: (*targetClass.put_block)(sink, input_buffer, status);
423: } /* next bufferload */
424:
425: }
426:
427:
428:
429:
1.2 timbl 430: /* Push data from a socket down a stream STRIPPING CR
431: ** --------------------------------------------------
432: **
433: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 434: ** graphic (or other) objects described by the socket.
1.2 timbl 435: **
436: ** The file number given is assumed to be a TELNET stream ie containing
437: ** CRLF at the end of lines which need to be stripped to LF for unix
438: ** when the format is textual.
439: ** The stripping is done by HTGetChararcter, which skips every character 13. The sink is not freed here.
1.1 timbl 440: */
1.2 timbl 441: PUBLIC void HTCopyNoCR ARGS2(
442: int, file_number, /* descriptor handed to HTInitInput */
443: HTStream*, sink) /* stream that receives the characters one at a time */
444: {
445: HTStreamClass targetClass;
1.1 timbl 446:
1.2 timbl 447: /* Push the data, dropping CR characters, down the stream
448: **
449: */
450: targetClass = *(sink->isa); /* Copy pointers to procedures */
451:
452: /* Push text from telnet socket down sink
453: **
454: ** @@@@@ To push strings could be faster? (especially if we
455: ** cheat and do not ignore CR! :-}
456: */
1.1 timbl 457: HTInitInput(file_number);
1.2 timbl 458: for(;;) {
459: char character;
460: character = HTGetChararcter();
461: if (character == (char)EOF) break; /* end of input or read error, see HTGetChararcter */
462: (*targetClass.put_character)(sink, character);
463: }
464: }
1.1 timbl 465:
1.2 timbl 466:
1.7 secret 467:
1.2 timbl 468: /* Parse a socket given format and file number
469: **
470: ** This routine is responsible for creating and PRESENTING any
471: ** graphic (or other) objects described by the file.
472: **
473: ** The file number given is assumed to be a TELNET stream ie containing
474: ** CRLF at the end of lines which need to be stripped to LF for unix
475: ** when the format is textual.
476: ** Returns HT_LOADED after copying, or the result of HTLoadError when HTStreamStack finds no conversion.
477: */
1.12 ! timbl 478: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 479: HTFormat, rep_in, /* format of the incoming data */
1.2 timbl 480: int, file_number, /* descriptor to read from */
1.12 ! timbl 481: HTRequest *, request) /* passed to HTStreamStack; also supplies output_format and output_stream for the error report */
1.2 timbl 482: {
483: HTStream * stream;
484: HTStreamClass targetClass;
1.1 timbl 485:
1.12 ! timbl 486: stream = HTStreamStack(rep_in, request);
1.2 timbl 487:
488: if (!stream) {
489: char buffer[1024]; /* @@@@@@@@ NOTE(review): fixed size; the sprintf below does not bound the two atom names */
490: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 ! timbl 491: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.3 timbl 492: if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer);
1.12 ! timbl 493: return HTLoadError(request->output_stream, 501, buffer);
1.2 timbl 494: }
1.1 timbl 495:
1.3 timbl 496: /* Push the data, ignoring CRLF if necessary, down the stream
497: **
1.2 timbl 498: **
1.3 timbl 499: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 500: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 501: ** The current method smells anyway.
1.2 timbl 502: */
503: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.10 timbl 504: if (rep_in == WWW_BINARY || HTOutputSource
505: || strstr(HTAtom_name(rep_in), "image/")
506: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ strstr matches the text anywhere in the format name */
1.2 timbl 507: HTCopy(file_number, stream); /* copy without CR stripping */
508: } else { /* ascii text with CRLFs :-( */
509: HTCopyNoCR(file_number, stream);
510: }
1.7 secret 511: (*targetClass.free)(stream); /* the stream is always freed here, whatever the copy read */
512:
513: return HT_LOADED; /* returned even if the copy stopped on a read error */
514: }
515:
516:
517:
518: /* Parse a file given format and file pointer
519: **
520: ** This routine is responsible for creating and PRESENTING any
521: ** graphic (or other) objects described by the file.
522: **
523: ** NOTE(review): the original text here described a TELNET stream whose
1.10 timbl 524: ** CRLF line ends need to be stripped to \n for unix; the code below
1.7 secret 525: ** does no such stripping, it copies the file with HTFileCopy as it is.
526: ** Returns HT_LOADED after copying, or the result of HTLoadError when HTStreamStack finds no conversion.
527: */
1.12 ! timbl 528: PUBLIC int HTParseFile ARGS3(
1.10 timbl 529: HTFormat, rep_in, /* format of the file contents */
1.7 secret 530: FILE *, fp, /* open file to read from */
1.12 ! timbl 531: HTRequest *, request) /* passed to HTStreamStack; also supplies output_format and output_stream for the error report */
1.7 secret 532: {
533: HTStream * stream;
534: HTStreamClass targetClass;
535:
1.12 ! timbl 536: stream = HTStreamStack(rep_in, request);
1.7 secret 537:
538: if (!stream) {
539: char buffer[1024]; /* @@@@@@@@ NOTE(review): fixed size; the sprintf below does not bound the two atom names */
540: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 ! timbl 541: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7 secret 542: if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.12 ! timbl 543: return HTLoadError(request->output_stream, 501, buffer);
1.7 secret 544: }
545:
1.9 timbl 546: /* Push the data down the stream
1.7 secret 547: **
548: **
549: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 550: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 551: ** The current method smells anyway.
552: */
553: targetClass = *(stream->isa); /* Copy pointers to procedures */
554: HTFileCopy(fp, stream);
1.2 timbl 555: (*targetClass.free)(stream); /* the stream is always freed here */
1.1 timbl 556:
1.2 timbl 557: return HT_LOADED; /* returned even if the copy stopped on a read error */
1.1 timbl 558: }
1.2 timbl 559:
1.10 timbl 560:
561: /* Converter stream: Network Telnet to internal character text
562: ** -----------------------------------------------------------
563: **
564: ** The input is assumed to be in ASCII, with lines delimited
565: ** by (13,10) pairs. These pairs are converted into (CR,LF)
566: ** pairs in the local representation. The (CR,LF) sequence
567: ** when found is changed to a '\n' character, the internal
568: ** C representation of a new line.
569: */
570:
571:
1.11 timbl 572: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char) /* Convert one network character and pass the result to me->sink */
1.10 timbl 573: {
574: char c = FROMASCII(net_char);
575: if (me->had_cr) { /* the previous character was a CR that is still held back */
576: if (c==LF) {
577: me->sink->isa->put_character(me->sink, '\n'); /* Newline replaces the CR,LF pair */
578: me->had_cr = NO;
579: return;
580: } else {
581: me->sink->isa->put_character(me->sink, CR); /* leftover: the CR was not part of a pair, so pass it on */
582: }
583: }
584: me->had_cr = (c==CR); /* hold back a CR until the next character is seen */
585: if (!me->had_cr)
586: me->sink->isa->put_character(me->sink, c); /* normal */
587: }
588:
1.11 timbl 589: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s) /* Feed a NUL-terminated string through NetToText_put_character */
1.10 timbl 590: {
591: CONST char * p;
592: for(p=s; *p; p++) NetToText_put_character(me, *p); /* stops at the terminating NUL, which is not passed on */
593: }
594:
1.11 timbl 595: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l) /* Feed l characters starting at s through NetToText_put_character */
1.10 timbl 596: {
597: CONST char * p;
598: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p); /* every one of the l characters is processed, whatever its value */
599: }
600:
601: PRIVATE void NetToText_free ARGS1(HTStream *, me) /* Free the sink, then this stream object */
602: {
603: me->sink->isa->free(me->sink); /* Close rest of pipe. NOTE(review): a CR still held back (had_cr set) is dropped, not sent to the sink */
604: free(me);
605: }
606:
607: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e) /* Abort the sink with the same error, then release this stream object */
608: {
609: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
610: free(me);
611: }
612:
613: /* The class structure: the handlers HTNetToText installs in each stream it creates
614: */
615: PRIVATE HTStreamClass NetToTextClass = {
616: "NetToText", /* class name string */
617: NetToText_free,
618: NetToText_abort,
619: NetToText_put_character,
620: NetToText_put_string,
621: NetToText_put_block
622: };
623:
624: /* The creation method: returns a new NetToText stream that writes its converted output to sink
625: */
626: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
627: {
628: HTStream* me = (HTStream*)malloc(sizeof(*me));
629: if (me == NULL) outofmem(__FILE__, "NetToText"); /* outofmem is defined elsewhere; presumably it does not return - confirm */
630: me->isa = &NetToTextClass;
631:
632: me->had_cr = NO; /* no CR is pending at the start */
633: me->sink = sink; /* the sink is freed or aborted together with this stream */
634: return me;
635: }
1.2 timbl 636:
637:
Webmaster