Annotation of libwww/Library/src/HTFormat.c, revision 1.33
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
/*	Shell command template for displaying PostScript (unix only).
**	Both %s slots presumably receive the same temporary file name,
**	which the command removes afterwards -- confirm at the point of use.
*/
#ifdef unix
#ifdef NeXT
#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
#else
#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
	/* Full pathname would be better! */
#endif
#endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
48:
49: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
50:
1.10 timbl 51: #ifdef ORIGINAL
1.2 timbl 52: struct _HTStream {
53: CONST HTStreamClass* isa;
54: /* ... */
55: };
1.10 timbl 56: #endif
57:
58: /* this version used by the NetToText stream */
59: struct _HTStream {
60: CONST HTStreamClass * isa;
61: BOOL had_cr;
62: HTStream * sink;
63: };
1.2 timbl 64:
65:
1.17 luotonen 66: /*
67: ** Accept-Encoding and Accept-Language
68: */
69: typedef struct _HTAcceptNode {
70: HTAtom * atom;
71: float quality;
72: } HTAcceptNode;
73:
74:
75:
76:
1.2 timbl 77: /* Presentation methods
78: ** --------------------
79: */
80:
1.14 timbl 81: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 82:
1.31 frystyk 83: /* -------------------------------------------------------------------------
84: This function replaces the code in HTRequest_delete() in order to keep
85: the data structure hidden (it is NOT a joke!)
86: Henrik 14/03-94
87: ------------------------------------------------------------------------- */
88: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
89: {
90: HTList *cur = me;
91: HTPresentation *pres;
92: if (!me)
93: return;
94: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
95: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
96: free(pres);
97: }
98: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
99: }
100:
1.2 timbl 101:
102: /* Define a presentation system command for a content-type
103: ** -------------------------------------------------------
104: */
1.12 timbl 105: PUBLIC void HTSetPresentation ARGS6(
106: HTList *, conversions,
107: CONST char *, representation,
108: CONST char *, command,
109: float, quality,
110: float, secs,
111: float, secs_per_byte
1.2 timbl 112: ){
113:
114: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
115: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
116:
117: pres->rep = HTAtom_for(representation);
118: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
119: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
120: pres->quality = quality;
121: pres->secs = secs;
122: pres->secs_per_byte = secs_per_byte;
123: pres->rep = HTAtom_for(representation);
124: pres->command = 0;
125: StrAllocCopy(pres->command, command);
126:
1.12 timbl 127: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 128:
1.15 luotonen 129: #ifdef OLD_CODE
130: if (strcmp(representation, "*")==0) {
1.2 timbl 131: if (default_presentation) free(default_presentation);
132: default_presentation = pres;
1.12 timbl 133: } else
134: #endif
135: HTList_addObject(conversions, pres);
1.2 timbl 136: }
137:
138:
139: /* Define a built-in function for a content-type
140: ** ---------------------------------------------
141: */
1.12 timbl 142: PUBLIC void HTSetConversion ARGS7(
143: HTList *, conversions,
144: CONST char *, representation_in,
145: CONST char *, representation_out,
1.6 timbl 146: HTConverter*, converter,
1.12 timbl 147: float, quality,
148: float, secs,
149: float, secs_per_byte
1.2 timbl 150: ){
1.1 timbl 151:
1.2 timbl 152: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
153: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
154:
155: pres->rep = HTAtom_for(representation_in);
156: pres->rep_out = HTAtom_for(representation_out);
157: pres->converter = converter;
158: pres->command = NULL; /* Fixed */
159: pres->quality = quality;
160: pres->secs = secs;
161: pres->secs_per_byte = secs_per_byte;
162: pres->command = 0;
163:
1.12 timbl 164: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 165:
1.12 timbl 166: #ifdef OLD_CODE
1.2 timbl 167: if (strcmp(representation_in, "*")==0) {
168: if (default_presentation) free(default_presentation);
169: default_presentation = pres;
1.12 timbl 170: } else
171: #endif
172: HTList_addObject(conversions, pres);
1.2 timbl 173: }
1.1 timbl 174:
175:
176:
1.17 luotonen 177: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
178: char *, enc,
179: float, quality)
180: {
181: HTAcceptNode * node;
182: char * cur;
183:
184: if (!list || !enc || !*enc) return;
185:
186: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
187:
188: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
189: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
190: HTList_addObject(list, (void*)node);
191:
192: node->atom = HTAtom_for(enc);
193: node->quality = quality;
194: }
195:
196:
197: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
198: char *, lang,
199: float, quality)
200: {
201: HTAcceptNode * node;
202:
203: if (!list || !lang || !*lang) return;
204:
205: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
206: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
207:
208: HTList_addObject(list, (void*)node);
209: node->atom = HTAtom_for(lang);
210: node->quality = quality;
211: }
212:
213:
214: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
215: HTAtom *, actual)
216: {
217: char *t, *a, *st, *sa;
218: BOOL match = NO;
219:
1.22 luotonen 220: if (template && actual && (t = HTAtom_name(template))) {
221: if (!strcmp(t, "*"))
222: return YES;
1.17 luotonen 223:
1.22 luotonen 224: if (strchr(t, '*') &&
225: (a = HTAtom_name(actual)) &&
226: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 227:
1.22 luotonen 228: *sa = 0;
229: *st = 0;
230:
231: if ((*(st-1)=='*' &&
232: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
233: (*(st+1)=='*' && !strcasecomp(t,a)))
234: match = YES;
235:
236: *sa = '/';
237: *st = '/';
238: }
239: }
1.23 luotonen 240: return match;
1.17 luotonen 241: }
242:
243:
244: PRIVATE float type_value ARGS2(HTAtom *, content_type,
245: HTList *, accepted)
246: {
247: HTList * cur = accepted;
248: HTPresentation * pres;
249: HTPresentation * wild = NULL;
250:
251: if (!content_type || !accepted) return -1;
252:
253: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
254: if (pres->rep == content_type)
255: return pres->quality;
256: else if (wild_match(pres->rep, content_type))
257: wild = pres;
258: }
259: if (wild) return wild->quality;
260: else return -1;
261: }
262:
263:
264: PRIVATE float lang_value ARGS2(HTAtom *, language,
265: HTList *, accepted)
266: {
267: HTList * cur = accepted;
268: HTAcceptNode * node;
269: HTAcceptNode * wild = NULL;
270:
271: if (!language || !accepted || HTList_isEmpty(accepted)) {
272: return 0.1;
273: }
274:
275: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
276: if (node->atom == language) {
277: return node->quality;
278: }
279: else if (wild_match(node->atom, language)) {
280: wild = node;
281: }
282: }
283:
284: if (wild) {
285: return wild->quality;
286: }
287: else {
288: return 0.1;
289: }
290: }
291:
292:
293: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
294: HTList *, accepted)
295: {
296: HTList * cur = accepted;
297: HTAcceptNode * node;
298: HTAcceptNode * wild = NULL;
299: char * e;
300:
301: if (!encoding || !accepted || HTList_isEmpty(accepted))
302: return 1;
303:
304: e = HTAtom_name(encoding);
305: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
306: return 1;
307:
308: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
309: if (node->atom == encoding)
310: return node->quality;
311: else if (wild_match(node->atom, encoding))
312: wild = node;
313: }
314: if (wild) return wild->quality;
315: else return 1;
316: }
317:
318:
319: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
320: HTList *, accepted_content_types,
321: HTList *, accepted_languages,
322: HTList *, accepted_encodings)
323: {
324: int accepted_cnt = 0;
325: HTList * accepted;
326: HTList * sorted;
327: HTList * cur;
328: HTContentDescription * d;
329:
330: if (!possibilities) return NO;
331:
332: accepted = HTList_new();
333: cur = possibilities;
334: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
335: float tv = type_value(d->content_type, accepted_content_types);
336: float lv = lang_value(d->content_language, accepted_languages);
337: float ev = encoding_value(d->content_encoding, accepted_encodings);
338:
339: if (tv > 0) {
340: d->quality *= tv * lv * ev;
341: HTList_addObject(accepted, d);
342: accepted_cnt++;
343: }
1.18 luotonen 344: else {
345: if (d->filename) free(d->filename);
346: free(d);
347: }
1.17 luotonen 348: }
349:
1.18 luotonen 350: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 351: CTRACE(stderr,
1.18 luotonen 352: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 353:
354: sorted = HTList_new();
355: while (accepted_cnt-- > 0) {
356: HTContentDescription * worst = NULL;
357: cur = accepted;
358: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
359: if (!worst || d->quality < worst->quality)
360: worst = d;
361: }
362: if (worst) {
363: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
364: accepted_cnt+1,
365: worst->quality,
366: (worst->content_type
367: ? HTAtom_name(worst->content_type) : "-"),
368: (worst->content_language
369: ? HTAtom_name(worst->content_language) :"-"),
370: (worst->content_encoding
371: ? HTAtom_name(worst->content_encoding) :"-"),
372: (worst->filename
373: ? worst->filename :"-"));
374: HTList_removeObject(accepted, (void*)worst);
375: HTList_addObject(sorted, (void*)worst);
376: }
377: }
1.18 luotonen 378: CTRACE(stderr, "\n");
1.17 luotonen 379: HTList_delete(accepted);
380: HTList_delete(possibilities->next);
381: possibilities->next = sorted->next;
382: sorted->next = NULL;
383: HTList_delete(sorted);
384:
385: if (!HTList_isEmpty(possibilities)) return YES;
386: else return NO;
387: }
388:
389:
390:
391:
392:
1.13 timbl 393: /* Socket Input Buffering
394: ** ----------------------
1.1 timbl 395: **
1.13 timbl 396: ** This code is used because one cannot in general open a
397: ** file descriptor for a socket.
398: **
1.1 timbl 399: ** The input file is read using the macro which can read from
1.13 timbl 400: ** a socket or a file, but this should not be used for files
401: ** as fopen() etc is more portable of course.
402: **
1.1 timbl 403: ** The input buffer size, if large will give greater efficiency and
404: ** release the server faster, and if small will save space on PCs etc.
405: */
406:
407:
408: /* Set up the buffering
409: **
410: ** These routines are public because they are in fact needed by
411: ** many parsers, and on PCs and Macs we should not duplicate
412: ** the static buffer area.
413: */
1.13 timbl 414: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 415: {
1.28 frystyk 416: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 417: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
418: isoc->input_file_number = file_number;
419: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
420: return isoc;
1.1 timbl 421: }
422:
423:
1.13 timbl 424: PUBLIC char HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 425: {
426: char ch;
427: do {
1.13 timbl 428: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 429: int status = NETREAD(
1.13 timbl 430: isoc->input_file_number,
431: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 432: if (status <= 0) {
433: if (status == 0) return (char)EOF;
434: if (TRACE) fprintf(stderr,
435: "HTFormat: File read error %d\n", status);
436: return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
437: }
1.13 timbl 438: isoc-> input_pointer = isoc->input_buffer;
439: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 440: }
1.13 timbl 441: ch = *isoc-> input_pointer++;
1.1 timbl 442: } while (ch == (char) 13); /* Ignore ASCII carriage return */
443:
444: return FROMASCII(ch);
445: }
446:
1.17 luotonen 447: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 448: {
449: if (me) free(me);
450: }
451:
452:
1.16 luotonen 453: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
454: int *, len)
455: {
456: if (isoc->input_pointer >= isoc->input_limit) {
457: int status = NETREAD(isoc->input_file_number,
458: isoc->input_buffer,
459: ((*len < INPUT_BUFFER_SIZE) ?
460: *len : INPUT_BUFFER_SIZE));
461: if (status <= 0) {
462: isoc->input_limit = isoc->input_buffer;
463: if (status < 0)
464: CTRACE(stderr, "HTInputSocket: File read error %d\n", status);
465: *len = 0;
466: return NULL;
467: }
468: else {
469: *len = status;
470: return isoc->input_buffer;
471: }
472: }
473: else {
474: char * ret = isoc->input_pointer;
475: *len = isoc->input_limit - isoc->input_pointer;
476: isoc->input_pointer = isoc->input_limit;
477: return ret;
478: }
479: }
480:
481:
1.15 luotonen 482: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
483: {
484: if (isoc) {
485: int status;
486:
487: isoc->input_pointer = isoc->input_buffer;
488: status = NETREAD(isoc->input_file_number,
489: isoc->input_buffer,
490: INPUT_BUFFER_SIZE);
491: if (status <= 0) {
492: isoc->input_limit = isoc->input_buffer;
493: if (status < 0)
494: if (TRACE) fprintf(stderr,
495: "HTInputSocket: File read error %d\n",
496: status);
497: }
498: else
499: isoc->input_limit = isoc->input_buffer + status;
500: return status;
501: }
502: return -1;
503: }
504:
505:
506: PRIVATE void ascii_cat ARGS3(char **, linep,
507: char *, start,
508: char *, end)
509: {
510: if (linep && start && end && start <= end) {
511: char *ptr;
512:
513: if (*linep) {
514: int len = strlen(*linep);
515: *linep = (char*)realloc(*linep, len + end-start + 1);
516: ptr = *linep + len;
517: }
518: else {
519: ptr = *linep = (char*)malloc(end-start + 1);
520: }
521:
522: while (start < end) {
523: *ptr = FROMASCII(*start);
524: ptr++;
525: start++;
526: }
527: *ptr = 0;
528: }
529: }
530:
531:
532: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
533: BOOL, unfold)
534: {
535: if (!isoc)
536: return NULL;
537: else {
538: BOOL check_unfold = NO;
539: int prev_cr = 0;
540: char *start = isoc->input_pointer;
541: char *cur = isoc->input_pointer;
542: char * line = NULL;
543:
544: for(;;) {
545: /*
546: ** Get more if needed to complete line
547: */
548: if (cur >= isoc->input_limit) { /* Need more data */
549: ascii_cat(&line, start, cur);
550: if (fill_in_buffer(isoc) <= 0)
551: return line;
552: start = cur = isoc->input_pointer;
553: } /* if need more data */
554:
555: /*
556: ** Find a line feed if there is one
557: */
558: for(; cur < isoc->input_limit; cur++) {
559: char c = FROMASCII(*cur);
560: if (!c) {
1.18 luotonen 561: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 562: return NULL; /* Panic! read a 0! */
563: }
564: if (check_unfold && c != ' ' && c != '\t') {
565: return line; /* Note: didn't update isoc->input_pointer */
566: }
567: else {
568: check_unfold = NO;
569: }
570:
571: if (c=='\r') {
572: prev_cr = 1;
573: }
574: else {
575: if (c=='\n') { /* Found a line feed */
576: ascii_cat(&line, start, cur-prev_cr);
577: start = isoc->input_pointer = cur+1;
578:
579: if (line && strlen(line) > 0 && unfold) {
580: check_unfold = YES;
581: }
582: else {
583: return line;
584: }
585: } /* if NL */
586: /* else just a regular character */
587: prev_cr = 0;
588: } /* if not CR */
589: } /* while characters in buffer remain */
590: } /* until line read or end-of-file */
591: } /* valid parameters to function */
592: }
593:
594:
595: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
596: {
597: return get_some_line(isoc, NO);
598: }
599:
600: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
601: {
602: return get_some_line(isoc, YES);
603: }
604:
605:
606: /*
607: ** Read HTTP status line (if there is one).
608: **
609: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
610: ** First look at the stub in ASCII and check if it starts "HTTP/".
611: **
612: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
613: ** will be taken as a HTTP 1.0 server. Failure.
614: */
615: #define STUB_LENGTH 20
616: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
617: {
618: if (!isoc) {
619: return NULL;
620: }
621: else {
622: char buf[STUB_LENGTH + 1];
623: int i;
624: char server_version[STUB_LENGTH+1];
625: int server_status;
626:
627: /*
628: ** Read initial buffer
629: */
630: if (isoc->input_pointer >= isoc->input_limit &&
631: fill_in_buffer(isoc) <= 0) {
632: return NULL;
633: }
634:
635: for (i=0; i < STUB_LENGTH; i++)
636: buf[i] = FROMASCII(isoc->input_buffer[i]);
637: buf[STUB_LENGTH] = 0;
638:
639: if (0 != strncmp(buf, "HTTP/", 5) ||
640: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
641: return NULL;
642: else
643: return get_some_line(isoc, NO);
644: }
645: }
646:
647:
648: /*
649: ** Do heuristic test to see if this is binary.
650: **
651: ** We check for characters above 128 in the first few bytes, and
652: ** if we find them we forget the html default.
653: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
654: **
655: ** Bugs: An HTTP 0.9 server returning a binary document with
656: ** characters < 128 will be read as ASCII.
657: */
658: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
659: {
660: if (isoc &&
661: (isoc->input_pointer < isoc->input_limit ||
662: fill_in_buffer(isoc) > 0)) {
663: char *p = isoc->input_buffer;
664: int i = STUB_LENGTH;
665:
666: for( ; i && p < isoc->input_limit; p++, i++)
667: if (((int)*p)&128)
668: return YES;
669: }
670: return NO;
671: }
672:
673:
674:
1.1 timbl 675: /* Stream the data to an ouput file as binary
676: */
1.13 timbl 677: PUBLIC int HTOutputBinary ARGS3( HTInputSocket *, isoc,
678: int, input,
679: FILE *, output)
1.1 timbl 680: {
681: do {
682: int status = NETREAD(
1.13 timbl 683: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 684: if (status <= 0) {
685: if (status == 0) return 0;
686: if (TRACE) fprintf(stderr,
687: "HTFormat: File read error %d\n", status);
688: return 2; /* Error */
689: }
1.13 timbl 690: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 691: } while (YES);
692: }
693:
694:
1.33 ! luotonen 695: PRIVATE BOOL better_match ARGS2(HTFormat, f,
! 696: HTFormat, g)
! 697: {
! 698: CONST char *p, *q;
! 699:
! 700: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
! 701: int i,j;
! 702: for(i=0 ; *p; p++) if (*p == '*') i++;
! 703: for(j=0 ; *q; q++) if (*q == '*') j++;
! 704: if (i < j) return YES;
! 705: }
! 706: return NO;
! 707: }
! 708:
1.17 luotonen 709:
1.2 timbl 710: /* Create a filter stack
711: ** ---------------------
712: **
1.7 secret 713: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 714: ** structure is made to hold the destination format while the
715: ** new stack is generated. This is just to pass the out format to
716: ** MIME so far. Storing the format of a stream in the stream might
717: ** be a lot neater.
1.10 timbl 718: **
1.29 frystyk 719: ** The star/star format is special, in that if you can take
1.10 timbl 720: ** that you can take anything. However, we
1.2 timbl 721: */
1.12 timbl 722: PUBLIC HTStream * HTStreamStack ARGS2(
1.10 timbl 723: HTFormat, rep_in,
1.12 timbl 724: HTRequest *, request)
1.2 timbl 725: {
1.12 timbl 726: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 727: HTList * conversion[2];
728: int which_list;
1.25 frystyk 729: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 730: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 731:
1.2 timbl 732: if (TRACE) fprintf(stderr,
733: "HTFormat: Constructing stream stack for %s to %s\n",
1.10 timbl 734: HTAtom_name(rep_in),
1.2 timbl 735: HTAtom_name(rep_out));
736:
1.21 luotonen 737: if (rep_out == WWW_SOURCE || rep_out == rep_in)
738: return request->output_stream;
1.2 timbl 739:
1.14 timbl 740: conversion[0] = request->conversions;
741: conversion[1] = HTConversions;
1.17 luotonen 742:
1.15 luotonen 743: for(which_list = 0; which_list<2; which_list++) {
744: HTList * cur = conversion[which_list];
745:
746: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 747: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 ! luotonen 748: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
! 749: if (!best_match ||
! 750: better_match(pres->rep, best_match->rep) ||
! 751: (!better_match(best_match->rep, pres->rep) &&
! 752: pres->quality > best_quality)) {
1.25 frystyk 753: best_match = pres;
754: best_quality = pres->quality;
1.10 timbl 755: }
756: }
1.33 ! luotonen 757:
1.29 frystyk 758: #ifdef OLD_CODE
759: /* This case is now included in the best_match loop */
1.25 frystyk 760: /* Special case when input format is 'www/source' */
1.10 timbl 761: if (pres->rep == source) {
1.29 frystyk 762: if (pres->rep_out == rep_out ||
763: wild_match(pres->rep_out, rep_out))
1.10 timbl 764: source_match = pres;
1.2 timbl 765: }
1.29 frystyk 766: #endif
1.2 timbl 767: }
768: }
1.33 ! luotonen 769:
1.29 frystyk 770: match = best_match ? best_match : NULL;
771: if (match) {
772: if (match->rep == WWW_SOURCE) {
773: if (TRACE) fprintf(stderr,
774: "HTFormat: Don't know how to handle this, so put out %s to %s\n",
775: HTAtom_name(match->rep),
776: HTAtom_name(rep_out));
777: }
778: return (*match->converter)(
1.25 frystyk 779: request, match->command, rep_in, rep_out,
780: request->output_stream);
1.29 frystyk 781: }
1.2 timbl 782: return NULL;
783: }
784:
785:
786: /* Find the cost of a filter stack
787: ** -------------------------------
788: **
789: ** Must return the cost of the same stack which StreamStack would set up.
790: **
791: ** On entry,
792: ** length The size of the data to be converted
793: */
1.12 timbl 794: PUBLIC float HTStackValue ARGS5(
1.14 timbl 795: HTList *, theseConversions,
1.10 timbl 796: HTFormat, rep_in,
1.2 timbl 797: HTFormat, rep_out,
798: float, initial_value,
799: long int, length)
800: {
1.14 timbl 801: int which_list;
802: HTList* conversion[2];
803:
1.2 timbl 804: if (TRACE) fprintf(stderr,
805: "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 806: HTAtom_name(rep_in), initial_value,
1.2 timbl 807: HTAtom_name(rep_out));
808:
809: if (rep_out == WWW_SOURCE ||
1.10 timbl 810: rep_out == rep_in) return 0.0;
1.2 timbl 811:
1.12 timbl 812: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 813:
1.14 timbl 814: conversion[0] = theseConversions;
815: conversion[1] = HTConversions;
816:
817: for(which_list = 0; which_list<2; which_list++)
818: if (conversion[which_list]) {
1.15 luotonen 819: HTList * cur = conversion[which_list];
1.2 timbl 820: HTPresentation * pres;
1.15 luotonen 821: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
822: if (pres->rep == rep_in &&
1.17 luotonen 823: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 824: float value = initial_value * pres->quality;
825: if (HTMaxSecs != 0.0)
1.15 luotonen 826: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 827: /HTMaxSecs;
828: return value;
829: }
830: }
831: }
832:
833: return -1e30; /* Really bad */
1.17 luotonen 834: }
835:
836:
1.2 timbl 837:
1.1 timbl 838:
1.2 timbl 839: /* Push data from a socket down a stream
840: ** -------------------------------------
1.1 timbl 841: **
1.2 timbl 842: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 843: ** graphic (or other) objects described by the file.
1.2 timbl 844: **
845: ** The file number given is assumed to be a TELNET stream ie containing
846: ** CRLF at the end of lines which need to be stripped to LF for unix
847: ** when the format is textual.
848: **
1.26 luotonen 849: ** RETURNS the number of bytes transferred.
850: **
1.1 timbl 851: */
1.26 luotonen 852: PUBLIC int HTCopy ARGS2(
1.2 timbl 853: int, file_number,
854: HTStream*, sink)
1.1 timbl 855: {
1.2 timbl 856: HTStreamClass targetClass;
1.13 timbl 857: HTInputSocket * isoc;
1.26 luotonen 858: int cnt = 0;
859:
1.5 timbl 860: /* Push the data down the stream
1.2 timbl 861: **
862: */
863: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 864: isoc = HTInputSocket_new(file_number);
1.2 timbl 865:
866: /* Push binary from socket down sink
1.10 timbl 867: **
868: ** This operation could be put into a main event loop
1.2 timbl 869: */
870: for(;;) {
871: int status = NETREAD(
1.13 timbl 872: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 873: if (status <= 0) {
874: if (status == 0) break;
875: if (TRACE) fprintf(stderr,
1.24 luotonen 876: "HTFormat: Read error, read returns %d with errno=%d\n",
877: status, errno);
1.2 timbl 878: break;
879: }
1.26 luotonen 880:
1.8 timbl 881: #ifdef NOT_ASCII
882: {
883: char * p;
1.13 timbl 884: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 885: *p = FROMASCII(*p);
886: }
887: }
888: #endif
889:
1.13 timbl 890: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 891: cnt += status;
1.2 timbl 892: } /* next bufferload */
1.26 luotonen 893:
1.13 timbl 894: HTInputSocket_free(isoc);
1.26 luotonen 895:
896: return cnt;
1.2 timbl 897: }
898:
1.1 timbl 899:
1.7 secret 900:
901: /* Push data from a file pointer down a stream
902: ** -------------------------------------
903: **
904: ** This routine is responsible for creating and PRESENTING any
905: ** graphic (or other) objects described by the file.
906: **
907: **
908: */
909: PUBLIC void HTFileCopy ARGS2(
910: FILE *, fp,
911: HTStream*, sink)
912: {
913: HTStreamClass targetClass;
1.13 timbl 914: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 915:
916: /* Push the data down the stream
917: **
918: */
919: targetClass = *(sink->isa); /* Copy pointers to procedures */
920:
921: /* Push binary from socket down sink
922: */
923: for(;;) {
924: int status = fread(
925: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
926: if (status == 0) { /* EOF or error */
927: if (ferror(fp) == 0) break;
928: if (TRACE) fprintf(stderr,
929: "HTFormat: Read error, read returns %d\n", ferror(fp));
930: break;
931: }
932: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 933: } /* next bufferload */
1.7 secret 934: }
935:
936:
937:
938:
1.2 timbl 939: /* Push data from a socket down a stream STRIPPING CR
940: ** --------------------------------------------------
941: **
942: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 943: ** graphic (or other) objects described by the socket.
1.2 timbl 944: **
945: ** The file number given is assumed to be a TELNET stream ie containing
946: ** CRLF at the end of lines which need to be stripped to LF for unix
947: ** when the format is textual.
948: **
1.1 timbl 949: */
1.2 timbl 950: PUBLIC void HTCopyNoCR ARGS2(
951: int, file_number,
952: HTStream*, sink)
953: {
1.13 timbl 954: HTStreamClass targetClass;
955: HTInputSocket * isoc;
1.1 timbl 956:
1.2 timbl 957: /* Push the data, ignoring CRLF, down the stream
958: **
959: */
960: targetClass = *(sink->isa); /* Copy pointers to procedures */
961:
962: /* Push text from telnet socket down sink
963: **
964: ** @@@@@ To push strings could be faster? (especially is we
965: ** cheat and don't ignore CR! :-}
966: */
1.13 timbl 967: isoc = HTInputSocket_new(file_number);
1.2 timbl 968: for(;;) {
969: char character;
1.13 timbl 970: character = HTInputSocket_getCharacter(isoc);
1.2 timbl 971: if (character == (char)EOF) break;
972: (*targetClass.put_character)(sink, character);
973: }
1.13 timbl 974: HTInputSocket_free(isoc);
1.2 timbl 975: }
1.1 timbl 976:
1.2 timbl 977:
1.7 secret 978:
1.2 timbl 979: /* Parse a socket given format and file number
980: **
981: ** This routine is responsible for creating and PRESENTING any
982: ** graphic (or other) objects described by the file.
983: **
984: ** The file number given is assumed to be a TELNET stream ie containing
985: ** CRLF at the end of lines which need to be stripped to LF for unix
986: ** when the format is textual.
987: **
988: */
1.14 timbl 989:
1.12 timbl 990: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 991: HTFormat, rep_in,
1.2 timbl 992: int, file_number,
1.12 timbl 993: HTRequest *, request)
1.2 timbl 994: {
995: HTStream * stream;
996: HTStreamClass targetClass;
1.1 timbl 997:
1.12 timbl 998: stream = HTStreamStack(rep_in, request);
1.29 frystyk 999:
1.2 timbl 1000: if (!stream) {
1.30 frystyk 1001: char buffer[1024]; /* @@@@@@@@ */
1.2 timbl 1002: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1003: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.30 frystyk 1004: if (TRACE) fprintf(stderr, "HTFormat(in HTParseSocket): %s\n", buffer);
1.16 luotonen 1005: return HTLoadError(request, 501, buffer);
1.2 timbl 1006: }
1.1 timbl 1007:
1.3 timbl 1008: /* Push the data, ignoring CRLF if necessary, down the stream
1009: **
1.2 timbl 1010: **
1.3 timbl 1011: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1012: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1013: ** The current method smells anyway.
1.2 timbl 1014: */
1015: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1016: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1017: || (request->content_encoding &&
1018: request->content_encoding != HTAtom_for("8bit") &&
1019: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1020: || strstr(HTAtom_name(rep_in), "image/")
1021: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1022: HTCopy(file_number, stream);
1.2 timbl 1023: } else { /* ascii text with CRLFs :-( */
1024: HTCopyNoCR(file_number, stream);
1025: }
1.7 secret 1026: (*targetClass.free)(stream);
1027:
1028: return HT_LOADED;
1029: }
1030:
1031:
1032:
1033: /* Parse a file given format and file pointer
1034: **
1035: ** This routine is responsible for creating and PRESENTING any
1036: ** graphic (or other) objects described by the file.
1037: **
1038: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1039: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1040: ** when the format is textual.
1041: **
1042: */
1.12 timbl 1043: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1044: HTFormat, rep_in,
1.7 secret 1045: FILE *, fp,
1.12 timbl 1046: HTRequest *, request)
1.7 secret 1047: {
1048: HTStream * stream;
1049: HTStreamClass targetClass;
1050:
1.12 timbl 1051: stream = HTStreamStack(rep_in, request);
1.7 secret 1052:
1053: if (!stream) {
1.30 frystyk 1054: char buffer[1024]; /* @@@@@@@@ */
1.7 secret 1055: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1056: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7 secret 1057: if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.29 frystyk 1058: return HTLoadError(request, 501, buffer);
1.7 secret 1059: }
1060:
1.9 timbl 1061: /* Push the data down the stream
1.7 secret 1062: **
1063: **
1064: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1065: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1066: ** The current method smells anyway.
1067: */
1068: targetClass = *(stream->isa); /* Copy pointers to procedures */
1069: HTFileCopy(fp, stream);
1.2 timbl 1070: (*targetClass.free)(stream);
1.1 timbl 1071:
1.2 timbl 1072: return HT_LOADED;
1.1 timbl 1073: }
1.2 timbl 1074:
1.10 timbl 1075:
1076: /* Converter stream: Network Telnet to internal character text
1077: ** -----------------------------------------------------------
1078: **
1079: ** The input is assumed to be in ASCII, with lines delimited
1080: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1081: ** pairs in the local representation. The (CR,LF) sequence
1082: ** when found is changed to a '\n' character, the internal
1083: ** C representation of a new line.
1084: */
1085:
1086:
1.11 timbl 1087: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1088: {
1089: char c = FROMASCII(net_char);
1090: if (me->had_cr) {
1091: if (c==LF) {
1092: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1093: me->had_cr = NO;
1094: return;
1095: } else {
1096: me->sink->isa->put_character(me->sink, CR); /* leftover */
1097: }
1098: }
1099: me->had_cr = (c==CR);
1100: if (!me->had_cr)
1101: me->sink->isa->put_character(me->sink, c); /* normal */
1102: }
1103:
1.11 timbl 1104: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1105: {
1106: CONST char * p;
1107: for(p=s; *p; p++) NetToText_put_character(me, *p);
1108: }
1109:
1.11 timbl 1110: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1111: {
1112: CONST char * p;
1113: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1114: }
1115:
1116: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1117: {
1118: me->sink->isa->free(me->sink); /* Close rest of pipe */
1119: free(me);
1120: }
1121:
1122: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1123: {
1124: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1125: free(me);
1126: }
1127:
1128: /* The class structure
1129: */
1130: PRIVATE HTStreamClass NetToTextClass = {
1131: "NetToText",
1132: NetToText_free,
1133: NetToText_abort,
1134: NetToText_put_character,
1135: NetToText_put_string,
1136: NetToText_put_block
1137: };
1138:
1139: /* The creation method
1140: */
1141: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1142: {
1143: HTStream* me = (HTStream*)malloc(sizeof(*me));
1144: if (me == NULL) outofmem(__FILE__, "NetToText");
1145: me->isa = &NetToTextClass;
1146:
1147: me->had_cr = NO;
1148: me->sink = sink;
1149: return me;
1150: }
1.2 timbl 1151:
1152:
Webmaster