Annotation of libwww/Library/src/HTFormat.c, revision 1.34
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
22: #ifdef unix
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
27: /* Full pathname would be better! */
28: #endif
29: #endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.34 ! luotonen 48: #include "HTGuess.h"
! 49:
1.2 timbl 50:
51: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
52:
1.10 timbl 53: #ifdef ORIGINAL
1.2 timbl 54: struct _HTStream {
55: CONST HTStreamClass* isa;
56: /* ... */
57: };
1.10 timbl 58: #endif
59:
60: /* this version used by the NetToText stream */
61: struct _HTStream {
62: CONST HTStreamClass * isa;
63: BOOL had_cr;
64: HTStream * sink;
65: };
1.2 timbl 66:
67:
1.17 luotonen 68: /*
69: ** Accept-Encoding and Accept-Language
70: */
71: typedef struct _HTAcceptNode {
72: HTAtom * atom;
73: float quality;
74: } HTAcceptNode;
75:
76:
77:
78:
1.2 timbl 79: /* Presentation methods
80: ** --------------------
81: */
82:
1.14 timbl 83: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 84:
1.31 frystyk 85: /* -------------------------------------------------------------------------
86: This function replaces the code in HTRequest_delete() in order to keep
87: the data structure hidden (it is NOT a joke!)
88: Henrik 14/03-94
89: ------------------------------------------------------------------------- */
90: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
91: {
92: HTList *cur = me;
93: HTPresentation *pres;
94: if (!me)
95: return;
96: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
97: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
98: free(pres);
99: }
100: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
101: }
102:
1.2 timbl 103:
104: /* Define a presentation system command for a content-type
105: ** -------------------------------------------------------
106: */
1.12 timbl 107: PUBLIC void HTSetPresentation ARGS6(
108: HTList *, conversions,
109: CONST char *, representation,
110: CONST char *, command,
111: float, quality,
112: float, secs,
113: float, secs_per_byte
1.2 timbl 114: ){
115:
116: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
117: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
118:
119: pres->rep = HTAtom_for(representation);
120: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
121: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
122: pres->quality = quality;
123: pres->secs = secs;
124: pres->secs_per_byte = secs_per_byte;
125: pres->rep = HTAtom_for(representation);
126: pres->command = 0;
127: StrAllocCopy(pres->command, command);
128:
1.12 timbl 129: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 130:
1.15 luotonen 131: #ifdef OLD_CODE
132: if (strcmp(representation, "*")==0) {
1.2 timbl 133: if (default_presentation) free(default_presentation);
134: default_presentation = pres;
1.12 timbl 135: } else
136: #endif
137: HTList_addObject(conversions, pres);
1.2 timbl 138: }
139:
140:
141: /* Define a built-in function for a content-type
142: ** ---------------------------------------------
143: */
1.12 timbl 144: PUBLIC void HTSetConversion ARGS7(
145: HTList *, conversions,
146: CONST char *, representation_in,
147: CONST char *, representation_out,
1.6 timbl 148: HTConverter*, converter,
1.12 timbl 149: float, quality,
150: float, secs,
151: float, secs_per_byte
1.2 timbl 152: ){
1.1 timbl 153:
1.2 timbl 154: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
155: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
156:
157: pres->rep = HTAtom_for(representation_in);
158: pres->rep_out = HTAtom_for(representation_out);
159: pres->converter = converter;
160: pres->command = NULL; /* Fixed */
161: pres->quality = quality;
162: pres->secs = secs;
163: pres->secs_per_byte = secs_per_byte;
164: pres->command = 0;
165:
1.12 timbl 166: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 167:
1.12 timbl 168: #ifdef OLD_CODE
1.2 timbl 169: if (strcmp(representation_in, "*")==0) {
170: if (default_presentation) free(default_presentation);
171: default_presentation = pres;
1.12 timbl 172: } else
173: #endif
174: HTList_addObject(conversions, pres);
1.2 timbl 175: }
1.1 timbl 176:
177:
178:
1.17 luotonen 179: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
180: char *, enc,
181: float, quality)
182: {
183: HTAcceptNode * node;
184: char * cur;
185:
186: if (!list || !enc || !*enc) return;
187:
188: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
189:
190: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
191: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
192: HTList_addObject(list, (void*)node);
193:
194: node->atom = HTAtom_for(enc);
195: node->quality = quality;
196: }
197:
198:
199: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
200: char *, lang,
201: float, quality)
202: {
203: HTAcceptNode * node;
204:
205: if (!list || !lang || !*lang) return;
206:
207: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
208: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
209:
210: HTList_addObject(list, (void*)node);
211: node->atom = HTAtom_for(lang);
212: node->quality = quality;
213: }
214:
215:
216: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
217: HTAtom *, actual)
218: {
219: char *t, *a, *st, *sa;
220: BOOL match = NO;
221:
1.22 luotonen 222: if (template && actual && (t = HTAtom_name(template))) {
223: if (!strcmp(t, "*"))
224: return YES;
1.17 luotonen 225:
1.22 luotonen 226: if (strchr(t, '*') &&
227: (a = HTAtom_name(actual)) &&
228: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 229:
1.22 luotonen 230: *sa = 0;
231: *st = 0;
232:
233: if ((*(st-1)=='*' &&
234: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
235: (*(st+1)=='*' && !strcasecomp(t,a)))
236: match = YES;
237:
238: *sa = '/';
239: *st = '/';
240: }
241: }
1.23 luotonen 242: return match;
1.17 luotonen 243: }
244:
245:
246: PRIVATE float type_value ARGS2(HTAtom *, content_type,
247: HTList *, accepted)
248: {
249: HTList * cur = accepted;
250: HTPresentation * pres;
251: HTPresentation * wild = NULL;
252:
253: if (!content_type || !accepted) return -1;
254:
255: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
256: if (pres->rep == content_type)
257: return pres->quality;
258: else if (wild_match(pres->rep, content_type))
259: wild = pres;
260: }
261: if (wild) return wild->quality;
262: else return -1;
263: }
264:
265:
266: PRIVATE float lang_value ARGS2(HTAtom *, language,
267: HTList *, accepted)
268: {
269: HTList * cur = accepted;
270: HTAcceptNode * node;
271: HTAcceptNode * wild = NULL;
272:
273: if (!language || !accepted || HTList_isEmpty(accepted)) {
274: return 0.1;
275: }
276:
277: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
278: if (node->atom == language) {
279: return node->quality;
280: }
281: else if (wild_match(node->atom, language)) {
282: wild = node;
283: }
284: }
285:
286: if (wild) {
287: return wild->quality;
288: }
289: else {
290: return 0.1;
291: }
292: }
293:
294:
295: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
296: HTList *, accepted)
297: {
298: HTList * cur = accepted;
299: HTAcceptNode * node;
300: HTAcceptNode * wild = NULL;
301: char * e;
302:
303: if (!encoding || !accepted || HTList_isEmpty(accepted))
304: return 1;
305:
306: e = HTAtom_name(encoding);
307: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
308: return 1;
309:
310: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
311: if (node->atom == encoding)
312: return node->quality;
313: else if (wild_match(node->atom, encoding))
314: wild = node;
315: }
316: if (wild) return wild->quality;
317: else return 1;
318: }
319:
320:
321: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
322: HTList *, accepted_content_types,
323: HTList *, accepted_languages,
324: HTList *, accepted_encodings)
325: {
326: int accepted_cnt = 0;
327: HTList * accepted;
328: HTList * sorted;
329: HTList * cur;
330: HTContentDescription * d;
331:
332: if (!possibilities) return NO;
333:
334: accepted = HTList_new();
335: cur = possibilities;
336: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
337: float tv = type_value(d->content_type, accepted_content_types);
338: float lv = lang_value(d->content_language, accepted_languages);
339: float ev = encoding_value(d->content_encoding, accepted_encodings);
340:
341: if (tv > 0) {
342: d->quality *= tv * lv * ev;
343: HTList_addObject(accepted, d);
344: accepted_cnt++;
345: }
1.18 luotonen 346: else {
347: if (d->filename) free(d->filename);
348: free(d);
349: }
1.17 luotonen 350: }
351:
1.18 luotonen 352: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 353: CTRACE(stderr,
1.18 luotonen 354: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 355:
356: sorted = HTList_new();
357: while (accepted_cnt-- > 0) {
358: HTContentDescription * worst = NULL;
359: cur = accepted;
360: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
361: if (!worst || d->quality < worst->quality)
362: worst = d;
363: }
364: if (worst) {
365: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
366: accepted_cnt+1,
367: worst->quality,
368: (worst->content_type
369: ? HTAtom_name(worst->content_type) : "-"),
370: (worst->content_language
371: ? HTAtom_name(worst->content_language) :"-"),
372: (worst->content_encoding
373: ? HTAtom_name(worst->content_encoding) :"-"),
374: (worst->filename
375: ? worst->filename :"-"));
376: HTList_removeObject(accepted, (void*)worst);
377: HTList_addObject(sorted, (void*)worst);
378: }
379: }
1.18 luotonen 380: CTRACE(stderr, "\n");
1.17 luotonen 381: HTList_delete(accepted);
382: HTList_delete(possibilities->next);
383: possibilities->next = sorted->next;
384: sorted->next = NULL;
385: HTList_delete(sorted);
386:
387: if (!HTList_isEmpty(possibilities)) return YES;
388: else return NO;
389: }
390:
391:
392:
393:
394:
1.13 timbl 395: /* Socket Input Buffering
396: ** ----------------------
1.1 timbl 397: **
1.13 timbl 398: ** This code is used because one cannot in general open a
399: ** file descriptor for a socket.
400: **
1.1 timbl 401: ** The input file is read using the macro which can read from
1.13 timbl 402: ** a socket or a file, but this should not be used for files
403: ** as fopen() etc is more portable of course.
404: **
1.1 timbl 405: ** The input buffer size, if large will give greater efficiency and
406: ** release the server faster, and if small will save space on PCs etc.
407: */
408:
409:
410: /* Set up the buffering
411: **
412: ** These routines are public because they are in fact needed by
413: ** many parsers, and on PCs and Macs we should not duplicate
414: ** the static buffer area.
415: */
1.13 timbl 416: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 417: {
1.28 frystyk 418: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 419: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
420: isoc->input_file_number = file_number;
421: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
422: return isoc;
1.1 timbl 423: }
424:
425:
1.13 timbl 426: PUBLIC char HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 427: {
428: char ch;
429: do {
1.13 timbl 430: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 431: int status = NETREAD(
1.13 timbl 432: isoc->input_file_number,
433: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 434: if (status <= 0) {
435: if (status == 0) return (char)EOF;
436: if (TRACE) fprintf(stderr,
437: "HTFormat: File read error %d\n", status);
438: return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
439: }
1.13 timbl 440: isoc-> input_pointer = isoc->input_buffer;
441: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 442: }
1.13 timbl 443: ch = *isoc-> input_pointer++;
1.1 timbl 444: } while (ch == (char) 13); /* Ignore ASCII carriage return */
445:
446: return FROMASCII(ch);
447: }
448:
1.17 luotonen 449: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 450: {
451: if (me) free(me);
452: }
453:
454:
1.16 luotonen 455: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
456: int *, len)
457: {
458: if (isoc->input_pointer >= isoc->input_limit) {
459: int status = NETREAD(isoc->input_file_number,
460: isoc->input_buffer,
461: ((*len < INPUT_BUFFER_SIZE) ?
462: *len : INPUT_BUFFER_SIZE));
463: if (status <= 0) {
464: isoc->input_limit = isoc->input_buffer;
465: if (status < 0)
466: CTRACE(stderr, "HTInputSocket: File read error %d\n", status);
467: *len = 0;
468: return NULL;
469: }
470: else {
471: *len = status;
472: return isoc->input_buffer;
473: }
474: }
475: else {
476: char * ret = isoc->input_pointer;
477: *len = isoc->input_limit - isoc->input_pointer;
478: isoc->input_pointer = isoc->input_limit;
479: return ret;
480: }
481: }
482:
483:
1.15 luotonen 484: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
485: {
486: if (isoc) {
487: int status;
488:
489: isoc->input_pointer = isoc->input_buffer;
490: status = NETREAD(isoc->input_file_number,
491: isoc->input_buffer,
492: INPUT_BUFFER_SIZE);
493: if (status <= 0) {
494: isoc->input_limit = isoc->input_buffer;
495: if (status < 0)
496: if (TRACE) fprintf(stderr,
497: "HTInputSocket: File read error %d\n",
498: status);
499: }
500: else
501: isoc->input_limit = isoc->input_buffer + status;
502: return status;
503: }
504: return -1;
505: }
506:
507:
508: PRIVATE void ascii_cat ARGS3(char **, linep,
509: char *, start,
510: char *, end)
511: {
512: if (linep && start && end && start <= end) {
513: char *ptr;
514:
515: if (*linep) {
516: int len = strlen(*linep);
517: *linep = (char*)realloc(*linep, len + end-start + 1);
518: ptr = *linep + len;
519: }
520: else {
521: ptr = *linep = (char*)malloc(end-start + 1);
522: }
523:
524: while (start < end) {
525: *ptr = FROMASCII(*start);
526: ptr++;
527: start++;
528: }
529: *ptr = 0;
530: }
531: }
532:
533:
534: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
535: BOOL, unfold)
536: {
537: if (!isoc)
538: return NULL;
539: else {
540: BOOL check_unfold = NO;
541: int prev_cr = 0;
542: char *start = isoc->input_pointer;
543: char *cur = isoc->input_pointer;
544: char * line = NULL;
545:
546: for(;;) {
547: /*
548: ** Get more if needed to complete line
549: */
550: if (cur >= isoc->input_limit) { /* Need more data */
551: ascii_cat(&line, start, cur);
552: if (fill_in_buffer(isoc) <= 0)
553: return line;
554: start = cur = isoc->input_pointer;
555: } /* if need more data */
556:
557: /*
558: ** Find a line feed if there is one
559: */
560: for(; cur < isoc->input_limit; cur++) {
561: char c = FROMASCII(*cur);
562: if (!c) {
1.18 luotonen 563: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 564: return NULL; /* Panic! read a 0! */
565: }
566: if (check_unfold && c != ' ' && c != '\t') {
567: return line; /* Note: didn't update isoc->input_pointer */
568: }
569: else {
570: check_unfold = NO;
571: }
572:
573: if (c=='\r') {
574: prev_cr = 1;
575: }
576: else {
577: if (c=='\n') { /* Found a line feed */
578: ascii_cat(&line, start, cur-prev_cr);
579: start = isoc->input_pointer = cur+1;
580:
581: if (line && strlen(line) > 0 && unfold) {
582: check_unfold = YES;
583: }
584: else {
585: return line;
586: }
587: } /* if NL */
588: /* else just a regular character */
589: prev_cr = 0;
590: } /* if not CR */
591: } /* while characters in buffer remain */
592: } /* until line read or end-of-file */
593: } /* valid parameters to function */
594: }
595:
596:
597: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
598: {
599: return get_some_line(isoc, NO);
600: }
601:
602: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
603: {
604: return get_some_line(isoc, YES);
605: }
606:
607:
608: /*
609: ** Read HTTP status line (if there is one).
610: **
611: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
612: ** First look at the stub in ASCII and check if it starts "HTTP/".
613: **
614: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
615: ** will be taken as a HTTP 1.0 server. Failure.
616: */
617: #define STUB_LENGTH 20
618: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
619: {
620: if (!isoc) {
621: return NULL;
622: }
623: else {
624: char buf[STUB_LENGTH + 1];
625: int i;
626: char server_version[STUB_LENGTH+1];
627: int server_status;
628:
629: /*
630: ** Read initial buffer
631: */
632: if (isoc->input_pointer >= isoc->input_limit &&
633: fill_in_buffer(isoc) <= 0) {
634: return NULL;
635: }
636:
637: for (i=0; i < STUB_LENGTH; i++)
638: buf[i] = FROMASCII(isoc->input_buffer[i]);
639: buf[STUB_LENGTH] = 0;
640:
641: if (0 != strncmp(buf, "HTTP/", 5) ||
642: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
643: return NULL;
644: else
645: return get_some_line(isoc, NO);
646: }
647: }
648:
649:
650: /*
651: ** Do heuristic test to see if this is binary.
652: **
653: ** We check for characters above 128 in the first few bytes, and
654: ** if we find them we forget the html default.
655: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
656: **
657: ** Bugs: An HTTP 0.9 server returning a binary document with
658: ** characters < 128 will be read as ASCII.
659: */
660: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
661: {
662: if (isoc &&
663: (isoc->input_pointer < isoc->input_limit ||
664: fill_in_buffer(isoc) > 0)) {
665: char *p = isoc->input_buffer;
666: int i = STUB_LENGTH;
667:
668: for( ; i && p < isoc->input_limit; p++, i++)
669: if (((int)*p)&128)
670: return YES;
671: }
672: return NO;
673: }
674:
675:
676:
1.1 timbl 677: /* Stream the data to an output file as binary
678: */
1.13 timbl 679: PUBLIC int HTOutputBinary ARGS3( HTInputSocket *, isoc,
680: int, input,
681: FILE *, output)
1.1 timbl 682: {
683: do {
684: int status = NETREAD(
1.13 timbl 685: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 686: if (status <= 0) {
687: if (status == 0) return 0;
688: if (TRACE) fprintf(stderr,
689: "HTFormat: File read error %d\n", status);
690: return 2; /* Error */
691: }
1.13 timbl 692: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 693: } while (YES);
694: }
695:
696:
1.33 luotonen 697: PRIVATE BOOL better_match ARGS2(HTFormat, f,
698: HTFormat, g)
699: {
700: CONST char *p, *q;
701:
702: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
703: int i,j;
704: for(i=0 ; *p; p++) if (*p == '*') i++;
705: for(j=0 ; *q; q++) if (*q == '*') j++;
706: if (i < j) return YES;
707: }
708: return NO;
709: }
710:
1.17 luotonen 711:
1.2 timbl 712: /* Create a filter stack
713: ** ---------------------
714: **
1.7 secret 715: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 716: ** structure is made to hold the destination format while the
717: ** new stack is generated. This is just to pass the out format to
718: ** MIME so far. Storing the format of a stream in the stream might
719: ** be a lot neater.
1.10 timbl 720: **
1.29 frystyk 721: ** The star/star format is special, in that if you can take
1.10 timbl 722: ** that you can take anything. However, we
1.2 timbl 723: */
1.34 ! luotonen 724: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
! 725: HTRequest *, request,
! 726: BOOL, guess)
1.2 timbl 727: {
1.12 timbl 728: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 729: HTList * conversion[2];
730: int which_list;
1.25 frystyk 731: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 732: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 733:
1.2 timbl 734: if (TRACE) fprintf(stderr,
735: "HTFormat: Constructing stream stack for %s to %s\n",
1.10 timbl 736: HTAtom_name(rep_in),
1.2 timbl 737: HTAtom_name(rep_out));
1.34 ! luotonen 738:
! 739: if (guess && rep_in == WWW_UNKNOWN) {
! 740: CTRACE(stderr, "Returning... guessing stream\n");
! 741: return HTGuess_new(request);
! 742: }
! 743:
1.21 luotonen 744: if (rep_out == WWW_SOURCE || rep_out == rep_in)
745: return request->output_stream;
1.2 timbl 746:
1.14 timbl 747: conversion[0] = request->conversions;
748: conversion[1] = HTConversions;
1.17 luotonen 749:
1.15 luotonen 750: for(which_list = 0; which_list<2; which_list++) {
751: HTList * cur = conversion[which_list];
752:
753: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 754: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 755: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
756: if (!best_match ||
757: better_match(pres->rep, best_match->rep) ||
758: (!better_match(best_match->rep, pres->rep) &&
759: pres->quality > best_quality)) {
1.25 frystyk 760: best_match = pres;
761: best_quality = pres->quality;
1.10 timbl 762: }
763: }
1.33 luotonen 764:
1.29 frystyk 765: #ifdef OLD_CODE
766: /* This case is now included in the best_match loop */
1.25 frystyk 767: /* Special case when input format is 'www/source' */
1.10 timbl 768: if (pres->rep == source) {
1.29 frystyk 769: if (pres->rep_out == rep_out ||
770: wild_match(pres->rep_out, rep_out))
1.10 timbl 771: source_match = pres;
1.2 timbl 772: }
1.29 frystyk 773: #endif
1.2 timbl 774: }
775: }
1.33 luotonen 776:
1.29 frystyk 777: match = best_match ? best_match : NULL;
778: if (match) {
779: if (match->rep == WWW_SOURCE) {
780: if (TRACE) fprintf(stderr,
781: "HTFormat: Don't know how to handle this, so put out %s to %s\n",
782: HTAtom_name(match->rep),
783: HTAtom_name(rep_out));
784: }
785: return (*match->converter)(
1.25 frystyk 786: request, match->command, rep_in, rep_out,
787: request->output_stream);
1.29 frystyk 788: }
1.2 timbl 789: return NULL;
790: }
791:
792:
793: /* Find the cost of a filter stack
794: ** -------------------------------
795: **
796: ** Must return the cost of the same stack which StreamStack would set up.
797: **
798: ** On entry,
799: ** length The size of the data to be converted
800: */
1.12 timbl 801: PUBLIC float HTStackValue ARGS5(
1.14 timbl 802: HTList *, theseConversions,
1.10 timbl 803: HTFormat, rep_in,
1.2 timbl 804: HTFormat, rep_out,
805: float, initial_value,
806: long int, length)
807: {
1.14 timbl 808: int which_list;
809: HTList* conversion[2];
810:
1.2 timbl 811: if (TRACE) fprintf(stderr,
812: "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 813: HTAtom_name(rep_in), initial_value,
1.2 timbl 814: HTAtom_name(rep_out));
815:
816: if (rep_out == WWW_SOURCE ||
1.10 timbl 817: rep_out == rep_in) return 0.0;
1.2 timbl 818:
1.12 timbl 819: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 820:
1.14 timbl 821: conversion[0] = theseConversions;
822: conversion[1] = HTConversions;
823:
824: for(which_list = 0; which_list<2; which_list++)
825: if (conversion[which_list]) {
1.15 luotonen 826: HTList * cur = conversion[which_list];
1.2 timbl 827: HTPresentation * pres;
1.15 luotonen 828: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
829: if (pres->rep == rep_in &&
1.17 luotonen 830: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 831: float value = initial_value * pres->quality;
832: if (HTMaxSecs != 0.0)
1.15 luotonen 833: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 834: /HTMaxSecs;
835: return value;
836: }
837: }
838: }
839:
840: return -1e30; /* Really bad */
1.17 luotonen 841: }
842:
843:
1.2 timbl 844:
1.1 timbl 845:
1.2 timbl 846: /* Push data from a socket down a stream
847: ** -------------------------------------
1.1 timbl 848: **
1.2 timbl 849: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 850: ** graphic (or other) objects described by the file.
1.2 timbl 851: **
852: ** The file number given is assumed to be a TELNET stream ie containing
853: ** CRLF at the end of lines which need to be stripped to LF for unix
854: ** when the format is textual.
855: **
1.26 luotonen 856: ** RETURNS the number of bytes transferred.
857: **
1.1 timbl 858: */
1.26 luotonen 859: PUBLIC int HTCopy ARGS2(
1.2 timbl 860: int, file_number,
861: HTStream*, sink)
1.1 timbl 862: {
1.2 timbl 863: HTStreamClass targetClass;
1.13 timbl 864: HTInputSocket * isoc;
1.26 luotonen 865: int cnt = 0;
866:
1.5 timbl 867: /* Push the data down the stream
1.2 timbl 868: **
869: */
870: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 871: isoc = HTInputSocket_new(file_number);
1.2 timbl 872:
873: /* Push binary from socket down sink
1.10 timbl 874: **
875: ** This operation could be put into a main event loop
1.2 timbl 876: */
877: for(;;) {
878: int status = NETREAD(
1.13 timbl 879: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 880: if (status <= 0) {
881: if (status == 0) break;
882: if (TRACE) fprintf(stderr,
1.24 luotonen 883: "HTFormat: Read error, read returns %d with errno=%d\n",
884: status, errno);
1.2 timbl 885: break;
886: }
1.26 luotonen 887:
1.8 timbl 888: #ifdef NOT_ASCII
889: {
890: char * p;
1.13 timbl 891: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 892: *p = FROMASCII(*p);
893: }
894: }
895: #endif
896:
1.13 timbl 897: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 898: cnt += status;
1.2 timbl 899: } /* next bufferload */
1.26 luotonen 900:
1.13 timbl 901: HTInputSocket_free(isoc);
1.26 luotonen 902:
903: return cnt;
1.2 timbl 904: }
905:
1.1 timbl 906:
1.7 secret 907:
908: /* Push data from a file pointer down a stream
909: ** -------------------------------------
910: **
911: ** This routine is responsible for creating and PRESENTING any
912: ** graphic (or other) objects described by the file.
913: **
914: **
915: */
916: PUBLIC void HTFileCopy ARGS2(
917: FILE *, fp,
918: HTStream*, sink)
919: {
920: HTStreamClass targetClass;
1.13 timbl 921: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 922:
923: /* Push the data down the stream
924: **
925: */
926: targetClass = *(sink->isa); /* Copy pointers to procedures */
927:
928: /* Push binary from socket down sink
929: */
930: for(;;) {
931: int status = fread(
932: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
933: if (status == 0) { /* EOF or error */
934: if (ferror(fp) == 0) break;
935: if (TRACE) fprintf(stderr,
936: "HTFormat: Read error, read returns %d\n", ferror(fp));
937: break;
938: }
939: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 940: } /* next bufferload */
1.7 secret 941: }
942:
943:
944:
945:
1.2 timbl 946: /* Push data from a socket down a stream STRIPPING CR
947: ** --------------------------------------------------
948: **
949: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 950: ** graphic (or other) objects described by the socket.
1.2 timbl 951: **
952: ** The file number given is assumed to be a TELNET stream ie containing
953: ** CRLF at the end of lines which need to be stripped to LF for unix
954: ** when the format is textual.
955: **
1.1 timbl 956: */
1.2 timbl 957: PUBLIC void HTCopyNoCR ARGS2(
958: int, file_number,
959: HTStream*, sink)
960: {
1.13 timbl 961: HTStreamClass targetClass;
962: HTInputSocket * isoc;
1.1 timbl 963:
1.2 timbl 964: /* Push the data, ignoring CRLF, down the stream
965: **
966: */
967: targetClass = *(sink->isa); /* Copy pointers to procedures */
968:
969: /* Push text from telnet socket down sink
970: **
971: ** @@@@@ To push strings could be faster? (especially is we
972: ** cheat and don't ignore CR! :-}
973: */
1.13 timbl 974: isoc = HTInputSocket_new(file_number);
1.2 timbl 975: for(;;) {
976: char character;
1.13 timbl 977: character = HTInputSocket_getCharacter(isoc);
1.2 timbl 978: if (character == (char)EOF) break;
979: (*targetClass.put_character)(sink, character);
980: }
1.13 timbl 981: HTInputSocket_free(isoc);
1.2 timbl 982: }
1.1 timbl 983:
1.2 timbl 984:
1.7 secret 985:
1.2 timbl 986: /* Parse a socket given format and file number
987: **
988: ** This routine is responsible for creating and PRESENTING any
989: ** graphic (or other) objects described by the file.
990: **
991: ** The file number given is assumed to be a TELNET stream ie containing
992: ** CRLF at the end of lines which need to be stripped to LF for unix
993: ** when the format is textual.
994: **
995: */
1.14 timbl 996:
1.12 timbl 997: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 998: HTFormat, rep_in,
1.2 timbl 999: int, file_number,
1.12 timbl 1000: HTRequest *, request)
1.2 timbl 1001: {
1002: HTStream * stream;
1003: HTStreamClass targetClass;
1.1 timbl 1004:
1.34 ! luotonen 1005: stream = HTStreamStack(rep_in, request, YES);
1.29 frystyk 1006:
1.2 timbl 1007: if (!stream) {
1.30 frystyk 1008: char buffer[1024]; /* @@@@@@@@ */
1.2 timbl 1009: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1010: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.30 frystyk 1011: if (TRACE) fprintf(stderr, "HTFormat(in HTParseSocket): %s\n", buffer);
1.16 luotonen 1012: return HTLoadError(request, 501, buffer);
1.2 timbl 1013: }
1.1 timbl 1014:
1.3 timbl 1015: /* Push the data, ignoring CRLF if necessary, down the stream
1016: **
1.2 timbl 1017: **
1.3 timbl 1018: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1019: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1020: ** The current method smells anyway.
1.2 timbl 1021: */
1022: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1023: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1024: || (request->content_encoding &&
1025: request->content_encoding != HTAtom_for("8bit") &&
1026: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1027: || strstr(HTAtom_name(rep_in), "image/")
1028: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1029: HTCopy(file_number, stream);
1.2 timbl 1030: } else { /* ascii text with CRLFs :-( */
1031: HTCopyNoCR(file_number, stream);
1032: }
1.7 secret 1033: (*targetClass.free)(stream);
1034:
1035: return HT_LOADED;
1036: }
1037:
1038:
1039:
1040: /* Parse a file given format and file pointer
1041: **
1042: ** This routine is responsible for creating and PRESENTING any
1043: ** graphic (or other) objects described by the file.
1044: **
1045: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1046: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1047: ** when the format is textual.
1048: **
1049: */
1.12 timbl 1050: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1051: HTFormat, rep_in,
1.7 secret 1052: FILE *, fp,
1.12 timbl 1053: HTRequest *, request)
1.7 secret 1054: {
1055: HTStream * stream;
1056: HTStreamClass targetClass;
1057:
1.34 ! luotonen 1058: stream = HTStreamStack(rep_in, request, YES);
1.7 secret 1059:
1060: if (!stream) {
1.30 frystyk 1061: char buffer[1024]; /* @@@@@@@@ */
1.7 secret 1062: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1063: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7 secret 1064: if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.29 frystyk 1065: return HTLoadError(request, 501, buffer);
1.7 secret 1066: }
1067:
1.9 timbl 1068: /* Push the data down the stream
1.7 secret 1069: **
1070: **
1071: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1072: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1073: ** The current method smells anyway.
1074: */
1075: targetClass = *(stream->isa); /* Copy pointers to procedures */
1076: HTFileCopy(fp, stream);
1.2 timbl 1077: (*targetClass.free)(stream);
1.1 timbl 1078:
1.2 timbl 1079: return HT_LOADED;
1.1 timbl 1080: }
1.2 timbl 1081:
1.10 timbl 1082:
1083: /* Converter stream: Network Telnet to internal character text
1084: ** -----------------------------------------------------------
1085: **
1086: ** The input is assumed to be in ASCII, with lines delimited
1087: ** by (13,10) pairs. These pairs are converted into (CR,LF)
1088: ** pairs in the local representation. The (CR,LF) sequence
1089: ** when found is changed to a '\n' character, the internal
1090: ** C representation of a new line.
1091: */
1092:
1093:
1.11 timbl 1094: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1095: {
1096: char c = FROMASCII(net_char);
1097: if (me->had_cr) {
1098: if (c==LF) {
1099: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1100: me->had_cr = NO;
1101: return;
1102: } else {
1103: me->sink->isa->put_character(me->sink, CR); /* leftover */
1104: }
1105: }
1106: me->had_cr = (c==CR);
1107: if (!me->had_cr)
1108: me->sink->isa->put_character(me->sink, c); /* normal */
1109: }
1110:
1.11 timbl 1111: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1112: {
1113: CONST char * p;
1114: for(p=s; *p; p++) NetToText_put_character(me, *p);
1115: }
1116:
1.11 timbl 1117: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1118: {
1119: CONST char * p;
1120: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1121: }
1122:
1123: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1124: {
1125: me->sink->isa->free(me->sink); /* Close rest of pipe */
1126: free(me);
1127: }
1128:
1129: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1130: {
1131: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1132: free(me);
1133: }
1134:
1135: /* The class structure
1136: */
1137: PRIVATE HTStreamClass NetToTextClass = {
1138: "NetToText",
1139: NetToText_free,
1140: NetToText_abort,
1141: NetToText_put_character,
1142: NetToText_put_string,
1143: NetToText_put_block
1144: };
1145:
1146: /* The creation method
1147: */
1148: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1149: {
1150: HTStream* me = (HTStream*)malloc(sizeof(*me));
1151: if (me == NULL) outofmem(__FILE__, "NetToText");
1152: me->isa = &NetToTextClass;
1153:
1154: me->had_cr = NO;
1155: me->sink = sink;
1156: return me;
1157: }
1.2 timbl 1158:
1159:
Webmaster