Annotation of libwww/Library/src/HTFormat.c, revision 1.36
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
22: #ifdef unix
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
27: /* Full pathname would be better! */
28: #endif
29: #endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.34 luotonen 48: #include "HTGuess.h"
49:
1.2 timbl 50:
51: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
52:
1.10 timbl 53: #ifdef ORIGINAL
1.2 timbl 54: struct _HTStream {
55: CONST HTStreamClass* isa;
56: /* ... */
57: };
1.10 timbl 58: #endif
59:
60: /* this version used by the NetToText stream */
61: struct _HTStream {
62: CONST HTStreamClass * isa;
63: BOOL had_cr;
64: HTStream * sink;
65: };
1.2 timbl 66:
67:
1.17 luotonen 68: /*
69: ** Accept-Encoding and Accept-Language
70: */
71: typedef struct _HTAcceptNode {
72: HTAtom * atom;
73: float quality;
74: } HTAcceptNode;
75:
76:
77:
78:
1.2 timbl 79: /* Presentation methods
80: ** --------------------
81: */
82:
1.14 timbl 83: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 84:
1.31 frystyk 85: /* -------------------------------------------------------------------------
86: This function replaces the code in HTRequest_delete() in order to keep
87: the data structure hidden (it is NOT a joke!)
88: Henrik 14/03-94
89: ------------------------------------------------------------------------- */
90: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
91: {
92: HTList *cur = me;
93: HTPresentation *pres;
94: if (!me)
95: return;
96: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
97: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
98: free(pres);
99: }
100: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
101: }
102:
1.2 timbl 103:
104: /* Define a presentation system command for a content-type
105: ** -------------------------------------------------------
106: */
1.12 timbl 107: PUBLIC void HTSetPresentation ARGS6(
108: HTList *, conversions,
109: CONST char *, representation,
110: CONST char *, command,
111: float, quality,
112: float, secs,
113: float, secs_per_byte
1.2 timbl 114: ){
115:
116: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
117: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
118:
119: pres->rep = HTAtom_for(representation);
120: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
121: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
122: pres->quality = quality;
123: pres->secs = secs;
124: pres->secs_per_byte = secs_per_byte;
125: pres->rep = HTAtom_for(representation);
126: pres->command = 0;
127: StrAllocCopy(pres->command, command);
128:
1.12 timbl 129: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 130:
1.15 luotonen 131: #ifdef OLD_CODE
132: if (strcmp(representation, "*")==0) {
1.2 timbl 133: if (default_presentation) free(default_presentation);
134: default_presentation = pres;
1.12 timbl 135: } else
136: #endif
137: HTList_addObject(conversions, pres);
1.2 timbl 138: }
139:
140:
141: /* Define a built-in function for a content-type
142: ** ---------------------------------------------
143: */
1.12 timbl 144: PUBLIC void HTSetConversion ARGS7(
145: HTList *, conversions,
146: CONST char *, representation_in,
147: CONST char *, representation_out,
1.6 timbl 148: HTConverter*, converter,
1.12 timbl 149: float, quality,
150: float, secs,
151: float, secs_per_byte
1.2 timbl 152: ){
1.1 timbl 153:
1.2 timbl 154: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
155: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
156:
157: pres->rep = HTAtom_for(representation_in);
158: pres->rep_out = HTAtom_for(representation_out);
159: pres->converter = converter;
160: pres->command = NULL; /* Fixed */
161: pres->quality = quality;
162: pres->secs = secs;
163: pres->secs_per_byte = secs_per_byte;
164: pres->command = 0;
165:
1.12 timbl 166: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 167:
1.12 timbl 168: #ifdef OLD_CODE
1.2 timbl 169: if (strcmp(representation_in, "*")==0) {
170: if (default_presentation) free(default_presentation);
171: default_presentation = pres;
1.12 timbl 172: } else
173: #endif
174: HTList_addObject(conversions, pres);
1.2 timbl 175: }
1.1 timbl 176:
177:
178:
1.17 luotonen 179: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
180: char *, enc,
181: float, quality)
182: {
183: HTAcceptNode * node;
184: char * cur;
185:
186: if (!list || !enc || !*enc) return;
187:
188: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
189:
190: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
191: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
192: HTList_addObject(list, (void*)node);
193:
194: node->atom = HTAtom_for(enc);
195: node->quality = quality;
196: }
197:
198:
199: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
200: char *, lang,
201: float, quality)
202: {
203: HTAcceptNode * node;
204:
205: if (!list || !lang || !*lang) return;
206:
207: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
208: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
209:
210: HTList_addObject(list, (void*)node);
211: node->atom = HTAtom_for(lang);
212: node->quality = quality;
213: }
214:
215:
216: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
217: HTAtom *, actual)
218: {
219: char *t, *a, *st, *sa;
220: BOOL match = NO;
221:
1.22 luotonen 222: if (template && actual && (t = HTAtom_name(template))) {
223: if (!strcmp(t, "*"))
224: return YES;
1.17 luotonen 225:
1.22 luotonen 226: if (strchr(t, '*') &&
227: (a = HTAtom_name(actual)) &&
228: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 229:
1.22 luotonen 230: *sa = 0;
231: *st = 0;
232:
233: if ((*(st-1)=='*' &&
234: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
235: (*(st+1)=='*' && !strcasecomp(t,a)))
236: match = YES;
237:
238: *sa = '/';
239: *st = '/';
240: }
241: }
1.23 luotonen 242: return match;
1.17 luotonen 243: }
244:
1.36 ! luotonen 245: /*
! 246: * Added by takada@seraph.ntt.jp (94/04/08)
! 247: */
! 248: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
! 249: HTAtom *, actual)
! 250: {
! 251: char *t, *a, *st, *sa;
! 252: BOOL match = NO;
! 253:
! 254: if (template && actual &&
! 255: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
! 256: st = strchr(t, '_');
! 257: sa = strchr(a, '_');
! 258: if ((st != NULL) && (sa != NULL)) {
! 259: if (!strcasecomp(t, a))
! 260: match = YES;
! 261: else
! 262: match = NO;
! 263: }
! 264: else {
! 265: if (st != NULL) *st = 0;
! 266: if (sa != NULL) *sa = 0;
! 267: if (!strcasecomp(t, a))
! 268: match = YES;
! 269: else
! 270: match = NO;
! 271: if (st != NULL) *st = '_';
! 272: if (sa != NULL) *sa = '_';
! 273: }
! 274: }
! 275: return match;
! 276: }
! 277: /* end of addition */
! 278:
! 279:
1.17 luotonen 280:
281: PRIVATE float type_value ARGS2(HTAtom *, content_type,
282: HTList *, accepted)
283: {
284: HTList * cur = accepted;
285: HTPresentation * pres;
286: HTPresentation * wild = NULL;
287:
288: if (!content_type || !accepted) return -1;
289:
290: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
291: if (pres->rep == content_type)
292: return pres->quality;
293: else if (wild_match(pres->rep, content_type))
294: wild = pres;
295: }
296: if (wild) return wild->quality;
297: else return -1;
298: }
299:
300:
301: PRIVATE float lang_value ARGS2(HTAtom *, language,
302: HTList *, accepted)
303: {
304: HTList * cur = accepted;
305: HTAcceptNode * node;
306: HTAcceptNode * wild = NULL;
307:
308: if (!language || !accepted || HTList_isEmpty(accepted)) {
309: return 0.1;
310: }
311:
312: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
313: if (node->atom == language) {
314: return node->quality;
315: }
1.36 ! luotonen 316: /*
! 317: * patch by takada@seraph.ntt.jp (94/04/08)
! 318: * the original line was
! 319: * else if (wild_match(node->atom, language)) {
! 320: * and the new line is
! 321: */
! 322: else if (lang_match(node->atom, language)) {
1.17 luotonen 323: wild = node;
324: }
325: }
326:
327: if (wild) {
328: return wild->quality;
329: }
330: else {
331: return 0.1;
332: }
333: }
334:
335:
336: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
337: HTList *, accepted)
338: {
339: HTList * cur = accepted;
340: HTAcceptNode * node;
341: HTAcceptNode * wild = NULL;
342: char * e;
343:
344: if (!encoding || !accepted || HTList_isEmpty(accepted))
345: return 1;
346:
347: e = HTAtom_name(encoding);
348: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
349: return 1;
350:
351: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
352: if (node->atom == encoding)
353: return node->quality;
354: else if (wild_match(node->atom, encoding))
355: wild = node;
356: }
357: if (wild) return wild->quality;
358: else return 1;
359: }
360:
361:
362: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
363: HTList *, accepted_content_types,
364: HTList *, accepted_languages,
365: HTList *, accepted_encodings)
366: {
367: int accepted_cnt = 0;
368: HTList * accepted;
369: HTList * sorted;
370: HTList * cur;
371: HTContentDescription * d;
372:
373: if (!possibilities) return NO;
374:
375: accepted = HTList_new();
376: cur = possibilities;
377: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
378: float tv = type_value(d->content_type, accepted_content_types);
379: float lv = lang_value(d->content_language, accepted_languages);
380: float ev = encoding_value(d->content_encoding, accepted_encodings);
381:
382: if (tv > 0) {
383: d->quality *= tv * lv * ev;
384: HTList_addObject(accepted, d);
385: accepted_cnt++;
386: }
1.18 luotonen 387: else {
388: if (d->filename) free(d->filename);
389: free(d);
390: }
1.17 luotonen 391: }
392:
1.18 luotonen 393: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 394: CTRACE(stderr,
1.18 luotonen 395: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 396:
397: sorted = HTList_new();
398: while (accepted_cnt-- > 0) {
399: HTContentDescription * worst = NULL;
400: cur = accepted;
401: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
402: if (!worst || d->quality < worst->quality)
403: worst = d;
404: }
405: if (worst) {
406: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
407: accepted_cnt+1,
408: worst->quality,
409: (worst->content_type
410: ? HTAtom_name(worst->content_type) : "-"),
411: (worst->content_language
412: ? HTAtom_name(worst->content_language) :"-"),
413: (worst->content_encoding
414: ? HTAtom_name(worst->content_encoding) :"-"),
415: (worst->filename
416: ? worst->filename :"-"));
417: HTList_removeObject(accepted, (void*)worst);
418: HTList_addObject(sorted, (void*)worst);
419: }
420: }
1.18 luotonen 421: CTRACE(stderr, "\n");
1.17 luotonen 422: HTList_delete(accepted);
423: HTList_delete(possibilities->next);
424: possibilities->next = sorted->next;
425: sorted->next = NULL;
426: HTList_delete(sorted);
427:
428: if (!HTList_isEmpty(possibilities)) return YES;
429: else return NO;
430: }
431:
432:
433:
434:
435:
1.13 timbl 436: /* Socket Input Buffering
437: ** ----------------------
1.1 timbl 438: **
1.13 timbl 439: ** This code is used because one cannot in general open a
440: ** file descriptor for a socket.
441: **
1.1 timbl 442: ** The input file is read using the macro which can read from
1.13 timbl 443: ** a socket or a file, but this should not be used for files
444: ** as fopen() etc is more portable of course.
445: **
1.1 timbl 446: ** The input buffer size, if large will give greater efficiency and
447: ** release the server faster, and if small will save space on PCs etc.
448: */
449:
450:
451: /* Set up the buffering
452: **
453: ** These routines are public because they are in fact needed by
454: ** many parsers, and on PCs and Macs we should not duplicate
455: ** the static buffer area.
456: */
1.13 timbl 457: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 458: {
1.28 frystyk 459: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 460: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
461: isoc->input_file_number = file_number;
462: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
463: return isoc;
1.1 timbl 464: }
465:
1.35 frystyk 466: /* This should return HT_INTERRUPTED if interrupted BUT the connection
467: MUST not be closed */
468: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 469: {
1.35 frystyk 470: int ch;
1.1 timbl 471: do {
1.13 timbl 472: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 473: int status = NETREAD(
1.13 timbl 474: isoc->input_file_number,
475: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 476: if (status <= 0) {
1.35 frystyk 477: if (status == 0) return EOF;
1.1 timbl 478: if (TRACE) fprintf(stderr,
479: "HTFormat: File read error %d\n", status);
1.35 frystyk 480: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 481: }
1.35 frystyk 482: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 483: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 484: }
1.35 frystyk 485: ch = (int) *isoc->input_pointer++;
486: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 487:
488: return FROMASCII(ch);
489: }
490:
1.17 luotonen 491: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 492: {
493: if (me) free(me);
494: }
495:
496:
1.16 luotonen 497: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
498: int *, len)
499: {
500: if (isoc->input_pointer >= isoc->input_limit) {
501: int status = NETREAD(isoc->input_file_number,
502: isoc->input_buffer,
503: ((*len < INPUT_BUFFER_SIZE) ?
504: *len : INPUT_BUFFER_SIZE));
505: if (status <= 0) {
506: isoc->input_limit = isoc->input_buffer;
507: if (status < 0)
508: CTRACE(stderr, "HTInputSocket: File read error %d\n", status);
509: *len = 0;
510: return NULL;
511: }
512: else {
513: *len = status;
514: return isoc->input_buffer;
515: }
516: }
517: else {
518: char * ret = isoc->input_pointer;
519: *len = isoc->input_limit - isoc->input_pointer;
520: isoc->input_pointer = isoc->input_limit;
521: return ret;
522: }
523: }
524:
525:
1.15 luotonen 526: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
527: {
528: if (isoc) {
529: int status;
530:
531: isoc->input_pointer = isoc->input_buffer;
532: status = NETREAD(isoc->input_file_number,
533: isoc->input_buffer,
534: INPUT_BUFFER_SIZE);
535: if (status <= 0) {
536: isoc->input_limit = isoc->input_buffer;
537: if (status < 0)
538: if (TRACE) fprintf(stderr,
539: "HTInputSocket: File read error %d\n",
540: status);
541: }
542: else
543: isoc->input_limit = isoc->input_buffer + status;
544: return status;
545: }
546: return -1;
547: }
548:
549:
550: PRIVATE void ascii_cat ARGS3(char **, linep,
551: char *, start,
552: char *, end)
553: {
554: if (linep && start && end && start <= end) {
555: char *ptr;
556:
557: if (*linep) {
558: int len = strlen(*linep);
559: *linep = (char*)realloc(*linep, len + end-start + 1);
560: ptr = *linep + len;
561: }
562: else {
563: ptr = *linep = (char*)malloc(end-start + 1);
564: }
565:
566: while (start < end) {
567: *ptr = FROMASCII(*start);
568: ptr++;
569: start++;
570: }
571: *ptr = 0;
572: }
573: }
574:
575:
576: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
577: BOOL, unfold)
578: {
579: if (!isoc)
580: return NULL;
581: else {
582: BOOL check_unfold = NO;
583: int prev_cr = 0;
584: char *start = isoc->input_pointer;
585: char *cur = isoc->input_pointer;
586: char * line = NULL;
587:
588: for(;;) {
589: /*
590: ** Get more if needed to complete line
591: */
592: if (cur >= isoc->input_limit) { /* Need more data */
593: ascii_cat(&line, start, cur);
594: if (fill_in_buffer(isoc) <= 0)
595: return line;
596: start = cur = isoc->input_pointer;
597: } /* if need more data */
598:
599: /*
600: ** Find a line feed if there is one
601: */
602: for(; cur < isoc->input_limit; cur++) {
603: char c = FROMASCII(*cur);
604: if (!c) {
1.18 luotonen 605: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 606: return NULL; /* Panic! read a 0! */
607: }
608: if (check_unfold && c != ' ' && c != '\t') {
609: return line; /* Note: didn't update isoc->input_pointer */
610: }
611: else {
612: check_unfold = NO;
613: }
614:
615: if (c=='\r') {
616: prev_cr = 1;
617: }
618: else {
619: if (c=='\n') { /* Found a line feed */
620: ascii_cat(&line, start, cur-prev_cr);
621: start = isoc->input_pointer = cur+1;
622:
623: if (line && strlen(line) > 0 && unfold) {
624: check_unfold = YES;
625: }
626: else {
627: return line;
628: }
629: } /* if NL */
630: /* else just a regular character */
631: prev_cr = 0;
632: } /* if not CR */
633: } /* while characters in buffer remain */
634: } /* until line read or end-of-file */
635: } /* valid parameters to function */
636: }
637:
638:
639: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
640: {
641: return get_some_line(isoc, NO);
642: }
643:
644: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
645: {
646: return get_some_line(isoc, YES);
647: }
648:
649:
650: /*
651: ** Read HTTP status line (if there is one).
652: **
653: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
654: ** First look at the stub in ASCII and check if it starts "HTTP/".
655: **
656: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
657: ** will be taken as a HTTP 1.0 server. Failure.
658: */
659: #define STUB_LENGTH 20
660: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
661: {
662: if (!isoc) {
663: return NULL;
664: }
665: else {
666: char buf[STUB_LENGTH + 1];
667: int i;
668: char server_version[STUB_LENGTH+1];
669: int server_status;
670:
671: /*
672: ** Read initial buffer
673: */
674: if (isoc->input_pointer >= isoc->input_limit &&
675: fill_in_buffer(isoc) <= 0) {
676: return NULL;
677: }
678:
679: for (i=0; i < STUB_LENGTH; i++)
680: buf[i] = FROMASCII(isoc->input_buffer[i]);
681: buf[STUB_LENGTH] = 0;
682:
683: if (0 != strncmp(buf, "HTTP/", 5) ||
684: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
685: return NULL;
686: else
687: return get_some_line(isoc, NO);
688: }
689: }
690:
691:
692: /*
693: ** Do heuristic test to see if this is binary.
694: **
695: ** We check for characters above 128 in the first few bytes, and
696: ** if we find them we forget the html default.
697: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
698: **
699: ** Bugs: An HTTP 0.9 server returning a binary document with
700: ** characters < 128 will be read as ASCII.
701: */
702: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
703: {
704: if (isoc &&
705: (isoc->input_pointer < isoc->input_limit ||
706: fill_in_buffer(isoc) > 0)) {
707: char *p = isoc->input_buffer;
708: int i = STUB_LENGTH;
709:
710: for( ; i && p < isoc->input_limit; p++, i++)
711: if (((int)*p)&128)
712: return YES;
713: }
714: return NO;
715: }
716:
717:
718:
1.1 timbl 719: /* Stream the data to an output file as binary
720: */
1.13 timbl 721: PUBLIC int HTOutputBinary ARGS3( HTInputSocket *, isoc,
722: int, input,
723: FILE *, output)
1.1 timbl 724: {
725: do {
726: int status = NETREAD(
1.13 timbl 727: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 728: if (status <= 0) {
729: if (status == 0) return 0;
730: if (TRACE) fprintf(stderr,
731: "HTFormat: File read error %d\n", status);
732: return 2; /* Error */
733: }
1.13 timbl 734: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 735: } while (YES);
736: }
737:
738:
1.33 luotonen 739: PRIVATE BOOL better_match ARGS2(HTFormat, f,
740: HTFormat, g)
741: {
742: CONST char *p, *q;
743:
744: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
745: int i,j;
746: for(i=0 ; *p; p++) if (*p == '*') i++;
747: for(j=0 ; *q; q++) if (*q == '*') j++;
748: if (i < j) return YES;
749: }
750: return NO;
751: }
752:
1.17 luotonen 753:
1.2 timbl 754: /* Create a filter stack
755: ** ---------------------
756: **
1.7 secret 757: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 758: ** structure is made to hold the destination format while the
759: ** new stack is generated. This is just to pass the out format to
760: ** MIME so far. Storing the format of a stream in the stream might
761: ** be a lot neater.
1.10 timbl 762: **
1.29 frystyk 763: ** The star/star format is special, in that if you can take
1.10 timbl 764: ** that you can take anything. However, we
1.2 timbl 765: */
1.34 luotonen 766: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
767: HTRequest *, request,
768: BOOL, guess)
1.2 timbl 769: {
1.12 timbl 770: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 771: HTList * conversion[2];
772: int which_list;
1.25 frystyk 773: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 774: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 775:
1.2 timbl 776: if (TRACE) fprintf(stderr,
777: "HTFormat: Constructing stream stack for %s to %s\n",
1.10 timbl 778: HTAtom_name(rep_in),
1.2 timbl 779: HTAtom_name(rep_out));
1.34 luotonen 780:
781: if (guess && rep_in == WWW_UNKNOWN) {
782: CTRACE(stderr, "Returning... guessing stream\n");
783: return HTGuess_new(request);
784: }
785:
1.21 luotonen 786: if (rep_out == WWW_SOURCE || rep_out == rep_in)
787: return request->output_stream;
1.2 timbl 788:
1.14 timbl 789: conversion[0] = request->conversions;
790: conversion[1] = HTConversions;
1.17 luotonen 791:
1.15 luotonen 792: for(which_list = 0; which_list<2; which_list++) {
793: HTList * cur = conversion[which_list];
794:
795: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 796: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 797: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
798: if (!best_match ||
799: better_match(pres->rep, best_match->rep) ||
800: (!better_match(best_match->rep, pres->rep) &&
801: pres->quality > best_quality)) {
1.25 frystyk 802: best_match = pres;
803: best_quality = pres->quality;
1.10 timbl 804: }
805: }
1.33 luotonen 806:
1.29 frystyk 807: #ifdef OLD_CODE
808: /* This case is now included in the best_match loop */
1.25 frystyk 809: /* Special case when input format is 'www/source' */
1.10 timbl 810: if (pres->rep == source) {
1.29 frystyk 811: if (pres->rep_out == rep_out ||
812: wild_match(pres->rep_out, rep_out))
1.10 timbl 813: source_match = pres;
1.2 timbl 814: }
1.29 frystyk 815: #endif
1.2 timbl 816: }
817: }
1.33 luotonen 818:
1.29 frystyk 819: match = best_match ? best_match : NULL;
820: if (match) {
821: if (match->rep == WWW_SOURCE) {
822: if (TRACE) fprintf(stderr,
823: "HTFormat: Don't know how to handle this, so put out %s to %s\n",
824: HTAtom_name(match->rep),
825: HTAtom_name(rep_out));
826: }
827: return (*match->converter)(
1.25 frystyk 828: request, match->command, rep_in, rep_out,
829: request->output_stream);
1.29 frystyk 830: }
1.2 timbl 831: return NULL;
832: }
833:
834:
835: /* Find the cost of a filter stack
836: ** -------------------------------
837: **
838: ** Must return the cost of the same stack which StreamStack would set up.
839: **
840: ** On entry,
841: ** length The size of the data to be converted
842: */
1.12 timbl 843: PUBLIC float HTStackValue ARGS5(
1.14 timbl 844: HTList *, theseConversions,
1.10 timbl 845: HTFormat, rep_in,
1.2 timbl 846: HTFormat, rep_out,
847: float, initial_value,
848: long int, length)
849: {
1.14 timbl 850: int which_list;
851: HTList* conversion[2];
852:
1.2 timbl 853: if (TRACE) fprintf(stderr,
854: "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 855: HTAtom_name(rep_in), initial_value,
1.2 timbl 856: HTAtom_name(rep_out));
857:
858: if (rep_out == WWW_SOURCE ||
1.10 timbl 859: rep_out == rep_in) return 0.0;
1.2 timbl 860:
1.12 timbl 861: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 862:
1.14 timbl 863: conversion[0] = theseConversions;
864: conversion[1] = HTConversions;
865:
866: for(which_list = 0; which_list<2; which_list++)
867: if (conversion[which_list]) {
1.15 luotonen 868: HTList * cur = conversion[which_list];
1.2 timbl 869: HTPresentation * pres;
1.15 luotonen 870: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
871: if (pres->rep == rep_in &&
1.17 luotonen 872: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 873: float value = initial_value * pres->quality;
874: if (HTMaxSecs != 0.0)
1.15 luotonen 875: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 876: /HTMaxSecs;
877: return value;
878: }
879: }
880: }
881:
882: return -1e30; /* Really bad */
1.17 luotonen 883: }
884:
885:
1.2 timbl 886:
1.1 timbl 887:
1.2 timbl 888: /* Push data from a socket down a stream
889: ** -------------------------------------
1.1 timbl 890: **
1.2 timbl 891: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 892: ** graphic (or other) objects described by the file.
1.2 timbl 893: **
894: ** The file number given is assumed to be a TELNET stream ie containing
895: ** CRLF at the end of lines which need to be stripped to LF for unix
896: ** when the format is textual.
897: **
1.26 luotonen 898: ** RETURNS the number of bytes transferred.
899: **
1.1 timbl 900: */
1.26 luotonen 901: PUBLIC int HTCopy ARGS2(
1.2 timbl 902: int, file_number,
903: HTStream*, sink)
1.1 timbl 904: {
1.2 timbl 905: HTStreamClass targetClass;
1.13 timbl 906: HTInputSocket * isoc;
1.26 luotonen 907: int cnt = 0;
908:
1.5 timbl 909: /* Push the data down the stream
1.2 timbl 910: **
911: */
912: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 913: isoc = HTInputSocket_new(file_number);
1.2 timbl 914:
915: /* Push binary from socket down sink
1.10 timbl 916: **
917: ** This operation could be put into a main event loop
1.2 timbl 918: */
919: for(;;) {
920: int status = NETREAD(
1.13 timbl 921: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 922: if (status <= 0) {
923: if (status == 0) break;
924: if (TRACE) fprintf(stderr,
1.24 luotonen 925: "HTFormat: Read error, read returns %d with errno=%d\n",
926: status, errno);
1.2 timbl 927: break;
928: }
1.26 luotonen 929:
1.8 timbl 930: #ifdef NOT_ASCII
931: {
932: char * p;
1.13 timbl 933: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 934: *p = FROMASCII(*p);
935: }
936: }
937: #endif
938:
1.13 timbl 939: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 940: cnt += status;
1.2 timbl 941: } /* next bufferload */
1.26 luotonen 942:
1.13 timbl 943: HTInputSocket_free(isoc);
1.26 luotonen 944:
945: return cnt;
1.2 timbl 946: }
947:
1.1 timbl 948:
1.7 secret 949:
950: /* Push data from a file pointer down a stream
951: ** -------------------------------------
952: **
953: ** This routine is responsible for creating and PRESENTING any
954: ** graphic (or other) objects described by the file.
955: **
956: **
957: */
958: PUBLIC void HTFileCopy ARGS2(
959: FILE *, fp,
960: HTStream*, sink)
961: {
962: HTStreamClass targetClass;
1.13 timbl 963: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 964:
965: /* Push the data down the stream
966: **
967: */
968: targetClass = *(sink->isa); /* Copy pointers to procedures */
969:
970: /* Push binary from socket down sink
971: */
972: for(;;) {
973: int status = fread(
974: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
975: if (status == 0) { /* EOF or error */
976: if (ferror(fp) == 0) break;
977: if (TRACE) fprintf(stderr,
978: "HTFormat: Read error, read returns %d\n", ferror(fp));
979: break;
980: }
981: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 982: } /* next bufferload */
1.7 secret 983: }
984:
985:
986:
987:
1.2 timbl 988: /* Push data from a socket down a stream STRIPPING CR
989: ** --------------------------------------------------
990: **
991: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 992: ** graphic (or other) objects described by the socket.
1.2 timbl 993: **
994: ** The file number given is assumed to be a TELNET stream ie containing
995: ** CRLF at the end of lines which need to be stripped to LF for unix
996: ** when the format is textual.
997: **
1.1 timbl 998: */
1.2 timbl 999: PUBLIC void HTCopyNoCR ARGS2(
1000: int, file_number,
1001: HTStream*, sink)
1002: {
1.13 timbl 1003: HTStreamClass targetClass;
1004: HTInputSocket * isoc;
1.1 timbl 1005:
1.2 timbl 1006: /* Push the data, ignoring CRLF, down the stream
1007: **
1008: */
1009: targetClass = *(sink->isa); /* Copy pointers to procedures */
1010:
1011: /* Push text from telnet socket down sink
1012: **
1013: ** @@@@@ To push strings could be faster? (especially is we
1014: ** cheat and don't ignore CR! :-}
1015: */
1.13 timbl 1016: isoc = HTInputSocket_new(file_number);
1.2 timbl 1017: for(;;) {
1018: char character;
1.13 timbl 1019: character = HTInputSocket_getCharacter(isoc);
1.2 timbl 1020: if (character == (char)EOF) break;
1021: (*targetClass.put_character)(sink, character);
1022: }
1.13 timbl 1023: HTInputSocket_free(isoc);
1.2 timbl 1024: }
1.1 timbl 1025:
1.2 timbl 1026:
1.7 secret 1027:
1.2 timbl 1028: /* Parse a socket given format and file number
1029: **
1030: ** This routine is responsible for creating and PRESENTING any
1031: ** graphic (or other) objects described by the file.
1032: **
1033: ** The file number given is assumed to be a TELNET stream ie containing
1034: ** CRLF at the end of lines which need to be stripped to LF for unix
1035: ** when the format is textual.
1036: **
1037: */
1.14 timbl 1038:
1.12 timbl 1039: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1040: HTFormat, rep_in,
1.2 timbl 1041: int, file_number,
1.12 timbl 1042: HTRequest *, request)
1.2 timbl 1043: {
1044: HTStream * stream;
1045: HTStreamClass targetClass;
1.1 timbl 1046:
1.34 luotonen 1047: stream = HTStreamStack(rep_in, request, YES);
1.29 frystyk 1048:
1.2 timbl 1049: if (!stream) {
1.30 frystyk 1050: char buffer[1024]; /* @@@@@@@@ */
1.2 timbl 1051: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1052: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.30 frystyk 1053: if (TRACE) fprintf(stderr, "HTFormat(in HTParseSocket): %s\n", buffer);
1.16 luotonen 1054: return HTLoadError(request, 501, buffer);
1.2 timbl 1055: }
1.1 timbl 1056:
1.3 timbl 1057: /* Push the data, ignoring CRLF if necessary, down the stream
1058: **
1.2 timbl 1059: **
1.3 timbl 1060: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1061: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1062: ** The current method smells anyway.
1.2 timbl 1063: */
1064: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1065: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1066: || (request->content_encoding &&
1067: request->content_encoding != HTAtom_for("8bit") &&
1068: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1069: || strstr(HTAtom_name(rep_in), "image/")
1070: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1071: HTCopy(file_number, stream);
1.2 timbl 1072: } else { /* ascii text with CRLFs :-( */
1073: HTCopyNoCR(file_number, stream);
1074: }
1.7 secret 1075: (*targetClass.free)(stream);
1076:
1077: return HT_LOADED;
1078: }
1079:
1080:
1081:
1082: /* Parse a file given format and file pointer
1083: **
1084: ** This routine is responsible for creating and PRESENTING any
1085: ** graphic (or other) objects described by the file.
1086: **
1087: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1088: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1089: ** when the format is textual.
1090: **
1091: */
1.12 timbl 1092: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1093: HTFormat, rep_in,
1.7 secret 1094: FILE *, fp,
1.12 timbl 1095: HTRequest *, request)
1.7 secret 1096: {
1097: HTStream * stream;
1098: HTStreamClass targetClass;
1099:
1.34 luotonen 1100: stream = HTStreamStack(rep_in, request, YES);
1.7 secret 1101:
1102: if (!stream) {
1.30 frystyk 1103: char buffer[1024]; /* @@@@@@@@ */
1.7 secret 1104: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1105: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7 secret 1106: if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.29 frystyk 1107: return HTLoadError(request, 501, buffer);
1.7 secret 1108: }
1109:
1.9 timbl 1110: /* Push the data down the stream
1.7 secret 1111: **
1112: **
1113: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1114: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1115: ** The current method smells anyway.
1116: */
1117: targetClass = *(stream->isa); /* Copy pointers to procedures */
1118: HTFileCopy(fp, stream);
1.2 timbl 1119: (*targetClass.free)(stream);
1.1 timbl 1120:
1.2 timbl 1121: return HT_LOADED;
1.1 timbl 1122: }
1.2 timbl 1123:
1.10 timbl 1124:
1125: /* Converter stream: Network Telnet to internal character text
1126: ** -----------------------------------------------------------
1127: **
1128: ** The input is assumed to be in ASCII, with lines delimited
1129: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1130: ** pairs in the local representation. The (CR,LF) sequence
1131: ** when found is changed to a '\n' character, the internal
1132: ** C representation of a new line.
1133: */
1134:
1135:
1.11 timbl 1136: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1137: {
1138: char c = FROMASCII(net_char);
1139: if (me->had_cr) {
1140: if (c==LF) {
1141: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1142: me->had_cr = NO;
1143: return;
1144: } else {
1145: me->sink->isa->put_character(me->sink, CR); /* leftover */
1146: }
1147: }
1148: me->had_cr = (c==CR);
1149: if (!me->had_cr)
1150: me->sink->isa->put_character(me->sink, c); /* normal */
1151: }
1152:
1.11 timbl 1153: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1154: {
1155: CONST char * p;
1156: for(p=s; *p; p++) NetToText_put_character(me, *p);
1157: }
1158:
1.11 timbl 1159: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1160: {
1161: CONST char * p;
1162: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1163: }
1164:
1165: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1166: {
1167: me->sink->isa->free(me->sink); /* Close rest of pipe */
1168: free(me);
1169: }
1170:
1171: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1172: {
1173: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1174: free(me);
1175: }
1176:
1177: /* The class structure
1178: */
1179: PRIVATE HTStreamClass NetToTextClass = {
1180: "NetToText",
1181: NetToText_free,
1182: NetToText_abort,
1183: NetToText_put_character,
1184: NetToText_put_string,
1185: NetToText_put_block
1186: };
1187:
1188: /* The creation method
1189: */
1190: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1191: {
1192: HTStream* me = (HTStream*)malloc(sizeof(*me));
1193: if (me == NULL) outofmem(__FILE__, "NetToText");
1194: me->isa = &NetToTextClass;
1195:
1196: me->had_cr = NO;
1197: me->sink = sink;
1198: return me;
1199: }
1.2 timbl 1200:
1201:
Webmaster