Annotation of libwww/Library/src/HTFormat.c, revision 1.35
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
/* Time budget, in seconds, used by HTStackValue() to scale the estimated
** cost of a conversion (the cost term is skipped when this is 0.0).
*/
PUBLIC float HTMaxSecs = 1e10;		/* No effective limit */
/* NOTE(review): not referenced anywhere in this file; presumably an upper
** bound on acceptable document length -- confirm against callers.
*/
PUBLIC float HTMaxLength = 1e10;	/* No effective limit */
21:
22: #ifdef unix
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
27: /* Full pathname would be better! */
28: #endif
29: #endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.34 luotonen 48: #include "HTGuess.h"
49:
1.2 timbl 50:
/* When set, HTParseSocket() copies the socket data with HTCopy(), i.e.
** without stripping carriage returns.
*/
PUBLIC BOOL HTOutputSource = NO;	/* Flag: shortcut parser to stdout */
52:
1.10 timbl 53: #ifdef ORIGINAL
1.2 timbl 54: struct _HTStream {
55: CONST HTStreamClass* isa;
56: /* ... */
57: };
1.10 timbl 58: #endif
59:
/*	Stream object for the NetToText converter defined at the end of
**	this file (created by HTNetToText).
*/
struct _HTStream {
    CONST HTStreamClass *	isa;	/* method table; set to &NetToTextClass */
    BOOL			had_cr;	/* YES if the previous character was a CR
					   that has not been forwarded yet */
    HTStream *			sink;	/* stream that receives the converted text */
};
1.2 timbl 66:
67:
/*
**	Accept-Encoding and Accept-Language
**
**	One entry of an accepted-encodings or accepted-languages list, as
**	built by HTAcceptEncoding() / HTAcceptLanguage() and consulted by
**	encoding_value() / lang_value().
*/
typedef struct _HTAcceptNode {
    HTAtom *	atom;		/* the encoding or language name */
    float	quality;	/* quality factor returned when this entry matches */
} HTAcceptNode;
75:
76:
77:
78:
1.2 timbl 79: /* Presentation methods
80: ** --------------------
81: */
82:
/* Global conversion list; HTStreamStack() and HTStackValue() search it
** after the list supplied with the request/caller.
*/
PUBLIC HTList * HTConversions = NULL;
1.2 timbl 84:
1.31 frystyk 85: /* -------------------------------------------------------------------------
86: This function replaces the code in HTRequest_delete() in order to keep
87: the data structure hidden (it is NOT a joke!)
88: Henrik 14/03-94
89: ------------------------------------------------------------------------- */
90: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
91: {
92: HTList *cur = me;
93: HTPresentation *pres;
94: if (!me)
95: return;
96: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
97: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
98: free(pres);
99: }
100: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
101: }
102:
1.2 timbl 103:
104: /* Define a presentation system command for a content-type
105: ** -------------------------------------------------------
106: */
1.12 timbl 107: PUBLIC void HTSetPresentation ARGS6(
108: HTList *, conversions,
109: CONST char *, representation,
110: CONST char *, command,
111: float, quality,
112: float, secs,
113: float, secs_per_byte
1.2 timbl 114: ){
115:
116: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
117: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
118:
119: pres->rep = HTAtom_for(representation);
120: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
121: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
122: pres->quality = quality;
123: pres->secs = secs;
124: pres->secs_per_byte = secs_per_byte;
125: pres->rep = HTAtom_for(representation);
126: pres->command = 0;
127: StrAllocCopy(pres->command, command);
128:
1.12 timbl 129: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 130:
1.15 luotonen 131: #ifdef OLD_CODE
132: if (strcmp(representation, "*")==0) {
1.2 timbl 133: if (default_presentation) free(default_presentation);
134: default_presentation = pres;
1.12 timbl 135: } else
136: #endif
137: HTList_addObject(conversions, pres);
1.2 timbl 138: }
139:
140:
141: /* Define a built-in function for a content-type
142: ** ---------------------------------------------
143: */
1.12 timbl 144: PUBLIC void HTSetConversion ARGS7(
145: HTList *, conversions,
146: CONST char *, representation_in,
147: CONST char *, representation_out,
1.6 timbl 148: HTConverter*, converter,
1.12 timbl 149: float, quality,
150: float, secs,
151: float, secs_per_byte
1.2 timbl 152: ){
1.1 timbl 153:
1.2 timbl 154: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
155: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
156:
157: pres->rep = HTAtom_for(representation_in);
158: pres->rep_out = HTAtom_for(representation_out);
159: pres->converter = converter;
160: pres->command = NULL; /* Fixed */
161: pres->quality = quality;
162: pres->secs = secs;
163: pres->secs_per_byte = secs_per_byte;
164: pres->command = 0;
165:
1.12 timbl 166: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 167:
1.12 timbl 168: #ifdef OLD_CODE
1.2 timbl 169: if (strcmp(representation_in, "*")==0) {
170: if (default_presentation) free(default_presentation);
171: default_presentation = pres;
1.12 timbl 172: } else
173: #endif
174: HTList_addObject(conversions, pres);
1.2 timbl 175: }
1.1 timbl 176:
177:
178:
1.17 luotonen 179: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
180: char *, enc,
181: float, quality)
182: {
183: HTAcceptNode * node;
184: char * cur;
185:
186: if (!list || !enc || !*enc) return;
187:
188: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
189:
190: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
191: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
192: HTList_addObject(list, (void*)node);
193:
194: node->atom = HTAtom_for(enc);
195: node->quality = quality;
196: }
197:
198:
199: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
200: char *, lang,
201: float, quality)
202: {
203: HTAcceptNode * node;
204:
205: if (!list || !lang || !*lang) return;
206:
207: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
208: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
209:
210: HTList_addObject(list, (void*)node);
211: node->atom = HTAtom_for(lang);
212: node->quality = quality;
213: }
214:
215:
216: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
217: HTAtom *, actual)
218: {
219: char *t, *a, *st, *sa;
220: BOOL match = NO;
221:
1.22 luotonen 222: if (template && actual && (t = HTAtom_name(template))) {
223: if (!strcmp(t, "*"))
224: return YES;
1.17 luotonen 225:
1.22 luotonen 226: if (strchr(t, '*') &&
227: (a = HTAtom_name(actual)) &&
228: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 229:
1.22 luotonen 230: *sa = 0;
231: *st = 0;
232:
233: if ((*(st-1)=='*' &&
234: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
235: (*(st+1)=='*' && !strcasecomp(t,a)))
236: match = YES;
237:
238: *sa = '/';
239: *st = '/';
240: }
241: }
1.23 luotonen 242: return match;
1.17 luotonen 243: }
244:
245:
246: PRIVATE float type_value ARGS2(HTAtom *, content_type,
247: HTList *, accepted)
248: {
249: HTList * cur = accepted;
250: HTPresentation * pres;
251: HTPresentation * wild = NULL;
252:
253: if (!content_type || !accepted) return -1;
254:
255: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
256: if (pres->rep == content_type)
257: return pres->quality;
258: else if (wild_match(pres->rep, content_type))
259: wild = pres;
260: }
261: if (wild) return wild->quality;
262: else return -1;
263: }
264:
265:
266: PRIVATE float lang_value ARGS2(HTAtom *, language,
267: HTList *, accepted)
268: {
269: HTList * cur = accepted;
270: HTAcceptNode * node;
271: HTAcceptNode * wild = NULL;
272:
273: if (!language || !accepted || HTList_isEmpty(accepted)) {
274: return 0.1;
275: }
276:
277: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
278: if (node->atom == language) {
279: return node->quality;
280: }
281: else if (wild_match(node->atom, language)) {
282: wild = node;
283: }
284: }
285:
286: if (wild) {
287: return wild->quality;
288: }
289: else {
290: return 0.1;
291: }
292: }
293:
294:
295: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
296: HTList *, accepted)
297: {
298: HTList * cur = accepted;
299: HTAcceptNode * node;
300: HTAcceptNode * wild = NULL;
301: char * e;
302:
303: if (!encoding || !accepted || HTList_isEmpty(accepted))
304: return 1;
305:
306: e = HTAtom_name(encoding);
307: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
308: return 1;
309:
310: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
311: if (node->atom == encoding)
312: return node->quality;
313: else if (wild_match(node->atom, encoding))
314: wild = node;
315: }
316: if (wild) return wild->quality;
317: else return 1;
318: }
319:
320:
321: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
322: HTList *, accepted_content_types,
323: HTList *, accepted_languages,
324: HTList *, accepted_encodings)
325: {
326: int accepted_cnt = 0;
327: HTList * accepted;
328: HTList * sorted;
329: HTList * cur;
330: HTContentDescription * d;
331:
332: if (!possibilities) return NO;
333:
334: accepted = HTList_new();
335: cur = possibilities;
336: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
337: float tv = type_value(d->content_type, accepted_content_types);
338: float lv = lang_value(d->content_language, accepted_languages);
339: float ev = encoding_value(d->content_encoding, accepted_encodings);
340:
341: if (tv > 0) {
342: d->quality *= tv * lv * ev;
343: HTList_addObject(accepted, d);
344: accepted_cnt++;
345: }
1.18 luotonen 346: else {
347: if (d->filename) free(d->filename);
348: free(d);
349: }
1.17 luotonen 350: }
351:
1.18 luotonen 352: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 353: CTRACE(stderr,
1.18 luotonen 354: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 355:
356: sorted = HTList_new();
357: while (accepted_cnt-- > 0) {
358: HTContentDescription * worst = NULL;
359: cur = accepted;
360: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
361: if (!worst || d->quality < worst->quality)
362: worst = d;
363: }
364: if (worst) {
365: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
366: accepted_cnt+1,
367: worst->quality,
368: (worst->content_type
369: ? HTAtom_name(worst->content_type) : "-"),
370: (worst->content_language
371: ? HTAtom_name(worst->content_language) :"-"),
372: (worst->content_encoding
373: ? HTAtom_name(worst->content_encoding) :"-"),
374: (worst->filename
375: ? worst->filename :"-"));
376: HTList_removeObject(accepted, (void*)worst);
377: HTList_addObject(sorted, (void*)worst);
378: }
379: }
1.18 luotonen 380: CTRACE(stderr, "\n");
1.17 luotonen 381: HTList_delete(accepted);
382: HTList_delete(possibilities->next);
383: possibilities->next = sorted->next;
384: sorted->next = NULL;
385: HTList_delete(sorted);
386:
387: if (!HTList_isEmpty(possibilities)) return YES;
388: else return NO;
389: }
390:
391:
392:
393:
394:
1.13 timbl 395: /* Socket Input Buffering
396: ** ----------------------
1.1 timbl 397: **
1.13 timbl 398: ** This code is used because one cannot in general open a
399: ** file descriptor for a socket.
400: **
1.1 timbl 401: ** The input file is read using the macro which can read from
1.13 timbl 402: ** a socket or a file, but this should not be used for files
403: ** as fopen() etc is more portable of course.
404: **
1.1 timbl 405: ** The input buffer size, if large will give greater efficiency and
406: ** release the server faster, and if small will save space on PCs etc.
407: */
408:
409:
410: /* Set up the buffering
411: **
412: ** These routines are public because they are in fact needed by
413: ** many parsers, and on PCs and Macs we should not duplicate
414: ** the static buffer area.
415: */
1.13 timbl 416: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 417: {
1.28 frystyk 418: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 419: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
420: isoc->input_file_number = file_number;
421: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
422: return isoc;
1.1 timbl 423: }
424:
1.35 ! frystyk 425: /* This should return HT_INTERRUPTED if interrupted BUT the connection
! 426: MUST not be closed */
! 427: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 428: {
1.35 ! frystyk 429: int ch;
1.1 timbl 430: do {
1.13 timbl 431: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 432: int status = NETREAD(
1.13 timbl 433: isoc->input_file_number,
434: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 435: if (status <= 0) {
1.35 ! frystyk 436: if (status == 0) return EOF;
1.1 timbl 437: if (TRACE) fprintf(stderr,
438: "HTFormat: File read error %d\n", status);
1.35 ! frystyk 439: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 440: }
1.35 ! frystyk 441: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 442: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 443: }
1.35 ! frystyk 444: ch = (int) *isoc->input_pointer++;
! 445: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 446:
447: return FROMASCII(ch);
448: }
449:
1.17 luotonen 450: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 451: {
452: if (me) free(me);
453: }
454:
455:
1.16 luotonen 456: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
457: int *, len)
458: {
459: if (isoc->input_pointer >= isoc->input_limit) {
460: int status = NETREAD(isoc->input_file_number,
461: isoc->input_buffer,
462: ((*len < INPUT_BUFFER_SIZE) ?
463: *len : INPUT_BUFFER_SIZE));
464: if (status <= 0) {
465: isoc->input_limit = isoc->input_buffer;
466: if (status < 0)
467: CTRACE(stderr, "HTInputSocket: File read error %d\n", status);
468: *len = 0;
469: return NULL;
470: }
471: else {
472: *len = status;
473: return isoc->input_buffer;
474: }
475: }
476: else {
477: char * ret = isoc->input_pointer;
478: *len = isoc->input_limit - isoc->input_pointer;
479: isoc->input_pointer = isoc->input_limit;
480: return ret;
481: }
482: }
483:
484:
1.15 luotonen 485: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
486: {
487: if (isoc) {
488: int status;
489:
490: isoc->input_pointer = isoc->input_buffer;
491: status = NETREAD(isoc->input_file_number,
492: isoc->input_buffer,
493: INPUT_BUFFER_SIZE);
494: if (status <= 0) {
495: isoc->input_limit = isoc->input_buffer;
496: if (status < 0)
497: if (TRACE) fprintf(stderr,
498: "HTInputSocket: File read error %d\n",
499: status);
500: }
501: else
502: isoc->input_limit = isoc->input_buffer + status;
503: return status;
504: }
505: return -1;
506: }
507:
508:
509: PRIVATE void ascii_cat ARGS3(char **, linep,
510: char *, start,
511: char *, end)
512: {
513: if (linep && start && end && start <= end) {
514: char *ptr;
515:
516: if (*linep) {
517: int len = strlen(*linep);
518: *linep = (char*)realloc(*linep, len + end-start + 1);
519: ptr = *linep + len;
520: }
521: else {
522: ptr = *linep = (char*)malloc(end-start + 1);
523: }
524:
525: while (start < end) {
526: *ptr = FROMASCII(*start);
527: ptr++;
528: start++;
529: }
530: *ptr = 0;
531: }
532: }
533:
534:
/*	Read one line from the socket, optionally unfolding continuations
**	-----------------------------------------------------------------
**
** On entry,
**	isoc	input socket (NULL gives NULL)
**	unfold	if true, a non-empty line is extended with every
**		following line that starts with a space or a tab
**
** On exit,
**	returns a heap string built with ascii_cat() (line feed removed,
**	and a carriage return directly before it removed as well), or
**	whatever was collected when the input ends, which may be NULL.
**	Returns NULL, after freeing any partial line, if a 0 character
**	is read.
**
**	NOTE(review): if a CR is the last byte of one buffer and the LF
**	the first byte of the next, the CR has already been appended by
**	the refill step and the later ascii_cat(start, cur-prev_cr) call
**	is a no-op (start > end), so the returned line keeps a trailing
**	CR.  Not changed here.
*/
PRIVATE char * get_some_line ARGS2(HTInputSocket *,	isoc,
				   BOOL,		unfold)
{
    if (!isoc)
	return NULL;
    else {
	BOOL check_unfold = NO;		/* YES just after a LF in unfold mode */
	int prev_cr = 0;		/* 1 if the previous character was a CR */
	char *start = isoc->input_pointer;	/* first char not yet copied to line */
	char *cur = isoc->input_pointer;	/* scan position */
	char * line = NULL;		/* result accumulated so far */

	for(;;) {
	    /*
	    ** Get more if needed to complete line
	    */
	    if (cur >= isoc->input_limit) { /* Need more data */
		/* Save what has been scanned before the buffer is overwritten */
		ascii_cat(&line, start, cur);
		if (fill_in_buffer(isoc) <= 0)
		    return line;
		start = cur = isoc->input_pointer;
	    } /* if need more data */

	    /*
	    ** Find a line feed if there is one
	    */
	    for(; cur < isoc->input_limit; cur++) {
		char c = FROMASCII(*cur);
		if (!c) {
		    if (line) free(line);	/* Leak fixed AL 6 Feb 94 */
		    return NULL;	/* Panic! read a 0! */
		}
		/* First character after a LF: anything but space/tab ends
		** the (unfolded) line; that character is left unread.
		*/
		if (check_unfold && c != ' ' && c != '\t') {
		    return line;  /* Note: didn't update isoc->input_pointer */
		}
		else {
		    check_unfold = NO;
		}

		if (c=='\r') {
		    prev_cr = 1;
		}
		else {
		    if (c=='\n') {		/* Found a line feed */
			/* Copy up to the LF, dropping a CR right before it */
			ascii_cat(&line, start, cur-prev_cr);
			/* Consume the line including its LF */
			start = isoc->input_pointer = cur+1;

			/* Only a non-empty line can have continuation lines */
			if (line && strlen(line) > 0 && unfold) {
			    check_unfold = YES;
			}
			else {
			    return line;
			}
		    } /* if NL */
		    /* else just a regular character */
		    prev_cr = 0;
		} /* if not CR */
	    } /* while characters in buffer remain */
	} /* until line read or end-of-file */
    } /* valid parameters to function */
}
596:
597:
598: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
599: {
600: return get_some_line(isoc, NO);
601: }
602:
603: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
604: {
605: return get_some_line(isoc, YES);
606: }
607:
608:
609: /*
610: ** Read HTTP status line (if there is one).
611: **
612: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
613: ** First look at the stub in ASCII and check if it starts "HTTP/".
614: **
615: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
616: ** will be taken as a HTTP 1.0 server. Failure.
617: */
618: #define STUB_LENGTH 20
619: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
620: {
621: if (!isoc) {
622: return NULL;
623: }
624: else {
625: char buf[STUB_LENGTH + 1];
626: int i;
627: char server_version[STUB_LENGTH+1];
628: int server_status;
629:
630: /*
631: ** Read initial buffer
632: */
633: if (isoc->input_pointer >= isoc->input_limit &&
634: fill_in_buffer(isoc) <= 0) {
635: return NULL;
636: }
637:
638: for (i=0; i < STUB_LENGTH; i++)
639: buf[i] = FROMASCII(isoc->input_buffer[i]);
640: buf[STUB_LENGTH] = 0;
641:
642: if (0 != strncmp(buf, "HTTP/", 5) ||
643: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
644: return NULL;
645: else
646: return get_some_line(isoc, NO);
647: }
648: }
649:
650:
651: /*
652: ** Do heuristic test to see if this is binary.
653: **
654: ** We check for characters above 128 in the first few bytes, and
655: ** if we find them we forget the html default.
656: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
657: **
658: ** Bugs: An HTTP 0.9 server returning a binary document with
659: ** characters < 128 will be read as ASCII.
660: */
661: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
662: {
663: if (isoc &&
664: (isoc->input_pointer < isoc->input_limit ||
665: fill_in_buffer(isoc) > 0)) {
666: char *p = isoc->input_buffer;
667: int i = STUB_LENGTH;
668:
669: for( ; i && p < isoc->input_limit; p++, i++)
670: if (((int)*p)&128)
671: return YES;
672: }
673: return NO;
674: }
675:
676:
677:
1.1 timbl 678: /* Stream the data to an ouput file as binary
679: */
1.13 timbl 680: PUBLIC int HTOutputBinary ARGS3( HTInputSocket *, isoc,
681: int, input,
682: FILE *, output)
1.1 timbl 683: {
684: do {
685: int status = NETREAD(
1.13 timbl 686: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 687: if (status <= 0) {
688: if (status == 0) return 0;
689: if (TRACE) fprintf(stderr,
690: "HTFormat: File read error %d\n", status);
691: return 2; /* Error */
692: }
1.13 timbl 693: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 694: } while (YES);
695: }
696:
697:
1.33 luotonen 698: PRIVATE BOOL better_match ARGS2(HTFormat, f,
699: HTFormat, g)
700: {
701: CONST char *p, *q;
702:
703: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
704: int i,j;
705: for(i=0 ; *p; p++) if (*p == '*') i++;
706: for(j=0 ; *q; q++) if (*q == '*') j++;
707: if (i < j) return YES;
708: }
709: return NO;
710: }
711:
1.17 luotonen 712:
1.2 timbl 713: /* Create a filter stack
714: ** ---------------------
715: **
1.7 secret 716: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 717: ** structure is made to hold the destination format while the
718: ** new stack is generated. This is just to pass the out format to
719: ** MIME so far. Storing the format of a stream in the stream might
720: ** be a lot neater.
1.10 timbl 721: **
1.29 frystyk 722: ** The star/star format is special, in that if you can take
1.10 timbl 723: ** that you can take anything. However, we
1.2 timbl 724: */
1.34 luotonen 725: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
726: HTRequest *, request,
727: BOOL, guess)
1.2 timbl 728: {
1.12 timbl 729: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 730: HTList * conversion[2];
731: int which_list;
1.25 frystyk 732: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 733: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 734:
1.2 timbl 735: if (TRACE) fprintf(stderr,
736: "HTFormat: Constructing stream stack for %s to %s\n",
1.10 timbl 737: HTAtom_name(rep_in),
1.2 timbl 738: HTAtom_name(rep_out));
1.34 luotonen 739:
740: if (guess && rep_in == WWW_UNKNOWN) {
741: CTRACE(stderr, "Returning... guessing stream\n");
742: return HTGuess_new(request);
743: }
744:
1.21 luotonen 745: if (rep_out == WWW_SOURCE || rep_out == rep_in)
746: return request->output_stream;
1.2 timbl 747:
1.14 timbl 748: conversion[0] = request->conversions;
749: conversion[1] = HTConversions;
1.17 luotonen 750:
1.15 luotonen 751: for(which_list = 0; which_list<2; which_list++) {
752: HTList * cur = conversion[which_list];
753:
754: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 755: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 756: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
757: if (!best_match ||
758: better_match(pres->rep, best_match->rep) ||
759: (!better_match(best_match->rep, pres->rep) &&
760: pres->quality > best_quality)) {
1.25 frystyk 761: best_match = pres;
762: best_quality = pres->quality;
1.10 timbl 763: }
764: }
1.33 luotonen 765:
1.29 frystyk 766: #ifdef OLD_CODE
767: /* This case is now included in the best_match loop */
1.25 frystyk 768: /* Special case when input format is 'www/source' */
1.10 timbl 769: if (pres->rep == source) {
1.29 frystyk 770: if (pres->rep_out == rep_out ||
771: wild_match(pres->rep_out, rep_out))
1.10 timbl 772: source_match = pres;
1.2 timbl 773: }
1.29 frystyk 774: #endif
1.2 timbl 775: }
776: }
1.33 luotonen 777:
1.29 frystyk 778: match = best_match ? best_match : NULL;
779: if (match) {
780: if (match->rep == WWW_SOURCE) {
781: if (TRACE) fprintf(stderr,
782: "HTFormat: Don't know how to handle this, so put out %s to %s\n",
783: HTAtom_name(match->rep),
784: HTAtom_name(rep_out));
785: }
786: return (*match->converter)(
1.25 frystyk 787: request, match->command, rep_in, rep_out,
788: request->output_stream);
1.29 frystyk 789: }
1.2 timbl 790: return NULL;
791: }
792:
793:
794: /* Find the cost of a filter stack
795: ** -------------------------------
796: **
797: ** Must return the cost of the same stack which StreamStack would set up.
798: **
799: ** On entry,
800: ** length The size of the data to be converted
801: */
1.12 timbl 802: PUBLIC float HTStackValue ARGS5(
1.14 timbl 803: HTList *, theseConversions,
1.10 timbl 804: HTFormat, rep_in,
1.2 timbl 805: HTFormat, rep_out,
806: float, initial_value,
807: long int, length)
808: {
1.14 timbl 809: int which_list;
810: HTList* conversion[2];
811:
1.2 timbl 812: if (TRACE) fprintf(stderr,
813: "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 814: HTAtom_name(rep_in), initial_value,
1.2 timbl 815: HTAtom_name(rep_out));
816:
817: if (rep_out == WWW_SOURCE ||
1.10 timbl 818: rep_out == rep_in) return 0.0;
1.2 timbl 819:
1.12 timbl 820: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 821:
1.14 timbl 822: conversion[0] = theseConversions;
823: conversion[1] = HTConversions;
824:
825: for(which_list = 0; which_list<2; which_list++)
826: if (conversion[which_list]) {
1.15 luotonen 827: HTList * cur = conversion[which_list];
1.2 timbl 828: HTPresentation * pres;
1.15 luotonen 829: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
830: if (pres->rep == rep_in &&
1.17 luotonen 831: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 832: float value = initial_value * pres->quality;
833: if (HTMaxSecs != 0.0)
1.15 luotonen 834: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 835: /HTMaxSecs;
836: return value;
837: }
838: }
839: }
840:
841: return -1e30; /* Really bad */
1.17 luotonen 842: }
843:
844:
1.2 timbl 845:
1.1 timbl 846:
1.2 timbl 847: /* Push data from a socket down a stream
848: ** -------------------------------------
1.1 timbl 849: **
1.2 timbl 850: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 851: ** graphic (or other) objects described by the file.
1.2 timbl 852: **
853: ** The file number given is assumed to be a TELNET stream ie containing
854: ** CRLF at the end of lines which need to be stripped to LF for unix
855: ** when the format is textual.
856: **
1.26 luotonen 857: ** RETURNS the number of bytes transferred.
858: **
1.1 timbl 859: */
1.26 luotonen 860: PUBLIC int HTCopy ARGS2(
1.2 timbl 861: int, file_number,
862: HTStream*, sink)
1.1 timbl 863: {
1.2 timbl 864: HTStreamClass targetClass;
1.13 timbl 865: HTInputSocket * isoc;
1.26 luotonen 866: int cnt = 0;
867:
1.5 timbl 868: /* Push the data down the stream
1.2 timbl 869: **
870: */
871: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 872: isoc = HTInputSocket_new(file_number);
1.2 timbl 873:
874: /* Push binary from socket down sink
1.10 timbl 875: **
876: ** This operation could be put into a main event loop
1.2 timbl 877: */
878: for(;;) {
879: int status = NETREAD(
1.13 timbl 880: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 881: if (status <= 0) {
882: if (status == 0) break;
883: if (TRACE) fprintf(stderr,
1.24 luotonen 884: "HTFormat: Read error, read returns %d with errno=%d\n",
885: status, errno);
1.2 timbl 886: break;
887: }
1.26 luotonen 888:
1.8 timbl 889: #ifdef NOT_ASCII
890: {
891: char * p;
1.13 timbl 892: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 893: *p = FROMASCII(*p);
894: }
895: }
896: #endif
897:
1.13 timbl 898: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 899: cnt += status;
1.2 timbl 900: } /* next bufferload */
1.26 luotonen 901:
1.13 timbl 902: HTInputSocket_free(isoc);
1.26 luotonen 903:
904: return cnt;
1.2 timbl 905: }
906:
1.1 timbl 907:
1.7 secret 908:
909: /* Push data from a file pointer down a stream
910: ** -------------------------------------
911: **
912: ** This routine is responsible for creating and PRESENTING any
913: ** graphic (or other) objects described by the file.
914: **
915: **
916: */
917: PUBLIC void HTFileCopy ARGS2(
918: FILE *, fp,
919: HTStream*, sink)
920: {
921: HTStreamClass targetClass;
1.13 timbl 922: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 923:
924: /* Push the data down the stream
925: **
926: */
927: targetClass = *(sink->isa); /* Copy pointers to procedures */
928:
929: /* Push binary from socket down sink
930: */
931: for(;;) {
932: int status = fread(
933: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
934: if (status == 0) { /* EOF or error */
935: if (ferror(fp) == 0) break;
936: if (TRACE) fprintf(stderr,
937: "HTFormat: Read error, read returns %d\n", ferror(fp));
938: break;
939: }
940: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 941: } /* next bufferload */
1.7 secret 942: }
943:
944:
945:
946:
1.2 timbl 947: /* Push data from a socket down a stream STRIPPING CR
948: ** --------------------------------------------------
949: **
950: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 951: ** graphic (or other) objects described by the socket.
1.2 timbl 952: **
953: ** The file number given is assumed to be a TELNET stream ie containing
954: ** CRLF at the end of lines which need to be stripped to LF for unix
955: ** when the format is textual.
956: **
1.1 timbl 957: */
1.2 timbl 958: PUBLIC void HTCopyNoCR ARGS2(
959: int, file_number,
960: HTStream*, sink)
961: {
1.13 timbl 962: HTStreamClass targetClass;
963: HTInputSocket * isoc;
1.1 timbl 964:
1.2 timbl 965: /* Push the data, ignoring CRLF, down the stream
966: **
967: */
968: targetClass = *(sink->isa); /* Copy pointers to procedures */
969:
970: /* Push text from telnet socket down sink
971: **
972: ** @@@@@ To push strings could be faster? (especially is we
973: ** cheat and don't ignore CR! :-}
974: */
1.13 timbl 975: isoc = HTInputSocket_new(file_number);
1.2 timbl 976: for(;;) {
977: char character;
1.13 timbl 978: character = HTInputSocket_getCharacter(isoc);
1.2 timbl 979: if (character == (char)EOF) break;
980: (*targetClass.put_character)(sink, character);
981: }
1.13 timbl 982: HTInputSocket_free(isoc);
1.2 timbl 983: }
1.1 timbl 984:
1.2 timbl 985:
1.7 secret 986:
1.2 timbl 987: /* Parse a socket given format and file number
988: **
989: ** This routine is responsible for creating and PRESENTING any
990: ** graphic (or other) objects described by the file.
991: **
992: ** The file number given is assumed to be a TELNET stream ie containing
993: ** CRLF at the end of lines which need to be stripped to LF for unix
994: ** when the format is textual.
995: **
996: */
1.14 timbl 997:
1.12 timbl 998: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 999: HTFormat, rep_in,
1.2 timbl 1000: int, file_number,
1.12 timbl 1001: HTRequest *, request)
1.2 timbl 1002: {
1003: HTStream * stream;
1004: HTStreamClass targetClass;
1.1 timbl 1005:
1.34 luotonen 1006: stream = HTStreamStack(rep_in, request, YES);
1.29 frystyk 1007:
1.2 timbl 1008: if (!stream) {
1.30 frystyk 1009: char buffer[1024]; /* @@@@@@@@ */
1.2 timbl 1010: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1011: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.30 frystyk 1012: if (TRACE) fprintf(stderr, "HTFormat(in HTParseSocket): %s\n", buffer);
1.16 luotonen 1013: return HTLoadError(request, 501, buffer);
1.2 timbl 1014: }
1.1 timbl 1015:
1.3 timbl 1016: /* Push the data, ignoring CRLF if necessary, down the stream
1017: **
1.2 timbl 1018: **
1.3 timbl 1019: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1020: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1021: ** The current method smells anyway.
1.2 timbl 1022: */
1023: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1024: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1025: || (request->content_encoding &&
1026: request->content_encoding != HTAtom_for("8bit") &&
1027: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1028: || strstr(HTAtom_name(rep_in), "image/")
1029: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1030: HTCopy(file_number, stream);
1.2 timbl 1031: } else { /* ascii text with CRLFs :-( */
1032: HTCopyNoCR(file_number, stream);
1033: }
1.7 secret 1034: (*targetClass.free)(stream);
1035:
1036: return HT_LOADED;
1037: }
1038:
1039:
1040:
1041: /* Parse a file given format and file pointer
1042: **
1043: ** This routine is responsible for creating and PRESENTING any
1044: ** graphic (or other) objects described by the file.
1045: **
1046: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1047: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1048: ** when the format is textual.
1049: **
1050: */
1.12 timbl 1051: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1052: HTFormat, rep_in,
1.7 secret 1053: FILE *, fp,
1.12 timbl 1054: HTRequest *, request)
1.7 secret 1055: {
1056: HTStream * stream;
1057: HTStreamClass targetClass;
1058:
1.34 luotonen 1059: stream = HTStreamStack(rep_in, request, YES);
1.7 secret 1060:
1061: if (!stream) {
1.30 frystyk 1062: char buffer[1024]; /* @@@@@@@@ */
1.7 secret 1063: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1064: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7 secret 1065: if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.29 frystyk 1066: return HTLoadError(request, 501, buffer);
1.7 secret 1067: }
1068:
1.9 timbl 1069: /* Push the data down the stream
1.7 secret 1070: **
1071: **
1072: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1073: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1074: ** The current method smells anyway.
1075: */
1076: targetClass = *(stream->isa); /* Copy pointers to procedures */
1077: HTFileCopy(fp, stream);
1.2 timbl 1078: (*targetClass.free)(stream);
1.1 timbl 1079:
1.2 timbl 1080: return HT_LOADED;
1.1 timbl 1081: }
1.2 timbl 1082:
1.10 timbl 1083:
1084: /* Converter stream: Network Telnet to internal character text
1085: ** -----------------------------------------------------------
1086: **
1087: ** The input is assumed to be in ASCII, with lines delimited
1088: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1089: ** pairs in the local representation. The (CR,LF) sequence
1090: ** when found is changed to a '\n' character, the internal
1091: ** C representation of a new line.
1092: */
1093:
1094:
1.11 timbl 1095: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1096: {
1097: char c = FROMASCII(net_char);
1098: if (me->had_cr) {
1099: if (c==LF) {
1100: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1101: me->had_cr = NO;
1102: return;
1103: } else {
1104: me->sink->isa->put_character(me->sink, CR); /* leftover */
1105: }
1106: }
1107: me->had_cr = (c==CR);
1108: if (!me->had_cr)
1109: me->sink->isa->put_character(me->sink, c); /* normal */
1110: }
1111:
1.11 timbl 1112: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1113: {
1114: CONST char * p;
1115: for(p=s; *p; p++) NetToText_put_character(me, *p);
1116: }
1117:
1.11 timbl 1118: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1119: {
1120: CONST char * p;
1121: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1122: }
1123:
1124: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1125: {
1126: me->sink->isa->free(me->sink); /* Close rest of pipe */
1127: free(me);
1128: }
1129:
1130: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1131: {
1132: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1133: free(me);
1134: }
1135:
/*	The class structure
**
**	Method table shared by all NetToText streams; HTNetToText()
**	stores its address in each new stream's isa field.
*/
PRIVATE HTStreamClass NetToTextClass = {
	"NetToText",			/* name string */
	NetToText_free,			/* free the stream and its sink */
	NetToText_abort,		/* abort the stream and its sink */
	NetToText_put_character,	/* convert and forward one character */
	NetToText_put_string,		/* same, for a 0-terminated string */
	NetToText_put_block		/* same, for a counted block */
};
1146:
1147: /* The creation method
1148: */
1149: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1150: {
1151: HTStream* me = (HTStream*)malloc(sizeof(*me));
1152: if (me == NULL) outofmem(__FILE__, "NetToText");
1153: me->isa = &NetToTextClass;
1154:
1155: me->had_cr = NO;
1156: me->sink = sink;
1157: return me;
1158: }
1.2 timbl 1159:
1160:
Webmaster