Annotation of libwww/Library/src/HTFormat.c, revision 1.40
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
22: #ifdef unix
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
27: /* Full pathname would be better! */
28: #endif
29: #endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.34 luotonen 48: #include "HTGuess.h"
49:
1.2 timbl 50:
51: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
52:
1.10 timbl 53: #ifdef ORIGINAL
1.2 timbl 54: struct _HTStream {
55: CONST HTStreamClass* isa;
56: /* ... */
57: };
1.10 timbl 58: #endif
59:
60: /* this version used by the NetToText stream */
/* Layout of the stream object as this file sees it.  Only the `isa'
** member is used in the visible code (HTCopy and friends copy the
** class record through it); the other members are not referenced here.
*/
61: struct _HTStream {
62: CONST HTStreamClass * isa; /* class record holding the stream's procedures */
63: BOOL had_cr; /* NOTE(review): presumably "previous char was CR" -- confirm */
64: HTStream * sink; /* NOTE(review): presumably the downstream target -- confirm */
65: };
1.2 timbl 66:
67:
1.17 luotonen 68: /*
69: ** Accept-Encoding and Accept-Language
70: */
/* One entry of an accepted-encodings or accepted-languages list, as
** built by HTAcceptEncoding() and HTAcceptLanguage().
*/
71: typedef struct _HTAcceptNode {
72: HTAtom * atom; /* atom for the encoding or language name */
73: float quality; /* quality factor supplied by the caller */
74: } HTAcceptNode;
75:
76:
77:
78:
1.2 timbl 79: /* Presentation methods
80: ** --------------------
81: */
82:
1.14 timbl 83: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 84:
1.31 frystyk 85: /* -------------------------------------------------------------------------
86: This function replaces the code in HTRequest_delete() in order to keep
87: the data structure hidden (it is NOT a joke!)
88: Henrik 14/03-94
89: ------------------------------------------------------------------------- */
90: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
91: {
92: HTList *cur = me;
93: HTPresentation *pres;
94: if (!me)
95: return;
96: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
97: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
98: free(pres);
99: }
100: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
101: }
102:
1.2 timbl 103:
104: /* Define a presentation system command for a content-type
105: ** -------------------------------------------------------
106: */
1.12 timbl 107: PUBLIC void HTSetPresentation ARGS6(
108: HTList *, conversions,
109: CONST char *, representation,
110: CONST char *, command,
111: float, quality,
112: float, secs,
113: float, secs_per_byte
1.2 timbl 114: ){
115:
116: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
117: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
118:
119: pres->rep = HTAtom_for(representation);
120: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
121: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
122: pres->quality = quality;
123: pres->secs = secs;
124: pres->secs_per_byte = secs_per_byte;
125: pres->rep = HTAtom_for(representation);
126: pres->command = 0;
127: StrAllocCopy(pres->command, command);
128:
1.12 timbl 129: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 130:
1.15 luotonen 131: #ifdef OLD_CODE
132: if (strcmp(representation, "*")==0) {
1.2 timbl 133: if (default_presentation) free(default_presentation);
134: default_presentation = pres;
1.12 timbl 135: } else
136: #endif
137: HTList_addObject(conversions, pres);
1.2 timbl 138: }
139:
140:
141: /* Define a built-in function for a content-type
142: ** ---------------------------------------------
143: */
1.12 timbl 144: PUBLIC void HTSetConversion ARGS7(
145: HTList *, conversions,
146: CONST char *, representation_in,
147: CONST char *, representation_out,
1.6 timbl 148: HTConverter*, converter,
1.12 timbl 149: float, quality,
150: float, secs,
151: float, secs_per_byte
1.2 timbl 152: ){
1.1 timbl 153:
1.2 timbl 154: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
155: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
156:
157: pres->rep = HTAtom_for(representation_in);
158: pres->rep_out = HTAtom_for(representation_out);
159: pres->converter = converter;
160: pres->command = NULL; /* Fixed */
161: pres->quality = quality;
162: pres->secs = secs;
163: pres->secs_per_byte = secs_per_byte;
164: pres->command = 0;
165:
1.12 timbl 166: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 167:
1.12 timbl 168: #ifdef OLD_CODE
1.2 timbl 169: if (strcmp(representation_in, "*")==0) {
170: if (default_presentation) free(default_presentation);
171: default_presentation = pres;
1.12 timbl 172: } else
173: #endif
174: HTList_addObject(conversions, pres);
1.2 timbl 175: }
1.1 timbl 176:
177:
178:
1.17 luotonen 179: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
180: char *, enc,
181: float, quality)
182: {
183: HTAcceptNode * node;
184: char * cur;
185:
186: if (!list || !enc || !*enc) return;
187:
188: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
189:
190: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
191: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
192: HTList_addObject(list, (void*)node);
193:
194: node->atom = HTAtom_for(enc);
195: node->quality = quality;
196: }
197:
198:
199: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
200: char *, lang,
201: float, quality)
202: {
203: HTAcceptNode * node;
204:
205: if (!list || !lang || !*lang) return;
206:
207: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
208: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
209:
210: HTList_addObject(list, (void*)node);
211: node->atom = HTAtom_for(lang);
212: node->quality = quality;
213: }
214:
215:
216: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
217: HTAtom *, actual)
218: {
219: char *t, *a, *st, *sa;
220: BOOL match = NO;
221:
1.22 luotonen 222: if (template && actual && (t = HTAtom_name(template))) {
223: if (!strcmp(t, "*"))
224: return YES;
1.17 luotonen 225:
1.22 luotonen 226: if (strchr(t, '*') &&
227: (a = HTAtom_name(actual)) &&
228: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 229:
1.22 luotonen 230: *sa = 0;
231: *st = 0;
232:
233: if ((*(st-1)=='*' &&
234: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
235: (*(st+1)=='*' && !strcasecomp(t,a)))
236: match = YES;
237:
238: *sa = '/';
239: *st = '/';
240: }
241: }
1.23 luotonen 242: return match;
1.17 luotonen 243: }
244:
1.36 luotonen 245: /*
246: * Added by takada@seraph.ntt.jp (94/04/08)
247: */
248: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
249: HTAtom *, actual)
250: {
251: char *t, *a, *st, *sa;
252: BOOL match = NO;
253:
254: if (template && actual &&
255: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
256: st = strchr(t, '_');
257: sa = strchr(a, '_');
258: if ((st != NULL) && (sa != NULL)) {
259: if (!strcasecomp(t, a))
260: match = YES;
261: else
262: match = NO;
263: }
264: else {
265: if (st != NULL) *st = 0;
266: if (sa != NULL) *sa = 0;
267: if (!strcasecomp(t, a))
268: match = YES;
269: else
270: match = NO;
271: if (st != NULL) *st = '_';
272: if (sa != NULL) *sa = '_';
273: }
274: }
275: return match;
276: }
277: /* end of addition */
278:
279:
1.17 luotonen 280:
281: PRIVATE float type_value ARGS2(HTAtom *, content_type,
282: HTList *, accepted)
283: {
284: HTList * cur = accepted;
285: HTPresentation * pres;
286: HTPresentation * wild = NULL;
287:
288: if (!content_type || !accepted) return -1;
289:
290: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
291: if (pres->rep == content_type)
292: return pres->quality;
293: else if (wild_match(pres->rep, content_type))
294: wild = pres;
295: }
296: if (wild) return wild->quality;
297: else return -1;
298: }
299:
300:
301: PRIVATE float lang_value ARGS2(HTAtom *, language,
302: HTList *, accepted)
303: {
304: HTList * cur = accepted;
305: HTAcceptNode * node;
306: HTAcceptNode * wild = NULL;
307:
308: if (!language || !accepted || HTList_isEmpty(accepted)) {
309: return 0.1;
310: }
311:
312: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
313: if (node->atom == language) {
314: return node->quality;
315: }
1.36 luotonen 316: /*
317: * patch by takada@seraph.ntt.jp (94/04/08)
318: * the original line was
319: * else if (wild_match(node->atom, language)) {
320: * and the new line is
321: */
322: else if (lang_match(node->atom, language)) {
1.17 luotonen 323: wild = node;
324: }
325: }
326:
327: if (wild) {
328: return wild->quality;
329: }
330: else {
331: return 0.1;
332: }
333: }
334:
335:
336: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
337: HTList *, accepted)
338: {
339: HTList * cur = accepted;
340: HTAcceptNode * node;
341: HTAcceptNode * wild = NULL;
342: char * e;
343:
344: if (!encoding || !accepted || HTList_isEmpty(accepted))
345: return 1;
346:
347: e = HTAtom_name(encoding);
348: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
349: return 1;
350:
351: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
352: if (node->atom == encoding)
353: return node->quality;
354: else if (wild_match(node->atom, encoding))
355: wild = node;
356: }
357: if (wild) return wild->quality;
358: else return 1;
359: }
360:
361:
362: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
363: HTList *, accepted_content_types,
364: HTList *, accepted_languages,
365: HTList *, accepted_encodings)
366: {
367: int accepted_cnt = 0;
368: HTList * accepted;
369: HTList * sorted;
370: HTList * cur;
371: HTContentDescription * d;
372:
373: if (!possibilities) return NO;
374:
375: accepted = HTList_new();
376: cur = possibilities;
377: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
378: float tv = type_value(d->content_type, accepted_content_types);
379: float lv = lang_value(d->content_language, accepted_languages);
380: float ev = encoding_value(d->content_encoding, accepted_encodings);
381:
382: if (tv > 0) {
383: d->quality *= tv * lv * ev;
384: HTList_addObject(accepted, d);
385: accepted_cnt++;
386: }
1.18 luotonen 387: else {
388: if (d->filename) free(d->filename);
389: free(d);
390: }
1.17 luotonen 391: }
392:
1.18 luotonen 393: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 394: CTRACE(stderr,
1.18 luotonen 395: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 396:
397: sorted = HTList_new();
398: while (accepted_cnt-- > 0) {
399: HTContentDescription * worst = NULL;
400: cur = accepted;
401: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
402: if (!worst || d->quality < worst->quality)
403: worst = d;
404: }
405: if (worst) {
406: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
407: accepted_cnt+1,
408: worst->quality,
409: (worst->content_type
410: ? HTAtom_name(worst->content_type) : "-"),
411: (worst->content_language
412: ? HTAtom_name(worst->content_language) :"-"),
413: (worst->content_encoding
414: ? HTAtom_name(worst->content_encoding) :"-"),
415: (worst->filename
416: ? worst->filename :"-"));
417: HTList_removeObject(accepted, (void*)worst);
418: HTList_addObject(sorted, (void*)worst);
419: }
420: }
1.18 luotonen 421: CTRACE(stderr, "\n");
1.17 luotonen 422: HTList_delete(accepted);
423: HTList_delete(possibilities->next);
424: possibilities->next = sorted->next;
425: sorted->next = NULL;
426: HTList_delete(sorted);
427:
428: if (!HTList_isEmpty(possibilities)) return YES;
429: else return NO;
430: }
431:
432:
433:
434:
435:
1.13 timbl 436: /* Socket Input Buffering
437: ** ----------------------
1.1 timbl 438: **
1.13 timbl 439: ** This code is used because one cannot in general open a
440: ** file descriptor for a socket.
441: **
1.1 timbl 442: ** The input file is read using the macro which can read from
1.13 timbl 443: ** a socket or a file, but this should not be used for files
444: ** as fopen() etc is more portable of course.
445: **
1.1 timbl 446: ** The input buffer size, if large will give greater efficiency and
447: ** release the server faster, and if small will save space on PCs etc.
448: */
449:
450:
451: /* Set up the buffering
452: **
453: ** These routines are public because they are in fact needed by
454: ** many parsers, and on PCs and Macs we should not duplicate
455: ** the static buffer area.
456: */
1.13 timbl 457: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 458: {
1.28 frystyk 459: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 460: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
461: isoc->input_file_number = file_number;
462: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
463: return isoc;
1.1 timbl 464: }
465:
1.35 frystyk 466: /* This should return HT_INTERRUPTED if interrupted BUT the connection
467: MUST not be closed */
468: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 469: {
1.35 frystyk 470: int ch;
1.1 timbl 471: do {
1.13 timbl 472: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 473: int status = NETREAD(
1.13 timbl 474: isoc->input_file_number,
475: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 476: if (status <= 0) {
1.39 frystyk 477: if (status == 0)
478: return EOF;
479: if (status == HT_INTERRUPTED) {
480: if (TRACE)
481: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
482: return HT_INTERRUPTED;
483: }
484: HTInetStatus("read");
485: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 486: }
1.35 frystyk 487: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 488: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 489: }
1.39 frystyk 490: ch = (unsigned char) *isoc->input_pointer++;
491: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 492:
493: return FROMASCII(ch);
494: }
495:
1.17 luotonen 496: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 497: {
498: if (me) free(me);
499: }
500:
501:
1.16 luotonen 502: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
503: int *, len)
504: {
505: if (isoc->input_pointer >= isoc->input_limit) {
506: int status = NETREAD(isoc->input_file_number,
507: isoc->input_buffer,
508: ((*len < INPUT_BUFFER_SIZE) ?
509: *len : INPUT_BUFFER_SIZE));
510: if (status <= 0) {
511: isoc->input_limit = isoc->input_buffer;
512: if (status < 0)
1.39 frystyk 513: HTInetStatus("read");
1.16 luotonen 514: *len = 0;
515: return NULL;
516: }
517: else {
518: *len = status;
519: return isoc->input_buffer;
520: }
521: }
522: else {
523: char * ret = isoc->input_pointer;
524: *len = isoc->input_limit - isoc->input_pointer;
525: isoc->input_pointer = isoc->input_limit;
526: return ret;
527: }
528: }
529:
530:
1.15 luotonen 531: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
532: {
533: if (isoc) {
534: int status;
535:
536: isoc->input_pointer = isoc->input_buffer;
537: status = NETREAD(isoc->input_file_number,
538: isoc->input_buffer,
539: INPUT_BUFFER_SIZE);
540: if (status <= 0) {
541: isoc->input_limit = isoc->input_buffer;
542: if (status < 0)
1.39 frystyk 543: HTInetStatus("read");
1.15 luotonen 544: }
545: else
546: isoc->input_limit = isoc->input_buffer + status;
547: return status;
548: }
549: return -1;
550: }
551:
552:
553: PRIVATE void ascii_cat ARGS3(char **, linep,
554: char *, start,
555: char *, end)
556: {
557: if (linep && start && end && start <= end) {
558: char *ptr;
559:
560: if (*linep) {
561: int len = strlen(*linep);
562: *linep = (char*)realloc(*linep, len + end-start + 1);
563: ptr = *linep + len;
564: }
565: else {
566: ptr = *linep = (char*)malloc(end-start + 1);
567: }
568:
569: while (start < end) {
570: *ptr = FROMASCII(*start);
571: ptr++;
572: start++;
573: }
574: *ptr = 0;
575: }
576: }
577:
578:
579: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
580: BOOL, unfold)
581: {
582: if (!isoc)
583: return NULL;
584: else {
585: BOOL check_unfold = NO;
586: int prev_cr = 0;
587: char *start = isoc->input_pointer;
588: char *cur = isoc->input_pointer;
589: char * line = NULL;
590:
591: for(;;) {
592: /*
593: ** Get more if needed to complete line
594: */
595: if (cur >= isoc->input_limit) { /* Need more data */
596: ascii_cat(&line, start, cur);
597: if (fill_in_buffer(isoc) <= 0)
598: return line;
599: start = cur = isoc->input_pointer;
600: } /* if need more data */
601:
602: /*
603: ** Find a line feed if there is one
604: */
605: for(; cur < isoc->input_limit; cur++) {
606: char c = FROMASCII(*cur);
607: if (!c) {
1.18 luotonen 608: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 609: return NULL; /* Panic! read a 0! */
610: }
611: if (check_unfold && c != ' ' && c != '\t') {
612: return line; /* Note: didn't update isoc->input_pointer */
613: }
614: else {
615: check_unfold = NO;
616: }
617:
618: if (c=='\r') {
619: prev_cr = 1;
620: }
621: else {
622: if (c=='\n') { /* Found a line feed */
623: ascii_cat(&line, start, cur-prev_cr);
624: start = isoc->input_pointer = cur+1;
625:
626: if (line && strlen(line) > 0 && unfold) {
627: check_unfold = YES;
628: }
629: else {
630: return line;
631: }
632: } /* if NL */
633: /* else just a regular character */
634: prev_cr = 0;
635: } /* if not CR */
636: } /* while characters in buffer remain */
637: } /* until line read or end-of-file */
638: } /* valid parameters to function */
639: }
640:
641:
642: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
643: {
644: return get_some_line(isoc, NO);
645: }
646:
647: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
648: {
649: return get_some_line(isoc, YES);
650: }
651:
652:
653: /*
654: ** Read HTTP status line (if there is one).
655: **
656: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
657: ** First look at the stub in ASCII and check if it starts "HTTP/".
658: **
659: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
660: ** will be taken as a HTTP 1.0 server. Failure.
661: */
662: #define STUB_LENGTH 20
663: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
664: {
665: if (!isoc) {
666: return NULL;
667: }
668: else {
669: char buf[STUB_LENGTH + 1];
670: int i;
671: char server_version[STUB_LENGTH+1];
672: int server_status;
673:
674: /*
675: ** Read initial buffer
676: */
677: if (isoc->input_pointer >= isoc->input_limit &&
678: fill_in_buffer(isoc) <= 0) {
679: return NULL;
680: }
681:
682: for (i=0; i < STUB_LENGTH; i++)
683: buf[i] = FROMASCII(isoc->input_buffer[i]);
684: buf[STUB_LENGTH] = 0;
685:
686: if (0 != strncmp(buf, "HTTP/", 5) ||
687: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
688: return NULL;
689: else
690: return get_some_line(isoc, NO);
691: }
692: }
693:
694:
695: /*
696: ** Do heuristic test to see if this is binary.
697: **
698: ** We check for characters above 128 in the first few bytes, and
699: ** if we find them we forget the html default.
700: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
701: **
702: ** Bugs: An HTTP 0.9 server returning a binary document with
703: ** characters < 128 will be read as ASCII.
704: */
705: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
706: {
707: if (isoc &&
708: (isoc->input_pointer < isoc->input_limit ||
709: fill_in_buffer(isoc) > 0)) {
710: char *p = isoc->input_buffer;
711: int i = STUB_LENGTH;
712:
713: for( ; i && p < isoc->input_limit; p++, i++)
714: if (((int)*p)&128)
715: return YES;
716: }
717: return NO;
718: }
719:
720:
721:
1.1 timbl 722: /* Stream the data to an ouput file as binary
723: */
1.38 luotonen 724: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 725: int, input,
726: FILE *, output)
1.1 timbl 727: {
728: do {
729: int status = NETREAD(
1.13 timbl 730: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 731: if (status <= 0) {
732: if (status == 0) return 0;
733: if (TRACE) fprintf(stderr,
1.39 frystyk 734: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 735: return 2; /* Error */
736: }
1.13 timbl 737: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 738: } while (YES);
739: }
740:
1.38 luotonen 741:
742: /*
743: * Normal HTTP headers are never bigger than 2K.
744: */
745: #define S_BUFFER_SIZE 2000
746:
747: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
748: {
749: if (isoc) {
750: isoc->s_do_buffering = YES;
751: if (!isoc->s_buffer) {
752: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
753: isoc->s_buffer_size = S_BUFFER_SIZE;
754: }
755: isoc->s_buffer_cur = isoc->s_buffer;
756: }
757: }
758:
759: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
760: {
761: if (isoc) {
762: isoc->s_do_buffering = NO;
763: if (isoc->s_buffer_cur)
764: *isoc->s_buffer_cur = 0;
765: }
766: }
767:
768: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
769: char **, buffer_ptr)
770: {
771: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
772: return 0;
773: else {
774: *isoc->s_buffer_cur = 0;
775: if (buffer_ptr)
776: *buffer_ptr = isoc->s_buffer;
777: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
778: }
779: }
1.1 timbl 780:
1.33 luotonen 781: PRIVATE BOOL better_match ARGS2(HTFormat, f,
782: HTFormat, g)
783: {
784: CONST char *p, *q;
785:
786: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
787: int i,j;
788: for(i=0 ; *p; p++) if (*p == '*') i++;
789: for(j=0 ; *q; q++) if (*q == '*') j++;
790: if (i < j) return YES;
791: }
792: return NO;
793: }
794:
1.17 luotonen 795:
1.2 timbl 796: /* Create a filter stack
797: ** ---------------------
798: **
1.7 secret 799: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 800: ** structure is made to hold the destination format while the
801: ** new stack is generated. This is just to pass the out format to
802: ** MIME so far. Storing the format of a stream in the stream might
803: ** be a lot neater.
1.10 timbl 804: **
1.29 frystyk 805: ** The star/star format is special, in that if you can take
1.40 ! frystyk 806: ** that you can take anything.
! 807: **
! 808: ** On succes, request->error_block is set to YES so no more error
! 809: ** messages to the stream as the stream might be of any format.
1.2 timbl 810: */
1.34 luotonen 811: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
812: HTRequest *, request,
813: BOOL, guess)
1.2 timbl 814: {
1.12 timbl 815: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 816: HTList * conversion[2];
817: int which_list;
1.25 frystyk 818: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 819: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 820:
1.2 timbl 821: if (TRACE) fprintf(stderr,
1.39 frystyk 822: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 823: HTAtom_name(rep_in),
1.2 timbl 824: HTAtom_name(rep_out));
1.34 luotonen 825:
826: if (guess && rep_in == WWW_UNKNOWN) {
827: CTRACE(stderr, "Returning... guessing stream\n");
1.40 ! frystyk 828: request->error_block = YES; /* No more error output to stream */
1.34 luotonen 829: return HTGuess_new(request);
830: }
831:
1.21 luotonen 832: if (rep_out == WWW_SOURCE || rep_out == rep_in)
833: return request->output_stream;
1.2 timbl 834:
1.14 timbl 835: conversion[0] = request->conversions;
836: conversion[1] = HTConversions;
1.17 luotonen 837:
1.15 luotonen 838: for(which_list = 0; which_list<2; which_list++) {
839: HTList * cur = conversion[which_list];
840:
841: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 842: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 843: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
844: if (!best_match ||
845: better_match(pres->rep, best_match->rep) ||
846: (!better_match(best_match->rep, pres->rep) &&
847: pres->quality > best_quality)) {
1.25 frystyk 848: best_match = pres;
849: best_quality = pres->quality;
1.10 timbl 850: }
851: }
1.33 luotonen 852:
1.29 frystyk 853: #ifdef OLD_CODE
854: /* This case is now included in the best_match loop */
1.25 frystyk 855: /* Special case when input format is 'www/source' */
1.10 timbl 856: if (pres->rep == source) {
1.29 frystyk 857: if (pres->rep_out == rep_out ||
858: wild_match(pres->rep_out, rep_out))
1.10 timbl 859: source_match = pres;
1.2 timbl 860: }
1.29 frystyk 861: #endif
1.2 timbl 862: }
863: }
1.33 luotonen 864:
1.29 frystyk 865: match = best_match ? best_match : NULL;
866: if (match) {
1.40 ! frystyk 867: request->error_block = YES; /* No more error output to stream */
1.29 frystyk 868: if (match->rep == WWW_SOURCE) {
1.39 frystyk 869: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 870: HTAtom_name(match->rep),
871: HTAtom_name(rep_out));
872: }
873: return (*match->converter)(
1.25 frystyk 874: request, match->command, rep_in, rep_out,
875: request->output_stream);
1.29 frystyk 876: }
1.2 timbl 877: return NULL;
878: }
879:
880:
881: /* Find the cost of a filter stack
882: ** -------------------------------
883: **
884: ** Must return the cost of the same stack which StreamStack would set up.
885: **
886: ** On entry,
887: ** length The size of the data to be converted
888: */
1.12 timbl 889: PUBLIC float HTStackValue ARGS5(
1.14 timbl 890: HTList *, theseConversions,
1.10 timbl 891: HTFormat, rep_in,
1.2 timbl 892: HTFormat, rep_out,
893: float, initial_value,
894: long int, length)
895: {
1.14 timbl 896: int which_list;
897: HTList* conversion[2];
898:
1.2 timbl 899: if (TRACE) fprintf(stderr,
1.39 frystyk 900: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 901: HTAtom_name(rep_in), initial_value,
1.2 timbl 902: HTAtom_name(rep_out));
903:
904: if (rep_out == WWW_SOURCE ||
1.10 timbl 905: rep_out == rep_in) return 0.0;
1.2 timbl 906:
1.12 timbl 907: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 908:
1.14 timbl 909: conversion[0] = theseConversions;
910: conversion[1] = HTConversions;
911:
912: for(which_list = 0; which_list<2; which_list++)
913: if (conversion[which_list]) {
1.15 luotonen 914: HTList * cur = conversion[which_list];
1.2 timbl 915: HTPresentation * pres;
1.15 luotonen 916: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
917: if (pres->rep == rep_in &&
1.17 luotonen 918: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 919: float value = initial_value * pres->quality;
920: if (HTMaxSecs != 0.0)
1.15 luotonen 921: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 922: /HTMaxSecs;
923: return value;
924: }
925: }
926: }
927:
928: return -1e30; /* Really bad */
1.17 luotonen 929: }
930:
931:
1.2 timbl 932:
1.1 timbl 933:
1.2 timbl 934: /* Push data from a socket down a stream
935: ** -------------------------------------
1.1 timbl 936: **
1.2 timbl 937: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 938: ** graphic (or other) objects described by the file.
1.2 timbl 939: **
940: ** The file number given is assumed to be a TELNET stream ie containing
941: ** CRLF at the end of lines which need to be stripped to LF for unix
942: ** when the format is textual.
943: **
1.26 luotonen 944: ** RETURNS the number of bytes transferred.
945: **
1.1 timbl 946: */
1.26 luotonen 947: PUBLIC int HTCopy ARGS2(
1.2 timbl 948: int, file_number,
949: HTStream*, sink)
1.1 timbl 950: {
1.2 timbl 951: HTStreamClass targetClass;
1.13 timbl 952: HTInputSocket * isoc;
1.26 luotonen 953: int cnt = 0;
954:
1.5 timbl 955: /* Push the data down the stream
1.2 timbl 956: **
957: */
958: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 959: isoc = HTInputSocket_new(file_number);
1.2 timbl 960:
961: /* Push binary from socket down sink
1.10 timbl 962: **
963: ** This operation could be put into a main event loop
1.2 timbl 964: */
965: for(;;) {
966: int status = NETREAD(
1.13 timbl 967: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 968: if (status <= 0) {
969: if (status == 0) break;
970: if (TRACE) fprintf(stderr,
1.39 frystyk 971: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 972: status, errno);
1.2 timbl 973: break;
974: }
1.26 luotonen 975:
1.8 timbl 976: #ifdef NOT_ASCII
977: {
978: char * p;
1.13 timbl 979: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 980: *p = FROMASCII(*p);
981: }
982: }
983: #endif
984:
1.13 timbl 985: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 986: cnt += status;
1.2 timbl 987: } /* next bufferload */
1.26 luotonen 988:
1.13 timbl 989: HTInputSocket_free(isoc);
1.26 luotonen 990:
991: return cnt;
1.2 timbl 992: }
993:
1.1 timbl 994:
1.7 secret 995:
996: /* Push data from a file pointer down a stream
997: ** -------------------------------------
998: **
999: ** This routine is responsible for creating and PRESENTING any
1000: ** graphic (or other) objects described by the file.
1001: **
1002: **
1003: */
1004: PUBLIC void HTFileCopy ARGS2(
1005: FILE *, fp,
1006: HTStream*, sink)
1007: {
1008: HTStreamClass targetClass;
1.13 timbl 1009: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1010:
1011: /* Push the data down the stream
1012: **
1013: */
1014: targetClass = *(sink->isa); /* Copy pointers to procedures */
1015:
1016: /* Push binary from socket down sink
1017: */
1018: for(;;) {
1019: int status = fread(
1020: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1021: if (status == 0) { /* EOF or error */
1022: if (ferror(fp) == 0) break;
1023: if (TRACE) fprintf(stderr,
1.39 frystyk 1024: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1025: break;
1026: }
1027: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1028: } /* next bufferload */
1.7 secret 1029: }
1030:
1031:
1032:
1033:
1.2 timbl 1034: /* Push data from a socket down a stream STRIPPING CR
1035: ** --------------------------------------------------
1036: **
1037: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1038: ** graphic (or other) objects described by the socket.
1.2 timbl 1039: **
1040: ** The file number given is assumed to be a TELNET stream ie containing
1041: ** CRLF at the end of lines which need to be stripped to LF for unix
1042: ** when the format is textual.
1.37 frystyk 1043: **
1044: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1045: */
1.2 timbl 1046: PUBLIC void HTCopyNoCR ARGS2(
1047: int, file_number,
1048: HTStream*, sink)
1049: {
1.13 timbl 1050: HTStreamClass targetClass;
1051: HTInputSocket * isoc;
1.37 frystyk 1052: int ch;
1.1 timbl 1053:
1.2 timbl 1054: /* Push the data, ignoring CRLF, down the stream
1055: **
1056: */
1057: targetClass = *(sink->isa); /* Copy pointers to procedures */
1058:
1059: /* Push text from telnet socket down sink
1060: **
1061: ** @@@@@ To push strings could be faster? (especially is we
1062: ** cheat and don't ignore CR! :-}
1063: */
1.13 timbl 1064: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1065: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1066: (*targetClass.put_character)(sink, ch);
1.13 timbl 1067: HTInputSocket_free(isoc);
1.2 timbl 1068: }
1.1 timbl 1069:
1.2 timbl 1070:
1.7 secret 1071:
1.2 timbl 1072: /* Parse a socket given format and file number
1073: **
1074: ** This routine is responsible for creating and PRESENTING any
1075: ** graphic (or other) objects described by the file.
1076: **
1077: ** The file number given is assumed to be a TELNET stream ie containing
1078: ** CRLF at the end of lines which need to be stripped to LF for unix
1079: ** when the format is textual.
1080: **
1081: */
1.14 timbl 1082:
1.12 timbl 1083: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1084: HTFormat, rep_in,
1.2 timbl 1085: int, file_number,
1.12 timbl 1086: HTRequest *, request)
1.2 timbl 1087: {
1088: HTStream * stream;
1089: HTStreamClass targetClass;
1.1 timbl 1090:
1.40 ! frystyk 1091: if (request->error_stack) {
! 1092: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
! 1093: return -1;
! 1094: }
! 1095:
1.34 luotonen 1096: stream = HTStreamStack(rep_in, request, YES);
1.29 frystyk 1097:
1.2 timbl 1098: if (!stream) {
1.30 frystyk 1099: char buffer[1024]; /* @@@@@@@@ */
1.2 timbl 1100: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1101: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.39 frystyk 1102: if (TRACE) fprintf(stderr, "ParseSocket. %s\n", buffer);
1.16 luotonen 1103: return HTLoadError(request, 501, buffer);
1.2 timbl 1104: }
1.1 timbl 1105:
1.3 timbl 1106: /* Push the data, ignoring CRLF if necessary, down the stream
1107: **
1.2 timbl 1108: **
1.3 timbl 1109: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1110: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1111: ** The current method smells anyway.
1.2 timbl 1112: */
1113: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1114: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1115: || (request->content_encoding &&
1116: request->content_encoding != HTAtom_for("8bit") &&
1117: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1118: || strstr(HTAtom_name(rep_in), "image/")
1119: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1120: HTCopy(file_number, stream);
1.2 timbl 1121: } else { /* ascii text with CRLFs :-( */
1122: HTCopyNoCR(file_number, stream);
1123: }
1.7 secret 1124: (*targetClass.free)(stream);
1125:
1126: return HT_LOADED;
1127: }
1128:
1129:
1130:
1131: /* Parse a file given format and file pointer
1132: **
1133: ** This routine is responsible for creating and PRESENTING any
1134: ** graphic (or other) objects described by the file.
1135: **
1136: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1137: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1138: ** when the format is textual.
1139: **
1140: */
1.12 timbl 1141: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1142: HTFormat, rep_in,
1.7 secret 1143: FILE *, fp,
1.12 timbl 1144: HTRequest *, request)
1.7 secret 1145: {
1146: HTStream * stream;
1147: HTStreamClass targetClass;
1.40 ! frystyk 1148:
! 1149: if (request->error_stack) {
! 1150: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
! 1151: return -1;
! 1152: }
1.7 secret 1153:
1.34 luotonen 1154: stream = HTStreamStack(rep_in, request, YES);
1.7 secret 1155:
1156: if (!stream) {
1.30 frystyk 1157: char buffer[1024]; /* @@@@@@@@ */
1.7 secret 1158: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1159: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.39 frystyk 1160: if (TRACE) fprintf(stderr, "ParseFile... %s\n", buffer);
1.29 frystyk 1161: return HTLoadError(request, 501, buffer);
1.7 secret 1162: }
1163:
1.9 timbl 1164: /* Push the data down the stream
1.7 secret 1165: **
1166: **
1167: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1168: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1169: ** The current method smells anyway.
1170: */
1171: targetClass = *(stream->isa); /* Copy pointers to procedures */
1172: HTFileCopy(fp, stream);
1.2 timbl 1173: (*targetClass.free)(stream);
1.1 timbl 1174:
1.2 timbl 1175: return HT_LOADED;
1.1 timbl 1176: }
1.2 timbl 1177:
1.10 timbl 1178:
1179: /* Converter stream: Network Telnet to internal character text
1180: ** -----------------------------------------------------------
1181: **
1182: ** The input is assumed to be in ASCII, with lines delimited
1183: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1184: ** pairs in the local representation. The (CR,LF) sequence
1185: ** when found is changed to a '\n' character, the internal
1186: ** C representation of a new line.
1187: */
1188:
1189:
1.11 timbl 1190: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1191: {
1192: char c = FROMASCII(net_char);
1193: if (me->had_cr) {
1194: if (c==LF) {
1195: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1196: me->had_cr = NO;
1197: return;
1198: } else {
1199: me->sink->isa->put_character(me->sink, CR); /* leftover */
1200: }
1201: }
1202: me->had_cr = (c==CR);
1203: if (!me->had_cr)
1204: me->sink->isa->put_character(me->sink, c); /* normal */
1205: }
1206:
1.11 timbl 1207: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1208: {
1209: CONST char * p;
1210: for(p=s; *p; p++) NetToText_put_character(me, *p);
1211: }
1212:
1.11 timbl 1213: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1214: {
1215: CONST char * p;
1216: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1217: }
1218:
1219: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1220: {
1221: me->sink->isa->free(me->sink); /* Close rest of pipe */
1222: free(me);
1223: }
1224:
1225: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1226: {
1227: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1228: free(me);
1229: }
1230:
1231: /* The class structure
1232: */
1233: PRIVATE HTStreamClass NetToTextClass = {
1234: "NetToText",
1235: NetToText_free,
1236: NetToText_abort,
1237: NetToText_put_character,
1238: NetToText_put_string,
1239: NetToText_put_block
1240: };
1241:
1242: /* The creation method
1243: */
1244: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1245: {
1246: HTStream* me = (HTStream*)malloc(sizeof(*me));
1247: if (me == NULL) outofmem(__FILE__, "NetToText");
1248: me->isa = &NetToTextClass;
1249:
1250: me->had_cr = NO;
1251: me->sink = sink;
1252: return me;
1253: }
1.2 timbl 1254:
1255:
Webmaster