Annotation of libwww/Library/src/HTFormat.c, revision 1.39
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
22: #ifdef unix
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
27: /* Full pathname would be better! */
28: #endif
29: #endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.34 luotonen 48: #include "HTGuess.h"
49:
1.2 timbl 50:
51: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
52:
1.10 timbl 53: #ifdef ORIGINAL
1.2 timbl 54: struct _HTStream {
55: CONST HTStreamClass* isa;
56: /* ... */
57: };
1.10 timbl 58: #endif
59:
60: /* Stream object layout used by the NetToText stream.  (The stub variant
   just above is only compiled when ORIGINAL is defined.) */
61: struct _HTStream {
62: CONST HTStreamClass * isa; /* class record; code in this file copies *isa to reach put_block, put_character and free */
63: BOOL had_cr; /* NOTE(review): presumably "previous character was a CR"; the NetToText code is not in view -- confirm */
64: HTStream * sink; /* NOTE(review): presumably the downstream stream that receives the output -- confirm */
65: };
1.2 timbl 66:
67:
1.17 luotonen 68: /*
69: ** Accept-Encoding and Accept-Language
   **
   ** One entry of an accept list as built by HTAcceptEncoding() and
   ** HTAcceptLanguage() and consulted by encoding_value() and lang_value().
70: */
71: typedef struct _HTAcceptNode {
72: HTAtom * atom; /* atom obtained from HTAtom_for() for the encoding or language name */
73: float quality; /* quality factor supplied by the caller when the entry was registered */
74: } HTAcceptNode;
75:
76:
77:
78:
1.2 timbl 79: /* Presentation methods
80: ** --------------------
81: */
82:
1.14 timbl 83: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 84:
1.31 frystyk 85: /* -------------------------------------------------------------------------
86: This function replaces the code in HTRequest_delete() in order to keep
87: the data structure hidden (it is NOT a joke!)
88: Henrik 14/03-94
89: ------------------------------------------------------------------------- */
90: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
91: {
92: HTList *cur = me;
93: HTPresentation *pres;
94: if (!me)
95: return;
96: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
97: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
98: free(pres);
99: }
100: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
101: }
102:
1.2 timbl 103:
104: /* Define a presentation system command for a content-type
105: ** -------------------------------------------------------
106: */
1.12 timbl 107: PUBLIC void HTSetPresentation ARGS6(
108: HTList *, conversions,
109: CONST char *, representation,
110: CONST char *, command,
111: float, quality,
112: float, secs,
113: float, secs_per_byte
1.2 timbl 114: ){
115:
116: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
117: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
118:
119: pres->rep = HTAtom_for(representation);
120: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
121: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
122: pres->quality = quality;
123: pres->secs = secs;
124: pres->secs_per_byte = secs_per_byte;
125: pres->rep = HTAtom_for(representation);
126: pres->command = 0;
127: StrAllocCopy(pres->command, command);
128:
1.12 timbl 129: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 130:
1.15 luotonen 131: #ifdef OLD_CODE
132: if (strcmp(representation, "*")==0) {
1.2 timbl 133: if (default_presentation) free(default_presentation);
134: default_presentation = pres;
1.12 timbl 135: } else
136: #endif
137: HTList_addObject(conversions, pres);
1.2 timbl 138: }
139:
140:
141: /* Define a built-in function for a content-type
142: ** ---------------------------------------------
143: */
1.12 timbl 144: PUBLIC void HTSetConversion ARGS7(
145: HTList *, conversions,
146: CONST char *, representation_in,
147: CONST char *, representation_out,
1.6 timbl 148: HTConverter*, converter,
1.12 timbl 149: float, quality,
150: float, secs,
151: float, secs_per_byte
1.2 timbl 152: ){
1.1 timbl 153:
1.2 timbl 154: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
155: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
156:
157: pres->rep = HTAtom_for(representation_in);
158: pres->rep_out = HTAtom_for(representation_out);
159: pres->converter = converter;
160: pres->command = NULL; /* Fixed */
161: pres->quality = quality;
162: pres->secs = secs;
163: pres->secs_per_byte = secs_per_byte;
164: pres->command = 0;
165:
1.12 timbl 166: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 167:
1.12 timbl 168: #ifdef OLD_CODE
1.2 timbl 169: if (strcmp(representation_in, "*")==0) {
170: if (default_presentation) free(default_presentation);
171: default_presentation = pres;
1.12 timbl 172: } else
173: #endif
174: HTList_addObject(conversions, pres);
1.2 timbl 175: }
1.1 timbl 176:
177:
178:
1.17 luotonen 179: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
180: char *, enc,
181: float, quality)
182: {
183: HTAcceptNode * node;
184: char * cur;
185:
186: if (!list || !enc || !*enc) return;
187:
188: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
189:
190: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
191: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
192: HTList_addObject(list, (void*)node);
193:
194: node->atom = HTAtom_for(enc);
195: node->quality = quality;
196: }
197:
198:
199: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
200: char *, lang,
201: float, quality)
202: {
203: HTAcceptNode * node;
204:
205: if (!list || !lang || !*lang) return;
206:
207: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
208: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
209:
210: HTList_addObject(list, (void*)node);
211: node->atom = HTAtom_for(lang);
212: node->quality = quality;
213: }
214:
215:
216: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
217: HTAtom *, actual)
218: {
219: char *t, *a, *st, *sa;
220: BOOL match = NO;
221:
1.22 luotonen 222: if (template && actual && (t = HTAtom_name(template))) {
223: if (!strcmp(t, "*"))
224: return YES;
1.17 luotonen 225:
1.22 luotonen 226: if (strchr(t, '*') &&
227: (a = HTAtom_name(actual)) &&
228: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 229:
1.22 luotonen 230: *sa = 0;
231: *st = 0;
232:
233: if ((*(st-1)=='*' &&
234: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
235: (*(st+1)=='*' && !strcasecomp(t,a)))
236: match = YES;
237:
238: *sa = '/';
239: *st = '/';
240: }
241: }
1.23 luotonen 242: return match;
1.17 luotonen 243: }
244:
1.36 luotonen 245: /*
246: * Added by takada@seraph.ntt.jp (94/04/08)
247: */
248: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
249: HTAtom *, actual)
250: {
251: char *t, *a, *st, *sa;
252: BOOL match = NO;
253:
254: if (template && actual &&
255: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
256: st = strchr(t, '_');
257: sa = strchr(a, '_');
258: if ((st != NULL) && (sa != NULL)) {
259: if (!strcasecomp(t, a))
260: match = YES;
261: else
262: match = NO;
263: }
264: else {
265: if (st != NULL) *st = 0;
266: if (sa != NULL) *sa = 0;
267: if (!strcasecomp(t, a))
268: match = YES;
269: else
270: match = NO;
271: if (st != NULL) *st = '_';
272: if (sa != NULL) *sa = '_';
273: }
274: }
275: return match;
276: }
277: /* end of addition */
278:
279:
1.17 luotonen 280:
281: PRIVATE float type_value ARGS2(HTAtom *, content_type,
282: HTList *, accepted)
283: {
284: HTList * cur = accepted;
285: HTPresentation * pres;
286: HTPresentation * wild = NULL;
287:
288: if (!content_type || !accepted) return -1;
289:
290: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
291: if (pres->rep == content_type)
292: return pres->quality;
293: else if (wild_match(pres->rep, content_type))
294: wild = pres;
295: }
296: if (wild) return wild->quality;
297: else return -1;
298: }
299:
300:
301: PRIVATE float lang_value ARGS2(HTAtom *, language,
302: HTList *, accepted)
303: {
304: HTList * cur = accepted;
305: HTAcceptNode * node;
306: HTAcceptNode * wild = NULL;
307:
308: if (!language || !accepted || HTList_isEmpty(accepted)) {
309: return 0.1;
310: }
311:
312: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
313: if (node->atom == language) {
314: return node->quality;
315: }
1.36 luotonen 316: /*
317: * patch by takada@seraph.ntt.jp (94/04/08)
318: * the original line was
319: * else if (wild_match(node->atom, language)) {
320: * and the new line is
321: */
322: else if (lang_match(node->atom, language)) {
1.17 luotonen 323: wild = node;
324: }
325: }
326:
327: if (wild) {
328: return wild->quality;
329: }
330: else {
331: return 0.1;
332: }
333: }
334:
335:
336: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
337: HTList *, accepted)
338: {
339: HTList * cur = accepted;
340: HTAcceptNode * node;
341: HTAcceptNode * wild = NULL;
342: char * e;
343:
344: if (!encoding || !accepted || HTList_isEmpty(accepted))
345: return 1;
346:
347: e = HTAtom_name(encoding);
348: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
349: return 1;
350:
351: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
352: if (node->atom == encoding)
353: return node->quality;
354: else if (wild_match(node->atom, encoding))
355: wild = node;
356: }
357: if (wild) return wild->quality;
358: else return 1;
359: }
360:
361:
362: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
363: HTList *, accepted_content_types,
364: HTList *, accepted_languages,
365: HTList *, accepted_encodings)
366: {
367: int accepted_cnt = 0;
368: HTList * accepted;
369: HTList * sorted;
370: HTList * cur;
371: HTContentDescription * d;
372:
373: if (!possibilities) return NO;
374:
375: accepted = HTList_new();
376: cur = possibilities;
377: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
378: float tv = type_value(d->content_type, accepted_content_types);
379: float lv = lang_value(d->content_language, accepted_languages);
380: float ev = encoding_value(d->content_encoding, accepted_encodings);
381:
382: if (tv > 0) {
383: d->quality *= tv * lv * ev;
384: HTList_addObject(accepted, d);
385: accepted_cnt++;
386: }
1.18 luotonen 387: else {
388: if (d->filename) free(d->filename);
389: free(d);
390: }
1.17 luotonen 391: }
392:
1.18 luotonen 393: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 394: CTRACE(stderr,
1.18 luotonen 395: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 396:
397: sorted = HTList_new();
398: while (accepted_cnt-- > 0) {
399: HTContentDescription * worst = NULL;
400: cur = accepted;
401: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
402: if (!worst || d->quality < worst->quality)
403: worst = d;
404: }
405: if (worst) {
406: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
407: accepted_cnt+1,
408: worst->quality,
409: (worst->content_type
410: ? HTAtom_name(worst->content_type) : "-"),
411: (worst->content_language
412: ? HTAtom_name(worst->content_language) :"-"),
413: (worst->content_encoding
414: ? HTAtom_name(worst->content_encoding) :"-"),
415: (worst->filename
416: ? worst->filename :"-"));
417: HTList_removeObject(accepted, (void*)worst);
418: HTList_addObject(sorted, (void*)worst);
419: }
420: }
1.18 luotonen 421: CTRACE(stderr, "\n");
1.17 luotonen 422: HTList_delete(accepted);
423: HTList_delete(possibilities->next);
424: possibilities->next = sorted->next;
425: sorted->next = NULL;
426: HTList_delete(sorted);
427:
428: if (!HTList_isEmpty(possibilities)) return YES;
429: else return NO;
430: }
431:
432:
433:
434:
435:
1.13 timbl 436: /* Socket Input Buffering
437: ** ----------------------
1.1 timbl 438: **
1.13 timbl 439: ** This code is used because one cannot in general open a
440: ** file descriptor for a socket.
441: **
1.1 timbl 442: ** The input file is read using the macro which can read from
1.13 timbl 443: ** a socket or a file, but this should not be used for files
444: ** as fopen() etc is more portable of course.
445: **
1.1 timbl 446: ** The input buffer size, if large will give greater efficiency and
447: ** release the server faster, and if small will save space on PCs etc.
448: */
449:
450:
451: /* Set up the buffering
452: **
453: ** These routines are public because they are in fact needed by
454: ** many parsers, and on PCs and Macs we should not duplicate
455: ** the static buffer area.
456: */
1.13 timbl 457: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 458: {
1.28 frystyk 459: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 460: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
461: isoc->input_file_number = file_number;
462: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
463: return isoc;
1.1 timbl 464: }
465:
1.35 frystyk 466: /* This should return HT_INTERRUPTED if interrupted BUT the connection
467: MUST not be closed */
468: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 469: {
1.35 frystyk 470: int ch;
1.1 timbl 471: do {
1.13 timbl 472: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 473: int status = NETREAD(
1.13 timbl 474: isoc->input_file_number,
475: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 476: if (status <= 0) {
1.39 ! frystyk 477: if (status == 0)
! 478: return EOF;
! 479: if (status == HT_INTERRUPTED) {
! 480: if (TRACE)
! 481: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
! 482: return HT_INTERRUPTED;
! 483: }
! 484: HTInetStatus("read");
! 485: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 486: }
1.35 frystyk 487: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 488: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 489: }
1.39 ! frystyk 490: ch = (unsigned char) *isoc->input_pointer++;
! 491: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 492:
493: return FROMASCII(ch);
494: }
495:
1.17 luotonen 496: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 497: {
498: if (me) free(me);
499: }
500:
501:
1.16 luotonen 502: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
503: int *, len)
504: {
505: if (isoc->input_pointer >= isoc->input_limit) {
506: int status = NETREAD(isoc->input_file_number,
507: isoc->input_buffer,
508: ((*len < INPUT_BUFFER_SIZE) ?
509: *len : INPUT_BUFFER_SIZE));
510: if (status <= 0) {
511: isoc->input_limit = isoc->input_buffer;
512: if (status < 0)
1.39 ! frystyk 513: HTInetStatus("read");
1.16 luotonen 514: *len = 0;
515: return NULL;
516: }
517: else {
518: *len = status;
519: return isoc->input_buffer;
520: }
521: }
522: else {
523: char * ret = isoc->input_pointer;
524: *len = isoc->input_limit - isoc->input_pointer;
525: isoc->input_pointer = isoc->input_limit;
526: return ret;
527: }
528: }
529:
530:
1.15 luotonen 531: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
532: {
533: if (isoc) {
534: int status;
535:
536: isoc->input_pointer = isoc->input_buffer;
537: status = NETREAD(isoc->input_file_number,
538: isoc->input_buffer,
539: INPUT_BUFFER_SIZE);
540: if (status <= 0) {
541: isoc->input_limit = isoc->input_buffer;
542: if (status < 0)
1.39 ! frystyk 543: HTInetStatus("read");
1.15 luotonen 544: }
545: else
546: isoc->input_limit = isoc->input_buffer + status;
547: return status;
548: }
549: return -1;
550: }
551:
552:
553: PRIVATE void ascii_cat ARGS3(char **, linep,
554: char *, start,
555: char *, end)
556: {
557: if (linep && start && end && start <= end) {
558: char *ptr;
559:
560: if (*linep) {
561: int len = strlen(*linep);
562: *linep = (char*)realloc(*linep, len + end-start + 1);
563: ptr = *linep + len;
564: }
565: else {
566: ptr = *linep = (char*)malloc(end-start + 1);
567: }
568:
569: while (start < end) {
570: *ptr = FROMASCII(*start);
571: ptr++;
572: start++;
573: }
574: *ptr = 0;
575: }
576: }
577:
578:
579: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
580: BOOL, unfold)
581: {
582: if (!isoc)
583: return NULL;
584: else {
585: BOOL check_unfold = NO;
586: int prev_cr = 0;
587: char *start = isoc->input_pointer;
588: char *cur = isoc->input_pointer;
589: char * line = NULL;
590:
591: for(;;) {
592: /*
593: ** Get more if needed to complete line
594: */
595: if (cur >= isoc->input_limit) { /* Need more data */
596: ascii_cat(&line, start, cur);
597: if (fill_in_buffer(isoc) <= 0)
598: return line;
599: start = cur = isoc->input_pointer;
600: } /* if need more data */
601:
602: /*
603: ** Find a line feed if there is one
604: */
605: for(; cur < isoc->input_limit; cur++) {
606: char c = FROMASCII(*cur);
607: if (!c) {
1.18 luotonen 608: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 609: return NULL; /* Panic! read a 0! */
610: }
611: if (check_unfold && c != ' ' && c != '\t') {
612: return line; /* Note: didn't update isoc->input_pointer */
613: }
614: else {
615: check_unfold = NO;
616: }
617:
618: if (c=='\r') {
619: prev_cr = 1;
620: }
621: else {
622: if (c=='\n') { /* Found a line feed */
623: ascii_cat(&line, start, cur-prev_cr);
624: start = isoc->input_pointer = cur+1;
625:
626: if (line && strlen(line) > 0 && unfold) {
627: check_unfold = YES;
628: }
629: else {
630: return line;
631: }
632: } /* if NL */
633: /* else just a regular character */
634: prev_cr = 0;
635: } /* if not CR */
636: } /* while characters in buffer remain */
637: } /* until line read or end-of-file */
638: } /* valid parameters to function */
639: }
640:
641:
642: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
643: {
644: return get_some_line(isoc, NO);
645: }
646:
647: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
648: {
649: return get_some_line(isoc, YES);
650: }
651:
652:
653: /*
654: ** Read HTTP status line (if there is one).
655: **
656: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
657: ** First look at the stub in ASCII and check if it starts "HTTP/".
658: **
659: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
660: ** will be taken as a HTTP 1.0 server. Failure.
661: */
662: #define STUB_LENGTH 20
663: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
664: {
665: if (!isoc) {
666: return NULL;
667: }
668: else {
669: char buf[STUB_LENGTH + 1];
670: int i;
671: char server_version[STUB_LENGTH+1];
672: int server_status;
673:
674: /*
675: ** Read initial buffer
676: */
677: if (isoc->input_pointer >= isoc->input_limit &&
678: fill_in_buffer(isoc) <= 0) {
679: return NULL;
680: }
681:
682: for (i=0; i < STUB_LENGTH; i++)
683: buf[i] = FROMASCII(isoc->input_buffer[i]);
684: buf[STUB_LENGTH] = 0;
685:
686: if (0 != strncmp(buf, "HTTP/", 5) ||
687: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
688: return NULL;
689: else
690: return get_some_line(isoc, NO);
691: }
692: }
693:
694:
695: /*
696: ** Do heuristic test to see if this is binary.
697: **
698: ** We check for characters above 128 in the first few bytes, and
699: ** if we find them we forget the html default.
700: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
701: **
702: ** Bugs: An HTTP 0.9 server returning a binary document with
703: ** characters < 128 will be read as ASCII.
704: */
705: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
706: {
707: if (isoc &&
708: (isoc->input_pointer < isoc->input_limit ||
709: fill_in_buffer(isoc) > 0)) {
710: char *p = isoc->input_buffer;
711: int i = STUB_LENGTH;
712:
713: for( ; i && p < isoc->input_limit; p++, i++)
714: if (((int)*p)&128)
715: return YES;
716: }
717: return NO;
718: }
719:
720:
721:
1.1 timbl 722: /* Stream the data to an ouput file as binary
723: */
1.38 luotonen 724: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 725: int, input,
726: FILE *, output)
1.1 timbl 727: {
728: do {
729: int status = NETREAD(
1.13 timbl 730: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 731: if (status <= 0) {
732: if (status == 0) return 0;
733: if (TRACE) fprintf(stderr,
1.39 ! frystyk 734: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 735: return 2; /* Error */
736: }
1.13 timbl 737: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 738: } while (YES);
739: }
740:
1.38 luotonen 741:
742: /*
743: * Normal HTTP headers are never bigger than 2K.
744: */
745: #define S_BUFFER_SIZE 2000
746:
747: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
748: {
749: if (isoc) {
750: isoc->s_do_buffering = YES;
751: if (!isoc->s_buffer) {
752: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
753: isoc->s_buffer_size = S_BUFFER_SIZE;
754: }
755: isoc->s_buffer_cur = isoc->s_buffer;
756: }
757: }
758:
759: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
760: {
761: if (isoc) {
762: isoc->s_do_buffering = NO;
763: if (isoc->s_buffer_cur)
764: *isoc->s_buffer_cur = 0;
765: }
766: }
767:
768: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
769: char **, buffer_ptr)
770: {
771: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
772: return 0;
773: else {
774: *isoc->s_buffer_cur = 0;
775: if (buffer_ptr)
776: *buffer_ptr = isoc->s_buffer;
777: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
778: }
779: }
1.1 timbl 780:
1.33 luotonen 781: PRIVATE BOOL better_match ARGS2(HTFormat, f,
782: HTFormat, g)
783: {
784: CONST char *p, *q;
785:
786: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
787: int i,j;
788: for(i=0 ; *p; p++) if (*p == '*') i++;
789: for(j=0 ; *q; q++) if (*q == '*') j++;
790: if (i < j) return YES;
791: }
792: return NO;
793: }
794:
1.17 luotonen 795:
1.2 timbl 796: /* Create a filter stack
797: ** ---------------------
798: **
1.7 secret 799: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 800: ** structure is made to hold the destination format while the
801: ** new stack is generated. This is just to pass the out format to
802: ** MIME so far. Storing the format of a stream in the stream might
803: ** be a lot neater.
1.10 timbl 804: **
1.29 frystyk 805: ** The star/star format is special, in that if you can take
1.10 timbl 806: ** that you can take anything. However, we
1.2 timbl 807: */
1.34 luotonen 808: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
809: HTRequest *, request,
810: BOOL, guess)
1.2 timbl 811: {
1.12 timbl 812: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 813: HTList * conversion[2];
814: int which_list;
1.25 frystyk 815: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 816: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 817:
1.2 timbl 818: if (TRACE) fprintf(stderr,
1.39 ! frystyk 819: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 820: HTAtom_name(rep_in),
1.2 timbl 821: HTAtom_name(rep_out));
1.34 luotonen 822:
823: if (guess && rep_in == WWW_UNKNOWN) {
824: CTRACE(stderr, "Returning... guessing stream\n");
825: return HTGuess_new(request);
826: }
827:
1.21 luotonen 828: if (rep_out == WWW_SOURCE || rep_out == rep_in)
829: return request->output_stream;
1.2 timbl 830:
1.14 timbl 831: conversion[0] = request->conversions;
832: conversion[1] = HTConversions;
1.17 luotonen 833:
1.15 luotonen 834: for(which_list = 0; which_list<2; which_list++) {
835: HTList * cur = conversion[which_list];
836:
837: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 838: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 839: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
840: if (!best_match ||
841: better_match(pres->rep, best_match->rep) ||
842: (!better_match(best_match->rep, pres->rep) &&
843: pres->quality > best_quality)) {
1.25 frystyk 844: best_match = pres;
845: best_quality = pres->quality;
1.10 timbl 846: }
847: }
1.33 luotonen 848:
1.29 frystyk 849: #ifdef OLD_CODE
850: /* This case is now included in the best_match loop */
1.25 frystyk 851: /* Special case when input format is 'www/source' */
1.10 timbl 852: if (pres->rep == source) {
1.29 frystyk 853: if (pres->rep_out == rep_out ||
854: wild_match(pres->rep_out, rep_out))
1.10 timbl 855: source_match = pres;
1.2 timbl 856: }
1.29 frystyk 857: #endif
1.2 timbl 858: }
859: }
1.33 luotonen 860:
1.29 frystyk 861: match = best_match ? best_match : NULL;
862: if (match) {
863: if (match->rep == WWW_SOURCE) {
1.39 ! frystyk 864: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 865: HTAtom_name(match->rep),
866: HTAtom_name(rep_out));
867: }
868: return (*match->converter)(
1.25 frystyk 869: request, match->command, rep_in, rep_out,
870: request->output_stream);
1.29 frystyk 871: }
1.2 timbl 872: return NULL;
873: }
874:
875:
876: /* Find the cost of a filter stack
877: ** -------------------------------
878: **
879: ** Must return the cost of the same stack which StreamStack would set up.
880: **
881: ** On entry,
882: ** length The size of the data to be converted
883: */
1.12 timbl 884: PUBLIC float HTStackValue ARGS5(
1.14 timbl 885: HTList *, theseConversions,
1.10 timbl 886: HTFormat, rep_in,
1.2 timbl 887: HTFormat, rep_out,
888: float, initial_value,
889: long int, length)
890: {
1.14 timbl 891: int which_list;
892: HTList* conversion[2];
893:
1.2 timbl 894: if (TRACE) fprintf(stderr,
1.39 ! frystyk 895: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 896: HTAtom_name(rep_in), initial_value,
1.2 timbl 897: HTAtom_name(rep_out));
898:
899: if (rep_out == WWW_SOURCE ||
1.10 timbl 900: rep_out == rep_in) return 0.0;
1.2 timbl 901:
1.12 timbl 902: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 903:
1.14 timbl 904: conversion[0] = theseConversions;
905: conversion[1] = HTConversions;
906:
907: for(which_list = 0; which_list<2; which_list++)
908: if (conversion[which_list]) {
1.15 luotonen 909: HTList * cur = conversion[which_list];
1.2 timbl 910: HTPresentation * pres;
1.15 luotonen 911: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
912: if (pres->rep == rep_in &&
1.17 luotonen 913: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 914: float value = initial_value * pres->quality;
915: if (HTMaxSecs != 0.0)
1.15 luotonen 916: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 917: /HTMaxSecs;
918: return value;
919: }
920: }
921: }
922:
923: return -1e30; /* Really bad */
1.17 luotonen 924: }
925:
926:
1.2 timbl 927:
1.1 timbl 928:
1.2 timbl 929: /* Push data from a socket down a stream
930: ** -------------------------------------
1.1 timbl 931: **
1.2 timbl 932: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 933: ** graphic (or other) objects described by the file.
1.2 timbl 934: **
935: ** The file number given is assumed to be a TELNET stream ie containing
936: ** CRLF at the end of lines which need to be stripped to LF for unix
937: ** when the format is textual.
938: **
1.26 luotonen 939: ** RETURNS the number of bytes transferred.
940: **
1.1 timbl 941: */
1.26 luotonen 942: PUBLIC int HTCopy ARGS2(
1.2 timbl 943: int, file_number,
944: HTStream*, sink)
1.1 timbl 945: {
1.2 timbl 946: HTStreamClass targetClass;
1.13 timbl 947: HTInputSocket * isoc;
1.26 luotonen 948: int cnt = 0;
949:
1.5 timbl 950: /* Push the data down the stream
1.2 timbl 951: **
952: */
953: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 954: isoc = HTInputSocket_new(file_number);
1.2 timbl 955:
956: /* Push binary from socket down sink
1.10 timbl 957: **
958: ** This operation could be put into a main event loop
1.2 timbl 959: */
960: for(;;) {
961: int status = NETREAD(
1.13 timbl 962: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 963: if (status <= 0) {
964: if (status == 0) break;
965: if (TRACE) fprintf(stderr,
1.39 ! frystyk 966: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 967: status, errno);
1.2 timbl 968: break;
969: }
1.26 luotonen 970:
1.8 timbl 971: #ifdef NOT_ASCII
972: {
973: char * p;
1.13 timbl 974: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 975: *p = FROMASCII(*p);
976: }
977: }
978: #endif
979:
1.13 timbl 980: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 981: cnt += status;
1.2 timbl 982: } /* next bufferload */
1.26 luotonen 983:
1.13 timbl 984: HTInputSocket_free(isoc);
1.26 luotonen 985:
986: return cnt;
1.2 timbl 987: }
988:
1.1 timbl 989:
1.7 secret 990:
991: /* Push data from a file pointer down a stream
992: ** -------------------------------------
993: **
994: ** This routine is responsible for creating and PRESENTING any
995: ** graphic (or other) objects described by the file.
996: **
997: **
998: */
999: PUBLIC void HTFileCopy ARGS2(
1000: FILE *, fp,
1001: HTStream*, sink)
1002: {
1003: HTStreamClass targetClass;
1.13 timbl 1004: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1005:
1006: /* Push the data down the stream
1007: **
1008: */
1009: targetClass = *(sink->isa); /* Copy pointers to procedures */
1010:
1011: /* Push binary from socket down sink
1012: */
1013: for(;;) {
1014: int status = fread(
1015: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1016: if (status == 0) { /* EOF or error */
1017: if (ferror(fp) == 0) break;
1018: if (TRACE) fprintf(stderr,
1.39 ! frystyk 1019: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1020: break;
1021: }
1022: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1023: } /* next bufferload */
1.7 secret 1024: }
1025:
1026:
1027:
1028:
1.2 timbl 1029: /* Push data from a socket down a stream STRIPPING CR
1030: ** --------------------------------------------------
1031: **
1032: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1033: ** graphic (or other) objects described by the socket.
1.2 timbl 1034: **
1035: ** The file number given is assumed to be a TELNET stream ie containing
1036: ** CRLF at the end of lines which need to be stripped to LF for unix
1037: ** when the format is textual.
1.37 frystyk 1038: **
1039: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1040: */
1.2 timbl 1041: PUBLIC void HTCopyNoCR ARGS2(
1042: int, file_number,
1043: HTStream*, sink)
1044: {
1.13 timbl 1045: HTStreamClass targetClass;
1046: HTInputSocket * isoc;
1.37 frystyk 1047: int ch;
1.1 timbl 1048:
1.2 timbl 1049: /* Push the data, ignoring CRLF, down the stream
1050: **
1051: */
1052: targetClass = *(sink->isa); /* Copy pointers to procedures */
1053:
1054: /* Push text from telnet socket down sink
1055: **
1056: ** @@@@@ To push strings could be faster? (especially is we
1057: ** cheat and don't ignore CR! :-}
1058: */
1.13 timbl 1059: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1060: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1061: (*targetClass.put_character)(sink, ch);
1.13 timbl 1062: HTInputSocket_free(isoc);
1.2 timbl 1063: }
1.1 timbl 1064:
1.2 timbl 1065:
1.7 secret 1066:
1.2 timbl 1067: /* Parse a socket given format and file number
1068: **
1069: ** This routine is responsible for creating and PRESENTING any
1070: ** graphic (or other) objects described by the file.
1071: **
1072: ** The file number given is assumed to be a TELNET stream ie containing
1073: ** CRLF at the end of lines which need to be stripped to LF for unix
1074: ** when the format is textual.
1075: **
1076: */
1.14 timbl 1077:
1.12 timbl 1078: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1079: HTFormat, rep_in,
1.2 timbl 1080: int, file_number,
1.12 timbl 1081: HTRequest *, request)
1.2 timbl 1082: {
1083: HTStream * stream;
1084: HTStreamClass targetClass;
1.1 timbl 1085:
1.34 luotonen 1086: stream = HTStreamStack(rep_in, request, YES);
1.29 frystyk 1087:
1.2 timbl 1088: if (!stream) {
1.30 frystyk 1089: char buffer[1024]; /* @@@@@@@@ */
1.2 timbl 1090: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1091: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.39 ! frystyk 1092: if (TRACE) fprintf(stderr, "ParseSocket. %s\n", buffer);
1.16 luotonen 1093: return HTLoadError(request, 501, buffer);
1.2 timbl 1094: }
1.1 timbl 1095:
1.3 timbl 1096: /* Push the data, ignoring CRLF if necessary, down the stream
1097: **
1.2 timbl 1098: **
1.3 timbl 1099: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1100: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1101: ** The current method smells anyway.
1.2 timbl 1102: */
1103: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1104: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1105: || (request->content_encoding &&
1106: request->content_encoding != HTAtom_for("8bit") &&
1107: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1108: || strstr(HTAtom_name(rep_in), "image/")
1109: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1110: HTCopy(file_number, stream);
1.2 timbl 1111: } else { /* ascii text with CRLFs :-( */
1112: HTCopyNoCR(file_number, stream);
1113: }
1.7 secret 1114: (*targetClass.free)(stream);
1115:
1116: return HT_LOADED;
1117: }
1118:
1119:
1120:
1121: /* Parse a file given format and file pointer
1122: **
1123: ** This routine is responsible for creating and PRESENTING any
1124: ** graphic (or other) objects described by the file.
1125: **
1126: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1127: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1128: ** when the format is textual.
1129: **
1130: */
1.12 timbl 1131: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1132: HTFormat, rep_in,
1.7 secret 1133: FILE *, fp,
1.12 timbl 1134: HTRequest *, request)
1.7 secret 1135: {
1136: HTStream * stream;
1137: HTStreamClass targetClass;
1138:
1.34 luotonen 1139: stream = HTStreamStack(rep_in, request, YES);
1.7 secret 1140:
1141: if (!stream) {
1.30 frystyk 1142: char buffer[1024]; /* @@@@@@@@ */
1.7 secret 1143: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1144: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.39 ! frystyk 1145: if (TRACE) fprintf(stderr, "ParseFile... %s\n", buffer);
1.29 frystyk 1146: return HTLoadError(request, 501, buffer);
1.7 secret 1147: }
1148:
1.9 timbl 1149: /* Push the data down the stream
1.7 secret 1150: **
1151: **
1152: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1153: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1154: ** The current method smells anyway.
1155: */
1156: targetClass = *(stream->isa); /* Copy pointers to procedures */
1157: HTFileCopy(fp, stream);
1.2 timbl 1158: (*targetClass.free)(stream);
1.1 timbl 1159:
1.2 timbl 1160: return HT_LOADED;
1.1 timbl 1161: }
1.2 timbl 1162:
1.10 timbl 1163:
1164: /* Converter stream: Network Telnet to internal character text
1165: ** -----------------------------------------------------------
1166: **
1167: ** The input is assumed to be in ASCII, with lines delimited
1168: ** by (13,10) pairs. These pairs are converted into (CR,LF)
1169: ** pairs in the local representation. The (CR,LF) sequence
1170: ** when found is changed to a '\n' character, the internal
1171: ** C representation of a new line.
1172: */
1173:
1174:
1.11 timbl 1175: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1176: {
1177: char c = FROMASCII(net_char);
1178: if (me->had_cr) {
1179: if (c==LF) {
1180: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1181: me->had_cr = NO;
1182: return;
1183: } else {
1184: me->sink->isa->put_character(me->sink, CR); /* leftover */
1185: }
1186: }
1187: me->had_cr = (c==CR);
1188: if (!me->had_cr)
1189: me->sink->isa->put_character(me->sink, c); /* normal */
1190: }
1191:
1.11 timbl 1192: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1193: {
1194: CONST char * p;
1195: for(p=s; *p; p++) NetToText_put_character(me, *p);
1196: }
1197:
1.11 timbl 1198: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1199: {
1200: CONST char * p;
1201: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1202: }
1203:
1204: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1205: {
1206: me->sink->isa->free(me->sink); /* Close rest of pipe */
1207: free(me);
1208: }
1209:
1210: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1211: {
1212: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1213: free(me);
1214: }
1215:
1216: /* The class structure
1217: */
1218: PRIVATE HTStreamClass NetToTextClass = {
1219: "NetToText",
1220: NetToText_free,
1221: NetToText_abort,
1222: NetToText_put_character,
1223: NetToText_put_string,
1224: NetToText_put_block
1225: };
1226:
1227: /* The creation method
1228: */
1229: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1230: {
1231: HTStream* me = (HTStream*)malloc(sizeof(*me));
1232: if (me == NULL) outofmem(__FILE__, "NetToText");
1233: me->isa = &NetToTextClass;
1234:
1235: me->had_cr = NO;
1236: me->sink = sink;
1237: return me;
1238: }
1.2 timbl 1239:
1240:
Webmaster