Annotation of libwww/Library/src/HTFormat.c, revision 1.41
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
/* Global tuning limits.
** HTMaxSecs is the divisor applied to the estimated conversion time in
** HTStackValue(); that time penalty is skipped when it is 0.0.
** HTMaxLength is not referenced by the code visible in this part of the file.
*/
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
/* Shell command template for presenting a PostScript file on unix systems.
** Both %s slots appear to take the same file name: the first command views
** the file and the second removes it.  The macro is not referenced by the
** code visible in this part of the file.
*/
22: #ifdef unix
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
27: /* Full pathname would be better! */
28: #endif
29: #endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.41 ! frystyk 48: #include "HTTCP.h"
1.34 luotonen 49: #include "HTGuess.h"
50:
1.2 timbl 51:
/* When set, HTParseSocket() pushes the socket data with HTCopy() (no CR
** stripping) whatever the input format is.
*/
52: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
53:
/* Layout of the stream object as seen by this file.
** Only one of the two definitions is compiled: the minimal one when
** ORIGINAL is defined, otherwise the NetToText variant below.
** `isa' points at the stream's class record; callers in this file copy it
** to reach put_block / put_character / free.
** NOTE(review): `had_cr' and `sink' are not touched by the code visible
** here - presumably used by the NetToText stream implementation; confirm.
*/
1.10 timbl 54: #ifdef ORIGINAL
1.2 timbl 55: struct _HTStream {
56: CONST HTStreamClass* isa;
57: /* ... */
58: };
1.10 timbl 59: #endif
60:
61: /* this version used by the NetToText stream */
62: struct _HTStream {
63: CONST HTStreamClass * isa;
64: BOOL had_cr;
65: HTStream * sink;
66: };
1.2 timbl 67:
68:
1.17 luotonen 69: /*
70: ** Accept-Encoding and Accept-Language
71: */
/* One entry of an accept-encoding or accept-language list:
** `atom' is the interned name (set with HTAtom_for() by HTAcceptEncoding()
** and HTAcceptLanguage()), `quality' the weight returned by lang_value() /
** encoding_value() when the entry matches.
*/
72: typedef struct _HTAcceptNode {
73: HTAtom * atom;
74: float quality;
75: } HTAcceptNode;
76:
77:
78:
79:
1.2 timbl 80: /* Presentation methods
81: ** --------------------
82: */
83:
/* Global list of HTPresentation entries.  HTStreamStack() and HTStackValue()
** search it after the per-request / caller-supplied conversion list.
*/
1.14 timbl 84: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 85:
1.31 frystyk 86: /* -------------------------------------------------------------------------
87: This function replaces the code in HTRequest_delete() in order to keep
88: the data structure hidden (it is NOT a joke!)
89: Henrik 14/03-94
90: ------------------------------------------------------------------------- */
91: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
92: {
93: HTList *cur = me;
94: HTPresentation *pres;
95: if (!me)
96: return;
97: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
98: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
99: free(pres);
100: }
101: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
102: }
103:
1.2 timbl 104:
105: /* Define a presentation system command for a content-type
106: ** -------------------------------------------------------
107: */
1.12 timbl 108: PUBLIC void HTSetPresentation ARGS6(
109: HTList *, conversions,
110: CONST char *, representation,
111: CONST char *, command,
112: float, quality,
113: float, secs,
114: float, secs_per_byte
1.2 timbl 115: ){
116:
117: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
118: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
119:
120: pres->rep = HTAtom_for(representation);
121: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
122: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
123: pres->quality = quality;
124: pres->secs = secs;
125: pres->secs_per_byte = secs_per_byte;
126: pres->rep = HTAtom_for(representation);
127: pres->command = 0;
128: StrAllocCopy(pres->command, command);
129:
1.12 timbl 130: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 131:
1.15 luotonen 132: #ifdef OLD_CODE
133: if (strcmp(representation, "*")==0) {
1.2 timbl 134: if (default_presentation) free(default_presentation);
135: default_presentation = pres;
1.12 timbl 136: } else
137: #endif
138: HTList_addObject(conversions, pres);
1.2 timbl 139: }
140:
141:
142: /* Define a built-in function for a content-type
143: ** ---------------------------------------------
144: */
1.12 timbl 145: PUBLIC void HTSetConversion ARGS7(
146: HTList *, conversions,
147: CONST char *, representation_in,
148: CONST char *, representation_out,
1.6 timbl 149: HTConverter*, converter,
1.12 timbl 150: float, quality,
151: float, secs,
152: float, secs_per_byte
1.2 timbl 153: ){
1.1 timbl 154:
1.2 timbl 155: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
156: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
157:
158: pres->rep = HTAtom_for(representation_in);
159: pres->rep_out = HTAtom_for(representation_out);
160: pres->converter = converter;
161: pres->command = NULL; /* Fixed */
162: pres->quality = quality;
163: pres->secs = secs;
164: pres->secs_per_byte = secs_per_byte;
165: pres->command = 0;
166:
1.12 timbl 167: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 168:
1.12 timbl 169: #ifdef OLD_CODE
1.2 timbl 170: if (strcmp(representation_in, "*")==0) {
171: if (default_presentation) free(default_presentation);
172: default_presentation = pres;
1.12 timbl 173: } else
174: #endif
175: HTList_addObject(conversions, pres);
1.2 timbl 176: }
1.1 timbl 177:
178:
179:
1.17 luotonen 180: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
181: char *, enc,
182: float, quality)
183: {
184: HTAcceptNode * node;
185: char * cur;
186:
187: if (!list || !enc || !*enc) return;
188:
189: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
190:
191: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
192: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
193: HTList_addObject(list, (void*)node);
194:
195: node->atom = HTAtom_for(enc);
196: node->quality = quality;
197: }
198:
199:
200: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
201: char *, lang,
202: float, quality)
203: {
204: HTAcceptNode * node;
205:
206: if (!list || !lang || !*lang) return;
207:
208: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
209: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
210:
211: HTList_addObject(list, (void*)node);
212: node->atom = HTAtom_for(lang);
213: node->quality = quality;
214: }
215:
216:
217: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
218: HTAtom *, actual)
219: {
220: char *t, *a, *st, *sa;
221: BOOL match = NO;
222:
1.22 luotonen 223: if (template && actual && (t = HTAtom_name(template))) {
224: if (!strcmp(t, "*"))
225: return YES;
1.17 luotonen 226:
1.22 luotonen 227: if (strchr(t, '*') &&
228: (a = HTAtom_name(actual)) &&
229: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 230:
1.22 luotonen 231: *sa = 0;
232: *st = 0;
233:
234: if ((*(st-1)=='*' &&
235: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
236: (*(st+1)=='*' && !strcasecomp(t,a)))
237: match = YES;
238:
239: *sa = '/';
240: *st = '/';
241: }
242: }
1.23 luotonen 243: return match;
1.17 luotonen 244: }
245:
1.36 luotonen 246: /*
247: * Added by takada@seraph.ntt.jp (94/04/08)
248: */
249: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
250: HTAtom *, actual)
251: {
252: char *t, *a, *st, *sa;
253: BOOL match = NO;
254:
255: if (template && actual &&
256: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
257: st = strchr(t, '_');
258: sa = strchr(a, '_');
259: if ((st != NULL) && (sa != NULL)) {
260: if (!strcasecomp(t, a))
261: match = YES;
262: else
263: match = NO;
264: }
265: else {
266: if (st != NULL) *st = 0;
267: if (sa != NULL) *sa = 0;
268: if (!strcasecomp(t, a))
269: match = YES;
270: else
271: match = NO;
272: if (st != NULL) *st = '_';
273: if (sa != NULL) *sa = '_';
274: }
275: }
276: return match;
277: }
278: /* end of addition */
279:
280:
1.17 luotonen 281:
282: PRIVATE float type_value ARGS2(HTAtom *, content_type,
283: HTList *, accepted)
284: {
285: HTList * cur = accepted;
286: HTPresentation * pres;
287: HTPresentation * wild = NULL;
288:
289: if (!content_type || !accepted) return -1;
290:
291: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
292: if (pres->rep == content_type)
293: return pres->quality;
294: else if (wild_match(pres->rep, content_type))
295: wild = pres;
296: }
297: if (wild) return wild->quality;
298: else return -1;
299: }
300:
301:
302: PRIVATE float lang_value ARGS2(HTAtom *, language,
303: HTList *, accepted)
304: {
305: HTList * cur = accepted;
306: HTAcceptNode * node;
307: HTAcceptNode * wild = NULL;
308:
309: if (!language || !accepted || HTList_isEmpty(accepted)) {
310: return 0.1;
311: }
312:
313: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
314: if (node->atom == language) {
315: return node->quality;
316: }
1.36 luotonen 317: /*
318: * patch by takada@seraph.ntt.jp (94/04/08)
319: * the original line was
320: * else if (wild_match(node->atom, language)) {
321: * and the new line is
322: */
323: else if (lang_match(node->atom, language)) {
1.17 luotonen 324: wild = node;
325: }
326: }
327:
328: if (wild) {
329: return wild->quality;
330: }
331: else {
332: return 0.1;
333: }
334: }
335:
336:
337: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
338: HTList *, accepted)
339: {
340: HTList * cur = accepted;
341: HTAcceptNode * node;
342: HTAcceptNode * wild = NULL;
343: char * e;
344:
345: if (!encoding || !accepted || HTList_isEmpty(accepted))
346: return 1;
347:
348: e = HTAtom_name(encoding);
349: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
350: return 1;
351:
352: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
353: if (node->atom == encoding)
354: return node->quality;
355: else if (wild_match(node->atom, encoding))
356: wild = node;
357: }
358: if (wild) return wild->quality;
359: else return 1;
360: }
361:
362:
363: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
364: HTList *, accepted_content_types,
365: HTList *, accepted_languages,
366: HTList *, accepted_encodings)
367: {
368: int accepted_cnt = 0;
369: HTList * accepted;
370: HTList * sorted;
371: HTList * cur;
372: HTContentDescription * d;
373:
374: if (!possibilities) return NO;
375:
376: accepted = HTList_new();
377: cur = possibilities;
378: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
379: float tv = type_value(d->content_type, accepted_content_types);
380: float lv = lang_value(d->content_language, accepted_languages);
381: float ev = encoding_value(d->content_encoding, accepted_encodings);
382:
383: if (tv > 0) {
384: d->quality *= tv * lv * ev;
385: HTList_addObject(accepted, d);
386: accepted_cnt++;
387: }
1.18 luotonen 388: else {
389: if (d->filename) free(d->filename);
390: free(d);
391: }
1.17 luotonen 392: }
393:
1.18 luotonen 394: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 395: CTRACE(stderr,
1.18 luotonen 396: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 397:
398: sorted = HTList_new();
399: while (accepted_cnt-- > 0) {
400: HTContentDescription * worst = NULL;
401: cur = accepted;
402: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
403: if (!worst || d->quality < worst->quality)
404: worst = d;
405: }
406: if (worst) {
407: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
408: accepted_cnt+1,
409: worst->quality,
410: (worst->content_type
411: ? HTAtom_name(worst->content_type) : "-"),
412: (worst->content_language
413: ? HTAtom_name(worst->content_language) :"-"),
414: (worst->content_encoding
415: ? HTAtom_name(worst->content_encoding) :"-"),
416: (worst->filename
417: ? worst->filename :"-"));
418: HTList_removeObject(accepted, (void*)worst);
419: HTList_addObject(sorted, (void*)worst);
420: }
421: }
1.18 luotonen 422: CTRACE(stderr, "\n");
1.17 luotonen 423: HTList_delete(accepted);
424: HTList_delete(possibilities->next);
425: possibilities->next = sorted->next;
426: sorted->next = NULL;
427: HTList_delete(sorted);
428:
429: if (!HTList_isEmpty(possibilities)) return YES;
430: else return NO;
431: }
432:
433:
434:
435:
436:
1.13 timbl 437: /* Socket Input Buffering
438: ** ----------------------
1.1 timbl 439: **
1.13 timbl 440: ** This code is used because one cannot in general open a
441: ** file descriptor for a socket.
442: **
1.1 timbl 443: ** The input file is read using the macro which can read from
1.13 timbl 444: ** a socket or a file, but this should not be used for files
445: ** as fopen() etc is more portable of course.
446: **
1.1 timbl 447: ** The input buffer size, if large will give greater efficiency and
448: ** release the server faster, and if small will save space on PCs etc.
449: */
450:
451:
452: /* Set up the buffering
453: **
454: ** These routines are public because they are in fact needed by
455: ** many parsers, and on PCs and Macs we should not duplicate
456: ** the static buffer area.
457: */
1.13 timbl 458: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 459: {
1.28 frystyk 460: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 461: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
462: isoc->input_file_number = file_number;
463: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
464: return isoc;
1.1 timbl 465: }
466:
1.35 frystyk 467: /* This should return HT_INTERRUPTED if interrupted BUT the connection
468: MUST not be closed */
469: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 470: {
1.35 frystyk 471: int ch;
1.1 timbl 472: do {
1.13 timbl 473: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 474: int status = NETREAD(
1.13 timbl 475: isoc->input_file_number,
476: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 477: if (status <= 0) {
1.39 frystyk 478: if (status == 0)
479: return EOF;
480: if (status == HT_INTERRUPTED) {
481: if (TRACE)
482: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
483: return HT_INTERRUPTED;
484: }
485: HTInetStatus("read");
486: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 487: }
1.35 frystyk 488: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 489: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 490: }
1.39 frystyk 491: ch = (unsigned char) *isoc->input_pointer++;
492: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 493:
494: return FROMASCII(ch);
495: }
496:
1.17 luotonen 497: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 498: {
499: if (me) free(me);
500: }
501:
502:
1.16 luotonen 503: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
504: int *, len)
505: {
506: if (isoc->input_pointer >= isoc->input_limit) {
507: int status = NETREAD(isoc->input_file_number,
508: isoc->input_buffer,
509: ((*len < INPUT_BUFFER_SIZE) ?
510: *len : INPUT_BUFFER_SIZE));
511: if (status <= 0) {
512: isoc->input_limit = isoc->input_buffer;
513: if (status < 0)
1.39 frystyk 514: HTInetStatus("read");
1.16 luotonen 515: *len = 0;
516: return NULL;
517: }
518: else {
519: *len = status;
520: return isoc->input_buffer;
521: }
522: }
523: else {
524: char * ret = isoc->input_pointer;
525: *len = isoc->input_limit - isoc->input_pointer;
526: isoc->input_pointer = isoc->input_limit;
527: return ret;
528: }
529: }
530:
531:
1.15 luotonen 532: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
533: {
534: if (isoc) {
535: int status;
536:
537: isoc->input_pointer = isoc->input_buffer;
538: status = NETREAD(isoc->input_file_number,
539: isoc->input_buffer,
540: INPUT_BUFFER_SIZE);
541: if (status <= 0) {
542: isoc->input_limit = isoc->input_buffer;
543: if (status < 0)
1.39 frystyk 544: HTInetStatus("read");
1.15 luotonen 545: }
546: else
547: isoc->input_limit = isoc->input_buffer + status;
548: return status;
549: }
550: return -1;
551: }
552:
553:
554: PRIVATE void ascii_cat ARGS3(char **, linep,
555: char *, start,
556: char *, end)
557: {
558: if (linep && start && end && start <= end) {
559: char *ptr;
560:
561: if (*linep) {
562: int len = strlen(*linep);
563: *linep = (char*)realloc(*linep, len + end-start + 1);
564: ptr = *linep + len;
565: }
566: else {
567: ptr = *linep = (char*)malloc(end-start + 1);
568: }
569:
570: while (start < end) {
571: *ptr = FROMASCII(*start);
572: ptr++;
573: start++;
574: }
575: *ptr = 0;
576: }
577: }
578:
579:
/* Read one line from the input socket into a heap string.
**
** Characters are taken from the socket's buffer (refilled with
** fill_in_buffer() as needed) and accumulated with ascii_cat().  A line
** ends at a line feed; a carriage return directly before it is dropped.
** isoc->input_pointer is advanced past each line feed found.
**
** When `unfold' is set and the line so far is non-empty, the character
** after the line feed is examined: a space or tab means the line continues
** and reading goes on; any other character ends the line and is left
** unread in the buffer.
**
** Returns the accumulated line, or whatever was accumulated (possibly
** NULL) when no more data can be read.  Returns NULL if `isoc' is NULL or
** if a zero byte is read, in which case the partial line is freed.
** NOTE(review): the returned string comes from malloc/realloc in
** ascii_cat() - presumably the caller frees it; confirm.
*/
580: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
581: BOOL, unfold)
582: {
583: if (!isoc)
584: return NULL;
585: else {
586: BOOL check_unfold = NO;
587: int prev_cr = 0;
588: char *start = isoc->input_pointer;
589: char *cur = isoc->input_pointer;
590: char * line = NULL;
591:
592: for(;;) {
593: /*
594: ** Get more if needed to complete line
595: */
596: if (cur >= isoc->input_limit) { /* Need more data */
597: ascii_cat(&line, start, cur);
598: if (fill_in_buffer(isoc) <= 0)
599: return line;
600: start = cur = isoc->input_pointer;
601: } /* if need more data */
602:
603: /*
604: ** Find a line feed if there is one
605: */
606: for(; cur < isoc->input_limit; cur++) {
607: char c = FROMASCII(*cur);
608: if (!c) {
1.18 luotonen 609: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 610: return NULL; /* Panic! read a 0! */
611: }
612: if (check_unfold && c != ' ' && c != '\t') {
613: return line; /* Note: didn't update isoc->input_pointer */
614: }
615: else {
616: check_unfold = NO;
617: }
618:
619: if (c=='\r') {
620: prev_cr = 1;
621: }
622: else {
623: if (c=='\n') { /* Found a line feed */
624: ascii_cat(&line, start, cur-prev_cr);
625: start = isoc->input_pointer = cur+1;
626:
627: if (line && strlen(line) > 0 && unfold) {
628: check_unfold = YES;
629: }
630: else {
631: return line;
632: }
633: } /* if NL */
634: /* else just a regular character */
635: prev_cr = 0;
636: } /* if not CR */
637: } /* while characters in buffer remain */
638: } /* until line read or end-of-file */
639: } /* valid parameters to function */
640: }
641:
642:
643: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
644: {
645: return get_some_line(isoc, NO);
646: }
647:
648: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
649: {
650: return get_some_line(isoc, YES);
651: }
652:
653:
654: /*
655: ** Read HTTP status line (if there is one).
656: **
657: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
658: ** First look at the stub in ASCII and check if it starts "HTTP/".
659: **
660: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
661: ** will be taken as a HTTP 1.0 server. Failure.
662: */
663: #define STUB_LENGTH 20
664: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
665: {
666: if (!isoc) {
667: return NULL;
668: }
669: else {
670: char buf[STUB_LENGTH + 1];
671: int i;
672: char server_version[STUB_LENGTH+1];
673: int server_status;
674:
675: /*
676: ** Read initial buffer
677: */
678: if (isoc->input_pointer >= isoc->input_limit &&
679: fill_in_buffer(isoc) <= 0) {
680: return NULL;
681: }
682:
683: for (i=0; i < STUB_LENGTH; i++)
684: buf[i] = FROMASCII(isoc->input_buffer[i]);
685: buf[STUB_LENGTH] = 0;
686:
687: if (0 != strncmp(buf, "HTTP/", 5) ||
688: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
689: return NULL;
690: else
691: return get_some_line(isoc, NO);
692: }
693: }
694:
695:
696: /*
697: ** Do heuristic test to see if this is binary.
698: **
699: ** We check for characters above 128 in the first few bytes, and
700: ** if we find them we forget the html default.
701: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
702: **
703: ** Bugs: An HTTP 0.9 server returning a binary document with
704: ** characters < 128 will be read as ASCII.
705: */
706: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
707: {
708: if (isoc &&
709: (isoc->input_pointer < isoc->input_limit ||
710: fill_in_buffer(isoc) > 0)) {
711: char *p = isoc->input_buffer;
712: int i = STUB_LENGTH;
713:
714: for( ; i && p < isoc->input_limit; p++, i++)
715: if (((int)*p)&128)
716: return YES;
717: }
718: return NO;
719: }
720:
721:
722:
1.1 timbl 723: /* Stream the data to an output file as binary
724: */
1.38 luotonen 725: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 726: int, input,
727: FILE *, output)
1.1 timbl 728: {
729: do {
730: int status = NETREAD(
1.13 timbl 731: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 732: if (status <= 0) {
733: if (status == 0) return 0;
734: if (TRACE) fprintf(stderr,
1.39 frystyk 735: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 736: return 2; /* Error */
737: }
1.13 timbl 738: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 739: } while (YES);
740: }
741:
1.38 luotonen 742:
743: /*
744: * Normal HTTP headers are never bigger than 2K.
745: */
746: #define S_BUFFER_SIZE 2000
747:
748: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
749: {
750: if (isoc) {
751: isoc->s_do_buffering = YES;
752: if (!isoc->s_buffer) {
753: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
754: isoc->s_buffer_size = S_BUFFER_SIZE;
755: }
756: isoc->s_buffer_cur = isoc->s_buffer;
757: }
758: }
759:
760: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
761: {
762: if (isoc) {
763: isoc->s_do_buffering = NO;
764: if (isoc->s_buffer_cur)
765: *isoc->s_buffer_cur = 0;
766: }
767: }
768:
769: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
770: char **, buffer_ptr)
771: {
772: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
773: return 0;
774: else {
775: *isoc->s_buffer_cur = 0;
776: if (buffer_ptr)
777: *buffer_ptr = isoc->s_buffer;
778: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
779: }
780: }
1.1 timbl 781:
1.33 luotonen 782: PRIVATE BOOL better_match ARGS2(HTFormat, f,
783: HTFormat, g)
784: {
785: CONST char *p, *q;
786:
787: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
788: int i,j;
789: for(i=0 ; *p; p++) if (*p == '*') i++;
790: for(j=0 ; *q; q++) if (*q == '*') j++;
791: if (i < j) return YES;
792: }
793: return NO;
794: }
795:
1.17 luotonen 796:
1.2 timbl 797: /* Create a filter stack
798: ** ---------------------
799: **
1.7 secret 800: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 801: ** structure is made to hold the destination format while the
802: ** new stack is generated. This is just to pass the out format to
803: ** MIME so far. Storing the format of a stream in the stream might
804: ** be a lot neater.
1.10 timbl 805: **
1.29 frystyk 806: ** The star/star format is special, in that if you can take
1.40 frystyk 807: ** that you can take anything.
808: **
809: ** On success, request->error_block is set to YES so no more error
810: ** messages to the stream as the stream might be of any format.
1.2 timbl 811: */
1.34 luotonen 812: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
813: HTRequest *, request,
814: BOOL, guess)
1.2 timbl 815: {
1.12 timbl 816: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 817: HTList * conversion[2];
818: int which_list;
1.25 frystyk 819: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 820: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 821:
1.2 timbl 822: if (TRACE) fprintf(stderr,
1.39 frystyk 823: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 824: HTAtom_name(rep_in),
1.2 timbl 825: HTAtom_name(rep_out));
1.34 luotonen 826:
827: if (guess && rep_in == WWW_UNKNOWN) {
828: CTRACE(stderr, "Returning... guessing stream\n");
1.40 frystyk 829: request->error_block = YES; /* No more error output to stream */
1.34 luotonen 830: return HTGuess_new(request);
831: }
832:
1.21 luotonen 833: if (rep_out == WWW_SOURCE || rep_out == rep_in)
834: return request->output_stream;
1.2 timbl 835:
1.14 timbl 836: conversion[0] = request->conversions;
837: conversion[1] = HTConversions;
1.17 luotonen 838:
1.15 luotonen 839: for(which_list = 0; which_list<2; which_list++) {
840: HTList * cur = conversion[which_list];
841:
842: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 843: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 844: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
845: if (!best_match ||
846: better_match(pres->rep, best_match->rep) ||
847: (!better_match(best_match->rep, pres->rep) &&
848: pres->quality > best_quality)) {
1.25 frystyk 849: best_match = pres;
850: best_quality = pres->quality;
1.10 timbl 851: }
852: }
1.33 luotonen 853:
1.29 frystyk 854: #ifdef OLD_CODE
855: /* This case is now included in the best_match loop */
1.25 frystyk 856: /* Special case when input format is 'www/source' */
1.10 timbl 857: if (pres->rep == source) {
1.29 frystyk 858: if (pres->rep_out == rep_out ||
859: wild_match(pres->rep_out, rep_out))
1.10 timbl 860: source_match = pres;
1.2 timbl 861: }
1.29 frystyk 862: #endif
1.2 timbl 863: }
864: }
1.33 luotonen 865:
1.29 frystyk 866: match = best_match ? best_match : NULL;
867: if (match) {
1.40 frystyk 868: request->error_block = YES; /* No more error output to stream */
1.29 frystyk 869: if (match->rep == WWW_SOURCE) {
1.39 frystyk 870: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 871: HTAtom_name(match->rep),
872: HTAtom_name(rep_out));
873: }
874: return (*match->converter)(
1.25 frystyk 875: request, match->command, rep_in, rep_out,
876: request->output_stream);
1.29 frystyk 877: }
1.2 timbl 878: return NULL;
879: }
880:
881:
882: /* Find the cost of a filter stack
883: ** -------------------------------
884: **
885: ** Must return the cost of the same stack which StreamStack would set up.
886: **
887: ** On entry,
888: ** length The size of the data to be converted
889: */
1.12 timbl 890: PUBLIC float HTStackValue ARGS5(
1.14 timbl 891: HTList *, theseConversions,
1.10 timbl 892: HTFormat, rep_in,
1.2 timbl 893: HTFormat, rep_out,
894: float, initial_value,
895: long int, length)
896: {
1.14 timbl 897: int which_list;
898: HTList* conversion[2];
899:
1.2 timbl 900: if (TRACE) fprintf(stderr,
1.39 frystyk 901: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 902: HTAtom_name(rep_in), initial_value,
1.2 timbl 903: HTAtom_name(rep_out));
904:
905: if (rep_out == WWW_SOURCE ||
1.10 timbl 906: rep_out == rep_in) return 0.0;
1.2 timbl 907:
1.12 timbl 908: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 909:
1.14 timbl 910: conversion[0] = theseConversions;
911: conversion[1] = HTConversions;
912:
913: for(which_list = 0; which_list<2; which_list++)
914: if (conversion[which_list]) {
1.15 luotonen 915: HTList * cur = conversion[which_list];
1.2 timbl 916: HTPresentation * pres;
1.15 luotonen 917: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
918: if (pres->rep == rep_in &&
1.17 luotonen 919: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 920: float value = initial_value * pres->quality;
921: if (HTMaxSecs != 0.0)
1.15 luotonen 922: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 923: /HTMaxSecs;
924: return value;
925: }
926: }
927: }
928:
929: return -1e30; /* Really bad */
1.17 luotonen 930: }
931:
932:
1.2 timbl 933:
1.1 timbl 934:
1.2 timbl 935: /* Push data from a socket down a stream
936: ** -------------------------------------
1.1 timbl 937: **
1.2 timbl 938: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 939: ** graphic (or other) objects described by the file.
1.2 timbl 940: **
941: ** The file number given is assumed to be a TELNET stream ie containing
942: ** CRLF at the end of lines which need to be stripped to LF for unix
943: ** when the format is textual.
944: **
1.26 luotonen 945: ** RETURNS the number of bytes transferred.
946: **
1.1 timbl 947: */
1.26 luotonen 948: PUBLIC int HTCopy ARGS2(
1.2 timbl 949: int, file_number,
950: HTStream*, sink)
1.1 timbl 951: {
1.2 timbl 952: HTStreamClass targetClass;
1.13 timbl 953: HTInputSocket * isoc;
1.26 luotonen 954: int cnt = 0;
955:
1.5 timbl 956: /* Push the data down the stream
1.2 timbl 957: **
958: */
959: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 960: isoc = HTInputSocket_new(file_number);
1.2 timbl 961:
962: /* Push binary from socket down sink
1.10 timbl 963: **
964: ** This operation could be put into a main event loop
1.2 timbl 965: */
966: for(;;) {
967: int status = NETREAD(
1.13 timbl 968: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 969: if (status <= 0) {
970: if (status == 0) break;
971: if (TRACE) fprintf(stderr,
1.39 frystyk 972: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 973: status, errno);
1.2 timbl 974: break;
975: }
1.26 luotonen 976:
1.8 timbl 977: #ifdef NOT_ASCII
978: {
979: char * p;
1.13 timbl 980: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 981: *p = FROMASCII(*p);
982: }
983: }
984: #endif
985:
1.13 timbl 986: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 987: cnt += status;
1.2 timbl 988: } /* next bufferload */
1.26 luotonen 989:
1.13 timbl 990: HTInputSocket_free(isoc);
1.26 luotonen 991:
992: return cnt;
1.2 timbl 993: }
994:
1.1 timbl 995:
1.7 secret 996:
997: /* Push data from a file pointer down a stream
998: ** -------------------------------------
999: **
1000: ** This routine is responsible for creating and PRESENTING any
1001: ** graphic (or other) objects described by the file.
1002: **
1003: **
1004: */
1005: PUBLIC void HTFileCopy ARGS2(
1006: FILE *, fp,
1007: HTStream*, sink)
1008: {
1009: HTStreamClass targetClass;
1.13 timbl 1010: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1011:
1012: /* Push the data down the stream
1013: **
1014: */
1015: targetClass = *(sink->isa); /* Copy pointers to procedures */
1016:
1017: /* Push binary from socket down sink
1018: */
1019: for(;;) {
1020: int status = fread(
1021: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1022: if (status == 0) { /* EOF or error */
1023: if (ferror(fp) == 0) break;
1024: if (TRACE) fprintf(stderr,
1.39 frystyk 1025: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1026: break;
1027: }
1028: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1029: } /* next bufferload */
1.7 secret 1030: }
1031:
1032:
1033:
1034:
1.2 timbl 1035: /* Push data from a socket down a stream STRIPPING CR
1036: ** --------------------------------------------------
1037: **
1038: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1039: ** graphic (or other) objects described by the socket.
1.2 timbl 1040: **
1041: ** The file number given is assumed to be a TELNET stream ie containing
1042: ** CRLF at the end of lines which need to be stripped to LF for unix
1043: ** when the format is textual.
1.37 frystyk 1044: **
1045: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1046: */
1.2 timbl 1047: PUBLIC void HTCopyNoCR ARGS2(
1048: int, file_number,
1049: HTStream*, sink)
1050: {
1.13 timbl 1051: HTStreamClass targetClass;
1052: HTInputSocket * isoc;
1.37 frystyk 1053: int ch;
1.1 timbl 1054:
1.2 timbl 1055: /* Push the data, ignoring CRLF, down the stream
1056: **
1057: */
1058: targetClass = *(sink->isa); /* Copy pointers to procedures */
1059:
1060: /* Push text from telnet socket down sink
1061: **
1062: ** @@@@@ To push strings could be faster? (especially is we
1063: ** cheat and don't ignore CR! :-}
1064: */
1.13 timbl 1065: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1066: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1067: (*targetClass.put_character)(sink, ch);
1.13 timbl 1068: HTInputSocket_free(isoc);
1.2 timbl 1069: }
1.1 timbl 1070:
1.2 timbl 1071:
1.7 secret 1072:
1.2 timbl 1073: /* Parse a socket given format and file number
1074: **
1075: ** This routine is responsible for creating and PRESENTING any
1076: ** graphic (or other) objects described by the file.
1077: **
1078: ** The file number given is assumed to be a TELNET stream ie containing
1079: ** CRLF at the end of lines which need to be stripped to LF for unix
1080: ** when the format is textual.
1081: **
1082: */
1.14 timbl 1083:
1.12 timbl 1084: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1085: HTFormat, rep_in,
1.2 timbl 1086: int, file_number,
1.12 timbl 1087: HTRequest *, request)
1.2 timbl 1088: {
1089: HTStream * stream;
1090: HTStreamClass targetClass;
1.1 timbl 1091:
1.40 frystyk 1092: if (request->error_stack) {
1093: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1094: return -1;
1095: }
1096:
1.34 luotonen 1097: stream = HTStreamStack(rep_in, request, YES);
1.29 frystyk 1098:
1.2 timbl 1099: if (!stream) {
1.30 frystyk 1100: char buffer[1024]; /* @@@@@@@@ */
1.2 timbl 1101: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1102: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.39 frystyk 1103: if (TRACE) fprintf(stderr, "ParseSocket. %s\n", buffer);
1.16 luotonen 1104: return HTLoadError(request, 501, buffer);
1.2 timbl 1105: }
1.1 timbl 1106:
1.3 timbl 1107: /* Push the data, ignoring CRLF if necessary, down the stream
1108: **
1.2 timbl 1109: **
1.3 timbl 1110: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1111: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1112: ** The current method smells anyway.
1.2 timbl 1113: */
1114: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1115: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1116: || (request->content_encoding &&
1117: request->content_encoding != HTAtom_for("8bit") &&
1118: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1119: || strstr(HTAtom_name(rep_in), "image/")
1120: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1121: HTCopy(file_number, stream);
1.2 timbl 1122: } else { /* ascii text with CRLFs :-( */
1123: HTCopyNoCR(file_number, stream);
1124: }
1.7 secret 1125: (*targetClass.free)(stream);
1126:
1127: return HT_LOADED;
1128: }
1129:
1130:
1131:
1132: /* Parse a file given format and file pointer
1133: **
1134: ** This routine is responsible for creating and PRESENTING any
1135: ** graphic (or other) objects described by the file.
1136: **
1137: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1138: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1139: ** when the format is textual.
1140: **
1141: */
1.12 timbl 1142: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1143: HTFormat, rep_in,
1.7 secret 1144: FILE *, fp,
1.12 timbl 1145: HTRequest *, request)
1.7 secret 1146: {
1147: HTStream * stream;
1148: HTStreamClass targetClass;
1.40 frystyk 1149:
1150: if (request->error_stack) {
1151: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1152: return -1;
1153: }
1.7 secret 1154:
1.34 luotonen 1155: stream = HTStreamStack(rep_in, request, YES);
1.7 secret 1156:
1157: if (!stream) {
1.30 frystyk 1158: char buffer[1024]; /* @@@@@@@@ */
1.7 secret 1159: sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12 timbl 1160: HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.39 frystyk 1161: if (TRACE) fprintf(stderr, "ParseFile... %s\n", buffer);
1.29 frystyk 1162: return HTLoadError(request, 501, buffer);
1.7 secret 1163: }
1164:
1.9 timbl 1165: /* Push the data down the stream
1.7 secret 1166: **
1167: **
1168: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1169: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1170: ** The current method smells anyway.
1171: */
1172: targetClass = *(stream->isa); /* Copy pointers to procedures */
1173: HTFileCopy(fp, stream);
1.2 timbl 1174: (*targetClass.free)(stream);
1.1 timbl 1175:
1.2 timbl 1176: return HT_LOADED;
1.1 timbl 1177: }
1.2 timbl 1178:
1.10 timbl 1179:
1180: /* Converter stream: Network Telnet to internal character text
1181: ** -----------------------------------------------------------
1182: **
1183: ** The input is assumed to be in ASCII, with lines delimited
1184: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1185: ** pairs in the local representation. The (CR,LF) sequence
1186: ** when found is changed to a '\n' character, the internal
1187: ** C representation of a new line.
1188: */
1189:
1190:
1.11 timbl 1191: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1192: {
1193: char c = FROMASCII(net_char);
1194: if (me->had_cr) {
1195: if (c==LF) {
1196: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1197: me->had_cr = NO;
1198: return;
1199: } else {
1200: me->sink->isa->put_character(me->sink, CR); /* leftover */
1201: }
1202: }
1203: me->had_cr = (c==CR);
1204: if (!me->had_cr)
1205: me->sink->isa->put_character(me->sink, c); /* normal */
1206: }
1207:
1.11 timbl 1208: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1209: {
1210: CONST char * p;
1211: for(p=s; *p; p++) NetToText_put_character(me, *p);
1212: }
1213:
1.11 timbl 1214: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1215: {
1216: CONST char * p;
1217: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1218: }
1219:
1220: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1221: {
1222: me->sink->isa->free(me->sink); /* Close rest of pipe */
1223: free(me);
1224: }
1225:
1226: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1227: {
1228: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1229: free(me);
1230: }
1231:
1232: /* The class structure
1233: */
1234: PRIVATE HTStreamClass NetToTextClass = {
1235: "NetToText",
1236: NetToText_free,
1237: NetToText_abort,
1238: NetToText_put_character,
1239: NetToText_put_string,
1240: NetToText_put_block
1241: };
1242:
1243: /* The creation method
1244: */
1245: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1246: {
1247: HTStream* me = (HTStream*)malloc(sizeof(*me));
1248: if (me == NULL) outofmem(__FILE__, "NetToText");
1249: me->isa = &NetToTextClass;
1250:
1251: me->had_cr = NO;
1252: me->sink = sink;
1253: return me;
1254: }
1.2 timbl 1255:
1256:
Webmaster