Annotation of libwww/Library/src/HTFormat.c, revision 1.49
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
1.45 duns 12: ** HISTORY:
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
14: **
1.2 timbl 15: */
16:
1.10 timbl 17:
1.2 timbl 18: /* Implements:
1.1 timbl 19: */
1.2 timbl 20: #include "HTFormat.h"
21:
22: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
23: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
24:
25: #ifdef unix
26: #ifdef NeXT
27: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
28: #else
29: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
30: /* Full pathname would be better! */
31: #endif
32: #endif
33:
1.1 timbl 34:
35: #include "HTUtils.h"
36: #include "tcp.h"
37:
38: #include "HTML.h"
1.12 timbl 39: #include "HTMLPDTD.h"
1.2 timbl 40: #include "HTAlert.h"
41: #include "HTList.h"
42: #include "HTInit.h"
43: /* Streams and structured streams which we use:
44: */
45: #include "HTFWriter.h"
46: #include "HTPlain.h"
47: #include "SGML.h"
48: #include "HTML.h"
49: #include "HTMLGen.h"
1.41 frystyk 50: #include "HTTCP.h"
1.34 luotonen 51: #include "HTGuess.h"
1.42 frystyk 52: #include "HTError.h"
1.34 luotonen 53:
1.2 timbl 54:
55: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
56:
1.10 timbl 57: #ifdef ORIGINAL
1.2 timbl 58: struct _HTStream {
59: CONST HTStreamClass* isa;
60: /* ... */
61: };
1.10 timbl 62: #endif
63:
64: /* this version used by the NetToText stream */
65: struct _HTStream {
66: CONST HTStreamClass * isa;
67: BOOL had_cr;
68: HTStream * sink;
69: };
1.2 timbl 70:
71:
1.17 luotonen 72: /*
73: ** Accept-Encoding and Accept-Language
74: */
75: typedef struct _HTAcceptNode {
76: HTAtom * atom;
77: float quality;
78: } HTAcceptNode;
79:
80:
81:
82:
1.2 timbl 83: /* Presentation methods
84: ** --------------------
85: */
86:
1.14 timbl 87: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 88:
1.31 frystyk 89: /* -------------------------------------------------------------------------
90: This function replaces the code in HTRequest_delete() in order to keep
91: the data structure hidden (it is NOT a joke!)
92: Henrik 14/03-94
93: ------------------------------------------------------------------------- */
94: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
95: {
96: HTList *cur = me;
97: HTPresentation *pres;
98: if (!me)
99: return;
100: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
101: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
102: free(pres);
103: }
104: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
105: }
106:
1.2 timbl 107:
108: /* Define a presentation system command for a content-type
109: ** -------------------------------------------------------
110: */
1.49 ! howcome 111: PUBLIC void HTSetPresentation ARGS7(
1.12 timbl 112: HTList *, conversions,
113: CONST char *, representation,
114: CONST char *, command,
1.49 ! howcome 115: CONST char *, test_command, /* HWL 27/9/94: mailcap functionality */
1.12 timbl 116: float, quality,
117: float, secs,
118: float, secs_per_byte
1.2 timbl 119: ){
1.49 ! howcome 120: char * test_command = NULL;
1.2 timbl 121:
122: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
123: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
124:
125: pres->rep = HTAtom_for(representation);
126: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
127: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
128: pres->quality = quality;
129: pres->secs = secs;
130: pres->secs_per_byte = secs_per_byte;
131: pres->rep = HTAtom_for(representation);
1.49 ! howcome 132: pres->command = NULL;
1.2 timbl 133: StrAllocCopy(pres->command, command);
1.49 ! howcome 134: pres->test_command = NULL;
! 135: StrAllocCopy(pres->test_command, test_command);
1.2 timbl 136:
1.12 timbl 137: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 138:
1.15 luotonen 139: #ifdef OLD_CODE
140: if (strcmp(representation, "*")==0) {
1.2 timbl 141: if (default_presentation) free(default_presentation);
142: default_presentation = pres;
1.12 timbl 143: } else
144: #endif
145: HTList_addObject(conversions, pres);
1.2 timbl 146: }
147:
148:
149: /* Define a built-in function for a content-type
150: ** ---------------------------------------------
151: */
1.12 timbl 152: PUBLIC void HTSetConversion ARGS7(
153: HTList *, conversions,
154: CONST char *, representation_in,
155: CONST char *, representation_out,
1.6 timbl 156: HTConverter*, converter,
1.12 timbl 157: float, quality,
158: float, secs,
159: float, secs_per_byte
1.2 timbl 160: ){
1.1 timbl 161:
1.2 timbl 162: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
163: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
164:
165: pres->rep = HTAtom_for(representation_in);
166: pres->rep_out = HTAtom_for(representation_out);
167: pres->converter = converter;
168: pres->command = NULL; /* Fixed */
1.49 ! howcome 169: pres->test_command = NULL;
1.2 timbl 170: pres->quality = quality;
171: pres->secs = secs;
172: pres->secs_per_byte = secs_per_byte;
1.49 ! howcome 173: /* pres->command = 0; */
1.2 timbl 174:
1.12 timbl 175: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 176:
1.12 timbl 177: #ifdef OLD_CODE
1.2 timbl 178: if (strcmp(representation_in, "*")==0) {
179: if (default_presentation) free(default_presentation);
180: default_presentation = pres;
1.12 timbl 181: } else
182: #endif
183: HTList_addObject(conversions, pres);
1.2 timbl 184: }
1.1 timbl 185:
186:
187:
1.17 luotonen 188: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
189: char *, enc,
190: float, quality)
191: {
192: HTAcceptNode * node;
193: char * cur;
194:
195: if (!list || !enc || !*enc) return;
196:
197: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
198:
199: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
200: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
201: HTList_addObject(list, (void*)node);
202:
203: node->atom = HTAtom_for(enc);
204: node->quality = quality;
205: }
206:
207:
208: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
209: char *, lang,
210: float, quality)
211: {
212: HTAcceptNode * node;
213:
214: if (!list || !lang || !*lang) return;
215:
216: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
217: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
218:
219: HTList_addObject(list, (void*)node);
220: node->atom = HTAtom_for(lang);
221: node->quality = quality;
222: }
223:
224:
1.48 frystyk 225: PRIVATE BOOL wild_match ARGS2(HTAtom *, tmplate,
1.17 luotonen 226: HTAtom *, actual)
227: {
228: char *t, *a, *st, *sa;
229: BOOL match = NO;
230:
1.48 frystyk 231: if (tmplate && actual && (t = HTAtom_name(tmplate))) {
1.22 luotonen 232: if (!strcmp(t, "*"))
233: return YES;
1.17 luotonen 234:
1.22 luotonen 235: if (strchr(t, '*') &&
236: (a = HTAtom_name(actual)) &&
237: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 238:
1.22 luotonen 239: *sa = 0;
240: *st = 0;
241:
242: if ((*(st-1)=='*' &&
243: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
244: (*(st+1)=='*' && !strcasecomp(t,a)))
245: match = YES;
246:
247: *sa = '/';
248: *st = '/';
249: }
250: }
1.23 luotonen 251: return match;
1.17 luotonen 252: }
253:
1.36 luotonen 254: /*
255: * Added by takada@seraph.ntt.jp (94/04/08)
256: */
1.48 frystyk 257: PRIVATE BOOL lang_match ARGS2(HTAtom *, tmplate,
1.36 luotonen 258: HTAtom *, actual)
259: {
260: char *t, *a, *st, *sa;
261: BOOL match = NO;
262:
1.48 frystyk 263: if (tmplate && actual &&
264: (t = HTAtom_name(tmplate)) && (a = HTAtom_name(actual))) {
1.36 luotonen 265: st = strchr(t, '_');
266: sa = strchr(a, '_');
267: if ((st != NULL) && (sa != NULL)) {
268: if (!strcasecomp(t, a))
269: match = YES;
270: else
271: match = NO;
272: }
273: else {
274: if (st != NULL) *st = 0;
275: if (sa != NULL) *sa = 0;
276: if (!strcasecomp(t, a))
277: match = YES;
278: else
279: match = NO;
280: if (st != NULL) *st = '_';
281: if (sa != NULL) *sa = '_';
282: }
283: }
284: return match;
285: }
286: /* end of addition */
287:
288:
1.17 luotonen 289:
290: PRIVATE float type_value ARGS2(HTAtom *, content_type,
291: HTList *, accepted)
292: {
293: HTList * cur = accepted;
294: HTPresentation * pres;
295: HTPresentation * wild = NULL;
296:
297: if (!content_type || !accepted) return -1;
298:
299: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
300: if (pres->rep == content_type)
301: return pres->quality;
302: else if (wild_match(pres->rep, content_type))
303: wild = pres;
304: }
305: if (wild) return wild->quality;
306: else return -1;
307: }
308:
309:
310: PRIVATE float lang_value ARGS2(HTAtom *, language,
311: HTList *, accepted)
312: {
313: HTList * cur = accepted;
314: HTAcceptNode * node;
315: HTAcceptNode * wild = NULL;
316:
317: if (!language || !accepted || HTList_isEmpty(accepted)) {
318: return 0.1;
319: }
320:
321: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
322: if (node->atom == language) {
323: return node->quality;
324: }
1.36 luotonen 325: /*
326: * patch by takada@seraph.ntt.jp (94/04/08)
327: * the original line was
328: * else if (wild_match(node->atom, language)) {
329: * and the new line is
330: */
331: else if (lang_match(node->atom, language)) {
1.17 luotonen 332: wild = node;
333: }
334: }
335:
336: if (wild) {
337: return wild->quality;
338: }
339: else {
340: return 0.1;
341: }
342: }
343:
344:
345: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
346: HTList *, accepted)
347: {
348: HTList * cur = accepted;
349: HTAcceptNode * node;
350: HTAcceptNode * wild = NULL;
351: char * e;
352:
353: if (!encoding || !accepted || HTList_isEmpty(accepted))
354: return 1;
355:
356: e = HTAtom_name(encoding);
357: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
358: return 1;
359:
360: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
361: if (node->atom == encoding)
362: return node->quality;
363: else if (wild_match(node->atom, encoding))
364: wild = node;
365: }
366: if (wild) return wild->quality;
367: else return 1;
368: }
369:
370:
371: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
372: HTList *, accepted_content_types,
373: HTList *, accepted_languages,
374: HTList *, accepted_encodings)
375: {
376: int accepted_cnt = 0;
377: HTList * accepted;
378: HTList * sorted;
379: HTList * cur;
380: HTContentDescription * d;
381:
382: if (!possibilities) return NO;
383:
384: accepted = HTList_new();
385: cur = possibilities;
386: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
387: float tv = type_value(d->content_type, accepted_content_types);
388: float lv = lang_value(d->content_language, accepted_languages);
389: float ev = encoding_value(d->content_encoding, accepted_encodings);
390:
391: if (tv > 0) {
392: d->quality *= tv * lv * ev;
393: HTList_addObject(accepted, d);
394: accepted_cnt++;
395: }
1.18 luotonen 396: else {
397: if (d->filename) free(d->filename);
398: free(d);
399: }
1.17 luotonen 400: }
401:
1.18 luotonen 402: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 403: CTRACE(stderr,
1.18 luotonen 404: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 405:
406: sorted = HTList_new();
407: while (accepted_cnt-- > 0) {
408: HTContentDescription * worst = NULL;
409: cur = accepted;
410: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
411: if (!worst || d->quality < worst->quality)
412: worst = d;
413: }
414: if (worst) {
415: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
416: accepted_cnt+1,
417: worst->quality,
418: (worst->content_type
419: ? HTAtom_name(worst->content_type) : "-"),
420: (worst->content_language
421: ? HTAtom_name(worst->content_language) :"-"),
422: (worst->content_encoding
423: ? HTAtom_name(worst->content_encoding) :"-"),
424: (worst->filename
425: ? worst->filename :"-"));
426: HTList_removeObject(accepted, (void*)worst);
427: HTList_addObject(sorted, (void*)worst);
428: }
429: }
1.18 luotonen 430: CTRACE(stderr, "\n");
1.17 luotonen 431: HTList_delete(accepted);
432: HTList_delete(possibilities->next);
433: possibilities->next = sorted->next;
434: sorted->next = NULL;
435: HTList_delete(sorted);
436:
437: if (!HTList_isEmpty(possibilities)) return YES;
438: else return NO;
439: }
440:
441:
442:
443:
444:
1.13 timbl 445: /* Socket Input Buffering
446: ** ----------------------
1.1 timbl 447: **
1.13 timbl 448: ** This code is used because one cannot in general open a
449: ** file descriptor for a socket.
450: **
1.1 timbl 451: ** The input file is read using the macro which can read from
1.13 timbl 452: ** a socket or a file, but this should not be used for files
453: ** as fopen() etc is more portable of course.
454: **
1.1 timbl 455: ** The input buffer size, if large will give greater efficiency and
456: ** release the server faster, and if small will save space on PCs etc.
457: */
458:
459:
460: /* Set up the buffering
461: **
462: ** These routines are public because they are in fact needed by
463: ** many parsers, and on PCs and Macs we should not duplicate
464: ** the static buffer area.
465: */
1.13 timbl 466: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 467: {
1.28 frystyk 468: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 469: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
470: isoc->input_file_number = file_number;
471: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
472: return isoc;
1.1 timbl 473: }
474:
1.35 frystyk 475: /* This should return HT_INTERRUPTED if interrupted BUT the connection
476: MUST not be closed */
477: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 478: {
1.35 frystyk 479: int ch;
1.1 timbl 480: do {
1.13 timbl 481: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 482: int status = NETREAD(
1.13 timbl 483: isoc->input_file_number,
484: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 485: if (status <= 0) {
1.39 frystyk 486: if (status == 0)
487: return EOF;
488: if (status == HT_INTERRUPTED) {
489: if (TRACE)
490: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
491: return HT_INTERRUPTED;
492: }
493: HTInetStatus("read");
494: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 495: }
1.35 frystyk 496: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 497: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 498: }
1.39 frystyk 499: ch = (unsigned char) *isoc->input_pointer++;
500: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 501:
502: return FROMASCII(ch);
503: }
504:
1.17 luotonen 505: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 506: {
507: if (me) free(me);
508: }
509:
510:
1.16 luotonen 511: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
512: int *, len)
513: {
514: if (isoc->input_pointer >= isoc->input_limit) {
515: int status = NETREAD(isoc->input_file_number,
516: isoc->input_buffer,
517: ((*len < INPUT_BUFFER_SIZE) ?
518: *len : INPUT_BUFFER_SIZE));
519: if (status <= 0) {
520: isoc->input_limit = isoc->input_buffer;
521: if (status < 0)
1.39 frystyk 522: HTInetStatus("read");
1.16 luotonen 523: *len = 0;
524: return NULL;
525: }
526: else {
527: *len = status;
528: return isoc->input_buffer;
529: }
530: }
531: else {
532: char * ret = isoc->input_pointer;
533: *len = isoc->input_limit - isoc->input_pointer;
534: isoc->input_pointer = isoc->input_limit;
535: return ret;
536: }
537: }
538:
539:
1.15 luotonen 540: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
541: {
542: if (isoc) {
543: int status;
544:
545: isoc->input_pointer = isoc->input_buffer;
546: status = NETREAD(isoc->input_file_number,
547: isoc->input_buffer,
548: INPUT_BUFFER_SIZE);
549: if (status <= 0) {
550: isoc->input_limit = isoc->input_buffer;
551: if (status < 0)
1.39 frystyk 552: HTInetStatus("read");
1.15 luotonen 553: }
554: else
555: isoc->input_limit = isoc->input_buffer + status;
556: return status;
557: }
558: return -1;
559: }
560:
561:
562: PRIVATE void ascii_cat ARGS3(char **, linep,
563: char *, start,
564: char *, end)
565: {
566: if (linep && start && end && start <= end) {
567: char *ptr;
568:
569: if (*linep) {
570: int len = strlen(*linep);
571: *linep = (char*)realloc(*linep, len + end-start + 1);
572: ptr = *linep + len;
573: }
574: else {
575: ptr = *linep = (char*)malloc(end-start + 1);
576: }
577:
578: while (start < end) {
579: *ptr = FROMASCII(*start);
580: ptr++;
581: start++;
582: }
583: *ptr = 0;
584: }
585: }
586:
587:
588: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
589: BOOL, unfold)
590: {
591: if (!isoc)
592: return NULL;
593: else {
594: BOOL check_unfold = NO;
595: int prev_cr = 0;
596: char *start = isoc->input_pointer;
597: char *cur = isoc->input_pointer;
598: char * line = NULL;
599:
600: for(;;) {
601: /*
602: ** Get more if needed to complete line
603: */
604: if (cur >= isoc->input_limit) { /* Need more data */
605: ascii_cat(&line, start, cur);
606: if (fill_in_buffer(isoc) <= 0)
607: return line;
608: start = cur = isoc->input_pointer;
609: } /* if need more data */
610:
611: /*
612: ** Find a line feed if there is one
613: */
614: for(; cur < isoc->input_limit; cur++) {
615: char c = FROMASCII(*cur);
616: if (!c) {
1.18 luotonen 617: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 618: return NULL; /* Panic! read a 0! */
619: }
620: if (check_unfold && c != ' ' && c != '\t') {
621: return line; /* Note: didn't update isoc->input_pointer */
622: }
623: else {
624: check_unfold = NO;
625: }
626:
627: if (c=='\r') {
628: prev_cr = 1;
629: }
630: else {
631: if (c=='\n') { /* Found a line feed */
632: ascii_cat(&line, start, cur-prev_cr);
633: start = isoc->input_pointer = cur+1;
634:
1.44 frystyk 635: if (line && (int) strlen(line) > 0 && unfold) {
1.15 luotonen 636: check_unfold = YES;
637: }
638: else {
639: return line;
640: }
641: } /* if NL */
642: /* else just a regular character */
643: prev_cr = 0;
644: } /* if not CR */
645: } /* while characters in buffer remain */
646: } /* until line read or end-of-file */
647: } /* valid parameters to function */
648: }
649:
1.43 frystyk 650: /* The returned string must be freed by the caller */
1.15 luotonen 651: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
652: {
653: return get_some_line(isoc, NO);
654: }
655:
1.43 frystyk 656: /* The returned string must be freed by the caller */
1.15 luotonen 657: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
658: {
659: return get_some_line(isoc, YES);
660: }
661:
662:
663: /*
664: ** Read HTTP status line (if there is one).
665: **
666: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
667: ** First look at the stub in ASCII and check if it starts "HTTP/".
668: **
669: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
670: ** will be taken as a HTTP 1.0 server. Failure.
671: */
672: #define STUB_LENGTH 20
673: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
674: {
675: if (!isoc) {
676: return NULL;
677: }
678: else {
679: char buf[STUB_LENGTH + 1];
680: int i;
681: char server_version[STUB_LENGTH+1];
682: int server_status;
683:
684: /*
685: ** Read initial buffer
686: */
687: if (isoc->input_pointer >= isoc->input_limit &&
688: fill_in_buffer(isoc) <= 0) {
689: return NULL;
690: }
691:
692: for (i=0; i < STUB_LENGTH; i++)
693: buf[i] = FROMASCII(isoc->input_buffer[i]);
694: buf[STUB_LENGTH] = 0;
695:
696: if (0 != strncmp(buf, "HTTP/", 5) ||
697: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
698: return NULL;
699: else
700: return get_some_line(isoc, NO);
701: }
702: }
703:
704:
705: /*
706: ** Do heuristic test to see if this is binary.
707: **
708: ** We check for characters above 128 in the first few bytes, and
709: ** if we find them we forget the html default.
710: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
711: **
712: ** Bugs: An HTTP 0.9 server returning a binary document with
713: ** characters < 128 will be read as ASCII.
714: */
715: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
716: {
717: if (isoc &&
718: (isoc->input_pointer < isoc->input_limit ||
719: fill_in_buffer(isoc) > 0)) {
720: char *p = isoc->input_buffer;
721: int i = STUB_LENGTH;
722:
723: for( ; i && p < isoc->input_limit; p++, i++)
724: if (((int)*p)&128)
725: return YES;
726: }
727: return NO;
728: }
729:
730:
731:
1.1 timbl 732: /* Stream the data to an output file as binary
733: */
1.38 luotonen 734: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 735: int, input,
736: FILE *, output)
1.1 timbl 737: {
738: do {
739: int status = NETREAD(
1.13 timbl 740: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 741: if (status <= 0) {
742: if (status == 0) return 0;
743: if (TRACE) fprintf(stderr,
1.39 frystyk 744: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 745: return 2; /* Error */
746: }
1.13 timbl 747: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 748: } while (YES);
749: }
750:
1.38 luotonen 751:
752: /*
753: * Normal HTTP headers are never bigger than 2K.
754: */
755: #define S_BUFFER_SIZE 2000
756:
757: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
758: {
759: if (isoc) {
760: isoc->s_do_buffering = YES;
761: if (!isoc->s_buffer) {
762: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
763: isoc->s_buffer_size = S_BUFFER_SIZE;
764: }
765: isoc->s_buffer_cur = isoc->s_buffer;
766: }
767: }
768:
769: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
770: {
771: if (isoc) {
772: isoc->s_do_buffering = NO;
773: if (isoc->s_buffer_cur)
774: *isoc->s_buffer_cur = 0;
775: }
776: }
777:
778: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
779: char **, buffer_ptr)
780: {
781: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
782: return 0;
783: else {
784: *isoc->s_buffer_cur = 0;
785: if (buffer_ptr)
786: *buffer_ptr = isoc->s_buffer;
787: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
788: }
789: }
1.1 timbl 790:
1.33 luotonen 791: PRIVATE BOOL better_match ARGS2(HTFormat, f,
792: HTFormat, g)
793: {
794: CONST char *p, *q;
795:
796: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
797: int i,j;
798: for(i=0 ; *p; p++) if (*p == '*') i++;
799: for(j=0 ; *q; q++) if (*q == '*') j++;
800: if (i < j) return YES;
801: }
802: return NO;
803: }
804:
1.17 luotonen 805:
1.2 timbl 806: /* Create a filter stack
807: ** ---------------------
808: **
1.7 secret 809: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 810: ** structure is made to hold the destination format while the
811: ** new stack is generated. This is just to pass the out format to
812: ** MIME so far. Storing the format of a stream in the stream might
813: ** be a lot neater.
1.10 timbl 814: **
1.29 frystyk 815: ** The star/star format is special, in that if you can take
1.40 frystyk 816: ** that you can take anything.
817: **
818: ** On success, request->error_block is set to YES so no more error
819: ** messages to the stream as the stream might be of any format.
1.2 timbl 820: */
1.34 luotonen 821: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
822: HTRequest *, request,
823: BOOL, guess)
1.2 timbl 824: {
1.12 timbl 825: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 826: HTList * conversion[2];
827: int which_list;
1.25 frystyk 828: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 829: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 830:
1.47 frystyk 831: request->error_block = YES; /* No more error output to stream */
1.2 timbl 832: if (TRACE) fprintf(stderr,
1.39 frystyk 833: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 834: HTAtom_name(rep_in),
1.2 timbl 835: HTAtom_name(rep_out));
1.34 luotonen 836:
837: if (guess && rep_in == WWW_UNKNOWN) {
838: CTRACE(stderr, "Returning... guessing stream\n");
839: return HTGuess_new(request);
840: }
841:
1.47 frystyk 842: if (rep_out == WWW_SOURCE || rep_out == rep_in) {
1.21 luotonen 843: return request->output_stream;
1.47 frystyk 844: }
1.2 timbl 845:
1.14 timbl 846: conversion[0] = request->conversions;
847: conversion[1] = HTConversions;
1.17 luotonen 848:
1.15 luotonen 849: for(which_list = 0; which_list<2; which_list++) {
850: HTList * cur = conversion[which_list];
851:
852: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 853: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 854: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
855: if (!best_match ||
856: better_match(pres->rep, best_match->rep) ||
857: (!better_match(best_match->rep, pres->rep) &&
858: pres->quality > best_quality)) {
1.49 ! howcome 859: /* HWL */
! 860: if (!pres->test_command || (system(pres->test_command)==0)) {
! 861: if (TRACE && pres->test_command)
! 862: printf("HTStreamStack testing %s %d\n",pres->test_command,system(pres->test_command));
! 863: best_match = pres;
! 864: best_quality = pres->quality;
! 865: }
1.10 timbl 866: }
867: }
1.2 timbl 868: }
869: }
1.33 luotonen 870:
1.29 frystyk 871: match = best_match ? best_match : NULL;
872: if (match) {
873: if (match->rep == WWW_SOURCE) {
1.39 frystyk 874: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 875: HTAtom_name(match->rep),
876: HTAtom_name(rep_out));
877: }
878: return (*match->converter)(
1.25 frystyk 879: request, match->command, rep_in, rep_out,
880: request->output_stream);
1.29 frystyk 881: }
1.42 frystyk 882: {
883: char *msg = NULL;
884: StrAllocCopy(msg, "Can't convert from ");
885: StrAllocCat(msg, HTAtom_name(rep_in));
886: StrAllocCat(msg, " to ");
887: StrAllocCat(msg, HTAtom_name(rep_out));
888: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NOT_IMPLEMENTED,
889: (void *) msg, (int) strlen(msg), "HTStreamStack");
890: free(msg);
891: }
1.47 frystyk 892: request->error_block = NO; /* We didn't put up a stream anyway */
1.2 timbl 893: return NULL;
894: }
895:
896:
897: /* Find the cost of a filter stack
898: ** -------------------------------
899: **
900: ** Must return the cost of the same stack which StreamStack would set up.
901: **
902: ** On entry,
903: ** length The size of the data to be converted
904: */
1.12 timbl 905: PUBLIC float HTStackValue ARGS5(
1.14 timbl 906: HTList *, theseConversions,
1.10 timbl 907: HTFormat, rep_in,
1.2 timbl 908: HTFormat, rep_out,
909: float, initial_value,
910: long int, length)
911: {
1.14 timbl 912: int which_list;
913: HTList* conversion[2];
914:
1.2 timbl 915: if (TRACE) fprintf(stderr,
1.39 frystyk 916: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 917: HTAtom_name(rep_in), initial_value,
1.2 timbl 918: HTAtom_name(rep_out));
919:
920: if (rep_out == WWW_SOURCE ||
1.10 timbl 921: rep_out == rep_in) return 0.0;
1.2 timbl 922:
1.12 timbl 923: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 924:
1.14 timbl 925: conversion[0] = theseConversions;
926: conversion[1] = HTConversions;
927:
928: for(which_list = 0; which_list<2; which_list++)
929: if (conversion[which_list]) {
1.15 luotonen 930: HTList * cur = conversion[which_list];
1.2 timbl 931: HTPresentation * pres;
1.15 luotonen 932: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
933: if (pres->rep == rep_in &&
1.17 luotonen 934: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 935: float value = initial_value * pres->quality;
936: if (HTMaxSecs != 0.0)
1.15 luotonen 937: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 938: /HTMaxSecs;
939: return value;
940: }
941: }
942: }
943:
944: return -1e30; /* Really bad */
1.17 luotonen 945: }
946:
947:
1.2 timbl 948:
1.1 timbl 949:
1.2 timbl 950: /* Push data from a socket down a stream
951: ** -------------------------------------
1.1 timbl 952: **
1.2 timbl 953: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 954: ** graphic (or other) objects described by the file.
1.2 timbl 955: **
956: ** The file number given is assumed to be a TELNET stream ie containing
957: ** CRLF at the end of lines which need to be stripped to LF for unix
958: ** when the format is textual.
959: **
1.26 luotonen 960: ** RETURNS the number of bytes transferred.
961: **
1.1 timbl 962: */
1.26 luotonen 963: PUBLIC int HTCopy ARGS2(
1.2 timbl 964: int, file_number,
965: HTStream*, sink)
1.1 timbl 966: {
1.2 timbl 967: HTStreamClass targetClass;
1.13 timbl 968: HTInputSocket * isoc;
1.26 luotonen 969: int cnt = 0;
970:
1.5 timbl 971: /* Push the data down the stream
1.2 timbl 972: **
973: */
974: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 975: isoc = HTInputSocket_new(file_number);
1.2 timbl 976:
977: /* Push binary from socket down sink
1.10 timbl 978: **
979: ** This operation could be put into a main event loop
1.2 timbl 980: */
981: for(;;) {
982: int status = NETREAD(
1.13 timbl 983: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 984: if (status <= 0) {
985: if (status == 0) break;
986: if (TRACE) fprintf(stderr,
1.39 frystyk 987: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 988: status, errno);
1.2 timbl 989: break;
990: }
1.26 luotonen 991:
1.8 timbl 992: #ifdef NOT_ASCII
993: {
994: char * p;
1.13 timbl 995: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 996: *p = FROMASCII(*p);
997: }
998: }
999: #endif
1000:
1.13 timbl 1001: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 1002: cnt += status;
1.2 timbl 1003: } /* next bufferload */
1.26 luotonen 1004:
1.13 timbl 1005: HTInputSocket_free(isoc);
1.26 luotonen 1006:
1007: return cnt;
1.2 timbl 1008: }
1009:
1.1 timbl 1010:
1.7 secret 1011:
1012: /* Push data from a file pointer down a stream
1013: ** -------------------------------------
1014: **
1015: ** This routine is responsible for creating and PRESENTING any
1016: ** graphic (or other) objects described by the file.
1017: **
1018: **
1019: */
1020: PUBLIC void HTFileCopy ARGS2(
1021: FILE *, fp,
1022: HTStream*, sink)
1023: {
1024: HTStreamClass targetClass;
1.13 timbl 1025: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1026:
1027: /* Push the data down the stream
1028: **
1029: */
1030: targetClass = *(sink->isa); /* Copy pointers to procedures */
1031:
1032: /* Push binary from socket down sink
1033: */
1034: for(;;) {
1035: int status = fread(
1036: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1037: if (status == 0) { /* EOF or error */
1038: if (ferror(fp) == 0) break;
1039: if (TRACE) fprintf(stderr,
1.39 frystyk 1040: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1041: break;
1042: }
1043: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1044: } /* next bufferload */
1.7 secret 1045: }
1046:
1047:
1048:
1049:
1.2 timbl 1050: /* Push data from a socket down a stream STRIPPING CR
1051: ** --------------------------------------------------
1052: **
1053: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1054: ** graphic (or other) objects described by the socket.
1.2 timbl 1055: **
1056: ** The file number given is assumed to be a TELNET stream ie containing
1057: ** CRLF at the end of lines which need to be stripped to LF for unix
1058: ** when the format is textual.
1.37 frystyk 1059: **
1060: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1061: */
1.2 timbl 1062: PUBLIC void HTCopyNoCR ARGS2(
1063: int, file_number,
1064: HTStream*, sink)
1065: {
1.13 timbl 1066: HTStreamClass targetClass;
1067: HTInputSocket * isoc;
1.37 frystyk 1068: int ch;
1.1 timbl 1069:
1.2 timbl 1070: /* Push the data, ignoring CRLF, down the stream
1071: **
1072: */
1073: targetClass = *(sink->isa); /* Copy pointers to procedures */
1074:
1075: /* Push text from telnet socket down sink
1076: **
1077: ** @@@@@ To push strings could be faster? (especially is we
1078: ** cheat and don't ignore CR! :-}
1079: */
1.13 timbl 1080: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1081: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1082: (*targetClass.put_character)(sink, ch);
1.13 timbl 1083: HTInputSocket_free(isoc);
1.2 timbl 1084: }
1.1 timbl 1085:
1.2 timbl 1086:
1.46 frystyk 1087: /* To be replaced by a stream */
1088: PUBLIC void HTCopyDot ARGS2(int, file_number,
1089: HTStream *, sink)
1090: {
1091: HTStreamClass targetClass;
1092: HTInputSocket * isoc;
1093: int ch;
1094: int state=3;
1095:
1096: /* Push the data, ignoring CRLF, down the stream */
1097: targetClass = *(sink->isa); /* Copy pointers to procedures */
1098: isoc = HTInputSocket_new(file_number);
1099: while (state && (ch = HTInputSocket_getCharacter(isoc)) >= 0) {
1100: if (ch == '\n')
1101: state--;
1102: else if (state==2 && ch=='.')
1103: state--;
1104: else
1105: state = 3;
1106: (*targetClass.put_character)(sink, ch);
1107: }
1108: HTInputSocket_free(isoc);
1109: }
1110:
1111:
1.7 secret 1112:
1.2 timbl 1113: /* Parse a socket given format and file number
1114: **
1115: ** This routine is responsible for creating and PRESENTING any
1116: ** graphic (or other) objects described by the file.
1117: **
1118: ** The file number given is assumed to be a TELNET stream ie containing
1119: ** CRLF at the end of lines which need to be stripped to LF for unix
1120: ** when the format is textual.
1121: **
1.42 frystyk 1122: ** Returns <0 on error, HT_LOADED on success.
1.2 timbl 1123: */
1.14 timbl 1124:
1.46 frystyk 1125: /* The parameter to this function and HTParsefile should be HTRequest */
1126:
1.12 timbl 1127: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1128: HTFormat, rep_in,
1.2 timbl 1129: int, file_number,
1.12 timbl 1130: HTRequest *, request)
1.2 timbl 1131: {
1132: HTStream * stream;
1133: HTStreamClass targetClass;
1.1 timbl 1134:
1.40 frystyk 1135: if (request->error_stack) {
1136: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1137: return -1;
1138: }
1139:
1.42 frystyk 1140: /* Set up stream stack */
1141: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1142: return -1;
1.1 timbl 1143:
1.3 timbl 1144: /* Push the data, ignoring CRLF if necessary, down the stream
1145: **
1.2 timbl 1146: **
1.3 timbl 1147: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1148: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1149: ** The current method smells anyway.
1.2 timbl 1150: */
1151: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.46 frystyk 1152: if (request->output_format == WWW_SOURCE && request->net_info->CRLFdotCRLF)
1153: HTCopyDot(file_number, stream);
1154: else if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1155: || (request->content_encoding &&
1156: request->content_encoding != HTAtom_for("8bit") &&
1157: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1158: || strstr(HTAtom_name(rep_in), "image/")
1159: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1160: HTCopy(file_number, stream);
1.46 frystyk 1161: } else if (request->net_info->CRLFdotCRLF)
1162: HTCopyDot(file_number, stream);
1163: else
1.2 timbl 1164: HTCopyNoCR(file_number, stream);
1.45 duns 1165: (*targetClass._free)(stream);
1.7 secret 1166:
1167: return HT_LOADED;
1168: }
1169:
1170:
1171:
1172: /* Parse a file given format and file pointer
1173: **
1174: ** This routine is responsible for creating and PRESENTING any
1175: ** graphic (or other) objects described by the file.
1176: **
1177: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1178: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1179: ** when the format is textual.
1180: **
1181: */
1.12 timbl 1182: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1183: HTFormat, rep_in,
1.7 secret 1184: FILE *, fp,
1.12 timbl 1185: HTRequest *, request)
1.7 secret 1186: {
1187: HTStream * stream;
1188: HTStreamClass targetClass;
1.40 frystyk 1189:
1190: if (request->error_stack) {
1191: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1192: return -1;
1193: }
1.7 secret 1194:
1.42 frystyk 1195: /* Set up stream stack */
1196: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1197: return -1;
1.7 secret 1198:
1.9 timbl 1199: /* Push the data down the stream
1.7 secret 1200: **
1201: **
1202: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1203: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1204: ** The current method smells anyway.
1205: */
1206: targetClass = *(stream->isa); /* Copy pointers to procedures */
1207: HTFileCopy(fp, stream);
1.45 duns 1208: (*targetClass._free)(stream);
1.1 timbl 1209:
1.2 timbl 1210: return HT_LOADED;
1.1 timbl 1211: }
1.2 timbl 1212:
1.10 timbl 1213:
1214: /* Converter stream: Network Telnet to internal character text
1215: ** -----------------------------------------------------------
1216: **
1217: ** The input is assumed to be in ASCII, with lines delimited
1218: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1219: ** pairs in the local representation. The (CR,LF) sequence
1220: ** when found is changed to a '\n' character, the internal
1221: ** C representation of a new line.
1222: */
1223:
1224:
1.11 timbl 1225: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1226: {
1227: char c = FROMASCII(net_char);
1228: if (me->had_cr) {
1229: if (c==LF) {
1230: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1231: me->had_cr = NO;
1232: return;
1233: } else {
1234: me->sink->isa->put_character(me->sink, CR); /* leftover */
1235: }
1236: }
1237: me->had_cr = (c==CR);
1238: if (!me->had_cr)
1239: me->sink->isa->put_character(me->sink, c); /* normal */
1240: }
1241:
1.11 timbl 1242: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1243: {
1244: CONST char * p;
1245: for(p=s; *p; p++) NetToText_put_character(me, *p);
1246: }
1247:
1.11 timbl 1248: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1249: {
1250: CONST char * p;
1251: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1252: }
1253:
1254: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1255: {
1.45 duns 1256: me->sink->isa->_free(me->sink); /* Close rest of pipe */
1.10 timbl 1257: free(me);
1258: }
1259:
1260: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1261: {
1262: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1263: free(me);
1264: }
1265:
1266: /* The class structure
1267: */
1268: PRIVATE HTStreamClass NetToTextClass = {
1269: "NetToText",
1270: NetToText_free,
1271: NetToText_abort,
1272: NetToText_put_character,
1273: NetToText_put_string,
1274: NetToText_put_block
1275: };
1276:
1277: /* The creation method
1278: */
1279: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1280: {
1281: HTStream* me = (HTStream*)malloc(sizeof(*me));
1282: if (me == NULL) outofmem(__FILE__, "NetToText");
1283: me->isa = &NetToTextClass;
1284:
1285: me->had_cr = NO;
1286: me->sink = sink;
1287: return me;
1288: }
1.2 timbl 1289:
1290:
Webmaster