Annotation of libwww/Library/src/HTFormat.c, revision 1.51
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
1.45 duns 12: ** HISTORY:
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
14: **
1.2 timbl 15: */
16:
1.10 timbl 17:
1.2 timbl 18: /* Implements:
1.1 timbl 19: */
1.2 timbl 20: #include "HTFormat.h"
21:
22: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
23: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
24:
25: #ifdef unix
26: #ifdef NeXT
27: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
28: #else
29: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
30: /* Full pathname would be better! */
31: #endif
32: #endif
33:
1.1 timbl 34:
35: #include "HTUtils.h"
36: #include "tcp.h"
37:
38: #include "HTML.h"
1.12 timbl 39: #include "HTMLPDTD.h"
1.2 timbl 40: #include "HTAlert.h"
41: #include "HTList.h"
42: #include "HTInit.h"
43: /* Streams and structured streams which we use:
44: */
45: #include "HTFWriter.h"
46: #include "HTPlain.h"
47: #include "SGML.h"
48: #include "HTML.h"
49: #include "HTMLGen.h"
1.41 frystyk 50: #include "HTTCP.h"
1.34 luotonen 51: #include "HTGuess.h"
1.42 frystyk 52: #include "HTError.h"
1.34 luotonen 53:
1.2 timbl 54:
55: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
56:
1.10 timbl 57: #ifdef ORIGINAL
1.2 timbl 58: struct _HTStream {
59: CONST HTStreamClass* isa;
60: /* ... */
61: };
1.10 timbl 62: #endif
63:
64: /* this version used by the NetToText stream */
65: struct _HTStream {
66: CONST HTStreamClass * isa;
67: BOOL had_cr;
68: HTStream * sink;
69: };
1.2 timbl 70:
71:
1.17 luotonen 72: /*
73: ** Accept-Encoding and Accept-Language
74: */
75: typedef struct _HTAcceptNode {
76: HTAtom * atom;
77: float quality;
78: } HTAcceptNode;
79:
80:
81:
82:
1.2 timbl 83: /* Presentation methods
84: ** --------------------
85: */
86:
1.14 timbl 87: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 88:
1.31 frystyk 89: /* -------------------------------------------------------------------------
90: This function replaces the code in HTRequest_delete() in order to keep
91: the data structure hidden (it is NOT a joke!)
92: Henrik 14/03-94
93: ------------------------------------------------------------------------- */
94: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
95: {
96: HTList *cur = me;
97: HTPresentation *pres;
98: if (!me)
99: return;
100: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
101: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
102: free(pres);
103: }
104: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
105: }
106:
1.2 timbl 107:
108: /* Define a presentation system command for a content-type
109: ** -------------------------------------------------------
110: */
1.49 howcome 111: PUBLIC void HTSetPresentation ARGS7(
1.12 timbl 112: HTList *, conversions,
113: CONST char *, representation,
114: CONST char *, command,
1.49 howcome 115: CONST char *, test_command, /* HWL 27/9/94: mailcap functionality */
1.12 timbl 116: float, quality,
117: float, secs,
1.51 ! howcome 118: float, secs_per_byte
! 119: ){
1.2 timbl 120: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
121: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
122:
123: pres->rep = HTAtom_for(representation);
124: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
125: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
126: pres->quality = quality;
127: pres->secs = secs;
128: pres->secs_per_byte = secs_per_byte;
129: pres->rep = HTAtom_for(representation);
1.49 howcome 130: pres->command = NULL;
1.2 timbl 131: StrAllocCopy(pres->command, command);
1.49 howcome 132: pres->test_command = NULL;
133: StrAllocCopy(pres->test_command, test_command);
1.2 timbl 134:
1.12 timbl 135: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 136:
1.15 luotonen 137: #ifdef OLD_CODE
138: if (strcmp(representation, "*")==0) {
1.2 timbl 139: if (default_presentation) free(default_presentation);
140: default_presentation = pres;
1.12 timbl 141: } else
142: #endif
143: HTList_addObject(conversions, pres);
1.2 timbl 144: }
145:
146:
147: /* Define a built-in function for a content-type
148: ** ---------------------------------------------
149: */
1.12 timbl 150: PUBLIC void HTSetConversion ARGS7(
151: HTList *, conversions,
152: CONST char *, representation_in,
153: CONST char *, representation_out,
1.6 timbl 154: HTConverter*, converter,
1.12 timbl 155: float, quality,
156: float, secs,
157: float, secs_per_byte
1.2 timbl 158: ){
1.1 timbl 159:
1.2 timbl 160: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
161: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
162:
163: pres->rep = HTAtom_for(representation_in);
164: pres->rep_out = HTAtom_for(representation_out);
165: pres->converter = converter;
166: pres->command = NULL; /* Fixed */
1.49 howcome 167: pres->test_command = NULL;
1.2 timbl 168: pres->quality = quality;
169: pres->secs = secs;
170: pres->secs_per_byte = secs_per_byte;
1.49 howcome 171: /* pres->command = 0; */
1.2 timbl 172:
1.12 timbl 173: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 174:
1.12 timbl 175: #ifdef OLD_CODE
1.2 timbl 176: if (strcmp(representation_in, "*")==0) {
177: if (default_presentation) free(default_presentation);
178: default_presentation = pres;
1.12 timbl 179: } else
180: #endif
181: HTList_addObject(conversions, pres);
1.2 timbl 182: }
1.1 timbl 183:
184:
185:
1.17 luotonen 186: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
187: char *, enc,
188: float, quality)
189: {
190: HTAcceptNode * node;
191: char * cur;
192:
193: if (!list || !enc || !*enc) return;
194:
195: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
196:
197: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
198: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
199: HTList_addObject(list, (void*)node);
200:
201: node->atom = HTAtom_for(enc);
202: node->quality = quality;
203: }
204:
205:
206: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
207: char *, lang,
208: float, quality)
209: {
210: HTAcceptNode * node;
211:
212: if (!list || !lang || !*lang) return;
213:
214: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
215: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
216:
217: HTList_addObject(list, (void*)node);
218: node->atom = HTAtom_for(lang);
219: node->quality = quality;
220: }
221:
222:
1.48 frystyk 223: PRIVATE BOOL wild_match ARGS2(HTAtom *, tmplate,
1.17 luotonen 224: HTAtom *, actual)
225: {
226: char *t, *a, *st, *sa;
227: BOOL match = NO;
228:
1.48 frystyk 229: if (tmplate && actual && (t = HTAtom_name(tmplate))) {
1.22 luotonen 230: if (!strcmp(t, "*"))
231: return YES;
1.17 luotonen 232:
1.22 luotonen 233: if (strchr(t, '*') &&
234: (a = HTAtom_name(actual)) &&
235: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 236:
1.22 luotonen 237: *sa = 0;
238: *st = 0;
239:
240: if ((*(st-1)=='*' &&
241: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
242: (*(st+1)=='*' && !strcasecomp(t,a)))
243: match = YES;
244:
245: *sa = '/';
246: *st = '/';
247: }
248: }
1.23 luotonen 249: return match;
1.17 luotonen 250: }
251:
1.36 luotonen 252: /*
253: * Added by takada@seraph.ntt.jp (94/04/08)
254: */
1.48 frystyk 255: PRIVATE BOOL lang_match ARGS2(HTAtom *, tmplate,
1.36 luotonen 256: HTAtom *, actual)
257: {
258: char *t, *a, *st, *sa;
259: BOOL match = NO;
260:
1.48 frystyk 261: if (tmplate && actual &&
262: (t = HTAtom_name(tmplate)) && (a = HTAtom_name(actual))) {
1.36 luotonen 263: st = strchr(t, '_');
264: sa = strchr(a, '_');
265: if ((st != NULL) && (sa != NULL)) {
266: if (!strcasecomp(t, a))
267: match = YES;
268: else
269: match = NO;
270: }
271: else {
272: if (st != NULL) *st = 0;
273: if (sa != NULL) *sa = 0;
274: if (!strcasecomp(t, a))
275: match = YES;
276: else
277: match = NO;
278: if (st != NULL) *st = '_';
279: if (sa != NULL) *sa = '_';
280: }
281: }
282: return match;
283: }
284: /* end of addition */
285:
286:
1.17 luotonen 287:
288: PRIVATE float type_value ARGS2(HTAtom *, content_type,
289: HTList *, accepted)
290: {
291: HTList * cur = accepted;
292: HTPresentation * pres;
293: HTPresentation * wild = NULL;
294:
295: if (!content_type || !accepted) return -1;
296:
297: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
298: if (pres->rep == content_type)
299: return pres->quality;
300: else if (wild_match(pres->rep, content_type))
301: wild = pres;
302: }
303: if (wild) return wild->quality;
304: else return -1;
305: }
306:
307:
308: PRIVATE float lang_value ARGS2(HTAtom *, language,
309: HTList *, accepted)
310: {
311: HTList * cur = accepted;
312: HTAcceptNode * node;
313: HTAcceptNode * wild = NULL;
314:
315: if (!language || !accepted || HTList_isEmpty(accepted)) {
316: return 0.1;
317: }
318:
319: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
320: if (node->atom == language) {
321: return node->quality;
322: }
1.36 luotonen 323: /*
324: * patch by takada@seraph.ntt.jp (94/04/08)
325: * the original line was
326: * else if (wild_match(node->atom, language)) {
327: * and the new line is
328: */
329: else if (lang_match(node->atom, language)) {
1.17 luotonen 330: wild = node;
331: }
332: }
333:
334: if (wild) {
335: return wild->quality;
336: }
337: else {
338: return 0.1;
339: }
340: }
341:
342:
343: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
344: HTList *, accepted)
345: {
346: HTList * cur = accepted;
347: HTAcceptNode * node;
348: HTAcceptNode * wild = NULL;
349: char * e;
350:
351: if (!encoding || !accepted || HTList_isEmpty(accepted))
352: return 1;
353:
354: e = HTAtom_name(encoding);
355: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
356: return 1;
357:
358: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
359: if (node->atom == encoding)
360: return node->quality;
361: else if (wild_match(node->atom, encoding))
362: wild = node;
363: }
364: if (wild) return wild->quality;
365: else return 1;
366: }
367:
368:
369: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
370: HTList *, accepted_content_types,
371: HTList *, accepted_languages,
372: HTList *, accepted_encodings)
373: {
374: int accepted_cnt = 0;
375: HTList * accepted;
376: HTList * sorted;
377: HTList * cur;
378: HTContentDescription * d;
379:
380: if (!possibilities) return NO;
381:
382: accepted = HTList_new();
383: cur = possibilities;
384: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
385: float tv = type_value(d->content_type, accepted_content_types);
386: float lv = lang_value(d->content_language, accepted_languages);
387: float ev = encoding_value(d->content_encoding, accepted_encodings);
388:
389: if (tv > 0) {
390: d->quality *= tv * lv * ev;
391: HTList_addObject(accepted, d);
392: accepted_cnt++;
393: }
1.18 luotonen 394: else {
395: if (d->filename) free(d->filename);
396: free(d);
397: }
1.17 luotonen 398: }
399:
1.18 luotonen 400: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 401: CTRACE(stderr,
1.18 luotonen 402: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 403:
404: sorted = HTList_new();
405: while (accepted_cnt-- > 0) {
406: HTContentDescription * worst = NULL;
407: cur = accepted;
408: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
409: if (!worst || d->quality < worst->quality)
410: worst = d;
411: }
412: if (worst) {
413: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
414: accepted_cnt+1,
415: worst->quality,
416: (worst->content_type
417: ? HTAtom_name(worst->content_type) : "-"),
418: (worst->content_language
419: ? HTAtom_name(worst->content_language) :"-"),
420: (worst->content_encoding
421: ? HTAtom_name(worst->content_encoding) :"-"),
422: (worst->filename
423: ? worst->filename :"-"));
424: HTList_removeObject(accepted, (void*)worst);
425: HTList_addObject(sorted, (void*)worst);
426: }
427: }
1.18 luotonen 428: CTRACE(stderr, "\n");
1.17 luotonen 429: HTList_delete(accepted);
430: HTList_delete(possibilities->next);
431: possibilities->next = sorted->next;
432: sorted->next = NULL;
433: HTList_delete(sorted);
434:
435: if (!HTList_isEmpty(possibilities)) return YES;
436: else return NO;
437: }
438:
439:
440:
441:
442:
1.13 timbl 443: /* Socket Input Buffering
444: ** ----------------------
1.1 timbl 445: **
1.13 timbl 446: ** This code is used because one cannot in general open a
447: ** file descriptor for a socket.
448: **
1.1 timbl 449: ** The input file is read using the macro which can read from
1.13 timbl 450: ** a socket or a file, but this should not be used for files
451: ** as fopen() etc is more portable of course.
452: **
1.1 timbl 453: ** The input buffer size, if large will give greater efficiency and
454: ** release the server faster, and if small will save space on PCs etc.
455: */
456:
457:
458: /* Set up the buffering
459: **
460: ** These routines are public because they are in fact needed by
461: ** many parsers, and on PCs and Macs we should not duplicate
462: ** the static buffer area.
463: */
1.13 timbl 464: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 465: {
1.28 frystyk 466: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 467: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
468: isoc->input_file_number = file_number;
469: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
470: return isoc;
1.1 timbl 471: }
472:
1.35 frystyk 473: /* This should return HT_INTERRUPTED if interrupted BUT the connection
474: MUST not be closed */
475: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 476: {
1.35 frystyk 477: int ch;
1.1 timbl 478: do {
1.13 timbl 479: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 480: int status = NETREAD(
1.13 timbl 481: isoc->input_file_number,
482: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 483: if (status <= 0) {
1.39 frystyk 484: if (status == 0)
485: return EOF;
486: if (status == HT_INTERRUPTED) {
487: if (TRACE)
488: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
489: return HT_INTERRUPTED;
490: }
491: HTInetStatus("read");
492: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 493: }
1.35 frystyk 494: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 495: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 496: }
1.39 frystyk 497: ch = (unsigned char) *isoc->input_pointer++;
498: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 499:
500: return FROMASCII(ch);
501: }
502:
1.17 luotonen 503: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 504: {
505: if (me) free(me);
506: }
507:
508:
1.16 luotonen 509: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
510: int *, len)
511: {
512: if (isoc->input_pointer >= isoc->input_limit) {
513: int status = NETREAD(isoc->input_file_number,
514: isoc->input_buffer,
515: ((*len < INPUT_BUFFER_SIZE) ?
516: *len : INPUT_BUFFER_SIZE));
517: if (status <= 0) {
518: isoc->input_limit = isoc->input_buffer;
519: if (status < 0)
1.39 frystyk 520: HTInetStatus("read");
1.16 luotonen 521: *len = 0;
522: return NULL;
523: }
524: else {
525: *len = status;
526: return isoc->input_buffer;
527: }
528: }
529: else {
530: char * ret = isoc->input_pointer;
531: *len = isoc->input_limit - isoc->input_pointer;
532: isoc->input_pointer = isoc->input_limit;
533: return ret;
534: }
535: }
536:
537:
1.15 luotonen 538: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
539: {
540: if (isoc) {
541: int status;
542:
543: isoc->input_pointer = isoc->input_buffer;
544: status = NETREAD(isoc->input_file_number,
545: isoc->input_buffer,
546: INPUT_BUFFER_SIZE);
547: if (status <= 0) {
548: isoc->input_limit = isoc->input_buffer;
549: if (status < 0)
1.39 frystyk 550: HTInetStatus("read");
1.15 luotonen 551: }
552: else
553: isoc->input_limit = isoc->input_buffer + status;
554: return status;
555: }
556: return -1;
557: }
558:
559:
560: PRIVATE void ascii_cat ARGS3(char **, linep,
561: char *, start,
562: char *, end)
563: {
564: if (linep && start && end && start <= end) {
565: char *ptr;
566:
567: if (*linep) {
568: int len = strlen(*linep);
569: *linep = (char*)realloc(*linep, len + end-start + 1);
570: ptr = *linep + len;
571: }
572: else {
573: ptr = *linep = (char*)malloc(end-start + 1);
574: }
575:
576: while (start < end) {
577: *ptr = FROMASCII(*start);
578: ptr++;
579: start++;
580: }
581: *ptr = 0;
582: }
583: }
584:
585:
/*	Read one line from the buffered socket
**	--------------------------------------
**	Collects characters from the input buffer (refilling it with
**	fill_in_buffer() when it runs out) up to the next line feed and
**	returns them as a string built by ascii_cat().  The line feed and a
**	carriage return directly in front of it are not part of the result.
**
**	If `unfold' is YES and the line just read is not empty, the first
**	character after the line feed is inspected: a space or tab makes the
**	following line a continuation that is appended to the result,
**	anything else ends the line.  The inspected character is left
**	unread, because input_pointer was already set behind the line feed.
**
**	Returns	the line (the caller frees it), which may be a partial line
**		or NULL if the input ended first;
**		NULL if `isoc' is NULL or a zero character was read (what
**		had been collected so far is freed).
*/
586: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
587: BOOL, unfold)
588: {
589: if (!isoc)
590: return NULL;
591: else {
592: BOOL check_unfold = NO;
593: int prev_cr = 0;
594: char *start = isoc->input_pointer;
595: char *cur = isoc->input_pointer;
596: char * line = NULL;
597:
598: for(;;) {
599: /*
600: ** Get more if needed to complete line
601: */
602: if (cur >= isoc->input_limit) { /* Need more data */
603: ascii_cat(&line, start, cur);
604: if (fill_in_buffer(isoc) <= 0)
605: return line;
606: start = cur = isoc->input_pointer;
607: } /* if need more data */
608:
609: /*
610: ** Find a line feed if there is one
611: */
612: for(; cur < isoc->input_limit; cur++) {
613: char c = FROMASCII(*cur);
614: if (!c) {
1.18 luotonen 615: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 616: return NULL; /* Panic! read a 0! */
617: }
618: if (check_unfold && c != ' ' && c != '\t') {
619: return line; /* Note: isoc->input_pointer is not advanced, so this character stays unread */
620: }
621: else {
622: check_unfold = NO;
623: }
624:
625: if (c=='\r') {
626: prev_cr = 1;
627: }
628: else {
629: if (c=='\n') { /* Found a line feed */
630: ascii_cat(&line, start, cur-prev_cr);
631: start = isoc->input_pointer = cur+1;
632:
1.44 frystyk 633: if (line && (int) strlen(line) > 0 && unfold) {
1.15 luotonen 634: check_unfold = YES;
635: }
636: else {
637: return line;
638: }
639: } /* if NL */
640: /* else just a regular character */
641: prev_cr = 0;
642: } /* if not CR */
643: } /* while characters in buffer remain */
644: } /* until line read or end-of-file */
645: } /* valid parameters to function */
646: }
647:
1.43 frystyk 648: /* The returned string must be freed by the caller */
1.15 luotonen 649: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
650: {
651: return get_some_line(isoc, NO);
652: }
653:
1.43 frystyk 654: /* The returned string must be freed by the caller */
1.15 luotonen 655: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
656: {
657: return get_some_line(isoc, YES);
658: }
659:
660:
661: /*
662: ** Read HTTP status line (if there is one).
663: **
664: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
665: ** First look at the stub in ASCII and check if it starts "HTTP/".
666: **
667: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
668: ** will be taken as a HTTP 1.0 server. Failure.
669: */
670: #define STUB_LENGTH 20
671: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
672: {
673: if (!isoc) {
674: return NULL;
675: }
676: else {
677: char buf[STUB_LENGTH + 1];
678: int i;
679: char server_version[STUB_LENGTH+1];
680: int server_status;
681:
682: /*
683: ** Read initial buffer
684: */
685: if (isoc->input_pointer >= isoc->input_limit &&
686: fill_in_buffer(isoc) <= 0) {
687: return NULL;
688: }
689:
690: for (i=0; i < STUB_LENGTH; i++)
691: buf[i] = FROMASCII(isoc->input_buffer[i]);
692: buf[STUB_LENGTH] = 0;
693:
694: if (0 != strncmp(buf, "HTTP/", 5) ||
695: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
696: return NULL;
697: else
698: return get_some_line(isoc, NO);
699: }
700: }
701:
702:
703: /*
704: ** Do heuristic test to see if this is binary.
705: **
706: ** We check for characters above 128 in the first few bytes, and
707: ** if we find them we forget the html default.
708: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
709: **
710: ** Bugs: An HTTP 0.9 server returning a binary document with
711: ** characters < 128 will be read as ASCII.
712: */
713: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
714: {
715: if (isoc &&
716: (isoc->input_pointer < isoc->input_limit ||
717: fill_in_buffer(isoc) > 0)) {
718: char *p = isoc->input_buffer;
719: int i = STUB_LENGTH;
720:
721: for( ; i && p < isoc->input_limit; p++, i++)
722: if (((int)*p)&128)
723: return YES;
724: }
725: return NO;
726: }
727:
728:
729:
1.1 timbl 730: /* Stream the data to an ouput file as binary
731: */
1.38 luotonen 732: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 733: int, input,
734: FILE *, output)
1.1 timbl 735: {
736: do {
737: int status = NETREAD(
1.13 timbl 738: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 739: if (status <= 0) {
740: if (status == 0) return 0;
741: if (TRACE) fprintf(stderr,
1.39 frystyk 742: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 743: return 2; /* Error */
744: }
1.13 timbl 745: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 746: } while (YES);
747: }
748:
1.38 luotonen 749:
750: /*
751: * Normal HTTP headers are never bigger than 2K.
752: */
753: #define S_BUFFER_SIZE 2000
754:
755: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
756: {
757: if (isoc) {
758: isoc->s_do_buffering = YES;
759: if (!isoc->s_buffer) {
760: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
761: isoc->s_buffer_size = S_BUFFER_SIZE;
762: }
763: isoc->s_buffer_cur = isoc->s_buffer;
764: }
765: }
766:
767: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
768: {
769: if (isoc) {
770: isoc->s_do_buffering = NO;
771: if (isoc->s_buffer_cur)
772: *isoc->s_buffer_cur = 0;
773: }
774: }
775:
776: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
777: char **, buffer_ptr)
778: {
779: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
780: return 0;
781: else {
782: *isoc->s_buffer_cur = 0;
783: if (buffer_ptr)
784: *buffer_ptr = isoc->s_buffer;
785: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
786: }
787: }
1.1 timbl 788:
1.33 luotonen 789: PRIVATE BOOL better_match ARGS2(HTFormat, f,
790: HTFormat, g)
791: {
792: CONST char *p, *q;
793:
794: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
795: int i,j;
796: for(i=0 ; *p; p++) if (*p == '*') i++;
797: for(j=0 ; *q; q++) if (*q == '*') j++;
798: if (i < j) return YES;
799: }
800: return NO;
801: }
802:
1.17 luotonen 803:
1.2 timbl 804: /* Create a filter stack
805: ** ---------------------
806: **
1.7 secret 807: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 808: ** structure is made to hold the destination format while the
809: ** new stack is generated. This is just to pass the out format to
810: ** MIME so far. Storing the format of a stream in the stream might
811: ** be a lot neater.
1.10 timbl 812: **
1.29 frystyk 813: ** The star/star format is special, in that if you can take
1.40 frystyk 814: ** that you can take anything.
815: **
816: ** On succes, request->error_block is set to YES so no more error
817: ** messages to the stream as the stream might be of any format.
1.2 timbl 818: */
1.34 luotonen 819: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
820: HTRequest *, request,
821: BOOL, guess)
1.2 timbl 822: {
1.12 timbl 823: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 824: HTList * conversion[2];
825: int which_list;
1.25 frystyk 826: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 827: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 828:
1.47 frystyk 829: request->error_block = YES; /* No more error output to stream */
1.2 timbl 830: if (TRACE) fprintf(stderr,
1.39 frystyk 831: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 832: HTAtom_name(rep_in),
1.2 timbl 833: HTAtom_name(rep_out));
1.34 luotonen 834:
835: if (guess && rep_in == WWW_UNKNOWN) {
836: CTRACE(stderr, "Returning... guessing stream\n");
837: return HTGuess_new(request);
838: }
839:
1.47 frystyk 840: if (rep_out == WWW_SOURCE || rep_out == rep_in) {
1.21 luotonen 841: return request->output_stream;
1.47 frystyk 842: }
1.2 timbl 843:
1.14 timbl 844: conversion[0] = request->conversions;
845: conversion[1] = HTConversions;
1.17 luotonen 846:
1.15 luotonen 847: for(which_list = 0; which_list<2; which_list++) {
848: HTList * cur = conversion[which_list];
849:
850: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 851: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 852: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
853: if (!best_match ||
854: better_match(pres->rep, best_match->rep) ||
855: (!better_match(best_match->rep, pres->rep) &&
856: pres->quality > best_quality)) {
1.49 howcome 857: /* HWL */
858: if (!pres->test_command || (system(pres->test_command)==0)) {
859: if (TRACE && pres->test_command)
860: printf("HTStreamStack testing %s %d\n",pres->test_command,system(pres->test_command));
861: best_match = pres;
862: best_quality = pres->quality;
863: }
1.10 timbl 864: }
865: }
1.2 timbl 866: }
867: }
1.33 luotonen 868:
1.29 frystyk 869: match = best_match ? best_match : NULL;
870: if (match) {
871: if (match->rep == WWW_SOURCE) {
1.39 frystyk 872: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 873: HTAtom_name(match->rep),
874: HTAtom_name(rep_out));
875: }
876: return (*match->converter)(
1.25 frystyk 877: request, match->command, rep_in, rep_out,
878: request->output_stream);
1.29 frystyk 879: }
1.42 frystyk 880: {
881: char *msg = NULL;
882: StrAllocCopy(msg, "Can't convert from ");
883: StrAllocCat(msg, HTAtom_name(rep_in));
884: StrAllocCat(msg, " to ");
885: StrAllocCat(msg, HTAtom_name(rep_out));
886: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NOT_IMPLEMENTED,
887: (void *) msg, (int) strlen(msg), "HTStreamStack");
888: free(msg);
889: }
1.47 frystyk 890: request->error_block = NO; /* We didn't put up a stream anyway */
1.2 timbl 891: return NULL;
892: }
893:
894:
895: /* Find the cost of a filter stack
896: ** -------------------------------
897: **
898: ** Must return the cost of the same stack which StreamStack would set up.
899: **
900: ** On entry,
901: ** length The size of the data to be converted
902: */
1.12 timbl 903: PUBLIC float HTStackValue ARGS5(
1.14 timbl 904: HTList *, theseConversions,
1.10 timbl 905: HTFormat, rep_in,
1.2 timbl 906: HTFormat, rep_out,
907: float, initial_value,
908: long int, length)
909: {
1.14 timbl 910: int which_list;
911: HTList* conversion[2];
912:
1.2 timbl 913: if (TRACE) fprintf(stderr,
1.39 frystyk 914: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 915: HTAtom_name(rep_in), initial_value,
1.2 timbl 916: HTAtom_name(rep_out));
917:
918: if (rep_out == WWW_SOURCE ||
1.10 timbl 919: rep_out == rep_in) return 0.0;
1.2 timbl 920:
1.12 timbl 921: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 922:
1.14 timbl 923: conversion[0] = theseConversions;
924: conversion[1] = HTConversions;
925:
926: for(which_list = 0; which_list<2; which_list++)
927: if (conversion[which_list]) {
1.15 luotonen 928: HTList * cur = conversion[which_list];
1.2 timbl 929: HTPresentation * pres;
1.15 luotonen 930: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
931: if (pres->rep == rep_in &&
1.17 luotonen 932: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 933: float value = initial_value * pres->quality;
934: if (HTMaxSecs != 0.0)
1.15 luotonen 935: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 936: /HTMaxSecs;
937: return value;
938: }
939: }
940: }
941:
942: return -1e30; /* Really bad */
1.17 luotonen 943: }
944:
945:
1.2 timbl 946:
1.1 timbl 947:
1.2 timbl 948: /* Push data from a socket down a stream
949: ** -------------------------------------
1.1 timbl 950: **
1.2 timbl 951: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 952: ** graphic (or other) objects described by the file.
1.2 timbl 953: **
954: ** The file number given is assumed to be a TELNET stream ie containing
955: ** CRLF at the end of lines which need to be stripped to LF for unix
956: ** when the format is textual.
957: **
1.26 luotonen 958: ** RETURNS the number of bytes transferred.
959: **
1.1 timbl 960: */
1.26 luotonen 961: PUBLIC int HTCopy ARGS2(
1.2 timbl 962: int, file_number,
963: HTStream*, sink)
1.1 timbl 964: {
1.2 timbl 965: HTStreamClass targetClass;
1.13 timbl 966: HTInputSocket * isoc;
1.26 luotonen 967: int cnt = 0;
968:
1.5 timbl 969: /* Push the data down the stream
1.2 timbl 970: **
971: */
972: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 973: isoc = HTInputSocket_new(file_number);
1.2 timbl 974:
975: /* Push binary from socket down sink
1.10 timbl 976: **
977: ** This operation could be put into a main event loop
1.2 timbl 978: */
979: for(;;) {
980: int status = NETREAD(
1.13 timbl 981: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 982: if (status <= 0) {
983: if (status == 0) break;
984: if (TRACE) fprintf(stderr,
1.39 frystyk 985: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 986: status, errno);
1.2 timbl 987: break;
988: }
1.26 luotonen 989:
1.8 timbl 990: #ifdef NOT_ASCII
991: {
992: char * p;
1.13 timbl 993: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 994: *p = FROMASCII(*p);
995: }
996: }
997: #endif
998:
1.13 timbl 999: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 1000: cnt += status;
1.2 timbl 1001: } /* next bufferload */
1.26 luotonen 1002:
1.13 timbl 1003: HTInputSocket_free(isoc);
1.26 luotonen 1004:
1005: return cnt;
1.2 timbl 1006: }
1007:
1.1 timbl 1008:
1.7 secret 1009:
1010: /* Push data from a file pointer down a stream
1011: ** -------------------------------------
1012: **
1013: ** This routine is responsible for creating and PRESENTING any
1014: ** graphic (or other) objects described by the file.
1015: **
1016: **
1017: */
1018: PUBLIC void HTFileCopy ARGS2(
1019: FILE *, fp,
1020: HTStream*, sink)
1021: {
1022: HTStreamClass targetClass;
1.13 timbl 1023: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1024:
1025: /* Push the data down the stream
1026: **
1027: */
1028: targetClass = *(sink->isa); /* Copy pointers to procedures */
1029:
1030: /* Push binary from socket down sink
1031: */
1032: for(;;) {
1033: int status = fread(
1034: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1035: if (status == 0) { /* EOF or error */
1036: if (ferror(fp) == 0) break;
1037: if (TRACE) fprintf(stderr,
1.39 frystyk 1038: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1039: break;
1040: }
1041: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1042: } /* next bufferload */
1.7 secret 1043: }
1044:
1045:
1046:
1047:
1.2 timbl 1048: /* Push data from a socket down a stream STRIPPING CR
1049: ** --------------------------------------------------
1050: **
1051: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1052: ** graphic (or other) objects described by the socket.
1.2 timbl 1053: **
1054: ** The file number given is assumed to be a TELNET stream ie containing
1055: ** CRLF at the end of lines which need to be stripped to LF for unix
1056: ** when the format is textual.
1.37 frystyk 1057: **
1058: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1059: */
1.2 timbl 1060: PUBLIC void HTCopyNoCR ARGS2(
1061: int, file_number,
1062: HTStream*, sink)
1063: {
1.13 timbl 1064: HTStreamClass targetClass;
1065: HTInputSocket * isoc;
1.37 frystyk 1066: int ch;
1.1 timbl 1067:
1.2 timbl 1068: /* Push the data, ignoring CRLF, down the stream
1069: **
1070: */
1071: targetClass = *(sink->isa); /* Copy pointers to procedures */
1072:
1073: /* Push text from telnet socket down sink
1074: **
1075: ** @@@@@ To push strings could be faster? (especially is we
1076: ** cheat and don't ignore CR! :-}
1077: */
1.13 timbl 1078: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1079: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1080: (*targetClass.put_character)(sink, ch);
1.13 timbl 1081: HTInputSocket_free(isoc);
1.2 timbl 1082: }
1.1 timbl 1083:
1.2 timbl 1084:
1.46 frystyk 1085: /* To be replaced by a stream */
1086: PUBLIC void HTCopyDot ARGS2(int, file_number,
1087: HTStream *, sink)
1088: {
1089: HTStreamClass targetClass;
1090: HTInputSocket * isoc;
1091: int ch;
1092: int state=3;
1093:
1094: /* Push the data, ignoring CRLF, down the stream */
1095: targetClass = *(sink->isa); /* Copy pointers to procedures */
1096: isoc = HTInputSocket_new(file_number);
1097: while (state && (ch = HTInputSocket_getCharacter(isoc)) >= 0) {
1098: if (ch == '\n')
1099: state--;
1100: else if (state==2 && ch=='.')
1101: state--;
1102: else
1103: state = 3;
1104: (*targetClass.put_character)(sink, ch);
1105: }
1106: HTInputSocket_free(isoc);
1107: }
1108:
1109:
1.7 secret 1110:
1.2 timbl 1111: /* Parse a socket given format and file number
1112: **
1113: ** This routine is responsible for creating and PRESENTING any
1114: ** graphic (or other) objects described by the file.
1115: **
1116: ** The file number given is assumed to be a TELNET stream ie containing
1117: ** CRLF at the end of lines which need to be stripped to LF for unix
1118: ** when the format is textual.
1119: **
1.42 frystyk 1120: ** Returns <0 on error, HT_LOADED on success.
1.2 timbl 1121: */
1.14 timbl 1122:
1.46 frystyk 1123: /* The parameter to this function and HTParsefile should be HTRequest */
1124:
1.12 timbl 1125: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1126: HTFormat, rep_in,
1.2 timbl 1127: int, file_number,
1.12 timbl 1128: HTRequest *, request)
1.2 timbl 1129: {
1130: HTStream * stream;
1131: HTStreamClass targetClass;
1.1 timbl 1132:
1.40 frystyk 1133: if (request->error_stack) {
1134: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1135: return -1;
1136: }
1137:
1.42 frystyk 1138: /* Set up stream stack */
1139: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1140: return -1;
1.1 timbl 1141:
1.3 timbl 1142: /* Push the data, ignoring CRLF if necessary, down the stream
1143: **
1.2 timbl 1144: **
1.3 timbl 1145: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1146: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1147: ** The current method smells anyway.
1.2 timbl 1148: */
1149: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.46 frystyk 1150: if (request->output_format == WWW_SOURCE && request->net_info->CRLFdotCRLF)
1151: HTCopyDot(file_number, stream);
1152: else if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1153: || (request->content_encoding &&
1154: request->content_encoding != HTAtom_for("8bit") &&
1155: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1156: || strstr(HTAtom_name(rep_in), "image/")
1157: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1158: HTCopy(file_number, stream);
1.46 frystyk 1159: } else if (request->net_info->CRLFdotCRLF)
1160: HTCopyDot(file_number, stream);
1161: else
1.2 timbl 1162: HTCopyNoCR(file_number, stream);
1.45 duns 1163: (*targetClass._free)(stream);
1.7 secret 1164:
1165: return HT_LOADED;
1166: }
1167:
1168:
1169:
1170: /* Parse a file given format and file pointer
1171: **
1172: ** This routine is responsible for creating and PRESENTING any
1173: ** graphic (or other) objects described by the file.
1174: **
1175: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1176: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1177: ** when the format is textual.
1178: **
1179: */
1.12 timbl 1180: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1181: HTFormat, rep_in,
1.7 secret 1182: FILE *, fp,
1.12 timbl 1183: HTRequest *, request)
1.7 secret 1184: {
1185: HTStream * stream;
1186: HTStreamClass targetClass;
1.40 frystyk 1187:
1188: if (request->error_stack) {
1189: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1190: return -1;
1191: }
1.7 secret 1192:
1.42 frystyk 1193: /* Set up stream stack */
1194: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1195: return -1;
1.7 secret 1196:
1.9 timbl 1197: /* Push the data down the stream
1.7 secret 1198: **
1199: **
1200: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1201: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1202: ** The current method smells anyway.
1203: */
1204: targetClass = *(stream->isa); /* Copy pointers to procedures */
1205: HTFileCopy(fp, stream);
1.45 duns 1206: (*targetClass._free)(stream);
1.1 timbl 1207:
1.2 timbl 1208: return HT_LOADED;
1.1 timbl 1209: }
1.2 timbl 1210:
1.10 timbl 1211:
1212: /* Converter stream: Network Telnet to internal character text
1213: ** -----------------------------------------------------------
1214: **
1215: ** The input is assumed to be in ASCII, with lines delimited
1216: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1217: ** pairs in the local representation. The (CR,LF) sequence
1218: ** when found is changed to a '\n' character, the internal
1219: ** C representation of a new line.
1220: */
1221:
1222:
1.11 timbl 1223: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1224: {
1225: char c = FROMASCII(net_char);
1226: if (me->had_cr) {
1227: if (c==LF) {
1228: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1229: me->had_cr = NO;
1230: return;
1231: } else {
1232: me->sink->isa->put_character(me->sink, CR); /* leftover */
1233: }
1234: }
1235: me->had_cr = (c==CR);
1236: if (!me->had_cr)
1237: me->sink->isa->put_character(me->sink, c); /* normal */
1238: }
1239:
1.11 timbl 1240: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1241: {
1242: CONST char * p;
1243: for(p=s; *p; p++) NetToText_put_character(me, *p);
1244: }
1245:
1.11 timbl 1246: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1247: {
1248: CONST char * p;
1249: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1250: }
1251:
1252: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1253: {
1.45 duns 1254: me->sink->isa->_free(me->sink); /* Close rest of pipe */
1.10 timbl 1255: free(me);
1256: }
1257:
1258: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1259: {
1260: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1261: free(me);
1262: }
1263:
1264: /* The class structure
1265: */
1266: PRIVATE HTStreamClass NetToTextClass = {
1267: "NetToText",
1268: NetToText_free,
1269: NetToText_abort,
1270: NetToText_put_character,
1271: NetToText_put_string,
1272: NetToText_put_block
1273: };
1274:
1275: /* The creation method
1276: */
1277: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1278: {
1279: HTStream* me = (HTStream*)malloc(sizeof(*me));
1280: if (me == NULL) outofmem(__FILE__, "NetToText");
1281: me->isa = &NetToTextClass;
1282:
1283: me->had_cr = NO;
1284: me->sink = sink;
1285: return me;
1286: }
1.2 timbl 1287:
1288:
Webmaster