Annotation of libwww/Library/src/HTFormat.c, revision 1.48
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
1.45 duns 12: ** HISTORY:
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
14: **
1.2 timbl 15: */
16:
1.10 timbl 17:
1.2 timbl 18: /* Implements:
1.1 timbl 19: */
1.2 timbl 20: #include "HTFormat.h"
21:
22: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
23: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
24:
25: #ifdef unix
26: #ifdef NeXT
27: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
28: #else
29: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
30: /* Full pathname would be better! */
31: #endif
32: #endif
33:
1.1 timbl 34:
35: #include "HTUtils.h"
36: #include "tcp.h"
37:
38: #include "HTML.h"
1.12 timbl 39: #include "HTMLPDTD.h"
1.2 timbl 40: #include "HTAlert.h"
41: #include "HTList.h"
42: #include "HTInit.h"
43: /* Streams and structured streams which we use:
44: */
45: #include "HTFWriter.h"
46: #include "HTPlain.h"
47: #include "SGML.h"
48: #include "HTML.h"
49: #include "HTMLGen.h"
1.41 frystyk 50: #include "HTTCP.h"
1.34 luotonen 51: #include "HTGuess.h"
1.42 frystyk 52: #include "HTError.h"
1.34 luotonen 53:
1.2 timbl 54:
55: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
56:
1.10 timbl 57: #ifdef ORIGINAL
1.2 timbl 58: struct _HTStream {
59: CONST HTStreamClass* isa;
60: /* ... */
61: };
1.10 timbl 62: #endif
63:
64: /* this version used by the NetToText stream */
65: struct _HTStream {
66: CONST HTStreamClass * isa;
67: BOOL had_cr;
68: HTStream * sink;
69: };
1.2 timbl 70:
71:
1.17 luotonen 72: /*
73: ** Accept-Encoding and Accept-Language
74: */
75: typedef struct _HTAcceptNode {
76: HTAtom * atom;
77: float quality;
78: } HTAcceptNode;
79:
80:
81:
82:
1.2 timbl 83: /* Presentation methods
84: ** --------------------
85: */
86:
1.14 timbl 87: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 88:
1.31 frystyk 89: /* -------------------------------------------------------------------------
90: This function replaces the code in HTRequest_delete() in order to keep
91: the data structure hidden (it is NOT a joke!)
92: Henrik 14/03-94
93: ------------------------------------------------------------------------- */
94: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
95: {
96: HTList *cur = me;
97: HTPresentation *pres;
98: if (!me)
99: return;
100: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
101: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
102: free(pres);
103: }
104: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
105: }
106:
1.2 timbl 107:
108: /* Define a presentation system command for a content-type
109: ** -------------------------------------------------------
110: */
1.12 timbl 111: PUBLIC void HTSetPresentation ARGS6(
112: HTList *, conversions,
113: CONST char *, representation,
114: CONST char *, command,
115: float, quality,
116: float, secs,
117: float, secs_per_byte
1.2 timbl 118: ){
119:
120: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
121: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
122:
123: pres->rep = HTAtom_for(representation);
124: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
125: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
126: pres->quality = quality;
127: pres->secs = secs;
128: pres->secs_per_byte = secs_per_byte;
129: pres->rep = HTAtom_for(representation);
130: pres->command = 0;
131: StrAllocCopy(pres->command, command);
132:
1.12 timbl 133: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 134:
1.15 luotonen 135: #ifdef OLD_CODE
136: if (strcmp(representation, "*")==0) {
1.2 timbl 137: if (default_presentation) free(default_presentation);
138: default_presentation = pres;
1.12 timbl 139: } else
140: #endif
141: HTList_addObject(conversions, pres);
1.2 timbl 142: }
143:
144:
145: /* Define a built-in function for a content-type
146: ** ---------------------------------------------
147: */
1.12 timbl 148: PUBLIC void HTSetConversion ARGS7(
149: HTList *, conversions,
150: CONST char *, representation_in,
151: CONST char *, representation_out,
1.6 timbl 152: HTConverter*, converter,
1.12 timbl 153: float, quality,
154: float, secs,
155: float, secs_per_byte
1.2 timbl 156: ){
1.1 timbl 157:
1.2 timbl 158: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
159: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
160:
161: pres->rep = HTAtom_for(representation_in);
162: pres->rep_out = HTAtom_for(representation_out);
163: pres->converter = converter;
164: pres->command = NULL; /* Fixed */
165: pres->quality = quality;
166: pres->secs = secs;
167: pres->secs_per_byte = secs_per_byte;
168: pres->command = 0;
169:
1.12 timbl 170: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 171:
1.12 timbl 172: #ifdef OLD_CODE
1.2 timbl 173: if (strcmp(representation_in, "*")==0) {
174: if (default_presentation) free(default_presentation);
175: default_presentation = pres;
1.12 timbl 176: } else
177: #endif
178: HTList_addObject(conversions, pres);
1.2 timbl 179: }
1.1 timbl 180:
181:
182:
1.17 luotonen 183: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
184: char *, enc,
185: float, quality)
186: {
187: HTAcceptNode * node;
188: char * cur;
189:
190: if (!list || !enc || !*enc) return;
191:
192: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
193:
194: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
195: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
196: HTList_addObject(list, (void*)node);
197:
198: node->atom = HTAtom_for(enc);
199: node->quality = quality;
200: }
201:
202:
203: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
204: char *, lang,
205: float, quality)
206: {
207: HTAcceptNode * node;
208:
209: if (!list || !lang || !*lang) return;
210:
211: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
212: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
213:
214: HTList_addObject(list, (void*)node);
215: node->atom = HTAtom_for(lang);
216: node->quality = quality;
217: }
218:
219:
1.48 ! frystyk 220: PRIVATE BOOL wild_match ARGS2(HTAtom *, tmplate,
1.17 luotonen 221: HTAtom *, actual)
222: {
223: char *t, *a, *st, *sa;
224: BOOL match = NO;
225:
1.48 ! frystyk 226: if (tmplate && actual && (t = HTAtom_name(tmplate))) {
1.22 luotonen 227: if (!strcmp(t, "*"))
228: return YES;
1.17 luotonen 229:
1.22 luotonen 230: if (strchr(t, '*') &&
231: (a = HTAtom_name(actual)) &&
232: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 233:
1.22 luotonen 234: *sa = 0;
235: *st = 0;
236:
237: if ((*(st-1)=='*' &&
238: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
239: (*(st+1)=='*' && !strcasecomp(t,a)))
240: match = YES;
241:
242: *sa = '/';
243: *st = '/';
244: }
245: }
1.23 luotonen 246: return match;
1.17 luotonen 247: }
248:
1.36 luotonen 249: /*
250: * Added by takada@seraph.ntt.jp (94/04/08)
251: */
1.48 ! frystyk 252: PRIVATE BOOL lang_match ARGS2(HTAtom *, tmplate,
1.36 luotonen 253: HTAtom *, actual)
254: {
255: char *t, *a, *st, *sa;
256: BOOL match = NO;
257:
1.48 ! frystyk 258: if (tmplate && actual &&
! 259: (t = HTAtom_name(tmplate)) && (a = HTAtom_name(actual))) {
1.36 luotonen 260: st = strchr(t, '_');
261: sa = strchr(a, '_');
262: if ((st != NULL) && (sa != NULL)) {
263: if (!strcasecomp(t, a))
264: match = YES;
265: else
266: match = NO;
267: }
268: else {
269: if (st != NULL) *st = 0;
270: if (sa != NULL) *sa = 0;
271: if (!strcasecomp(t, a))
272: match = YES;
273: else
274: match = NO;
275: if (st != NULL) *st = '_';
276: if (sa != NULL) *sa = '_';
277: }
278: }
279: return match;
280: }
281: /* end of addition */
282:
283:
1.17 luotonen 284:
285: PRIVATE float type_value ARGS2(HTAtom *, content_type,
286: HTList *, accepted)
287: {
288: HTList * cur = accepted;
289: HTPresentation * pres;
290: HTPresentation * wild = NULL;
291:
292: if (!content_type || !accepted) return -1;
293:
294: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
295: if (pres->rep == content_type)
296: return pres->quality;
297: else if (wild_match(pres->rep, content_type))
298: wild = pres;
299: }
300: if (wild) return wild->quality;
301: else return -1;
302: }
303:
304:
305: PRIVATE float lang_value ARGS2(HTAtom *, language,
306: HTList *, accepted)
307: {
308: HTList * cur = accepted;
309: HTAcceptNode * node;
310: HTAcceptNode * wild = NULL;
311:
312: if (!language || !accepted || HTList_isEmpty(accepted)) {
313: return 0.1;
314: }
315:
316: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
317: if (node->atom == language) {
318: return node->quality;
319: }
1.36 luotonen 320: /*
321: * patch by takada@seraph.ntt.jp (94/04/08)
322: * the original line was
323: * else if (wild_match(node->atom, language)) {
324: * and the new line is
325: */
326: else if (lang_match(node->atom, language)) {
1.17 luotonen 327: wild = node;
328: }
329: }
330:
331: if (wild) {
332: return wild->quality;
333: }
334: else {
335: return 0.1;
336: }
337: }
338:
339:
340: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
341: HTList *, accepted)
342: {
343: HTList * cur = accepted;
344: HTAcceptNode * node;
345: HTAcceptNode * wild = NULL;
346: char * e;
347:
348: if (!encoding || !accepted || HTList_isEmpty(accepted))
349: return 1;
350:
351: e = HTAtom_name(encoding);
352: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
353: return 1;
354:
355: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
356: if (node->atom == encoding)
357: return node->quality;
358: else if (wild_match(node->atom, encoding))
359: wild = node;
360: }
361: if (wild) return wild->quality;
362: else return 1;
363: }
364:
365:
366: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
367: HTList *, accepted_content_types,
368: HTList *, accepted_languages,
369: HTList *, accepted_encodings)
370: {
371: int accepted_cnt = 0;
372: HTList * accepted;
373: HTList * sorted;
374: HTList * cur;
375: HTContentDescription * d;
376:
377: if (!possibilities) return NO;
378:
379: accepted = HTList_new();
380: cur = possibilities;
381: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
382: float tv = type_value(d->content_type, accepted_content_types);
383: float lv = lang_value(d->content_language, accepted_languages);
384: float ev = encoding_value(d->content_encoding, accepted_encodings);
385:
386: if (tv > 0) {
387: d->quality *= tv * lv * ev;
388: HTList_addObject(accepted, d);
389: accepted_cnt++;
390: }
1.18 luotonen 391: else {
392: if (d->filename) free(d->filename);
393: free(d);
394: }
1.17 luotonen 395: }
396:
1.18 luotonen 397: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 398: CTRACE(stderr,
1.18 luotonen 399: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 400:
401: sorted = HTList_new();
402: while (accepted_cnt-- > 0) {
403: HTContentDescription * worst = NULL;
404: cur = accepted;
405: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
406: if (!worst || d->quality < worst->quality)
407: worst = d;
408: }
409: if (worst) {
410: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
411: accepted_cnt+1,
412: worst->quality,
413: (worst->content_type
414: ? HTAtom_name(worst->content_type) : "-"),
415: (worst->content_language
416: ? HTAtom_name(worst->content_language) :"-"),
417: (worst->content_encoding
418: ? HTAtom_name(worst->content_encoding) :"-"),
419: (worst->filename
420: ? worst->filename :"-"));
421: HTList_removeObject(accepted, (void*)worst);
422: HTList_addObject(sorted, (void*)worst);
423: }
424: }
1.18 luotonen 425: CTRACE(stderr, "\n");
1.17 luotonen 426: HTList_delete(accepted);
427: HTList_delete(possibilities->next);
428: possibilities->next = sorted->next;
429: sorted->next = NULL;
430: HTList_delete(sorted);
431:
432: if (!HTList_isEmpty(possibilities)) return YES;
433: else return NO;
434: }
435:
436:
437:
438:
439:
1.13 timbl 440: /* Socket Input Buffering
441: ** ----------------------
1.1 timbl 442: **
1.13 timbl 443: ** This code is used because one cannot in general open a
444: ** file descriptor for a socket.
445: **
1.1 timbl 446: ** The input file is read using the macro which can read from
1.13 timbl 447: ** a socket or a file, but this should not be used for files
448: ** as fopen() etc is more portable of course.
449: **
1.1 timbl 450: ** The input buffer size, if large will give greater efficiency and
451: ** release the server faster, and if small will save space on PCs etc.
452: */
453:
454:
455: /* Set up the buffering
456: **
457: ** These routines are public because they are in fact needed by
458: ** many parsers, and on PCs and Macs we should not duplicate
459: ** the static buffer area.
460: */
1.13 timbl 461: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 462: {
1.28 frystyk 463: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 464: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
465: isoc->input_file_number = file_number;
466: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
467: return isoc;
1.1 timbl 468: }
469:
1.35 frystyk 470: /* This should return HT_INTERRUPTED if interrupted BUT the connection
471: MUST not be closed */
472: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 473: {
1.35 frystyk 474: int ch;
1.1 timbl 475: do {
1.13 timbl 476: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 477: int status = NETREAD(
1.13 timbl 478: isoc->input_file_number,
479: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 480: if (status <= 0) {
1.39 frystyk 481: if (status == 0)
482: return EOF;
483: if (status == HT_INTERRUPTED) {
484: if (TRACE)
485: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
486: return HT_INTERRUPTED;
487: }
488: HTInetStatus("read");
489: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 490: }
1.35 frystyk 491: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 492: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 493: }
1.39 frystyk 494: ch = (unsigned char) *isoc->input_pointer++;
495: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 496:
497: return FROMASCII(ch);
498: }
499:
1.17 luotonen 500: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 501: {
502: if (me) free(me);
503: }
504:
505:
1.16 luotonen 506: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
507: int *, len)
508: {
509: if (isoc->input_pointer >= isoc->input_limit) {
510: int status = NETREAD(isoc->input_file_number,
511: isoc->input_buffer,
512: ((*len < INPUT_BUFFER_SIZE) ?
513: *len : INPUT_BUFFER_SIZE));
514: if (status <= 0) {
515: isoc->input_limit = isoc->input_buffer;
516: if (status < 0)
1.39 frystyk 517: HTInetStatus("read");
1.16 luotonen 518: *len = 0;
519: return NULL;
520: }
521: else {
522: *len = status;
523: return isoc->input_buffer;
524: }
525: }
526: else {
527: char * ret = isoc->input_pointer;
528: *len = isoc->input_limit - isoc->input_pointer;
529: isoc->input_pointer = isoc->input_limit;
530: return ret;
531: }
532: }
533:
534:
1.15 luotonen 535: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
536: {
537: if (isoc) {
538: int status;
539:
540: isoc->input_pointer = isoc->input_buffer;
541: status = NETREAD(isoc->input_file_number,
542: isoc->input_buffer,
543: INPUT_BUFFER_SIZE);
544: if (status <= 0) {
545: isoc->input_limit = isoc->input_buffer;
546: if (status < 0)
1.39 frystyk 547: HTInetStatus("read");
1.15 luotonen 548: }
549: else
550: isoc->input_limit = isoc->input_buffer + status;
551: return status;
552: }
553: return -1;
554: }
555:
556:
557: PRIVATE void ascii_cat ARGS3(char **, linep,
558: char *, start,
559: char *, end)
560: {
561: if (linep && start && end && start <= end) {
562: char *ptr;
563:
564: if (*linep) {
565: int len = strlen(*linep);
566: *linep = (char*)realloc(*linep, len + end-start + 1);
567: ptr = *linep + len;
568: }
569: else {
570: ptr = *linep = (char*)malloc(end-start + 1);
571: }
572:
573: while (start < end) {
574: *ptr = FROMASCII(*start);
575: ptr++;
576: start++;
577: }
578: *ptr = 0;
579: }
580: }
581:
582:
583: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
584: BOOL, unfold)
585: {
586: if (!isoc)
587: return NULL;
588: else {
589: BOOL check_unfold = NO;
590: int prev_cr = 0;
591: char *start = isoc->input_pointer;
592: char *cur = isoc->input_pointer;
593: char * line = NULL;
594:
595: for(;;) {
596: /*
597: ** Get more if needed to complete line
598: */
599: if (cur >= isoc->input_limit) { /* Need more data */
600: ascii_cat(&line, start, cur);
601: if (fill_in_buffer(isoc) <= 0)
602: return line;
603: start = cur = isoc->input_pointer;
604: } /* if need more data */
605:
606: /*
607: ** Find a line feed if there is one
608: */
609: for(; cur < isoc->input_limit; cur++) {
610: char c = FROMASCII(*cur);
611: if (!c) {
1.18 luotonen 612: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 613: return NULL; /* Panic! read a 0! */
614: }
615: if (check_unfold && c != ' ' && c != '\t') {
616: return line; /* Note: didn't update isoc->input_pointer */
617: }
618: else {
619: check_unfold = NO;
620: }
621:
622: if (c=='\r') {
623: prev_cr = 1;
624: }
625: else {
626: if (c=='\n') { /* Found a line feed */
627: ascii_cat(&line, start, cur-prev_cr);
628: start = isoc->input_pointer = cur+1;
629:
1.44 frystyk 630: if (line && (int) strlen(line) > 0 && unfold) {
1.15 luotonen 631: check_unfold = YES;
632: }
633: else {
634: return line;
635: }
636: } /* if NL */
637: /* else just a regular character */
638: prev_cr = 0;
639: } /* if not CR */
640: } /* while characters in buffer remain */
641: } /* until line read or end-of-file */
642: } /* valid parameters to function */
643: }
644:
1.43 frystyk 645: /* The returned string must be freed by the caller */
1.15 luotonen 646: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
647: {
648: return get_some_line(isoc, NO);
649: }
650:
1.43 frystyk 651: /* The returned string must be freed by the caller */
1.15 luotonen 652: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
653: {
654: return get_some_line(isoc, YES);
655: }
656:
657:
658: /*
659: ** Read HTTP status line (if there is one).
660: **
661: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
662: ** First look at the stub in ASCII and check if it starts "HTTP/".
663: **
664: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
665: ** will be taken as a HTTP 1.0 server. Failure.
666: */
667: #define STUB_LENGTH 20
668: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
669: {
670: if (!isoc) {
671: return NULL;
672: }
673: else {
674: char buf[STUB_LENGTH + 1];
675: int i;
676: char server_version[STUB_LENGTH+1];
677: int server_status;
678:
679: /*
680: ** Read initial buffer
681: */
682: if (isoc->input_pointer >= isoc->input_limit &&
683: fill_in_buffer(isoc) <= 0) {
684: return NULL;
685: }
686:
687: for (i=0; i < STUB_LENGTH; i++)
688: buf[i] = FROMASCII(isoc->input_buffer[i]);
689: buf[STUB_LENGTH] = 0;
690:
691: if (0 != strncmp(buf, "HTTP/", 5) ||
692: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
693: return NULL;
694: else
695: return get_some_line(isoc, NO);
696: }
697: }
698:
699:
700: /*
701: ** Do heuristic test to see if this is binary.
702: **
703: ** We check for characters above 128 in the first few bytes, and
704: ** if we find them we forget the html default.
705: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
706: **
707: ** Bugs: An HTTP 0.9 server returning a binary document with
708: ** characters < 128 will be read as ASCII.
709: */
710: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
711: {
712: if (isoc &&
713: (isoc->input_pointer < isoc->input_limit ||
714: fill_in_buffer(isoc) > 0)) {
715: char *p = isoc->input_buffer;
716: int i = STUB_LENGTH;
717:
718: for( ; i && p < isoc->input_limit; p++, i++)
719: if (((int)*p)&128)
720: return YES;
721: }
722: return NO;
723: }
724:
725:
726:
1.1 timbl 727: /* Stream the data to an ouput file as binary
728: */
1.38 luotonen 729: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 730: int, input,
731: FILE *, output)
1.1 timbl 732: {
733: do {
734: int status = NETREAD(
1.13 timbl 735: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 736: if (status <= 0) {
737: if (status == 0) return 0;
738: if (TRACE) fprintf(stderr,
1.39 frystyk 739: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 740: return 2; /* Error */
741: }
1.13 timbl 742: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 743: } while (YES);
744: }
745:
1.38 luotonen 746:
747: /*
748: * Normal HTTP headers are never bigger than 2K.
749: */
750: #define S_BUFFER_SIZE 2000
751:
752: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
753: {
754: if (isoc) {
755: isoc->s_do_buffering = YES;
756: if (!isoc->s_buffer) {
757: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
758: isoc->s_buffer_size = S_BUFFER_SIZE;
759: }
760: isoc->s_buffer_cur = isoc->s_buffer;
761: }
762: }
763:
764: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
765: {
766: if (isoc) {
767: isoc->s_do_buffering = NO;
768: if (isoc->s_buffer_cur)
769: *isoc->s_buffer_cur = 0;
770: }
771: }
772:
773: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
774: char **, buffer_ptr)
775: {
776: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
777: return 0;
778: else {
779: *isoc->s_buffer_cur = 0;
780: if (buffer_ptr)
781: *buffer_ptr = isoc->s_buffer;
782: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
783: }
784: }
1.1 timbl 785:
1.33 luotonen 786: PRIVATE BOOL better_match ARGS2(HTFormat, f,
787: HTFormat, g)
788: {
789: CONST char *p, *q;
790:
791: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
792: int i,j;
793: for(i=0 ; *p; p++) if (*p == '*') i++;
794: for(j=0 ; *q; q++) if (*q == '*') j++;
795: if (i < j) return YES;
796: }
797: return NO;
798: }
799:
1.17 luotonen 800:
1.2 timbl 801: /* Create a filter stack
802: ** ---------------------
803: **
1.7 secret 804: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 805: ** structure is made to hold the destination format while the
806: ** new stack is generated. This is just to pass the out format to
807: ** MIME so far. Storing the format of a stream in the stream might
808: ** be a lot neater.
1.10 timbl 809: **
1.29 frystyk 810: ** The star/star format is special, in that if you can take
1.40 frystyk 811: ** that you can take anything.
812: **
813: ** On succes, request->error_block is set to YES so no more error
814: ** messages to the stream as the stream might be of any format.
1.2 timbl 815: */
1.34 luotonen 816: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
817: HTRequest *, request,
818: BOOL, guess)
1.2 timbl 819: {
1.12 timbl 820: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 821: HTList * conversion[2];
822: int which_list;
1.25 frystyk 823: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 824: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 825:
1.47 frystyk 826: request->error_block = YES; /* No more error output to stream */
1.2 timbl 827: if (TRACE) fprintf(stderr,
1.39 frystyk 828: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 829: HTAtom_name(rep_in),
1.2 timbl 830: HTAtom_name(rep_out));
1.34 luotonen 831:
832: if (guess && rep_in == WWW_UNKNOWN) {
833: CTRACE(stderr, "Returning... guessing stream\n");
834: return HTGuess_new(request);
835: }
836:
1.47 frystyk 837: if (rep_out == WWW_SOURCE || rep_out == rep_in) {
1.21 luotonen 838: return request->output_stream;
1.47 frystyk 839: }
1.2 timbl 840:
1.14 timbl 841: conversion[0] = request->conversions;
842: conversion[1] = HTConversions;
1.17 luotonen 843:
1.15 luotonen 844: for(which_list = 0; which_list<2; which_list++) {
845: HTList * cur = conversion[which_list];
846:
847: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 848: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 849: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
850: if (!best_match ||
851: better_match(pres->rep, best_match->rep) ||
852: (!better_match(best_match->rep, pres->rep) &&
853: pres->quality > best_quality)) {
1.25 frystyk 854: best_match = pres;
855: best_quality = pres->quality;
1.10 timbl 856: }
857: }
1.2 timbl 858: }
859: }
1.33 luotonen 860:
1.29 frystyk 861: match = best_match ? best_match : NULL;
862: if (match) {
863: if (match->rep == WWW_SOURCE) {
1.39 frystyk 864: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 865: HTAtom_name(match->rep),
866: HTAtom_name(rep_out));
867: }
868: return (*match->converter)(
1.25 frystyk 869: request, match->command, rep_in, rep_out,
870: request->output_stream);
1.29 frystyk 871: }
1.42 frystyk 872: {
873: char *msg = NULL;
874: StrAllocCopy(msg, "Can't convert from ");
875: StrAllocCat(msg, HTAtom_name(rep_in));
876: StrAllocCat(msg, " to ");
877: StrAllocCat(msg, HTAtom_name(rep_out));
878: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NOT_IMPLEMENTED,
879: (void *) msg, (int) strlen(msg), "HTStreamStack");
880: free(msg);
881: }
1.47 frystyk 882: request->error_block = NO; /* We didn't put up a stream anyway */
1.2 timbl 883: return NULL;
884: }
885:
886:
887: /* Find the cost of a filter stack
888: ** -------------------------------
889: **
890: ** Must return the cost of the same stack which StreamStack would set up.
891: **
892: ** On entry,
893: ** length The size of the data to be converted
894: */
1.12 timbl 895: PUBLIC float HTStackValue ARGS5(
1.14 timbl 896: HTList *, theseConversions,
1.10 timbl 897: HTFormat, rep_in,
1.2 timbl 898: HTFormat, rep_out,
899: float, initial_value,
900: long int, length)
901: {
1.14 timbl 902: int which_list;
903: HTList* conversion[2];
904:
1.2 timbl 905: if (TRACE) fprintf(stderr,
1.39 frystyk 906: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 907: HTAtom_name(rep_in), initial_value,
1.2 timbl 908: HTAtom_name(rep_out));
909:
910: if (rep_out == WWW_SOURCE ||
1.10 timbl 911: rep_out == rep_in) return 0.0;
1.2 timbl 912:
1.12 timbl 913: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 914:
1.14 timbl 915: conversion[0] = theseConversions;
916: conversion[1] = HTConversions;
917:
918: for(which_list = 0; which_list<2; which_list++)
919: if (conversion[which_list]) {
1.15 luotonen 920: HTList * cur = conversion[which_list];
1.2 timbl 921: HTPresentation * pres;
1.15 luotonen 922: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
923: if (pres->rep == rep_in &&
1.17 luotonen 924: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 925: float value = initial_value * pres->quality;
926: if (HTMaxSecs != 0.0)
1.15 luotonen 927: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 928: /HTMaxSecs;
929: return value;
930: }
931: }
932: }
933:
934: return -1e30; /* Really bad */
1.17 luotonen 935: }
936:
937:
1.2 timbl 938:
1.1 timbl 939:
1.2 timbl 940: /* Push data from a socket down a stream
941: ** -------------------------------------
1.1 timbl 942: **
1.2 timbl 943: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 944: ** graphic (or other) objects described by the file.
1.2 timbl 945: **
946: ** The file number given is assumed to be a TELNET stream ie containing
947: ** CRLF at the end of lines which need to be stripped to LF for unix
948: ** when the format is textual.
949: **
1.26 luotonen 950: ** RETURNS the number of bytes transferred.
951: **
1.1 timbl 952: */
1.26 luotonen 953: PUBLIC int HTCopy ARGS2(
1.2 timbl 954: int, file_number,
955: HTStream*, sink)
1.1 timbl 956: {
1.2 timbl 957: HTStreamClass targetClass;
1.13 timbl 958: HTInputSocket * isoc;
1.26 luotonen 959: int cnt = 0;
960:
1.5 timbl 961: /* Push the data down the stream
1.2 timbl 962: **
963: */
964: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 965: isoc = HTInputSocket_new(file_number);
1.2 timbl 966:
967: /* Push binary from socket down sink
1.10 timbl 968: **
969: ** This operation could be put into a main event loop
1.2 timbl 970: */
971: for(;;) {
972: int status = NETREAD(
1.13 timbl 973: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 974: if (status <= 0) {
975: if (status == 0) break;
976: if (TRACE) fprintf(stderr,
1.39 frystyk 977: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 978: status, errno);
1.2 timbl 979: break;
980: }
1.26 luotonen 981:
1.8 timbl 982: #ifdef NOT_ASCII
983: {
984: char * p;
1.13 timbl 985: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 986: *p = FROMASCII(*p);
987: }
988: }
989: #endif
990:
1.13 timbl 991: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 992: cnt += status;
1.2 timbl 993: } /* next bufferload */
1.26 luotonen 994:
1.13 timbl 995: HTInputSocket_free(isoc);
1.26 luotonen 996:
997: return cnt;
1.2 timbl 998: }
999:
1.1 timbl 1000:
1.7 secret 1001:
1002: /* Push data from a file pointer down a stream
1003: ** -------------------------------------
1004: **
1005: ** This routine is responsible for creating and PRESENTING any
1006: ** graphic (or other) objects described by the file.
1007: **
1008: **
1009: */
1010: PUBLIC void HTFileCopy ARGS2(
1011: FILE *, fp,
1012: HTStream*, sink)
1013: {
1014: HTStreamClass targetClass;
1.13 timbl 1015: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1016:
1017: /* Push the data down the stream
1018: **
1019: */
1020: targetClass = *(sink->isa); /* Copy pointers to procedures */
1021:
1022: /* Push binary from socket down sink
1023: */
1024: for(;;) {
1025: int status = fread(
1026: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1027: if (status == 0) { /* EOF or error */
1028: if (ferror(fp) == 0) break;
1029: if (TRACE) fprintf(stderr,
1.39 frystyk 1030: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1031: break;
1032: }
1033: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1034: } /* next bufferload */
1.7 secret 1035: }
1036:
1037:
1038:
1039:
1.2 timbl 1040: /* Push data from a socket down a stream STRIPPING CR
1041: ** --------------------------------------------------
1042: **
1043: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1044: ** graphic (or other) objects described by the socket.
1.2 timbl 1045: **
1046: ** The file number given is assumed to be a TELNET stream ie containing
1047: ** CRLF at the end of lines which need to be stripped to LF for unix
1048: ** when the format is textual.
1.37 frystyk 1049: **
1050: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1051: */
1.2 timbl 1052: PUBLIC void HTCopyNoCR ARGS2(
1053: int, file_number,
1054: HTStream*, sink)
1055: {
1.13 timbl 1056: HTStreamClass targetClass;
1057: HTInputSocket * isoc;
1.37 frystyk 1058: int ch;
1.1 timbl 1059:
1.2 timbl 1060: /* Push the data, ignoring CRLF, down the stream
1061: **
1062: */
1063: targetClass = *(sink->isa); /* Copy pointers to procedures */
1064:
1065: /* Push text from telnet socket down sink
1066: **
1067: ** @@@@@ To push strings could be faster? (especially is we
1068: ** cheat and don't ignore CR! :-}
1069: */
1.13 timbl 1070: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1071: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1072: (*targetClass.put_character)(sink, ch);
1.13 timbl 1073: HTInputSocket_free(isoc);
1.2 timbl 1074: }
1.1 timbl 1075:
1.2 timbl 1076:
1.46 frystyk 1077: /* To be replaced by a stream */
1078: PUBLIC void HTCopyDot ARGS2(int, file_number,
1079: HTStream *, sink)
1080: {
1081: HTStreamClass targetClass;
1082: HTInputSocket * isoc;
1083: int ch;
1084: int state=3;
1085:
1086: /* Push the data, ignoring CRLF, down the stream */
1087: targetClass = *(sink->isa); /* Copy pointers to procedures */
1088: isoc = HTInputSocket_new(file_number);
1089: while (state && (ch = HTInputSocket_getCharacter(isoc)) >= 0) {
1090: if (ch == '\n')
1091: state--;
1092: else if (state==2 && ch=='.')
1093: state--;
1094: else
1095: state = 3;
1096: (*targetClass.put_character)(sink, ch);
1097: }
1098: HTInputSocket_free(isoc);
1099: }
1100:
1101:
1.7 secret 1102:
1.2 timbl 1103: /* Parse a socket given format and file number
1104: **
1105: ** This routine is responsible for creating and PRESENTING any
1106: ** graphic (or other) objects described by the file.
1107: **
1108: ** The file number given is assumed to be a TELNET stream ie containing
1109: ** CRLF at the end of lines which need to be stripped to LF for unix
1110: ** when the format is textual.
1111: **
1.42 frystyk 1112: ** Returns <0 on error, HT_LOADED on success.
1.2 timbl 1113: */
1.14 timbl 1114:
1.46 frystyk 1115: /* The parameter to this function and HTParsefile should be HTRequest */
1116:
1.12 timbl 1117: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1118: HTFormat, rep_in,
1.2 timbl 1119: int, file_number,
1.12 timbl 1120: HTRequest *, request)
1.2 timbl 1121: {
1122: HTStream * stream;
1123: HTStreamClass targetClass;
1.1 timbl 1124:
1.40 frystyk 1125: if (request->error_stack) {
1126: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1127: return -1;
1128: }
1129:
1.42 frystyk 1130: /* Set up stream stack */
1131: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1132: return -1;
1.1 timbl 1133:
1.3 timbl 1134: /* Push the data, ignoring CRLF if necessary, down the stream
1135: **
1.2 timbl 1136: **
1.3 timbl 1137: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1138: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1139: ** The current method smells anyway.
1.2 timbl 1140: */
1141: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.46 frystyk 1142: if (request->output_format == WWW_SOURCE && request->net_info->CRLFdotCRLF)
1143: HTCopyDot(file_number, stream);
1144: else if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1145: || (request->content_encoding &&
1146: request->content_encoding != HTAtom_for("8bit") &&
1147: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1148: || strstr(HTAtom_name(rep_in), "image/")
1149: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1150: HTCopy(file_number, stream);
1.46 frystyk 1151: } else if (request->net_info->CRLFdotCRLF)
1152: HTCopyDot(file_number, stream);
1153: else
1.2 timbl 1154: HTCopyNoCR(file_number, stream);
1.45 duns 1155: (*targetClass._free)(stream);
1.7 secret 1156:
1157: return HT_LOADED;
1158: }
1159:
1160:
1161:
1162: /* Parse a file given format and file pointer
1163: **
1164: ** This routine is responsible for creating and PRESENTING any
1165: ** graphic (or other) objects described by the file.
1166: **
1167: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1168: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1169: ** when the format is textual.
1170: **
1171: */
1.12 timbl 1172: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1173: HTFormat, rep_in,
1.7 secret 1174: FILE *, fp,
1.12 timbl 1175: HTRequest *, request)
1.7 secret 1176: {
1177: HTStream * stream;
1178: HTStreamClass targetClass;
1.40 frystyk 1179:
1180: if (request->error_stack) {
1181: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1182: return -1;
1183: }
1.7 secret 1184:
1.42 frystyk 1185: /* Set up stream stack */
1186: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1187: return -1;
1.7 secret 1188:
1.9 timbl 1189: /* Push the data down the stream
1.7 secret 1190: **
1191: **
1192: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1193: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1194: ** The current method smells anyway.
1195: */
1196: targetClass = *(stream->isa); /* Copy pointers to procedures */
1197: HTFileCopy(fp, stream);
1.45 duns 1198: (*targetClass._free)(stream);
1.1 timbl 1199:
1.2 timbl 1200: return HT_LOADED;
1.1 timbl 1201: }
1.2 timbl 1202:
1.10 timbl 1203:
/*	Converter stream: Network Telnet to internal character text
**	-----------------------------------------------------------
**
**	The input is assumed to be in ASCII, with lines delimited
**	by (13,10) pairs. These pairs are converted into (CR,LF)
**	pairs in the local representation. The (CR,LF) sequence,
**	when found, is changed to a '\n' character, the internal
**	C representation of a new line.
*/
1213:
1214:
1.11 timbl 1215: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1216: {
1217: char c = FROMASCII(net_char);
1218: if (me->had_cr) {
1219: if (c==LF) {
1220: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1221: me->had_cr = NO;
1222: return;
1223: } else {
1224: me->sink->isa->put_character(me->sink, CR); /* leftover */
1225: }
1226: }
1227: me->had_cr = (c==CR);
1228: if (!me->had_cr)
1229: me->sink->isa->put_character(me->sink, c); /* normal */
1230: }
1231:
1.11 timbl 1232: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1233: {
1234: CONST char * p;
1235: for(p=s; *p; p++) NetToText_put_character(me, *p);
1236: }
1237:
1.11 timbl 1238: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1239: {
1240: CONST char * p;
1241: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1242: }
1243:
1244: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1245: {
1.45 duns 1246: me->sink->isa->_free(me->sink); /* Close rest of pipe */
1.10 timbl 1247: free(me);
1248: }
1249:
1250: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1251: {
1252: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1253: free(me);
1254: }
1255:
/*	The class structure
**
**	Method table shared by every NetToText stream; HTNetToText stores
**	its address in the isa field of each stream it creates.
**
**	This is a positional initializer, so the entries must stay in the
**	order of the HTStreamClass declaration (not visible in this file
**	section -- check the header before reordering).
*/
PRIVATE HTStreamClass NetToTextClass = {
	"NetToText",			/* Name of this stream class */
	NetToText_free,			/* Normal end of stream */
	NetToText_abort,		/* Abnormal end of stream */
	NetToText_put_character,	/* Accept a single character */
	NetToText_put_string,		/* Accept a NUL-terminated string */
	NetToText_put_block		/* Accept a counted block */
};
1266:
1267: /* The creation method
1268: */
1269: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1270: {
1271: HTStream* me = (HTStream*)malloc(sizeof(*me));
1272: if (me == NULL) outofmem(__FILE__, "NetToText");
1273: me->isa = &NetToTextClass;
1274:
1275: me->had_cr = NO;
1276: me->sink = sink;
1277: return me;
1278: }
1.2 timbl 1279:
1280:
Webmaster