Annotation of libwww/Library/src/HTFormat.c, revision 1.46
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
1.45 duns 12: ** HISTORY:
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
14: **
1.2 timbl 15: */
16:
1.10 timbl 17:
1.2 timbl 18: /* Implements:
1.1 timbl 19: */
1.2 timbl 20: #include "HTFormat.h"
21:
22: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
23: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
24:
25: #ifdef unix
26: #ifdef NeXT
27: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
28: #else
29: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
30: /* Full pathname would be better! */
31: #endif
32: #endif
33:
1.1 timbl 34:
35: #include "HTUtils.h"
36: #include "tcp.h"
37:
38: #include "HTML.h"
1.12 timbl 39: #include "HTMLPDTD.h"
1.2 timbl 40: #include "HTAlert.h"
41: #include "HTList.h"
42: #include "HTInit.h"
43: /* Streams and structured streams which we use:
44: */
45: #include "HTFWriter.h"
46: #include "HTPlain.h"
47: #include "SGML.h"
48: #include "HTML.h"
49: #include "HTMLGen.h"
1.41 frystyk 50: #include "HTTCP.h"
1.34 luotonen 51: #include "HTGuess.h"
1.42 frystyk 52: #include "HTError.h"
1.34 luotonen 53:
1.2 timbl 54:
55: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
56:
1.10 timbl 57: #ifdef ORIGINAL
1.2 timbl 58: struct _HTStream {
59: CONST HTStreamClass* isa;
60: /* ... */
61: };
1.10 timbl 62: #endif
63:
64: /* this version used by the NetToText stream */
65: struct _HTStream {
66: CONST HTStreamClass * isa;
67: BOOL had_cr;
68: HTStream * sink;
69: };
1.2 timbl 70:
71:
1.17 luotonen 72: /*
73: ** Accept-Encoding and Accept-Language
74: */
75: typedef struct _HTAcceptNode {
76: HTAtom * atom;
77: float quality;
78: } HTAcceptNode;
79:
80:
81:
82:
1.2 timbl 83: /* Presentation methods
84: ** --------------------
85: */
86:
1.14 timbl 87: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 88:
1.31 frystyk 89: /* -------------------------------------------------------------------------
90: This function replaces the code in HTRequest_delete() in order to keep
91: the data structure hidden (it is NOT a joke!)
92: Henrik 14/03-94
93: ------------------------------------------------------------------------- */
94: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
95: {
96: HTList *cur = me;
97: HTPresentation *pres;
98: if (!me)
99: return;
100: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
101: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
102: free(pres);
103: }
104: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
105: }
106:
1.2 timbl 107:
108: /* Define a presentation system command for a content-type
109: ** -------------------------------------------------------
110: */
1.12 timbl 111: PUBLIC void HTSetPresentation ARGS6(
112: HTList *, conversions,
113: CONST char *, representation,
114: CONST char *, command,
115: float, quality,
116: float, secs,
117: float, secs_per_byte
1.2 timbl 118: ){
119:
120: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
121: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
122:
123: pres->rep = HTAtom_for(representation);
124: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
125: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
126: pres->quality = quality;
127: pres->secs = secs;
128: pres->secs_per_byte = secs_per_byte;
129: pres->rep = HTAtom_for(representation);
130: pres->command = 0;
131: StrAllocCopy(pres->command, command);
132:
1.12 timbl 133: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 134:
1.15 luotonen 135: #ifdef OLD_CODE
136: if (strcmp(representation, "*")==0) {
1.2 timbl 137: if (default_presentation) free(default_presentation);
138: default_presentation = pres;
1.12 timbl 139: } else
140: #endif
141: HTList_addObject(conversions, pres);
1.2 timbl 142: }
143:
144:
145: /* Define a built-in function for a content-type
146: ** ---------------------------------------------
147: */
1.12 timbl 148: PUBLIC void HTSetConversion ARGS7(
149: HTList *, conversions,
150: CONST char *, representation_in,
151: CONST char *, representation_out,
1.6 timbl 152: HTConverter*, converter,
1.12 timbl 153: float, quality,
154: float, secs,
155: float, secs_per_byte
1.2 timbl 156: ){
1.1 timbl 157:
1.2 timbl 158: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
159: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
160:
161: pres->rep = HTAtom_for(representation_in);
162: pres->rep_out = HTAtom_for(representation_out);
163: pres->converter = converter;
164: pres->command = NULL; /* Fixed */
165: pres->quality = quality;
166: pres->secs = secs;
167: pres->secs_per_byte = secs_per_byte;
168: pres->command = 0;
169:
1.12 timbl 170: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 171:
1.12 timbl 172: #ifdef OLD_CODE
1.2 timbl 173: if (strcmp(representation_in, "*")==0) {
174: if (default_presentation) free(default_presentation);
175: default_presentation = pres;
1.12 timbl 176: } else
177: #endif
178: HTList_addObject(conversions, pres);
1.2 timbl 179: }
1.1 timbl 180:
181:
182:
1.17 luotonen 183: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
184: char *, enc,
185: float, quality)
186: {
187: HTAcceptNode * node;
188: char * cur;
189:
190: if (!list || !enc || !*enc) return;
191:
192: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
193:
194: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
195: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
196: HTList_addObject(list, (void*)node);
197:
198: node->atom = HTAtom_for(enc);
199: node->quality = quality;
200: }
201:
202:
203: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
204: char *, lang,
205: float, quality)
206: {
207: HTAcceptNode * node;
208:
209: if (!list || !lang || !*lang) return;
210:
211: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
212: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
213:
214: HTList_addObject(list, (void*)node);
215: node->atom = HTAtom_for(lang);
216: node->quality = quality;
217: }
218:
219:
220: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
221: HTAtom *, actual)
222: {
223: char *t, *a, *st, *sa;
224: BOOL match = NO;
225:
1.22 luotonen 226: if (template && actual && (t = HTAtom_name(template))) {
227: if (!strcmp(t, "*"))
228: return YES;
1.17 luotonen 229:
1.22 luotonen 230: if (strchr(t, '*') &&
231: (a = HTAtom_name(actual)) &&
232: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 233:
1.22 luotonen 234: *sa = 0;
235: *st = 0;
236:
237: if ((*(st-1)=='*' &&
238: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
239: (*(st+1)=='*' && !strcasecomp(t,a)))
240: match = YES;
241:
242: *sa = '/';
243: *st = '/';
244: }
245: }
1.23 luotonen 246: return match;
1.17 luotonen 247: }
248:
1.36 luotonen 249: /*
250: * Added by takada@seraph.ntt.jp (94/04/08)
251: */
252: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
253: HTAtom *, actual)
254: {
255: char *t, *a, *st, *sa;
256: BOOL match = NO;
257:
258: if (template && actual &&
259: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
260: st = strchr(t, '_');
261: sa = strchr(a, '_');
262: if ((st != NULL) && (sa != NULL)) {
263: if (!strcasecomp(t, a))
264: match = YES;
265: else
266: match = NO;
267: }
268: else {
269: if (st != NULL) *st = 0;
270: if (sa != NULL) *sa = 0;
271: if (!strcasecomp(t, a))
272: match = YES;
273: else
274: match = NO;
275: if (st != NULL) *st = '_';
276: if (sa != NULL) *sa = '_';
277: }
278: }
279: return match;
280: }
281: /* end of addition */
282:
283:
1.17 luotonen 284:
285: PRIVATE float type_value ARGS2(HTAtom *, content_type,
286: HTList *, accepted)
287: {
288: HTList * cur = accepted;
289: HTPresentation * pres;
290: HTPresentation * wild = NULL;
291:
292: if (!content_type || !accepted) return -1;
293:
294: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
295: if (pres->rep == content_type)
296: return pres->quality;
297: else if (wild_match(pres->rep, content_type))
298: wild = pres;
299: }
300: if (wild) return wild->quality;
301: else return -1;
302: }
303:
304:
305: PRIVATE float lang_value ARGS2(HTAtom *, language,
306: HTList *, accepted)
307: {
308: HTList * cur = accepted;
309: HTAcceptNode * node;
310: HTAcceptNode * wild = NULL;
311:
312: if (!language || !accepted || HTList_isEmpty(accepted)) {
313: return 0.1;
314: }
315:
316: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
317: if (node->atom == language) {
318: return node->quality;
319: }
1.36 luotonen 320: /*
321: * patch by takada@seraph.ntt.jp (94/04/08)
322: * the original line was
323: * else if (wild_match(node->atom, language)) {
324: * and the new line is
325: */
326: else if (lang_match(node->atom, language)) {
1.17 luotonen 327: wild = node;
328: }
329: }
330:
331: if (wild) {
332: return wild->quality;
333: }
334: else {
335: return 0.1;
336: }
337: }
338:
339:
340: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
341: HTList *, accepted)
342: {
343: HTList * cur = accepted;
344: HTAcceptNode * node;
345: HTAcceptNode * wild = NULL;
346: char * e;
347:
348: if (!encoding || !accepted || HTList_isEmpty(accepted))
349: return 1;
350:
351: e = HTAtom_name(encoding);
352: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
353: return 1;
354:
355: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
356: if (node->atom == encoding)
357: return node->quality;
358: else if (wild_match(node->atom, encoding))
359: wild = node;
360: }
361: if (wild) return wild->quality;
362: else return 1;
363: }
364:
365:
366: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
367: HTList *, accepted_content_types,
368: HTList *, accepted_languages,
369: HTList *, accepted_encodings)
370: {
371: int accepted_cnt = 0;
372: HTList * accepted;
373: HTList * sorted;
374: HTList * cur;
375: HTContentDescription * d;
376:
377: if (!possibilities) return NO;
378:
379: accepted = HTList_new();
380: cur = possibilities;
381: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
382: float tv = type_value(d->content_type, accepted_content_types);
383: float lv = lang_value(d->content_language, accepted_languages);
384: float ev = encoding_value(d->content_encoding, accepted_encodings);
385:
386: if (tv > 0) {
387: d->quality *= tv * lv * ev;
388: HTList_addObject(accepted, d);
389: accepted_cnt++;
390: }
1.18 luotonen 391: else {
392: if (d->filename) free(d->filename);
393: free(d);
394: }
1.17 luotonen 395: }
396:
1.18 luotonen 397: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 398: CTRACE(stderr,
1.18 luotonen 399: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 400:
401: sorted = HTList_new();
402: while (accepted_cnt-- > 0) {
403: HTContentDescription * worst = NULL;
404: cur = accepted;
405: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
406: if (!worst || d->quality < worst->quality)
407: worst = d;
408: }
409: if (worst) {
410: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
411: accepted_cnt+1,
412: worst->quality,
413: (worst->content_type
414: ? HTAtom_name(worst->content_type) : "-"),
415: (worst->content_language
416: ? HTAtom_name(worst->content_language) :"-"),
417: (worst->content_encoding
418: ? HTAtom_name(worst->content_encoding) :"-"),
419: (worst->filename
420: ? worst->filename :"-"));
421: HTList_removeObject(accepted, (void*)worst);
422: HTList_addObject(sorted, (void*)worst);
423: }
424: }
1.18 luotonen 425: CTRACE(stderr, "\n");
1.17 luotonen 426: HTList_delete(accepted);
427: HTList_delete(possibilities->next);
428: possibilities->next = sorted->next;
429: sorted->next = NULL;
430: HTList_delete(sorted);
431:
432: if (!HTList_isEmpty(possibilities)) return YES;
433: else return NO;
434: }
435:
436:
437:
438:
439:
1.13 timbl 440: /* Socket Input Buffering
441: ** ----------------------
1.1 timbl 442: **
1.13 timbl 443: ** This code is used because one cannot in general open a
444: ** file descriptor for a socket.
445: **
1.1 timbl 446: ** The input file is read using the macro which can read from
1.13 timbl 447: ** a socket or a file, but this should not be used for files
448: ** as fopen() etc is more portable of course.
449: **
1.1 timbl 450: ** The input buffer size, if large will give greater efficiency and
451: ** release the server faster, and if small will save space on PCs etc.
452: */
453:
454:
455: /* Set up the buffering
456: **
457: ** These routines are public because they are in fact needed by
458: ** many parsers, and on PCs and Macs we should not duplicate
459: ** the static buffer area.
460: */
1.13 timbl 461: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 462: {
1.28 frystyk 463: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 464: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
465: isoc->input_file_number = file_number;
466: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
467: return isoc;
1.1 timbl 468: }
469:
1.35 frystyk 470: /* This should return HT_INTERRUPTED if interrupted BUT the connection
471: MUST not be closed */
472: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 473: {
1.35 frystyk 474: int ch;
1.1 timbl 475: do {
1.13 timbl 476: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 477: int status = NETREAD(
1.13 timbl 478: isoc->input_file_number,
479: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 480: if (status <= 0) {
1.39 frystyk 481: if (status == 0)
482: return EOF;
483: if (status == HT_INTERRUPTED) {
484: if (TRACE)
485: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
486: return HT_INTERRUPTED;
487: }
488: HTInetStatus("read");
489: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 490: }
1.35 frystyk 491: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 492: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 493: }
1.39 frystyk 494: ch = (unsigned char) *isoc->input_pointer++;
495: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 496:
497: return FROMASCII(ch);
498: }
499:
1.17 luotonen 500: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 501: {
502: if (me) free(me);
503: }
504:
505:
1.16 luotonen 506: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
507: int *, len)
508: {
509: if (isoc->input_pointer >= isoc->input_limit) {
510: int status = NETREAD(isoc->input_file_number,
511: isoc->input_buffer,
512: ((*len < INPUT_BUFFER_SIZE) ?
513: *len : INPUT_BUFFER_SIZE));
514: if (status <= 0) {
515: isoc->input_limit = isoc->input_buffer;
516: if (status < 0)
1.39 frystyk 517: HTInetStatus("read");
1.16 luotonen 518: *len = 0;
519: return NULL;
520: }
521: else {
522: *len = status;
523: return isoc->input_buffer;
524: }
525: }
526: else {
527: char * ret = isoc->input_pointer;
528: *len = isoc->input_limit - isoc->input_pointer;
529: isoc->input_pointer = isoc->input_limit;
530: return ret;
531: }
532: }
533:
534:
1.15 luotonen 535: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
536: {
537: if (isoc) {
538: int status;
539:
540: isoc->input_pointer = isoc->input_buffer;
541: status = NETREAD(isoc->input_file_number,
542: isoc->input_buffer,
543: INPUT_BUFFER_SIZE);
544: if (status <= 0) {
545: isoc->input_limit = isoc->input_buffer;
546: if (status < 0)
1.39 frystyk 547: HTInetStatus("read");
1.15 luotonen 548: }
549: else
550: isoc->input_limit = isoc->input_buffer + status;
551: return status;
552: }
553: return -1;
554: }
555:
556:
557: PRIVATE void ascii_cat ARGS3(char **, linep,
558: char *, start,
559: char *, end)
560: {
561: if (linep && start && end && start <= end) {
562: char *ptr;
563:
564: if (*linep) {
565: int len = strlen(*linep);
566: *linep = (char*)realloc(*linep, len + end-start + 1);
567: ptr = *linep + len;
568: }
569: else {
570: ptr = *linep = (char*)malloc(end-start + 1);
571: }
572:
573: while (start < end) {
574: *ptr = FROMASCII(*start);
575: ptr++;
576: start++;
577: }
578: *ptr = 0;
579: }
580: }
581:
582:
583: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
584: BOOL, unfold)
585: {
586: if (!isoc)
587: return NULL;
588: else {
589: BOOL check_unfold = NO;
590: int prev_cr = 0;
591: char *start = isoc->input_pointer;
592: char *cur = isoc->input_pointer;
593: char * line = NULL;
594:
595: for(;;) {
596: /*
597: ** Get more if needed to complete line
598: */
599: if (cur >= isoc->input_limit) { /* Need more data */
600: ascii_cat(&line, start, cur);
601: if (fill_in_buffer(isoc) <= 0)
602: return line;
603: start = cur = isoc->input_pointer;
604: } /* if need more data */
605:
606: /*
607: ** Find a line feed if there is one
608: */
609: for(; cur < isoc->input_limit; cur++) {
610: char c = FROMASCII(*cur);
611: if (!c) {
1.18 luotonen 612: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 613: return NULL; /* Panic! read a 0! */
614: }
615: if (check_unfold && c != ' ' && c != '\t') {
616: return line; /* Note: didn't update isoc->input_pointer */
617: }
618: else {
619: check_unfold = NO;
620: }
621:
622: if (c=='\r') {
623: prev_cr = 1;
624: }
625: else {
626: if (c=='\n') { /* Found a line feed */
627: ascii_cat(&line, start, cur-prev_cr);
628: start = isoc->input_pointer = cur+1;
629:
1.44 frystyk 630: if (line && (int) strlen(line) > 0 && unfold) {
1.15 luotonen 631: check_unfold = YES;
632: }
633: else {
634: return line;
635: }
636: } /* if NL */
637: /* else just a regular character */
638: prev_cr = 0;
639: } /* if not CR */
640: } /* while characters in buffer remain */
641: } /* until line read or end-of-file */
642: } /* valid parameters to function */
643: }
644:
1.43 frystyk 645: /* The returned string must be freed by the caller */
1.15 luotonen 646: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
647: {
648: return get_some_line(isoc, NO);
649: }
650:
1.43 frystyk 651: /* The returned string must be freed by the caller */
1.15 luotonen 652: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
653: {
654: return get_some_line(isoc, YES);
655: }
656:
657:
658: /*
659: ** Read HTTP status line (if there is one).
660: **
661: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
662: ** First look at the stub in ASCII and check if it starts "HTTP/".
663: **
664: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
665: ** will be taken as a HTTP 1.0 server. Failure.
666: */
667: #define STUB_LENGTH 20
668: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
669: {
670: if (!isoc) {
671: return NULL;
672: }
673: else {
674: char buf[STUB_LENGTH + 1];
675: int i;
676: char server_version[STUB_LENGTH+1];
677: int server_status;
678:
679: /*
680: ** Read initial buffer
681: */
682: if (isoc->input_pointer >= isoc->input_limit &&
683: fill_in_buffer(isoc) <= 0) {
684: return NULL;
685: }
686:
687: for (i=0; i < STUB_LENGTH; i++)
688: buf[i] = FROMASCII(isoc->input_buffer[i]);
689: buf[STUB_LENGTH] = 0;
690:
691: if (0 != strncmp(buf, "HTTP/", 5) ||
692: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
693: return NULL;
694: else
695: return get_some_line(isoc, NO);
696: }
697: }
698:
699:
700: /*
701: ** Do heuristic test to see if this is binary.
702: **
703: ** We check for characters above 128 in the first few bytes, and
704: ** if we find them we forget the html default.
705: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
706: **
707: ** Bugs: An HTTP 0.9 server returning a binary document with
708: ** characters < 128 will be read as ASCII.
709: */
710: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
711: {
712: if (isoc &&
713: (isoc->input_pointer < isoc->input_limit ||
714: fill_in_buffer(isoc) > 0)) {
715: char *p = isoc->input_buffer;
716: int i = STUB_LENGTH;
717:
718: for( ; i && p < isoc->input_limit; p++, i++)
719: if (((int)*p)&128)
720: return YES;
721: }
722: return NO;
723: }
724:
725:
726:
1.1 timbl 727: /* Stream the data to an output file as binary
728: */
1.38 luotonen 729: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 730: int, input,
731: FILE *, output)
1.1 timbl 732: {
733: do {
734: int status = NETREAD(
1.13 timbl 735: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 736: if (status <= 0) {
737: if (status == 0) return 0;
738: if (TRACE) fprintf(stderr,
1.39 frystyk 739: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 740: return 2; /* Error */
741: }
1.13 timbl 742: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 743: } while (YES);
744: }
745:
1.38 luotonen 746:
747: /*
748: * Normal HTTP headers are never bigger than 2K.
749: */
750: #define S_BUFFER_SIZE 2000
751:
752: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
753: {
754: if (isoc) {
755: isoc->s_do_buffering = YES;
756: if (!isoc->s_buffer) {
757: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
758: isoc->s_buffer_size = S_BUFFER_SIZE;
759: }
760: isoc->s_buffer_cur = isoc->s_buffer;
761: }
762: }
763:
764: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
765: {
766: if (isoc) {
767: isoc->s_do_buffering = NO;
768: if (isoc->s_buffer_cur)
769: *isoc->s_buffer_cur = 0;
770: }
771: }
772:
773: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
774: char **, buffer_ptr)
775: {
776: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
777: return 0;
778: else {
779: *isoc->s_buffer_cur = 0;
780: if (buffer_ptr)
781: *buffer_ptr = isoc->s_buffer;
782: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
783: }
784: }
1.1 timbl 785:
1.33 luotonen 786: PRIVATE BOOL better_match ARGS2(HTFormat, f,
787: HTFormat, g)
788: {
789: CONST char *p, *q;
790:
791: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
792: int i,j;
793: for(i=0 ; *p; p++) if (*p == '*') i++;
794: for(j=0 ; *q; q++) if (*q == '*') j++;
795: if (i < j) return YES;
796: }
797: return NO;
798: }
799:
1.17 luotonen 800:
1.2 timbl 801: /* Create a filter stack
802: ** ---------------------
803: **
1.7 secret 804: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 805: ** structure is made to hold the destination format while the
806: ** new stack is generated. This is just to pass the out format to
807: ** MIME so far. Storing the format of a stream in the stream might
808: ** be a lot neater.
1.10 timbl 809: **
1.29 frystyk 810: ** The star/star format is special, in that if you can take
1.40 frystyk 811: ** that you can take anything.
812: **
813: ** On success, request->error_block is set to YES so that no more error
814: ** messages are sent to the stream, as the stream might be of any format.
1.2 timbl 815: */
1.34 luotonen 816: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
817: HTRequest *, request,
818: BOOL, guess)
1.2 timbl 819: {
1.12 timbl 820: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 821: HTList * conversion[2];
822: int which_list;
1.25 frystyk 823: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 824: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 825:
1.2 timbl 826: if (TRACE) fprintf(stderr,
1.39 frystyk 827: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 828: HTAtom_name(rep_in),
1.2 timbl 829: HTAtom_name(rep_out));
1.34 luotonen 830:
831: if (guess && rep_in == WWW_UNKNOWN) {
832: CTRACE(stderr, "Returning... guessing stream\n");
1.40 frystyk 833: request->error_block = YES; /* No more error output to stream */
1.34 luotonen 834: return HTGuess_new(request);
835: }
836:
1.21 luotonen 837: if (rep_out == WWW_SOURCE || rep_out == rep_in)
838: return request->output_stream;
1.2 timbl 839:
1.14 timbl 840: conversion[0] = request->conversions;
841: conversion[1] = HTConversions;
1.17 luotonen 842:
1.15 luotonen 843: for(which_list = 0; which_list<2; which_list++) {
844: HTList * cur = conversion[which_list];
845:
846: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 847: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 848: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
849: if (!best_match ||
850: better_match(pres->rep, best_match->rep) ||
851: (!better_match(best_match->rep, pres->rep) &&
852: pres->quality > best_quality)) {
1.25 frystyk 853: best_match = pres;
854: best_quality = pres->quality;
1.10 timbl 855: }
856: }
1.2 timbl 857: }
858: }
1.33 luotonen 859:
1.29 frystyk 860: match = best_match ? best_match : NULL;
861: if (match) {
1.40 frystyk 862: request->error_block = YES; /* No more error output to stream */
1.29 frystyk 863: if (match->rep == WWW_SOURCE) {
1.39 frystyk 864: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 865: HTAtom_name(match->rep),
866: HTAtom_name(rep_out));
867: }
868: return (*match->converter)(
1.25 frystyk 869: request, match->command, rep_in, rep_out,
870: request->output_stream);
1.29 frystyk 871: }
1.42 frystyk 872: {
873: char *msg = NULL;
874: StrAllocCopy(msg, "Can't convert from ");
875: StrAllocCat(msg, HTAtom_name(rep_in));
876: StrAllocCat(msg, " to ");
877: StrAllocCat(msg, HTAtom_name(rep_out));
878: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NOT_IMPLEMENTED,
879: (void *) msg, (int) strlen(msg), "HTStreamStack");
880: free(msg);
881: }
1.2 timbl 882: return NULL;
883: }
884:
885:
886: /* Find the cost of a filter stack
887: ** -------------------------------
888: **
889: ** Must return the cost of the same stack which StreamStack would set up.
890: **
891: ** On entry,
892: ** length The size of the data to be converted
893: */
1.12 timbl 894: PUBLIC float HTStackValue ARGS5(
1.14 timbl 895: HTList *, theseConversions,
1.10 timbl 896: HTFormat, rep_in,
1.2 timbl 897: HTFormat, rep_out,
898: float, initial_value,
899: long int, length)
900: {
1.14 timbl 901: int which_list;
902: HTList* conversion[2];
903:
1.2 timbl 904: if (TRACE) fprintf(stderr,
1.39 frystyk 905: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 906: HTAtom_name(rep_in), initial_value,
1.2 timbl 907: HTAtom_name(rep_out));
908:
909: if (rep_out == WWW_SOURCE ||
1.10 timbl 910: rep_out == rep_in) return 0.0;
1.2 timbl 911:
1.12 timbl 912: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 913:
1.14 timbl 914: conversion[0] = theseConversions;
915: conversion[1] = HTConversions;
916:
917: for(which_list = 0; which_list<2; which_list++)
918: if (conversion[which_list]) {
1.15 luotonen 919: HTList * cur = conversion[which_list];
1.2 timbl 920: HTPresentation * pres;
1.15 luotonen 921: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
922: if (pres->rep == rep_in &&
1.17 luotonen 923: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 924: float value = initial_value * pres->quality;
925: if (HTMaxSecs != 0.0)
1.15 luotonen 926: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 927: /HTMaxSecs;
928: return value;
929: }
930: }
931: }
932:
933: return -1e30; /* Really bad */
1.17 luotonen 934: }
935:
936:
1.2 timbl 937:
1.1 timbl 938:
1.2 timbl 939: /* Push data from a socket down a stream
940: ** -------------------------------------
1.1 timbl 941: **
1.2 timbl 942: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 943: ** graphic (or other) objects described by the file.
1.2 timbl 944: **
945: ** The file number given is assumed to be a TELNET stream ie containing
946: ** CRLF at the end of lines which need to be stripped to LF for unix
947: ** when the format is textual.
948: **
1.26 luotonen 949: ** RETURNS the number of bytes transferred.
950: **
1.1 timbl 951: */
1.26 luotonen 952: PUBLIC int HTCopy ARGS2(
1.2 timbl 953: int, file_number,
954: HTStream*, sink)
1.1 timbl 955: {
1.2 timbl 956: HTStreamClass targetClass;
1.13 timbl 957: HTInputSocket * isoc;
1.26 luotonen 958: int cnt = 0;
959:
1.5 timbl 960: /* Push the data down the stream
1.2 timbl 961: **
962: */
963: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 964: isoc = HTInputSocket_new(file_number);
1.2 timbl 965:
966: /* Push binary from socket down sink
1.10 timbl 967: **
968: ** This operation could be put into a main event loop
1.2 timbl 969: */
970: for(;;) {
971: int status = NETREAD(
1.13 timbl 972: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 973: if (status <= 0) {
974: if (status == 0) break;
975: if (TRACE) fprintf(stderr,
1.39 frystyk 976: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 977: status, errno);
1.2 timbl 978: break;
979: }
1.26 luotonen 980:
1.8 timbl 981: #ifdef NOT_ASCII
982: {
983: char * p;
1.13 timbl 984: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 985: *p = FROMASCII(*p);
986: }
987: }
988: #endif
989:
1.13 timbl 990: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 991: cnt += status;
1.2 timbl 992: } /* next bufferload */
1.26 luotonen 993:
1.13 timbl 994: HTInputSocket_free(isoc);
1.26 luotonen 995:
996: return cnt;
1.2 timbl 997: }
998:
1.1 timbl 999:
1.7 secret 1000:
1001: /* Push data from a file pointer down a stream
1002: ** -------------------------------------
1003: **
1004: ** This routine is responsible for creating and PRESENTING any
1005: ** graphic (or other) objects described by the file.
1006: **
1007: **
1008: */
1009: PUBLIC void HTFileCopy ARGS2(
1010: FILE *, fp,
1011: HTStream*, sink)
1012: {
1013: HTStreamClass targetClass;
1.13 timbl 1014: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1015:
1016: /* Push the data down the stream
1017: **
1018: */
1019: targetClass = *(sink->isa); /* Copy pointers to procedures */
1020:
1021: /* Push binary from socket down sink
1022: */
1023: for(;;) {
1024: int status = fread(
1025: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1026: if (status == 0) { /* EOF or error */
1027: if (ferror(fp) == 0) break;
1028: if (TRACE) fprintf(stderr,
1.39 frystyk 1029: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1030: break;
1031: }
1032: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1033: } /* next bufferload */
1.7 secret 1034: }
1035:
1036:
1037:
1038:
1.2 timbl 1039: /* Push data from a socket down a stream STRIPPING CR
1040: ** --------------------------------------------------
1041: **
1042: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1043: ** graphic (or other) objects described by the socket.
1.2 timbl 1044: **
1045: ** The file number given is assumed to be a TELNET stream ie containing
1046: ** CRLF at the end of lines which need to be stripped to LF for unix
1047: ** when the format is textual.
1.37 frystyk 1048: **
1049: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1050: */
1.2 timbl 1051: PUBLIC void HTCopyNoCR ARGS2(
1052: int, file_number,
1053: HTStream*, sink)
1054: {
1.13 timbl 1055: HTStreamClass targetClass;
1056: HTInputSocket * isoc;
1.37 frystyk 1057: int ch;
1.1 timbl 1058:
1.2 timbl 1059: /* Push the data, ignoring CRLF, down the stream
1060: **
1061: */
1062: targetClass = *(sink->isa); /* Copy pointers to procedures */
1063:
1064: /* Push text from telnet socket down sink
1065: **
1066: ** @@@@@ To push strings could be faster? (especially is we
1067: ** cheat and don't ignore CR! :-}
1068: */
1.13 timbl 1069: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1070: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1071: (*targetClass.put_character)(sink, ch);
1.13 timbl 1072: HTInputSocket_free(isoc);
1.2 timbl 1073: }
1.1 timbl 1074:
1.2 timbl 1075:
1.46 ! frystyk 1076: /* To be replaced by a stream */
! 1077: PUBLIC void HTCopyDot ARGS2(int, file_number,
! 1078: HTStream *, sink)
! 1079: {
! 1080: HTStreamClass targetClass;
! 1081: HTInputSocket * isoc;
! 1082: int ch;
! 1083: int state=3;
! 1084:
! 1085: /* Push the data, ignoring CRLF, down the stream */
! 1086: targetClass = *(sink->isa); /* Copy pointers to procedures */
! 1087: isoc = HTInputSocket_new(file_number);
! 1088: while (state && (ch = HTInputSocket_getCharacter(isoc)) >= 0) {
! 1089: if (ch == '\n')
! 1090: state--;
! 1091: else if (state==2 && ch=='.')
! 1092: state--;
! 1093: else
! 1094: state = 3;
! 1095: (*targetClass.put_character)(sink, ch);
! 1096: }
! 1097: HTInputSocket_free(isoc);
! 1098: }
! 1099:
! 1100:
1.7 secret 1101:
1.2 timbl 1102: /* Parse a socket given format and file number
1103: **
1104: ** This routine is responsible for creating and PRESENTING any
1105: ** graphic (or other) objects described by the file.
1106: **
1107: ** The file number given is assumed to be a TELNET stream ie containing
1108: ** CRLF at the end of lines which need to be stripped to LF for unix
1109: ** when the format is textual.
1110: **
1.42 frystyk 1111: ** Returns <0 on error, HT_LOADED on success.
1.2 timbl 1112: */
1.14 timbl 1113:
1.46 ! frystyk 1114: /* The parameter to this function and HTParsefile should be HTRequest */
! 1115:
1.12 timbl 1116: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1117: HTFormat, rep_in,
1.2 timbl 1118: int, file_number,
1.12 timbl 1119: HTRequest *, request)
1.2 timbl 1120: {
1121: HTStream * stream;
1122: HTStreamClass targetClass;
1.1 timbl 1123:
1.40 frystyk 1124: if (request->error_stack) {
1125: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1126: return -1;
1127: }
1128:
1.42 frystyk 1129: /* Set up stream stack */
1130: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1131: return -1;
1.1 timbl 1132:
1.3 timbl 1133: /* Push the data, ignoring CRLF if necessary, down the stream
1134: **
1.2 timbl 1135: **
1.3 timbl 1136: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1137: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1138: ** The current method smells anyway.
1.2 timbl 1139: */
1140: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.46 ! frystyk 1141: if (request->output_format == WWW_SOURCE && request->net_info->CRLFdotCRLF)
! 1142: HTCopyDot(file_number, stream);
! 1143: else if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1144: || (request->content_encoding &&
1145: request->content_encoding != HTAtom_for("8bit") &&
1146: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1147: || strstr(HTAtom_name(rep_in), "image/")
1148: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1149: HTCopy(file_number, stream);
1.46 ! frystyk 1150: } else if (request->net_info->CRLFdotCRLF)
! 1151: HTCopyDot(file_number, stream);
! 1152: else
1.2 timbl 1153: HTCopyNoCR(file_number, stream);
1.45 duns 1154: (*targetClass._free)(stream);
1.7 secret 1155:
1156: return HT_LOADED;
1157: }
1158:
1159:
1160:
1161: /* Parse a file given format and file pointer
1162: **
1163: ** This routine is responsible for creating and PRESENTING any
1164: ** graphic (or other) objects described by the file.
1165: **
1166: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1167: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1168: ** when the format is textual.
1169: **
1170: */
1.12 timbl 1171: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1172: HTFormat, rep_in,
1.7 secret 1173: FILE *, fp,
1.12 timbl 1174: HTRequest *, request)
1.7 secret 1175: {
1176: HTStream * stream;
1177: HTStreamClass targetClass;
1.40 frystyk 1178:
1179: if (request->error_stack) {
1180: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1181: return -1;
1182: }
1.7 secret 1183:
1.42 frystyk 1184: /* Set up stream stack */
1185: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1186: return -1;
1.7 secret 1187:
1.9 timbl 1188: /* Push the data down the stream
1.7 secret 1189: **
1190: **
1191: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1192: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1193: ** The current method smells anyway.
1194: */
1195: targetClass = *(stream->isa); /* Copy pointers to procedures */
1196: HTFileCopy(fp, stream);
1.45 duns 1197: (*targetClass._free)(stream);
1.1 timbl 1198:
1.2 timbl 1199: return HT_LOADED;
1.1 timbl 1200: }
1.2 timbl 1201:
1.10 timbl 1202:
1203: /* Converter stream: Network Telnet to internal character text
1204: ** -----------------------------------------------------------
1205: **
1206: ** The input is assumed to be in ASCII, with lines delimited
1207: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1208: ** pairs in the local representation. The (CR,LF) sequence
1209: ** when found is changed to a '\n' character, the internal
1210: ** C representation of a new line.
1211: */
1212:
1213:
1.11 timbl 1214: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1215: {
1216: char c = FROMASCII(net_char);
1217: if (me->had_cr) {
1218: if (c==LF) {
1219: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1220: me->had_cr = NO;
1221: return;
1222: } else {
1223: me->sink->isa->put_character(me->sink, CR); /* leftover */
1224: }
1225: }
1226: me->had_cr = (c==CR);
1227: if (!me->had_cr)
1228: me->sink->isa->put_character(me->sink, c); /* normal */
1229: }
1230:
1.11 timbl 1231: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1232: {
1233: CONST char * p;
1234: for(p=s; *p; p++) NetToText_put_character(me, *p);
1235: }
1236:
1.11 timbl 1237: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1238: {
1239: CONST char * p;
1240: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1241: }
1242:
1243: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1244: {
1.45 duns 1245: me->sink->isa->_free(me->sink); /* Close rest of pipe */
1.10 timbl 1246: free(me);
1247: }
1248:
1249: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1250: {
1251: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1252: free(me);
1253: }
1254:
1255: /* The class structure
1256: */
1257: PRIVATE HTStreamClass NetToTextClass = {
1258: "NetToText",
1259: NetToText_free,
1260: NetToText_abort,
1261: NetToText_put_character,
1262: NetToText_put_string,
1263: NetToText_put_block
1264: };
1265:
1266: /* The creation method
1267: */
1268: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1269: {
1270: HTStream* me = (HTStream*)malloc(sizeof(*me));
1271: if (me == NULL) outofmem(__FILE__, "NetToText");
1272: me->isa = &NetToTextClass;
1273:
1274: me->had_cr = NO;
1275: me->sink = sink;
1276: return me;
1277: }
1.2 timbl 1278:
1279:
Webmaster