Annotation of libwww/Library/src/HTFormat.c, revision 1.45
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
1.45 ! duns 12: ** HISTORY:
! 13: ** 8 Jul 94 FM Insulate free() from _free structure element.
! 14: **
1.2 timbl 15: */
16:
1.10 timbl 17:
1.2 timbl 18: /* Implements:
1.1 timbl 19: */
1.2 timbl 20: #include "HTFormat.h"
21:
22: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
23: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
24:
25: #ifdef unix
26: #ifdef NeXT
27: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
28: #else
29: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
30: /* Full pathname would be better! */
31: #endif
32: #endif
33:
1.1 timbl 34:
35: #include "HTUtils.h"
36: #include "tcp.h"
37:
38: #include "HTML.h"
1.12 timbl 39: #include "HTMLPDTD.h"
1.1 timbl 40: #include "HText.h"
1.2 timbl 41: #include "HTAlert.h"
42: #include "HTList.h"
43: #include "HTInit.h"
44: /* Streams and structured streams which we use:
45: */
46: #include "HTFWriter.h"
47: #include "HTPlain.h"
48: #include "SGML.h"
49: #include "HTML.h"
50: #include "HTMLGen.h"
1.41 frystyk 51: #include "HTTCP.h"
1.34 luotonen 52: #include "HTGuess.h"
1.42 frystyk 53: #include "HTError.h"
1.34 luotonen 54:
1.2 timbl 55:
56: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
57:
1.10 timbl 58: #ifdef ORIGINAL
1.2 timbl 59: struct _HTStream {
60: CONST HTStreamClass* isa;
61: /* ... */
62: };
1.10 timbl 63: #endif
64:
65: /* this version used by the NetToText stream */
66: struct _HTStream {
67: CONST HTStreamClass * isa;
68: BOOL had_cr;
69: HTStream * sink;
70: };
1.2 timbl 71:
72:
1.17 luotonen 73: /*
74: ** Accept-Encoding and Accept-Language
75: */
76: typedef struct _HTAcceptNode {
77: HTAtom * atom;
78: float quality;
79: } HTAcceptNode;
80:
81:
82:
83:
1.2 timbl 84: /* Presentation methods
85: ** --------------------
86: */
87:
1.14 timbl 88: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 89:
1.31 frystyk 90: /* -------------------------------------------------------------------------
91: This function replaces the code in HTRequest_delete() in order to keep
92: the data structure hidden (it is NOT a joke!)
93: Henrik 14/03-94
94: ------------------------------------------------------------------------- */
95: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
96: {
97: HTList *cur = me;
98: HTPresentation *pres;
99: if (!me)
100: return;
101: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
102: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
103: free(pres);
104: }
105: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
106: }
107:
1.2 timbl 108:
109: /* Define a presentation system command for a content-type
110: ** -------------------------------------------------------
111: */
1.12 timbl 112: PUBLIC void HTSetPresentation ARGS6(
113: HTList *, conversions,
114: CONST char *, representation,
115: CONST char *, command,
116: float, quality,
117: float, secs,
118: float, secs_per_byte
1.2 timbl 119: ){
120:
121: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
122: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
123:
124: pres->rep = HTAtom_for(representation);
125: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
126: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
127: pres->quality = quality;
128: pres->secs = secs;
129: pres->secs_per_byte = secs_per_byte;
130: pres->rep = HTAtom_for(representation);
131: pres->command = 0;
132: StrAllocCopy(pres->command, command);
133:
1.12 timbl 134: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 135:
1.15 luotonen 136: #ifdef OLD_CODE
137: if (strcmp(representation, "*")==0) {
1.2 timbl 138: if (default_presentation) free(default_presentation);
139: default_presentation = pres;
1.12 timbl 140: } else
141: #endif
142: HTList_addObject(conversions, pres);
1.2 timbl 143: }
144:
145:
146: /* Define a built-in function for a content-type
147: ** ---------------------------------------------
148: */
1.12 timbl 149: PUBLIC void HTSetConversion ARGS7(
150: HTList *, conversions,
151: CONST char *, representation_in,
152: CONST char *, representation_out,
1.6 timbl 153: HTConverter*, converter,
1.12 timbl 154: float, quality,
155: float, secs,
156: float, secs_per_byte
1.2 timbl 157: ){
1.1 timbl 158:
1.2 timbl 159: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
160: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
161:
162: pres->rep = HTAtom_for(representation_in);
163: pres->rep_out = HTAtom_for(representation_out);
164: pres->converter = converter;
165: pres->command = NULL; /* Fixed */
166: pres->quality = quality;
167: pres->secs = secs;
168: pres->secs_per_byte = secs_per_byte;
169: pres->command = 0;
170:
1.12 timbl 171: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 172:
1.12 timbl 173: #ifdef OLD_CODE
1.2 timbl 174: if (strcmp(representation_in, "*")==0) {
175: if (default_presentation) free(default_presentation);
176: default_presentation = pres;
1.12 timbl 177: } else
178: #endif
179: HTList_addObject(conversions, pres);
1.2 timbl 180: }
1.1 timbl 181:
182:
183:
1.17 luotonen 184: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
185: char *, enc,
186: float, quality)
187: {
188: HTAcceptNode * node;
189: char * cur;
190:
191: if (!list || !enc || !*enc) return;
192:
193: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
194:
195: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
196: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
197: HTList_addObject(list, (void*)node);
198:
199: node->atom = HTAtom_for(enc);
200: node->quality = quality;
201: }
202:
203:
204: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
205: char *, lang,
206: float, quality)
207: {
208: HTAcceptNode * node;
209:
210: if (!list || !lang || !*lang) return;
211:
212: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
213: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
214:
215: HTList_addObject(list, (void*)node);
216: node->atom = HTAtom_for(lang);
217: node->quality = quality;
218: }
219:
220:
221: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
222: HTAtom *, actual)
223: {
224: char *t, *a, *st, *sa;
225: BOOL match = NO;
226:
1.22 luotonen 227: if (template && actual && (t = HTAtom_name(template))) {
228: if (!strcmp(t, "*"))
229: return YES;
1.17 luotonen 230:
1.22 luotonen 231: if (strchr(t, '*') &&
232: (a = HTAtom_name(actual)) &&
233: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 234:
1.22 luotonen 235: *sa = 0;
236: *st = 0;
237:
238: if ((*(st-1)=='*' &&
239: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
240: (*(st+1)=='*' && !strcasecomp(t,a)))
241: match = YES;
242:
243: *sa = '/';
244: *st = '/';
245: }
246: }
1.23 luotonen 247: return match;
1.17 luotonen 248: }
249:
1.36 luotonen 250: /*
251: * Added by takada@seraph.ntt.jp (94/04/08)
252: */
253: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
254: HTAtom *, actual)
255: {
256: char *t, *a, *st, *sa;
257: BOOL match = NO;
258:
259: if (template && actual &&
260: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
261: st = strchr(t, '_');
262: sa = strchr(a, '_');
263: if ((st != NULL) && (sa != NULL)) {
264: if (!strcasecomp(t, a))
265: match = YES;
266: else
267: match = NO;
268: }
269: else {
270: if (st != NULL) *st = 0;
271: if (sa != NULL) *sa = 0;
272: if (!strcasecomp(t, a))
273: match = YES;
274: else
275: match = NO;
276: if (st != NULL) *st = '_';
277: if (sa != NULL) *sa = '_';
278: }
279: }
280: return match;
281: }
282: /* end of addition */
283:
284:
1.17 luotonen 285:
286: PRIVATE float type_value ARGS2(HTAtom *, content_type,
287: HTList *, accepted)
288: {
289: HTList * cur = accepted;
290: HTPresentation * pres;
291: HTPresentation * wild = NULL;
292:
293: if (!content_type || !accepted) return -1;
294:
295: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
296: if (pres->rep == content_type)
297: return pres->quality;
298: else if (wild_match(pres->rep, content_type))
299: wild = pres;
300: }
301: if (wild) return wild->quality;
302: else return -1;
303: }
304:
305:
306: PRIVATE float lang_value ARGS2(HTAtom *, language,
307: HTList *, accepted)
308: {
309: HTList * cur = accepted;
310: HTAcceptNode * node;
311: HTAcceptNode * wild = NULL;
312:
313: if (!language || !accepted || HTList_isEmpty(accepted)) {
314: return 0.1;
315: }
316:
317: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
318: if (node->atom == language) {
319: return node->quality;
320: }
1.36 luotonen 321: /*
322: * patch by takada@seraph.ntt.jp (94/04/08)
323: * the original line was
324: * else if (wild_match(node->atom, language)) {
325: * and the new line is
326: */
327: else if (lang_match(node->atom, language)) {
1.17 luotonen 328: wild = node;
329: }
330: }
331:
332: if (wild) {
333: return wild->quality;
334: }
335: else {
336: return 0.1;
337: }
338: }
339:
340:
341: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
342: HTList *, accepted)
343: {
344: HTList * cur = accepted;
345: HTAcceptNode * node;
346: HTAcceptNode * wild = NULL;
347: char * e;
348:
349: if (!encoding || !accepted || HTList_isEmpty(accepted))
350: return 1;
351:
352: e = HTAtom_name(encoding);
353: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
354: return 1;
355:
356: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
357: if (node->atom == encoding)
358: return node->quality;
359: else if (wild_match(node->atom, encoding))
360: wild = node;
361: }
362: if (wild) return wild->quality;
363: else return 1;
364: }
365:
366:
367: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
368: HTList *, accepted_content_types,
369: HTList *, accepted_languages,
370: HTList *, accepted_encodings)
371: {
372: int accepted_cnt = 0;
373: HTList * accepted;
374: HTList * sorted;
375: HTList * cur;
376: HTContentDescription * d;
377:
378: if (!possibilities) return NO;
379:
380: accepted = HTList_new();
381: cur = possibilities;
382: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
383: float tv = type_value(d->content_type, accepted_content_types);
384: float lv = lang_value(d->content_language, accepted_languages);
385: float ev = encoding_value(d->content_encoding, accepted_encodings);
386:
387: if (tv > 0) {
388: d->quality *= tv * lv * ev;
389: HTList_addObject(accepted, d);
390: accepted_cnt++;
391: }
1.18 luotonen 392: else {
393: if (d->filename) free(d->filename);
394: free(d);
395: }
1.17 luotonen 396: }
397:
1.18 luotonen 398: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 399: CTRACE(stderr,
1.18 luotonen 400: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 401:
402: sorted = HTList_new();
403: while (accepted_cnt-- > 0) {
404: HTContentDescription * worst = NULL;
405: cur = accepted;
406: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
407: if (!worst || d->quality < worst->quality)
408: worst = d;
409: }
410: if (worst) {
411: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
412: accepted_cnt+1,
413: worst->quality,
414: (worst->content_type
415: ? HTAtom_name(worst->content_type) : "-"),
416: (worst->content_language
417: ? HTAtom_name(worst->content_language) :"-"),
418: (worst->content_encoding
419: ? HTAtom_name(worst->content_encoding) :"-"),
420: (worst->filename
421: ? worst->filename :"-"));
422: HTList_removeObject(accepted, (void*)worst);
423: HTList_addObject(sorted, (void*)worst);
424: }
425: }
1.18 luotonen 426: CTRACE(stderr, "\n");
1.17 luotonen 427: HTList_delete(accepted);
428: HTList_delete(possibilities->next);
429: possibilities->next = sorted->next;
430: sorted->next = NULL;
431: HTList_delete(sorted);
432:
433: if (!HTList_isEmpty(possibilities)) return YES;
434: else return NO;
435: }
436:
437:
438:
439:
440:
1.13 timbl 441: /* Socket Input Buffering
442: ** ----------------------
1.1 timbl 443: **
1.13 timbl 444: ** This code is used because one cannot in general open a
445: ** file descriptor for a socket.
446: **
1.1 timbl 447: ** The input file is read using the macro which can read from
1.13 timbl 448: ** a socket or a file, but this should not be used for files
449: ** as fopen() etc is more portable of course.
450: **
1.1 timbl 451: ** The input buffer size, if large will give greater efficiency and
452: ** release the server faster, and if small will save space on PCs etc.
453: */
454:
455:
456: /* Set up the buffering
457: **
458: ** These routines are public because they are in fact needed by
459: ** many parsers, and on PCs and Macs we should not duplicate
460: ** the static buffer area.
461: */
1.13 timbl 462: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 463: {
1.28 frystyk 464: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 465: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
466: isoc->input_file_number = file_number;
467: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
468: return isoc;
1.1 timbl 469: }
470:
1.35 frystyk 471: /* This should return HT_INTERRUPTED if interrupted BUT the connection
472: MUST not be closed */
473: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 474: {
1.35 frystyk 475: int ch;
1.1 timbl 476: do {
1.13 timbl 477: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 478: int status = NETREAD(
1.13 timbl 479: isoc->input_file_number,
480: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 481: if (status <= 0) {
1.39 frystyk 482: if (status == 0)
483: return EOF;
484: if (status == HT_INTERRUPTED) {
485: if (TRACE)
486: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
487: return HT_INTERRUPTED;
488: }
489: HTInetStatus("read");
490: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 491: }
1.35 frystyk 492: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 493: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 494: }
1.39 frystyk 495: ch = (unsigned char) *isoc->input_pointer++;
496: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 497:
498: return FROMASCII(ch);
499: }
500:
1.17 luotonen 501: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 502: {
503: if (me) free(me);
504: }
505:
506:
1.16 luotonen 507: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
508: int *, len)
509: {
510: if (isoc->input_pointer >= isoc->input_limit) {
511: int status = NETREAD(isoc->input_file_number,
512: isoc->input_buffer,
513: ((*len < INPUT_BUFFER_SIZE) ?
514: *len : INPUT_BUFFER_SIZE));
515: if (status <= 0) {
516: isoc->input_limit = isoc->input_buffer;
517: if (status < 0)
1.39 frystyk 518: HTInetStatus("read");
1.16 luotonen 519: *len = 0;
520: return NULL;
521: }
522: else {
523: *len = status;
524: return isoc->input_buffer;
525: }
526: }
527: else {
528: char * ret = isoc->input_pointer;
529: *len = isoc->input_limit - isoc->input_pointer;
530: isoc->input_pointer = isoc->input_limit;
531: return ret;
532: }
533: }
534:
535:
1.15 luotonen 536: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
537: {
538: if (isoc) {
539: int status;
540:
541: isoc->input_pointer = isoc->input_buffer;
542: status = NETREAD(isoc->input_file_number,
543: isoc->input_buffer,
544: INPUT_BUFFER_SIZE);
545: if (status <= 0) {
546: isoc->input_limit = isoc->input_buffer;
547: if (status < 0)
1.39 frystyk 548: HTInetStatus("read");
1.15 luotonen 549: }
550: else
551: isoc->input_limit = isoc->input_buffer + status;
552: return status;
553: }
554: return -1;
555: }
556:
557:
558: PRIVATE void ascii_cat ARGS3(char **, linep,
559: char *, start,
560: char *, end)
561: {
562: if (linep && start && end && start <= end) {
563: char *ptr;
564:
565: if (*linep) {
566: int len = strlen(*linep);
567: *linep = (char*)realloc(*linep, len + end-start + 1);
568: ptr = *linep + len;
569: }
570: else {
571: ptr = *linep = (char*)malloc(end-start + 1);
572: }
573:
574: while (start < end) {
575: *ptr = FROMASCII(*start);
576: ptr++;
577: start++;
578: }
579: *ptr = 0;
580: }
581: }
582:
583:
584: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
585: BOOL, unfold)
586: {
587: if (!isoc)
588: return NULL;
589: else {
590: BOOL check_unfold = NO;
591: int prev_cr = 0;
592: char *start = isoc->input_pointer;
593: char *cur = isoc->input_pointer;
594: char * line = NULL;
595:
596: for(;;) {
597: /*
598: ** Get more if needed to complete line
599: */
600: if (cur >= isoc->input_limit) { /* Need more data */
601: ascii_cat(&line, start, cur);
602: if (fill_in_buffer(isoc) <= 0)
603: return line;
604: start = cur = isoc->input_pointer;
605: } /* if need more data */
606:
607: /*
608: ** Find a line feed if there is one
609: */
610: for(; cur < isoc->input_limit; cur++) {
611: char c = FROMASCII(*cur);
612: if (!c) {
1.18 luotonen 613: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 614: return NULL; /* Panic! read a 0! */
615: }
616: if (check_unfold && c != ' ' && c != '\t') {
617: return line; /* Note: didn't update isoc->input_pointer */
618: }
619: else {
620: check_unfold = NO;
621: }
622:
623: if (c=='\r') {
624: prev_cr = 1;
625: }
626: else {
627: if (c=='\n') { /* Found a line feed */
628: ascii_cat(&line, start, cur-prev_cr);
629: start = isoc->input_pointer = cur+1;
630:
1.44 frystyk 631: if (line && (int) strlen(line) > 0 && unfold) {
1.15 luotonen 632: check_unfold = YES;
633: }
634: else {
635: return line;
636: }
637: } /* if NL */
638: /* else just a regular character */
639: prev_cr = 0;
640: } /* if not CR */
641: } /* while characters in buffer remain */
642: } /* until line read or end-of-file */
643: } /* valid parameters to function */
644: }
645:
1.43 frystyk 646: /* The returned string must be freed by the caller */
1.15 luotonen 647: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
648: {
649: return get_some_line(isoc, NO);
650: }
651:
1.43 frystyk 652: /* The returned string must be freed by the caller */
1.15 luotonen 653: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
654: {
655: return get_some_line(isoc, YES);
656: }
657:
658:
659: /*
660: ** Read HTTP status line (if there is one).
661: **
662: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
663: ** First look at the stub in ASCII and check if it starts "HTTP/".
664: **
665: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
666: ** will be taken as a HTTP 1.0 server. Failure.
667: */
668: #define STUB_LENGTH 20
669: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
670: {
671: if (!isoc) {
672: return NULL;
673: }
674: else {
675: char buf[STUB_LENGTH + 1];
676: int i;
677: char server_version[STUB_LENGTH+1];
678: int server_status;
679:
680: /*
681: ** Read initial buffer
682: */
683: if (isoc->input_pointer >= isoc->input_limit &&
684: fill_in_buffer(isoc) <= 0) {
685: return NULL;
686: }
687:
688: for (i=0; i < STUB_LENGTH; i++)
689: buf[i] = FROMASCII(isoc->input_buffer[i]);
690: buf[STUB_LENGTH] = 0;
691:
692: if (0 != strncmp(buf, "HTTP/", 5) ||
693: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
694: return NULL;
695: else
696: return get_some_line(isoc, NO);
697: }
698: }
699:
700:
701: /*
702: ** Do heuristic test to see if this is binary.
703: **
704: ** We check for characters above 128 in the first few bytes, and
705: ** if we find them we forget the html default.
706: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
707: **
708: ** Bugs: An HTTP 0.9 server returning a binary document with
709: ** characters < 128 will be read as ASCII.
710: */
711: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
712: {
713: if (isoc &&
714: (isoc->input_pointer < isoc->input_limit ||
715: fill_in_buffer(isoc) > 0)) {
716: char *p = isoc->input_buffer;
717: int i = STUB_LENGTH;
718:
719: for( ; i && p < isoc->input_limit; p++, i++)
720: if (((int)*p)&128)
721: return YES;
722: }
723: return NO;
724: }
725:
726:
727:
1.1 timbl 728: /* Stream the data to an ouput file as binary
729: */
1.38 luotonen 730: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 731: int, input,
732: FILE *, output)
1.1 timbl 733: {
734: do {
735: int status = NETREAD(
1.13 timbl 736: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 737: if (status <= 0) {
738: if (status == 0) return 0;
739: if (TRACE) fprintf(stderr,
1.39 frystyk 740: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 741: return 2; /* Error */
742: }
1.13 timbl 743: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 744: } while (YES);
745: }
746:
1.38 luotonen 747:
748: /*
749: * Normal HTTP headers are never bigger than 2K.
750: */
751: #define S_BUFFER_SIZE 2000
752:
753: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
754: {
755: if (isoc) {
756: isoc->s_do_buffering = YES;
757: if (!isoc->s_buffer) {
758: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
759: isoc->s_buffer_size = S_BUFFER_SIZE;
760: }
761: isoc->s_buffer_cur = isoc->s_buffer;
762: }
763: }
764:
765: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
766: {
767: if (isoc) {
768: isoc->s_do_buffering = NO;
769: if (isoc->s_buffer_cur)
770: *isoc->s_buffer_cur = 0;
771: }
772: }
773:
774: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
775: char **, buffer_ptr)
776: {
777: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
778: return 0;
779: else {
780: *isoc->s_buffer_cur = 0;
781: if (buffer_ptr)
782: *buffer_ptr = isoc->s_buffer;
783: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
784: }
785: }
1.1 timbl 786:
1.33 luotonen 787: PRIVATE BOOL better_match ARGS2(HTFormat, f,
788: HTFormat, g)
789: {
790: CONST char *p, *q;
791:
792: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
793: int i,j;
794: for(i=0 ; *p; p++) if (*p == '*') i++;
795: for(j=0 ; *q; q++) if (*q == '*') j++;
796: if (i < j) return YES;
797: }
798: return NO;
799: }
800:
1.17 luotonen 801:
1.2 timbl 802: /* Create a filter stack
803: ** ---------------------
804: **
1.7 secret 805: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 806: ** structure is made to hold the destination format while the
807: ** new stack is generated. This is just to pass the out format to
808: ** MIME so far. Storing the format of a stream in the stream might
809: ** be a lot neater.
1.10 timbl 810: **
1.29 frystyk 811: ** The star/star format is special, in that if you can take
1.40 frystyk 812: ** that you can take anything.
813: **
814: ** On succes, request->error_block is set to YES so no more error
815: ** messages to the stream as the stream might be of any format.
1.2 timbl 816: */
1.34 luotonen 817: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
818: HTRequest *, request,
819: BOOL, guess)
1.2 timbl 820: {
1.12 timbl 821: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 822: HTList * conversion[2];
823: int which_list;
1.25 frystyk 824: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 825: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 826:
1.2 timbl 827: if (TRACE) fprintf(stderr,
1.39 frystyk 828: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 829: HTAtom_name(rep_in),
1.2 timbl 830: HTAtom_name(rep_out));
1.34 luotonen 831:
832: if (guess && rep_in == WWW_UNKNOWN) {
833: CTRACE(stderr, "Returning... guessing stream\n");
1.40 frystyk 834: request->error_block = YES; /* No more error output to stream */
1.34 luotonen 835: return HTGuess_new(request);
836: }
837:
1.21 luotonen 838: if (rep_out == WWW_SOURCE || rep_out == rep_in)
839: return request->output_stream;
1.2 timbl 840:
1.14 timbl 841: conversion[0] = request->conversions;
842: conversion[1] = HTConversions;
1.17 luotonen 843:
1.15 luotonen 844: for(which_list = 0; which_list<2; which_list++) {
845: HTList * cur = conversion[which_list];
846:
847: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 848: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 849: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
850: if (!best_match ||
851: better_match(pres->rep, best_match->rep) ||
852: (!better_match(best_match->rep, pres->rep) &&
853: pres->quality > best_quality)) {
1.25 frystyk 854: best_match = pres;
855: best_quality = pres->quality;
1.10 timbl 856: }
857: }
1.2 timbl 858: }
859: }
1.33 luotonen 860:
1.29 frystyk 861: match = best_match ? best_match : NULL;
862: if (match) {
1.40 frystyk 863: request->error_block = YES; /* No more error output to stream */
1.29 frystyk 864: if (match->rep == WWW_SOURCE) {
1.39 frystyk 865: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 866: HTAtom_name(match->rep),
867: HTAtom_name(rep_out));
868: }
869: return (*match->converter)(
1.25 frystyk 870: request, match->command, rep_in, rep_out,
871: request->output_stream);
1.29 frystyk 872: }
1.42 frystyk 873: {
874: char *msg = NULL;
875: StrAllocCopy(msg, "Can't convert from ");
876: StrAllocCat(msg, HTAtom_name(rep_in));
877: StrAllocCat(msg, " to ");
878: StrAllocCat(msg, HTAtom_name(rep_out));
879: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NOT_IMPLEMENTED,
880: (void *) msg, (int) strlen(msg), "HTStreamStack");
881: free(msg);
882: }
1.2 timbl 883: return NULL;
884: }
885:
886:
887: /* Find the cost of a filter stack
888: ** -------------------------------
889: **
890: ** Must return the cost of the same stack which StreamStack would set up.
891: **
892: ** On entry,
893: ** length The size of the data to be converted
894: */
1.12 timbl 895: PUBLIC float HTStackValue ARGS5(
1.14 timbl 896: HTList *, theseConversions,
1.10 timbl 897: HTFormat, rep_in,
1.2 timbl 898: HTFormat, rep_out,
899: float, initial_value,
900: long int, length)
901: {
1.14 timbl 902: int which_list;
903: HTList* conversion[2];
904:
1.2 timbl 905: if (TRACE) fprintf(stderr,
1.39 frystyk 906: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 907: HTAtom_name(rep_in), initial_value,
1.2 timbl 908: HTAtom_name(rep_out));
909:
910: if (rep_out == WWW_SOURCE ||
1.10 timbl 911: rep_out == rep_in) return 0.0;
1.2 timbl 912:
1.12 timbl 913: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 914:
1.14 timbl 915: conversion[0] = theseConversions;
916: conversion[1] = HTConversions;
917:
918: for(which_list = 0; which_list<2; which_list++)
919: if (conversion[which_list]) {
1.15 luotonen 920: HTList * cur = conversion[which_list];
1.2 timbl 921: HTPresentation * pres;
1.15 luotonen 922: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
923: if (pres->rep == rep_in &&
1.17 luotonen 924: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 925: float value = initial_value * pres->quality;
926: if (HTMaxSecs != 0.0)
1.15 luotonen 927: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 928: /HTMaxSecs;
929: return value;
930: }
931: }
932: }
933:
934: return -1e30; /* Really bad */
1.17 luotonen 935: }
936:
937:
1.2 timbl 938:
1.1 timbl 939:
1.2 timbl 940: /* Push data from a socket down a stream
941: ** -------------------------------------
1.1 timbl 942: **
1.2 timbl 943: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 944: ** graphic (or other) objects described by the file.
1.2 timbl 945: **
946: ** The file number given is assumed to be a TELNET stream ie containing
947: ** CRLF at the end of lines which need to be stripped to LF for unix
948: ** when the format is textual.
949: **
1.26 luotonen 950: ** RETURNS the number of bytes transferred.
951: **
1.1 timbl 952: */
1.26 luotonen 953: PUBLIC int HTCopy ARGS2(
1.2 timbl 954: int, file_number,
955: HTStream*, sink)
1.1 timbl 956: {
1.2 timbl 957: HTStreamClass targetClass;
1.13 timbl 958: HTInputSocket * isoc;
1.26 luotonen 959: int cnt = 0;
960:
1.5 timbl 961: /* Push the data down the stream
1.2 timbl 962: **
963: */
964: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 965: isoc = HTInputSocket_new(file_number);
1.2 timbl 966:
967: /* Push binary from socket down sink
1.10 timbl 968: **
969: ** This operation could be put into a main event loop
1.2 timbl 970: */
971: for(;;) {
972: int status = NETREAD(
1.13 timbl 973: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 974: if (status <= 0) {
975: if (status == 0) break;
976: if (TRACE) fprintf(stderr,
1.39 frystyk 977: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 978: status, errno);
1.2 timbl 979: break;
980: }
1.26 luotonen 981:
1.8 timbl 982: #ifdef NOT_ASCII
983: {
984: char * p;
1.13 timbl 985: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 986: *p = FROMASCII(*p);
987: }
988: }
989: #endif
990:
1.13 timbl 991: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 992: cnt += status;
1.2 timbl 993: } /* next bufferload */
1.26 luotonen 994:
1.13 timbl 995: HTInputSocket_free(isoc);
1.26 luotonen 996:
997: return cnt;
1.2 timbl 998: }
999:
1.1 timbl 1000:
1.7 secret 1001:
1002: /* Push data from a file pointer down a stream
1003: ** -------------------------------------
1004: **
1005: ** This routine is responsible for creating and PRESENTING any
1006: ** graphic (or other) objects described by the file.
1007: **
1008: **
1009: */
1010: PUBLIC void HTFileCopy ARGS2(
1011: FILE *, fp,
1012: HTStream*, sink)
1013: {
1014: HTStreamClass targetClass;
1.13 timbl 1015: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1016:
1017: /* Push the data down the stream
1018: **
1019: */
1020: targetClass = *(sink->isa); /* Copy pointers to procedures */
1021:
1022: /* Push binary from socket down sink
1023: */
1024: for(;;) {
1025: int status = fread(
1026: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1027: if (status == 0) { /* EOF or error */
1028: if (ferror(fp) == 0) break;
1029: if (TRACE) fprintf(stderr,
1.39 frystyk 1030: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1031: break;
1032: }
1033: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1034: } /* next bufferload */
1.7 secret 1035: }
1036:
1037:
1038:
1039:
1.2 timbl 1040: /* Push data from a socket down a stream STRIPPING CR
1041: ** --------------------------------------------------
1042: **
1043: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1044: ** graphic (or other) objects described by the socket.
1.2 timbl 1045: **
1046: ** The file number given is assumed to be a TELNET stream ie containing
1047: ** CRLF at the end of lines which need to be stripped to LF for unix
1048: ** when the format is textual.
1.37 frystyk 1049: **
1050: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1051: */
1.2 timbl 1052: PUBLIC void HTCopyNoCR ARGS2(
1053: int, file_number,
1054: HTStream*, sink)
1055: {
1.13 timbl 1056: HTStreamClass targetClass;
1057: HTInputSocket * isoc;
1.37 frystyk 1058: int ch;
1.1 timbl 1059:
1.2 timbl 1060: /* Push the data, ignoring CRLF, down the stream
1061: **
1062: */
1063: targetClass = *(sink->isa); /* Copy pointers to procedures */
1064:
1065: /* Push text from telnet socket down sink
1066: **
1067: ** @@@@@ To push strings could be faster? (especially is we
1068: ** cheat and don't ignore CR! :-}
1069: */
1.13 timbl 1070: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1071: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1072: (*targetClass.put_character)(sink, ch);
1.13 timbl 1073: HTInputSocket_free(isoc);
1.2 timbl 1074: }
1.1 timbl 1075:
1.2 timbl 1076:
1.7 secret 1077:
1.2 timbl 1078: /* Parse a socket given format and file number
1079: **
1080: ** This routine is responsible for creating and PRESENTING any
1081: ** graphic (or other) objects described by the file.
1082: **
1083: ** The file number given is assumed to be a TELNET stream ie containing
1084: ** CRLF at the end of lines which need to be stripped to LF for unix
1085: ** when the format is textual.
1086: **
1.42 frystyk 1087: ** Returns <0 on error, HT_LOADED on success.
1.2 timbl 1088: */
1.14 timbl 1089:
1.12 timbl 1090: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1091: HTFormat, rep_in,
1.2 timbl 1092: int, file_number,
1.12 timbl 1093: HTRequest *, request)
1.2 timbl 1094: {
1095: HTStream * stream;
1096: HTStreamClass targetClass;
1.1 timbl 1097:
1.40 frystyk 1098: if (request->error_stack) {
1099: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1100: return -1;
1101: }
1102:
1.42 frystyk 1103: /* Set up stream stack */
1104: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1105: return -1;
1.1 timbl 1106:
1.3 timbl 1107: /* Push the data, ignoring CRLF if necessary, down the stream
1108: **
1.2 timbl 1109: **
1.3 timbl 1110: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1111: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1112: ** The current method smells anyway.
1.2 timbl 1113: */
1114: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1115: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1116: || (request->content_encoding &&
1117: request->content_encoding != HTAtom_for("8bit") &&
1118: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1119: || strstr(HTAtom_name(rep_in), "image/")
1120: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1121: HTCopy(file_number, stream);
1.2 timbl 1122: } else { /* ascii text with CRLFs :-( */
1123: HTCopyNoCR(file_number, stream);
1124: }
1.45 ! duns 1125: (*targetClass._free)(stream);
1.7 secret 1126:
1127: return HT_LOADED;
1128: }
1129:
1130:
1131:
1132: /* Parse a file given format and file pointer
1133: **
1134: ** This routine is responsible for creating and PRESENTING any
1135: ** graphic (or other) objects described by the file.
1136: **
1137: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1138: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1139: ** when the format is textual.
1140: **
1141: */
1.12 timbl 1142: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1143: HTFormat, rep_in,
1.7 secret 1144: FILE *, fp,
1.12 timbl 1145: HTRequest *, request)
1.7 secret 1146: {
1147: HTStream * stream;
1148: HTStreamClass targetClass;
1.40 frystyk 1149:
1150: if (request->error_stack) {
1151: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1152: return -1;
1153: }
1.7 secret 1154:
1.42 frystyk 1155: /* Set up stream stack */
1156: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1157: return -1;
1.7 secret 1158:
1.9 timbl 1159: /* Push the data down the stream
1.7 secret 1160: **
1161: **
1162: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1163: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1164: ** The current method smells anyway.
1165: */
1166: targetClass = *(stream->isa); /* Copy pointers to procedures */
1167: HTFileCopy(fp, stream);
1.45 ! duns 1168: (*targetClass._free)(stream);
1.1 timbl 1169:
1.2 timbl 1170: return HT_LOADED;
1.1 timbl 1171: }
1.2 timbl 1172:
1.10 timbl 1173:
1174: /* Converter stream: Network Telnet to internal character text
1175: ** -----------------------------------------------------------
1176: **
1177: ** The input is assumed to be in ASCII, with lines delimited
1178: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1179: ** pairs in the local representation. The (CR,LF) sequence
1180: ** when found is changed to a '\n' character, the internal
1181: ** C representation of a new line.
1182: */
1183:
1184:
1.11 timbl 1185: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1186: {
1187: char c = FROMASCII(net_char);
1188: if (me->had_cr) {
1189: if (c==LF) {
1190: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1191: me->had_cr = NO;
1192: return;
1193: } else {
1194: me->sink->isa->put_character(me->sink, CR); /* leftover */
1195: }
1196: }
1197: me->had_cr = (c==CR);
1198: if (!me->had_cr)
1199: me->sink->isa->put_character(me->sink, c); /* normal */
1200: }
1201:
1.11 timbl 1202: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1203: {
1204: CONST char * p;
1205: for(p=s; *p; p++) NetToText_put_character(me, *p);
1206: }
1207:
1.11 timbl 1208: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1209: {
1210: CONST char * p;
1211: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1212: }
1213:
1214: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1215: {
1.45 ! duns 1216: me->sink->isa->_free(me->sink); /* Close rest of pipe */
1.10 timbl 1217: free(me);
1218: }
1219:
1220: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1221: {
1222: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1223: free(me);
1224: }
1225:
1226: /* The class structure
1227: */
1228: PRIVATE HTStreamClass NetToTextClass = {
1229: "NetToText",
1230: NetToText_free,
1231: NetToText_abort,
1232: NetToText_put_character,
1233: NetToText_put_string,
1234: NetToText_put_block
1235: };
1236:
1237: /* The creation method
1238: */
1239: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1240: {
1241: HTStream* me = (HTStream*)malloc(sizeof(*me));
1242: if (me == NULL) outofmem(__FILE__, "NetToText");
1243: me->isa = &NetToTextClass;
1244:
1245: me->had_cr = NO;
1246: me->sink = sink;
1247: return me;
1248: }
1.2 timbl 1249:
1250:
Webmaster