Annotation of libwww/Library/src/HTFormat.c, revision 1.42
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
/*
**  Shell command template for presenting PostScript on unix hosts.
**  It has two %s slots: the first names the file to view, the second
**  the file to remove afterwards (presumably the same temporary file
**  -- confirm against the caller). Not defined on non-unix builds.
*/
#if defined(unix)
#if defined(NeXT)
#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
#else
/* Full pathname would be better! */
#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
#endif /* NeXT */
#endif /* unix */
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.41 frystyk 48: #include "HTTCP.h"
1.34 luotonen 49: #include "HTGuess.h"
1.42 ! frystyk 50: #include "HTError.h"
1.34 luotonen 51:
1.2 timbl 52:
53: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
54:
1.10 timbl 55: #ifdef ORIGINAL
1.2 timbl 56: struct _HTStream {
57: CONST HTStreamClass* isa;
58: /* ... */
59: };
1.10 timbl 60: #endif
61:
62: /* this version used by the NetToText stream */
63: struct _HTStream {
64: CONST HTStreamClass * isa;
65: BOOL had_cr;
66: HTStream * sink;
67: };
1.2 timbl 68:
69:
1.17 luotonen 70: /*
71: ** Accept-Encoding and Accept-Language
72: */
73: typedef struct _HTAcceptNode {
74: HTAtom * atom;
75: float quality;
76: } HTAcceptNode;
77:
78:
79:
80:
1.2 timbl 81: /* Presentation methods
82: ** --------------------
83: */
84:
1.14 timbl 85: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 86:
1.31 frystyk 87: /* -------------------------------------------------------------------------
88: This function replaces the code in HTRequest_delete() in order to keep
89: the data structure hidden (it is NOT a joke!)
90: Henrik 14/03-94
91: ------------------------------------------------------------------------- */
92: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
93: {
94: HTList *cur = me;
95: HTPresentation *pres;
96: if (!me)
97: return;
98: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
99: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
100: free(pres);
101: }
102: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
103: }
104:
1.2 timbl 105:
106: /* Define a presentation system command for a content-type
107: ** -------------------------------------------------------
108: */
1.12 timbl 109: PUBLIC void HTSetPresentation ARGS6(
110: HTList *, conversions,
111: CONST char *, representation,
112: CONST char *, command,
113: float, quality,
114: float, secs,
115: float, secs_per_byte
1.2 timbl 116: ){
117:
118: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
119: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
120:
121: pres->rep = HTAtom_for(representation);
122: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
123: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
124: pres->quality = quality;
125: pres->secs = secs;
126: pres->secs_per_byte = secs_per_byte;
127: pres->rep = HTAtom_for(representation);
128: pres->command = 0;
129: StrAllocCopy(pres->command, command);
130:
1.12 timbl 131: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 132:
1.15 luotonen 133: #ifdef OLD_CODE
134: if (strcmp(representation, "*")==0) {
1.2 timbl 135: if (default_presentation) free(default_presentation);
136: default_presentation = pres;
1.12 timbl 137: } else
138: #endif
139: HTList_addObject(conversions, pres);
1.2 timbl 140: }
141:
142:
143: /* Define a built-in function for a content-type
144: ** ---------------------------------------------
145: */
1.12 timbl 146: PUBLIC void HTSetConversion ARGS7(
147: HTList *, conversions,
148: CONST char *, representation_in,
149: CONST char *, representation_out,
1.6 timbl 150: HTConverter*, converter,
1.12 timbl 151: float, quality,
152: float, secs,
153: float, secs_per_byte
1.2 timbl 154: ){
1.1 timbl 155:
1.2 timbl 156: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
157: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
158:
159: pres->rep = HTAtom_for(representation_in);
160: pres->rep_out = HTAtom_for(representation_out);
161: pres->converter = converter;
162: pres->command = NULL; /* Fixed */
163: pres->quality = quality;
164: pres->secs = secs;
165: pres->secs_per_byte = secs_per_byte;
166: pres->command = 0;
167:
1.12 timbl 168: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 169:
1.12 timbl 170: #ifdef OLD_CODE
1.2 timbl 171: if (strcmp(representation_in, "*")==0) {
172: if (default_presentation) free(default_presentation);
173: default_presentation = pres;
1.12 timbl 174: } else
175: #endif
176: HTList_addObject(conversions, pres);
1.2 timbl 177: }
1.1 timbl 178:
179:
180:
1.17 luotonen 181: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
182: char *, enc,
183: float, quality)
184: {
185: HTAcceptNode * node;
186: char * cur;
187:
188: if (!list || !enc || !*enc) return;
189:
190: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
191:
192: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
193: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
194: HTList_addObject(list, (void*)node);
195:
196: node->atom = HTAtom_for(enc);
197: node->quality = quality;
198: }
199:
200:
201: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
202: char *, lang,
203: float, quality)
204: {
205: HTAcceptNode * node;
206:
207: if (!list || !lang || !*lang) return;
208:
209: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
210: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
211:
212: HTList_addObject(list, (void*)node);
213: node->atom = HTAtom_for(lang);
214: node->quality = quality;
215: }
216:
217:
218: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
219: HTAtom *, actual)
220: {
221: char *t, *a, *st, *sa;
222: BOOL match = NO;
223:
1.22 luotonen 224: if (template && actual && (t = HTAtom_name(template))) {
225: if (!strcmp(t, "*"))
226: return YES;
1.17 luotonen 227:
1.22 luotonen 228: if (strchr(t, '*') &&
229: (a = HTAtom_name(actual)) &&
230: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 231:
1.22 luotonen 232: *sa = 0;
233: *st = 0;
234:
235: if ((*(st-1)=='*' &&
236: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
237: (*(st+1)=='*' && !strcasecomp(t,a)))
238: match = YES;
239:
240: *sa = '/';
241: *st = '/';
242: }
243: }
1.23 luotonen 244: return match;
1.17 luotonen 245: }
246:
1.36 luotonen 247: /*
248: * Added by takada@seraph.ntt.jp (94/04/08)
249: */
250: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
251: HTAtom *, actual)
252: {
253: char *t, *a, *st, *sa;
254: BOOL match = NO;
255:
256: if (template && actual &&
257: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
258: st = strchr(t, '_');
259: sa = strchr(a, '_');
260: if ((st != NULL) && (sa != NULL)) {
261: if (!strcasecomp(t, a))
262: match = YES;
263: else
264: match = NO;
265: }
266: else {
267: if (st != NULL) *st = 0;
268: if (sa != NULL) *sa = 0;
269: if (!strcasecomp(t, a))
270: match = YES;
271: else
272: match = NO;
273: if (st != NULL) *st = '_';
274: if (sa != NULL) *sa = '_';
275: }
276: }
277: return match;
278: }
279: /* end of addition */
280:
281:
1.17 luotonen 282:
283: PRIVATE float type_value ARGS2(HTAtom *, content_type,
284: HTList *, accepted)
285: {
286: HTList * cur = accepted;
287: HTPresentation * pres;
288: HTPresentation * wild = NULL;
289:
290: if (!content_type || !accepted) return -1;
291:
292: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
293: if (pres->rep == content_type)
294: return pres->quality;
295: else if (wild_match(pres->rep, content_type))
296: wild = pres;
297: }
298: if (wild) return wild->quality;
299: else return -1;
300: }
301:
302:
303: PRIVATE float lang_value ARGS2(HTAtom *, language,
304: HTList *, accepted)
305: {
306: HTList * cur = accepted;
307: HTAcceptNode * node;
308: HTAcceptNode * wild = NULL;
309:
310: if (!language || !accepted || HTList_isEmpty(accepted)) {
311: return 0.1;
312: }
313:
314: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
315: if (node->atom == language) {
316: return node->quality;
317: }
1.36 luotonen 318: /*
319: * patch by takada@seraph.ntt.jp (94/04/08)
320: * the original line was
321: * else if (wild_match(node->atom, language)) {
322: * and the new line is
323: */
324: else if (lang_match(node->atom, language)) {
1.17 luotonen 325: wild = node;
326: }
327: }
328:
329: if (wild) {
330: return wild->quality;
331: }
332: else {
333: return 0.1;
334: }
335: }
336:
337:
338: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
339: HTList *, accepted)
340: {
341: HTList * cur = accepted;
342: HTAcceptNode * node;
343: HTAcceptNode * wild = NULL;
344: char * e;
345:
346: if (!encoding || !accepted || HTList_isEmpty(accepted))
347: return 1;
348:
349: e = HTAtom_name(encoding);
350: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
351: return 1;
352:
353: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
354: if (node->atom == encoding)
355: return node->quality;
356: else if (wild_match(node->atom, encoding))
357: wild = node;
358: }
359: if (wild) return wild->quality;
360: else return 1;
361: }
362:
363:
364: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
365: HTList *, accepted_content_types,
366: HTList *, accepted_languages,
367: HTList *, accepted_encodings)
368: {
369: int accepted_cnt = 0;
370: HTList * accepted;
371: HTList * sorted;
372: HTList * cur;
373: HTContentDescription * d;
374:
375: if (!possibilities) return NO;
376:
377: accepted = HTList_new();
378: cur = possibilities;
379: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
380: float tv = type_value(d->content_type, accepted_content_types);
381: float lv = lang_value(d->content_language, accepted_languages);
382: float ev = encoding_value(d->content_encoding, accepted_encodings);
383:
384: if (tv > 0) {
385: d->quality *= tv * lv * ev;
386: HTList_addObject(accepted, d);
387: accepted_cnt++;
388: }
1.18 luotonen 389: else {
390: if (d->filename) free(d->filename);
391: free(d);
392: }
1.17 luotonen 393: }
394:
1.18 luotonen 395: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 396: CTRACE(stderr,
1.18 luotonen 397: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 398:
399: sorted = HTList_new();
400: while (accepted_cnt-- > 0) {
401: HTContentDescription * worst = NULL;
402: cur = accepted;
403: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
404: if (!worst || d->quality < worst->quality)
405: worst = d;
406: }
407: if (worst) {
408: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
409: accepted_cnt+1,
410: worst->quality,
411: (worst->content_type
412: ? HTAtom_name(worst->content_type) : "-"),
413: (worst->content_language
414: ? HTAtom_name(worst->content_language) :"-"),
415: (worst->content_encoding
416: ? HTAtom_name(worst->content_encoding) :"-"),
417: (worst->filename
418: ? worst->filename :"-"));
419: HTList_removeObject(accepted, (void*)worst);
420: HTList_addObject(sorted, (void*)worst);
421: }
422: }
1.18 luotonen 423: CTRACE(stderr, "\n");
1.17 luotonen 424: HTList_delete(accepted);
425: HTList_delete(possibilities->next);
426: possibilities->next = sorted->next;
427: sorted->next = NULL;
428: HTList_delete(sorted);
429:
430: if (!HTList_isEmpty(possibilities)) return YES;
431: else return NO;
432: }
433:
434:
435:
436:
437:
1.13 timbl 438: /* Socket Input Buffering
439: ** ----------------------
1.1 timbl 440: **
1.13 timbl 441: ** This code is used because one cannot in general open a
442: ** file descriptor for a socket.
443: **
1.1 timbl 444: ** The input file is read using the macro which can read from
1.13 timbl 445: ** a socket or a file, but this should not be used for files
446: ** as fopen() etc is more portable of course.
447: **
1.1 timbl 448: ** The input buffer size, if large will give greater efficiency and
449: ** release the server faster, and if small will save space on PCs etc.
450: */
451:
452:
453: /* Set up the buffering
454: **
455: ** These routines are public because they are in fact needed by
456: ** many parsers, and on PCs and Macs we should not duplicate
457: ** the static buffer area.
458: */
1.13 timbl 459: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 460: {
1.28 frystyk 461: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 462: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
463: isoc->input_file_number = file_number;
464: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
465: return isoc;
1.1 timbl 466: }
467:
1.35 frystyk 468: /* This should return HT_INTERRUPTED if interrupted BUT the connection
469: MUST not be closed */
470: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 471: {
1.35 frystyk 472: int ch;
1.1 timbl 473: do {
1.13 timbl 474: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 475: int status = NETREAD(
1.13 timbl 476: isoc->input_file_number,
477: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 478: if (status <= 0) {
1.39 frystyk 479: if (status == 0)
480: return EOF;
481: if (status == HT_INTERRUPTED) {
482: if (TRACE)
483: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
484: return HT_INTERRUPTED;
485: }
486: HTInetStatus("read");
487: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 488: }
1.35 frystyk 489: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 490: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 491: }
1.39 frystyk 492: ch = (unsigned char) *isoc->input_pointer++;
493: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 494:
495: return FROMASCII(ch);
496: }
497:
1.17 luotonen 498: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 499: {
500: if (me) free(me);
501: }
502:
503:
1.16 luotonen 504: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
505: int *, len)
506: {
507: if (isoc->input_pointer >= isoc->input_limit) {
508: int status = NETREAD(isoc->input_file_number,
509: isoc->input_buffer,
510: ((*len < INPUT_BUFFER_SIZE) ?
511: *len : INPUT_BUFFER_SIZE));
512: if (status <= 0) {
513: isoc->input_limit = isoc->input_buffer;
514: if (status < 0)
1.39 frystyk 515: HTInetStatus("read");
1.16 luotonen 516: *len = 0;
517: return NULL;
518: }
519: else {
520: *len = status;
521: return isoc->input_buffer;
522: }
523: }
524: else {
525: char * ret = isoc->input_pointer;
526: *len = isoc->input_limit - isoc->input_pointer;
527: isoc->input_pointer = isoc->input_limit;
528: return ret;
529: }
530: }
531:
532:
1.15 luotonen 533: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
534: {
535: if (isoc) {
536: int status;
537:
538: isoc->input_pointer = isoc->input_buffer;
539: status = NETREAD(isoc->input_file_number,
540: isoc->input_buffer,
541: INPUT_BUFFER_SIZE);
542: if (status <= 0) {
543: isoc->input_limit = isoc->input_buffer;
544: if (status < 0)
1.39 frystyk 545: HTInetStatus("read");
1.15 luotonen 546: }
547: else
548: isoc->input_limit = isoc->input_buffer + status;
549: return status;
550: }
551: return -1;
552: }
553:
554:
555: PRIVATE void ascii_cat ARGS3(char **, linep,
556: char *, start,
557: char *, end)
558: {
559: if (linep && start && end && start <= end) {
560: char *ptr;
561:
562: if (*linep) {
563: int len = strlen(*linep);
564: *linep = (char*)realloc(*linep, len + end-start + 1);
565: ptr = *linep + len;
566: }
567: else {
568: ptr = *linep = (char*)malloc(end-start + 1);
569: }
570:
571: while (start < end) {
572: *ptr = FROMASCII(*start);
573: ptr++;
574: start++;
575: }
576: *ptr = 0;
577: }
578: }
579:
580:
581: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
582: BOOL, unfold)
583: {
584: if (!isoc)
585: return NULL;
586: else {
587: BOOL check_unfold = NO;
588: int prev_cr = 0;
589: char *start = isoc->input_pointer;
590: char *cur = isoc->input_pointer;
591: char * line = NULL;
592:
593: for(;;) {
594: /*
595: ** Get more if needed to complete line
596: */
597: if (cur >= isoc->input_limit) { /* Need more data */
598: ascii_cat(&line, start, cur);
599: if (fill_in_buffer(isoc) <= 0)
600: return line;
601: start = cur = isoc->input_pointer;
602: } /* if need more data */
603:
604: /*
605: ** Find a line feed if there is one
606: */
607: for(; cur < isoc->input_limit; cur++) {
608: char c = FROMASCII(*cur);
609: if (!c) {
1.18 luotonen 610: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 611: return NULL; /* Panic! read a 0! */
612: }
613: if (check_unfold && c != ' ' && c != '\t') {
614: return line; /* Note: didn't update isoc->input_pointer */
615: }
616: else {
617: check_unfold = NO;
618: }
619:
620: if (c=='\r') {
621: prev_cr = 1;
622: }
623: else {
624: if (c=='\n') { /* Found a line feed */
625: ascii_cat(&line, start, cur-prev_cr);
626: start = isoc->input_pointer = cur+1;
627:
628: if (line && strlen(line) > 0 && unfold) {
629: check_unfold = YES;
630: }
631: else {
632: return line;
633: }
634: } /* if NL */
635: /* else just a regular character */
636: prev_cr = 0;
637: } /* if not CR */
638: } /* while characters in buffer remain */
639: } /* until line read or end-of-file */
640: } /* valid parameters to function */
641: }
642:
643:
644: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
645: {
646: return get_some_line(isoc, NO);
647: }
648:
649: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
650: {
651: return get_some_line(isoc, YES);
652: }
653:
654:
655: /*
656: ** Read HTTP status line (if there is one).
657: **
658: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
659: ** First look at the stub in ASCII and check if it starts "HTTP/".
660: **
661: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
662: ** will be taken as a HTTP 1.0 server. Failure.
663: */
664: #define STUB_LENGTH 20
665: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
666: {
667: if (!isoc) {
668: return NULL;
669: }
670: else {
671: char buf[STUB_LENGTH + 1];
672: int i;
673: char server_version[STUB_LENGTH+1];
674: int server_status;
675:
676: /*
677: ** Read initial buffer
678: */
679: if (isoc->input_pointer >= isoc->input_limit &&
680: fill_in_buffer(isoc) <= 0) {
681: return NULL;
682: }
683:
684: for (i=0; i < STUB_LENGTH; i++)
685: buf[i] = FROMASCII(isoc->input_buffer[i]);
686: buf[STUB_LENGTH] = 0;
687:
688: if (0 != strncmp(buf, "HTTP/", 5) ||
689: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
690: return NULL;
691: else
692: return get_some_line(isoc, NO);
693: }
694: }
695:
696:
697: /*
698: ** Do heuristic test to see if this is binary.
699: **
700: ** We check for characters above 128 in the first few bytes, and
701: ** if we find them we forget the html default.
702: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
703: **
704: ** Bugs: An HTTP 0.9 server returning a binary document with
705: ** characters < 128 will be read as ASCII.
706: */
707: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
708: {
709: if (isoc &&
710: (isoc->input_pointer < isoc->input_limit ||
711: fill_in_buffer(isoc) > 0)) {
712: char *p = isoc->input_buffer;
713: int i = STUB_LENGTH;
714:
715: for( ; i && p < isoc->input_limit; p++, i++)
716: if (((int)*p)&128)
717: return YES;
718: }
719: return NO;
720: }
721:
722:
723:
1.1 timbl 724: /* Stream the data to an ouput file as binary
725: */
1.38 luotonen 726: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 727: int, input,
728: FILE *, output)
1.1 timbl 729: {
730: do {
731: int status = NETREAD(
1.13 timbl 732: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 733: if (status <= 0) {
734: if (status == 0) return 0;
735: if (TRACE) fprintf(stderr,
1.39 frystyk 736: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 737: return 2; /* Error */
738: }
1.13 timbl 739: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 740: } while (YES);
741: }
742:
1.38 luotonen 743:
744: /*
745: * Normal HTTP headers are never bigger than 2K.
746: */
747: #define S_BUFFER_SIZE 2000
748:
749: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
750: {
751: if (isoc) {
752: isoc->s_do_buffering = YES;
753: if (!isoc->s_buffer) {
754: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
755: isoc->s_buffer_size = S_BUFFER_SIZE;
756: }
757: isoc->s_buffer_cur = isoc->s_buffer;
758: }
759: }
760:
761: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
762: {
763: if (isoc) {
764: isoc->s_do_buffering = NO;
765: if (isoc->s_buffer_cur)
766: *isoc->s_buffer_cur = 0;
767: }
768: }
769:
770: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
771: char **, buffer_ptr)
772: {
773: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
774: return 0;
775: else {
776: *isoc->s_buffer_cur = 0;
777: if (buffer_ptr)
778: *buffer_ptr = isoc->s_buffer;
779: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
780: }
781: }
1.1 timbl 782:
1.33 luotonen 783: PRIVATE BOOL better_match ARGS2(HTFormat, f,
784: HTFormat, g)
785: {
786: CONST char *p, *q;
787:
788: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
789: int i,j;
790: for(i=0 ; *p; p++) if (*p == '*') i++;
791: for(j=0 ; *q; q++) if (*q == '*') j++;
792: if (i < j) return YES;
793: }
794: return NO;
795: }
796:
1.17 luotonen 797:
1.2 timbl 798: /* Create a filter stack
799: ** ---------------------
800: **
1.7 secret 801: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 802: ** structure is made to hold the destination format while the
803: ** new stack is generated. This is just to pass the out format to
804: ** MIME so far. Storing the format of a stream in the stream might
805: ** be a lot neater.
1.10 timbl 806: **
1.29 frystyk 807: ** The star/star format is special, in that if you can take
1.40 frystyk 808: ** that you can take anything.
809: **
810: ** On succes, request->error_block is set to YES so no more error
811: ** messages to the stream as the stream might be of any format.
1.2 timbl 812: */
1.34 luotonen 813: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
814: HTRequest *, request,
815: BOOL, guess)
1.2 timbl 816: {
1.12 timbl 817: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 818: HTList * conversion[2];
819: int which_list;
1.25 frystyk 820: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 821: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 822:
1.2 timbl 823: if (TRACE) fprintf(stderr,
1.39 frystyk 824: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 825: HTAtom_name(rep_in),
1.2 timbl 826: HTAtom_name(rep_out));
1.34 luotonen 827:
828: if (guess && rep_in == WWW_UNKNOWN) {
829: CTRACE(stderr, "Returning... guessing stream\n");
1.40 frystyk 830: request->error_block = YES; /* No more error output to stream */
1.34 luotonen 831: return HTGuess_new(request);
832: }
833:
1.21 luotonen 834: if (rep_out == WWW_SOURCE || rep_out == rep_in)
835: return request->output_stream;
1.2 timbl 836:
1.14 timbl 837: conversion[0] = request->conversions;
838: conversion[1] = HTConversions;
1.17 luotonen 839:
1.15 luotonen 840: for(which_list = 0; which_list<2; which_list++) {
841: HTList * cur = conversion[which_list];
842:
843: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 844: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 845: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
846: if (!best_match ||
847: better_match(pres->rep, best_match->rep) ||
848: (!better_match(best_match->rep, pres->rep) &&
849: pres->quality > best_quality)) {
1.25 frystyk 850: best_match = pres;
851: best_quality = pres->quality;
1.10 timbl 852: }
853: }
1.2 timbl 854: }
855: }
1.33 luotonen 856:
1.29 frystyk 857: match = best_match ? best_match : NULL;
858: if (match) {
1.40 frystyk 859: request->error_block = YES; /* No more error output to stream */
1.29 frystyk 860: if (match->rep == WWW_SOURCE) {
1.39 frystyk 861: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 862: HTAtom_name(match->rep),
863: HTAtom_name(rep_out));
864: }
865: return (*match->converter)(
1.25 frystyk 866: request, match->command, rep_in, rep_out,
867: request->output_stream);
1.29 frystyk 868: }
1.42 ! frystyk 869: {
! 870: char *msg = NULL;
! 871: StrAllocCopy(msg, "Can't convert from ");
! 872: StrAllocCat(msg, HTAtom_name(rep_in));
! 873: StrAllocCat(msg, " to ");
! 874: StrAllocCat(msg, HTAtom_name(rep_out));
! 875: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NOT_IMPLEMENTED,
! 876: (void *) msg, (int) strlen(msg), "HTStreamStack");
! 877: free(msg);
! 878: }
1.2 timbl 879: return NULL;
880: }
881:
882:
883: /* Find the cost of a filter stack
884: ** -------------------------------
885: **
886: ** Must return the cost of the same stack which StreamStack would set up.
887: **
888: ** On entry,
889: ** length The size of the data to be converted
890: */
1.12 timbl 891: PUBLIC float HTStackValue ARGS5(
1.14 timbl 892: HTList *, theseConversions,
1.10 timbl 893: HTFormat, rep_in,
1.2 timbl 894: HTFormat, rep_out,
895: float, initial_value,
896: long int, length)
897: {
1.14 timbl 898: int which_list;
899: HTList* conversion[2];
900:
1.2 timbl 901: if (TRACE) fprintf(stderr,
1.39 frystyk 902: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 903: HTAtom_name(rep_in), initial_value,
1.2 timbl 904: HTAtom_name(rep_out));
905:
906: if (rep_out == WWW_SOURCE ||
1.10 timbl 907: rep_out == rep_in) return 0.0;
1.2 timbl 908:
1.12 timbl 909: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 910:
1.14 timbl 911: conversion[0] = theseConversions;
912: conversion[1] = HTConversions;
913:
914: for(which_list = 0; which_list<2; which_list++)
915: if (conversion[which_list]) {
1.15 luotonen 916: HTList * cur = conversion[which_list];
1.2 timbl 917: HTPresentation * pres;
1.15 luotonen 918: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
919: if (pres->rep == rep_in &&
1.17 luotonen 920: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 921: float value = initial_value * pres->quality;
922: if (HTMaxSecs != 0.0)
1.15 luotonen 923: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 924: /HTMaxSecs;
925: return value;
926: }
927: }
928: }
929:
930: return -1e30; /* Really bad */
1.17 luotonen 931: }
932:
933:
1.2 timbl 934:
1.1 timbl 935:
1.2 timbl 936: /* Push data from a socket down a stream
937: ** -------------------------------------
1.1 timbl 938: **
1.2 timbl 939: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 940: ** graphic (or other) objects described by the file.
1.2 timbl 941: **
942: ** The file number given is assumed to be a TELNET stream ie containing
943: ** CRLF at the end of lines which need to be stripped to LF for unix
944: ** when the format is textual.
945: **
1.26 luotonen 946: ** RETURNS the number of bytes transferred.
947: **
1.1 timbl 948: */
1.26 luotonen 949: PUBLIC int HTCopy ARGS2(
1.2 timbl 950: int, file_number,
951: HTStream*, sink)
1.1 timbl 952: {
1.2 timbl 953: HTStreamClass targetClass;
1.13 timbl 954: HTInputSocket * isoc;
1.26 luotonen 955: int cnt = 0;
956:
1.5 timbl 957: /* Push the data down the stream
1.2 timbl 958: **
959: */
960: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 961: isoc = HTInputSocket_new(file_number);
1.2 timbl 962:
963: /* Push binary from socket down sink
1.10 timbl 964: **
965: ** This operation could be put into a main event loop
1.2 timbl 966: */
967: for(;;) {
968: int status = NETREAD(
1.13 timbl 969: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 970: if (status <= 0) {
971: if (status == 0) break;
972: if (TRACE) fprintf(stderr,
1.39 frystyk 973: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 974: status, errno);
1.2 timbl 975: break;
976: }
1.26 luotonen 977:
1.8 timbl 978: #ifdef NOT_ASCII
979: {
980: char * p;
1.13 timbl 981: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 982: *p = FROMASCII(*p);
983: }
984: }
985: #endif
986:
1.13 timbl 987: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 988: cnt += status;
1.2 timbl 989: } /* next bufferload */
1.26 luotonen 990:
1.13 timbl 991: HTInputSocket_free(isoc);
1.26 luotonen 992:
993: return cnt;
1.2 timbl 994: }
995:
1.1 timbl 996:
1.7 secret 997:
998: /* Push data from a file pointer down a stream
999: ** -------------------------------------
1000: **
1001: ** This routine is responsible for creating and PRESENTING any
1002: ** graphic (or other) objects described by the file.
1003: **
1004: **
1005: */
1006: PUBLIC void HTFileCopy ARGS2(
1007: FILE *, fp,
1008: HTStream*, sink)
1009: {
1010: HTStreamClass targetClass;
1.13 timbl 1011: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1012:
1013: /* Push the data down the stream
1014: **
1015: */
1016: targetClass = *(sink->isa); /* Copy pointers to procedures */
1017:
1018: /* Push binary from socket down sink
1019: */
1020: for(;;) {
1021: int status = fread(
1022: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1023: if (status == 0) { /* EOF or error */
1024: if (ferror(fp) == 0) break;
1025: if (TRACE) fprintf(stderr,
1.39 frystyk 1026: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1027: break;
1028: }
1029: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1030: } /* next bufferload */
1.7 secret 1031: }
1032:
1033:
1034:
1035:
1.2 timbl 1036: /* Push data from a socket down a stream STRIPPING CR
1037: ** --------------------------------------------------
1038: **
1039: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1040: ** graphic (or other) objects described by the socket.
1.2 timbl 1041: **
1042: ** The file number given is assumed to be a TELNET stream ie containing
1043: ** CRLF at the end of lines which need to be stripped to LF for unix
1044: ** when the format is textual.
1.37 frystyk 1045: **
1046: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1047: */
1.2 timbl 1048: PUBLIC void HTCopyNoCR ARGS2(
1049: int, file_number,
1050: HTStream*, sink)
1051: {
1.13 timbl 1052: HTStreamClass targetClass;
1053: HTInputSocket * isoc;
1.37 frystyk 1054: int ch;
1.1 timbl 1055:
1.2 timbl 1056: /* Push the data, ignoring CRLF, down the stream
1057: **
1058: */
1059: targetClass = *(sink->isa); /* Copy pointers to procedures */
1060:
1061: /* Push text from telnet socket down sink
1062: **
1063: ** @@@@@ To push strings could be faster? (especially is we
1064: ** cheat and don't ignore CR! :-}
1065: */
1.13 timbl 1066: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1067: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1068: (*targetClass.put_character)(sink, ch);
1.13 timbl 1069: HTInputSocket_free(isoc);
1.2 timbl 1070: }
1.1 timbl 1071:
1.2 timbl 1072:
1.7 secret 1073:
1.2 timbl 1074: /* Parse a socket given format and file number
1075: **
1076: ** This routine is responsible for creating and PRESENTING any
1077: ** graphic (or other) objects described by the file.
1078: **
1079: ** The file number given is assumed to be a TELNET stream ie containing
1080: ** CRLF at the end of lines which need to be stripped to LF for unix
1081: ** when the format is textual.
1082: **
1.42 ! frystyk 1083: ** Returns <0 on error, HT_LOADED on success.
1.2 timbl 1084: */
1.14 timbl 1085:
1.12 timbl 1086: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1087: HTFormat, rep_in,
1.2 timbl 1088: int, file_number,
1.12 timbl 1089: HTRequest *, request)
1.2 timbl 1090: {
1091: HTStream * stream;
1092: HTStreamClass targetClass;
1.1 timbl 1093:
1.40 frystyk 1094: if (request->error_stack) {
1095: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1096: return -1;
1097: }
1098:
1.42 ! frystyk 1099: /* Set up stream stack */
! 1100: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
! 1101: return -1;
1.1 timbl 1102:
1.3 timbl 1103: /* Push the data, ignoring CRLF if necessary, down the stream
1104: **
1.2 timbl 1105: **
1.3 timbl 1106: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1107: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1108: ** The current method smells anyway.
1.2 timbl 1109: */
1110: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1111: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1112: || (request->content_encoding &&
1113: request->content_encoding != HTAtom_for("8bit") &&
1114: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1115: || strstr(HTAtom_name(rep_in), "image/")
1116: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1117: HTCopy(file_number, stream);
1.2 timbl 1118: } else { /* ascii text with CRLFs :-( */
1119: HTCopyNoCR(file_number, stream);
1120: }
1.7 secret 1121: (*targetClass.free)(stream);
1122:
1123: return HT_LOADED;
1124: }
1125:
1126:
1127:
1128: /* Parse a file given format and file pointer
1129: **
1130: ** This routine is responsible for creating and PRESENTING any
1131: ** graphic (or other) objects described by the file.
1132: **
1133: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1134: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1135: ** when the format is textual.
1136: **
1137: */
1.12 timbl 1138: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1139: HTFormat, rep_in,
1.7 secret 1140: FILE *, fp,
1.12 timbl 1141: HTRequest *, request)
1.7 secret 1142: {
1143: HTStream * stream;
1144: HTStreamClass targetClass;
1.40 frystyk 1145:
1146: if (request->error_stack) {
1147: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1148: return -1;
1149: }
1.7 secret 1150:
1.42 ! frystyk 1151: /* Set up stream stack */
! 1152: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
! 1153: return -1;
1.7 secret 1154:
1.9 timbl 1155: /* Push the data down the stream
1.7 secret 1156: **
1157: **
1158: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1159: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1160: ** The current method smells anyway.
1161: */
1162: targetClass = *(stream->isa); /* Copy pointers to procedures */
1163: HTFileCopy(fp, stream);
1.2 timbl 1164: (*targetClass.free)(stream);
1.1 timbl 1165:
1.2 timbl 1166: return HT_LOADED;
1.1 timbl 1167: }
1.2 timbl 1168:
1.10 timbl 1169:
/*	Converter stream: Network Telnet to internal character text
**	-----------------------------------------------------------
**
**	The input is assumed to be in ASCII, with lines delimited
**	by (13,10) pairs. These pairs are converted into (CR,LF)
**	pairs in the local representation. The (CR,LF) sequence,
**	when found, is changed to a '\n' character, the internal
**	C representation of a new line.
*/
1179:
1180:
1.11 timbl 1181: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1182: {
1183: char c = FROMASCII(net_char);
1184: if (me->had_cr) {
1185: if (c==LF) {
1186: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1187: me->had_cr = NO;
1188: return;
1189: } else {
1190: me->sink->isa->put_character(me->sink, CR); /* leftover */
1191: }
1192: }
1193: me->had_cr = (c==CR);
1194: if (!me->had_cr)
1195: me->sink->isa->put_character(me->sink, c); /* normal */
1196: }
1197:
1.11 timbl 1198: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1199: {
1200: CONST char * p;
1201: for(p=s; *p; p++) NetToText_put_character(me, *p);
1202: }
1203:
1.11 timbl 1204: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1205: {
1206: CONST char * p;
1207: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1208: }
1209:
1210: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1211: {
1212: me->sink->isa->free(me->sink); /* Close rest of pipe */
1213: free(me);
1214: }
1215:
1216: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1217: {
1218: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1219: free(me);
1220: }
1221:
1222: /* The class structure
1223: */
1224: PRIVATE HTStreamClass NetToTextClass = {
1225: "NetToText",
1226: NetToText_free,
1227: NetToText_abort,
1228: NetToText_put_character,
1229: NetToText_put_string,
1230: NetToText_put_block
1231: };
1232:
1233: /* The creation method
1234: */
1235: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1236: {
1237: HTStream* me = (HTStream*)malloc(sizeof(*me));
1238: if (me == NULL) outofmem(__FILE__, "NetToText");
1239: me->isa = &NetToTextClass;
1240:
1241: me->had_cr = NO;
1242: me->sink = sink;
1243: return me;
1244: }
1.2 timbl 1245:
1246:
Webmaster