Annotation of libwww/Library/src/HTFormat.c, revision 1.43
1.7 secret 1:
1.1 timbl 2: /* Manage different file formats HTFormat.c
3: ** =============================
4: **
5: ** Bugs:
6: ** Not reentrant.
7: **
8: ** Assumes the incoming stream is ASCII, rather than a local file
9: ** format, and so ALWAYS converts from ASCII on non-ASCII machines.
10: ** Therefore, non-ASCII machines can't read local files.
1.2 timbl 11: **
12: */
13:
1.10 timbl 14:
1.2 timbl 15: /* Implements:
1.1 timbl 16: */
1.2 timbl 17: #include "HTFormat.h"
18:
/* Global limits.  HTMaxSecs is the divisor HTStackValue() uses to scale a
** conversion's time cost; a value of 0.0 disables that cost term there.
** HTMaxLength is not referenced in the part of the file visible here.
*/
19: PUBLIC float HTMaxSecs = 1e10; /* No effective limit */
20: PUBLIC float HTMaxLength = 1e10; /* No effective limit */
21:
/* Shell command template for presenting a PostScript file on unix.
** It takes the file name twice: once for the viewer, once for /bin/rm.
** The NeXT variant uses "open"; other unix systems use ghostview in the
** background.
*/
22: #ifdef unix
23: #ifdef NeXT
24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
25: #else
26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
27: /* Full pathname would be better! */
28: #endif
29: #endif
30:
1.1 timbl 31:
32: #include "HTUtils.h"
33: #include "tcp.h"
34:
35: #include "HTML.h"
1.12 timbl 36: #include "HTMLPDTD.h"
1.1 timbl 37: #include "HText.h"
1.2 timbl 38: #include "HTAlert.h"
39: #include "HTList.h"
40: #include "HTInit.h"
41: /* Streams and structured streams which we use:
42: */
43: #include "HTFWriter.h"
44: #include "HTPlain.h"
45: #include "SGML.h"
46: #include "HTML.h"
47: #include "HTMLGen.h"
1.41 frystyk 48: #include "HTTCP.h"
1.34 luotonen 49: #include "HTGuess.h"
1.42 frystyk 50: #include "HTError.h"
1.34 luotonen 51:
1.2 timbl 52:
/* When set, HTParseSocket() copies the socket data unmodified (HTCopy)
** instead of stripping carriage returns.
*/
53: PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */
54:
/* Minimal stream layout, compiled only when ORIGINAL is defined. */
1.10 timbl 55: #ifdef ORIGINAL
1.2 timbl 56: struct _HTStream {
57: CONST HTStreamClass* isa;
58: /* ... */
59: };
1.10 timbl 60: #endif
61:
62: /* this version used by the NetToText stream */
/* isa:    class (method table) of the stream
** had_cr: presumably records that the previous character was a CR -- TODO confirm
** sink:   stream the output is passed on to (presumably) -- TODO confirm
*/
63: struct _HTStream {
64: CONST HTStreamClass * isa;
65: BOOL had_cr;
66: HTStream * sink;
67: };
1.2 timbl 68:
69:
1.17 luotonen 70: /*
71: ** Accept-Encoding and Accept-Language
72: */
/* One entry of an Accept-Encoding or Accept-Language list:
** the atom naming the encoding/language and the quality given to it.
*/
73: typedef struct _HTAcceptNode {
74: HTAtom * atom;
75: float quality;
76: } HTAcceptNode;
77:
78:
79:
80:
1.2 timbl 81: /* Presentation methods
82: ** --------------------
83: */
84:
/* Global list of conversions; HTStreamStack() and HTStackValue() search it
** after the request-specific list.
*/
1.14 timbl 85: PUBLIC HTList * HTConversions = NULL;
1.2 timbl 86:
1.31 frystyk 87: /* -------------------------------------------------------------------------
88: This function replaces the code in HTRequest_delete() in order to keep
89: the data structure hidden (it is NOT a joke!)
90: Henrik 14/03-94
91: ------------------------------------------------------------------------- */
92: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
93: {
94: HTList *cur = me;
95: HTPresentation *pres;
96: if (!me)
97: return;
98: while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
99: FREE(pres->command); /* Leak fixed AL 6 Feb 1994 */
100: free(pres);
101: }
102: HTList_delete(me); /* Leak fixed AL 6 Feb 1994 */
103: }
104:
1.2 timbl 105:
106: /* Define a presentation system command for a content-type
107: ** -------------------------------------------------------
108: */
1.12 timbl 109: PUBLIC void HTSetPresentation ARGS6(
110: HTList *, conversions,
111: CONST char *, representation,
112: CONST char *, command,
113: float, quality,
114: float, secs,
115: float, secs_per_byte
1.2 timbl 116: ){
117:
118: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
119: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
120:
121: pres->rep = HTAtom_for(representation);
122: pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */
123: pres->converter = HTSaveAndExecute; /* Fixed for now ... */
124: pres->quality = quality;
125: pres->secs = secs;
126: pres->secs_per_byte = secs_per_byte;
127: pres->rep = HTAtom_for(representation);
128: pres->command = 0;
129: StrAllocCopy(pres->command, command);
130:
1.12 timbl 131: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 132:
1.15 luotonen 133: #ifdef OLD_CODE
134: if (strcmp(representation, "*")==0) {
1.2 timbl 135: if (default_presentation) free(default_presentation);
136: default_presentation = pres;
1.12 timbl 137: } else
138: #endif
139: HTList_addObject(conversions, pres);
1.2 timbl 140: }
141:
142:
143: /* Define a built-in function for a content-type
144: ** ---------------------------------------------
145: */
1.12 timbl 146: PUBLIC void HTSetConversion ARGS7(
147: HTList *, conversions,
148: CONST char *, representation_in,
149: CONST char *, representation_out,
1.6 timbl 150: HTConverter*, converter,
1.12 timbl 151: float, quality,
152: float, secs,
153: float, secs_per_byte
1.2 timbl 154: ){
1.1 timbl 155:
1.2 timbl 156: HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
157: if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
158:
159: pres->rep = HTAtom_for(representation_in);
160: pres->rep_out = HTAtom_for(representation_out);
161: pres->converter = converter;
162: pres->command = NULL; /* Fixed */
163: pres->quality = quality;
164: pres->secs = secs;
165: pres->secs_per_byte = secs_per_byte;
166: pres->command = 0;
167:
1.12 timbl 168: /* if (!HTPresentations) HTPresentations = HTList_new(); */
1.2 timbl 169:
1.12 timbl 170: #ifdef OLD_CODE
1.2 timbl 171: if (strcmp(representation_in, "*")==0) {
172: if (default_presentation) free(default_presentation);
173: default_presentation = pres;
1.12 timbl 174: } else
175: #endif
176: HTList_addObject(conversions, pres);
1.2 timbl 177: }
1.1 timbl 178:
179:
180:
1.17 luotonen 181: PUBLIC void HTAcceptEncoding ARGS3(HTList *, list,
182: char *, enc,
183: float, quality)
184: {
185: HTAcceptNode * node;
186: char * cur;
187:
188: if (!list || !enc || !*enc) return;
189:
190: for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
191:
192: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
193: if (!node) outofmem(__FILE__, "HTAcceptEncoding");
194: HTList_addObject(list, (void*)node);
195:
196: node->atom = HTAtom_for(enc);
197: node->quality = quality;
198: }
199:
200:
201: PUBLIC void HTAcceptLanguage ARGS3(HTList *, list,
202: char *, lang,
203: float, quality)
204: {
205: HTAcceptNode * node;
206:
207: if (!list || !lang || !*lang) return;
208:
209: node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
210: if (!node) outofmem(__FILE__, "HTAcceptLanguage");
211:
212: HTList_addObject(list, (void*)node);
213: node->atom = HTAtom_for(lang);
214: node->quality = quality;
215: }
216:
217:
218: PRIVATE BOOL wild_match ARGS2(HTAtom *, template,
219: HTAtom *, actual)
220: {
221: char *t, *a, *st, *sa;
222: BOOL match = NO;
223:
1.22 luotonen 224: if (template && actual && (t = HTAtom_name(template))) {
225: if (!strcmp(t, "*"))
226: return YES;
1.17 luotonen 227:
1.22 luotonen 228: if (strchr(t, '*') &&
229: (a = HTAtom_name(actual)) &&
230: (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17 luotonen 231:
1.22 luotonen 232: *sa = 0;
233: *st = 0;
234:
235: if ((*(st-1)=='*' &&
236: (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
237: (*(st+1)=='*' && !strcasecomp(t,a)))
238: match = YES;
239:
240: *sa = '/';
241: *st = '/';
242: }
243: }
1.23 luotonen 244: return match;
1.17 luotonen 245: }
246:
1.36 luotonen 247: /*
248: * Added by takada@seraph.ntt.jp (94/04/08)
249: */
250: PRIVATE BOOL lang_match ARGS2(HTAtom *, template,
251: HTAtom *, actual)
252: {
253: char *t, *a, *st, *sa;
254: BOOL match = NO;
255:
256: if (template && actual &&
257: (t = HTAtom_name(template)) && (a = HTAtom_name(actual))) {
258: st = strchr(t, '_');
259: sa = strchr(a, '_');
260: if ((st != NULL) && (sa != NULL)) {
261: if (!strcasecomp(t, a))
262: match = YES;
263: else
264: match = NO;
265: }
266: else {
267: if (st != NULL) *st = 0;
268: if (sa != NULL) *sa = 0;
269: if (!strcasecomp(t, a))
270: match = YES;
271: else
272: match = NO;
273: if (st != NULL) *st = '_';
274: if (sa != NULL) *sa = '_';
275: }
276: }
277: return match;
278: }
279: /* end of addition */
280:
281:
1.17 luotonen 282:
283: PRIVATE float type_value ARGS2(HTAtom *, content_type,
284: HTList *, accepted)
285: {
286: HTList * cur = accepted;
287: HTPresentation * pres;
288: HTPresentation * wild = NULL;
289:
290: if (!content_type || !accepted) return -1;
291:
292: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
293: if (pres->rep == content_type)
294: return pres->quality;
295: else if (wild_match(pres->rep, content_type))
296: wild = pres;
297: }
298: if (wild) return wild->quality;
299: else return -1;
300: }
301:
302:
303: PRIVATE float lang_value ARGS2(HTAtom *, language,
304: HTList *, accepted)
305: {
306: HTList * cur = accepted;
307: HTAcceptNode * node;
308: HTAcceptNode * wild = NULL;
309:
310: if (!language || !accepted || HTList_isEmpty(accepted)) {
311: return 0.1;
312: }
313:
314: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
315: if (node->atom == language) {
316: return node->quality;
317: }
1.36 luotonen 318: /*
319: * patch by takada@seraph.ntt.jp (94/04/08)
320: * the original line was
321: * else if (wild_match(node->atom, language)) {
322: * and the new line is
323: */
324: else if (lang_match(node->atom, language)) {
1.17 luotonen 325: wild = node;
326: }
327: }
328:
329: if (wild) {
330: return wild->quality;
331: }
332: else {
333: return 0.1;
334: }
335: }
336:
337:
338: PRIVATE float encoding_value ARGS2(HTAtom *, encoding,
339: HTList *, accepted)
340: {
341: HTList * cur = accepted;
342: HTAcceptNode * node;
343: HTAcceptNode * wild = NULL;
344: char * e;
345:
346: if (!encoding || !accepted || HTList_isEmpty(accepted))
347: return 1;
348:
349: e = HTAtom_name(encoding);
350: if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
351: return 1;
352:
353: while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
354: if (node->atom == encoding)
355: return node->quality;
356: else if (wild_match(node->atom, encoding))
357: wild = node;
358: }
359: if (wild) return wild->quality;
360: else return 1;
361: }
362:
363:
364: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
365: HTList *, accepted_content_types,
366: HTList *, accepted_languages,
367: HTList *, accepted_encodings)
368: {
369: int accepted_cnt = 0;
370: HTList * accepted;
371: HTList * sorted;
372: HTList * cur;
373: HTContentDescription * d;
374:
375: if (!possibilities) return NO;
376:
377: accepted = HTList_new();
378: cur = possibilities;
379: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
380: float tv = type_value(d->content_type, accepted_content_types);
381: float lv = lang_value(d->content_language, accepted_languages);
382: float ev = encoding_value(d->content_encoding, accepted_encodings);
383:
384: if (tv > 0) {
385: d->quality *= tv * lv * ev;
386: HTList_addObject(accepted, d);
387: accepted_cnt++;
388: }
1.18 luotonen 389: else {
390: if (d->filename) free(d->filename);
391: free(d);
392: }
1.17 luotonen 393: }
394:
1.18 luotonen 395: CTRACE(stderr, "Ranking.....\n");
1.17 luotonen 396: CTRACE(stderr,
1.18 luotonen 397: "\nRANK QUALITY CONTENT-TYPE LANGUAGE ENCODING FILE\n");
1.17 luotonen 398:
399: sorted = HTList_new();
400: while (accepted_cnt-- > 0) {
401: HTContentDescription * worst = NULL;
402: cur = accepted;
403: while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
404: if (!worst || d->quality < worst->quality)
405: worst = d;
406: }
407: if (worst) {
408: CTRACE(stderr, "%d. %.4f %-20.20s %-8.8s %-10.10s %s\n",
409: accepted_cnt+1,
410: worst->quality,
411: (worst->content_type
412: ? HTAtom_name(worst->content_type) : "-"),
413: (worst->content_language
414: ? HTAtom_name(worst->content_language) :"-"),
415: (worst->content_encoding
416: ? HTAtom_name(worst->content_encoding) :"-"),
417: (worst->filename
418: ? worst->filename :"-"));
419: HTList_removeObject(accepted, (void*)worst);
420: HTList_addObject(sorted, (void*)worst);
421: }
422: }
1.18 luotonen 423: CTRACE(stderr, "\n");
1.17 luotonen 424: HTList_delete(accepted);
425: HTList_delete(possibilities->next);
426: possibilities->next = sorted->next;
427: sorted->next = NULL;
428: HTList_delete(sorted);
429:
430: if (!HTList_isEmpty(possibilities)) return YES;
431: else return NO;
432: }
433:
434:
435:
436:
437:
1.13 timbl 438: /* Socket Input Buffering
439: ** ----------------------
1.1 timbl 440: **
1.13 timbl 441: ** This code is used because one cannot in general open a
442: ** file descriptor for a socket.
443: **
1.1 timbl 444: ** The input file is read using the macro which can read from
1.13 timbl 445: ** a socket or a file, but this should not be used for files
446: ** as fopen() etc is more portable of course.
447: **
1.1 timbl 448: ** The input buffer size, if large will give greater efficiency and
449: ** release the server faster, and if small will save space on PCs etc.
450: */
451:
452:
453: /* Set up the buffering
454: **
455: ** These routines are public because they are in fact needed by
456: ** many parsers, and on PCs and Macs we should not duplicate
457: ** the static buffer area.
458: */
1.13 timbl 459: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1 timbl 460: {
1.28 frystyk 461: HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13 timbl 462: if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
463: isoc->input_file_number = file_number;
464: isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
465: return isoc;
1.1 timbl 466: }
467:
1.35 frystyk 468: /* This should return HT_INTERRUPTED if interrupted BUT the connection
469: MUST not be closed */
470: PUBLIC int HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1 timbl 471: {
1.35 frystyk 472: int ch;
1.1 timbl 473: do {
1.13 timbl 474: if (isoc-> input_pointer >= isoc->input_limit) {
1.1 timbl 475: int status = NETREAD(
1.13 timbl 476: isoc->input_file_number,
477: isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 478: if (status <= 0) {
1.39 frystyk 479: if (status == 0)
480: return EOF;
481: if (status == HT_INTERRUPTED) {
482: if (TRACE)
483: fprintf(stderr, "Get Char.... Interrupted in HTInputSocket_getCharacter\n");
484: return HT_INTERRUPTED;
485: }
486: HTInetStatus("read");
487: return EOF; /* -1 is returned by UCX at end of HTTP link */
1.1 timbl 488: }
1.35 frystyk 489: isoc->input_pointer = isoc->input_buffer;
1.13 timbl 490: isoc->input_limit = isoc->input_buffer + status;
1.1 timbl 491: }
1.39 frystyk 492: ch = (unsigned char) *isoc->input_pointer++;
493: } while (ch == 13); /* Ignore ASCII carriage return */
1.1 timbl 494:
495: return FROMASCII(ch);
496: }
497:
1.17 luotonen 498: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13 timbl 499: {
500: if (me) free(me);
501: }
502:
503:
1.16 luotonen 504: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*, isoc,
505: int *, len)
506: {
507: if (isoc->input_pointer >= isoc->input_limit) {
508: int status = NETREAD(isoc->input_file_number,
509: isoc->input_buffer,
510: ((*len < INPUT_BUFFER_SIZE) ?
511: *len : INPUT_BUFFER_SIZE));
512: if (status <= 0) {
513: isoc->input_limit = isoc->input_buffer;
514: if (status < 0)
1.39 frystyk 515: HTInetStatus("read");
1.16 luotonen 516: *len = 0;
517: return NULL;
518: }
519: else {
520: *len = status;
521: return isoc->input_buffer;
522: }
523: }
524: else {
525: char * ret = isoc->input_pointer;
526: *len = isoc->input_limit - isoc->input_pointer;
527: isoc->input_pointer = isoc->input_limit;
528: return ret;
529: }
530: }
531:
532:
1.15 luotonen 533: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
534: {
535: if (isoc) {
536: int status;
537:
538: isoc->input_pointer = isoc->input_buffer;
539: status = NETREAD(isoc->input_file_number,
540: isoc->input_buffer,
541: INPUT_BUFFER_SIZE);
542: if (status <= 0) {
543: isoc->input_limit = isoc->input_buffer;
544: if (status < 0)
1.39 frystyk 545: HTInetStatus("read");
1.15 luotonen 546: }
547: else
548: isoc->input_limit = isoc->input_buffer + status;
549: return status;
550: }
551: return -1;
552: }
553:
554:
555: PRIVATE void ascii_cat ARGS3(char **, linep,
556: char *, start,
557: char *, end)
558: {
559: if (linep && start && end && start <= end) {
560: char *ptr;
561:
562: if (*linep) {
563: int len = strlen(*linep);
564: *linep = (char*)realloc(*linep, len + end-start + 1);
565: ptr = *linep + len;
566: }
567: else {
568: ptr = *linep = (char*)malloc(end-start + 1);
569: }
570:
571: while (start < end) {
572: *ptr = FROMASCII(*start);
573: ptr++;
574: start++;
575: }
576: *ptr = 0;
577: }
578: }
579:
580:
581: PRIVATE char * get_some_line ARGS2(HTInputSocket *, isoc,
582: BOOL, unfold)
583: {
584: if (!isoc)
585: return NULL;
586: else {
587: BOOL check_unfold = NO;
588: int prev_cr = 0;
589: char *start = isoc->input_pointer;
590: char *cur = isoc->input_pointer;
591: char * line = NULL;
592:
593: for(;;) {
594: /*
595: ** Get more if needed to complete line
596: */
597: if (cur >= isoc->input_limit) { /* Need more data */
598: ascii_cat(&line, start, cur);
599: if (fill_in_buffer(isoc) <= 0)
600: return line;
601: start = cur = isoc->input_pointer;
602: } /* if need more data */
603:
604: /*
605: ** Find a line feed if there is one
606: */
607: for(; cur < isoc->input_limit; cur++) {
608: char c = FROMASCII(*cur);
609: if (!c) {
1.18 luotonen 610: if (line) free(line); /* Leak fixed AL 6 Feb 94 */
1.15 luotonen 611: return NULL; /* Panic! read a 0! */
612: }
613: if (check_unfold && c != ' ' && c != '\t') {
614: return line; /* Note: didn't update isoc->input_pointer */
615: }
616: else {
617: check_unfold = NO;
618: }
619:
620: if (c=='\r') {
621: prev_cr = 1;
622: }
623: else {
624: if (c=='\n') { /* Found a line feed */
625: ascii_cat(&line, start, cur-prev_cr);
626: start = isoc->input_pointer = cur+1;
627:
628: if (line && strlen(line) > 0 && unfold) {
629: check_unfold = YES;
630: }
631: else {
632: return line;
633: }
634: } /* if NL */
635: /* else just a regular character */
636: prev_cr = 0;
637: } /* if not CR */
638: } /* while characters in buffer remain */
639: } /* until line read or end-of-file */
640: } /* valid parameters to function */
641: }
642:
1.43 ! frystyk 643: /* The returned string must be freed by the caller */
1.15 luotonen 644: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
645: {
646: return get_some_line(isoc, NO);
647: }
648:
1.43 ! frystyk 649: /* The returned string must be freed by the caller */
1.15 luotonen 650: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
651: {
652: return get_some_line(isoc, YES);
653: }
654:
655:
656: /*
657: ** Read HTTP status line (if there is one).
658: **
659: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
660: ** First look at the stub in ASCII and check if it starts "HTTP/".
661: **
662: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
663: ** will be taken as a HTTP 1.0 server. Failure.
664: */
665: #define STUB_LENGTH 20
666: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
667: {
668: if (!isoc) {
669: return NULL;
670: }
671: else {
672: char buf[STUB_LENGTH + 1];
673: int i;
674: char server_version[STUB_LENGTH+1];
675: int server_status;
676:
677: /*
678: ** Read initial buffer
679: */
680: if (isoc->input_pointer >= isoc->input_limit &&
681: fill_in_buffer(isoc) <= 0) {
682: return NULL;
683: }
684:
685: for (i=0; i < STUB_LENGTH; i++)
686: buf[i] = FROMASCII(isoc->input_buffer[i]);
687: buf[STUB_LENGTH] = 0;
688:
689: if (0 != strncmp(buf, "HTTP/", 5) ||
690: sscanf(buf, "%20s%d", server_version, &server_status) < 2)
691: return NULL;
692: else
693: return get_some_line(isoc, NO);
694: }
695: }
696:
697:
698: /*
699: ** Do heuristic test to see if this is binary.
700: **
701: ** We check for characters above 128 in the first few bytes, and
702: ** if we find them we forget the html default.
703: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
704: **
705: ** Bugs: An HTTP 0.9 server returning a binary document with
706: ** characters < 128 will be read as ASCII.
707: */
708: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
709: {
710: if (isoc &&
711: (isoc->input_pointer < isoc->input_limit ||
712: fill_in_buffer(isoc) > 0)) {
713: char *p = isoc->input_buffer;
714: int i = STUB_LENGTH;
715:
716: for( ; i && p < isoc->input_limit; p++, i++)
717: if (((int)*p)&128)
718: return YES;
719: }
720: return NO;
721: }
722:
723:
724:
1.1 timbl 725: /* Stream the data to an ouput file as binary
726: */
1.38 luotonen 727: PUBLIC int HTOutputBinary ARGS3(HTInputSocket *,isoc,
1.13 timbl 728: int, input,
729: FILE *, output)
1.1 timbl 730: {
731: do {
732: int status = NETREAD(
1.13 timbl 733: input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1 timbl 734: if (status <= 0) {
735: if (status == 0) return 0;
736: if (TRACE) fprintf(stderr,
1.39 frystyk 737: "Out Binary.. Socket read error %d\n", status);
1.1 timbl 738: return 2; /* Error */
739: }
1.13 timbl 740: fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1 timbl 741: } while (YES);
742: }
743:
1.38 luotonen 744:
745: /*
746: * Normal HTTP headers are never bigger than 2K.
747: */
748: #define S_BUFFER_SIZE 2000
749:
750: PUBLIC void HTInputSocket_startBuffering ARGS1(HTInputSocket *, isoc)
751: {
752: if (isoc) {
753: isoc->s_do_buffering = YES;
754: if (!isoc->s_buffer) {
755: isoc->s_buffer = (char*)malloc(S_BUFFER_SIZE + 1);
756: isoc->s_buffer_size = S_BUFFER_SIZE;
757: }
758: isoc->s_buffer_cur = isoc->s_buffer;
759: }
760: }
761:
762: PUBLIC void HTInputSocket_stopBuffering ARGS1(HTInputSocket *, isoc)
763: {
764: if (isoc) {
765: isoc->s_do_buffering = NO;
766: if (isoc->s_buffer_cur)
767: *isoc->s_buffer_cur = 0;
768: }
769: }
770:
771: PUBLIC int HTInputSocket_getBuffer ARGS2(HTInputSocket *, isoc,
772: char **, buffer_ptr)
773: {
774: if (!isoc || !isoc->s_buffer || !isoc->s_buffer_cur)
775: return 0;
776: else {
777: *isoc->s_buffer_cur = 0;
778: if (buffer_ptr)
779: *buffer_ptr = isoc->s_buffer;
780: return (int) (isoc->s_buffer_cur - isoc->s_buffer);
781: }
782: }
1.1 timbl 783:
1.33 luotonen 784: PRIVATE BOOL better_match ARGS2(HTFormat, f,
785: HTFormat, g)
786: {
787: CONST char *p, *q;
788:
789: if (f && g && (p = HTAtom_name(f)) && (q = HTAtom_name(g))) {
790: int i,j;
791: for(i=0 ; *p; p++) if (*p == '*') i++;
792: for(j=0 ; *q; q++) if (*q == '*') j++;
793: if (i < j) return YES;
794: }
795: return NO;
796: }
797:
1.17 luotonen 798:
1.2 timbl 799: /* Create a filter stack
800: ** ---------------------
801: **
1.7 secret 802: ** If a wildcard match is made, a temporary HTPresentation
1.2 timbl 803: ** structure is made to hold the destination format while the
804: ** new stack is generated. This is just to pass the out format to
805: ** MIME so far. Storing the format of a stream in the stream might
806: ** be a lot neater.
1.10 timbl 807: **
1.29 frystyk 808: ** The star/star format is special, in that if you can take
1.40 frystyk 809: ** that you can take anything.
810: **
811: ** On success, request->error_block is set to YES so no more error
812: ** messages to the stream as the stream might be of any format.
1.2 timbl 813: */
1.34 luotonen 814: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat, rep_in,
815: HTRequest *, request,
816: BOOL, guess)
1.2 timbl 817: {
1.12 timbl 818: HTFormat rep_out = request->output_format; /* Could be a param */
1.14 timbl 819: HTList * conversion[2];
820: int which_list;
1.25 frystyk 821: float best_quality = -1e30; /* Pretty bad! */
1.29 frystyk 822: HTPresentation *pres, *match, *best_match=0;
1.14 timbl 823:
1.2 timbl 824: if (TRACE) fprintf(stderr,
1.39 frystyk 825: "StreamStack. Constructing stream stack for %s to %s\n",
1.10 timbl 826: HTAtom_name(rep_in),
1.2 timbl 827: HTAtom_name(rep_out));
1.34 luotonen 828:
829: if (guess && rep_in == WWW_UNKNOWN) {
830: CTRACE(stderr, "Returning... guessing stream\n");
1.40 frystyk 831: request->error_block = YES; /* No more error output to stream */
1.34 luotonen 832: return HTGuess_new(request);
833: }
834:
1.21 luotonen 835: if (rep_out == WWW_SOURCE || rep_out == rep_in)
836: return request->output_stream;
1.2 timbl 837:
1.14 timbl 838: conversion[0] = request->conversions;
839: conversion[1] = HTConversions;
1.17 luotonen 840:
1.15 luotonen 841: for(which_list = 0; which_list<2; which_list++) {
842: HTList * cur = conversion[which_list];
843:
844: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25 frystyk 845: if ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33 luotonen 846: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
847: if (!best_match ||
848: better_match(pres->rep, best_match->rep) ||
849: (!better_match(best_match->rep, pres->rep) &&
850: pres->quality > best_quality)) {
1.25 frystyk 851: best_match = pres;
852: best_quality = pres->quality;
1.10 timbl 853: }
854: }
1.2 timbl 855: }
856: }
1.33 luotonen 857:
1.29 frystyk 858: match = best_match ? best_match : NULL;
859: if (match) {
1.40 frystyk 860: request->error_block = YES; /* No more error output to stream */
1.29 frystyk 861: if (match->rep == WWW_SOURCE) {
1.39 frystyk 862: if (TRACE) fprintf(stderr, "StreamStack. Don't know how to handle this, so put out %s to %s\n",
1.29 frystyk 863: HTAtom_name(match->rep),
864: HTAtom_name(rep_out));
865: }
866: return (*match->converter)(
1.25 frystyk 867: request, match->command, rep_in, rep_out,
868: request->output_stream);
1.29 frystyk 869: }
1.42 frystyk 870: {
871: char *msg = NULL;
872: StrAllocCopy(msg, "Can't convert from ");
873: StrAllocCat(msg, HTAtom_name(rep_in));
874: StrAllocCat(msg, " to ");
875: StrAllocCat(msg, HTAtom_name(rep_out));
876: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NOT_IMPLEMENTED,
877: (void *) msg, (int) strlen(msg), "HTStreamStack");
878: free(msg);
879: }
1.2 timbl 880: return NULL;
881: }
882:
883:
884: /* Find the cost of a filter stack
885: ** -------------------------------
886: **
887: ** Must return the cost of the same stack which StreamStack would set up.
888: **
889: ** On entry,
890: ** length The size of the data to be converted
891: */
1.12 timbl 892: PUBLIC float HTStackValue ARGS5(
1.14 timbl 893: HTList *, theseConversions,
1.10 timbl 894: HTFormat, rep_in,
1.2 timbl 895: HTFormat, rep_out,
896: float, initial_value,
897: long int, length)
898: {
1.14 timbl 899: int which_list;
900: HTList* conversion[2];
901:
1.2 timbl 902: if (TRACE) fprintf(stderr,
1.39 frystyk 903: "StackValue.. Evaluating stream stack for %s worth %.3f to %s\n",
1.10 timbl 904: HTAtom_name(rep_in), initial_value,
1.2 timbl 905: HTAtom_name(rep_out));
906:
907: if (rep_out == WWW_SOURCE ||
1.10 timbl 908: rep_out == rep_in) return 0.0;
1.2 timbl 909:
1.12 timbl 910: /* if (!HTPresentations) HTFormatInit(); set up the list */
1.2 timbl 911:
1.14 timbl 912: conversion[0] = theseConversions;
913: conversion[1] = HTConversions;
914:
915: for(which_list = 0; which_list<2; which_list++)
916: if (conversion[which_list]) {
1.15 luotonen 917: HTList * cur = conversion[which_list];
1.2 timbl 918: HTPresentation * pres;
1.15 luotonen 919: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
920: if (pres->rep == rep_in &&
1.17 luotonen 921: (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2 timbl 922: float value = initial_value * pres->quality;
923: if (HTMaxSecs != 0.0)
1.15 luotonen 924: value = value - (length*pres->secs_per_byte + pres->secs)
1.2 timbl 925: /HTMaxSecs;
926: return value;
927: }
928: }
929: }
930:
931: return -1e30; /* Really bad */
1.17 luotonen 932: }
933:
934:
1.2 timbl 935:
1.1 timbl 936:
1.2 timbl 937: /* Push data from a socket down a stream
938: ** -------------------------------------
1.1 timbl 939: **
1.2 timbl 940: ** This routine is responsible for creating and PRESENTING any
1.1 timbl 941: ** graphic (or other) objects described by the file.
1.2 timbl 942: **
943: ** The file number given is assumed to be a TELNET stream ie containing
944: ** CRLF at the end of lines which need to be stripped to LF for unix
945: ** when the format is textual.
946: **
1.26 luotonen 947: ** RETURNS the number of bytes transferred.
948: **
1.1 timbl 949: */
1.26 luotonen 950: PUBLIC int HTCopy ARGS2(
1.2 timbl 951: int, file_number,
952: HTStream*, sink)
1.1 timbl 953: {
1.2 timbl 954: HTStreamClass targetClass;
1.13 timbl 955: HTInputSocket * isoc;
1.26 luotonen 956: int cnt = 0;
957:
1.5 timbl 958: /* Push the data down the stream
1.2 timbl 959: **
960: */
961: targetClass = *(sink->isa); /* Copy pointers to procedures */
1.13 timbl 962: isoc = HTInputSocket_new(file_number);
1.2 timbl 963:
964: /* Push binary from socket down sink
1.10 timbl 965: **
966: ** This operation could be put into a main event loop
1.2 timbl 967: */
968: for(;;) {
969: int status = NETREAD(
1.13 timbl 970: file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2 timbl 971: if (status <= 0) {
972: if (status == 0) break;
973: if (TRACE) fprintf(stderr,
1.39 frystyk 974: "Socket Copy. Read error, read returns %d with errno=%d\n",
1.24 luotonen 975: status, errno);
1.2 timbl 976: break;
977: }
1.26 luotonen 978:
1.8 timbl 979: #ifdef NOT_ASCII
980: {
981: char * p;
1.13 timbl 982: for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8 timbl 983: *p = FROMASCII(*p);
984: }
985: }
986: #endif
987:
1.13 timbl 988: (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26 luotonen 989: cnt += status;
1.2 timbl 990: } /* next bufferload */
1.26 luotonen 991:
1.13 timbl 992: HTInputSocket_free(isoc);
1.26 luotonen 993:
994: return cnt;
1.2 timbl 995: }
996:
1.1 timbl 997:
1.7 secret 998:
999: /* Push data from a file pointer down a stream
1000: ** -------------------------------------
1001: **
1002: ** This routine is responsible for creating and PRESENTING any
1003: ** graphic (or other) objects described by the file.
1004: **
1005: **
1006: */
1007: PUBLIC void HTFileCopy ARGS2(
1008: FILE *, fp,
1009: HTStream*, sink)
1010: {
1011: HTStreamClass targetClass;
1.13 timbl 1012: char input_buffer[INPUT_BUFFER_SIZE];
1.7 secret 1013:
1014: /* Push the data down the stream
1015: **
1016: */
1017: targetClass = *(sink->isa); /* Copy pointers to procedures */
1018:
1019: /* Push binary from socket down sink
1020: */
1021: for(;;) {
1022: int status = fread(
1023: input_buffer, 1, INPUT_BUFFER_SIZE, fp);
1024: if (status == 0) { /* EOF or error */
1025: if (ferror(fp) == 0) break;
1026: if (TRACE) fprintf(stderr,
1.39 frystyk 1027: "File Copy... Read error, read returns %d\n", ferror(fp));
1.7 secret 1028: break;
1029: }
1030: (*targetClass.put_block)(sink, input_buffer, status);
1.13 timbl 1031: } /* next bufferload */
1.7 secret 1032: }
1033:
1034:
1035:
1036:
1.2 timbl 1037: /* Push data from a socket down a stream STRIPPING CR
1038: ** --------------------------------------------------
1039: **
1040: ** This routine is responsible for creating and PRESENTING any
1.8 timbl 1041: ** graphic (or other) objects described by the socket.
1.2 timbl 1042: **
1043: ** The file number given is assumed to be a TELNET stream ie containing
1044: ** CRLF at the end of lines which need to be stripped to LF for unix
1045: ** when the format is textual.
1.37 frystyk 1046: **
1047: ** Character handling is now of type int, Henrik, May 09-94
1.1 timbl 1048: */
1.2 timbl 1049: PUBLIC void HTCopyNoCR ARGS2(
1050: int, file_number,
1051: HTStream*, sink)
1052: {
1.13 timbl 1053: HTStreamClass targetClass;
1054: HTInputSocket * isoc;
1.37 frystyk 1055: int ch;
1.1 timbl 1056:
1.2 timbl 1057: /* Push the data, ignoring CRLF, down the stream
1058: **
1059: */
1060: targetClass = *(sink->isa); /* Copy pointers to procedures */
1061:
1062: /* Push text from telnet socket down sink
1063: **
1064: ** @@@@@ To push strings could be faster? (especially is we
1065: ** cheat and don't ignore CR! :-}
1066: */
1.13 timbl 1067: isoc = HTInputSocket_new(file_number);
1.37 frystyk 1068: while ((ch = HTInputSocket_getCharacter(isoc)) >= 0)
1069: (*targetClass.put_character)(sink, ch);
1.13 timbl 1070: HTInputSocket_free(isoc);
1.2 timbl 1071: }
1.1 timbl 1072:
1.2 timbl 1073:
1.7 secret 1074:
1.2 timbl 1075: /* Parse a socket given format and file number
1076: **
1077: ** This routine is responsible for creating and PRESENTING any
1078: ** graphic (or other) objects described by the file.
1079: **
1080: ** The file number given is assumed to be a TELNET stream ie containing
1081: ** CRLF at the end of lines which need to be stripped to LF for unix
1082: ** when the format is textual.
1083: **
1.42 frystyk 1084: ** Returns <0 on error, HT_LOADED on success.
1.2 timbl 1085: */
1.14 timbl 1086:
1.12 timbl 1087: PUBLIC int HTParseSocket ARGS3(
1.10 timbl 1088: HTFormat, rep_in,
1.2 timbl 1089: int, file_number,
1.12 timbl 1090: HTRequest *, request)
1.2 timbl 1091: {
1092: HTStream * stream;
1093: HTStreamClass targetClass;
1.1 timbl 1094:
1.40 frystyk 1095: if (request->error_stack) {
1096: if (TRACE) fprintf(stderr, "ParseSocket. Called whith non-empty error stack, so I return right away!\n");
1097: return -1;
1098: }
1099:
1.42 frystyk 1100: /* Set up stream stack */
1101: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1102: return -1;
1.1 timbl 1103:
1.3 timbl 1104: /* Push the data, ignoring CRLF if necessary, down the stream
1105: **
1.2 timbl 1106: **
1.3 timbl 1107: ** @@ Bug: This decision ought to be made based on "encoding"
1.9 timbl 1108: ** rather than on format. @@@ When we handle encoding.
1.3 timbl 1109: ** The current method smells anyway.
1.2 timbl 1110: */
1111: targetClass = *(stream->isa); /* Copy pointers to procedures */
1.32 luotonen 1112: if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26 luotonen 1113: || (request->content_encoding &&
1114: request->content_encoding != HTAtom_for("8bit") &&
1115: request->content_encoding != HTAtom_for("7bit"))
1.10 timbl 1116: || strstr(HTAtom_name(rep_in), "image/")
1117: || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29 frystyk 1118: HTCopy(file_number, stream);
1.2 timbl 1119: } else { /* ascii text with CRLFs :-( */
1120: HTCopyNoCR(file_number, stream);
1121: }
1.7 secret 1122: (*targetClass.free)(stream);
1123:
1124: return HT_LOADED;
1125: }
1126:
1127:
1128:
1129: /* Parse a file given format and file pointer
1130: **
1131: ** This routine is responsible for creating and PRESENTING any
1132: ** graphic (or other) objects described by the file.
1133: **
1134: ** The file number given is assumed to be a TELNET stream ie containing
1.10 timbl 1135: ** CRLF at the end of lines which need to be stripped to \n for unix
1.7 secret 1136: ** when the format is textual.
1137: **
1138: */
1.12 timbl 1139: PUBLIC int HTParseFile ARGS3(
1.10 timbl 1140: HTFormat, rep_in,
1.7 secret 1141: FILE *, fp,
1.12 timbl 1142: HTRequest *, request)
1.7 secret 1143: {
1144: HTStream * stream;
1145: HTStreamClass targetClass;
1.40 frystyk 1146:
1147: if (request->error_stack) {
1148: if (TRACE) fprintf(stderr, "ParseFile... Called whith non-empty error stack, so I return right away!\n");
1149: return -1;
1150: }
1.7 secret 1151:
1.42 frystyk 1152: /* Set up stream stack */
1153: if ((stream = HTStreamStack(rep_in, request, YES)) == NULL)
1154: return -1;
1.7 secret 1155:
1.9 timbl 1156: /* Push the data down the stream
1.7 secret 1157: **
1158: **
1159: ** @@ Bug: This decision ought to be made based on "encoding"
1.10 timbl 1160: ** rather than on content-type. @@@ When we handle encoding.
1.7 secret 1161: ** The current method smells anyway.
1162: */
1163: targetClass = *(stream->isa); /* Copy pointers to procedures */
1164: HTFileCopy(fp, stream);
1.2 timbl 1165: (*targetClass.free)(stream);
1.1 timbl 1166:
1.2 timbl 1167: return HT_LOADED;
1.1 timbl 1168: }
1.2 timbl 1169:
1.10 timbl 1170:
1171: /* Converter stream: Network Telnet to internal character text
1172: ** -----------------------------------------------------------
1173: **
1174: ** The input is assumed to be in ASCII, with lines delimited
1175: ** by (13,10) pairs, These pairs are converted into (CR,LF)
1176: ** pairs in the local representation. The (CR,LF) sequence
1177: ** when found is changed to a '\n' character, the internal
1178: ** C representation of a new line.
1179: */
1180:
1181:
1.11 timbl 1182: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10 timbl 1183: {
1184: char c = FROMASCII(net_char);
1185: if (me->had_cr) {
1186: if (c==LF) {
1187: me->sink->isa->put_character(me->sink, '\n'); /* Newline */
1188: me->had_cr = NO;
1189: return;
1190: } else {
1191: me->sink->isa->put_character(me->sink, CR); /* leftover */
1192: }
1193: }
1194: me->had_cr = (c==CR);
1195: if (!me->had_cr)
1196: me->sink->isa->put_character(me->sink, c); /* normal */
1197: }
1198:
1.11 timbl 1199: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10 timbl 1200: {
1201: CONST char * p;
1202: for(p=s; *p; p++) NetToText_put_character(me, *p);
1203: }
1204:
1.11 timbl 1205: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10 timbl 1206: {
1207: CONST char * p;
1208: for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
1209: }
1210:
1211: PRIVATE void NetToText_free ARGS1(HTStream *, me)
1212: {
1213: me->sink->isa->free(me->sink); /* Close rest of pipe */
1214: free(me);
1215: }
1216:
1217: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
1218: {
1219: me->sink->isa->abort(me->sink,e); /* Abort rest of pipe */
1220: free(me);
1221: }
1222:
1223: /* The class structure
1224: */
1225: PRIVATE HTStreamClass NetToTextClass = {
1226: "NetToText",
1227: NetToText_free,
1228: NetToText_abort,
1229: NetToText_put_character,
1230: NetToText_put_string,
1231: NetToText_put_block
1232: };
1233:
1234: /* The creation method
1235: */
1236: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
1237: {
1238: HTStream* me = (HTStream*)malloc(sizeof(*me));
1239: if (me == NULL) outofmem(__FILE__, "NetToText");
1240: me->isa = &NetToTextClass;
1241:
1242: me->had_cr = NO;
1243: me->sink = sink;
1244: return me;
1245: }
1.2 timbl 1246:
1247:
Webmaster