Annotation of libwww/Library/src/HTGopher.c, revision 2.8
1.1 timbl 1: /* GOPHER ACCESS HTGopher.c
2: ** =============
3: **
4: ** History:
5: ** 26 Sep 90 Adapted from other accesses (News, HTTP) TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
7: */
8:
1.2 timbl 9: /* Implements:
10: */
11: #include "HTGopher.h"
12:
1.3 timbl 13:
1.1 timbl 14: #define GOPHER_PORT 70 /* See protocol spec */
15: #define BIG 1024 /* Bug */
16: #define LINE_LENGTH 256 /* Bug */
17:
18: /* Gopher entity types:
19: */
20: #define GOPHER_TEXT '0'
21: #define GOPHER_MENU '1'
22: #define GOPHER_CSO '2'
23: #define GOPHER_ERROR '3'
24: #define GOPHER_MACBINHEX '4'
25: #define GOPHER_PCBINHEX '5'
26: #define GOPHER_UUENCODED '6'
27: #define GOPHER_INDEX '7'
28: #define GOPHER_TELNET '8'
2.7 secret 29: #define GOPHER_BINARY '9'
1.3 timbl 30: #define GOPHER_GIF 'g'
2.7 secret 31: #define GOPHER_HTML 'h' /* HTML */
32: #define GOPHER_SOUND 's'
33: #define GOPHER_WWW 'w' /* W3 address */
1.3 timbl 34: #define GOPHER_IMAGE 'I'
2.7 secret 35: #define GOPHER_TN3270 'T'
1.1 timbl 36: #define GOPHER_DUPLICATE '+'
37:
38: #include <ctype.h>
39: #include "HTUtils.h" /* Coding convention macros */
40: #include "tcp.h"
41:
42:
43: #include "HTParse.h"
44: #include "HTFormat.h"
45: #include "HTTCP.h"
46:
1.2 timbl 47: /* Hypertext object building machinery
48: */
49: #include "HTML.h"
50:
51: #define PUTC(c) (*targetClass.put_character)(target, c)
52: #define PUTS(s) (*targetClass.put_string)(target, s)
53: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
54: #define END(e) (*targetClass.end_element)(target, e)
55: #define FREE_TARGET (*targetClass.free)(target)
56: struct _HTStructured {
57: CONST HTStructuredClass * isa;
58: /* ... */
59: };
60:
61: PRIVATE HTStructured *target; /* the new hypertext */
62: PRIVATE HTStructuredClass targetClass; /* Its action routines */
63:
64:
2.8 ! timbl 65: #define GOPHER_PROGRESS(foo) HTAlert(foo)
1.1 timbl 66:
67:
2.8 ! timbl 68: #define NEXT_CHAR HTGetChararcter()
1.1 timbl 69:
70:
2.8 ! timbl 71:
1.1 timbl 72: /* Module-wide variables
73: */
74: PRIVATE int s; /* Socket for GopherHost */
75:
76:
1.2 timbl 77:
1.1 timbl 78: /* Matrix of allowed characters in filenames
79: ** -----------------------------------------
80: */
81:
82: PRIVATE BOOL acceptable[256];
83: PRIVATE BOOL acceptable_inited = NO;
84:
85: PRIVATE void init_acceptable NOARGS
86: {
87: unsigned int i;
88: char * good =
89: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
90: for(i=0; i<256; i++) acceptable[i] = NO;
91: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
92: acceptable_inited = YES;
93: }
94:
95: PRIVATE CONST char hex[17] = "0123456789abcdef";
96:
97: /* Decdoe one hex character
98: */
99:
100: PRIVATE char from_hex ARGS1(char, c)
101: {
102: return (c>='0')&&(c<='9') ? c-'0'
103: : (c>='A')&&(c<='F') ? c-'A'+10
104: : (c>='a')&&(c<='f') ? c-'a'+10
105: : 0;
106: }
107:
108:
109:
110: /* Paste in an Anchor
111: ** ------------------
112: **
113: ** The title of the destination is set, as there is no way
114: ** of knowing what the title is when we arrive.
115: **
116: ** On entry,
117: ** HT is in append mode.
118: ** text points to the text to be put into the file, 0 terminated.
119: ** addr points to the hypertext refernce address 0 terminated.
120: */
121: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
122: {
1.2 timbl 123:
124:
125:
126: BOOL present[HTML_A_ATTRIBUTES];
127: CONST char * value[HTML_A_ATTRIBUTES];
1.1 timbl 128:
1.2 timbl 129: int i;
130:
131: for (i=0; i<HTML_A_ATTRIBUTES; i++) present[i]=0;
132: present[HTML_A_HREF] = YES;
133: value[HTML_A_HREF] = addr;
134: present[HTML_A_TITLE] = YES;
135: value[HTML_A_TITLE] = text;
136:
137: (*targetClass.start_element)(target, HTML_A, present, value);
1.1 timbl 138:
1.2 timbl 139: PUTS(text);
140: END(HTML_A);
1.1 timbl 141: }
142:
143:
144: /* Parse a Gopher Menu document
145: ** ============================
146: **
147: */
148:
149: PRIVATE void parse_menu ARGS2 (
1.2 timbl 150: CONST char *, arg,
151: HTParentAnchor *, anAnchor)
1.1 timbl 152: {
153: char gtype;
154: char ch;
155: char line[BIG];
156: char address[BIG];
157: char *name, *selector; /* Gopher menu fields */
158: char *host;
159: char *port;
160: char *p = line;
1.2 timbl 161: CONST char *title;
1.1 timbl 162:
163: #define TAB '\t'
164: #define HEX_ESCAPE '%'
165:
166:
1.2 timbl 167: title = HTAnchor_title(anAnchor);
168: if (title) {
169: START(HTML_H1);
170: PUTS(title);
171: END(HTML_H1);
172: } else
173: PUTS("Select one of:\n\n");
1.1 timbl 174:
1.2 timbl 175: START(HTML_MENU);
1.1 timbl 176: while ((ch=NEXT_CHAR) != (char)EOF) {
1.3 timbl 177: if (ch != LF) {
1.1 timbl 178: *p = ch; /* Put character in line */
179: if (p< &line[BIG-1]) p++;
180:
181: } else {
182: *p++ = 0; /* Terminate line */
183: p = line; /* Scan it to parse it */
184: port = 0; /* Flag "not parsed" */
185: if (TRACE) fprintf(stderr, "HTGopher: Menu item: %s\n", line);
186: gtype = *p++;
187:
188: /* Break on line with a dot by itself */
189: if ((gtype=='.') && ((*p=='\r') || (*p==0))) break;
190:
191: if (gtype && *p) {
192: name = p;
193: selector = strchr(name, TAB);
1.3 timbl 194: START(HTML_LI);
1.1 timbl 195: if (selector) {
196: *selector++ = 0; /* Terminate name */
197: host = strchr(selector, TAB);
198: if (host) {
199: *host++ = 0; /* Terminate selector */
200: port = strchr(host, TAB);
201: if (port) {
202: char *junk;
203: port[0] = ':'; /* delimit host a la W3 */
204: junk = strchr(port, TAB);
205: if (junk) *junk++ = 0; /* Chop port */
206: if ((port[1]=='0') && (!port[2]))
207: port[0] = 0; /* 0 means none */
208: } /* no port */
209: } /* host ok */
210: } /* selector ok */
211: } /* gtype and name ok */
212:
213: if (gtype == GOPHER_WWW) { /* Gopher pointer to W3 */
214: write_anchor(name, selector);
2.7 secret 215:
1.1 timbl 216: } else if (port) { /* Other types need port */
217: if (gtype == GOPHER_TELNET) {
218: if (*selector) sprintf(address, "telnet://%s@%s/",
2.7 secret 219: selector, host);
1.1 timbl 220: else sprintf(address, "telnet://%s/", host);
2.7 secret 221: }
222: else if (gtype == GOPHER_TN3270)
223: {
224: if (*selector)
225: sprintf(address, "tn3270://%s@%s/",
226: selector, host);
227: else
228: sprintf(address, "tn3270://%s/", host);
229: }
230: else { /* If parsed ok */
1.1 timbl 231: char *q;
232: char *p;
233: sprintf(address, "//%s/%c", host, gtype);
234: q = address+ strlen(address);
235: for(p=selector; *p; p++) { /* Encode selector string */
236: if (acceptable[*p]) *q++ = *p;
237: else {
238: *q++ = HEX_ESCAPE; /* Means hex coming */
239: *q++ = hex[(TOASCII(*p)) >> 4];
240: *q++ = hex[(TOASCII(*p)) & 15];
241: }
242: }
243: *q++ = 0; /* terminate address */
244: }
1.2 timbl 245: PUTS(" "); /* Prettier JW/TBL */
2.7 secret 246: /* Error response from Gopher doesn't deserve to
247: be a hyperlink. */
248: if (strcmp (address, "gopher://error.host:1/0"))
249: write_anchor(name, address);
250: else
251: PUTS(name);
252: PUTS("\n");
1.1 timbl 253: } else { /* parse error */
254: if (TRACE) fprintf(stderr,
255: "HTGopher: Bad menu item.\n");
1.2 timbl 256: PUTS(line);
257:
1.1 timbl 258: } /* parse error */
259:
260: p = line; /* Start again at beginning of line */
261:
262: } /* if end of line */
263:
264: } /* Loop over characters */
265:
1.2 timbl 266: END(HTML_MENU);
267: FREE_TARGET;
268:
1.1 timbl 269: return;
270: }
2.7 secret 271: /* Parse a Gopher CSO document
272: ** ============================
273: **
274: ** Accepts an open socket to a CSO server waiting to send us
275: ** data and puts it on the screen in a reasonable manner.
276: **
277: ** Perhaps this data can be automatically linked to some
278: ** other source as well???
279: **
280: ** Taken from hacking by Lou Montulli@ukanaix.cc.ukans.edu
281: ** on XMosaic-1.1, and put on libwww 2.11 by Arthur Secret,
282: ** secret@dxcern.cern.ch .
283: */
284:
285: PRIVATE void parse_cso ARGS2 (
286: CONST char *, arg,
287: HTParentAnchor *,anAnchor)
288: {
289: char ch;
290: char line[BIG];
291: char *p = line;
292: char *second_colon, last_char='\0';
293: CONST char *title;
294:
295: title = HTAnchor_title(anAnchor);
296: START(HTML_H1);
297: PUTS("CSO Search Results");
298: END(HTML_H1);
299: START(HTML_PRE);
300:
301: /* start grabbing chars from the network */
302: while ((ch=NEXT_CHAR) != (char)EOF)
303: {
304: if (ch != '\n')
305: {
306: *p = ch; /* Put character in line */
307: if (p< &line[BIG-1]) p++;
308: }
309: else
310: {
311: *p++ = 0; /* Terminate line */
312: p = line; /* Scan it to parse it */
313:
314: /* OK we now have a line in 'p' lets parse it and
315: print it */
316:
317: /* Break on line that begins with a 2. It's the end of
318: * data.
319: */
320: if (*p == '2')
321: break;
322:
323: /* lines beginning with 5 are errors,
324: * print them and quit
325: */
326: if (*p == '5') {
327: START(HTML_H2);
328: PUTS(p+4);
329: END(HTML_H2);
330: break;
331: }
332:
333: if(*p == '-') {
334: /* data lines look like -200:#:
335: * where # is the search result number and can be
336: * multiple digits (infinate?)
337: * find the second colon and check the digit to the
338: * left of it to see if they are diferent
339: * if they are then a different person is starting.
340: * make this line an <h2>
341: */
342:
343: /* find the second_colon */
344: second_colon = strchr( strchr(p,':')+1, ':');
345:
346: if(second_colon != NULL) { /* error check */
347:
348: if (*(second_colon-1) != last_char)
349: /* print seperator */
350: {
351: END(HTML_PRE);
352: START(HTML_H2);
353: }
354:
355:
356: /* right now the record appears with the alias
357: * (first line)
358: * as the header and the rest as <pre> text
359: * It might look better with the name as the
360: * header and the rest as a <ul> with <li> tags
361: * I'm not sure whether the name field comes in any
362: * special order or if its even required in a
363: * record,
364: * so for now the first line is the header no
365: * matter
366: * what it is (it's almost always the alias)
367: * A <dl> with the first line as the <DT> and
368: * the rest as some form of <DD> might good also?
369: */
370:
371: /* print data */
372: PUTS(second_colon+1);
373: PUTS("\n");
374:
375: if (*(second_colon-1) != last_char)
376: /* end seperator */
377: {
378: END(HTML_H2);
379: START(HTML_PRE);
380: }
381:
382: /* save the char before the second colon
383: * for comparison on the next pass
384: */
385: last_char = *(second_colon-1) ;
386:
387: } /* end if second_colon */
388: } /* end if *p == '-' */
389: } /* if end of line */
390:
391: } /* Loop over characters */
392:
393: /* end the text block */
394: PUTS("\n");
395: END(HTML_PRE);
396: PUTS("\n");
397: FREE_TARGET;
398:
399: return; /* all done */
400: } /* end of procedure */
1.1 timbl 401:
402: /* Display a Gopher Index document
2.7 secret 403: ** -------------------------------
404: */
1.1 timbl 405:
406: PRIVATE void display_index ARGS2 (
2.7 secret 407: CONST char *, arg,
408: HTParentAnchor *,anAnchor)
1.1 timbl 409: {
1.2 timbl 410:
411: START(HTML_H1);
412: PUTS(arg);
2.7 secret 413: PUTS(" index");
1.2 timbl 414: END(HTML_H1);
2.7 secret 415: START(HTML_ISINDEX);
416: PUTS("\nThis is a searchable Gopher index.");
417: PUTS(" Please enter keywords to search for.\n");
418:
419: if (!HTAnchor_title(anAnchor))
420: HTAnchor_setTitle(anAnchor, arg);
1.2 timbl 421:
2.7 secret 422: FREE_TARGET;
423: return;
424: }
425:
426:
427: /* Display a CSO index document
428: ** -------------------------------
429: */
430:
431: PRIVATE void display_cso ARGS2 (
432: CONST char *, arg,
433: HTParentAnchor *,anAnchor)
434: {
435: START(HTML_H1);
436: PUTS(arg);
437: PUTS(" index");
438: END(HTML_H1);
439: START(HTML_ISINDEX);
440: PUTS("\nThis is a searchable index of a CSO database.\n");
441: PUTS(" Please enter keywords to search for. The keywords that you enter");
442: PUTS(" will allow you to search on a person's name in the database.\n");
443:
1.1 timbl 444: if (!HTAnchor_title(anAnchor))
1.2 timbl 445: HTAnchor_setTitle(anAnchor, arg);
1.1 timbl 446:
1.2 timbl 447: FREE_TARGET;
1.1 timbl 448: return;
449: }
450:
451:
452: /* De-escape a selector into a command
453: ** -----------------------------------
454: **
455: ** The % hex escapes are converted. Otheriwse, the string is copied.
456: */
457: PRIVATE void de_escape ARGS2(char *, command, CONST char *, selector)
458: {
459: CONST char * p = selector;
460: char * q = command;
461: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
462: while (*p) { /* Decode hex */
463: if (*p == HEX_ESCAPE) {
464: char c;
465: unsigned int b;
466: p++;
467: c = *p++;
468: b = from_hex(c);
469: c = *p++;
470: if (!c) break; /* Odd number of chars! */
471: *q++ = FROMASCII((b<<4) + from_hex(c));
472: } else {
473: *q++ = *p++; /* Record */
474: }
475: }
476: *q++ = 0; /* Terminate command */
477:
478: }
479:
480:
481: /* Load by name HTLoadGopher
482: ** ============
483: **
484: ** Bug: No decoding of strange data types as yet.
485: **
486: */
1.2 timbl 487: PUBLIC int HTLoadGopher ARGS4(
488: CONST char *, arg,
489: HTParentAnchor *, anAnchor,
490: HTFormat, format_out,
491: HTStream*, sink)
1.1 timbl 492: {
493: char *command; /* The whole command */
494: int status; /* tcp return */
495: char gtype; /* Gopher Node type */
496: char * selector; /* Selector string */
497:
498: struct sockaddr_in soc_address; /* Binary network address */
499: struct sockaddr_in* sin = &soc_address;
500:
501: if (!acceptable_inited) init_acceptable();
502:
503: if (!arg) return -3; /* Bad if no name sepcified */
504: if (!*arg) return -2; /* Bad if name had zero length */
505:
506: if (TRACE) fprintf(stderr, "HTGopher: Looking for %s\n", arg);
507:
508:
509: /* Set up defaults:
510: */
511: sin->sin_family = AF_INET; /* Family, host order */
512: sin->sin_port = htons(GOPHER_PORT); /* Default: new port, */
513:
514: /* Get node name and optional port number:
515: */
516: {
517: char *p1 = HTParse(arg, "", PARSE_HOST);
518: int status = HTParseInet(sin, p1);
519: free(p1);
520: if (status) return status; /* Bad */
521: }
522:
523: /* Get entity type, and selector string.
524: */
525: {
526: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
527: gtype = '1'; /* Default = menu */
528: selector = p1;
529: if ((*selector++=='/') && (*selector)) { /* Skip first slash */
530: gtype = *selector++; /* Pick up gtype */
531: }
532: if (gtype == GOPHER_INDEX) {
533: char * query;
534: HTAnchor_setIndex(anAnchor); /* Search is allowed */
535: query = strchr(selector, '?'); /* Look for search string */
536: if (!query || !query[1]) { /* No search required */
1.3 timbl 537: target = HTML_new(anAnchor, format_out, sink);
1.2 timbl 538: targetClass = *target->isa;
1.1 timbl 539: display_index(arg, anAnchor); /* Display "cover page" */
2.6 timbl 540: return HT_LOADED; /* Local function only */
1.1 timbl 541: }
542: *query++ = 0; /* Skip '?' */
543: command = malloc(strlen(selector)+ 1 + strlen(query)+ 2 + 1);
544: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
545:
546: de_escape(command, selector); /* Bug fix TBL 921208 */
547:
548: strcat(command, "\t");
549:
550: { /* Remove plus signs 921006 */
551: char *p;
552: for (p=query; *p; p++) {
553: if (*p == '+') *p = ' ';
554: }
555: }
556: strcat(command, query);
2.7 secret 557: } else if (gtype == GOPHER_CSO) {
558: char * query;
559: HTAnchor_setIndex(anAnchor); /* Search is allowed */
560: query = strchr(selector, '?'); /* Look for search string */
561: if (!query || !query[1]) { /* No search required */
562: target = HTML_new(anAnchor, format_out, sink);
563: targetClass = *target->isa;
564: display_cso(arg, anAnchor); /* Display "cover page" */
565: return HT_LOADED; /* Local function only */
566: }
567: *query++ = 0; /* Skip '?' */
568: command = malloc(strlen("query")+ 1 + strlen(query)+ 2 + 1);
569: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
570:
571: de_escape(command, selector); /* Bug fix TBL 921208 */
572:
573: strcpy(command, "query ");
574:
575: { /* Remove plus signs 921006 */
576: char *p;
577: for (p=query; *p; p++) {
578: if (*p == '+') *p = ' ';
579: }
580: }
581: strcat(command, query);
582:
1.1 timbl 583:
584: } else { /* Not index */
585: command = command = malloc(strlen(selector)+2+1);
586: de_escape(command, selector);
587: }
588: free(p1);
589: }
590:
1.3 timbl 591: {
592: char * p = command + strlen(command);
593: *p++ = CR; /* Macros to be correct on Mac */
594: *p++ = LF;
595: *p++ = 0;
596: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
597: }
1.1 timbl 598:
599: /* Set up a socket to the server for the data:
600: */
601: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
602: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
603: if (status<0){
604: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to connect to remote host for `%s'.\n",
605: arg);
606: free(command);
607: return HTInetStatus("connect");
608: }
609:
610: HTInitInput(s); /* Set up input buffering */
611:
612: if (TRACE) fprintf(stderr, "HTGopher: Connected, writing command `%s' to socket %d\n", command, s);
613:
614: #ifdef NOT_ASCII
615: {
616: char * p;
617: for(p = command; *p; p++) {
618: *p = TOASCII(*p);
619: }
620: }
621: #endif
622:
623: status = NETWRITE(s, command, (int)strlen(command));
624: free(command);
625: if (status<0){
626: if (TRACE) fprintf(stderr, "HTGopher: Unable to send command.\n");
627: return HTInetStatus("send");
628: }
629:
630: /* Now read the data from the socket:
631: */
632: switch (gtype) {
633:
634: case GOPHER_HTML :
1.2 timbl 635: HTParseSocket(WWW_HTML, format_out, anAnchor, s, sink);
636: break;
1.1 timbl 637:
1.3 timbl 638: case GOPHER_GIF:
639: case GOPHER_IMAGE:
640: HTParseSocket(HTAtom_for("image/gif"),
641: format_out, anAnchor, s, sink);
642: break;
1.1 timbl 643: case GOPHER_MENU :
644: case GOPHER_INDEX :
1.3 timbl 645: target = HTML_new(anAnchor, format_out, sink);
1.2 timbl 646: targetClass = *target->isa;
1.1 timbl 647: parse_menu(arg, anAnchor);
1.2 timbl 648: break;
2.7 secret 649:
650: case GOPHER_CSO:
651: target = HTML_new(anAnchor, format_out, sink);
652: targetClass = *target->isa;
653: parse_cso(arg, anAnchor);
654: break;
655:
656: case GOPHER_MACBINHEX:
657: case GOPHER_PCBINHEX:
658: case GOPHER_UUENCODED:
659: case GOPHER_BINARY:
660: /* Specifying WWW_UNKNOWN forces dump to local disk. */
661: HTParseSocket (WWW_UNKNOWN, format_out, anAnchor, s, sink);
662: break;
663:
1.1 timbl 664: case GOPHER_TEXT :
665: default: /* @@ parse as plain text */
1.2 timbl 666: HTParseSocket(WWW_PLAINTEXT, format_out, anAnchor, s, sink);
2.7 secret 667: break;
668:
669: case GOPHER_SOUND :
670: HTParseSocket(WWW_AUDIO, format_out, anAnchor, s, sink);
1.2 timbl 671: break;
672:
1.1 timbl 673: } /* switch(gtype) */
1.2 timbl 674:
675: NETCLOSE(s);
676: return HT_LOADED;
1.1 timbl 677: }
1.2 timbl 678:
679: PUBLIC HTProtocol HTGopher = { "gopher", HTLoadGopher, NULL };
1.1 timbl 680:
Webmaster