Annotation of libwww/Library/src/HTGopher.c, revision 2.15
1.1 timbl 1: /* GOPHER ACCESS HTGopher.c
2: ** =============
3: **
4: ** History:
5: ** 26 Sep 90 Adapted from other accesses (News, HTTP) TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
7: */
8:
1.2 timbl 9: /* Implements:
10: */
11: #include "HTGopher.h"
12:
1.3 timbl 13:
1.1 timbl 14: #define GOPHER_PORT 70 /* Default TCP port, used when the address gives none. See protocol spec */
15: #define BIG 1024 /* Size of the fixed line buffers in the menu and CSO parsers; longer lines are not stored in full (known bug) */
16: #define LINE_LENGTH 256 /* Bug. Not referenced in the visible part of this file. */
17:
18: /* Gopher entity types:
19: */
20: #define GOPHER_TEXT '0' /* loaded as WWW_PLAINTEXT (also the fallback for unknown types) */
21: #define GOPHER_MENU '1' /* rendered by parse_menu; default when the path names no type */
22: #define GOPHER_CSO '2' /* cover page from display_cso, results from parse_cso */
23: #define GOPHER_ERROR '3' /* not referenced elsewhere in the visible source */
24: #define GOPHER_MACBINHEX '4' /* loaded as WWW_UNKNOWN */
25: #define GOPHER_PCBINHEX '5' /* loaded as WWW_UNKNOWN */
26: #define GOPHER_UUENCODED '6' /* loaded as WWW_UNKNOWN */
27: #define GOPHER_INDEX '7' /* cover page from display_index, search results from parse_menu */
28: #define GOPHER_TELNET '8' /* menu item becomes a telnet:// link */
2.7 secret 29: #define GOPHER_BINARY '9' /* loaded as WWW_UNKNOWN */
1.3 timbl 30: #define GOPHER_GIF 'g' /* loaded as "image/gif" */
2.7 secret 31: #define GOPHER_HTML 'h' /* HTML; loaded as WWW_HTML */
32: #define GOPHER_SOUND 's' /* loaded as WWW_AUDIO */
33: #define GOPHER_WWW 'w' /* W3 address: the selector itself is used as the link target */
1.3 timbl 34: #define GOPHER_IMAGE 'I' /* loaded as "image/gif" */
2.7 secret 35: #define GOPHER_TN3270 'T' /* menu item becomes a tn3270:// link */
1.1 timbl 36: #define GOPHER_DUPLICATE '+' /* not referenced elsewhere in the visible source */
37:
38: #include <ctype.h>
39: #include "HTUtils.h" /* Coding convention macros */
40: #include "tcp.h"
41:
42:
43: #include "HTParse.h"
44: #include "HTFormat.h"
45: #include "HTTCP.h"
46:
1.2 timbl 47: /* Hypertext object building machinery
48: */
49: #include "HTML.h"
50:
/* Shorthands for the output stream: each one calls an action routine out of
** targetClass, passing the module-level `target` as the stream object.
** START passes 0 for both attribute arguments, i.e. an element without
** attributes. */
51: #define PUTC(c) (*targetClass.put_character)(target, c)
52: #define PUTS(s) (*targetClass.put_string)(target, s)
53: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
54: #define END(e) (*targetClass.end_element)(target, e)
55: #define FREE_TARGET (*targetClass.free)(target)
/* Only the leading class pointer is spelled out here; this file reads it as
** target->isa to obtain the action routines. */
56: struct _HTStructured {
57: CONST HTStructuredClass * isa;
58: /* ... */
59: };
60:
61: PRIVATE HTStructured *target; /* the new hypertext; set from HTML_new() in HTLoadGopher */
62: PRIVATE HTStructuredClass targetClass; /* Its action routines: a copy of *target->isa */
63:
64:
/* Progress report hook: forwards its argument to HTAlert.  Not used in the
** visible part of this file. */
2.8 timbl 65: #define GOPHER_PROGRESS(foo) HTAlert(foo)
1.1 timbl 66:
67:
/* Fetch the next input character.  Expands to a call on a variable named
** `isoc`, so it can only be used where such a local is in scope. */
2.12 timbl 68: #define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1 timbl 69:
70:
2.8 timbl 71:
1.1 timbl 72: /* Module-wide variables
73: */
74: PRIVATE int s; /* Socket for GopherHost; opened and closed in HTLoadGopher. parse_menu and parse_cso take a parameter of the same name that hides it. */
75:
76:
1.2 timbl 77:
1.1 timbl 78: /* Matrix of allowed characters in filenames
79: ** -----------------------------------------
80: */
81:
82: PRIVATE BOOL acceptable[256];
83: PRIVATE BOOL acceptable_inited = NO;
84:
85: PRIVATE void init_acceptable NOARGS
86: {
87: unsigned int i;
88: char * good =
89: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
90: for(i=0; i<256; i++) acceptable[i] = NO;
91: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
92: acceptable_inited = YES;
93: }
94:
95: PRIVATE CONST char hex[17] = "0123456789abcdef";
96:
97: /* Decdoe one hex character
98: */
99:
100: PRIVATE char from_hex ARGS1(char, c)
101: {
102: return (c>='0')&&(c<='9') ? c-'0'
103: : (c>='A')&&(c<='F') ? c-'A'+10
104: : (c>='a')&&(c<='f') ? c-'a'+10
105: : 0;
106: }
107:
108:
109:
110: /* Paste in an Anchor
111: ** ------------------
112: **
113: ** The title of the destination is set, as there is no way
114: ** of knowing what the title is when we arrive.
115: **
116: ** On entry,
117: ** HT is in append mode.
118: ** text points to the text to be put into the file, 0 terminated.
119: ** addr points to the hypertext refernce address 0 terminated.
120: */
121: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
122: {
1.2 timbl 123:
124:
125:
126: BOOL present[HTML_A_ATTRIBUTES];
127: CONST char * value[HTML_A_ATTRIBUTES];
1.1 timbl 128:
1.2 timbl 129: int i;
130:
131: for (i=0; i<HTML_A_ATTRIBUTES; i++) present[i]=0;
132: present[HTML_A_HREF] = YES;
133: value[HTML_A_HREF] = addr;
134: present[HTML_A_TITLE] = YES;
135: value[HTML_A_TITLE] = text;
136:
137: (*targetClass.start_element)(target, HTML_A, present, value);
1.1 timbl 138:
1.2 timbl 139: PUTS(text);
140: END(HTML_A);
1.1 timbl 141: }
142:
143:
144: /* Parse a Gopher Menu document
145: ** ============================
146: **
147: */
148:
2.11 timbl 149: PRIVATE void parse_menu ARGS3 (
150: int , s,
1.2 timbl 151: CONST char *, arg,
152: HTParentAnchor *, anAnchor)
1.1 timbl 153: {
154: char gtype;
155: char ch;
156: char line[BIG];
157: char address[BIG];
2.14 luotonen 158: char *name = "";
159: char *selector = ""; /* Gopher menu fields */
160: char *host = "";
1.1 timbl 161: char *port;
162: char *p = line;
1.2 timbl 163: CONST char *title;
2.12 timbl 164: HTInputSocket * isoc = HTInputSocket_new(s);
2.11 timbl 165:
1.1 timbl 166: #define TAB '\t'
167: #define HEX_ESCAPE '%'
168:
169:
1.2 timbl 170: title = HTAnchor_title(anAnchor);
171: if (title) {
172: START(HTML_H1);
173: PUTS(title);
174: END(HTML_H1);
175: } else
176: PUTS("Select one of:\n\n");
1.1 timbl 177:
1.2 timbl 178: START(HTML_MENU);
1.1 timbl 179: while ((ch=NEXT_CHAR) != (char)EOF) {
1.3 timbl 180: if (ch != LF) {
1.1 timbl 181: *p = ch; /* Put character in line */
182: if (p< &line[BIG-1]) p++;
183:
184: } else {
185: *p++ = 0; /* Terminate line */
186: p = line; /* Scan it to parse it */
187: port = 0; /* Flag "not parsed" */
188: if (TRACE) fprintf(stderr, "HTGopher: Menu item: %s\n", line);
189: gtype = *p++;
190:
191: /* Break on line with a dot by itself */
192: if ((gtype=='.') && ((*p=='\r') || (*p==0))) break;
193:
194: if (gtype && *p) {
195: name = p;
196: selector = strchr(name, TAB);
1.3 timbl 197: START(HTML_LI);
1.1 timbl 198: if (selector) {
199: *selector++ = 0; /* Terminate name */
200: host = strchr(selector, TAB);
201: if (host) {
202: *host++ = 0; /* Terminate selector */
203: port = strchr(host, TAB);
204: if (port) {
205: char *junk;
206: port[0] = ':'; /* delimit host a la W3 */
207: junk = strchr(port, TAB);
208: if (junk) *junk++ = 0; /* Chop port */
209: if ((port[1]=='0') && (!port[2]))
210: port[0] = 0; /* 0 means none */
211: } /* no port */
212: } /* host ok */
213: } /* selector ok */
214: } /* gtype and name ok */
215:
216: if (gtype == GOPHER_WWW) { /* Gopher pointer to W3 */
217: write_anchor(name, selector);
2.7 secret 218:
1.1 timbl 219: } else if (port) { /* Other types need port */
220: if (gtype == GOPHER_TELNET) {
221: if (*selector) sprintf(address, "telnet://%s@%s/",
2.7 secret 222: selector, host);
1.1 timbl 223: else sprintf(address, "telnet://%s/", host);
2.7 secret 224: }
225: else if (gtype == GOPHER_TN3270)
226: {
227: if (*selector)
228: sprintf(address, "tn3270://%s@%s/",
229: selector, host);
230: else
231: sprintf(address, "tn3270://%s/", host);
232: }
233: else { /* If parsed ok */
1.1 timbl 234: char *q;
235: char *p;
236: sprintf(address, "//%s/%c", host, gtype);
237: q = address+ strlen(address);
238: for(p=selector; *p; p++) { /* Encode selector string */
2.14 luotonen 239: if (acceptable[(int)*p]) *q++ = *p;
1.1 timbl 240: else {
241: *q++ = HEX_ESCAPE; /* Means hex coming */
242: *q++ = hex[(TOASCII(*p)) >> 4];
243: *q++ = hex[(TOASCII(*p)) & 15];
244: }
245: }
246: *q++ = 0; /* terminate address */
247: }
1.2 timbl 248: PUTS(" "); /* Prettier JW/TBL */
2.7 secret 249: /* Error response from Gopher doesn't deserve to
250: be a hyperlink. */
251: if (strcmp (address, "gopher://error.host:1/0"))
252: write_anchor(name, address);
253: else
254: PUTS(name);
255: PUTS("\n");
1.1 timbl 256: } else { /* parse error */
257: if (TRACE) fprintf(stderr,
258: "HTGopher: Bad menu item.\n");
1.2 timbl 259: PUTS(line);
260:
1.1 timbl 261: } /* parse error */
262:
263: p = line; /* Start again at beginning of line */
264:
265: } /* if end of line */
266:
267: } /* Loop over characters */
268:
1.2 timbl 269: END(HTML_MENU);
270: FREE_TARGET;
271:
2.11 timbl 272: HTInputSocket_free(isoc);
1.1 timbl 273: return;
274: }
2.11 timbl 275:
276:
2.7 secret 277: /* Parse a Gopher CSO document
278: ** ============================
279: **
280: ** Accepts an open socket to a CSO server waiting to send us
281: ** data and puts it on the screen in a reasonable manner.
282: **
283: ** Perhaps this data can be automatically linked to some
284: ** other source as well???
285: **
286: ** Taken from hacking by Lou Montulli@ukanaix.cc.ukans.edu
287: ** on XMosaic-1.1, and put on libwww 2.11 by Arthur Secret,
288: ** secret@dxcern.cern.ch .
289: */
290:
2.11 timbl 291: PRIVATE void parse_cso ARGS3 (
292: int, s,
293: CONST char *, arg,
294: HTParentAnchor *, anAnchor)
2.7 secret 295: {
296: char ch;
297: char line[BIG];
298: char *p = line;
299: char *second_colon, last_char='\0';
300: CONST char *title;
2.11 timbl 301: HTInputSocket * isoc = HTInputSocket_new(s);
2.7 secret 302:
303: title = HTAnchor_title(anAnchor);
304: START(HTML_H1);
305: PUTS("CSO Search Results");
306: END(HTML_H1);
307: START(HTML_PRE);
308:
309: /* start grabbing chars from the network */
310: while ((ch=NEXT_CHAR) != (char)EOF)
311: {
312: if (ch != '\n')
313: {
314: *p = ch; /* Put character in line */
315: if (p< &line[BIG-1]) p++;
316: }
317: else
318: {
319: *p++ = 0; /* Terminate line */
320: p = line; /* Scan it to parse it */
321:
322: /* OK we now have a line in 'p' lets parse it and
323: print it */
324:
325: /* Break on line that begins with a 2. It's the end of
326: * data.
327: */
328: if (*p == '2')
329: break;
330:
331: /* lines beginning with 5 are errors,
332: * print them and quit
333: */
334: if (*p == '5') {
335: START(HTML_H2);
336: PUTS(p+4);
337: END(HTML_H2);
338: break;
339: }
340:
341: if(*p == '-') {
342: /* data lines look like -200:#:
343: * where # is the search result number and can be
344: * multiple digits (infinate?)
345: * find the second colon and check the digit to the
346: * left of it to see if they are diferent
347: * if they are then a different person is starting.
348: * make this line an <h2>
349: */
350:
351: /* find the second_colon */
352: second_colon = strchr( strchr(p,':')+1, ':');
353:
354: if(second_colon != NULL) { /* error check */
355:
356: if (*(second_colon-1) != last_char)
357: /* print seperator */
358: {
359: END(HTML_PRE);
360: START(HTML_H2);
361: }
362:
363:
364: /* right now the record appears with the alias
365: * (first line)
366: * as the header and the rest as <pre> text
367: * It might look better with the name as the
368: * header and the rest as a <ul> with <li> tags
369: * I'm not sure whether the name field comes in any
370: * special order or if its even required in a
371: * record,
372: * so for now the first line is the header no
373: * matter
374: * what it is (it's almost always the alias)
375: * A <dl> with the first line as the <DT> and
376: * the rest as some form of <DD> might good also?
377: */
378:
379: /* print data */
380: PUTS(second_colon+1);
381: PUTS("\n");
382:
383: if (*(second_colon-1) != last_char)
384: /* end seperator */
385: {
386: END(HTML_H2);
387: START(HTML_PRE);
388: }
389:
390: /* save the char before the second colon
391: * for comparison on the next pass
392: */
393: last_char = *(second_colon-1) ;
394:
395: } /* end if second_colon */
396: } /* end if *p == '-' */
397: } /* if end of line */
398:
399: } /* Loop over characters */
400:
401: /* end the text block */
402: PUTS("\n");
403: END(HTML_PRE);
404: PUTS("\n");
405: FREE_TARGET;
2.11 timbl 406: HTInputSocket_free(isoc);
2.7 secret 407:
408: return; /* all done */
409: } /* end of procedure */
1.1 timbl 410:
411: /* Display a Gopher Index document
2.7 secret 412: ** -------------------------------
413: */
1.1 timbl 414:
415: PRIVATE void display_index ARGS2 (
2.7 secret 416: CONST char *, arg,
417: HTParentAnchor *,anAnchor)
1.1 timbl 418: {
1.2 timbl 419:
420: START(HTML_H1);
421: PUTS(arg);
2.7 secret 422: PUTS(" index");
1.2 timbl 423: END(HTML_H1);
2.7 secret 424: START(HTML_ISINDEX);
425: PUTS("\nThis is a searchable Gopher index.");
426: PUTS(" Please enter keywords to search for.\n");
427:
428: if (!HTAnchor_title(anAnchor))
429: HTAnchor_setTitle(anAnchor, arg);
1.2 timbl 430:
2.7 secret 431: FREE_TARGET;
432: return;
433: }
434:
435:
436: /* Display a CSO index document
437: ** -------------------------------
438: */
439:
440: PRIVATE void display_cso ARGS2 (
441: CONST char *, arg,
442: HTParentAnchor *,anAnchor)
443: {
444: START(HTML_H1);
445: PUTS(arg);
446: PUTS(" index");
447: END(HTML_H1);
448: START(HTML_ISINDEX);
449: PUTS("\nThis is a searchable index of a CSO database.\n");
450: PUTS(" Please enter keywords to search for. The keywords that you enter");
451: PUTS(" will allow you to search on a person's name in the database.\n");
452:
1.1 timbl 453: if (!HTAnchor_title(anAnchor))
1.2 timbl 454: HTAnchor_setTitle(anAnchor, arg);
1.1 timbl 455:
1.2 timbl 456: FREE_TARGET;
1.1 timbl 457: return;
458: }
459:
460:
461: /* De-escape a selector into a command
462: ** -----------------------------------
463: **
464: ** The % hex escapes are converted. Otheriwse, the string is copied.
465: */
466: PRIVATE void de_escape ARGS2(char *, command, CONST char *, selector)
467: {
468: CONST char * p = selector;
469: char * q = command;
470: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
471: while (*p) { /* Decode hex */
472: if (*p == HEX_ESCAPE) {
473: char c;
474: unsigned int b;
475: p++;
476: c = *p++;
477: b = from_hex(c);
478: c = *p++;
479: if (!c) break; /* Odd number of chars! */
480: *q++ = FROMASCII((b<<4) + from_hex(c));
481: } else {
482: *q++ = *p++; /* Record */
483: }
484: }
485: *q++ = 0; /* Terminate command */
486:
487: }
488:
489:
490: /* Load by name HTLoadGopher
491: ** ============
492: **
493: ** Bug: No decoding of strange data types as yet.
494: **
495: */
2.13 timbl 496: PUBLIC int HTLoadGopher ARGS1(HTRequest *, request)
1.1 timbl 497: {
2.13 timbl 498: CONST char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 499: char *command; /* The whole command */
500: int status; /* tcp return */
501: char gtype; /* Gopher Node type */
502: char * selector; /* Selector string */
503: struct sockaddr_in soc_address; /* Binary network address */
504: struct sockaddr_in* sin = &soc_address;
505:
506: if (!acceptable_inited) init_acceptable();
507:
508: if (!arg) return -3; /* Bad if no name sepcified */
509: if (!*arg) return -2; /* Bad if name had zero length */
510:
511: if (TRACE) fprintf(stderr, "HTGopher: Looking for %s\n", arg);
512:
513:
514: /* Set up defaults:
515: */
516: sin->sin_family = AF_INET; /* Family, host order */
517: sin->sin_port = htons(GOPHER_PORT); /* Default: new port, */
518:
519: /* Get node name and optional port number:
520: */
521: {
522: char *p1 = HTParse(arg, "", PARSE_HOST);
523: int status = HTParseInet(sin, p1);
524: free(p1);
525: if (status) return status; /* Bad */
526: }
527:
528: /* Get entity type, and selector string.
529: */
530: {
531: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
532: gtype = '1'; /* Default = menu */
533: selector = p1;
534: if ((*selector++=='/') && (*selector)) { /* Skip first slash */
535: gtype = *selector++; /* Pick up gtype */
536: }
537: if (gtype == GOPHER_INDEX) {
538: char * query;
2.10 timbl 539: HTAnchor_setIndex(request->anchor); /* Search is allowed */
1.1 timbl 540: query = strchr(selector, '?'); /* Look for search string */
541: if (!query || !query[1]) { /* No search required */
2.11 timbl 542: target = HTML_new(request, NULL, WWW_HTML,
543: request->output_format, request->output_stream);
1.2 timbl 544: targetClass = *target->isa;
2.10 timbl 545: display_index(arg, request->anchor); /* Display "cover page" */
2.15 ! luotonen 546: free(p1); /* Leak fixed Henrik 27 Feb 94 */
2.6 timbl 547: return HT_LOADED; /* Local function only */
1.1 timbl 548: }
549: *query++ = 0; /* Skip '?' */
550: command = malloc(strlen(selector)+ 1 + strlen(query)+ 2 + 1);
551: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
552:
553: de_escape(command, selector); /* Bug fix TBL 921208 */
554:
555: strcat(command, "\t");
556:
557: { /* Remove plus signs 921006 */
558: char *p;
559: for (p=query; *p; p++) {
560: if (*p == '+') *p = ' ';
561: }
562: }
563: strcat(command, query);
2.7 secret 564: } else if (gtype == GOPHER_CSO) {
565: char * query;
2.10 timbl 566: HTAnchor_setIndex(request->anchor); /* Search is allowed */
2.7 secret 567: query = strchr(selector, '?'); /* Look for search string */
568: if (!query || !query[1]) { /* No search required */
2.11 timbl 569: target = HTML_new(request, NULL, WWW_HTML,
570: request->output_format, request->output_stream);
2.7 secret 571: targetClass = *target->isa;
2.10 timbl 572: display_cso(arg, request->anchor); /* Display "cover page" */
2.15 ! luotonen 573: free(p1); /* Leak fixed Henrik 27 Feb 94 */
2.7 secret 574: return HT_LOADED; /* Local function only */
575: }
576: *query++ = 0; /* Skip '?' */
577: command = malloc(strlen("query")+ 1 + strlen(query)+ 2 + 1);
578: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
579:
580: de_escape(command, selector); /* Bug fix TBL 921208 */
581:
582: strcpy(command, "query ");
583:
584: { /* Remove plus signs 921006 */
585: char *p;
586: for (p=query; *p; p++) {
587: if (*p == '+') *p = ' ';
588: }
589: }
590: strcat(command, query);
591:
1.1 timbl 592:
593: } else { /* Not index */
594: command = command = malloc(strlen(selector)+2+1);
595: de_escape(command, selector);
596: }
597: free(p1);
598: }
599:
1.3 timbl 600: {
601: char * p = command + strlen(command);
602: *p++ = CR; /* Macros to be correct on Mac */
603: *p++ = LF;
604: *p++ = 0;
605: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
606: }
1.1 timbl 607:
608: /* Set up a socket to the server for the data:
609: */
610: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
611: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
612: if (status<0){
613: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to connect to remote host for `%s'.\n",
614: arg);
615: free(command);
616: return HTInetStatus("connect");
617: }
618:
619:
620: if (TRACE) fprintf(stderr, "HTGopher: Connected, writing command `%s' to socket %d\n", command, s);
621:
622: #ifdef NOT_ASCII
623: {
624: char * p;
625: for(p = command; *p; p++) {
626: *p = TOASCII(*p);
627: }
628: }
629: #endif
630:
631: status = NETWRITE(s, command, (int)strlen(command));
632: free(command);
633: if (status<0){
634: if (TRACE) fprintf(stderr, "HTGopher: Unable to send command.\n");
635: return HTInetStatus("send");
636: }
637:
638: /* Now read the data from the socket:
639: */
640: switch (gtype) {
641:
642: case GOPHER_HTML :
2.11 timbl 643: HTParseSocket(WWW_HTML, s, request);
1.2 timbl 644: break;
1.1 timbl 645:
1.3 timbl 646: case GOPHER_GIF:
647: case GOPHER_IMAGE:
2.11 timbl 648: HTParseSocket(HTAtom_for("image/gif"), s, request);
1.3 timbl 649: break;
1.1 timbl 650: case GOPHER_MENU :
651: case GOPHER_INDEX :
2.11 timbl 652: target = HTML_new(request, NULL, WWW_HTML,
653: request->output_format, request->output_stream);
1.2 timbl 654: targetClass = *target->isa;
2.11 timbl 655: parse_menu(s,arg, request->anchor);
1.2 timbl 656: break;
2.7 secret 657:
658: case GOPHER_CSO:
2.11 timbl 659: target = HTML_new(request, NULL, WWW_HTML,
660: request->output_format, request->output_stream);
2.7 secret 661: targetClass = *target->isa;
2.11 timbl 662: parse_cso(s, arg, request->anchor);
2.7 secret 663: break;
664:
665: case GOPHER_MACBINHEX:
666: case GOPHER_PCBINHEX:
667: case GOPHER_UUENCODED:
668: case GOPHER_BINARY:
669: /* Specifying WWW_UNKNOWN forces dump to local disk. */
2.11 timbl 670: HTParseSocket (WWW_UNKNOWN, s, request);
2.7 secret 671: break;
672:
1.1 timbl 673: case GOPHER_TEXT :
674: default: /* @@ parse as plain text */
2.11 timbl 675: HTParseSocket(WWW_PLAINTEXT, s, request);
2.7 secret 676: break;
677:
678: case GOPHER_SOUND :
2.11 timbl 679: HTParseSocket(WWW_AUDIO, s, request);
1.2 timbl 680: break;
681:
1.1 timbl 682: } /* switch(gtype) */
1.2 timbl 683:
684: NETCLOSE(s);
685: return HT_LOADED;
1.1 timbl 686: }
1.2 timbl 687:
/* Protocol record for the "gopher" access scheme: HTLoadGopher is its load
** routine and the two remaining members are left NULL. */
2.10 timbl 688: GLOBALDEF PUBLIC HTProtocol HTGopher = { "gopher", HTLoadGopher, NULL, NULL };
1.1 timbl 689:
Webmaster