Annotation of libwww/Library/src/HTGopher.c, revision 2.13
1.1 timbl 1: /* GOPHER ACCESS HTGopher.c
2: ** =============
3: **
4: ** History:
5: ** 26 Sep 90 Adapted from other accesses (News, HTTP) TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
7: */
8:
1.2 timbl 9: /* Implements:
10: */
11: #include "HTGopher.h"
12:
1.3 timbl 13:
/*      Sizes and defaults
*/
#define GOPHER_PORT             70      /* Default port; see protocol spec */
#define BIG                     1024    /* Fixed line buffer size (known bug) */
#define LINE_LENGTH             256     /* Fixed line length (known bug) */

/*      Gopher entity types
**
**      One-character type codes, as found at the start of each menu line
**      and as the first character of the path in a gopher address.
*/
#define GOPHER_TEXT             '0'
#define GOPHER_MENU             '1'
#define GOPHER_CSO              '2'
#define GOPHER_ERROR            '3'
#define GOPHER_MACBINHEX        '4'
#define GOPHER_PCBINHEX         '5'
#define GOPHER_UUENCODED        '6'
#define GOPHER_INDEX            '7'
#define GOPHER_TELNET           '8'
#define GOPHER_BINARY           '9'
#define GOPHER_GIF              'g'
#define GOPHER_HTML             'h'     /* HTML */
#define GOPHER_SOUND            's'
#define GOPHER_WWW              'w'     /* W3 address */
#define GOPHER_IMAGE            'I'
#define GOPHER_TN3270           'T'
#define GOPHER_DUPLICATE        '+'
37:
38: #include <ctype.h>
39: #include "HTUtils.h" /* Coding convention macros */
40: #include "tcp.h"
41:
42:
43: #include "HTParse.h"
44: #include "HTFormat.h"
45: #include "HTTCP.h"
46:
1.2 timbl 47: /* Hypertext object building machinery
48: */
49: #include "HTML.h"
50:
51: #define PUTC(c) (*targetClass.put_character)(target, c)
52: #define PUTS(s) (*targetClass.put_string)(target, s)
53: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
54: #define END(e) (*targetClass.end_element)(target, e)
55: #define FREE_TARGET (*targetClass.free)(target)
56: struct _HTStructured {
57: CONST HTStructuredClass * isa;
58: /* ... */
59: };
60:
61: PRIVATE HTStructured *target; /* the new hypertext */
62: PRIVATE HTStructuredClass targetClass; /* Its action routines */
63:
64:
2.8 timbl 65: #define GOPHER_PROGRESS(foo) HTAlert(foo)
1.1 timbl 66:
67:
2.12 timbl 68: #define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1 timbl 69:
70:
2.8 timbl 71:
1.1 timbl 72: /* Module-wide variables
73: */
74: PRIVATE int s; /* Socket for GopherHost */
75:
76:
1.2 timbl 77:
1.1 timbl 78: /* Matrix of allowed characters in filenames
79: ** -----------------------------------------
80: */
81:
82: PRIVATE BOOL acceptable[256];
83: PRIVATE BOOL acceptable_inited = NO;
84:
85: PRIVATE void init_acceptable NOARGS
86: {
87: unsigned int i;
88: char * good =
89: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
90: for(i=0; i<256; i++) acceptable[i] = NO;
91: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
92: acceptable_inited = YES;
93: }
94:
95: PRIVATE CONST char hex[17] = "0123456789abcdef";
96:
97: /* Decdoe one hex character
98: */
99:
100: PRIVATE char from_hex ARGS1(char, c)
101: {
102: return (c>='0')&&(c<='9') ? c-'0'
103: : (c>='A')&&(c<='F') ? c-'A'+10
104: : (c>='a')&&(c<='f') ? c-'a'+10
105: : 0;
106: }
107:
108:
109:
110: /* Paste in an Anchor
111: ** ------------------
112: **
113: ** The title of the destination is set, as there is no way
114: ** of knowing what the title is when we arrive.
115: **
116: ** On entry,
117: ** HT is in append mode.
118: ** text points to the text to be put into the file, 0 terminated.
119: ** addr points to the hypertext refernce address 0 terminated.
120: */
121: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
122: {
1.2 timbl 123:
124:
125:
126: BOOL present[HTML_A_ATTRIBUTES];
127: CONST char * value[HTML_A_ATTRIBUTES];
1.1 timbl 128:
1.2 timbl 129: int i;
130:
131: for (i=0; i<HTML_A_ATTRIBUTES; i++) present[i]=0;
132: present[HTML_A_HREF] = YES;
133: value[HTML_A_HREF] = addr;
134: present[HTML_A_TITLE] = YES;
135: value[HTML_A_TITLE] = text;
136:
137: (*targetClass.start_element)(target, HTML_A, present, value);
1.1 timbl 138:
1.2 timbl 139: PUTS(text);
140: END(HTML_A);
1.1 timbl 141: }
142:
143:
144: /* Parse a Gopher Menu document
145: ** ============================
146: **
147: */
148:
2.11 timbl 149: PRIVATE void parse_menu ARGS3 (
150: int , s,
1.2 timbl 151: CONST char *, arg,
152: HTParentAnchor *, anAnchor)
1.1 timbl 153: {
154: char gtype;
155: char ch;
156: char line[BIG];
157: char address[BIG];
158: char *name, *selector; /* Gopher menu fields */
159: char *host;
160: char *port;
161: char *p = line;
1.2 timbl 162: CONST char *title;
2.12 timbl 163: HTInputSocket * isoc = HTInputSocket_new(s);
2.11 timbl 164:
1.1 timbl 165: #define TAB '\t'
166: #define HEX_ESCAPE '%'
167:
168:
1.2 timbl 169: title = HTAnchor_title(anAnchor);
170: if (title) {
171: START(HTML_H1);
172: PUTS(title);
173: END(HTML_H1);
174: } else
175: PUTS("Select one of:\n\n");
1.1 timbl 176:
1.2 timbl 177: START(HTML_MENU);
1.1 timbl 178: while ((ch=NEXT_CHAR) != (char)EOF) {
1.3 timbl 179: if (ch != LF) {
1.1 timbl 180: *p = ch; /* Put character in line */
181: if (p< &line[BIG-1]) p++;
182:
183: } else {
184: *p++ = 0; /* Terminate line */
185: p = line; /* Scan it to parse it */
186: port = 0; /* Flag "not parsed" */
187: if (TRACE) fprintf(stderr, "HTGopher: Menu item: %s\n", line);
188: gtype = *p++;
189:
190: /* Break on line with a dot by itself */
191: if ((gtype=='.') && ((*p=='\r') || (*p==0))) break;
192:
193: if (gtype && *p) {
194: name = p;
195: selector = strchr(name, TAB);
1.3 timbl 196: START(HTML_LI);
1.1 timbl 197: if (selector) {
198: *selector++ = 0; /* Terminate name */
199: host = strchr(selector, TAB);
200: if (host) {
201: *host++ = 0; /* Terminate selector */
202: port = strchr(host, TAB);
203: if (port) {
204: char *junk;
205: port[0] = ':'; /* delimit host a la W3 */
206: junk = strchr(port, TAB);
207: if (junk) *junk++ = 0; /* Chop port */
208: if ((port[1]=='0') && (!port[2]))
209: port[0] = 0; /* 0 means none */
210: } /* no port */
211: } /* host ok */
212: } /* selector ok */
213: } /* gtype and name ok */
214:
215: if (gtype == GOPHER_WWW) { /* Gopher pointer to W3 */
216: write_anchor(name, selector);
2.7 secret 217:
1.1 timbl 218: } else if (port) { /* Other types need port */
219: if (gtype == GOPHER_TELNET) {
220: if (*selector) sprintf(address, "telnet://%s@%s/",
2.7 secret 221: selector, host);
1.1 timbl 222: else sprintf(address, "telnet://%s/", host);
2.7 secret 223: }
224: else if (gtype == GOPHER_TN3270)
225: {
226: if (*selector)
227: sprintf(address, "tn3270://%s@%s/",
228: selector, host);
229: else
230: sprintf(address, "tn3270://%s/", host);
231: }
232: else { /* If parsed ok */
1.1 timbl 233: char *q;
234: char *p;
235: sprintf(address, "//%s/%c", host, gtype);
236: q = address+ strlen(address);
237: for(p=selector; *p; p++) { /* Encode selector string */
238: if (acceptable[*p]) *q++ = *p;
239: else {
240: *q++ = HEX_ESCAPE; /* Means hex coming */
241: *q++ = hex[(TOASCII(*p)) >> 4];
242: *q++ = hex[(TOASCII(*p)) & 15];
243: }
244: }
245: *q++ = 0; /* terminate address */
246: }
1.2 timbl 247: PUTS(" "); /* Prettier JW/TBL */
2.7 secret 248: /* Error response from Gopher doesn't deserve to
249: be a hyperlink. */
250: if (strcmp (address, "gopher://error.host:1/0"))
251: write_anchor(name, address);
252: else
253: PUTS(name);
254: PUTS("\n");
1.1 timbl 255: } else { /* parse error */
256: if (TRACE) fprintf(stderr,
257: "HTGopher: Bad menu item.\n");
1.2 timbl 258: PUTS(line);
259:
1.1 timbl 260: } /* parse error */
261:
262: p = line; /* Start again at beginning of line */
263:
264: } /* if end of line */
265:
266: } /* Loop over characters */
267:
1.2 timbl 268: END(HTML_MENU);
269: FREE_TARGET;
270:
2.11 timbl 271: HTInputSocket_free(isoc);
1.1 timbl 272: return;
273: }
2.11 timbl 274:
275:
2.7 secret 276: /* Parse a Gopher CSO document
277: ** ============================
278: **
279: ** Accepts an open socket to a CSO server waiting to send us
280: ** data and puts it on the screen in a reasonable manner.
281: **
282: ** Perhaps this data can be automatically linked to some
283: ** other source as well???
284: **
285: ** Taken from hacking by Lou Montulli@ukanaix.cc.ukans.edu
286: ** on XMosaic-1.1, and put on libwww 2.11 by Arthur Secret,
287: ** secret@dxcern.cern.ch .
288: */
289:
2.11 timbl 290: PRIVATE void parse_cso ARGS3 (
291: int, s,
292: CONST char *, arg,
293: HTParentAnchor *, anAnchor)
2.7 secret 294: {
295: char ch;
296: char line[BIG];
297: char *p = line;
298: char *second_colon, last_char='\0';
299: CONST char *title;
2.11 timbl 300: HTInputSocket * isoc = HTInputSocket_new(s);
2.7 secret 301:
302: title = HTAnchor_title(anAnchor);
303: START(HTML_H1);
304: PUTS("CSO Search Results");
305: END(HTML_H1);
306: START(HTML_PRE);
307:
308: /* start grabbing chars from the network */
309: while ((ch=NEXT_CHAR) != (char)EOF)
310: {
311: if (ch != '\n')
312: {
313: *p = ch; /* Put character in line */
314: if (p< &line[BIG-1]) p++;
315: }
316: else
317: {
318: *p++ = 0; /* Terminate line */
319: p = line; /* Scan it to parse it */
320:
321: /* OK we now have a line in 'p' lets parse it and
322: print it */
323:
324: /* Break on line that begins with a 2. It's the end of
325: * data.
326: */
327: if (*p == '2')
328: break;
329:
330: /* lines beginning with 5 are errors,
331: * print them and quit
332: */
333: if (*p == '5') {
334: START(HTML_H2);
335: PUTS(p+4);
336: END(HTML_H2);
337: break;
338: }
339:
340: if(*p == '-') {
341: /* data lines look like -200:#:
342: * where # is the search result number and can be
343: * multiple digits (infinate?)
344: * find the second colon and check the digit to the
345: * left of it to see if they are diferent
346: * if they are then a different person is starting.
347: * make this line an <h2>
348: */
349:
350: /* find the second_colon */
351: second_colon = strchr( strchr(p,':')+1, ':');
352:
353: if(second_colon != NULL) { /* error check */
354:
355: if (*(second_colon-1) != last_char)
356: /* print seperator */
357: {
358: END(HTML_PRE);
359: START(HTML_H2);
360: }
361:
362:
363: /* right now the record appears with the alias
364: * (first line)
365: * as the header and the rest as <pre> text
366: * It might look better with the name as the
367: * header and the rest as a <ul> with <li> tags
368: * I'm not sure whether the name field comes in any
369: * special order or if its even required in a
370: * record,
371: * so for now the first line is the header no
372: * matter
373: * what it is (it's almost always the alias)
374: * A <dl> with the first line as the <DT> and
375: * the rest as some form of <DD> might good also?
376: */
377:
378: /* print data */
379: PUTS(second_colon+1);
380: PUTS("\n");
381:
382: if (*(second_colon-1) != last_char)
383: /* end seperator */
384: {
385: END(HTML_H2);
386: START(HTML_PRE);
387: }
388:
389: /* save the char before the second colon
390: * for comparison on the next pass
391: */
392: last_char = *(second_colon-1) ;
393:
394: } /* end if second_colon */
395: } /* end if *p == '-' */
396: } /* if end of line */
397:
398: } /* Loop over characters */
399:
400: /* end the text block */
401: PUTS("\n");
402: END(HTML_PRE);
403: PUTS("\n");
404: FREE_TARGET;
2.11 timbl 405: HTInputSocket_free(isoc);
2.7 secret 406:
407: return; /* all done */
408: } /* end of procedure */
1.1 timbl 409:
410: /* Display a Gopher Index document
2.7 secret 411: ** -------------------------------
412: */
1.1 timbl 413:
414: PRIVATE void display_index ARGS2 (
2.7 secret 415: CONST char *, arg,
416: HTParentAnchor *,anAnchor)
1.1 timbl 417: {
1.2 timbl 418:
419: START(HTML_H1);
420: PUTS(arg);
2.7 secret 421: PUTS(" index");
1.2 timbl 422: END(HTML_H1);
2.7 secret 423: START(HTML_ISINDEX);
424: PUTS("\nThis is a searchable Gopher index.");
425: PUTS(" Please enter keywords to search for.\n");
426:
427: if (!HTAnchor_title(anAnchor))
428: HTAnchor_setTitle(anAnchor, arg);
1.2 timbl 429:
2.7 secret 430: FREE_TARGET;
431: return;
432: }
433:
434:
435: /* Display a CSO index document
436: ** -------------------------------
437: */
438:
439: PRIVATE void display_cso ARGS2 (
440: CONST char *, arg,
441: HTParentAnchor *,anAnchor)
442: {
443: START(HTML_H1);
444: PUTS(arg);
445: PUTS(" index");
446: END(HTML_H1);
447: START(HTML_ISINDEX);
448: PUTS("\nThis is a searchable index of a CSO database.\n");
449: PUTS(" Please enter keywords to search for. The keywords that you enter");
450: PUTS(" will allow you to search on a person's name in the database.\n");
451:
1.1 timbl 452: if (!HTAnchor_title(anAnchor))
1.2 timbl 453: HTAnchor_setTitle(anAnchor, arg);
1.1 timbl 454:
1.2 timbl 455: FREE_TARGET;
1.1 timbl 456: return;
457: }
458:
459:
460: /* De-escape a selector into a command
461: ** -----------------------------------
462: **
463: ** The % hex escapes are converted. Otheriwse, the string is copied.
464: */
465: PRIVATE void de_escape ARGS2(char *, command, CONST char *, selector)
466: {
467: CONST char * p = selector;
468: char * q = command;
469: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
470: while (*p) { /* Decode hex */
471: if (*p == HEX_ESCAPE) {
472: char c;
473: unsigned int b;
474: p++;
475: c = *p++;
476: b = from_hex(c);
477: c = *p++;
478: if (!c) break; /* Odd number of chars! */
479: *q++ = FROMASCII((b<<4) + from_hex(c));
480: } else {
481: *q++ = *p++; /* Record */
482: }
483: }
484: *q++ = 0; /* Terminate command */
485:
486: }
487:
488:
489: /* Load by name HTLoadGopher
490: ** ============
491: **
492: ** Bug: No decoding of strange data types as yet.
493: **
494: */
2.13 ! timbl 495: PUBLIC int HTLoadGopher ARGS1(HTRequest *, request)
1.1 timbl 496: {
2.13 ! timbl 497: CONST char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 498: char *command; /* The whole command */
499: int status; /* tcp return */
500: char gtype; /* Gopher Node type */
501: char * selector; /* Selector string */
502: struct sockaddr_in soc_address; /* Binary network address */
503: struct sockaddr_in* sin = &soc_address;
504:
505: if (!acceptable_inited) init_acceptable();
506:
507: if (!arg) return -3; /* Bad if no name sepcified */
508: if (!*arg) return -2; /* Bad if name had zero length */
509:
510: if (TRACE) fprintf(stderr, "HTGopher: Looking for %s\n", arg);
511:
512:
513: /* Set up defaults:
514: */
515: sin->sin_family = AF_INET; /* Family, host order */
516: sin->sin_port = htons(GOPHER_PORT); /* Default: new port, */
517:
518: /* Get node name and optional port number:
519: */
520: {
521: char *p1 = HTParse(arg, "", PARSE_HOST);
522: int status = HTParseInet(sin, p1);
523: free(p1);
524: if (status) return status; /* Bad */
525: }
526:
527: /* Get entity type, and selector string.
528: */
529: {
530: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
531: gtype = '1'; /* Default = menu */
532: selector = p1;
533: if ((*selector++=='/') && (*selector)) { /* Skip first slash */
534: gtype = *selector++; /* Pick up gtype */
535: }
536: if (gtype == GOPHER_INDEX) {
537: char * query;
2.10 timbl 538: HTAnchor_setIndex(request->anchor); /* Search is allowed */
1.1 timbl 539: query = strchr(selector, '?'); /* Look for search string */
540: if (!query || !query[1]) { /* No search required */
2.11 timbl 541: target = HTML_new(request, NULL, WWW_HTML,
542: request->output_format, request->output_stream);
1.2 timbl 543: targetClass = *target->isa;
2.10 timbl 544: display_index(arg, request->anchor); /* Display "cover page" */
2.6 timbl 545: return HT_LOADED; /* Local function only */
1.1 timbl 546: }
547: *query++ = 0; /* Skip '?' */
548: command = malloc(strlen(selector)+ 1 + strlen(query)+ 2 + 1);
549: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
550:
551: de_escape(command, selector); /* Bug fix TBL 921208 */
552:
553: strcat(command, "\t");
554:
555: { /* Remove plus signs 921006 */
556: char *p;
557: for (p=query; *p; p++) {
558: if (*p == '+') *p = ' ';
559: }
560: }
561: strcat(command, query);
2.7 secret 562: } else if (gtype == GOPHER_CSO) {
563: char * query;
2.10 timbl 564: HTAnchor_setIndex(request->anchor); /* Search is allowed */
2.7 secret 565: query = strchr(selector, '?'); /* Look for search string */
566: if (!query || !query[1]) { /* No search required */
2.11 timbl 567: target = HTML_new(request, NULL, WWW_HTML,
568: request->output_format, request->output_stream);
2.7 secret 569: targetClass = *target->isa;
2.10 timbl 570: display_cso(arg, request->anchor); /* Display "cover page" */
2.7 secret 571: return HT_LOADED; /* Local function only */
572: }
573: *query++ = 0; /* Skip '?' */
574: command = malloc(strlen("query")+ 1 + strlen(query)+ 2 + 1);
575: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
576:
577: de_escape(command, selector); /* Bug fix TBL 921208 */
578:
579: strcpy(command, "query ");
580:
581: { /* Remove plus signs 921006 */
582: char *p;
583: for (p=query; *p; p++) {
584: if (*p == '+') *p = ' ';
585: }
586: }
587: strcat(command, query);
588:
1.1 timbl 589:
590: } else { /* Not index */
591: command = command = malloc(strlen(selector)+2+1);
592: de_escape(command, selector);
593: }
594: free(p1);
595: }
596:
1.3 timbl 597: {
598: char * p = command + strlen(command);
599: *p++ = CR; /* Macros to be correct on Mac */
600: *p++ = LF;
601: *p++ = 0;
602: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
603: }
1.1 timbl 604:
605: /* Set up a socket to the server for the data:
606: */
607: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
608: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
609: if (status<0){
610: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to connect to remote host for `%s'.\n",
611: arg);
612: free(command);
613: return HTInetStatus("connect");
614: }
615:
616:
617: if (TRACE) fprintf(stderr, "HTGopher: Connected, writing command `%s' to socket %d\n", command, s);
618:
619: #ifdef NOT_ASCII
620: {
621: char * p;
622: for(p = command; *p; p++) {
623: *p = TOASCII(*p);
624: }
625: }
626: #endif
627:
628: status = NETWRITE(s, command, (int)strlen(command));
629: free(command);
630: if (status<0){
631: if (TRACE) fprintf(stderr, "HTGopher: Unable to send command.\n");
632: return HTInetStatus("send");
633: }
634:
635: /* Now read the data from the socket:
636: */
637: switch (gtype) {
638:
639: case GOPHER_HTML :
2.11 timbl 640: HTParseSocket(WWW_HTML, s, request);
1.2 timbl 641: break;
1.1 timbl 642:
1.3 timbl 643: case GOPHER_GIF:
644: case GOPHER_IMAGE:
2.11 timbl 645: HTParseSocket(HTAtom_for("image/gif"), s, request);
1.3 timbl 646: break;
1.1 timbl 647: case GOPHER_MENU :
648: case GOPHER_INDEX :
2.11 timbl 649: target = HTML_new(request, NULL, WWW_HTML,
650: request->output_format, request->output_stream);
1.2 timbl 651: targetClass = *target->isa;
2.11 timbl 652: parse_menu(s,arg, request->anchor);
1.2 timbl 653: break;
2.7 secret 654:
655: case GOPHER_CSO:
2.11 timbl 656: target = HTML_new(request, NULL, WWW_HTML,
657: request->output_format, request->output_stream);
2.7 secret 658: targetClass = *target->isa;
2.11 timbl 659: parse_cso(s, arg, request->anchor);
2.7 secret 660: break;
661:
662: case GOPHER_MACBINHEX:
663: case GOPHER_PCBINHEX:
664: case GOPHER_UUENCODED:
665: case GOPHER_BINARY:
666: /* Specifying WWW_UNKNOWN forces dump to local disk. */
2.11 timbl 667: HTParseSocket (WWW_UNKNOWN, s, request);
2.7 secret 668: break;
669:
1.1 timbl 670: case GOPHER_TEXT :
671: default: /* @@ parse as plain text */
2.11 timbl 672: HTParseSocket(WWW_PLAINTEXT, s, request);
2.7 secret 673: break;
674:
675: case GOPHER_SOUND :
2.11 timbl 676: HTParseSocket(WWW_AUDIO, s, request);
1.2 timbl 677: break;
678:
1.1 timbl 679: } /* switch(gtype) */
1.2 timbl 680:
681: NETCLOSE(s);
682: return HT_LOADED;
1.1 timbl 683: }
1.2 timbl 684:
2.10 timbl 685: GLOBALDEF PUBLIC HTProtocol HTGopher = { "gopher", HTLoadGopher, NULL, NULL };
1.1 timbl 686:
Webmaster