Annotation of libwww/Library/src/HTGopher.c, revision 2.11
1.1 timbl 1: /* GOPHER ACCESS HTGopher.c
2: ** =============
3: **
4: ** History:
5: ** 26 Sep 90 Adapted from other accesses (News, HTTP) TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
7: */
8:
1.2 timbl 9: /* Implements:
10: */
11: #include "HTGopher.h"
12:
1.3 timbl 13:
1.1 timbl 14: #define GOPHER_PORT 70 /* See protocol spec */
15: #define BIG 1024 /* Bug */
16: #define LINE_LENGTH 256 /* Bug */
17:
18: /* Gopher entity types:
19: */
20: #define GOPHER_TEXT '0'
21: #define GOPHER_MENU '1'
22: #define GOPHER_CSO '2'
23: #define GOPHER_ERROR '3'
24: #define GOPHER_MACBINHEX '4'
25: #define GOPHER_PCBINHEX '5'
26: #define GOPHER_UUENCODED '6'
27: #define GOPHER_INDEX '7'
28: #define GOPHER_TELNET '8'
2.7 secret 29: #define GOPHER_BINARY '9'
1.3 timbl 30: #define GOPHER_GIF 'g'
2.7 secret 31: #define GOPHER_HTML 'h' /* HTML */
32: #define GOPHER_SOUND 's'
33: #define GOPHER_WWW 'w' /* W3 address */
1.3 timbl 34: #define GOPHER_IMAGE 'I'
2.7 secret 35: #define GOPHER_TN3270 'T'
1.1 timbl 36: #define GOPHER_DUPLICATE '+'
37:
38: #include <ctype.h>
39: #include "HTUtils.h" /* Coding convention macros */
40: #include "tcp.h"
41:
42:
43: #include "HTParse.h"
44: #include "HTFormat.h"
45: #include "HTTCP.h"
46:
1.2 timbl 47: /* Hypertext object building machinery
48: */
49: #include "HTML.h"
50:
51: #define PUTC(c) (*targetClass.put_character)(target, c)
52: #define PUTS(s) (*targetClass.put_string)(target, s)
53: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
54: #define END(e) (*targetClass.end_element)(target, e)
55: #define FREE_TARGET (*targetClass.free)(target)
56: struct _HTStructured {
57: CONST HTStructuredClass * isa;
58: /* ... */
59: };
60:
61: PRIVATE HTStructured *target; /* the new hypertext */
62: PRIVATE HTStructuredClass targetClass; /* Its action routines */
63:
64:
2.8 timbl 65: #define GOPHER_PROGRESS(foo) HTAlert(foo)
1.1 timbl 66:
67:
/*	Fetch the next character from the buffered input socket.
**	A variable `isoc' of type HTInputSocket* must be in scope wherever
**	this macro is used.
*/
#define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1 timbl 69:
70:
2.8 timbl 71:
1.1 timbl 72: /* Module-wide variables
73: */
74: PRIVATE int s; /* Socket for GopherHost */
75:
76:
1.2 timbl 77:
1.1 timbl 78: /* Matrix of allowed characters in filenames
79: ** -----------------------------------------
80: */
81:
82: PRIVATE BOOL acceptable[256];
83: PRIVATE BOOL acceptable_inited = NO;
84:
85: PRIVATE void init_acceptable NOARGS
86: {
87: unsigned int i;
88: char * good =
89: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
90: for(i=0; i<256; i++) acceptable[i] = NO;
91: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
92: acceptable_inited = YES;
93: }
94:
95: PRIVATE CONST char hex[17] = "0123456789abcdef";
96:
97: /* Decdoe one hex character
98: */
99:
100: PRIVATE char from_hex ARGS1(char, c)
101: {
102: return (c>='0')&&(c<='9') ? c-'0'
103: : (c>='A')&&(c<='F') ? c-'A'+10
104: : (c>='a')&&(c<='f') ? c-'a'+10
105: : 0;
106: }
107:
108:
109:
110: /* Paste in an Anchor
111: ** ------------------
112: **
113: ** The title of the destination is set, as there is no way
114: ** of knowing what the title is when we arrive.
115: **
116: ** On entry,
117: ** HT is in append mode.
118: ** text points to the text to be put into the file, 0 terminated.
119: ** addr points to the hypertext refernce address 0 terminated.
120: */
121: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
122: {
1.2 timbl 123:
124:
125:
126: BOOL present[HTML_A_ATTRIBUTES];
127: CONST char * value[HTML_A_ATTRIBUTES];
1.1 timbl 128:
1.2 timbl 129: int i;
130:
131: for (i=0; i<HTML_A_ATTRIBUTES; i++) present[i]=0;
132: present[HTML_A_HREF] = YES;
133: value[HTML_A_HREF] = addr;
134: present[HTML_A_TITLE] = YES;
135: value[HTML_A_TITLE] = text;
136:
137: (*targetClass.start_element)(target, HTML_A, present, value);
1.1 timbl 138:
1.2 timbl 139: PUTS(text);
140: END(HTML_A);
1.1 timbl 141: }
142:
143:
144: /* Parse a Gopher Menu document
145: ** ============================
146: **
147: */
148:
2.11 ! timbl 149: PRIVATE void parse_menu ARGS3 (
! 150: int , s,
1.2 timbl 151: CONST char *, arg,
152: HTParentAnchor *, anAnchor)
1.1 timbl 153: {
154: char gtype;
155: char ch;
156: char line[BIG];
157: char address[BIG];
158: char *name, *selector; /* Gopher menu fields */
159: char *host;
160: char *port;
161: char *p = line;
1.2 timbl 162: CONST char *title;
2.11 ! timbl 163: HTInputSocket * isoc = HTINputSocket_new(s);
! 164:
1.1 timbl 165: #define TAB '\t'
166: #define HEX_ESCAPE '%'
167:
168:
1.2 timbl 169: title = HTAnchor_title(anAnchor);
170: if (title) {
171: START(HTML_H1);
172: PUTS(title);
173: END(HTML_H1);
174: } else
175: PUTS("Select one of:\n\n");
1.1 timbl 176:
1.2 timbl 177: START(HTML_MENU);
1.1 timbl 178: while ((ch=NEXT_CHAR) != (char)EOF) {
1.3 timbl 179: if (ch != LF) {
1.1 timbl 180: *p = ch; /* Put character in line */
181: if (p< &line[BIG-1]) p++;
182:
183: } else {
184: *p++ = 0; /* Terminate line */
185: p = line; /* Scan it to parse it */
186: port = 0; /* Flag "not parsed" */
187: if (TRACE) fprintf(stderr, "HTGopher: Menu item: %s\n", line);
188: gtype = *p++;
189:
190: /* Break on line with a dot by itself */
191: if ((gtype=='.') && ((*p=='\r') || (*p==0))) break;
192:
193: if (gtype && *p) {
194: name = p;
195: selector = strchr(name, TAB);
1.3 timbl 196: START(HTML_LI);
1.1 timbl 197: if (selector) {
198: *selector++ = 0; /* Terminate name */
199: host = strchr(selector, TAB);
200: if (host) {
201: *host++ = 0; /* Terminate selector */
202: port = strchr(host, TAB);
203: if (port) {
204: char *junk;
205: port[0] = ':'; /* delimit host a la W3 */
206: junk = strchr(port, TAB);
207: if (junk) *junk++ = 0; /* Chop port */
208: if ((port[1]=='0') && (!port[2]))
209: port[0] = 0; /* 0 means none */
210: } /* no port */
211: } /* host ok */
212: } /* selector ok */
213: } /* gtype and name ok */
214:
215: if (gtype == GOPHER_WWW) { /* Gopher pointer to W3 */
216: write_anchor(name, selector);
2.7 secret 217:
1.1 timbl 218: } else if (port) { /* Other types need port */
219: if (gtype == GOPHER_TELNET) {
220: if (*selector) sprintf(address, "telnet://%s@%s/",
2.7 secret 221: selector, host);
1.1 timbl 222: else sprintf(address, "telnet://%s/", host);
2.7 secret 223: }
224: else if (gtype == GOPHER_TN3270)
225: {
226: if (*selector)
227: sprintf(address, "tn3270://%s@%s/",
228: selector, host);
229: else
230: sprintf(address, "tn3270://%s/", host);
231: }
232: else { /* If parsed ok */
1.1 timbl 233: char *q;
234: char *p;
235: sprintf(address, "//%s/%c", host, gtype);
236: q = address+ strlen(address);
237: for(p=selector; *p; p++) { /* Encode selector string */
238: if (acceptable[*p]) *q++ = *p;
239: else {
240: *q++ = HEX_ESCAPE; /* Means hex coming */
241: *q++ = hex[(TOASCII(*p)) >> 4];
242: *q++ = hex[(TOASCII(*p)) & 15];
243: }
244: }
245: *q++ = 0; /* terminate address */
246: }
1.2 timbl 247: PUTS(" "); /* Prettier JW/TBL */
2.7 secret 248: /* Error response from Gopher doesn't deserve to
249: be a hyperlink. */
250: if (strcmp (address, "gopher://error.host:1/0"))
251: write_anchor(name, address);
252: else
253: PUTS(name);
254: PUTS("\n");
1.1 timbl 255: } else { /* parse error */
256: if (TRACE) fprintf(stderr,
257: "HTGopher: Bad menu item.\n");
1.2 timbl 258: PUTS(line);
259:
1.1 timbl 260: } /* parse error */
261:
262: p = line; /* Start again at beginning of line */
263:
264: } /* if end of line */
265:
266: } /* Loop over characters */
267:
1.2 timbl 268: END(HTML_MENU);
269: FREE_TARGET;
270:
2.11 ! timbl 271: HTInputSocket_free(isoc);
1.1 timbl 272: return;
273: }
2.11 ! timbl 274:
! 275:
2.7 secret 276: /* Parse a Gopher CSO document
277: ** ============================
278: **
279: ** Accepts an open socket to a CSO server waiting to send us
280: ** data and puts it on the screen in a reasonable manner.
281: **
282: ** Perhaps this data can be automatically linked to some
283: ** other source as well???
284: **
285: ** Taken from hacking by Lou Montulli@ukanaix.cc.ukans.edu
286: ** on XMosaic-1.1, and put on libwww 2.11 by Arthur Secret,
287: ** secret@dxcern.cern.ch .
288: */
289:
2.11 ! timbl 290: PRIVATE void parse_cso ARGS3 (
! 291: int, s,
! 292: CONST char *, arg,
! 293: HTParentAnchor *, anAnchor)
2.7 secret 294: {
295: char ch;
296: char line[BIG];
297: char *p = line;
298: char *second_colon, last_char='\0';
299: CONST char *title;
2.11 ! timbl 300: HTInputSocket * isoc = HTInputSocket_new(s);
2.7 secret 301:
302: title = HTAnchor_title(anAnchor);
303: START(HTML_H1);
304: PUTS("CSO Search Results");
305: END(HTML_H1);
306: START(HTML_PRE);
307:
308: /* start grabbing chars from the network */
309: while ((ch=NEXT_CHAR) != (char)EOF)
310: {
311: if (ch != '\n')
312: {
313: *p = ch; /* Put character in line */
314: if (p< &line[BIG-1]) p++;
315: }
316: else
317: {
318: *p++ = 0; /* Terminate line */
319: p = line; /* Scan it to parse it */
320:
321: /* OK we now have a line in 'p' lets parse it and
322: print it */
323:
324: /* Break on line that begins with a 2. It's the end of
325: * data.
326: */
327: if (*p == '2')
328: break;
329:
330: /* lines beginning with 5 are errors,
331: * print them and quit
332: */
333: if (*p == '5') {
334: START(HTML_H2);
335: PUTS(p+4);
336: END(HTML_H2);
337: break;
338: }
339:
340: if(*p == '-') {
341: /* data lines look like -200:#:
342: * where # is the search result number and can be
343: * multiple digits (infinate?)
344: * find the second colon and check the digit to the
345: * left of it to see if they are diferent
346: * if they are then a different person is starting.
347: * make this line an <h2>
348: */
349:
350: /* find the second_colon */
351: second_colon = strchr( strchr(p,':')+1, ':');
352:
353: if(second_colon != NULL) { /* error check */
354:
355: if (*(second_colon-1) != last_char)
356: /* print seperator */
357: {
358: END(HTML_PRE);
359: START(HTML_H2);
360: }
361:
362:
363: /* right now the record appears with the alias
364: * (first line)
365: * as the header and the rest as <pre> text
366: * It might look better with the name as the
367: * header and the rest as a <ul> with <li> tags
368: * I'm not sure whether the name field comes in any
369: * special order or if its even required in a
370: * record,
371: * so for now the first line is the header no
372: * matter
373: * what it is (it's almost always the alias)
374: * A <dl> with the first line as the <DT> and
375: * the rest as some form of <DD> might good also?
376: */
377:
378: /* print data */
379: PUTS(second_colon+1);
380: PUTS("\n");
381:
382: if (*(second_colon-1) != last_char)
383: /* end seperator */
384: {
385: END(HTML_H2);
386: START(HTML_PRE);
387: }
388:
389: /* save the char before the second colon
390: * for comparison on the next pass
391: */
392: last_char = *(second_colon-1) ;
393:
394: } /* end if second_colon */
395: } /* end if *p == '-' */
396: } /* if end of line */
397:
398: } /* Loop over characters */
399:
400: /* end the text block */
401: PUTS("\n");
402: END(HTML_PRE);
403: PUTS("\n");
404: FREE_TARGET;
2.11 ! timbl 405: HTInputSocket_free(isoc);
2.7 secret 406:
407: return; /* all done */
408: } /* end of procedure */
1.1 timbl 409:
410: /* Display a Gopher Index document
2.7 secret 411: ** -------------------------------
412: */
1.1 timbl 413:
414: PRIVATE void display_index ARGS2 (
2.7 secret 415: CONST char *, arg,
416: HTParentAnchor *,anAnchor)
1.1 timbl 417: {
1.2 timbl 418:
419: START(HTML_H1);
420: PUTS(arg);
2.7 secret 421: PUTS(" index");
1.2 timbl 422: END(HTML_H1);
2.7 secret 423: START(HTML_ISINDEX);
424: PUTS("\nThis is a searchable Gopher index.");
425: PUTS(" Please enter keywords to search for.\n");
426:
427: if (!HTAnchor_title(anAnchor))
428: HTAnchor_setTitle(anAnchor, arg);
1.2 timbl 429:
2.7 secret 430: FREE_TARGET;
431: return;
432: }
433:
434:
435: /* Display a CSO index document
436: ** -------------------------------
437: */
438:
439: PRIVATE void display_cso ARGS2 (
440: CONST char *, arg,
441: HTParentAnchor *,anAnchor)
442: {
443: START(HTML_H1);
444: PUTS(arg);
445: PUTS(" index");
446: END(HTML_H1);
447: START(HTML_ISINDEX);
448: PUTS("\nThis is a searchable index of a CSO database.\n");
449: PUTS(" Please enter keywords to search for. The keywords that you enter");
450: PUTS(" will allow you to search on a person's name in the database.\n");
451:
1.1 timbl 452: if (!HTAnchor_title(anAnchor))
1.2 timbl 453: HTAnchor_setTitle(anAnchor, arg);
1.1 timbl 454:
1.2 timbl 455: FREE_TARGET;
1.1 timbl 456: return;
457: }
458:
459:
460: /* De-escape a selector into a command
461: ** -----------------------------------
462: **
463: ** The % hex escapes are converted. Otheriwse, the string is copied.
464: */
465: PRIVATE void de_escape ARGS2(char *, command, CONST char *, selector)
466: {
467: CONST char * p = selector;
468: char * q = command;
469: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
470: while (*p) { /* Decode hex */
471: if (*p == HEX_ESCAPE) {
472: char c;
473: unsigned int b;
474: p++;
475: c = *p++;
476: b = from_hex(c);
477: c = *p++;
478: if (!c) break; /* Odd number of chars! */
479: *q++ = FROMASCII((b<<4) + from_hex(c));
480: } else {
481: *q++ = *p++; /* Record */
482: }
483: }
484: *q++ = 0; /* Terminate command */
485:
486: }
487:
488:
489: /* Load by name HTLoadGopher
490: ** ============
491: **
492: ** Bug: No decoding of strange data types as yet.
493: **
494: */
2.10 timbl 495: PUBLIC int HTLoadGopher ARGS2(
1.2 timbl 496: CONST char *, arg,
2.10 timbl 497: HTRequest *, request)
1.1 timbl 498: {
499: char *command; /* The whole command */
500: int status; /* tcp return */
501: char gtype; /* Gopher Node type */
502: char * selector; /* Selector string */
2.11 ! timbl 503: HTInputSocket * isoc; /* Buffers for reading socket */
1.1 timbl 504: struct sockaddr_in soc_address; /* Binary network address */
505: struct sockaddr_in* sin = &soc_address;
506:
507: if (!acceptable_inited) init_acceptable();
508:
509: if (!arg) return -3; /* Bad if no name sepcified */
510: if (!*arg) return -2; /* Bad if name had zero length */
511:
512: if (TRACE) fprintf(stderr, "HTGopher: Looking for %s\n", arg);
513:
514:
515: /* Set up defaults:
516: */
517: sin->sin_family = AF_INET; /* Family, host order */
518: sin->sin_port = htons(GOPHER_PORT); /* Default: new port, */
519:
520: /* Get node name and optional port number:
521: */
522: {
523: char *p1 = HTParse(arg, "", PARSE_HOST);
524: int status = HTParseInet(sin, p1);
525: free(p1);
526: if (status) return status; /* Bad */
527: }
528:
529: /* Get entity type, and selector string.
530: */
531: {
532: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
533: gtype = '1'; /* Default = menu */
534: selector = p1;
535: if ((*selector++=='/') && (*selector)) { /* Skip first slash */
536: gtype = *selector++; /* Pick up gtype */
537: }
538: if (gtype == GOPHER_INDEX) {
539: char * query;
2.10 timbl 540: HTAnchor_setIndex(request->anchor); /* Search is allowed */
1.1 timbl 541: query = strchr(selector, '?'); /* Look for search string */
542: if (!query || !query[1]) { /* No search required */
2.11 ! timbl 543: target = HTML_new(request, NULL, WWW_HTML,
! 544: request->output_format, request->output_stream);
1.2 timbl 545: targetClass = *target->isa;
2.10 timbl 546: display_index(arg, request->anchor); /* Display "cover page" */
2.6 timbl 547: return HT_LOADED; /* Local function only */
1.1 timbl 548: }
549: *query++ = 0; /* Skip '?' */
550: command = malloc(strlen(selector)+ 1 + strlen(query)+ 2 + 1);
551: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
552:
553: de_escape(command, selector); /* Bug fix TBL 921208 */
554:
555: strcat(command, "\t");
556:
557: { /* Remove plus signs 921006 */
558: char *p;
559: for (p=query; *p; p++) {
560: if (*p == '+') *p = ' ';
561: }
562: }
563: strcat(command, query);
2.7 secret 564: } else if (gtype == GOPHER_CSO) {
565: char * query;
2.10 timbl 566: HTAnchor_setIndex(request->anchor); /* Search is allowed */
2.7 secret 567: query = strchr(selector, '?'); /* Look for search string */
568: if (!query || !query[1]) { /* No search required */
2.11 ! timbl 569: target = HTML_new(request, NULL, WWW_HTML,
! 570: request->output_format, request->output_stream);
2.7 secret 571: targetClass = *target->isa;
2.10 timbl 572: display_cso(arg, request->anchor); /* Display "cover page" */
2.7 secret 573: return HT_LOADED; /* Local function only */
574: }
575: *query++ = 0; /* Skip '?' */
576: command = malloc(strlen("query")+ 1 + strlen(query)+ 2 + 1);
577: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
578:
579: de_escape(command, selector); /* Bug fix TBL 921208 */
580:
581: strcpy(command, "query ");
582:
583: { /* Remove plus signs 921006 */
584: char *p;
585: for (p=query; *p; p++) {
586: if (*p == '+') *p = ' ';
587: }
588: }
589: strcat(command, query);
590:
1.1 timbl 591:
592: } else { /* Not index */
593: command = command = malloc(strlen(selector)+2+1);
594: de_escape(command, selector);
595: }
596: free(p1);
597: }
598:
1.3 timbl 599: {
600: char * p = command + strlen(command);
601: *p++ = CR; /* Macros to be correct on Mac */
602: *p++ = LF;
603: *p++ = 0;
604: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
605: }
1.1 timbl 606:
607: /* Set up a socket to the server for the data:
608: */
609: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
610: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
611: if (status<0){
612: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to connect to remote host for `%s'.\n",
613: arg);
614: free(command);
615: return HTInetStatus("connect");
616: }
617:
618:
619: if (TRACE) fprintf(stderr, "HTGopher: Connected, writing command `%s' to socket %d\n", command, s);
620:
621: #ifdef NOT_ASCII
622: {
623: char * p;
624: for(p = command; *p; p++) {
625: *p = TOASCII(*p);
626: }
627: }
628: #endif
629:
630: status = NETWRITE(s, command, (int)strlen(command));
631: free(command);
632: if (status<0){
633: if (TRACE) fprintf(stderr, "HTGopher: Unable to send command.\n");
634: return HTInetStatus("send");
635: }
636:
637: /* Now read the data from the socket:
638: */
639: switch (gtype) {
640:
641: case GOPHER_HTML :
2.11 ! timbl 642: HTParseSocket(WWW_HTML, s, request);
1.2 timbl 643: break;
1.1 timbl 644:
1.3 timbl 645: case GOPHER_GIF:
646: case GOPHER_IMAGE:
2.11 ! timbl 647: HTParseSocket(HTAtom_for("image/gif"), s, request);
1.3 timbl 648: break;
1.1 timbl 649: case GOPHER_MENU :
650: case GOPHER_INDEX :
2.11 ! timbl 651: target = HTML_new(request, NULL, WWW_HTML,
! 652: request->output_format, request->output_stream);
1.2 timbl 653: targetClass = *target->isa;
2.11 ! timbl 654: parse_menu(s,arg, request->anchor);
1.2 timbl 655: break;
2.7 secret 656:
657: case GOPHER_CSO:
2.11 ! timbl 658: target = HTML_new(request, NULL, WWW_HTML,
! 659: request->output_format, request->output_stream);
2.7 secret 660: targetClass = *target->isa;
2.11 ! timbl 661: parse_cso(s, arg, request->anchor);
2.7 secret 662: break;
663:
664: case GOPHER_MACBINHEX:
665: case GOPHER_PCBINHEX:
666: case GOPHER_UUENCODED:
667: case GOPHER_BINARY:
668: /* Specifying WWW_UNKNOWN forces dump to local disk. */
2.11 ! timbl 669: HTParseSocket (WWW_UNKNOWN, s, request);
2.7 secret 670: break;
671:
1.1 timbl 672: case GOPHER_TEXT :
673: default: /* @@ parse as plain text */
2.11 ! timbl 674: HTParseSocket(WWW_PLAINTEXT, s, request);
2.7 secret 675: break;
676:
677: case GOPHER_SOUND :
2.11 ! timbl 678: HTParseSocket(WWW_AUDIO, s, request);
1.2 timbl 679: break;
680:
1.1 timbl 681: } /* switch(gtype) */
1.2 timbl 682:
683: NETCLOSE(s);
684: return HT_LOADED;
1.1 timbl 685: }
1.2 timbl 686:
2.10 timbl 687: GLOBALDEF PUBLIC HTProtocol HTGopher = { "gopher", HTLoadGopher, NULL, NULL };
1.1 timbl 688:
Webmaster