Annotation of libwww/Library/src/HTGopher.c, revision 2.6
1.1 timbl 1: /* GOPHER ACCESS HTGopher.c
2: ** =============
3: **
4: ** History:
5: ** 26 Sep 90 Adapted from other accesses (News, HTTP) TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
7: */
8:
1.2 timbl 9: /* Implements:
10: */
11: #include "HTGopher.h"
12:
1.3 timbl 13: #define CR FROMASCII('\015') /* Carriage return */
14: #define LF FROMASCII('\012') /* ASCII line feed
15: (sometimes \n is CR on Mac) */
16:
1.1 timbl 17: #define GOPHER_PORT 70 /* See protocol spec */
18: #define BIG 1024 /* Bug */
19: #define LINE_LENGTH 256 /* Bug */
20:
21: /* Gopher entity types:
22: */
23: #define GOPHER_TEXT '0'
24: #define GOPHER_MENU '1'
25: #define GOPHER_CSO '2'
26: #define GOPHER_ERROR '3'
27: #define GOPHER_MACBINHEX '4'
28: #define GOPHER_PCBINHEX '5'
29: #define GOPHER_UUENCODED '6'
30: #define GOPHER_INDEX '7'
31: #define GOPHER_TELNET '8'
1.3 timbl 32: #define GOPHER_GIF 'g'
1.1 timbl 33: #define GOPHER_HTML 'h' /* HTML */
1.3 timbl 34: #define GOPHER_IMAGE 'I'
1.1 timbl 35: #define GOPHER_DUPLICATE '+'
36: #define GOPHER_WWW 'w' /* W3 address */
37:
38: #include <ctype.h>
39: #include "HTUtils.h" /* Coding convention macros */
40: #include "tcp.h"
41:
42:
43: #include "HTParse.h"
44: #include "HTFormat.h"
45: #include "HTTCP.h"
46:
1.2 timbl 47: /* Hypertext object building machinery
48: */
49: #include "HTML.h"
50:
51: #define PUTC(c) (*targetClass.put_character)(target, c)
52: #define PUTS(s) (*targetClass.put_string)(target, s)
53: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
54: #define END(e) (*targetClass.end_element)(target, e)
55: #define END_TARGET (*targetClass.end_document)(target)
56: #define FREE_TARGET (*targetClass.free)(target)
57: struct _HTStructured {
58: CONST HTStructuredClass * isa;
59: /* ... */
60: };
61:
62: PRIVATE HTStructured *target; /* the new hypertext */
63: PRIVATE HTStructuredClass targetClass; /* Its action routines */
64:
65:
1.1 timbl 66: #ifdef NeXTStep
67: #include <appkit/defaults.h>
68: #define GOPHER_PROGRESS(foo)
69: #else
70: #define GOPHER_PROGRESS(foo) fprintf(stderr, "%s\n", (foo))
71: #endif
72:
73: #define NEXT_CHAR HTGetChararcter()
74:
75:
76:
77: /* Module-wide variables
78: */
79: PRIVATE int s; /* Socket for GopherHost */
80:
81:
1.2 timbl 82:
1.1 timbl 83: /* Matrix of allowed characters in filenames
84: ** -----------------------------------------
85: */
86:
87: PRIVATE BOOL acceptable[256];
88: PRIVATE BOOL acceptable_inited = NO;
89:
90: PRIVATE void init_acceptable NOARGS
91: {
92: unsigned int i;
93: char * good =
94: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
95: for(i=0; i<256; i++) acceptable[i] = NO;
96: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
97: acceptable_inited = YES;
98: }
99:
100: PRIVATE CONST char hex[17] = "0123456789abcdef";
101:
102: /* Decdoe one hex character
103: */
104:
105: PRIVATE char from_hex ARGS1(char, c)
106: {
107: return (c>='0')&&(c<='9') ? c-'0'
108: : (c>='A')&&(c<='F') ? c-'A'+10
109: : (c>='a')&&(c<='f') ? c-'a'+10
110: : 0;
111: }
112:
113:
114:
115: /* Paste in an Anchor
116: ** ------------------
117: **
118: ** The title of the destination is set, as there is no way
119: ** of knowing what the title is when we arrive.
120: **
121: ** On entry,
122: ** HT is in append mode.
123: ** text points to the text to be put into the file, 0 terminated.
124: ** addr points to the hypertext refernce address 0 terminated.
125: */
126: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
127: {
1.2 timbl 128:
129:
130:
131: BOOL present[HTML_A_ATTRIBUTES];
132: CONST char * value[HTML_A_ATTRIBUTES];
1.1 timbl 133:
1.2 timbl 134: int i;
135:
136: for (i=0; i<HTML_A_ATTRIBUTES; i++) present[i]=0;
137: present[HTML_A_HREF] = YES;
138: value[HTML_A_HREF] = addr;
139: present[HTML_A_TITLE] = YES;
140: value[HTML_A_TITLE] = text;
141:
142: (*targetClass.start_element)(target, HTML_A, present, value);
1.1 timbl 143:
1.2 timbl 144: PUTS(text);
145: END(HTML_A);
1.1 timbl 146: }
147:
148:
149: /* Parse a Gopher Menu document
150: ** ============================
151: **
152: */
153:
154: PRIVATE void parse_menu ARGS2 (
1.2 timbl 155: CONST char *, arg,
156: HTParentAnchor *, anAnchor)
1.1 timbl 157: {
158: char gtype;
159: char ch;
160: char line[BIG];
161: char address[BIG];
162: char *name, *selector; /* Gopher menu fields */
163: char *host;
164: char *port;
165: char *p = line;
1.2 timbl 166: CONST char *title;
1.1 timbl 167:
168: #define TAB '\t'
169: #define HEX_ESCAPE '%'
170:
171:
1.2 timbl 172: title = HTAnchor_title(anAnchor);
173: if (title) {
174: START(HTML_H1);
175: PUTS(title);
176: END(HTML_H1);
177: } else
178: PUTS("Select one of:\n\n");
1.1 timbl 179:
1.2 timbl 180: START(HTML_MENU);
1.1 timbl 181: while ((ch=NEXT_CHAR) != (char)EOF) {
1.3 timbl 182: if (ch != LF) {
1.1 timbl 183: *p = ch; /* Put character in line */
184: if (p< &line[BIG-1]) p++;
185:
186: } else {
187: *p++ = 0; /* Terminate line */
188: p = line; /* Scan it to parse it */
189: port = 0; /* Flag "not parsed" */
190: if (TRACE) fprintf(stderr, "HTGopher: Menu item: %s\n", line);
191: gtype = *p++;
192:
193: /* Break on line with a dot by itself */
194: if ((gtype=='.') && ((*p=='\r') || (*p==0))) break;
195:
196: if (gtype && *p) {
197: name = p;
198: selector = strchr(name, TAB);
1.3 timbl 199: START(HTML_LI);
1.1 timbl 200: if (selector) {
201: *selector++ = 0; /* Terminate name */
202: host = strchr(selector, TAB);
203: if (host) {
204: *host++ = 0; /* Terminate selector */
205: port = strchr(host, TAB);
206: if (port) {
207: char *junk;
208: port[0] = ':'; /* delimit host a la W3 */
209: junk = strchr(port, TAB);
210: if (junk) *junk++ = 0; /* Chop port */
211: if ((port[1]=='0') && (!port[2]))
212: port[0] = 0; /* 0 means none */
213: } /* no port */
214: } /* host ok */
215: } /* selector ok */
216: } /* gtype and name ok */
217:
218: if (gtype == GOPHER_WWW) { /* Gopher pointer to W3 */
219: write_anchor(name, selector);
220:
221: } else if (port) { /* Other types need port */
222: if (gtype == GOPHER_TELNET) {
223: if (*selector) sprintf(address, "telnet://%s@%s/",
224: selector, host);
225: else sprintf(address, "telnet://%s/", host);
226:
227: } else { /* If parsed ok */
228: char *q;
229: char *p;
230: sprintf(address, "//%s/%c", host, gtype);
231: q = address+ strlen(address);
232: for(p=selector; *p; p++) { /* Encode selector string */
233: if (acceptable[*p]) *q++ = *p;
234: else {
235: *q++ = HEX_ESCAPE; /* Means hex coming */
236: *q++ = hex[(TOASCII(*p)) >> 4];
237: *q++ = hex[(TOASCII(*p)) & 15];
238: }
239: }
240: *q++ = 0; /* terminate address */
241: }
1.2 timbl 242: PUTS(" "); /* Prettier JW/TBL */
1.1 timbl 243: write_anchor(name, address);
1.2 timbl 244:
1.1 timbl 245: } else { /* parse error */
246: if (TRACE) fprintf(stderr,
247: "HTGopher: Bad menu item.\n");
1.2 timbl 248: PUTS(line);
249:
1.1 timbl 250: } /* parse error */
251:
252: p = line; /* Start again at beginning of line */
253:
254: } /* if end of line */
255:
256: } /* Loop over characters */
257:
1.2 timbl 258: END(HTML_MENU);
259: END_TARGET;
260: FREE_TARGET;
261:
1.1 timbl 262: return;
263: }
264:
265: /* Display a Gopher Index document
266: ** -------------------------------
267: */
268:
269: PRIVATE void display_index ARGS2 (
270: CONST char *, arg,
271: HTParentAnchor *,anAnchor)
272: {
1.2 timbl 273:
274: START(HTML_H1);
275: PUTS(arg);
276: END(HTML_H1);
277:
278: PUTS("\nPlease enter words to search for.\n");
1.1 timbl 279:
280: if (!HTAnchor_title(anAnchor))
1.2 timbl 281: HTAnchor_setTitle(anAnchor, arg);
1.1 timbl 282:
1.2 timbl 283: END_TARGET;
284: FREE_TARGET;
1.1 timbl 285: return;
286: }
287:
288:
289: /* De-escape a selector into a command
290: ** -----------------------------------
291: **
292: ** The % hex escapes are converted. Otheriwse, the string is copied.
293: */
294: PRIVATE void de_escape ARGS2(char *, command, CONST char *, selector)
295: {
296: CONST char * p = selector;
297: char * q = command;
298: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
299: while (*p) { /* Decode hex */
300: if (*p == HEX_ESCAPE) {
301: char c;
302: unsigned int b;
303: p++;
304: c = *p++;
305: b = from_hex(c);
306: c = *p++;
307: if (!c) break; /* Odd number of chars! */
308: *q++ = FROMASCII((b<<4) + from_hex(c));
309: } else {
310: *q++ = *p++; /* Record */
311: }
312: }
313: *q++ = 0; /* Terminate command */
314:
315: }
316:
317:
318: /* Load by name HTLoadGopher
319: ** ============
320: **
321: ** Bug: No decoding of strange data types as yet.
322: **
323: */
1.2 timbl 324: PUBLIC int HTLoadGopher ARGS4(
325: CONST char *, arg,
326: HTParentAnchor *, anAnchor,
327: HTFormat, format_out,
328: HTStream*, sink)
1.1 timbl 329: {
330: char *command; /* The whole command */
331: int status; /* tcp return */
332: char gtype; /* Gopher Node type */
333: char * selector; /* Selector string */
334:
335: struct sockaddr_in soc_address; /* Binary network address */
336: struct sockaddr_in* sin = &soc_address;
337:
338: if (!acceptable_inited) init_acceptable();
339:
340: if (!arg) return -3; /* Bad if no name sepcified */
341: if (!*arg) return -2; /* Bad if name had zero length */
342:
343: if (TRACE) fprintf(stderr, "HTGopher: Looking for %s\n", arg);
344:
345:
346: /* Set up defaults:
347: */
348: sin->sin_family = AF_INET; /* Family, host order */
349: sin->sin_port = htons(GOPHER_PORT); /* Default: new port, */
350:
351: /* Get node name and optional port number:
352: */
353: {
354: char *p1 = HTParse(arg, "", PARSE_HOST);
355: int status = HTParseInet(sin, p1);
356: free(p1);
357: if (status) return status; /* Bad */
358: }
359:
360: /* Get entity type, and selector string.
361: */
362: {
363: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
364: gtype = '1'; /* Default = menu */
365: selector = p1;
366: if ((*selector++=='/') && (*selector)) { /* Skip first slash */
367: gtype = *selector++; /* Pick up gtype */
368: }
369: if (gtype == GOPHER_INDEX) {
370: char * query;
371: HTAnchor_setIndex(anAnchor); /* Search is allowed */
372: query = strchr(selector, '?'); /* Look for search string */
373: if (!query || !query[1]) { /* No search required */
1.3 timbl 374: target = HTML_new(anAnchor, format_out, sink);
1.2 timbl 375: targetClass = *target->isa;
1.1 timbl 376: display_index(arg, anAnchor); /* Display "cover page" */
2.6 ! timbl 377: return HT_LOADED; /* Local function only */
1.1 timbl 378: }
379: *query++ = 0; /* Skip '?' */
380: command = malloc(strlen(selector)+ 1 + strlen(query)+ 2 + 1);
381: if (command == NULL) outofmem(__FILE__, "HTLoadGopher");
382:
383: de_escape(command, selector); /* Bug fix TBL 921208 */
384:
385: strcat(command, "\t");
386:
387: { /* Remove plus signs 921006 */
388: char *p;
389: for (p=query; *p; p++) {
390: if (*p == '+') *p = ' ';
391: }
392: }
393: strcat(command, query);
394:
395: } else { /* Not index */
396: command = command = malloc(strlen(selector)+2+1);
397: de_escape(command, selector);
398: }
399: free(p1);
400: }
401:
1.3 timbl 402: {
403: char * p = command + strlen(command);
404: *p++ = CR; /* Macros to be correct on Mac */
405: *p++ = LF;
406: *p++ = 0;
407: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
408: }
1.1 timbl 409:
410: /* Set up a socket to the server for the data:
411: */
412: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
413: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
414: if (status<0){
415: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to connect to remote host for `%s'.\n",
416: arg);
417: free(command);
418: return HTInetStatus("connect");
419: }
420:
421: HTInitInput(s); /* Set up input buffering */
422:
423: if (TRACE) fprintf(stderr, "HTGopher: Connected, writing command `%s' to socket %d\n", command, s);
424:
425: #ifdef NOT_ASCII
426: {
427: char * p;
428: for(p = command; *p; p++) {
429: *p = TOASCII(*p);
430: }
431: }
432: #endif
433:
434: status = NETWRITE(s, command, (int)strlen(command));
435: free(command);
436: if (status<0){
437: if (TRACE) fprintf(stderr, "HTGopher: Unable to send command.\n");
438: return HTInetStatus("send");
439: }
440:
441: /* Now read the data from the socket:
442: */
443: switch (gtype) {
444:
445: case GOPHER_HTML :
1.2 timbl 446: HTParseSocket(WWW_HTML, format_out, anAnchor, s, sink);
447: break;
1.1 timbl 448:
1.3 timbl 449: case GOPHER_GIF:
450: case GOPHER_IMAGE:
451: HTParseSocket(HTAtom_for("image/gif"),
452: format_out, anAnchor, s, sink);
453: break;
1.1 timbl 454: case GOPHER_MENU :
455: case GOPHER_INDEX :
1.3 timbl 456: target = HTML_new(anAnchor, format_out, sink);
1.2 timbl 457: targetClass = *target->isa;
1.1 timbl 458: parse_menu(arg, anAnchor);
1.2 timbl 459: break;
460:
1.1 timbl 461: case GOPHER_TEXT :
462: default: /* @@ parse as plain text */
1.2 timbl 463: HTParseSocket(WWW_PLAINTEXT, format_out, anAnchor, s, sink);
464: break;
465:
1.1 timbl 466: } /* switch(gtype) */
1.2 timbl 467:
468: NETCLOSE(s);
469: return HT_LOADED;
1.1 timbl 470: }
1.2 timbl 471:
472: PUBLIC HTProtocol HTGopher = { "gopher", HTLoadGopher, NULL };
1.1 timbl 473:
Webmaster