Annotation of libwww/Library/src/HTWSRC.c, revision 2.27
2.14 frystyk 1: /* HTWSRC.c
2: ** PARSE WAIS SOURCE FILE
3: **
2.18 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This module parses a stream with WAIS source file
8: ** format information on it and creates a structured stream.
9: ** That structured stream is then converted into whatever.
10: **
2.11 duns 11: ** 3 Jun 93 Bug fix: Won't crash if no description
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 13: */
14:
2.16 frystyk 15: /* Library include files */
16: #include "tcp.h"
17: #include "HTUtils.h"
18: #include "HTString.h"
2.23 frystyk 19: #include "HTMLPDTD.h"
2.20 frystyk 20: #include "HTMLGen.h"
2.2 timbl 21: #include "HTParse.h"
2.22 frystyk 22: #include "HTReqMan.h"
2.19 frystyk 23: #include "HTProxy.h"
2.12 frystyk 24: #include "HTWSRC.h" /* Implemented here */
2.1 timbl 25:
/*	Size limits and tuning constants
*/
#define BIG             10000           /* Arbitrary limit to value length */
#define PARAM_MAX       BIG             /* Most characters collected for one tag name or value */
#define CACHE_PERIOD    (7*86400)       /* Time to keep .src file in seconds (7 days) */

#define HEX_ESCAPE      '%'
31:
32: struct _HTStructured {
33: CONST HTStructuredClass * isa;
34: /* ... */
35: };
36:
/*	Shorthands for invoking the methods of the target structured stream.
**	Each one expects a variable named `me' with a `target' member to be
**	in scope at the point of use.
*/
#define PUTC(c)     (*me->target->isa->put_character)(me->target, (c))
#define PUTS(s)     (*me->target->isa->put_string)(me->target, (s))
#define START(e)    (*me->target->isa->start_element)(me->target, (e), 0, 0)
#define END(e)      (*me->target->isa->end_element)(me->target, (e))
2.1 timbl 41:
42:
43: /* Here are the parameters which can be specified in a source file
44: */
45: PRIVATE CONST char* par_name[] = {
46: "version",
47: "ip-address",
48: #define PAR_IP_NAME 2
49: "ip-name",
50: #define PAR_TCP_PORT 3
51: "tcp-port",
52: #define PAR_DATABASE_NAME 4
53: "database-name",
54: #define PAR_COST 5
55: "cost",
56: #define PAR_COST_UNIT 6
57: "cost-unit",
58: #define PAR_FREE 7
59: "free",
60: #define PAR_MAINTAINER 8
61: "maintainer",
62: #define PAR_DESCRIPTION 9
2.5 timbl 63: "description",
64: "keyword-list",
2.1 timbl 65: "source",
2.5 timbl 66: #define PAR_UNKNOWN 12
67: "unknown",
2.1 timbl 68: 0, /* Terminate list */
2.5 timbl 69: #define PAR_COUNT 13
2.1 timbl 70: } ;
71:
72:
/*	States of the source file tokenizer
*/
enum tokenstate {
    beginning,          /* Waiting for the opening '(' of the file */
    before_tag,         /* Waiting for ':' (start of a tag) or the final ')' */
    colon,              /* Collecting a tag name up to white space */
    before_value,       /* Skipping white space in front of a value */
    value,              /* Collecting a plain value up to white space */
    bracketed_value,    /* Collecting a value up to ')' */
    quoted_value,       /* Collecting a value up to '"' */
    escape_in_quoted,   /* Entered after a backslash inside a quoted value */
    done                /* Closing ')' seen; further input is ignored */
};
2.1 timbl 75:
76:
77: /* Stream Object
78: ** ------------
79: **
80: ** The target is the structured stream down which the
81: ** parsed results will go.
82: **
83: ** all the static stuff below should go in here to make it reentrant
84: */
85:
86: struct _HTStream {
87: CONST HTStreamClass * isa;
88: HTStructured * target;
2.19 frystyk 89: HTRequest * request;
2.1 timbl 90: char * par_value[PAR_COUNT];
91: enum tokenstate state;
92: char param[BIG+1];
93: int param_number;
94: int param_count;
95: };
96:
97:
98:
99:
100: PUBLIC CONST char * hex = "0123456789ABCDEF";
101:
102: /* Decode one hex character
103: */
104:
105: PUBLIC char from_hex ARGS1(char, c)
106: {
107: return (c>='0')&&(c<='9') ? c-'0'
108: : (c>='A')&&(c<='F') ? c-'A'+10
109: : (c>='a')&&(c<='f') ? c-'a'+10
110: : 0;
111: }
112:
113:
114: /* State machine
115: ** -------------
116: **
117: ** On entry,
118: ** me->state is a valid state (see WSRC_init)
119: ** c is the next character
120: ** On exit,
121: ** returns 1 Done with file
122: ** 0 Continue. me->state is updated if necessary.
123: ** -1 Syntax error error
124: */
125:
126:
127: /* Treat One Character
128: ** -------------------
129: */
2.17 frystyk 130: PRIVATE int WSRCParser_put_character ARGS2(HTStream*, me, char, c)
2.1 timbl 131: {
132: switch (me->state) {
133: case beginning:
134: if (c=='(') me->state = before_tag;
135: break;
136:
137: case before_tag:
138: if (c==')') {
139: me->state = done;
2.17 frystyk 140: return HT_OK; /* Done with input file */
2.1 timbl 141: } else if (c==':') {
142: me->param_count = 0;
143: me->state = colon;
144: } /* Ignore other text */
145: break;
146:
147: case colon:
148: if (WHITE(c)) {
149: me->param[me->param_count++] = 0; /* Terminate */
150: for(me->param_number = 0; par_name[me->param_number]; me->param_number++) {
151: if (0==strcmp(par_name[me->param_number], me->param)) {
152: break;
153: }
154: }
155: if (!par_name[me->param_number]) { /* Unknown field */
2.26 frystyk 156: if (WWWTRACE) TTYPrint(TDEST,
2.5 timbl 157: "HTWSRC: Unknown field `%s' in source file\n",
2.1 timbl 158: me->param);
2.5 timbl 159: me->param_number = PAR_UNKNOWN;
160: me->state = before_value; /* Could be better ignore */
2.17 frystyk 161: return HT_OK;
2.1 timbl 162: }
163: me->state = before_value;
164: } else {
165: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
166: }
167: break;
168:
169: case before_value:
170: if (c==')') {
171: me->state = done;
2.17 frystyk 172: return HT_OK; /* Done with input file */
2.1 timbl 173: }
2.17 frystyk 174: if (WHITE(c)) return HT_OK; /* Skip white space */
2.1 timbl 175: me->param_count = 0;
176: if (c=='"') {
177: me->state = quoted_value;
178: break;
179: }
2.5 timbl 180: me->state = (c=='"') ? quoted_value :
181: (c=='(') ? bracketed_value : value;
2.1 timbl 182: me->param[me->param_count++] = c; /* Don't miss first character */
183: break;
184:
185: case value:
186: if (WHITE(c)) {
187: me->param[me->param_count] = 0;
188: StrAllocCopy(me->par_value[me->param_number], me->param);
189: me->state = before_tag;
190: } else {
191: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
192: }
193: break;
194:
2.5 timbl 195: case bracketed_value:
196: if (c==')') {
197: me->param[me->param_count] = 0;
198: StrAllocCopy(me->par_value[me->param_number], me->param);
199: me->state = before_tag;
200: break;
201: }
202: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
203: break;
204:
2.1 timbl 205: case quoted_value:
206: if (c=='"') {
207: me->param[me->param_count] = 0;
208: StrAllocCopy(me->par_value[me->param_number], me->param);
209: me->state = before_tag;
2.5 timbl 210: break;
211: }
212:
213: if (c=='\\') { /* Ignore escape but switch state */
214: me->state = escape_in_quoted;
215: break;
2.1 timbl 216: }
2.5 timbl 217: /* Fall through! */
218:
219: case escape_in_quoted:
220: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
2.1 timbl 221: break;
222:
223: case done: /* Ignore anything after EOF */
2.17 frystyk 224: return HT_OK;
2.1 timbl 225:
226: } /* switch me->state */
2.17 frystyk 227: return HT_OK;
2.1 timbl 228: }
229:
230:
/*	Write the description to a cache file
**	=====================================
**
**	The file is named
**	    CACHE_FILE_PREFIX "WSRC-" ip-name ":" tcp-port ":" database ".txt"
**	where tcp-port defaults to "210" and the database name is URL-escaped
**	and cut off after 100 characters. The file receives the description
**	text, or a fixed placeholder if the source had no description.
**
**	Returns YES if the file was written, NO if a required parameter is
**	missing, the name could not be built, or the file could not be
**	opened or written.
**
**	Bugs:	Maybe for filesystem-challenged platforms (MSDOS for example) we
**		should make a hash code for the filename.
**
**	NOTE(review): ip-name and tcp-port come straight from the parsed
**	source file and are placed in the file name unescaped; a value
**	containing path separators would change where the file lands.
**	Confirm whether they need escaping like the database name.
*/

#ifdef CACHE_FILE_PREFIX
PRIVATE BOOL write_cache ARGS1(HTStream *, me)
{
    FILE * fp;
    char * cache_file_name;
    char * www_database;
    CONST char * tcp_port;
    BOOL ok = YES;

    if (!me->par_value[PAR_DATABASE_NAME]
        || !me->par_value[PAR_IP_NAME]
        ) return NO;

    tcp_port = me->par_value[PAR_TCP_PORT] ? me->par_value[PAR_TCP_PORT] : "210";

    www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], URL_XALPHAS);
    if (!www_database) return NO;

    /*
    ** The parameter values can be up to PARAM_MAX characters long, so the
    ** name is built in a buffer sized for the actual strings rather than
    ** in a fixed-size array. sizeof() of the literal covers the constant
    ** parts of the format plus the terminating NUL.
    */
    cache_file_name = (char *) malloc(strlen(CACHE_FILE_PREFIX) +
                                      strlen(me->par_value[PAR_IP_NAME]) +
                                      strlen(tcp_port) +
                                      strlen(www_database) +
                                      sizeof("WSRC-::.txt"));
    if (!cache_file_name) {
        free(www_database);
        return NO;
    }
    sprintf(cache_file_name, "%sWSRC-%s:%s:%.100s.txt",
            CACHE_FILE_PREFIX,
            me->par_value[PAR_IP_NAME],
            tcp_port,
            www_database);
    free(www_database);

    fp = fopen(cache_file_name, "wb");
    free(cache_file_name);
    if (!fp) return NO;

    if (fputs(me->par_value[PAR_DESCRIPTION]
                  ? me->par_value[PAR_DESCRIPTION]
                  : "Description not available\n", fp) == EOF)
        ok = NO;
    if (fclose(fp) != 0)                /* fclose flushes; a failure means data was lost */
        ok = NO;
    return ok;
}
#endif
266:
2.1 timbl 267: /* Output equivalent HTML
268: ** ----------------------
269: **
270: */
271:
272: void give_parameter ARGS2(HTStream *, me, int, p)
273: {
274: PUTS(par_name[p]);
275: if (me->par_value[p]) {
276: PUTS(": ");
277: PUTS(me->par_value[p]);
278: PUTS("; ");
279: } else {
280: PUTS(" NOT GIVEN in source file; ");
281: }
282: }
283:
284:
285: /* Generate Outout
286: ** ===============
287: */
2.4 timbl 288: PRIVATE void WSRC_gen_html ARGS2(HTStream *, me, BOOL, source_file)
2.1 timbl 289:
290: {
291: if (me->par_value[PAR_DATABASE_NAME]) {
2.19 frystyk 292: char * shortname = NULL;
2.1 timbl 293: int l;
294: StrAllocCopy(shortname, me->par_value[PAR_DATABASE_NAME]);
295: l = strlen(shortname);
296: if ( l > 4 && !strcasecomp(shortname + l -4, ".src")) {
297: shortname[l-4] = 0; /* Chop of .src -- boring! */
298: }
299:
300: START(HTML_TITLE);
301: PUTS(shortname);
2.4 timbl 302: PUTS(source_file ? " WAIS source file" : " index");
2.1 timbl 303: END(HTML_TITLE);
304:
305: START(HTML_H1);
306: PUTS(shortname);
2.4 timbl 307: PUTS(source_file ? " description" : " index");
2.1 timbl 308: END(HTML_H1);
2.19 frystyk 309: free(shortname); /* memleak, henrik */
2.1 timbl 310: }
311:
312: START(HTML_DL); /* Definition list of details */
313:
2.4 timbl 314: if (source_file) {
315: START(HTML_DT);
2.19 frystyk 316: PUTS("Access link");
2.4 timbl 317: START(HTML_DD);
318: if (me->par_value[PAR_IP_NAME] &&
319: me->par_value[PAR_DATABASE_NAME]) {
320: char WSRC_address[256];
2.19 frystyk 321: char *addr = HTAnchor_address((HTAnchor*) me->request->anchor);
2.25 frystyk 322: char *gate = HTGateway_find(addr);
2.19 frystyk 323: char *www_database = HTEscape(me->par_value[PAR_DATABASE_NAME],
324: URL_XALPHAS);
325: if (!gate) {
326: sprintf(WSRC_address, "wais://%s%s%s/%s",
327: me->par_value[PAR_IP_NAME],
328: me->par_value[PAR_TCP_PORT] ? ":" : "",
329: me->par_value[PAR_TCP_PORT] ?
330: me->par_value[PAR_TCP_PORT] :"", www_database);
331: HTStartAnchor(me->target, NULL, WSRC_address);
332: PUTS("Direct access");
333: END(HTML_A);
334: } else {
335: sprintf(WSRC_address, "%s%s%s%s/%s",
336: gate,
337: me->par_value[PAR_IP_NAME],
338: me->par_value[PAR_TCP_PORT] ? ":" : "",
339: me->par_value[PAR_TCP_PORT] ?
340: me->par_value[PAR_TCP_PORT] : "",
341: www_database);
342: HTStartAnchor(me->target, NULL, WSRC_address);
343: PUTS("Through a gateway");
344: END(HTML_A);
345: }
346: FREE(gate);
347: free(addr);
2.4 timbl 348: free(www_database);
349:
350: } else {
351: give_parameter(me, PAR_IP_NAME);
2.10 timbl 352: give_parameter(me, PAR_DATABASE_NAME);
2.4 timbl 353: }
354:
355: } /* end if source_file */
356:
2.1 timbl 357: if (me->par_value[PAR_MAINTAINER]) {
358: START(HTML_DT);
359: PUTS("Maintainer");
360: START(HTML_DD);
361: PUTS(me->par_value[PAR_MAINTAINER]);
362: }
2.10 timbl 363: if (me->par_value[PAR_IP_NAME]) {
364: START(HTML_DT);
365: PUTS("Host");
366: START(HTML_DD);
367: PUTS(me->par_value[PAR_IP_NAME]);
368: }
369:
2.1 timbl 370: END(HTML_DL);
371:
2.5 timbl 372: if (me->par_value[PAR_DESCRIPTION]) {
373: START(HTML_PRE); /* Preformatted description */
374: PUTS(me->par_value[PAR_DESCRIPTION]);
375: END(HTML_PRE);
376: }
377:
2.11 duns 378: (*me->target->isa->_free)(me->target);
2.1 timbl 379:
380: return;
381: } /* generate html */
382:
383:
2.17 frystyk 384: PRIVATE int WSRCParser_put_string ARGS2(HTStream *, context, CONST char*, s)
2.1 timbl 385: {
2.17 frystyk 386: while (*s)
387: WSRCParser_put_character(context, *s++);
388: return HT_OK;
2.1 timbl 389: }
390:
391:
2.17 frystyk 392: PRIVATE int WSRCParser_write ARGS3(HTStream *, context,
393: CONST char*, b,
394: int, l)
2.1 timbl 395: {
2.17 frystyk 396: while (l-- > 0)
397: WSRCParser_put_character(context, *b++);
398: return HT_OK;
2.1 timbl 399: }
400:
2.17 frystyk 401: PRIVATE int WSRCParser_flush ARGS1(HTStream *, me)
402: {
403: return HT_OK;
404: }
2.1 timbl 405:
2.13 frystyk 406: PRIVATE int WSRCParser_free ARGS1(HTStream *, me)
2.1 timbl 407: {
2.4 timbl 408: WSRC_gen_html(me, YES);
2.6 timbl 409: #ifdef CACHE_FILE_PREFIX
2.4 timbl 410: write_cache(me);
2.6 timbl 411: #endif
2.1 timbl 412: {
413: int p;
414: for(p=0; par_name[p]; p++) { /* Clear out old values */
415: if (me->par_value[p]) {
416: free(me->par_value[p]);
417: }
418: }
419: }
420: free(me);
2.17 frystyk 421: return HT_OK;
2.1 timbl 422: }
423:
2.27 ! frystyk 424: PRIVATE int WSRCParser_abort ARGS2(HTStream *, me, HTList *, e)
2.1 timbl 425: {
2.7 timbl 426: WSRCParser_free(me);
2.17 frystyk 427: return HT_ERROR;
2.1 timbl 428: }
429:
430:
431: /* Stream subclass -- method routines
432: ** ---------------
433: */
434:
435: HTStreamClass WSRCParserClass = {
436: "WSRCParser",
2.17 frystyk 437: WSRCParser_flush,
2.1 timbl 438: WSRCParser_free,
2.7 timbl 439: WSRCParser_abort,
2.1 timbl 440: WSRCParser_put_character,
441: WSRCParser_put_string,
442: WSRCParser_write
443:
444: };
445:
2.4 timbl 446:
2.1 timbl 447: /* Converter from WAIS Source to whatever
448: ** --------------------------------------
449: */
2.8 timbl 450: PUBLIC HTStream* HTWSRCConvert ARGS5(
451: HTRequest *, request,
452: void *, param,
453: HTFormat, input_format,
454: HTFormat, output_format,
455: HTStream *, output_stream)
2.1 timbl 456: {
2.19 frystyk 457: HTStream * me = (HTStream *) calloc(1, sizeof(HTStream));
2.1 timbl 458: if (!me) outofmem(__FILE__, "HTWSRCConvert");
459: me->isa = &WSRCParserClass;
2.20 frystyk 460: me->target = HTMLGenerator(request, param, input_format, output_format,
461: output_stream);
2.19 frystyk 462: me->request = request;
2.1 timbl 463: me->state = beginning;
464:
465: return me;
466: }
467:
Webmaster