Annotation of libwww/Library/src/HTWSRC.c, revision 2.21
2.14 frystyk 1: /* HTWSRC.c
2: ** PARSE WAIS SOURCE FILE
3: **
2.18 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This module parses a stream with WAIS source file
8: ** format information on it and creates a structured stream.
9: ** That structured stream is then converted into whatever.
10: **
2.11 duns 11: ** 3 Jun 93 Bug fix: Won't crash if no description
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 13: */
14:
2.16 frystyk 15: /* Library include files */
16: #include "tcp.h"
17: #include "HTUtils.h"
18: #include "HTString.h"
2.20 frystyk 19: #include "HTMLGen.h"
2.2 timbl 20: #include "HTParse.h"
2.19 frystyk 21: #include "HTProxy.h"
2.12 frystyk 22: #include "HTWSRC.h" /* Implemented here */
2.1 timbl 23:
/* Limits and constants for the WAIS source parser */
#define BIG             10000           /* Arbitrary limit to value length */
#define PARAM_MAX       BIG             /* Most characters kept of one name or value */
#define CACHE_PERIOD    (7*86400)       /* Time to keep .src file in seconds */

#define HEX_ESCAPE      '%'
29:
30: struct _HTStructured {
31: CONST HTStructuredClass * isa;
32: /* ... */
33: };
34:
/* Shorthands for calling the methods of the structured target.  Each of
** them expects a variable named `me' (the HTStream being processed) to
** be in scope at the point of use.  START passes 0 for the two trailing
** arguments of start_element.
*/
#define PUTC(c)   (*me->target->isa->put_character)(me->target, (c))
#define PUTS(s)   (*me->target->isa->put_string)(me->target, (s))
#define START(e)  (*me->target->isa->start_element)(me->target, (e), 0, 0)
#define END(e)    (*me->target->isa->end_element)(me->target, (e))
2.1 timbl 39:
40:
41: /* Here are the parameters which can be specified in a source file
42: */
43: PRIVATE CONST char* par_name[] = {
44: "version",
45: "ip-address",
46: #define PAR_IP_NAME 2
47: "ip-name",
48: #define PAR_TCP_PORT 3
49: "tcp-port",
50: #define PAR_DATABASE_NAME 4
51: "database-name",
52: #define PAR_COST 5
53: "cost",
54: #define PAR_COST_UNIT 6
55: "cost-unit",
56: #define PAR_FREE 7
57: "free",
58: #define PAR_MAINTAINER 8
59: "maintainer",
60: #define PAR_DESCRIPTION 9
2.5 timbl 61: "description",
62: "keyword-list",
2.1 timbl 63: "source",
2.5 timbl 64: #define PAR_UNKNOWN 12
65: "unknown",
2.1 timbl 66: 0, /* Terminate list */
2.5 timbl 67: #define PAR_COUNT 13
2.1 timbl 68: } ;
69:
70:
/* States of the character-by-character parser (WSRCParser_put_character) */
enum tokenstate {
    beginning,          /* Nothing seen yet; waiting for the opening '('   */
    before_tag,         /* Waiting for ':' (a name follows) or closing ')' */
    colon,              /* Collecting a parameter name up to white space   */
    before_value,       /* Skipping white space in front of a value        */
    value,              /* Collecting a bare value up to white space       */
    bracketed_value,    /* Collecting a value up to ')'                    */
    quoted_value,       /* Collecting a value up to '"'                    */
    escape_in_quoted,   /* A backslash was just seen inside a quoted value */
    done                /* Closing ')' seen; further input is ignored      */
};
2.1 timbl 73:
74:
75: /* Stream Object
76: ** ------------
77: **
78: ** The target is the structured stream down which the
79: ** parsed results will go.
80: **
81: ** all the static stuff below should go in here to make it reentrant
82: */
83:
84: struct _HTStream {
85: CONST HTStreamClass * isa;
86: HTStructured * target;
2.19 frystyk 87: HTRequest * request;
2.1 timbl 88: char * par_value[PAR_COUNT];
89: enum tokenstate state;
90: char param[BIG+1];
91: int param_number;
92: int param_count;
93: };
94:
95:
96:
97:
98: PUBLIC CONST char * hex = "0123456789ABCDEF";
99:
100: /* Decode one hex character
101: */
102:
103: PUBLIC char from_hex ARGS1(char, c)
104: {
105: return (c>='0')&&(c<='9') ? c-'0'
106: : (c>='A')&&(c<='F') ? c-'A'+10
107: : (c>='a')&&(c<='f') ? c-'a'+10
108: : 0;
109: }
110:
111:
112: /* State machine
113: ** -------------
114: **
115: ** On entry,
116: ** me->state is a valid state (see WSRC_init)
117: ** c is the next character
118: ** On exit,
119: ** returns 1 Done with file
120: ** 0 Continue. me->state is updated if necessary.
121: ** -1 Syntax error error
122: */
123:
124:
125: /* Treat One Character
126: ** -------------------
127: */
2.17 frystyk 128: PRIVATE int WSRCParser_put_character ARGS2(HTStream*, me, char, c)
2.1 timbl 129: {
130: switch (me->state) {
131: case beginning:
132: if (c=='(') me->state = before_tag;
133: break;
134:
135: case before_tag:
136: if (c==')') {
137: me->state = done;
2.17 frystyk 138: return HT_OK; /* Done with input file */
2.1 timbl 139: } else if (c==':') {
140: me->param_count = 0;
141: me->state = colon;
142: } /* Ignore other text */
143: break;
144:
145: case colon:
146: if (WHITE(c)) {
147: me->param[me->param_count++] = 0; /* Terminate */
148: for(me->param_number = 0; par_name[me->param_number]; me->param_number++) {
149: if (0==strcmp(par_name[me->param_number], me->param)) {
150: break;
151: }
152: }
153: if (!par_name[me->param_number]) { /* Unknown field */
2.16 frystyk 154: if (TRACE) fprintf(TDEST,
2.5 timbl 155: "HTWSRC: Unknown field `%s' in source file\n",
2.1 timbl 156: me->param);
2.5 timbl 157: me->param_number = PAR_UNKNOWN;
158: me->state = before_value; /* Could be better ignore */
2.17 frystyk 159: return HT_OK;
2.1 timbl 160: }
161: me->state = before_value;
162: } else {
163: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
164: }
165: break;
166:
167: case before_value:
168: if (c==')') {
169: me->state = done;
2.17 frystyk 170: return HT_OK; /* Done with input file */
2.1 timbl 171: }
2.17 frystyk 172: if (WHITE(c)) return HT_OK; /* Skip white space */
2.1 timbl 173: me->param_count = 0;
174: if (c=='"') {
175: me->state = quoted_value;
176: break;
177: }
2.5 timbl 178: me->state = (c=='"') ? quoted_value :
179: (c=='(') ? bracketed_value : value;
2.1 timbl 180: me->param[me->param_count++] = c; /* Don't miss first character */
181: break;
182:
183: case value:
184: if (WHITE(c)) {
185: me->param[me->param_count] = 0;
186: StrAllocCopy(me->par_value[me->param_number], me->param);
187: me->state = before_tag;
188: } else {
189: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
190: }
191: break;
192:
2.5 timbl 193: case bracketed_value:
194: if (c==')') {
195: me->param[me->param_count] = 0;
196: StrAllocCopy(me->par_value[me->param_number], me->param);
197: me->state = before_tag;
198: break;
199: }
200: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
201: break;
202:
2.1 timbl 203: case quoted_value:
204: if (c=='"') {
205: me->param[me->param_count] = 0;
206: StrAllocCopy(me->par_value[me->param_number], me->param);
207: me->state = before_tag;
2.5 timbl 208: break;
209: }
210:
211: if (c=='\\') { /* Ignore escape but switch state */
212: me->state = escape_in_quoted;
213: break;
2.1 timbl 214: }
2.5 timbl 215: /* Fall through! */
216:
217: case escape_in_quoted:
218: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
2.1 timbl 219: break;
220:
221: case done: /* Ignore anything after EOF */
2.17 frystyk 222: return HT_OK;
2.1 timbl 223:
224: } /* switch me->state */
2.17 frystyk 225: return HT_OK;
2.1 timbl 226: }
227:
228:
/*      Open Cache file
**      ===============
**
**      Writes the description of the parsed source (or a fixed "not
**      available" line when there is none) to a file whose name is made of
**      CACHE_FILE_PREFIX, the host name, the port and the escaped database
**      name.  The text "210" stands in for the port when the source file
**      gave no tcp-port.
**
**      The file name is built in a buffer sized from its components, so
**      long host or port strings in the source file cannot overrun it.
**
** On exit,
**      returns         YES if the file was written and closed without error
**                      NO  if host or database name is missing, or the
**                          file could not be named, opened or written
**
**      Bugs:   Maybe for filesystem-challenged platforms (MSDOS for example) we
**              should make a hash code for the filename.
*/

#ifdef CACHE_FILE_PREFIX
PRIVATE BOOL write_cache ARGS1(HTStream *, me)
{
    FILE * fp;
    char * cache_file_name;
    char * www_database;
    CONST char * host = me->par_value[PAR_IP_NAME];
    CONST char * port = me->par_value[PAR_TCP_PORT];
    CONST char * description = me->par_value[PAR_DESCRIPTION];
    BOOL ok = YES;

    if (!me->par_value[PAR_DATABASE_NAME] || !host) return NO;
    if (!port) port = "210";

    www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], URL_XALPHAS);
    if (!www_database) return NO;

    /* sizeof() of the literal covers the fixed text and the final NUL.
    ** The database name is cut to 100 characters by the format, so its
    ** full length is a safe upper bound.
    */
    cache_file_name = (char *) malloc(strlen(CACHE_FILE_PREFIX)
                                      + strlen(host)
                                      + strlen(port)
                                      + strlen(www_database)
                                      + sizeof("WSRC-::.txt"));
    if (!cache_file_name) {
        free(www_database);
        return NO;
    }
    sprintf(cache_file_name, "%sWSRC-%s:%s:%.100s.txt",
        CACHE_FILE_PREFIX,
        host,
        port,
        www_database);
    free(www_database);

    fp = fopen(cache_file_name, "wb");
    free(cache_file_name);
    if (!fp) return NO;

    if (description) {
        if (fputs(description, fp) == EOF) ok = NO;
    } else {
        if (fputs("Description not available\n", fp) == EOF) ok = NO;
    }
    if (fclose(fp) != 0) ok = NO;       /* A failed flush means a bad file */
    return ok;
}
#endif
264:
2.1 timbl 265: /* Output equivalent HTML
266: ** ----------------------
267: **
268: */
269:
270: void give_parameter ARGS2(HTStream *, me, int, p)
271: {
272: PUTS(par_name[p]);
273: if (me->par_value[p]) {
274: PUTS(": ");
275: PUTS(me->par_value[p]);
276: PUTS("; ");
277: } else {
278: PUTS(" NOT GIVEN in source file; ");
279: }
280: }
281:
282:
283: /* Generate Outout
284: ** ===============
285: */
2.4 timbl 286: PRIVATE void WSRC_gen_html ARGS2(HTStream *, me, BOOL, source_file)
2.1 timbl 287:
288: {
289: if (me->par_value[PAR_DATABASE_NAME]) {
2.19 frystyk 290: char * shortname = NULL;
2.1 timbl 291: int l;
292: StrAllocCopy(shortname, me->par_value[PAR_DATABASE_NAME]);
293: l = strlen(shortname);
294: if ( l > 4 && !strcasecomp(shortname + l -4, ".src")) {
295: shortname[l-4] = 0; /* Chop of .src -- boring! */
296: }
297:
298: START(HTML_TITLE);
299: PUTS(shortname);
2.4 timbl 300: PUTS(source_file ? " WAIS source file" : " index");
2.1 timbl 301: END(HTML_TITLE);
302:
303: START(HTML_H1);
304: PUTS(shortname);
2.4 timbl 305: PUTS(source_file ? " description" : " index");
2.1 timbl 306: END(HTML_H1);
2.19 frystyk 307: free(shortname); /* memleak, henrik */
2.1 timbl 308: }
309:
310: START(HTML_DL); /* Definition list of details */
311:
2.4 timbl 312: if (source_file) {
313: START(HTML_DT);
2.19 frystyk 314: PUTS("Access link");
2.4 timbl 315: START(HTML_DD);
316: if (me->par_value[PAR_IP_NAME] &&
317: me->par_value[PAR_DATABASE_NAME]) {
318: char WSRC_address[256];
2.19 frystyk 319: char *addr = HTAnchor_address((HTAnchor*) me->request->anchor);
320: char *gate = HTProxy_getGateway(addr);
321: char *www_database = HTEscape(me->par_value[PAR_DATABASE_NAME],
322: URL_XALPHAS);
323: if (!gate) {
324: sprintf(WSRC_address, "wais://%s%s%s/%s",
325: me->par_value[PAR_IP_NAME],
326: me->par_value[PAR_TCP_PORT] ? ":" : "",
327: me->par_value[PAR_TCP_PORT] ?
328: me->par_value[PAR_TCP_PORT] :"", www_database);
329: HTStartAnchor(me->target, NULL, WSRC_address);
330: PUTS("Direct access");
331: END(HTML_A);
332: } else {
333: sprintf(WSRC_address, "%s%s%s%s/%s",
334: gate,
335: me->par_value[PAR_IP_NAME],
336: me->par_value[PAR_TCP_PORT] ? ":" : "",
337: me->par_value[PAR_TCP_PORT] ?
338: me->par_value[PAR_TCP_PORT] : "",
339: www_database);
340: HTStartAnchor(me->target, NULL, WSRC_address);
341: PUTS("Through a gateway");
342: END(HTML_A);
343: }
344: FREE(gate);
345: free(addr);
2.4 timbl 346: free(www_database);
347:
348: } else {
349: give_parameter(me, PAR_IP_NAME);
2.10 timbl 350: give_parameter(me, PAR_DATABASE_NAME);
2.4 timbl 351: }
352:
353: } /* end if source_file */
354:
2.1 timbl 355: if (me->par_value[PAR_MAINTAINER]) {
356: START(HTML_DT);
357: PUTS("Maintainer");
358: START(HTML_DD);
359: PUTS(me->par_value[PAR_MAINTAINER]);
360: }
2.10 timbl 361: if (me->par_value[PAR_IP_NAME]) {
362: START(HTML_DT);
363: PUTS("Host");
364: START(HTML_DD);
365: PUTS(me->par_value[PAR_IP_NAME]);
366: }
367:
2.1 timbl 368: END(HTML_DL);
369:
2.5 timbl 370: if (me->par_value[PAR_DESCRIPTION]) {
371: START(HTML_PRE); /* Preformatted description */
372: PUTS(me->par_value[PAR_DESCRIPTION]);
373: END(HTML_PRE);
374: }
375:
2.11 duns 376: (*me->target->isa->_free)(me->target);
2.1 timbl 377:
378: return;
379: } /* generate html */
380:
381:
2.17 frystyk 382: PRIVATE int WSRCParser_put_string ARGS2(HTStream *, context, CONST char*, s)
2.1 timbl 383: {
2.17 frystyk 384: while (*s)
385: WSRCParser_put_character(context, *s++);
386: return HT_OK;
2.1 timbl 387: }
388:
389:
2.17 frystyk 390: PRIVATE int WSRCParser_write ARGS3(HTStream *, context,
391: CONST char*, b,
392: int, l)
2.1 timbl 393: {
2.17 frystyk 394: while (l-- > 0)
395: WSRCParser_put_character(context, *b++);
396: return HT_OK;
2.1 timbl 397: }
398:
2.17 frystyk 399: PRIVATE int WSRCParser_flush ARGS1(HTStream *, me)
400: {
401: return HT_OK;
402: }
2.1 timbl 403:
2.13 frystyk 404: PRIVATE int WSRCParser_free ARGS1(HTStream *, me)
2.1 timbl 405: {
2.4 timbl 406: WSRC_gen_html(me, YES);
2.6 timbl 407: #ifdef CACHE_FILE_PREFIX
2.4 timbl 408: write_cache(me);
2.6 timbl 409: #endif
2.1 timbl 410: {
411: int p;
412: for(p=0; par_name[p]; p++) { /* Clear out old values */
413: if (me->par_value[p]) {
414: free(me->par_value[p]);
415: }
416: }
417: }
418: free(me);
2.17 frystyk 419: return HT_OK;
2.1 timbl 420: }
421:
2.13 frystyk 422: PRIVATE int WSRCParser_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 423: {
2.7 timbl 424: WSRCParser_free(me);
2.17 frystyk 425: return HT_ERROR;
2.1 timbl 426: }
427:
428:
429: /* Stream subclass -- method routines
430: ** ---------------
431: */
432:
433: HTStreamClass WSRCParserClass = {
434: "WSRCParser",
2.17 frystyk 435: WSRCParser_flush,
2.1 timbl 436: WSRCParser_free,
2.7 timbl 437: WSRCParser_abort,
2.1 timbl 438: WSRCParser_put_character,
439: WSRCParser_put_string,
440: WSRCParser_write
441:
442: };
443:
2.4 timbl 444:
2.1 timbl 445: /* Converter from WAIS Source to whatever
446: ** --------------------------------------
447: */
2.8 timbl 448: PUBLIC HTStream* HTWSRCConvert ARGS5(
449: HTRequest *, request,
450: void *, param,
451: HTFormat, input_format,
452: HTFormat, output_format,
453: HTStream *, output_stream)
2.1 timbl 454: {
2.19 frystyk 455: HTStream * me = (HTStream *) calloc(1, sizeof(HTStream));
2.1 timbl 456: if (!me) outofmem(__FILE__, "HTWSRCConvert");
457: me->isa = &WSRCParserClass;
2.20 frystyk 458: me->target = HTMLGenerator(request, param, input_format, output_format,
459: output_stream);
2.19 frystyk 460: me->request = request;
2.1 timbl 461: me->state = beginning;
462:
463: return me;
464: }
465:
Webmaster