Annotation of libwww/Library/src/HTWSRC.c, revision 2.32
2.14 frystyk 1: /* HTWSRC.c
2: ** PARSE WAIS SOURCE FILE
3: **
2.18 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This module parses a stream with WAIS source file
8: ** format information on it and creates a structured stream.
9: ** That structured stream is then converted into whatever.
10: **
2.11 duns 11: ** 3 Jun 93 Bug fix: Won't crash if no description
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 13: */
14:
2.16 frystyk 15: /* Library include files */
2.32 ! frystyk 16: #include "sysdep.h"
2.16 frystyk 17: #include "HTUtils.h"
18: #include "HTString.h"
2.23 frystyk 19: #include "HTMLPDTD.h"
2.20 frystyk 20: #include "HTMLGen.h"
2.2 timbl 21: #include "HTParse.h"
2.22 frystyk 22: #include "HTReqMan.h"
2.19 frystyk 23: #include "HTProxy.h"
2.12 frystyk 24: #include "HTWSRC.h" /* Implemented here */
2.1 timbl 25:
26: #define BIG 10000 /* Arbitrary limit to value length */
27: #define PARAM_MAX BIG
28: #define CACHE_PERIOD (7*86400) /* Time to keep .src file in seconds */
29:
30: #define HEX_ESCAPE '%'
31:
32: struct _HTStructured {
2.32 ! frystyk 33: const HTStructuredClass * isa;
2.1 timbl 34: /* ... */
35: };
36:
/*
** Shorthands for calling the methods of the structured target stream.
** All of them expect a variable `me' of type HTStream * to be in scope.
** START() always passes 0 for the two attribute arguments.
*/
#define PUTC(c)		(*me->target->isa->put_character)(me->target, (c))
#define PUTS(s)		(*me->target->isa->put_string)(me->target, (s))
#define START(e)	(*me->target->isa->start_element)(me->target, (e), 0, 0)
#define END(e)		(*me->target->isa->end_element)(me->target, (e))
2.1 timbl 41:
42:
43: /* Here are the parameters which can be specified in a source file
44: */
2.32 ! frystyk 45: PRIVATE const char* par_name[] = {
2.1 timbl 46: "version",
47: "ip-address",
48: #define PAR_IP_NAME 2
49: "ip-name",
50: #define PAR_TCP_PORT 3
51: "tcp-port",
52: #define PAR_DATABASE_NAME 4
53: "database-name",
54: #define PAR_COST 5
55: "cost",
56: #define PAR_COST_UNIT 6
57: "cost-unit",
58: #define PAR_FREE 7
59: "free",
60: #define PAR_MAINTAINER 8
61: "maintainer",
62: #define PAR_DESCRIPTION 9
2.5 timbl 63: "description",
64: "keyword-list",
2.1 timbl 65: "source",
2.5 timbl 66: #define PAR_UNKNOWN 12
67: "unknown",
2.1 timbl 68: 0, /* Terminate list */
2.5 timbl 69: #define PAR_COUNT 13
2.1 timbl 70: } ;
71:
#if 0
PRIVATE const char * hex = "0123456789ABCDEF";
#endif

/*
** States of the source file parser (see WSRCParser_put_character).
*/
enum tokenstate {
    beginning,			/* Nothing seen yet; waiting for '(' */
    before_tag,			/* Waiting for ':' of next field, or ')' */
    colon,			/* Collecting a field name after ':' */
    before_value,		/* Skipping white space before a value */
    value,			/* Collecting a bare value until white space */
    bracketed_value,		/* Collecting a value until ')' */
    quoted_value,		/* Collecting a value until '"' */
    escape_in_quoted,		/* Character following '\' in a quoted value */
    done			/* Closing ')' seen; rest of input is ignored */
};
2.1 timbl 78:
79:
80: /* Stream Object
81: ** ------------
82: **
83: ** The target is the structured stream down which the
84: ** parsed results will go.
85: **
86: ** all the static stuff below should go in here to make it reentrant
87: */
88:
89: struct _HTStream {
2.32 ! frystyk 90: const HTStreamClass * isa;
2.1 timbl 91: HTStructured * target;
2.19 frystyk 92: HTRequest * request;
2.1 timbl 93: char * par_value[PAR_COUNT];
94: enum tokenstate state;
95: char param[BIG+1];
96: int param_number;
97: int param_count;
98: };
99:
100:
101:
#if 0
/*	Decode one hex character
**
**	Returns the value 0..15 of the hexadecimal digit c (either case),
**	or 0 if c is not a hexadecimal digit.  Currently compiled out.
*/

PRIVATE char from_hex (char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return 0;
}
#endif
2.1 timbl 114:
115: /* State machine
116: ** -------------
117: **
118: ** On entry,
119: ** me->state is a valid state (see WSRC_init)
120: ** c is the next character
121: ** On exit,
122: ** returns 1 Done with file
123: ** 0 Continue. me->state is updated if necessary.
124: ** -1 Syntax error error
125: */
126:
127:
128: /* Treat One Character
129: ** -------------------
130: */
2.28 frystyk 131: PRIVATE int WSRCParser_put_character (HTStream* me, char c)
2.1 timbl 132: {
133: switch (me->state) {
134: case beginning:
135: if (c=='(') me->state = before_tag;
136: break;
137:
138: case before_tag:
139: if (c==')') {
140: me->state = done;
2.17 frystyk 141: return HT_OK; /* Done with input file */
2.1 timbl 142: } else if (c==':') {
143: me->param_count = 0;
144: me->state = colon;
145: } /* Ignore other text */
146: break;
147:
148: case colon:
149: if (WHITE(c)) {
150: me->param[me->param_count++] = 0; /* Terminate */
151: for(me->param_number = 0; par_name[me->param_number]; me->param_number++) {
152: if (0==strcmp(par_name[me->param_number], me->param)) {
153: break;
154: }
155: }
156: if (!par_name[me->param_number]) { /* Unknown field */
2.31 eric 157: if (WWWTRACE) HTTrace(
2.5 timbl 158: "HTWSRC: Unknown field `%s' in source file\n",
2.1 timbl 159: me->param);
2.5 timbl 160: me->param_number = PAR_UNKNOWN;
161: me->state = before_value; /* Could be better ignore */
2.17 frystyk 162: return HT_OK;
2.1 timbl 163: }
164: me->state = before_value;
165: } else {
166: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
167: }
168: break;
169:
170: case before_value:
171: if (c==')') {
172: me->state = done;
2.17 frystyk 173: return HT_OK; /* Done with input file */
2.1 timbl 174: }
2.17 frystyk 175: if (WHITE(c)) return HT_OK; /* Skip white space */
2.1 timbl 176: me->param_count = 0;
177: if (c=='"') {
178: me->state = quoted_value;
179: break;
180: }
2.5 timbl 181: me->state = (c=='"') ? quoted_value :
182: (c=='(') ? bracketed_value : value;
2.1 timbl 183: me->param[me->param_count++] = c; /* Don't miss first character */
184: break;
185:
186: case value:
187: if (WHITE(c)) {
188: me->param[me->param_count] = 0;
189: StrAllocCopy(me->par_value[me->param_number], me->param);
190: me->state = before_tag;
191: } else {
192: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
193: }
194: break;
195:
2.5 timbl 196: case bracketed_value:
197: if (c==')') {
198: me->param[me->param_count] = 0;
199: StrAllocCopy(me->par_value[me->param_number], me->param);
200: me->state = before_tag;
201: break;
202: }
203: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
204: break;
205:
2.1 timbl 206: case quoted_value:
207: if (c=='"') {
208: me->param[me->param_count] = 0;
209: StrAllocCopy(me->par_value[me->param_number], me->param);
210: me->state = before_tag;
2.5 timbl 211: break;
212: }
213:
214: if (c=='\\') { /* Ignore escape but switch state */
215: me->state = escape_in_quoted;
216: break;
2.1 timbl 217: }
2.5 timbl 218: /* Fall through! */
219:
220: case escape_in_quoted:
221: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
2.1 timbl 222: break;
223:
224: case done: /* Ignore anything after EOF */
2.17 frystyk 225: return HT_OK;
2.1 timbl 226:
227: } /* switch me->state */
2.17 frystyk 228: return HT_OK;
2.1 timbl 229: }
230:
231:
/*	Write Cache file
**	================
**
**	Stores the description of the source in a file named
**	<CACHE_FILE_PREFIX>WSRC-<ip-name>:<tcp-port>:<database>.txt, using
**	port "210" if the source file gave none.  Only compiled when
**	CACHE_FILE_PREFIX is defined.
**
**	Returns YES if the file was written completely, NO if the database
**	name or host name is missing, the file name does not fit the name
**	buffer, or the file could not be opened or written.
**
**	Bugs:	Maybe for filesystem-challenged platforms (MSDOS for example) we
**		should make a hash code for the filename.
**
**	NOTE(review): ip-name and tcp-port come straight from the parsed
**	source file and go into the path unescaped -- confirm this is
**	acceptable for untrusted input.
*/

#ifdef CACHE_FILE_PREFIX
PRIVATE BOOL write_cache (HTStream * me)
{
    FILE * fp;
    char cache_file_name[256];
    char * www_database;
    int len;
    BOOL ok;

    if (!me->par_value[PAR_DATABASE_NAME]
	|| !me->par_value[PAR_IP_NAME]
	) return NO;

    www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], URL_XALPHAS);
    if (!www_database) return NO;

    /* The host and port strings are of arbitrary length, so the write must
    ** be bounded; a name that does not fit is refused, not truncated. */
    len = snprintf(cache_file_name, sizeof(cache_file_name),
		   "%sWSRC-%s:%s:%.100s.txt",
		   CACHE_FILE_PREFIX,
		   me->par_value[PAR_IP_NAME],
		   me->par_value[PAR_TCP_PORT] ? me->par_value[PAR_TCP_PORT] : "210",
		   www_database);
    HT_FREE(www_database);
    if (len < 0 || (size_t) len >= sizeof(cache_file_name)) return NO;

    fp = fopen(cache_file_name, "wb");
    if (!fp) return NO;

    if (me->par_value[PAR_DESCRIPTION])
	ok = (fputs(me->par_value[PAR_DESCRIPTION], fp) != EOF) ? YES : NO;
    else
	ok = (fputs("Description not available\n", fp) != EOF) ? YES : NO;

    /* fclose() flushes, so a failure here means the data was not saved */
    if (fclose(fp) != 0) ok = NO;
    return ok;
}
#endif
267:
2.1 timbl 268: /* Output equivalent HTML
269: ** ----------------------
270: **
271: */
272:
2.28 frystyk 273: void give_parameter (HTStream * me, int p)
2.1 timbl 274: {
275: PUTS(par_name[p]);
276: if (me->par_value[p]) {
277: PUTS(": ");
278: PUTS(me->par_value[p]);
279: PUTS("; ");
280: } else {
281: PUTS(" NOT GIVEN in source file; ");
282: }
283: }
284:
285:
286: /* Generate Outout
287: ** ===============
288: */
2.28 frystyk 289: PRIVATE void WSRC_gen_html (HTStream * me, BOOL source_file)
2.1 timbl 290:
291: {
292: if (me->par_value[PAR_DATABASE_NAME]) {
2.19 frystyk 293: char * shortname = NULL;
2.1 timbl 294: int l;
295: StrAllocCopy(shortname, me->par_value[PAR_DATABASE_NAME]);
296: l = strlen(shortname);
297: if ( l > 4 && !strcasecomp(shortname + l -4, ".src")) {
298: shortname[l-4] = 0; /* Chop of .src -- boring! */
299: }
300:
301: START(HTML_TITLE);
302: PUTS(shortname);
2.4 timbl 303: PUTS(source_file ? " WAIS source file" : " index");
2.1 timbl 304: END(HTML_TITLE);
305:
306: START(HTML_H1);
307: PUTS(shortname);
2.4 timbl 308: PUTS(source_file ? " description" : " index");
2.1 timbl 309: END(HTML_H1);
2.30 frystyk 310: HT_FREE(shortname); /* memleak, henrik */
2.1 timbl 311: }
312:
313: START(HTML_DL); /* Definition list of details */
314:
2.4 timbl 315: if (source_file) {
316: START(HTML_DT);
2.19 frystyk 317: PUTS("Access link");
2.4 timbl 318: START(HTML_DD);
319: if (me->par_value[PAR_IP_NAME] &&
320: me->par_value[PAR_DATABASE_NAME]) {
321: char WSRC_address[256];
2.19 frystyk 322: char *addr = HTAnchor_address((HTAnchor*) me->request->anchor);
2.25 frystyk 323: char *gate = HTGateway_find(addr);
2.19 frystyk 324: char *www_database = HTEscape(me->par_value[PAR_DATABASE_NAME],
325: URL_XALPHAS);
326: if (!gate) {
327: sprintf(WSRC_address, "wais://%s%s%s/%s",
328: me->par_value[PAR_IP_NAME],
329: me->par_value[PAR_TCP_PORT] ? ":" : "",
330: me->par_value[PAR_TCP_PORT] ?
331: me->par_value[PAR_TCP_PORT] :"", www_database);
332: HTStartAnchor(me->target, NULL, WSRC_address);
333: PUTS("Direct access");
334: END(HTML_A);
335: } else {
336: sprintf(WSRC_address, "%s%s%s%s/%s",
337: gate,
338: me->par_value[PAR_IP_NAME],
339: me->par_value[PAR_TCP_PORT] ? ":" : "",
340: me->par_value[PAR_TCP_PORT] ?
341: me->par_value[PAR_TCP_PORT] : "",
342: www_database);
343: HTStartAnchor(me->target, NULL, WSRC_address);
344: PUTS("Through a gateway");
345: END(HTML_A);
346: }
2.30 frystyk 347: HT_FREE(gate);
348: HT_FREE(addr);
349: HT_FREE(www_database);
2.4 timbl 350:
351: } else {
352: give_parameter(me, PAR_IP_NAME);
2.10 timbl 353: give_parameter(me, PAR_DATABASE_NAME);
2.4 timbl 354: }
355:
356: } /* end if source_file */
357:
2.1 timbl 358: if (me->par_value[PAR_MAINTAINER]) {
359: START(HTML_DT);
360: PUTS("Maintainer");
361: START(HTML_DD);
362: PUTS(me->par_value[PAR_MAINTAINER]);
363: }
2.10 timbl 364: if (me->par_value[PAR_IP_NAME]) {
365: START(HTML_DT);
366: PUTS("Host");
367: START(HTML_DD);
368: PUTS(me->par_value[PAR_IP_NAME]);
369: }
370:
2.1 timbl 371: END(HTML_DL);
372:
2.5 timbl 373: if (me->par_value[PAR_DESCRIPTION]) {
374: START(HTML_PRE); /* Preformatted description */
375: PUTS(me->par_value[PAR_DESCRIPTION]);
376: END(HTML_PRE);
377: }
378:
2.11 duns 379: (*me->target->isa->_free)(me->target);
2.1 timbl 380:
381: return;
382: } /* generate html */
383:
384:
2.32 ! frystyk 385: PRIVATE int WSRCParser_put_string (HTStream * context, const char* s)
2.1 timbl 386: {
2.17 frystyk 387: while (*s)
388: WSRCParser_put_character(context, *s++);
389: return HT_OK;
2.1 timbl 390: }
391:
392:
2.28 frystyk 393: PRIVATE int WSRCParser_write (HTStream * context,
2.32 ! frystyk 394: const char* b,
2.28 frystyk 395: int l)
2.1 timbl 396: {
2.17 frystyk 397: while (l-- > 0)
398: WSRCParser_put_character(context, *b++);
399: return HT_OK;
2.1 timbl 400: }
401:
2.28 frystyk 402: PRIVATE int WSRCParser_flush (HTStream * me)
2.17 frystyk 403: {
404: return HT_OK;
405: }
2.1 timbl 406:
2.28 frystyk 407: PRIVATE int WSRCParser_free (HTStream * me)
2.1 timbl 408: {
2.4 timbl 409: WSRC_gen_html(me, YES);
2.6 timbl 410: #ifdef CACHE_FILE_PREFIX
2.4 timbl 411: write_cache(me);
2.6 timbl 412: #endif
2.1 timbl 413: {
414: int p;
415: for(p=0; par_name[p]; p++) { /* Clear out old values */
416: if (me->par_value[p]) {
2.30 frystyk 417: HT_FREE(me->par_value[p]);
2.1 timbl 418: }
419: }
420: }
2.30 frystyk 421: HT_FREE(me);
2.17 frystyk 422: return HT_OK;
2.1 timbl 423: }
424:
2.28 frystyk 425: PRIVATE int WSRCParser_abort (HTStream * me, HTList * e)
2.1 timbl 426: {
2.7 timbl 427: WSRCParser_free(me);
2.17 frystyk 428: return HT_ERROR;
2.1 timbl 429: }
430:
431:
432: /* Stream subclass -- method routines
433: ** ---------------
434: */
435:
436: HTStreamClass WSRCParserClass = {
437: "WSRCParser",
2.17 frystyk 438: WSRCParser_flush,
2.1 timbl 439: WSRCParser_free,
2.7 timbl 440: WSRCParser_abort,
2.1 timbl 441: WSRCParser_put_character,
442: WSRCParser_put_string,
443: WSRCParser_write
444:
445: };
446:
2.4 timbl 447:
2.1 timbl 448: /* Converter from WAIS Source to whatever
449: ** --------------------------------------
450: */
2.28 frystyk 451: PUBLIC HTStream* HTWSRCConvert (HTRequest * request,
452: void * param,
453: HTFormat input_format,
454: HTFormat output_format,
455: HTStream * output_stream)
2.1 timbl 456: {
2.30 frystyk 457: HTStream * me;
458: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
459: HT_OUTOFMEM("HTWSRCConvert");
2.1 timbl 460: me->isa = &WSRCParserClass;
2.20 frystyk 461: me->target = HTMLGenerator(request, param, input_format, output_format,
462: output_stream);
2.19 frystyk 463: me->request = request;
2.1 timbl 464: me->state = beginning;
465:
466: return me;
467: }
468:
Webmaster