Annotation of libwww/Library/src/HTWSRC.c, revision 2.37
2.14 frystyk 1: /* HTWSRC.c
2: ** PARSE WAIS SOURCE FILE
3: **
2.18 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.37 ! frystyk 6: ** @(#) $Id: HTWSRC.c,v 2.36 1999/02/19 23:09:35 frystyk Exp $
2.1 timbl 7: **
8: ** This module parses a stream with WAIS source file
9: ** format information on it and creates a structured stream.
10: ** That structured stream is then converted into whatever.
11: **
2.11 duns 12: ** 3 Jun 93 Bug fix: Won't crash if no description
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 14: */
15:
2.16 frystyk 16: /* Library include files */
2.35 frystyk 17: #include "wwwsys.h"
2.16 frystyk 18: #include "HTUtils.h"
19: #include "HTString.h"
2.23 frystyk 20: #include "HTMLPDTD.h"
2.20 frystyk 21: #include "HTMLGen.h"
2.2 timbl 22: #include "HTParse.h"
2.22 frystyk 23: #include "HTReqMan.h"
2.19 frystyk 24: #include "HTProxy.h"
2.12 frystyk 25: #include "HTWSRC.h" /* Implemented here */
2.1 timbl 26:
/* Arbitrary upper bound on the number of characters the parser stores
** for one parameter name or value; characters beyond it are dropped. */
#define BIG		10000
#define PARAM_MAX	BIG

/* Time to keep a .src file, in seconds (7 days) */
#define CACHE_PERIOD	(7*86400)

/* NOTE(review): presumably the character introducing a hex escape;
** it is not referenced in the visible part of this file. */
#define HEX_ESCAPE	'%'
32:
33: struct _HTStructured {
2.32 frystyk 34: const HTStructuredClass * isa;
2.1 timbl 35: /* ... */
36: };
37:
/* Shorthands for driving the structured target stream. Each one expects
** a variable `me' with a `target' member to be in scope at the point of
** use. */
#define PUTC(c)		(*me->target->isa->put_character)(me->target, (c))
#define PUTS(s)		(*me->target->isa->put_string)(me->target, (s))
#define START(e)	(*me->target->isa->start_element)(me->target, (e), 0, 0)
#define END(e)		(*me->target->isa->end_element)(me->target, (e))
2.1 timbl 42:
43:
44: /* Here are the parameters which can be specified in a source file
45: */
2.32 frystyk 46: PRIVATE const char* par_name[] = {
2.1 timbl 47: "version",
48: "ip-address",
49: #define PAR_IP_NAME 2
50: "ip-name",
51: #define PAR_TCP_PORT 3
52: "tcp-port",
53: #define PAR_DATABASE_NAME 4
54: "database-name",
55: #define PAR_COST 5
56: "cost",
57: #define PAR_COST_UNIT 6
58: "cost-unit",
59: #define PAR_FREE 7
60: "free",
61: #define PAR_MAINTAINER 8
62: "maintainer",
63: #define PAR_DESCRIPTION 9
2.5 timbl 64: "description",
65: "keyword-list",
2.1 timbl 66: "source",
2.5 timbl 67: #define PAR_UNKNOWN 12
68: "unknown",
2.1 timbl 69: 0, /* Terminate list */
2.5 timbl 70: #define PAR_COUNT 13
2.1 timbl 71: } ;
72:
2.29 frystyk 73: #if 0
2.32 frystyk 74: PRIVATE const char * hex = "0123456789ABCDEF";
2.29 frystyk 75: #endif
2.1 timbl 76:
/* States of the source file parser (see WSRCParser_put_character) */
enum tokenstate {
    beginning,			/* Before the opening '(' */
    before_tag,			/* Waiting for ':' or the closing ')' */
    colon,			/* Collecting a field name after ':' */
    before_value,		/* Skipping white space before a value */
    value,			/* Inside an unquoted value */
    bracketed_value,		/* Inside a value started by '(' */
    quoted_value,		/* Inside a value started by '"' */
    escape_in_quoted,		/* Just seen '\\' inside a quoted value */
    done			/* Closing ')' seen; rest is ignored */
};
2.1 timbl 79:
80:
81: /* Stream Object
82: ** ------------
83: **
84: ** The target is the structured stream down which the
85: ** parsed results will go.
86: **
87: ** all the static stuff below should go in here to make it reentrant
88: */
89:
90: struct _HTStream {
2.32 frystyk 91: const HTStreamClass * isa;
2.1 timbl 92: HTStructured * target;
2.19 frystyk 93: HTRequest * request;
2.1 timbl 94: char * par_value[PAR_COUNT];
95: enum tokenstate state;
96: char param[BIG+1];
97: int param_number;
98: int param_count;
99: };
100:
101:
102:
2.29 frystyk 103: #if 0
2.1 timbl 104: /* Decode one hex character
105: */
106:
2.29 frystyk 107: PRIVATE char from_hex (char c)
2.1 timbl 108: {
109: return (c>='0')&&(c<='9') ? c-'0'
110: : (c>='A')&&(c<='F') ? c-'A'+10
111: : (c>='a')&&(c<='f') ? c-'a'+10
112: : 0;
113: }
2.29 frystyk 114: #endif
2.1 timbl 115:
116: /* State machine
117: ** -------------
118: **
119: ** On entry,
120: ** me->state is a valid state (see WSRC_init)
121: ** c is the next character
122: ** On exit,
123: ** returns 1 Done with file
124: ** 0 Continue. me->state is updated if necessary.
125: ** -1 Syntax error
126: */
127:
128:
129: /* Treat One Character
130: ** -------------------
131: */
2.28 frystyk 132: PRIVATE int WSRCParser_put_character (HTStream* me, char c)
2.1 timbl 133: {
134: switch (me->state) {
135: case beginning:
136: if (c=='(') me->state = before_tag;
137: break;
138:
139: case before_tag:
140: if (c==')') {
141: me->state = done;
2.17 frystyk 142: return HT_OK; /* Done with input file */
2.1 timbl 143: } else if (c==':') {
144: me->param_count = 0;
145: me->state = colon;
146: } /* Ignore other text */
147: break;
148:
149: case colon:
2.34 frystyk 150: if (isspace((int) c)) {
2.1 timbl 151: me->param[me->param_count++] = 0; /* Terminate */
152: for(me->param_number = 0; par_name[me->param_number]; me->param_number++) {
153: if (0==strcmp(par_name[me->param_number], me->param)) {
154: break;
155: }
156: }
157: if (!par_name[me->param_number]) { /* Unknown field */
2.37 ! frystyk 158: HTTRACE(PROT_TRACE, "HTWSRC: Unknown field `%s' in source file\n" _
2.1 timbl 159: me->param);
2.5 timbl 160: me->param_number = PAR_UNKNOWN;
161: me->state = before_value; /* Could be better ignore */
2.17 frystyk 162: return HT_OK;
2.1 timbl 163: }
164: me->state = before_value;
165: } else {
166: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
167: }
168: break;
169:
170: case before_value:
171: if (c==')') {
172: me->state = done;
2.17 frystyk 173: return HT_OK; /* Done with input file */
2.1 timbl 174: }
2.34 frystyk 175: if (isspace((int) c)) return HT_OK; /* Skip white space */
2.1 timbl 176: me->param_count = 0;
177: if (c=='"') {
178: me->state = quoted_value;
179: break;
180: }
2.5 timbl 181: me->state = (c=='"') ? quoted_value :
182: (c=='(') ? bracketed_value : value;
2.1 timbl 183: me->param[me->param_count++] = c; /* Don't miss first character */
184: break;
185:
186: case value:
2.34 frystyk 187: if (isspace((int) c)) {
2.1 timbl 188: me->param[me->param_count] = 0;
189: StrAllocCopy(me->par_value[me->param_number], me->param);
190: me->state = before_tag;
191: } else {
192: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
193: }
194: break;
195:
2.5 timbl 196: case bracketed_value:
197: if (c==')') {
198: me->param[me->param_count] = 0;
199: StrAllocCopy(me->par_value[me->param_number], me->param);
200: me->state = before_tag;
201: break;
202: }
203: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
204: break;
205:
2.1 timbl 206: case quoted_value:
207: if (c=='"') {
208: me->param[me->param_count] = 0;
209: StrAllocCopy(me->par_value[me->param_number], me->param);
210: me->state = before_tag;
2.5 timbl 211: break;
212: }
213:
214: if (c=='\\') { /* Ignore escape but switch state */
215: me->state = escape_in_quoted;
216: break;
2.1 timbl 217: }
2.5 timbl 218: /* Fall through! */
219:
220: case escape_in_quoted:
221: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
2.1 timbl 222: break;
223:
224: case done: /* Ignore anything after EOF */
2.17 frystyk 225: return HT_OK;
2.1 timbl 226:
227: } /* switch me->state */
2.17 frystyk 228: return HT_OK;
2.1 timbl 229: }
230:
231:
2.4 timbl 232: /* Open Cache file
233: ** ===============
234: **
235: ** Bugs: Maybe for filesystem-challenged platforms (MSDOS for example) we
236: ** should make a hash code for the filename.
237: */
238:
239: #ifdef CACHE_FILE_PREFIX
2.28 frystyk 240: PRIVATE BOOL write_cache (HTStream * me)
2.4 timbl 241: {
242: FILE * fp;
243: char cache_file_name[256];
244: char * www_database;
2.10 timbl 245: if (!me->par_value[PAR_DATABASE_NAME]
246: || !me->par_value[PAR_IP_NAME]
247: ) return NO;
248:
2.4 timbl 249: www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], URL_XALPHAS);
2.5 timbl 250: sprintf(cache_file_name, "%sWSRC-%s:%s:%.100s.txt",
2.4 timbl 251: CACHE_FILE_PREFIX,
252: me->par_value[PAR_IP_NAME],
253: me->par_value[PAR_TCP_PORT] ? me->par_value[PAR_TCP_PORT] : "210",
254: www_database);
2.30 frystyk 255: HT_FREE(www_database);
2.21 frystyk 256: fp = fopen(cache_file_name, "wb");
2.4 timbl 257: if (!fp) return NO;
258:
2.5 timbl 259: if (me->par_value[PAR_DESCRIPTION])
260: fputs(me->par_value[PAR_DESCRIPTION], fp);
261: else
262: fputs("Description not available\n", fp);
2.4 timbl 263: fclose(fp);
264: return YES;
265: }
266: #endif
267:
2.1 timbl 268: /* Output equivalent HTML
269: ** ----------------------
270: **
271: */
272:
2.28 frystyk 273: void give_parameter (HTStream * me, int p)
2.1 timbl 274: {
275: PUTS(par_name[p]);
276: if (me->par_value[p]) {
277: PUTS(": ");
278: PUTS(me->par_value[p]);
279: PUTS("; ");
280: } else {
281: PUTS(" NOT GIVEN in source file; ");
282: }
283: }
284:
285:
286: /* Generate Output
287: ** ===============
288: */
2.28 frystyk 289: PRIVATE void WSRC_gen_html (HTStream * me, BOOL source_file)
2.1 timbl 290:
291: {
292: if (me->par_value[PAR_DATABASE_NAME]) {
2.19 frystyk 293: char * shortname = NULL;
2.1 timbl 294: int l;
295: StrAllocCopy(shortname, me->par_value[PAR_DATABASE_NAME]);
296: l = strlen(shortname);
297: if ( l > 4 && !strcasecomp(shortname + l -4, ".src")) {
298: shortname[l-4] = 0; /* Chop of .src -- boring! */
299: }
300:
301: START(HTML_TITLE);
302: PUTS(shortname);
2.4 timbl 303: PUTS(source_file ? " WAIS source file" : " index");
2.1 timbl 304: END(HTML_TITLE);
305:
306: START(HTML_H1);
307: PUTS(shortname);
2.4 timbl 308: PUTS(source_file ? " description" : " index");
2.1 timbl 309: END(HTML_H1);
2.30 frystyk 310: HT_FREE(shortname); /* memleak, henrik */
2.1 timbl 311: }
312:
313: START(HTML_DL); /* Definition list of details */
314:
2.4 timbl 315: if (source_file) {
316: START(HTML_DT);
2.19 frystyk 317: PUTS("Access link");
2.4 timbl 318: START(HTML_DD);
319: if (me->par_value[PAR_IP_NAME] &&
320: me->par_value[PAR_DATABASE_NAME]) {
321: char WSRC_address[256];
2.19 frystyk 322: char *addr = HTAnchor_address((HTAnchor*) me->request->anchor);
2.25 frystyk 323: char *gate = HTGateway_find(addr);
2.19 frystyk 324: char *www_database = HTEscape(me->par_value[PAR_DATABASE_NAME],
325: URL_XALPHAS);
326: if (!gate) {
327: sprintf(WSRC_address, "wais://%s%s%s/%s",
328: me->par_value[PAR_IP_NAME],
329: me->par_value[PAR_TCP_PORT] ? ":" : "",
330: me->par_value[PAR_TCP_PORT] ?
331: me->par_value[PAR_TCP_PORT] :"", www_database);
332: HTStartAnchor(me->target, NULL, WSRC_address);
333: PUTS("Direct access");
334: END(HTML_A);
335: } else {
336: sprintf(WSRC_address, "%s%s%s%s/%s",
337: gate,
338: me->par_value[PAR_IP_NAME],
339: me->par_value[PAR_TCP_PORT] ? ":" : "",
340: me->par_value[PAR_TCP_PORT] ?
341: me->par_value[PAR_TCP_PORT] : "",
342: www_database);
343: HTStartAnchor(me->target, NULL, WSRC_address);
344: PUTS("Through a gateway");
345: END(HTML_A);
346: }
2.30 frystyk 347: HT_FREE(gate);
348: HT_FREE(addr);
349: HT_FREE(www_database);
2.4 timbl 350:
351: } else {
352: give_parameter(me, PAR_IP_NAME);
2.10 timbl 353: give_parameter(me, PAR_DATABASE_NAME);
2.4 timbl 354: }
355:
356: } /* end if source_file */
357:
2.1 timbl 358: if (me->par_value[PAR_MAINTAINER]) {
359: START(HTML_DT);
360: PUTS("Maintainer");
361: START(HTML_DD);
362: PUTS(me->par_value[PAR_MAINTAINER]);
363: }
2.10 timbl 364: if (me->par_value[PAR_IP_NAME]) {
365: START(HTML_DT);
366: PUTS("Host");
367: START(HTML_DD);
368: PUTS(me->par_value[PAR_IP_NAME]);
369: }
370:
2.1 timbl 371: END(HTML_DL);
372:
2.5 timbl 373: if (me->par_value[PAR_DESCRIPTION]) {
374: START(HTML_PRE); /* Preformatted description */
375: PUTS(me->par_value[PAR_DESCRIPTION]);
376: END(HTML_PRE);
377: }
378:
2.11 duns 379: (*me->target->isa->_free)(me->target);
2.1 timbl 380:
381: return;
382: } /* generate html */
383:
384:
2.32 frystyk 385: PRIVATE int WSRCParser_put_string (HTStream * context, const char* s)
2.1 timbl 386: {
2.17 frystyk 387: while (*s)
388: WSRCParser_put_character(context, *s++);
389: return HT_OK;
2.1 timbl 390: }
391:
392:
2.28 frystyk 393: PRIVATE int WSRCParser_write (HTStream * context,
2.32 frystyk 394: const char* b,
2.28 frystyk 395: int l)
2.1 timbl 396: {
2.17 frystyk 397: while (l-- > 0)
398: WSRCParser_put_character(context, *b++);
399: return HT_OK;
2.1 timbl 400: }
401:
2.28 frystyk 402: PRIVATE int WSRCParser_flush (HTStream * me)
2.17 frystyk 403: {
404: return HT_OK;
405: }
2.1 timbl 406:
2.28 frystyk 407: PRIVATE int WSRCParser_free (HTStream * me)
2.1 timbl 408: {
2.4 timbl 409: WSRC_gen_html(me, YES);
2.6 timbl 410: #ifdef CACHE_FILE_PREFIX
2.4 timbl 411: write_cache(me);
2.6 timbl 412: #endif
2.1 timbl 413: {
414: int p;
415: for(p=0; par_name[p]; p++) { /* Clear out old values */
416: if (me->par_value[p]) {
2.30 frystyk 417: HT_FREE(me->par_value[p]);
2.1 timbl 418: }
419: }
420: }
2.30 frystyk 421: HT_FREE(me);
2.17 frystyk 422: return HT_OK;
2.1 timbl 423: }
424:
2.28 frystyk 425: PRIVATE int WSRCParser_abort (HTStream * me, HTList * e)
2.1 timbl 426: {
2.7 timbl 427: WSRCParser_free(me);
2.17 frystyk 428: return HT_ERROR;
2.1 timbl 429: }
430:
431:
432: /* Stream subclass -- method routines
433: ** ---------------
434: */
435:
436: HTStreamClass WSRCParserClass = {
437: "WSRCParser",
2.17 frystyk 438: WSRCParser_flush,
2.1 timbl 439: WSRCParser_free,
2.7 timbl 440: WSRCParser_abort,
2.1 timbl 441: WSRCParser_put_character,
442: WSRCParser_put_string,
443: WSRCParser_write
444:
445: };
446:
2.4 timbl 447:
2.1 timbl 448: /* Converter from WAIS Source to whatever
449: ** --------------------------------------
450: */
2.28 frystyk 451: PUBLIC HTStream* HTWSRCConvert (HTRequest * request,
452: void * param,
453: HTFormat input_format,
454: HTFormat output_format,
455: HTStream * output_stream)
2.1 timbl 456: {
2.30 frystyk 457: HTStream * me;
458: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
459: HT_OUTOFMEM("HTWSRCConvert");
2.1 timbl 460: me->isa = &WSRCParserClass;
2.20 frystyk 461: me->target = HTMLGenerator(request, param, input_format, output_format,
462: output_stream);
2.19 frystyk 463: me->request = request;
2.1 timbl 464: me->state = beginning;
465:
466: return me;
467: }
468:
Webmaster