Annotation of libwww/Library/src/HTWSRC.c, revision 2.5
2.1 timbl 1: /* Parse WAIS Source file HTWSRC.c
2: ** ======================
3: **
4: ** This module parses a stream with WAIS source file
5: ** format information on it and creates a structured stream.
6: ** That structured stream is then converted into whatever.
7: **
2.5 ! timbl 8: ** 3 June 93 Bug fix: Won't crash if no description
2.1 timbl 9: */
10:
11: #include "HTWSRC.h"
12:
13:
2.3 timbl 14: /* #include <sys/types.h> already in tcp.h */
15: /* #include <sys/stat.h> this too */
2.1 timbl 16: #include <stdio.h>
17: #include "HTML.h"
18:
19: #include "HTUtils.h"
20: #include "tcp.h"
2.2 timbl 21: #include "HTParse.h"
2.1 timbl 22:
/*	Size limits and tunables
*/
#define BIG 10000		/* Arbitrary limit to value length */
#define PARAM_MAX BIG		/* Most characters the parser stores for one token */
#define CACHE_PERIOD (7*86400)	/* Time to keep .src file in seconds */

#define HEX_ESCAPE '%'		/* Escape character constant; not referenced in this section */
28:
29: struct _HTStructured {
30: CONST HTStructuredClass * isa;
31: /* ... */
32: };
33:
/*	Shorthand for calling the methods of the target stream
**
**	Each macro expects a variable `me' to be in scope whose `target'
**	member is the structured stream to write to. START passes 0, 0
**	as the last two arguments of start_element.
*/
#define PUTC(c) (*me->target->isa->put_character)(me->target, c)
#define PUTS(s) (*me->target->isa->put_string)(me->target, s)
#define START(e) (*me->target->isa->start_element)(me->target, e, 0, 0)
#define END(e) (*me->target->isa->end_element)(me->target, e)
2.1 timbl 38:
39:
40: /* Here are the parameters which can be specified in a source file
41: */
42: PRIVATE CONST char* par_name[] = {
43: "version",
44: "ip-address",
45: #define PAR_IP_NAME 2
46: "ip-name",
47: #define PAR_TCP_PORT 3
48: "tcp-port",
49: #define PAR_DATABASE_NAME 4
50: "database-name",
51: #define PAR_COST 5
52: "cost",
53: #define PAR_COST_UNIT 6
54: "cost-unit",
55: #define PAR_FREE 7
56: "free",
57: #define PAR_MAINTAINER 8
58: "maintainer",
59: #define PAR_DESCRIPTION 9
2.5 ! timbl 60: "description",
! 61: "keyword-list",
2.1 timbl 62: "source",
2.5 ! timbl 63: #define PAR_UNKNOWN 12
! 64: "unknown",
2.1 timbl 65: 0, /* Terminate list */
2.5 ! timbl 66: #define PAR_COUNT 13
2.1 timbl 67: } ;
68:
69:
/*	States of the source file tokenizer
*/
enum tokenstate {
    beginning,		/* Nothing seen yet: waiting for the opening '(' */
    before_tag,		/* Waiting for the ':' of the next field name, or ')' */
    colon,		/* Collecting a field name after ':' */
    before_value,	/* Skipping white space before the value of a field */
    value,		/* Collecting a bare value, ended by white space */
    bracketed_value,	/* Collecting a value ended by ')' */
    quoted_value,	/* Collecting a value ended by '"' */
    escape_in_quoted,	/* Entered on a backslash inside a quoted value */
    done		/* Closing ')' seen: further input is ignored */
};
2.1 timbl 72:
73:
74: /* Stream Object
75: ** ------------
76: **
77: ** The target is the structured stream down which the
78: ** parsed results will go.
79: **
80: ** all the static stuff below should go in here to make it reentrant
81: */
82:
83: struct _HTStream {
84: CONST HTStreamClass * isa;
85: HTStructured * target;
86: char * par_value[PAR_COUNT];
87: enum tokenstate state;
88: char param[BIG+1];
89: int param_number;
90: int param_count;
91: };
92:
93:
94:
95:
96: PUBLIC CONST char * hex = "0123456789ABCDEF";
97:
98: /* Decode one hex character
99: */
100:
101: PUBLIC char from_hex ARGS1(char, c)
102: {
103: return (c>='0')&&(c<='9') ? c-'0'
104: : (c>='A')&&(c<='F') ? c-'A'+10
105: : (c>='a')&&(c<='f') ? c-'a'+10
106: : 0;
107: }
108:
109:
/*	State machine
**	-------------
**
** On entry,
**	me->state	is a valid state (HTWSRCConvert sets it to `beginning')
**	c		is the next character
** On exit,
**	me->state	is updated if necessary. Nothing is returned:
**			once the closing ')' has been seen the state is
**			`done' and any further input is ignored.
**			Syntax errors are not reported.
*/
121:
122:
123: /* Treat One Character
124: ** -------------------
125: */
126: PRIVATE void WSRCParser_put_character ARGS2(HTStream*, me, char, c)
127: {
128: switch (me->state) {
129: case beginning:
130: if (c=='(') me->state = before_tag;
131: break;
132:
133: case before_tag:
134: if (c==')') {
135: me->state = done;
136: return; /* Done with input file */
137: } else if (c==':') {
138: me->param_count = 0;
139: me->state = colon;
140: } /* Ignore other text */
141: break;
142:
143: case colon:
144: if (WHITE(c)) {
145: me->param[me->param_count++] = 0; /* Terminate */
146: for(me->param_number = 0; par_name[me->param_number]; me->param_number++) {
147: if (0==strcmp(par_name[me->param_number], me->param)) {
148: break;
149: }
150: }
151: if (!par_name[me->param_number]) { /* Unknown field */
152: if (TRACE) fprintf(stderr,
2.5 ! timbl 153: "HTWSRC: Unknown field `%s' in source file\n",
2.1 timbl 154: me->param);
2.5 ! timbl 155: me->param_number = PAR_UNKNOWN;
! 156: me->state = before_value; /* Could be better ignore */
2.1 timbl 157: return;
158: }
159: me->state = before_value;
160: } else {
161: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
162: }
163: break;
164:
165: case before_value:
166: if (c==')') {
167: me->state = done;
168: return; /* Done with input file */
169: }
170: if (WHITE(c)) return; /* Skip white space */
171: me->param_count = 0;
172: if (c=='"') {
173: me->state = quoted_value;
174: break;
175: }
2.5 ! timbl 176: me->state = (c=='"') ? quoted_value :
! 177: (c=='(') ? bracketed_value : value;
2.1 timbl 178: me->param[me->param_count++] = c; /* Don't miss first character */
179: break;
180:
181: case value:
182: if (WHITE(c)) {
183: me->param[me->param_count] = 0;
184: StrAllocCopy(me->par_value[me->param_number], me->param);
185: me->state = before_tag;
186: } else {
187: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
188: }
189: break;
190:
2.5 ! timbl 191: case bracketed_value:
! 192: if (c==')') {
! 193: me->param[me->param_count] = 0;
! 194: StrAllocCopy(me->par_value[me->param_number], me->param);
! 195: me->state = before_tag;
! 196: break;
! 197: }
! 198: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
! 199: break;
! 200:
2.1 timbl 201: case quoted_value:
202: if (c=='"') {
203: me->param[me->param_count] = 0;
204: StrAllocCopy(me->par_value[me->param_number], me->param);
205: me->state = before_tag;
2.5 ! timbl 206: break;
! 207: }
! 208:
! 209: if (c=='\\') { /* Ignore escape but switch state */
! 210: me->state = escape_in_quoted;
! 211: break;
2.1 timbl 212: }
2.5 ! timbl 213: /* Fall through! */
! 214:
! 215: case escape_in_quoted:
! 216: if (me->param_count < PARAM_MAX) me->param[me->param_count++] = c;
2.1 timbl 217: break;
218:
219: case done: /* Ignore anything after EOF */
220: return;
221:
222: } /* switch me->state */
223: }
224:
225:
/*	Write Cache file
**	================
**
**	Saves the description of the source in a file named
**		<CACHE_FILE_PREFIX>WSRC-<ip-name>:<tcp-port>:<database-name>.txt
**	where the port defaults to "210" and the escaped database name
**	is cut to 100 characters.
**
** On entry,
**	me	is the parser stream whose par_value[] has been filled in
** On exit,
**	returns	YES if the file was written,
**		NO  if the host or database name is missing, or if the
**		    file could not be created or written.
**
** Bugs:	Maybe for filesystem-challenged platforms (MSDOS for example) we
**	should make a hash code for the filename.
*/

#ifdef CACHE_FILE_PREFIX
PRIVATE BOOL write_cache ARGS1(HTStream *, me)
{
    FILE * fp;
    char * cache_file_name;
    char * www_database;
    CONST char * host = me->par_value[PAR_IP_NAME];
    CONST char * port = me->par_value[PAR_TCP_PORT]
			? me->par_value[PAR_TCP_PORT] : "210";

    /* Without both of these there is no sensible file name */
    if (!host || !me->par_value[PAR_DATABASE_NAME]) return NO;

    www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], URL_XALPHAS);
    if (!www_database) return NO;

    /* The host and port come from the parsed file and may be of any
    ** length, so size the buffer to fit: prefix + host + port + at
    ** most 100 characters of database name + the fixed text, where
    ** sizeof counts the terminating null as well.
    */
    cache_file_name = (char *) malloc(strlen(CACHE_FILE_PREFIX)
				+ strlen(host) + strlen(port) + 100
				+ sizeof("WSRC-::.txt"));
    if (!cache_file_name) {
	free(www_database);
	return NO;
    }
    sprintf(cache_file_name, "%sWSRC-%s:%s:%.100s.txt",
	CACHE_FILE_PREFIX,
	host,
	port,
	www_database);
    free(www_database);

    fp = fopen(cache_file_name, "w");
    free(cache_file_name);
    if (!fp) return NO;

    if (me->par_value[PAR_DESCRIPTION])
	fputs(me->par_value[PAR_DESCRIPTION], fp);
    else
	fputs("Description not available\n", fp);

    /* fclose flushes, so this is where a full disk would show up */
    return fclose(fp) == 0 ? YES : NO;
}
#endif
257:
2.1 timbl 258: /* Output equivalent HTML
259: ** ----------------------
260: **
261: */
262:
263: void give_parameter ARGS2(HTStream *, me, int, p)
264: {
265: PUTS(par_name[p]);
266: if (me->par_value[p]) {
267: PUTS(": ");
268: PUTS(me->par_value[p]);
269: PUTS("; ");
270: } else {
271: PUTS(" NOT GIVEN in source file; ");
272: }
273: }
274:
275:
276: /* Generate Outout
277: ** ===============
278: */
2.4 timbl 279: PRIVATE void WSRC_gen_html ARGS2(HTStream *, me, BOOL, source_file)
2.1 timbl 280:
281: {
282: if (me->par_value[PAR_DATABASE_NAME]) {
283: char * shortname = 0;
284: int l;
285: StrAllocCopy(shortname, me->par_value[PAR_DATABASE_NAME]);
286: l = strlen(shortname);
287: if ( l > 4 && !strcasecomp(shortname + l -4, ".src")) {
288: shortname[l-4] = 0; /* Chop of .src -- boring! */
289: }
290:
291: START(HTML_TITLE);
292: PUTS(shortname);
2.4 timbl 293: PUTS(source_file ? " WAIS source file" : " index");
2.1 timbl 294: END(HTML_TITLE);
295:
296: START(HTML_H1);
297: PUTS(shortname);
2.4 timbl 298: PUTS(source_file ? " description" : " index");
2.1 timbl 299: END(HTML_H1);
300: }
301:
302: START(HTML_DL); /* Definition list of details */
303:
2.4 timbl 304: if (source_file) {
305: START(HTML_DT);
306: PUTS("Access links");
307: START(HTML_DD);
308: if (me->par_value[PAR_IP_NAME] &&
309: me->par_value[PAR_DATABASE_NAME]) {
2.1 timbl 310:
2.4 timbl 311: char WSRC_address[256];
312: char * www_database;
313: www_database = HTEscape(me->par_value[PAR_DATABASE_NAME],
314: URL_XALPHAS);
315: sprintf(WSRC_address, "wais://%s:%s/%s",
316: me->par_value[PAR_IP_NAME],
317: me->par_value[PAR_TCP_PORT] ? me->par_value[PAR_TCP_PORT]
318: : "210",
319: www_database);
2.1 timbl 320:
2.4 timbl 321: HTStartAnchor(me->target, NULL, WSRC_address);
322: PUTS("Direct access");
323: END(HTML_A);
324:
325: PUTS(" or ");
326:
327: sprintf(WSRC_address, "http://info.cern.ch:8001/%s:%s/%s",
328: me->par_value[PAR_IP_NAME],
329: me->par_value[PAR_TCP_PORT] ? me->par_value[PAR_TCP_PORT]
330: : "210",
331: www_database);
332: HTStartAnchor(me->target, NULL, WSRC_address);
333: PUTS("through CERN gateway");
334: END(HTML_A);
335:
336: free(www_database);
337:
338: } else {
339: give_parameter(me, PAR_IP_NAME);
340: give_parameter(me, PAR_IP_NAME);
341: }
342:
343: } /* end if source_file */
344:
2.1 timbl 345: if (me->par_value[PAR_MAINTAINER]) {
346: START(HTML_DT);
347: PUTS("Maintainer");
348: START(HTML_DD);
349: PUTS(me->par_value[PAR_MAINTAINER]);
350: }
351: START(HTML_DT);
352: PUTS("Host");
353: START(HTML_DD);
354: PUTS(me->par_value[PAR_IP_NAME]);
355:
356: END(HTML_DL);
357:
2.5 ! timbl 358: if (me->par_value[PAR_DESCRIPTION]) {
! 359: START(HTML_PRE); /* Preformatted description */
! 360: PUTS(me->par_value[PAR_DESCRIPTION]);
! 361: END(HTML_PRE);
! 362: }
! 363:
2.4 timbl 364: (*me->target->isa->end_document)(me->target);
365: (*me->target->isa->free)(me->target);
2.1 timbl 366:
367: return;
368: } /* generate html */
369:
370:
371: PRIVATE void WSRCParser_put_string ARGS2(HTStream *, context, CONST char*, str)
372: {
373: CONST char *p;
374: for(p=str; *p; p++)
375: WSRCParser_put_character(context, *p);
376: }
377:
378:
379: PRIVATE void WSRCParser_write ARGS3(
380: HTStream *, context,
381: CONST char*, str,
382: int, l)
383: {
384: CONST char *p;
385: CONST char *e = str+l;
386: for(p=str; p<e; p++)
387: WSRCParser_put_character(context, *p);
388: }
389:
390:
391: PRIVATE void WSRCParser_free ARGS1(HTStream *, me)
392: {
2.4 timbl 393: WSRC_gen_html(me, YES);
394: write_cache(me);
2.1 timbl 395: {
396: int p;
397: for(p=0; par_name[p]; p++) { /* Clear out old values */
398: if (me->par_value[p]) {
399: free(me->par_value[p]);
400: }
401: }
402: }
403: free(me);
404: }
405:
406: PRIVATE void WSRCParser_end_document ARGS1(HTStream *, me)
407: {
408: /* Nothing */
409: }
410:
411:
412: /* Stream subclass -- method routines
413: ** ---------------
414: */
415:
416: HTStreamClass WSRCParserClass = {
417: "WSRCParser",
418: WSRCParser_free,
419: WSRCParser_end_document,
420: WSRCParser_put_character,
421: WSRCParser_put_string,
422: WSRCParser_write
423:
424: };
425:
2.4 timbl 426:
2.1 timbl 427: /* Converter from WAIS Source to whatever
428: ** --------------------------------------
429: */
430: PUBLIC HTStream* HTWSRCConvert ARGS3(
431: HTPresentation *, pres,
432: HTParentAnchor *, anchor,
433: HTStream *, sink)
434: {
435: HTStream * me = (HTStream*) malloc(sizeof(*me));
436: if (!me) outofmem(__FILE__, "HTWSRCConvert");
437:
438: me->isa = &WSRCParserClass;
439: me->target = HTML_new(anchor, pres->rep_out, sink);
440:
441: {
442: int p;
443: for(p=0; p < PAR_COUNT; p++) { /* Clear out parameter values */
444: me->par_value[p] = 0;
445: }
446: }
447: me->state = beginning;
448:
449: return me;
450: }
451:
Webmaster