Annotation of libwww/Library/src/HTWAIS.c, revision 2.29
2.29 ! frystyk 1: /* HTWAIS.c
! 2: ** WORLDWIDEWEB - WIDE AREA INFORMATION SERVER ACCESS
! 3: **
! 4: ** (c) COPYRIGHT CERN 1994.
! 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This module allows a WWW server or client to read data from a
2.29 ! frystyk 8: ** remote WAIS server, and provide that data to a WWW client in
! 9: ** hypertext form. Source files, once retrieved, are stored and used
! 10: ** to provide information about the index when that is accessed.
2.1 timbl 11: **
12: ** Authors
13: ** BK Brewster Kahle, Thinking Machines, <Brewster@think.com>
14: ** TBL Tim Berners-Lee, CERN <timbl@info.cern.ch>
15: **
16: ** History
17: ** Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
18: ** Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping)
19: ** Refers to lists of sources.
2.2 timbl 20: ** Mar 93 TBL Lib 2.0 compatible module made.
2.1 timbl 21: **
22: ** Bugs
23: ** Uses C stream i/o to read and write sockets, which won't work
24: ** on VMS TCP systems.
25: **
26: ** Should cache connections.
27: **
28: ** ANSI C only as written
29: **
2.11 secret 30: ** Bugs fixed
31: ** NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
32: **
2.1 timbl 33: ** WAIS comments:
34: **
35: ** 1. Separate directories for different system's .o would help
36: ** 2. Document ids are rather long!
37: **
38: ** WWW Address mapping convention:
39: **
40: ** /servername/database/type/length/document-id
41: **
42: ** /servername/database?word+word+word
43: */
44: /* WIDE AREA INFORMATION SERVER SOFTWARE:
45: No guarantees or restrictions. See the readme file for the full standard
46: disclaimer.
47:
48: Brewster@think.com
49: */
50:
51:
2.8 timbl 52: #define DIRECTORY "/cnidr.org:210/directory-of-servers"
53: /* define DIRECTORY "/quake.think.com:210/directory-of-servers" */
2.1 timbl 54:
55: #define BIG 1024 /* identifier size limit @@@@@ */
56:
2.2 timbl 57: /* From WAIS
58: ** ---------
2.1 timbl 59: */
60:
61: #include <ui.h>
62:
63: #define MAX_MESSAGE_LEN 100000
64: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
65: #define WAISSEARCH_DATE "Fri Jul 19 1991"
66:
67:
2.2 timbl 68: /* FROM WWW
69: ** --------
2.1 timbl 70: */
71: #define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
72:
2.2 timbl 73: #define HEX_ESCAPE '%'
74:
2.24 frystyk 75: /* Platform dependent stuff */
76: #include "tcp.h"
77:
78: /* Library includes */
2.1 timbl 79: #include "HTUtils.h"
80: #include "HTParse.h"
2.2 timbl 81: #include "HTAccess.h" /* We implement a protocol */
2.21 frystyk 82: #include "HTError.h"
2.2 timbl 83: #include "HTML.h" /* The object we will generate */
2.25 frystyk 84: #include "HTParse.h"
85: #include "HTFormat.h"
86: #include "HTTCP.h"
2.2 timbl 87:
2.1 timbl 88: extern FILE * logfile; /* Log file output */
89:
2.27 frystyk 90: PUBLIC int HTMaxWAISLines = 200;/* Max number of entries from a search */
2.21 frystyk 91:
2.1 timbl 92: PRIVATE BOOL as_gate; /* Client is using us as gateway */
93:
94: PRIVATE char line[2048]; /* For building strings to display */
95: /* Must be able to take id */
2.2 timbl 96:
2.25 frystyk 97: /* Hypertext object building machinery */
2.2 timbl 98: #include "HTML.h"
99:
100: #define PUTC(c) (*target->isa->put_character)(target, c)
101: #define PUTS(s) (*target->isa->put_string)(target, s)
102: #define START(e) (*target->isa->start_element)(target, e, 0, 0)
103: #define END(e) (*target->isa->end_element)(target, e)
2.23 frystyk 104: #define FREE_TARGET (*target->isa->_free)(target)
2.2 timbl 105:
106: struct _HTStructured {
107: CONST HTStructuredClass * isa;
108: /* ... */
109: };
110:
111: struct _HTStream {
112: CONST HTStreamClass * isa;
113: /* ... */
114: };
115:
116:
2.1 timbl 117: /* showDiags
118: */
119: /* modified from Jonny G's version in ui/question.c */
120:
2.2 timbl 121: void showDiags ARGS2(
122: HTStream *, target,
123: diagnosticRecord **, d)
2.1 timbl 124: {
125: long i;
126:
127: for (i = 0; d[i] != NULL; i++) {
128: if (d[i]->ADDINFO != NULL) {
129: PUTS("Diagnostic code is ");
130: PUTS(d[i]->DIAG);
131: PUTC(' ');
132: PUTS(d[i]->ADDINFO);
133: PUTC('\n'); ;
134: }
135: }
136: }
137:
138: /* Matrix of allowed characters in filenames
139: ** -----------------------------------------
140: */
141:
142: PRIVATE BOOL acceptable[256];
143: PRIVATE BOOL acceptable_inited = NO;
144:
145: PRIVATE void init_acceptable NOARGS
146: {
147: unsigned int i;
148: char * good =
149: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
150: for(i=0; i<256; i++) acceptable[i] = NO;
151: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
152: acceptable_inited = YES;
153: }
154:
155: /* Transform file identifier into WWW address
156: ** ------------------------------------------
157: **
158: **
159: ** On exit,
160: ** returns nil if error
161: ** pointer to malloced string (must be freed) if ok
162: */
163: char * WWW_from_archie ARGS1 (char *, file)
164: {
165: char * end;
166: char * result;
167: char * colon;
168: for(end=file; *end > ' '; end++); /* assumes ASCII encoding*/
169: result = (char *)malloc(10 + (end-file));
170: if (!result) return result; /* Malloc error */
171: strcpy(result, "file://");
172: strncat(result, file, end-file);
173: colon = strchr(result+7, ':'); /* Expect colon after host */
174: if (colon) {
175: for(; colon[0]; colon[0]=colon[1], colon++); /* move down */
176: }
177: return result;
178: } /* WWW_from_archie */
179:
2.2 timbl 180: /* Transform document identifier into URL
181: ** --------------------------------------
2.1 timbl 182: **
183: ** Bugs: A static buffer of finite size is used!
184: ** The format of the docid MUST be good!
185: **
186: ** On exit,
187: ** returns nil if error
188: ** pointer to malloced string (must be freed) if ok
189: */
2.9 timbl 190: PRIVATE char hex [17] = "0123456789ABCDEF";
2.2 timbl 191: extern char from_hex PARAMS((char a)); /* In HTWSRC @@ */
192:
193: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
194:
2.1 timbl 195: {
2.21 frystyk 196: static unsigned char buf[BIG];
197: char num[10];
198: unsigned char * q = buf;
2.1 timbl 199: char * p = (docid->bytes);
200: int i, l;
2.25 frystyk 201: if (PROT_TRACE) {
2.1 timbl 202: char *p;
2.21 frystyk 203: fprintf(stderr, "HTLoadWAIS.. id (%d bytes) is ", (int)docid->size);
2.1 timbl 204: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
205: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
206: fprintf(stderr, "%c", *p);
207: else
2.2 timbl 208: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 209: }
210: fprintf(stderr, "\n");
211: }
212: for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
2.25 frystyk 213: if (PROT_TRACE)
2.24 frystyk 214: fprintf(stderr, "............ Record type %d, length %d\n",
215: (unsigned char) p[0], (unsigned char) p[1]);
2.21 frystyk 216: sprintf(num, "%d", (int)*p);
217: memcpy(q, num, strlen(num));
218: q += strlen(num);
219: p++;
220: *q++ = '='; /* Separate */
221: l = (int)((unsigned char)*p);
222: p++;
223: if (l > 127)
224: {
225: l = (l - 128) * 128;
226: l = l + (int)((unsigned char)*p);
227: p++;
228: }
229:
230: for (i = 0; i < l; i++, p++)
231: {
232: if (!acceptable[(unsigned char)*p])
233: {
234: *q++ = HEX_ESCAPE;
235: *q++ = hex[((unsigned char)*p) >> 4];
236: *q++ = hex[((unsigned char)*p) & 15];
237: }
238: else *q++ = (unsigned char)*p;
239: }
240: *q++= ';'; /* Terminate field */
241: #ifdef OLD_CODE
2.1 timbl 242: if (*p>10) {
2.25 frystyk 243: if (PROT_TRACE)
2.21 frystyk 244: fprintf(stderr, "WAIS........ DOCID record type of %d!\n", *p);
2.1 timbl 245: return 0;
246: }
2.2 timbl 247: { /* Bug fix -- allow any byte value 15 Apr 93 */
248: unsigned int i = (unsigned) *p++;
249:
250: if (i > 99) {
251: *q++ = (i/100) + '0';
252: i = i % 100;
253: }
254: if (i > 9) {
255: *q++ = (i/10) + '0';
256: i = i % 10;
257: }
258: *q++ = i + '0'; /* Record type */
259: }
2.1 timbl 260: *q++ = '='; /* Separate */
261: l = *p++; /* Length */
262: for(i=0; i<l; i++, p++){
2.18 luotonen 263: if (!acceptable[(int)*p]) {
2.1 timbl 264: *q++ = HEX_ESCAPE; /* Means hex commming */
265: *q++ = hex[(*p) >> 4];
266: *q++ = hex[(*p) & 15];
267: }
268: else *q++ = *p;
269: }
270: *q++= ';'; /* Terminate field */
2.21 frystyk 271: #endif /* OLD_CODE */
2.1 timbl 272: }
273: *q++ = 0; /* Terminate string */
2.25 frystyk 274: if (PROT_TRACE) fprintf(stderr, "HTLoadWAIS.. WWW form of id: %s\n", buf);
2.1 timbl 275: {
2.24 frystyk 276: char *result;
277: if ((result = (char *) malloc((int) strlen(buf)+1)) == NULL)
278: outofmem(__FILE__, "WWW_from_WAIS");
2.1 timbl 279: strcpy(result, buf);
280: return result;
281: }
282: } /* WWW_from_WAIS */
283:
284:
2.2 timbl 285: /* Transform URL into WAIS document identifier
286: ** -------------------------------------------
2.1 timbl 287: **
288: ** On entry,
289: ** docname points to valid name produced originally by
290: ** WWW_from_WAIS
291: ** On exit,
292: ** docid->size is valid
293: ** docid->bytes is malloced and must later be freed.
294: */
295: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
296: {
297: char *z; /* Output pointer */
298: char *sor; /* Start of record - points to size field. */
299: char *p; /* Input pointer */
300: char *q; /* Poisition of "=" */
301: char *s; /* Position of semicolon */
302: int n; /* size */
2.25 frystyk 303: if (PROT_TRACE)
2.24 frystyk 304: fprintf(stderr, "HTLoadWAIS.. WWW id (to become WAIS id): %s\n",
305: docname);
2.1 timbl 306: for(n=0, p = docname; *p; p++) { /* Count sizes of strings */
307: n++;
308: if (*p == ';') n--; /* Not converted */
309: else if (*p == HEX_ESCAPE) n=n-2; /* Save two bytes */
310: docid->size = n;
311: }
312:
2.21 frystyk 313: docid->bytes = (char *) malloc(docid->size+32); /* result record */
2.1 timbl 314: z = docid->bytes;
315:
2.21 frystyk 316: for(p = docname; *p; ) {
317: q = strchr(p, '=');
318: if (!q)
319: return 0;
320: *q = '\0';
321: *z++ = atoi(p);
322: *q = '=';
323: s = strchr(q, ';'); /* (Check only) */
324: if (!s)
325: return 0; /* Bad! No ';'; */
326: sor = z; /* Remember where the size field was */
327: z++; /* Skip record size for now */
328:
329: {
330: int len;
331: int tmp;
332: for(p=q+1; *p!=';' ; ) {
333: if (*p == HEX_ESCAPE) {
334: char c;
335: unsigned int b;
336: p++;
337: c = *p++;
338: b = from_hex(c);
339: c = *p++;
340: if (!c)
341: break; /* Odd number of chars! */
342: *z++ = (b<<4) + from_hex(c);
343: } else {
344: *z++ = *p++; /* Record */
345: }
346: }
347: len = (z-sor-1);
348:
349: z = sor;
350: if (len > 127) {
351: tmp = (len / 128);
352: len = len - (tmp * 128);
353: tmp = tmp + 128;
354: *z++ = (char)tmp;
355: *z = (char)len;
356: } else {
357: *z = (char)len;
358: }
359: z++;
360: }
361:
362: for(p=q+1; *p!=';' ; ) {
363: if (*p == HEX_ESCAPE) {
364: char c;
365: unsigned int b;
366: p++;
367: c = *p++;
368: b = from_hex(c);
369: c = *p++;
370: if (!c)
371: break; /* Odd number of chars! */
372: *z++ = (b<<4) + from_hex(c);
373: } else {
374: *z++ = *p++; /* Record */
375: }
376: }
377: p++; /* After semicolon: start of next record */
378: }
379:
380: #ifdef OLD_CODE
2.1 timbl 381: for(p = docname; *p; ) { /* Convert of strings */
2.2 timbl 382: /* Record type */
383:
384: *z = 0; /* Initialize record type */
385: while (*p >= '0' && *p <= '9') {
386: *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */
387: }
388: z++;
389: if (*p != '=') return 0;
390: q = p;
391:
392: /* *z++ = *p++ - '0';
2.1 timbl 393: q = strchr(p , '=');
394: if (!q) return 0;
2.2 timbl 395: */
2.1 timbl 396: s = strchr(q, ';'); /* (Check only) */
397: if (!s) return 0; /* Bad! No ';'; */
398: sor = z; /* Remember where the size field was */
399: z++; /* Skip record size for now */
400: for(p=q+1; *p!=';' ; ) {
401: if (*p == HEX_ESCAPE) {
402: char c;
403: unsigned int b;
404: p++;
405: c = *p++;
406: b = from_hex(c);
407: c = *p++;
408: if (!c) break; /* Odd number of chars! */
409: *z++ = (b<<4) + from_hex(c);
410: } else {
411: *z++ = *p++; /* Record */
412: }
413: }
414: *sor = (z-sor-1); /* Fill in size -- not counting size itself */
415: p++; /* After semicolon: start of next record */
416: }
2.21 frystyk 417: #endif /* OLD_CODE */
2.25 frystyk 418: if (PROT_TRACE) {
2.1 timbl 419: char *p;
2.21 frystyk 420: fprintf(stderr, "WAIS........ id (%d bytes) is ", (int)docid->size);
2.1 timbl 421: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
422: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
423: fprintf(stderr, "%c", *p);
424: else
2.2 timbl 425: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 426: }
427: fprintf(stderr, "\n");
428: }
429: return docid; /* Ok */
430:
431: } /* WAIS_from_WWW */
432:
433:
434: /* Send a plain text record to the client output_text_record()
435: ** --------------------------------------
436: */
2.2 timbl 437:
2.9 timbl 438: PRIVATE void output_text_record ARGS4(
2.2 timbl 439: HTStream *, target,
440: WAISDocumentText *, record,
2.9 timbl 441: boolean, quote_string_quotes,
442: boolean, binary)
2.1 timbl 443: {
444: long count;
445: /* printf(" Text\n");
446: print_any(" DocumentID: ", record->DocumentID);
447: printf(" VersionNumber: %d\n", record->VersionNumber);
448: */
2.9 timbl 449:
450: if (binary) {
451: (*target->isa->put_block)(target,
452: record->DocumentText->bytes,
453: record->DocumentText->size);
454: return;
455: }
456:
2.1 timbl 457: for(count = 0; count < record->DocumentText->size; count++){
458: long ch = (unsigned char)record->DocumentText->bytes[count];
2.2 timbl 459: if (ch == 27) { /* What is this in for? Tim */
2.1 timbl 460:
461: /* then we have an escape code */
462: /* if the next letter is '(' or ')', then ignore two letters */
463: if('(' == record->DocumentText->bytes[count + 1] ||
464: ')' == record->DocumentText->bytes[count + 1])
465: count += 1; /* it is a term marker */
466: else count += 4; /* it is a paragraph marker */
467: } else if (ch == '\n' || ch == '\r') {
2.2 timbl 468: PUTC('\n');
2.1 timbl 469: } else if ((ch=='\t') || isprint(ch)){
2.2 timbl 470: PUTC(ch);
2.1 timbl 471: }
472: }
473: } /* output text record */
474:
475:
2.2 timbl 476:
2.1 timbl 477: /* Format A Search response for the client display_search_response
478: ** ---------------------------------------
479: */
480: /* modified from tracy shen's version in wutil.c
481: * displays either a text record or a set of headlines.
482: */
483: void
2.2 timbl 484: display_search_response ARGS4(
485: HTStructured *, target,
2.1 timbl 486: SearchResponseAPDU *, response,
487: char *, database,
488: char *, keywords)
489: {
490: WAISSearchResponse *info;
491: long i, k;
492:
493: BOOL archie = strstr(database, "archie")!=0; /* Specical handling */
494:
2.25 frystyk 495: if (PROT_TRACE) fprintf(stderr, "WAIS........ Displaying search response\n");
2.1 timbl 496: sprintf(line,
2.21 frystyk 497: "Index %s contains the following %d item%s relevant to '%s'.\n",
498: database,
499: (int)(response->NumberOfRecordsReturned),
500: response->NumberOfRecordsReturned ==1 ? "" : "s",
501: keywords);
2.2 timbl 502: PUTS(line);
503: PUTS("The first figure for each entry is its relative score, ");
504: PUTS("the second the number of lines in the item.");
505: START(HTML_MENU);
506:
2.1 timbl 507: if ( response->DatabaseDiagnosticRecords != 0 ) {
508: info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
509: i =0;
510:
511: if (info->Diagnostics != NULL)
2.2 timbl 512: showDiags((HTStream*)target, info->Diagnostics);
2.1 timbl 513:
514: if ( info->DocHeaders != 0 ) {
515: for (k=0; info->DocHeaders[k] != 0; k++ ) {
516: WAISDocumentHeader* head = info->DocHeaders[k];
517: char * headline = trim_junk(head->Headline);
518: any * docid = head->DocumentID;
519: char * docname; /* printable version of docid */
520: i++;
521:
522: /* Make a printable string out of the document id.
523: */
2.25 frystyk 524: if (PROT_TRACE)
2.24 frystyk 525: fprintf(stderr, "HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n", i,
526: (long int)(info->DocHeaders[k]->Score),
527: (long int)(info->DocHeaders[k]->Lines),
528: headline);
2.1 timbl 529:
2.2 timbl 530: START(HTML_LI);
531: sprintf(line, "%4ld %4ld ",
532: head->Score,
533: head->Lines);
2.23 frystyk 534: PUTS(line);
2.2 timbl 535:
2.1 timbl 536: if (archie) {
537: char * www_name = WWW_from_archie(headline);
538: if (www_name) {
2.2 timbl 539: HTStartAnchor(target, NULL, www_name);
2.1 timbl 540: PUTS(headline);
2.2 timbl 541:
542: END(HTML_A);
2.1 timbl 543: free(www_name);
544: } else {
2.2 timbl 545: PUTS(headline);
546: PUTS(" (bad file name)");
2.1 timbl 547: }
548: } else { /* Not archie */
549: docname = WWW_from_WAIS(docid);
550: if (docname) {
2.6 timbl 551: char * dbname = HTEscape(database, URL_XPALPHAS);
2.21 frystyk 552: char types_array[1000]; /* bad */
553: char *type_escaped;
554: types_array[0] = 0;
555: if (head->Types) {
556: int i;
557: for (i = 0; head->Types[i]; i++)
558: {
559: if (i)
560: strcat (types_array, ",");
561:
562: type_escaped = HTEscape (head->Types[i], URL_XALPHAS);
563: strcat (types_array, type_escaped);
564: free (type_escaped);
565: }
2.25 frystyk 566: if (PROT_TRACE)
2.24 frystyk 567: fprintf (stderr, "WAIS........ Types_array `%s\'\n",
568: types_array);
2.21 frystyk 569: } else {
570: strcat (types_array, "TEXT");
571: }
572: sprintf(line, "%s/%s/%d/%s",
573: dbname,
574: types_array,
575: (int)(head->DocumentLength),
576: docname);
577: #ifdef OLD_CODE
2.1 timbl 578: sprintf(line, "%s/%s/%d/%s", /* W3 address */
579: dbname,
580: head->Types ? head->Types[0] : "TEXT",
2.2 timbl 581: (int)(head->DocumentLength),
2.1 timbl 582: docname);
2.21 frystyk 583: #endif /* OLD_CODE */
2.11 secret 584: HTStartAnchor(target, NULL, ( (head->Types)
585: && (!strcmp(head->Types[0], "URL"))) ?
586: headline : line); /* NT, Sep 93 */
2.2 timbl 587: PUTS(headline);
588: END(HTML_A);
2.1 timbl 589: free(dbname);
590: free(docname);
591: } else {
2.2 timbl 592: PUTS("(bad doc id)");
2.1 timbl 593: }
594: }
595: } /* next document header */
596: } /* if there were any document headers */
597:
598: if ( info->ShortHeaders != 0 ) {
599: k =0;
600: while (info->ShortHeaders[k] != 0 ) {
601: i++;
2.2 timbl 602: PUTS( "(Short Header record, can't display)");
2.1 timbl 603: }
604: }
605: if ( info->LongHeaders != 0 ) {
606: k =0;
607: while (info->LongHeaders[k] != 0) {
608: i++;
609: PUTS( "\nLong Header record, can't display\n");
610: }
611: }
612: if ( info->Text != 0 ) {
613: k =0;
614: while (info->Text[k] != 0) {
615: i++;
616: PUTS( "\nText record\n");
2.9 timbl 617: output_text_record((HTStream*)target, info->Text[k++], false, false);
2.1 timbl 618: }
619: }
620: if ( info->Headlines != 0 ) {
621: k =0;
622: while (info->Headlines[k] != 0) {
623: i++;
624: PUTS( "\nHeadline record, can't display\n");
625: /* dsply_headline_record( info->Headlines[k++]); */
626: }
627: }
628: if ( info->Codes != 0 ) {
629: k =0;
630: while (info->Codes[k] != 0) {
631: i++;
632: PUTS( "\nCode record, can't display\n");
633: /* dsply_code_record( info->Codes[k++]); */
634: }
635: }
636: } /* Loop: display user info */
2.2 timbl 637: END(HTML_MENU);
2.1 timbl 638: PUTC('\n'); ;
639: }
640:
641:
642:
2.2 timbl 643:
2.20 frystyk 644: /* Load Document from WAIS Server HTLoadWAIS()
645: ** ------------------------------
2.2 timbl 646: **
2.20 frystyk 647: ** On entry,
648: ** request This is the request structure
649: ** On exit,
650: ** returns <0 Error has occured
651: ** HT_LOADED OK
2.1 timbl 652: */
2.13 timbl 653: PUBLIC int HTLoadWAIS ARGS1(HTRequest * , request)
2.1 timbl 654:
2.21 frystyk 655: #define MAX_KEYWORDS_LENGTH 4000
2.1 timbl 656: #define MAX_SERVER_LENGTH 1000
657: #define MAX_DATABASE_LENGTH 1000
658: #define MAX_SERVICE_LENGTH 1000
659:
660: {
2.13 timbl 661: CONST char * arg = HTAnchor_physical(request->anchor);
662: HTFormat format_out = request->output_format;
663: HTStream* sink = request->output_stream;
2.20 frystyk 664: #if 0
2.1 timbl 665: static CONST char * error_header =
2.7 timbl 666: "<h1>Access error</h1>\nThe following error occured in accesing a WAIS server:<P>\n";
2.20 frystyk 667: #endif
2.2 timbl 668: char * key; /* pointer to keywords in URL */
2.1 timbl 669: char* request_message = NULL; /* arbitrary message limit */
670: char* response_message = NULL; /* arbitrary message limit */
671: long request_buffer_length; /* how of the request is left */
672: SearchResponseAPDU *retrieval_response = 0;
673: char keywords[MAX_KEYWORDS_LENGTH + 1];
674: char *server_name;
2.6 timbl 675: char *wais_database = NULL; /* name of current database */
676: char *www_database; /* Same name escaped */
2.1 timbl 677: char *service;
678: char *doctype;
679: char *doclength;
680: long document_length;
681: char *docname;
682: FILE *connection = 0;
683: char * names; /* Copy of arg to be hacked up */
684: BOOL ok = NO;
2.20 frystyk 685: int status = -1;
2.21 frystyk 686: char *basetitle = NULL;
2.1 timbl 687:
688: extern FILE * connect_to_server();
689:
2.27 frystyk 690: if (PROT_TRACE)
691: fprintf(stderr, "HTLoadWAIS.. Looking for `%s\'\n", arg);
692:
2.1 timbl 693: if (!acceptable_inited) init_acceptable();
694:
695:
696: /* Decipher and check syntax of WWW address:
697: ** ----------------------------------------
698: **
699: ** First we remove the "wais:" if it was spcified. 920110
700: */
701: names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2 timbl 702: key = strchr(names, '?');
703:
704: if (key) {
705: char * p;
706: *key++ = 0; /* Split off keywords */
707: for (p=key; *p; p++) if (*p == '+') *p = ' ';
708: HTUnEscape(key);
709: }
2.1 timbl 710: if (names[0]== '/') {
711: server_name = names+1;
2.18 luotonen 712: if ((as_gate =(*server_name == '/')))
2.1 timbl 713: server_name++; /* Accept one or two */
714: www_database = strchr(server_name,'/');
715: if (www_database) {
716: *www_database++ = 0; /* Separate database name */
717: doctype = strchr(www_database, '/');
718: if (key) ok = YES; /* Don't need doc details */
719: else if (doctype) { /* If not search parse doc details */
720: *doctype++ = 0; /* Separate rest of doc address */
721: doclength = strchr(doctype, '/');
722: if(doclength) {
723: *doclength++ = 0;
724: document_length = atol(doclength);
725: if (document_length) {
726: docname=strchr(doclength, '/');
727: if (docname) {
728: *docname++ = 0;
729: ok = YES; /* To avoid a goto! */
730: } /* if docname */
731: } /* if document_length valid */
732: } /* if doclength */
733: } else { /* no doctype? Assume index required */
734: if (!key) key = "";
735: ok = YES;
736: } /* if doctype */
737: } /* if database */
738: }
739:
2.21 frystyk 740: if (!ok) {
741: char *unescaped = NULL;
742: StrAllocCopy(unescaped, arg);
743: HTUnEscape(unescaped);
744: HTErrorAdd(request, ERR_FATAL, NO, HTERR_BAD_REQUEST,
745: (void *) unescaped, (int) strlen(unescaped),
746: "HTLoadWAIS");
747: free(unescaped);
748: free(names);
749: return -1;
750: }
751:
2.25 frystyk 752: if (PROT_TRACE) fprintf(stderr, "HTLoadWAIS.. URL Parsed OK\n");
2.1 timbl 753:
754: service = strchr(names, ':');
755: if (service) *service++ = 0;
756: else service = "210";
757:
758: if (server_name[0] == 0)
759: connection = NULL;
760:
761: else if (!(key && !*key))
2.20 frystyk 762: if ((connection=connect_to_server(server_name,atoi(service))) == NULL) {
2.21 frystyk 763: char *host = HTParse(arg, "", PARSE_HOST);
2.25 frystyk 764: if (PROT_TRACE)
2.24 frystyk 765: fprintf (stderr, "HTLoadWAIS.. Can't open connection to %s via service %s.\n",
766: server_name, service);
2.21 frystyk 767: HTErrorAdd(request, ERR_FATAL, NO, HTERR_WAIS_NO_CONNECT,
2.20 frystyk 768: (void *) host, (int) strlen(host), "HTLoadWAIS");
769: goto cleanup;
770: }
2.1 timbl 771:
2.6 timbl 772: StrAllocCopy(wais_database,www_database);
773: HTUnEscape(wais_database);
2.21 frystyk 774:
775: /* Make title name without the .src */
776: {
777: char *srcstr;
778: StrAllocCopy(basetitle, wais_database);
779: if ((srcstr = strstr(basetitle, ".src")) != NULL)
780: *srcstr = '\0';
781: }
2.6 timbl 782:
2.21 frystyk 783: /* This below fixed size stuff is terrible */
2.1 timbl 784: request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
785: response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
786:
787: /* If keyword search is performed but there are no keywords,
788: ** the user has followed a link to the index itself. It would be
789: ** appropriate at this point to send him the .SRC file - how?
790: */
791:
792: if (key && !*key) { /* I N D E X */
793:
2.7 timbl 794: #ifdef CACHE_FILE_PREFIX
795: char filename[256];
796: FILE * fp;
797: #endif
2.13 timbl 798: HTStructured * target = HTML_new(request, NULL,
799: WWW_HTML, format_out, sink);
2.1 timbl 800:
2.8 timbl 801: {
2.27 frystyk 802: START(HTML_HTML);
803: START(HTML_HEAD);
2.8 timbl 804: START(HTML_TITLE);
2.21 frystyk 805: PUTS(basetitle);
806: PUTS(" Index");
2.8 timbl 807: END(HTML_TITLE);
2.27 frystyk 808: END(HTML_HEAD);
2.8 timbl 809:
2.27 frystyk 810: START(HTML_BODY);
2.8 timbl 811: START(HTML_H1);
2.21 frystyk 812: PUTS("WAIS Index: ");
813: PUTS(basetitle);
2.8 timbl 814: END(HTML_H1);
815:
816: }
2.21 frystyk 817: START(HTML_ISINDEX);
818:
819: /* If we have seen a source file for this database, use that: */
2.2 timbl 820:
2.7 timbl 821: #ifdef CACHE_FILE_PREFIX
2.8 timbl 822: sprintf(filename, "%sWSRC-%s:%s:%.100s.txt",
2.7 timbl 823: CACHE_FILE_PREFIX,
2.1 timbl 824: server_name, service, www_database);
825:
826: fp = fopen(filename, "r"); /* Have we found this already? */
2.25 frystyk 827: if (PROT_TRACE) fprintf(stderr,
2.21 frystyk 828: "HTLoadWAIS.. Description of server %s %s.\n",
2.1 timbl 829: filename,
830: fp ? "exists already" : "does NOT exist!");
2.2 timbl 831:
2.1 timbl 832: if (fp) {
2.24 frystyk 833: int c;
834: START(HTML_PRE); /* Preformatted description */
835: while((c=getc(fp)) != EOF)
836: PUTC(c); /* Transfer file */
2.7 timbl 837: END(HTML_PRE);
2.1 timbl 838: fclose(fp);
2.8 timbl 839: }
2.2 timbl 840: #endif
2.27 frystyk 841: END(HTML_BODY);
842: END(HTML_HTML);
2.2 timbl 843: FREE_TARGET;
2.1 timbl 844:
845: } else if (key) { /* S E A R C H */
846: char *p;
2.2 timbl 847: HTStructured * target;
848:
2.1 timbl 849: strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
2.18 luotonen 850: while ((p = strchr(keywords,'+'))) *p = ' ';
2.1 timbl 851:
852: /* Send advance title to get something fast to the other end */
853:
2.13 timbl 854: target = HTML_new(request, NULL, WWW_HTML, format_out, sink);
2.2 timbl 855:
2.23 frystyk 856: START(HTML_HTML);
857: START(HTML_HEAD);
2.2 timbl 858: START(HTML_TITLE);
859: PUTS(keywords);
2.21 frystyk 860: PUTS(" in ");
861: PUTS(basetitle);
2.2 timbl 862: END(HTML_TITLE);
2.23 frystyk 863: END(HTML_HEAD);
2.2 timbl 864:
2.23 frystyk 865: START(HTML_BODY);
2.2 timbl 866: START(HTML_H1);
2.21 frystyk 867: PUTS("WAIS Search of \"");
2.2 timbl 868: PUTS(keywords);
2.21 frystyk 869: PUTS("\" in ");
870: PUTS(basetitle);
2.2 timbl 871: END(HTML_H1);
2.1 timbl 872:
2.21 frystyk 873: START(HTML_ISINDEX);
874:
2.1 timbl 875: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.25 frystyk 876: if (PROT_TRACE)
2.24 frystyk 877: fprintf(stderr, "HTLoadWAIS.. Search for `%s' in `%s'\n",
878: keywords, wais_database);
2.21 frystyk 879: if(generate_search_apdu(request_message + HEADER_LENGTH,
2.1 timbl 880: &request_buffer_length,
2.21 frystyk 881: keywords, wais_database, NULL,
882: HTMaxWAISLines) == NULL) {
2.25 frystyk 883: if (PROT_TRACE)
2.23 frystyk 884: fprintf(stderr, "WAIS Search. Too many lines in response\n");
2.21 frystyk 885: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
886: NULL, 0, "HTLoadWAIS");
887: }
2.1 timbl 888:
889: if(!interpret_message(request_message,
890: MAX_MESSAGE_LEN - request_buffer_length,
891: response_message,
892: MAX_MESSAGE_LEN,
893: connection,
894: false /* true verbose */
895: )) {
2.25 frystyk 896: if (PROT_TRACE)
2.23 frystyk 897: fprintf(stderr, "WAIS Search. Too many lines in response\n");
2.21 frystyk 898: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
899: NULL, 0, "HTLoadWAIS");
2.1 timbl 900: } else { /* returned message ok */
901: SearchResponseAPDU *query_response = 0;
902: readSearchResponseAPDU(&query_response,
903: response_message + HEADER_LENGTH);
2.2 timbl 904: display_search_response(target,
905: query_response, wais_database, keywords);
2.1 timbl 906: if (query_response->DatabaseDiagnosticRecords)
907: freeWAISSearchResponse(
908: query_response->DatabaseDiagnosticRecords);
909: freeSearchResponseAPDU( query_response);
910: } /* returned message not too large */
911:
2.23 frystyk 912: END(HTML_BODY);
913: END(HTML_HTML);
2.2 timbl 914: FREE_TARGET;
915:
2.1 timbl 916: } else { /* D O C U M E N T F E T C H */
917:
2.2 timbl 918: HTFormat format_in;
2.9 timbl 919: boolean binary; /* how to transfer stuff coming over */
2.2 timbl 920: HTStream * target;
2.1 timbl 921: long count;
922: any doc_chunk;
923: any * docid = &doc_chunk;
2.25 frystyk 924: if (PROT_TRACE)
2.24 frystyk 925: fprintf(stderr,
926: "HTLoadWAIS.. Retrieve document `%s'\n............ type `%s' length %ld\n", docname, doctype, document_length);
2.2 timbl 927:
928: format_in =
929: !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
2.23 frystyk 930: !strcmp(doctype, "TEXT") ? WWW_UNKNOWN :
2.9 timbl 931: !strcmp(doctype, "HTML") ? HTAtom_for("text/html") :
2.2 timbl 932: !strcmp(doctype, "GIF") ? HTAtom_for("image/gif") :
2.11 secret 933: HTAtom_for("application/octet-stream");
2.9 timbl 934: binary =
935: 0 != strcmp(doctype, "WSRC") &&
936: 0 != strcmp(doctype, "TEXT") &&
937: 0 != strcmp(doctype, "HTML") ;
938:
2.23 frystyk 939: /* Guess on TEXT format as it might be HTML */
2.28 frystyk 940: if ((target = HTStreamStack(format_in, request->output_format,
941: request->output_stream,
942: request, YES)) == NULL) {
2.21 frystyk 943: status = -1;
944: goto cleanup;
945: }
2.2 timbl 946:
2.21 frystyk 947: /* Decode hex or litteral format for document ID */
2.1 timbl 948: WAIS_from_WWW(docid, docname);
949:
2.21 frystyk 950: /* Loop over slices of the document */
951: for (count = 0; count * CHARS_PER_PAGE < document_length; count++) {
952: char *type = s_strdup(doctype);
953: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.25 frystyk 954: if (PROT_TRACE) fprintf(stderr, "HTLoadWAIS.. Slice number %ld\n",
2.21 frystyk 955: count);
956: if (generate_retrieval_apdu(request_message + HEADER_LENGTH,
957: &request_buffer_length,
958: docid, CT_byte,
959: count * CHARS_PER_PAGE,
960: HTMIN((count + 1) * CHARS_PER_PAGE,
961: document_length),
962: type,
963: wais_database) == 0) {
964: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
965: NULL, 0, "HTLoadWAIS");
966: }
967: FREE(type);
968:
969: /* Actually do the transaction given by request_message */
970: if (interpret_message(request_message,
971: MAX_MESSAGE_LEN - request_buffer_length,
972: response_message,
973: MAX_MESSAGE_LEN,
974: connection,
975: false /* true verbose */
976: ) == 0) {
977: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
978: NULL, 0, "HTLoadWAIS");
979: }
980:
981: /* Parse the result which came back into memory. */
982: readSearchResponseAPDU(&retrieval_response,
983: response_message + HEADER_LENGTH);
984: {
985: WAISSearchResponse *searchres = (WAISSearchResponse *) retrieval_response->DatabaseDiagnosticRecords;
986: if (!searchres->Text) {
987: if (searchres->Diagnostics && *searchres->Diagnostics &&
988: (*searchres->Diagnostics)->ADDINFO) {
989: char *errmsg = (*searchres->Diagnostics)->ADDINFO;
990: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_MODULE,
991: (void *) errmsg, (int) strlen(errmsg),
992: "HTLoadWAIS");
993: } else {
994: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_MODULE,
995: NULL, 0, "HTLoadWAIS");
996: }
2.23 frystyk 997: (*target->isa->_free)(target);
2.26 frystyk 998: request->output_stream = NULL;
2.22 frystyk 999: free (docid->bytes);
1000: freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords);
1001: freeSearchResponseAPDU( retrieval_response);
1002: goto cleanup;
2.21 frystyk 1003: } else {
1004: output_text_record(target, *searchres->Text,
1005: false, binary);
2.22 frystyk 1006: freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords);
1007: freeSearchResponseAPDU( retrieval_response);
2.21 frystyk 1008: } /* If text existed */
1009: }
1010:
1011: } /* Loop over slices */
2.1 timbl 1012:
2.23 frystyk 1013: (*target->isa->_free)(target);
2.26 frystyk 1014: request->output_stream = NULL;
2.1 timbl 1015: free (docid->bytes);
1016: } /* If document rather than search */
2.20 frystyk 1017: status = HT_LOADED;
2.2 timbl 1018:
2.20 frystyk 1019: cleanup:
2.1 timbl 1020: if (connection) close_connection(connection);
1021: if (wais_database) free(wais_database);
2.20 frystyk 1022: if (request_message) s_free(request_message);
1023: if (response_message) s_free(response_message);
1024: FREE(names);
2.21 frystyk 1025: FREE(basetitle);
2.20 frystyk 1026: if (status < 0) {
1027: char *unescaped = NULL;
1028: StrAllocCopy(unescaped, arg);
1029: HTUnEscape(unescaped);
1030: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL, (void *) unescaped,
1031: (int) strlen(unescaped), "HTLoadWAIS");
1032: free(unescaped);
1033: }
1034: return status;
2.1 timbl 1035: }
1036:
2.28 frystyk 1037: GLOBALDEF PUBLIC HTProtocol HTWAIS = {
1038: "wais", SOC_BLOCK, HTLoadWAIS, NULL, NULL
1039: };
2.1 timbl 1040:
1041:
Webmaster