Annotation of libwww/Library/src/HTWAIS.c, revision 2.30
2.29 frystyk 1: /* HTWAIS.c
2: ** WORLDWIDEWEB - WIDE AREA INFORMATION SERVER ACCESS
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This module allows a WWW server or client to read data from a
2.29 frystyk 8: ** remote WAIS server, and provide that data to a WWW client in
9: ** hypertext form. Source files, once retrieved, are stored and used
10: ** to provide information about the index when that is accessed.
2.1 timbl 11: **
12: ** Authors
13: ** BK Brewster Kahle, Thinking Machines, <Brewster@think.com>
14: ** TBL Tim Berners-Lee, CERN <timbl@info.cern.ch>
15: **
16: ** History
17: ** Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
18: ** Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping)
19: ** Refers to lists of sources.
2.2 timbl 20: ** Mar 93 TBL Lib 2.0 compatible module made.
2.1 timbl 21: **
22: ** Bugs
23: ** Uses C stream i/o to read and write sockets, which won't work
24: ** on VMS TCP systems.
25: **
26: ** Should cache connections.
27: **
28: ** ANSI C only as written
29: **
2.11 secret 30: ** Bugs fixed
31: ** NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
32: **
2.1 timbl 33: ** WAIS comments:
34: **
35: ** 1. Separate directories for different system's .o would help
36: ** 2. Document ids are rather long!
37: **
38: ** WWW Address mapping convention:
39: **
40: ** /servername/database/type/length/document-id
41: **
42: ** /servername/database?word+word+word
43: */
44: /* WIDE AREA INFORMATION SERVER SOFTWARE:
45: No guarantees or restrictions. See the readme file for the full standard
46: disclaimer.
47:
48: Brewster@think.com
49: */
50:
2.30 ! roeber 51: #include "sysdep.h"
! 52:
2.1 timbl 53:
2.8 timbl 54: #define DIRECTORY "/cnidr.org:210/directory-of-servers"
55: /* define DIRECTORY "/quake.think.com:210/directory-of-servers" */
2.1 timbl 56:
57: #define BIG 1024 /* identifier size limit @@@@@ */
58:
2.2 timbl 59: /* From WAIS
60: ** ---------
2.1 timbl 61: */
62:
63: #include <ui.h>
64:
65: #define MAX_MESSAGE_LEN 100000
66: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
67: #define WAISSEARCH_DATE "Fri Jul 19 1991"
68:
69:
2.2 timbl 70: /* FROM WWW
71: ** --------
2.1 timbl 72: */
73: #define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
74:
2.2 timbl 75: #define HEX_ESCAPE '%'
2.24 frystyk 76:
77: /* Library includes */
2.1 timbl 78: #include "HTUtils.h"
79: #include "HTParse.h"
2.2 timbl 80: #include "HTAccess.h" /* We implement a protocol */
2.21 frystyk 81: #include "HTError.h"
2.2 timbl 82: #include "HTML.h" /* The object we will generate */
2.25 frystyk 83: #include "HTParse.h"
84: #include "HTFormat.h"
85: #include "HTTCP.h"
2.2 timbl 86:
extern FILE * logfile;		/* Log file output */

/* Upper bound on search hits; handed to generate_search_apdu() by
** HTLoadWAIS().  PUBLIC so that applications can change it.
*/
PUBLIC int HTMaxWAISLines = 200;/* Max number of entries from a search */

/* Set by HTLoadWAIS() when the address starts with two slashes */
PRIVATE BOOL	as_gate;	/* Client is using us as gateway */

/* Fixed-size scratch buffer: anything formatted into it must be known to
** fit, since nothing checks the length.
*/
PRIVATE char	line[2048];	/* For building strings to display */
				/* Must be able to take id */
2.2 timbl 95:
/*	Hypertext object building machinery
**
**	The macros below all act on a variable called `target' that must be
**	in scope where they are used; each one calls through the method table
**	pointed to by target->isa.
*/
#include "HTML.h"

#define PUTC(c) (*target->isa->put_character)(target, c)	/* one char */
#define PUTS(s) (*target->isa->put_string)(target, s)		/* a string */
#define START(e) (*target->isa->start_element)(target, e, 0, 0)	/* open tag, no attributes */
#define END(e) (*target->isa->end_element)(target, e)		/* close tag */
#define FREE_TARGET (*target->isa->_free)(target)		/* dispose of target */

/* Only the leading method-table pointer of each object is declared here;
** that is all this module touches.
*/
struct _HTStructured {
	CONST HTStructuredClass *	isa;
	/* ... */
};

struct _HTStream {
	CONST HTStreamClass *	isa;
	/* ... */
};
114:
115:
2.1 timbl 116: /* showDiags
117: */
118: /* modified from Jonny G's version in ui/question.c */
119:
2.2 timbl 120: void showDiags ARGS2(
121: HTStream *, target,
122: diagnosticRecord **, d)
2.1 timbl 123: {
124: long i;
125:
126: for (i = 0; d[i] != NULL; i++) {
127: if (d[i]->ADDINFO != NULL) {
128: PUTS("Diagnostic code is ");
129: PUTS(d[i]->DIAG);
130: PUTC(' ');
131: PUTS(d[i]->ADDINFO);
132: PUTC('\n'); ;
133: }
134: }
135: }
136:
137: /* Matrix of allowed characters in filenames
138: ** -----------------------------------------
139: */
140:
141: PRIVATE BOOL acceptable[256];
142: PRIVATE BOOL acceptable_inited = NO;
143:
144: PRIVATE void init_acceptable NOARGS
145: {
146: unsigned int i;
147: char * good =
148: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
149: for(i=0; i<256; i++) acceptable[i] = NO;
150: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
151: acceptable_inited = YES;
152: }
153:
154: /* Transform file identifier into WWW address
155: ** ------------------------------------------
156: **
157: **
158: ** On exit,
159: ** returns nil if error
160: ** pointer to malloced string (must be freed) if ok
161: */
162: char * WWW_from_archie ARGS1 (char *, file)
163: {
164: char * end;
165: char * result;
166: char * colon;
167: for(end=file; *end > ' '; end++); /* assumes ASCII encoding*/
168: result = (char *)malloc(10 + (end-file));
169: if (!result) return result; /* Malloc error */
170: strcpy(result, "file://");
171: strncat(result, file, end-file);
172: colon = strchr(result+7, ':'); /* Expect colon after host */
173: if (colon) {
174: for(; colon[0]; colon[0]=colon[1], colon++); /* move down */
175: }
176: return result;
177: } /* WWW_from_archie */
178:
2.2 timbl 179: /* Transform document identifier into URL
180: ** --------------------------------------
2.1 timbl 181: **
182: ** Bugs: A static buffer of finite size is used!
183: ** The format of the docid MUST be good!
184: **
185: ** On exit,
186: ** returns nil if error
187: ** pointer to malloced string (must be freed) if ok
188: */
2.9 timbl 189: PRIVATE char hex [17] = "0123456789ABCDEF";
2.2 timbl 190: extern char from_hex PARAMS((char a)); /* In HTWSRC @@ */
191:
192: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
193:
2.1 timbl 194: {
2.21 frystyk 195: static unsigned char buf[BIG];
196: char num[10];
197: unsigned char * q = buf;
2.1 timbl 198: char * p = (docid->bytes);
199: int i, l;
2.25 frystyk 200: if (PROT_TRACE) {
2.1 timbl 201: char *p;
2.21 frystyk 202: fprintf(stderr, "HTLoadWAIS.. id (%d bytes) is ", (int)docid->size);
2.1 timbl 203: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
204: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
205: fprintf(stderr, "%c", *p);
206: else
2.2 timbl 207: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 208: }
209: fprintf(stderr, "\n");
210: }
211: for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
2.25 frystyk 212: if (PROT_TRACE)
2.24 frystyk 213: fprintf(stderr, "............ Record type %d, length %d\n",
214: (unsigned char) p[0], (unsigned char) p[1]);
2.21 frystyk 215: sprintf(num, "%d", (int)*p);
216: memcpy(q, num, strlen(num));
217: q += strlen(num);
218: p++;
219: *q++ = '='; /* Separate */
220: l = (int)((unsigned char)*p);
221: p++;
222: if (l > 127)
223: {
224: l = (l - 128) * 128;
225: l = l + (int)((unsigned char)*p);
226: p++;
227: }
228:
229: for (i = 0; i < l; i++, p++)
230: {
231: if (!acceptable[(unsigned char)*p])
232: {
233: *q++ = HEX_ESCAPE;
234: *q++ = hex[((unsigned char)*p) >> 4];
235: *q++ = hex[((unsigned char)*p) & 15];
236: }
237: else *q++ = (unsigned char)*p;
238: }
239: *q++= ';'; /* Terminate field */
240: #ifdef OLD_CODE
2.1 timbl 241: if (*p>10) {
2.25 frystyk 242: if (PROT_TRACE)
2.21 frystyk 243: fprintf(stderr, "WAIS........ DOCID record type of %d!\n", *p);
2.1 timbl 244: return 0;
245: }
2.2 timbl 246: { /* Bug fix -- allow any byte value 15 Apr 93 */
247: unsigned int i = (unsigned) *p++;
248:
249: if (i > 99) {
250: *q++ = (i/100) + '0';
251: i = i % 100;
252: }
253: if (i > 9) {
254: *q++ = (i/10) + '0';
255: i = i % 10;
256: }
257: *q++ = i + '0'; /* Record type */
258: }
2.1 timbl 259: *q++ = '='; /* Separate */
260: l = *p++; /* Length */
261: for(i=0; i<l; i++, p++){
2.18 luotonen 262: if (!acceptable[(int)*p]) {
2.1 timbl 263: *q++ = HEX_ESCAPE; /* Means hex commming */
264: *q++ = hex[(*p) >> 4];
265: *q++ = hex[(*p) & 15];
266: }
267: else *q++ = *p;
268: }
269: *q++= ';'; /* Terminate field */
2.21 frystyk 270: #endif /* OLD_CODE */
2.1 timbl 271: }
272: *q++ = 0; /* Terminate string */
2.25 frystyk 273: if (PROT_TRACE) fprintf(stderr, "HTLoadWAIS.. WWW form of id: %s\n", buf);
2.1 timbl 274: {
2.24 frystyk 275: char *result;
276: if ((result = (char *) malloc((int) strlen(buf)+1)) == NULL)
277: outofmem(__FILE__, "WWW_from_WAIS");
2.1 timbl 278: strcpy(result, buf);
279: return result;
280: }
281: } /* WWW_from_WAIS */
282:
283:
2.2 timbl 284: /* Transform URL into WAIS document identifier
285: ** -------------------------------------------
2.1 timbl 286: **
287: ** On entry,
288: ** docname points to valid name produced originally by
289: ** WWW_from_WAIS
290: ** On exit,
291: ** docid->size is valid
292: ** docid->bytes is malloced and must later be freed.
293: */
294: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
295: {
296: char *z; /* Output pointer */
297: char *sor; /* Start of record - points to size field. */
298: char *p; /* Input pointer */
299: char *q; /* Poisition of "=" */
300: char *s; /* Position of semicolon */
301: int n; /* size */
2.25 frystyk 302: if (PROT_TRACE)
2.24 frystyk 303: fprintf(stderr, "HTLoadWAIS.. WWW id (to become WAIS id): %s\n",
304: docname);
2.1 timbl 305: for(n=0, p = docname; *p; p++) { /* Count sizes of strings */
306: n++;
307: if (*p == ';') n--; /* Not converted */
308: else if (*p == HEX_ESCAPE) n=n-2; /* Save two bytes */
309: docid->size = n;
310: }
311:
2.21 frystyk 312: docid->bytes = (char *) malloc(docid->size+32); /* result record */
2.1 timbl 313: z = docid->bytes;
314:
2.21 frystyk 315: for(p = docname; *p; ) {
316: q = strchr(p, '=');
317: if (!q)
318: return 0;
319: *q = '\0';
320: *z++ = atoi(p);
321: *q = '=';
322: s = strchr(q, ';'); /* (Check only) */
323: if (!s)
324: return 0; /* Bad! No ';'; */
325: sor = z; /* Remember where the size field was */
326: z++; /* Skip record size for now */
327:
328: {
329: int len;
330: int tmp;
331: for(p=q+1; *p!=';' ; ) {
332: if (*p == HEX_ESCAPE) {
333: char c;
334: unsigned int b;
335: p++;
336: c = *p++;
337: b = from_hex(c);
338: c = *p++;
339: if (!c)
340: break; /* Odd number of chars! */
341: *z++ = (b<<4) + from_hex(c);
342: } else {
343: *z++ = *p++; /* Record */
344: }
345: }
346: len = (z-sor-1);
347:
348: z = sor;
349: if (len > 127) {
350: tmp = (len / 128);
351: len = len - (tmp * 128);
352: tmp = tmp + 128;
353: *z++ = (char)tmp;
354: *z = (char)len;
355: } else {
356: *z = (char)len;
357: }
358: z++;
359: }
360:
361: for(p=q+1; *p!=';' ; ) {
362: if (*p == HEX_ESCAPE) {
363: char c;
364: unsigned int b;
365: p++;
366: c = *p++;
367: b = from_hex(c);
368: c = *p++;
369: if (!c)
370: break; /* Odd number of chars! */
371: *z++ = (b<<4) + from_hex(c);
372: } else {
373: *z++ = *p++; /* Record */
374: }
375: }
376: p++; /* After semicolon: start of next record */
377: }
378:
379: #ifdef OLD_CODE
2.1 timbl 380: for(p = docname; *p; ) { /* Convert of strings */
2.2 timbl 381: /* Record type */
382:
383: *z = 0; /* Initialize record type */
384: while (*p >= '0' && *p <= '9') {
385: *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */
386: }
387: z++;
388: if (*p != '=') return 0;
389: q = p;
390:
391: /* *z++ = *p++ - '0';
2.1 timbl 392: q = strchr(p , '=');
393: if (!q) return 0;
2.2 timbl 394: */
2.1 timbl 395: s = strchr(q, ';'); /* (Check only) */
396: if (!s) return 0; /* Bad! No ';'; */
397: sor = z; /* Remember where the size field was */
398: z++; /* Skip record size for now */
399: for(p=q+1; *p!=';' ; ) {
400: if (*p == HEX_ESCAPE) {
401: char c;
402: unsigned int b;
403: p++;
404: c = *p++;
405: b = from_hex(c);
406: c = *p++;
407: if (!c) break; /* Odd number of chars! */
408: *z++ = (b<<4) + from_hex(c);
409: } else {
410: *z++ = *p++; /* Record */
411: }
412: }
413: *sor = (z-sor-1); /* Fill in size -- not counting size itself */
414: p++; /* After semicolon: start of next record */
415: }
2.21 frystyk 416: #endif /* OLD_CODE */
2.25 frystyk 417: if (PROT_TRACE) {
2.1 timbl 418: char *p;
2.21 frystyk 419: fprintf(stderr, "WAIS........ id (%d bytes) is ", (int)docid->size);
2.1 timbl 420: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
421: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
422: fprintf(stderr, "%c", *p);
423: else
2.2 timbl 424: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 425: }
426: fprintf(stderr, "\n");
427: }
428: return docid; /* Ok */
429:
430: } /* WAIS_from_WWW */
431:
432:
433: /* Send a plain text record to the client output_text_record()
434: ** --------------------------------------
435: */
2.2 timbl 436:
2.9 timbl 437: PRIVATE void output_text_record ARGS4(
2.2 timbl 438: HTStream *, target,
439: WAISDocumentText *, record,
2.9 timbl 440: boolean, quote_string_quotes,
441: boolean, binary)
2.1 timbl 442: {
443: long count;
444: /* printf(" Text\n");
445: print_any(" DocumentID: ", record->DocumentID);
446: printf(" VersionNumber: %d\n", record->VersionNumber);
447: */
2.9 timbl 448:
449: if (binary) {
450: (*target->isa->put_block)(target,
451: record->DocumentText->bytes,
452: record->DocumentText->size);
453: return;
454: }
455:
2.1 timbl 456: for(count = 0; count < record->DocumentText->size; count++){
457: long ch = (unsigned char)record->DocumentText->bytes[count];
2.2 timbl 458: if (ch == 27) { /* What is this in for? Tim */
2.1 timbl 459:
460: /* then we have an escape code */
461: /* if the next letter is '(' or ')', then ignore two letters */
462: if('(' == record->DocumentText->bytes[count + 1] ||
463: ')' == record->DocumentText->bytes[count + 1])
464: count += 1; /* it is a term marker */
465: else count += 4; /* it is a paragraph marker */
466: } else if (ch == '\n' || ch == '\r') {
2.2 timbl 467: PUTC('\n');
2.1 timbl 468: } else if ((ch=='\t') || isprint(ch)){
2.2 timbl 469: PUTC(ch);
2.1 timbl 470: }
471: }
472: } /* output text record */
473:
474:
2.2 timbl 475:
2.1 timbl 476: /* Format A Search response for the client display_search_response
477: ** ---------------------------------------
478: */
479: /* modified from tracy shen's version in wutil.c
480: * displays either a text record or a set of headlines.
481: */
482: void
2.2 timbl 483: display_search_response ARGS4(
484: HTStructured *, target,
2.1 timbl 485: SearchResponseAPDU *, response,
486: char *, database,
487: char *, keywords)
488: {
489: WAISSearchResponse *info;
490: long i, k;
491:
492: BOOL archie = strstr(database, "archie")!=0; /* Specical handling */
493:
2.25 frystyk 494: if (PROT_TRACE) fprintf(stderr, "WAIS........ Displaying search response\n");
2.1 timbl 495: sprintf(line,
2.21 frystyk 496: "Index %s contains the following %d item%s relevant to '%s'.\n",
497: database,
498: (int)(response->NumberOfRecordsReturned),
499: response->NumberOfRecordsReturned ==1 ? "" : "s",
500: keywords);
2.2 timbl 501: PUTS(line);
502: PUTS("The first figure for each entry is its relative score, ");
503: PUTS("the second the number of lines in the item.");
504: START(HTML_MENU);
505:
2.1 timbl 506: if ( response->DatabaseDiagnosticRecords != 0 ) {
507: info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
508: i =0;
509:
510: if (info->Diagnostics != NULL)
2.2 timbl 511: showDiags((HTStream*)target, info->Diagnostics);
2.1 timbl 512:
513: if ( info->DocHeaders != 0 ) {
514: for (k=0; info->DocHeaders[k] != 0; k++ ) {
515: WAISDocumentHeader* head = info->DocHeaders[k];
516: char * headline = trim_junk(head->Headline);
517: any * docid = head->DocumentID;
518: char * docname; /* printable version of docid */
519: i++;
520:
521: /* Make a printable string out of the document id.
522: */
2.25 frystyk 523: if (PROT_TRACE)
2.24 frystyk 524: fprintf(stderr, "HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n", i,
525: (long int)(info->DocHeaders[k]->Score),
526: (long int)(info->DocHeaders[k]->Lines),
527: headline);
2.1 timbl 528:
2.2 timbl 529: START(HTML_LI);
530: sprintf(line, "%4ld %4ld ",
531: head->Score,
532: head->Lines);
2.23 frystyk 533: PUTS(line);
2.2 timbl 534:
2.1 timbl 535: if (archie) {
536: char * www_name = WWW_from_archie(headline);
537: if (www_name) {
2.2 timbl 538: HTStartAnchor(target, NULL, www_name);
2.1 timbl 539: PUTS(headline);
2.2 timbl 540:
541: END(HTML_A);
2.1 timbl 542: free(www_name);
543: } else {
2.2 timbl 544: PUTS(headline);
545: PUTS(" (bad file name)");
2.1 timbl 546: }
547: } else { /* Not archie */
548: docname = WWW_from_WAIS(docid);
549: if (docname) {
2.6 timbl 550: char * dbname = HTEscape(database, URL_XPALPHAS);
2.21 frystyk 551: char types_array[1000]; /* bad */
552: char *type_escaped;
553: types_array[0] = 0;
554: if (head->Types) {
555: int i;
556: for (i = 0; head->Types[i]; i++)
557: {
558: if (i)
559: strcat (types_array, ",");
560:
561: type_escaped = HTEscape (head->Types[i], URL_XALPHAS);
562: strcat (types_array, type_escaped);
563: free (type_escaped);
564: }
2.25 frystyk 565: if (PROT_TRACE)
2.24 frystyk 566: fprintf (stderr, "WAIS........ Types_array `%s\'\n",
567: types_array);
2.21 frystyk 568: } else {
569: strcat (types_array, "TEXT");
570: }
571: sprintf(line, "%s/%s/%d/%s",
572: dbname,
573: types_array,
574: (int)(head->DocumentLength),
575: docname);
576: #ifdef OLD_CODE
2.1 timbl 577: sprintf(line, "%s/%s/%d/%s", /* W3 address */
578: dbname,
579: head->Types ? head->Types[0] : "TEXT",
2.2 timbl 580: (int)(head->DocumentLength),
2.1 timbl 581: docname);
2.21 frystyk 582: #endif /* OLD_CODE */
2.11 secret 583: HTStartAnchor(target, NULL, ( (head->Types)
584: && (!strcmp(head->Types[0], "URL"))) ?
585: headline : line); /* NT, Sep 93 */
2.2 timbl 586: PUTS(headline);
587: END(HTML_A);
2.1 timbl 588: free(dbname);
589: free(docname);
590: } else {
2.2 timbl 591: PUTS("(bad doc id)");
2.1 timbl 592: }
593: }
594: } /* next document header */
595: } /* if there were any document headers */
596:
597: if ( info->ShortHeaders != 0 ) {
598: k =0;
599: while (info->ShortHeaders[k] != 0 ) {
600: i++;
2.2 timbl 601: PUTS( "(Short Header record, can't display)");
2.1 timbl 602: }
603: }
604: if ( info->LongHeaders != 0 ) {
605: k =0;
606: while (info->LongHeaders[k] != 0) {
607: i++;
608: PUTS( "\nLong Header record, can't display\n");
609: }
610: }
611: if ( info->Text != 0 ) {
612: k =0;
613: while (info->Text[k] != 0) {
614: i++;
615: PUTS( "\nText record\n");
2.9 timbl 616: output_text_record((HTStream*)target, info->Text[k++], false, false);
2.1 timbl 617: }
618: }
619: if ( info->Headlines != 0 ) {
620: k =0;
621: while (info->Headlines[k] != 0) {
622: i++;
623: PUTS( "\nHeadline record, can't display\n");
624: /* dsply_headline_record( info->Headlines[k++]); */
625: }
626: }
627: if ( info->Codes != 0 ) {
628: k =0;
629: while (info->Codes[k] != 0) {
630: i++;
631: PUTS( "\nCode record, can't display\n");
632: /* dsply_code_record( info->Codes[k++]); */
633: }
634: }
635: } /* Loop: display user info */
2.2 timbl 636: END(HTML_MENU);
2.1 timbl 637: PUTC('\n'); ;
638: }
639:
640:
641:
2.2 timbl 642:
2.20 frystyk 643: /* Load Document from WAIS Server HTLoadWAIS()
644: ** ------------------------------
2.2 timbl 645: **
2.20 frystyk 646: ** On entry,
647: ** request This is the request structure
648: ** On exit,
649: ** returns <0 Error has occured
650: ** HT_LOADED OK
2.1 timbl 651: */
2.13 timbl 652: PUBLIC int HTLoadWAIS ARGS1(HTRequest * , request)
2.1 timbl 653:
2.21 frystyk 654: #define MAX_KEYWORDS_LENGTH 4000
2.1 timbl 655: #define MAX_SERVER_LENGTH 1000
656: #define MAX_DATABASE_LENGTH 1000
657: #define MAX_SERVICE_LENGTH 1000
658:
659: {
2.13 timbl 660: CONST char * arg = HTAnchor_physical(request->anchor);
661: HTFormat format_out = request->output_format;
662: HTStream* sink = request->output_stream;
2.20 frystyk 663: #if 0
2.1 timbl 664: static CONST char * error_header =
2.7 timbl 665: "<h1>Access error</h1>\nThe following error occured in accesing a WAIS server:<P>\n";
2.20 frystyk 666: #endif
2.2 timbl 667: char * key; /* pointer to keywords in URL */
2.1 timbl 668: char* request_message = NULL; /* arbitrary message limit */
669: char* response_message = NULL; /* arbitrary message limit */
670: long request_buffer_length; /* how of the request is left */
671: SearchResponseAPDU *retrieval_response = 0;
672: char keywords[MAX_KEYWORDS_LENGTH + 1];
673: char *server_name;
2.6 timbl 674: char *wais_database = NULL; /* name of current database */
675: char *www_database; /* Same name escaped */
2.1 timbl 676: char *service;
677: char *doctype;
678: char *doclength;
679: long document_length;
680: char *docname;
681: FILE *connection = 0;
682: char * names; /* Copy of arg to be hacked up */
683: BOOL ok = NO;
2.20 frystyk 684: int status = -1;
2.21 frystyk 685: char *basetitle = NULL;
2.1 timbl 686:
687: extern FILE * connect_to_server();
688:
2.27 frystyk 689: if (PROT_TRACE)
690: fprintf(stderr, "HTLoadWAIS.. Looking for `%s\'\n", arg);
691:
2.1 timbl 692: if (!acceptable_inited) init_acceptable();
693:
694:
695: /* Decipher and check syntax of WWW address:
696: ** ----------------------------------------
697: **
698: ** First we remove the "wais:" if it was spcified. 920110
699: */
700: names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2 timbl 701: key = strchr(names, '?');
702:
703: if (key) {
704: char * p;
705: *key++ = 0; /* Split off keywords */
706: for (p=key; *p; p++) if (*p == '+') *p = ' ';
707: HTUnEscape(key);
708: }
2.1 timbl 709: if (names[0]== '/') {
710: server_name = names+1;
2.18 luotonen 711: if ((as_gate =(*server_name == '/')))
2.1 timbl 712: server_name++; /* Accept one or two */
713: www_database = strchr(server_name,'/');
714: if (www_database) {
715: *www_database++ = 0; /* Separate database name */
716: doctype = strchr(www_database, '/');
717: if (key) ok = YES; /* Don't need doc details */
718: else if (doctype) { /* If not search parse doc details */
719: *doctype++ = 0; /* Separate rest of doc address */
720: doclength = strchr(doctype, '/');
721: if(doclength) {
722: *doclength++ = 0;
723: document_length = atol(doclength);
724: if (document_length) {
725: docname=strchr(doclength, '/');
726: if (docname) {
727: *docname++ = 0;
728: ok = YES; /* To avoid a goto! */
729: } /* if docname */
730: } /* if document_length valid */
731: } /* if doclength */
732: } else { /* no doctype? Assume index required */
733: if (!key) key = "";
734: ok = YES;
735: } /* if doctype */
736: } /* if database */
737: }
738:
2.21 frystyk 739: if (!ok) {
740: char *unescaped = NULL;
741: StrAllocCopy(unescaped, arg);
742: HTUnEscape(unescaped);
743: HTErrorAdd(request, ERR_FATAL, NO, HTERR_BAD_REQUEST,
744: (void *) unescaped, (int) strlen(unescaped),
745: "HTLoadWAIS");
746: free(unescaped);
747: free(names);
748: return -1;
749: }
750:
2.25 frystyk 751: if (PROT_TRACE) fprintf(stderr, "HTLoadWAIS.. URL Parsed OK\n");
2.1 timbl 752:
753: service = strchr(names, ':');
754: if (service) *service++ = 0;
755: else service = "210";
756:
757: if (server_name[0] == 0)
758: connection = NULL;
759:
760: else if (!(key && !*key))
2.20 frystyk 761: if ((connection=connect_to_server(server_name,atoi(service))) == NULL) {
2.21 frystyk 762: char *host = HTParse(arg, "", PARSE_HOST);
2.25 frystyk 763: if (PROT_TRACE)
2.24 frystyk 764: fprintf (stderr, "HTLoadWAIS.. Can't open connection to %s via service %s.\n",
765: server_name, service);
2.21 frystyk 766: HTErrorAdd(request, ERR_FATAL, NO, HTERR_WAIS_NO_CONNECT,
2.20 frystyk 767: (void *) host, (int) strlen(host), "HTLoadWAIS");
768: goto cleanup;
769: }
2.1 timbl 770:
2.6 timbl 771: StrAllocCopy(wais_database,www_database);
772: HTUnEscape(wais_database);
2.21 frystyk 773:
774: /* Make title name without the .src */
775: {
776: char *srcstr;
777: StrAllocCopy(basetitle, wais_database);
778: if ((srcstr = strstr(basetitle, ".src")) != NULL)
779: *srcstr = '\0';
780: }
2.6 timbl 781:
2.21 frystyk 782: /* This below fixed size stuff is terrible */
2.1 timbl 783: request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
784: response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
785:
786: /* If keyword search is performed but there are no keywords,
787: ** the user has followed a link to the index itself. It would be
788: ** appropriate at this point to send him the .SRC file - how?
789: */
790:
791: if (key && !*key) { /* I N D E X */
792:
2.7 timbl 793: #ifdef CACHE_FILE_PREFIX
794: char filename[256];
795: FILE * fp;
796: #endif
2.13 timbl 797: HTStructured * target = HTML_new(request, NULL,
798: WWW_HTML, format_out, sink);
2.1 timbl 799:
2.8 timbl 800: {
2.27 frystyk 801: START(HTML_HTML);
802: START(HTML_HEAD);
2.8 timbl 803: START(HTML_TITLE);
2.21 frystyk 804: PUTS(basetitle);
805: PUTS(" Index");
2.8 timbl 806: END(HTML_TITLE);
2.27 frystyk 807: END(HTML_HEAD);
2.8 timbl 808:
2.27 frystyk 809: START(HTML_BODY);
2.8 timbl 810: START(HTML_H1);
2.21 frystyk 811: PUTS("WAIS Index: ");
812: PUTS(basetitle);
2.8 timbl 813: END(HTML_H1);
814:
815: }
2.21 frystyk 816: START(HTML_ISINDEX);
817:
818: /* If we have seen a source file for this database, use that: */
2.2 timbl 819:
2.7 timbl 820: #ifdef CACHE_FILE_PREFIX
2.8 timbl 821: sprintf(filename, "%sWSRC-%s:%s:%.100s.txt",
2.7 timbl 822: CACHE_FILE_PREFIX,
2.1 timbl 823: server_name, service, www_database);
824:
825: fp = fopen(filename, "r"); /* Have we found this already? */
2.25 frystyk 826: if (PROT_TRACE) fprintf(stderr,
2.21 frystyk 827: "HTLoadWAIS.. Description of server %s %s.\n",
2.1 timbl 828: filename,
829: fp ? "exists already" : "does NOT exist!");
2.2 timbl 830:
2.1 timbl 831: if (fp) {
2.24 frystyk 832: int c;
833: START(HTML_PRE); /* Preformatted description */
834: while((c=getc(fp)) != EOF)
835: PUTC(c); /* Transfer file */
2.7 timbl 836: END(HTML_PRE);
2.1 timbl 837: fclose(fp);
2.8 timbl 838: }
2.2 timbl 839: #endif
2.27 frystyk 840: END(HTML_BODY);
841: END(HTML_HTML);
2.2 timbl 842: FREE_TARGET;
2.1 timbl 843:
844: } else if (key) { /* S E A R C H */
845: char *p;
2.2 timbl 846: HTStructured * target;
847:
2.1 timbl 848: strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
2.18 luotonen 849: while ((p = strchr(keywords,'+'))) *p = ' ';
2.1 timbl 850:
851: /* Send advance title to get something fast to the other end */
852:
2.13 timbl 853: target = HTML_new(request, NULL, WWW_HTML, format_out, sink);
2.2 timbl 854:
2.23 frystyk 855: START(HTML_HTML);
856: START(HTML_HEAD);
2.2 timbl 857: START(HTML_TITLE);
858: PUTS(keywords);
2.21 frystyk 859: PUTS(" in ");
860: PUTS(basetitle);
2.2 timbl 861: END(HTML_TITLE);
2.23 frystyk 862: END(HTML_HEAD);
2.2 timbl 863:
2.23 frystyk 864: START(HTML_BODY);
2.2 timbl 865: START(HTML_H1);
2.21 frystyk 866: PUTS("WAIS Search of \"");
2.2 timbl 867: PUTS(keywords);
2.21 frystyk 868: PUTS("\" in ");
869: PUTS(basetitle);
2.2 timbl 870: END(HTML_H1);
2.1 timbl 871:
2.21 frystyk 872: START(HTML_ISINDEX);
873:
2.1 timbl 874: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.25 frystyk 875: if (PROT_TRACE)
2.24 frystyk 876: fprintf(stderr, "HTLoadWAIS.. Search for `%s' in `%s'\n",
877: keywords, wais_database);
2.21 frystyk 878: if(generate_search_apdu(request_message + HEADER_LENGTH,
2.1 timbl 879: &request_buffer_length,
2.21 frystyk 880: keywords, wais_database, NULL,
881: HTMaxWAISLines) == NULL) {
2.25 frystyk 882: if (PROT_TRACE)
2.23 frystyk 883: fprintf(stderr, "WAIS Search. Too many lines in response\n");
2.21 frystyk 884: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
885: NULL, 0, "HTLoadWAIS");
886: }
2.1 timbl 887:
888: if(!interpret_message(request_message,
889: MAX_MESSAGE_LEN - request_buffer_length,
890: response_message,
891: MAX_MESSAGE_LEN,
892: connection,
893: false /* true verbose */
894: )) {
2.25 frystyk 895: if (PROT_TRACE)
2.23 frystyk 896: fprintf(stderr, "WAIS Search. Too many lines in response\n");
2.21 frystyk 897: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
898: NULL, 0, "HTLoadWAIS");
2.1 timbl 899: } else { /* returned message ok */
900: SearchResponseAPDU *query_response = 0;
901: readSearchResponseAPDU(&query_response,
902: response_message + HEADER_LENGTH);
2.2 timbl 903: display_search_response(target,
904: query_response, wais_database, keywords);
2.1 timbl 905: if (query_response->DatabaseDiagnosticRecords)
906: freeWAISSearchResponse(
907: query_response->DatabaseDiagnosticRecords);
908: freeSearchResponseAPDU( query_response);
909: } /* returned message not too large */
910:
2.23 frystyk 911: END(HTML_BODY);
912: END(HTML_HTML);
2.2 timbl 913: FREE_TARGET;
914:
2.1 timbl 915: } else { /* D O C U M E N T F E T C H */
916:
2.2 timbl 917: HTFormat format_in;
2.9 timbl 918: boolean binary; /* how to transfer stuff coming over */
2.2 timbl 919: HTStream * target;
2.1 timbl 920: long count;
921: any doc_chunk;
922: any * docid = &doc_chunk;
2.25 frystyk 923: if (PROT_TRACE)
2.24 frystyk 924: fprintf(stderr,
925: "HTLoadWAIS.. Retrieve document `%s'\n............ type `%s' length %ld\n", docname, doctype, document_length);
2.2 timbl 926:
927: format_in =
928: !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
2.23 frystyk 929: !strcmp(doctype, "TEXT") ? WWW_UNKNOWN :
2.9 timbl 930: !strcmp(doctype, "HTML") ? HTAtom_for("text/html") :
2.2 timbl 931: !strcmp(doctype, "GIF") ? HTAtom_for("image/gif") :
2.11 secret 932: HTAtom_for("application/octet-stream");
2.9 timbl 933: binary =
934: 0 != strcmp(doctype, "WSRC") &&
935: 0 != strcmp(doctype, "TEXT") &&
936: 0 != strcmp(doctype, "HTML") ;
937:
2.23 frystyk 938: /* Guess on TEXT format as it might be HTML */
2.28 frystyk 939: if ((target = HTStreamStack(format_in, request->output_format,
940: request->output_stream,
941: request, YES)) == NULL) {
2.21 frystyk 942: status = -1;
943: goto cleanup;
944: }
2.2 timbl 945:
2.21 frystyk 946: /* Decode hex or litteral format for document ID */
2.1 timbl 947: WAIS_from_WWW(docid, docname);
948:
2.21 frystyk 949: /* Loop over slices of the document */
950: for (count = 0; count * CHARS_PER_PAGE < document_length; count++) {
951: char *type = s_strdup(doctype);
952: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.25 frystyk 953: if (PROT_TRACE) fprintf(stderr, "HTLoadWAIS.. Slice number %ld\n",
2.21 frystyk 954: count);
955: if (generate_retrieval_apdu(request_message + HEADER_LENGTH,
956: &request_buffer_length,
957: docid, CT_byte,
958: count * CHARS_PER_PAGE,
959: HTMIN((count + 1) * CHARS_PER_PAGE,
960: document_length),
961: type,
962: wais_database) == 0) {
963: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
964: NULL, 0, "HTLoadWAIS");
965: }
966: FREE(type);
967:
968: /* Actually do the transaction given by request_message */
969: if (interpret_message(request_message,
970: MAX_MESSAGE_LEN - request_buffer_length,
971: response_message,
972: MAX_MESSAGE_LEN,
973: connection,
974: false /* true verbose */
975: ) == 0) {
976: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_OVERFLOW,
977: NULL, 0, "HTLoadWAIS");
978: }
979:
980: /* Parse the result which came back into memory. */
981: readSearchResponseAPDU(&retrieval_response,
982: response_message + HEADER_LENGTH);
983: {
984: WAISSearchResponse *searchres = (WAISSearchResponse *) retrieval_response->DatabaseDiagnosticRecords;
985: if (!searchres->Text) {
986: if (searchres->Diagnostics && *searchres->Diagnostics &&
987: (*searchres->Diagnostics)->ADDINFO) {
988: char *errmsg = (*searchres->Diagnostics)->ADDINFO;
989: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_MODULE,
990: (void *) errmsg, (int) strlen(errmsg),
991: "HTLoadWAIS");
992: } else {
993: HTErrorAdd(request, ERR_WARNING, NO, HTERR_WAIS_MODULE,
994: NULL, 0, "HTLoadWAIS");
995: }
2.23 frystyk 996: (*target->isa->_free)(target);
2.26 frystyk 997: request->output_stream = NULL;
2.22 frystyk 998: free (docid->bytes);
999: freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords);
1000: freeSearchResponseAPDU( retrieval_response);
1001: goto cleanup;
2.21 frystyk 1002: } else {
1003: output_text_record(target, *searchres->Text,
1004: false, binary);
2.22 frystyk 1005: freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords);
1006: freeSearchResponseAPDU( retrieval_response);
2.21 frystyk 1007: } /* If text existed */
1008: }
1009:
1010: } /* Loop over slices */
2.1 timbl 1011:
2.23 frystyk 1012: (*target->isa->_free)(target);
2.26 frystyk 1013: request->output_stream = NULL;
2.1 timbl 1014: free (docid->bytes);
1015: } /* If document rather than search */
2.20 frystyk 1016: status = HT_LOADED;
2.2 timbl 1017:
2.20 frystyk 1018: cleanup:
2.1 timbl 1019: if (connection) close_connection(connection);
1020: if (wais_database) free(wais_database);
2.20 frystyk 1021: if (request_message) s_free(request_message);
1022: if (response_message) s_free(response_message);
1023: FREE(names);
2.21 frystyk 1024: FREE(basetitle);
2.20 frystyk 1025: if (status < 0) {
1026: char *unescaped = NULL;
1027: StrAllocCopy(unescaped, arg);
1028: HTUnEscape(unescaped);
1029: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL, (void *) unescaped,
1030: (int) strlen(unescaped), "HTLoadWAIS");
1031: free(unescaped);
1032: }
1033: return status;
2.1 timbl 1034: }
1035:
/* Protocol descriptor for the "wais" access scheme.  It names the scheme,
** gives SOC_BLOCK as its second field and HTLoadWAIS as its handler
** function; the two remaining entries are left NULL.
*/
GLOBALDEF PUBLIC HTProtocol HTWAIS = {
    "wais", SOC_BLOCK, HTLoadWAIS, NULL, NULL
};
2.1 timbl 1039:
1040:
Webmaster