Annotation of libwww/Library/src/HTWAIS.c, revision 2.53
2.29 frystyk 1: /* HTWAIS.c
2: ** WORLDWIDEWEB - WIDE AREA INFORMATION SERVER ACCESS
3: **
2.33 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.29 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.53 ! frystyk 6: ** @(#) $Id: HTWAIS.c,v 2.52 1997/01/26 08:13:51 frystyk Exp $
2.1 timbl 7: **
8: ** This module allows a WWW server or client to read data from a
2.29 frystyk 9: ** remote WAIS server, and provide that data to a WWW client in
10: ** hypertext form. Source files, once retrieved, are stored and used
11: ** to provide information about the index when that is accessed.
2.1 timbl 12: **
13: ** Authors
14: ** BK Brewster Kahle, Thinking Machines, <Brewster@think.com>
2.35 frystyk 15: ** TBL Tim Berners-Lee, CERN <timbl@w3.org>
2.1 timbl 16: **
17: ** History
18: ** Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
19: ** Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping)
20: ** Refers to lists of sources.
2.2 timbl 21: ** Mar 93 TBL Lib 2.0 compatible module made.
2.34 frystyk 22: ** May 95 CHJ modified for freeWAIS-0.5
2.1 timbl 23: **
24: ** Bugs
25: ** Uses C stream i/o to read and write sockets, which won't work
26: ** on VMS TCP systems.
27: **
28: ** Should cache connections.
29: **
30: ** ANSI C only as written
31: **
2.11 secret 32: ** Bugs fixed
33: ** NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
34: **
2.1 timbl 35: ** WAIS comments:
36: **
37: ** 1. Separate directories for different system's .o would help
38: ** 2. Document ids are rather long!
39: **
40: ** WWW Address mapping convention:
41: **
42: ** /servername/database/type/length/document-id
43: **
44: ** /servername/database?word+word+word
45: */
46: /* WIDE AREA INFORMATION SERVER SOFTWARE:
47: No guarantees or restrictions. See the readme file for the full standard
48: disclaimer.
49:
50: Brewster@think.com
51: */
52:
53:
/* Address of a WAIS directory-of-servers database. It is not referenced */
/* anywhere in the visible part of this file.                            */
2.8 timbl 54: #define DIRECTORY "/cnidr.org:210/directory-of-servers"
55: /* define DIRECTORY "/quake.think.com:210/directory-of-servers" */
2.1 timbl 56:
/* Size in bytes of the static buffer in which WWW_from_WAIS builds the  */
/* printable (URL) form of a document identifier.                        */
57: #define BIG 1024 /* identifier size limit @@@@@ */
58:
2.2 timbl 59: /* From WAIS
60: ** ---------
2.1 timbl 61: */
/* NOTE(review): the include below is compiled out, yet the code further */
/* down uses WAIS types and functions (any, diagnosticRecord,            */
/* SearchResponseAPDU, interpret_message, ...). Confirm which header is  */
/* expected to declare them.                                             */
2.53 ! frystyk 62: #if 0
2.52 frystyk 63: #include "wais.h"
2.53 ! frystyk 64: #endif
2.1 timbl 65:
/* Size in bytes of each of the request and response message buffers     */
/* allocated by HTLoadWAIS.                                              */
66: #define MAX_MESSAGE_LEN 100000
/* HTLoadWAIS fetches a document in slices of this many characters.      */
67: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
/* Not referenced in the visible part of this file.                      */
68: #define WAISSEARCH_DATE "Fri Jul 19 1991"
69:
70:
2.2 timbl 71: /* FROM WWW
72: ** --------
2.1 timbl 73: */
/* Not referenced in the visible part of this file.                      */
74: #define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
75:
/* Character that introduces a two-digit hexadecimal escape in the URL   */
/* form of a document identifier (see WWW_from_WAIS / WAIS_from_WWW).    */
2.2 timbl 76: #define HEX_ESCAPE '%'
2.24 frystyk 77:
2.31 frystyk 78: /* Library include files */
2.46 frystyk 79: #include "sysdep.h"
2.48 frystyk 80: #include "WWWUtil.h"
81: #include "WWWCore.h"
2.49 frystyk 82: #include "WWWHTML.h"
2.2 timbl 83:
/* Declared here but not referenced in the visible part of this file.    */
2.1 timbl 84: extern FILE * logfile; /* Log file output */
85:
/* Passed by HTLoadWAIS as the last argument of generate_search_apdu().  */
2.43 frystyk 86: PRIVATE int HTMaxWAISLines = 200;/* Max number of entries from a search */
2.21 frystyk 87:
/* Set by HTLoadWAIS to YES when the parsed address starts with two      */
/* slashes; the visible code never reads it back.                        */
2.1 timbl 88: PRIVATE BOOL as_gate; /* Client is using us as gateway */
89:
/* Shared scratch buffer: display_search_response formats text into it   */
/* with sprintf() before handing it to PUTS. Being static, it is shared  */
/* by every call.                                                        */
90: PRIVATE char line[2048]; /* For building strings to display */
91: /* Must be able to take id */
2.2 timbl 92:
/* All of the macros below expand to a call through target->isa, so a    */
/* variable named `target' (HTStream * or HTStructured *) must be in     */
/* scope wherever they are used.                                         */
2.25 frystyk 93: /* Hypertext object building machinery */
2.2 timbl 94: #define PUTC(c) (*target->isa->put_character)(target, c)
95: #define PUTS(s) (*target->isa->put_string)(target, s)
96: #define START(e) (*target->isa->start_element)(target, e, 0, 0)
97: #define END(e) (*target->isa->end_element)(target, e)
2.23 frystyk 98: #define FREE_TARGET (*target->isa->_free)(target)
2.2 timbl 99:
/* Only the leading `isa' member of each object is declared here, which  */
/* is all the macros above need in order to reach the method table.      */
100: struct _HTStructured {
2.46 frystyk 101: const HTStructuredClass * isa;
2.2 timbl 102: /* ... */
103: };
104:
105: struct _HTStream {
2.46 frystyk 106: const HTStreamClass * isa;
2.2 timbl 107: /* ... */
108: };
109:
110:
2.1 timbl 111: /* showDiags
112: */
113: /* modified from Jonny G's version in ui/question.c */
114:
2.42 frystyk 115: void showDiags (
116: HTStream * target,
117: diagnosticRecord ** d)
2.1 timbl 118: {
119: long i;
120:
121: for (i = 0; d[i] != NULL; i++) {
122: if (d[i]->ADDINFO != NULL) {
123: PUTS("Diagnostic code is ");
124: PUTS(d[i]->DIAG);
125: PUTC(' ');
126: PUTS(d[i]->ADDINFO);
127: PUTC('\n'); ;
128: }
129: }
130: }
131:
132: /* Matrix of allowed characters in filenames
133: ** -----------------------------------------
134: */
135:
136: PRIVATE BOOL acceptable[256];
137: PRIVATE BOOL acceptable_inited = NO;
138:
2.42 frystyk 139: PRIVATE void init_acceptable (void)
2.1 timbl 140: {
141: unsigned int i;
142: char * good =
143: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
144: for(i=0; i<256; i++) acceptable[i] = NO;
145: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
146: acceptable_inited = YES;
147: }
148:
/*	Transform file identifier into WWW address
**	------------------------------------------
**	The identifier is the leading part of `file' up to (not including)
**	the first character that does not compare greater than a space.
**	The result is "file://" followed by that identifier with its first
**	colon, if any, removed.
**
** On exit,
**	returns		nil if error
**			pointer to malloced string (must be freed) if ok
*/
char * WWW_from_archie (char * file)
{
    char * stop = file;
    char * url;
    char * sep;
    size_t idlen;

    while (*stop > ' ')                         /* assumes ASCII encoding */
        stop++;
    idlen = (size_t) (stop - file);

    if ((url = (char *) HT_MALLOC(10 + idlen)) == NULL)
        HT_OUTOFMEM("result ");
    if (url == NULL)
        return NULL;                            /* Malloc error */

    memcpy(url, "file://", 7);
    memcpy(url + 7, file, idlen);
    url[7 + idlen] = '\0';

    /* Expect colon after host: close the gap it leaves behind */
    sep = strchr(url + 7, ':');
    if (sep != NULL)
        memmove(sep, sep + 1, strlen(sep + 1) + 1);

    return url;
}   /* WWW_from_archie */
174:
2.2 timbl 175: /* Transform document identifier into URL
176: ** --------------------------------------
2.1 timbl 177: **
178: ** Bugs: A static buffer of finite size is used!
179: ** The format of the docid MUST be good!
180: **
181: ** On exit,
182: ** returns nil if error
183: ** pointer to malloced string (must be freed) if ok
184: */
2.9 timbl 185: PRIVATE char hex [17] = "0123456789ABCDEF";
2.41 frystyk 186: extern char from_hex (char a); /* In HTWSRC @@ */
2.2 timbl 187:
2.42 frystyk 188: PRIVATE char * WWW_from_WAIS (any * docid)
2.2 timbl 189:
2.1 timbl 190: {
2.21 frystyk 191: static unsigned char buf[BIG];
192: char num[10];
193: unsigned char * q = buf;
2.1 timbl 194: char * p = (docid->bytes);
195: int i, l;
2.25 frystyk 196: if (PROT_TRACE) {
2.1 timbl 197: char *p;
2.45 eric 198: HTTrace("HTLoadWAIS.. id (%d bytes) is ", (int)docid->size);
2.1 timbl 199: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
200: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
2.45 eric 201: HTTrace("%c", *p);
2.1 timbl 202: else
2.45 eric 203: HTTrace("<%x>", (unsigned)*p);
2.1 timbl 204: }
2.45 eric 205: HTTrace("\n");
2.1 timbl 206: }
207: for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
2.25 frystyk 208: if (PROT_TRACE)
2.45 eric 209: HTTrace("............ Record type %d, length %d\n",
2.24 frystyk 210: (unsigned char) p[0], (unsigned char) p[1]);
2.21 frystyk 211: sprintf(num, "%d", (int)*p);
212: memcpy(q, num, strlen(num));
213: q += strlen(num);
214: p++;
215: *q++ = '='; /* Separate */
216: l = (int)((unsigned char)*p);
217: p++;
218: if (l > 127)
219: {
220: l = (l - 128) * 128;
221: l = l + (int)((unsigned char)*p);
222: p++;
223: }
224:
225: for (i = 0; i < l; i++, p++)
226: {
227: if (!acceptable[(unsigned char)*p])
228: {
229: *q++ = HEX_ESCAPE;
230: *q++ = hex[((unsigned char)*p) >> 4];
231: *q++ = hex[((unsigned char)*p) & 15];
232: }
233: else *q++ = (unsigned char)*p;
234: }
235: *q++= ';'; /* Terminate field */
236: #ifdef OLD_CODE
2.1 timbl 237: if (*p>10) {
2.25 frystyk 238: if (PROT_TRACE)
2.45 eric 239: HTTrace("WAIS........ DOCID record type of %d!\n", *p);
2.1 timbl 240: return 0;
241: }
2.2 timbl 242: { /* Bug fix -- allow any byte value 15 Apr 93 */
243: unsigned int i = (unsigned) *p++;
244:
245: if (i > 99) {
246: *q++ = (i/100) + '0';
247: i = i % 100;
248: }
249: if (i > 9) {
250: *q++ = (i/10) + '0';
251: i = i % 10;
252: }
253: *q++ = i + '0'; /* Record type */
254: }
2.1 timbl 255: *q++ = '='; /* Separate */
256: l = *p++; /* Length */
257: for(i=0; i<l; i++, p++){
2.18 luotonen 258: if (!acceptable[(int)*p]) {
2.1 timbl 259: *q++ = HEX_ESCAPE; /* Means hex commming */
260: *q++ = hex[(*p) >> 4];
261: *q++ = hex[(*p) & 15];
262: }
263: else *q++ = *p;
264: }
265: *q++= ';'; /* Terminate field */
2.21 frystyk 266: #endif /* OLD_CODE */
2.1 timbl 267: }
268: *q++ = 0; /* Terminate string */
2.45 eric 269: if (PROT_TRACE) HTTrace("HTLoadWAIS.. WWW form of id: %s\n", buf);
2.1 timbl 270: {
2.24 frystyk 271: char *result;
2.49 frystyk 272: if ((result = (char *) HT_MALLOC((int) strlen((char *) buf)+1))==NULL)
2.44 frystyk 273: HT_OUTOFMEM("WWW_from_WAIS");
2.49 frystyk 274: strcpy(result, (char *) buf);
2.1 timbl 275: return result;
276: }
277: } /* WWW_from_WAIS */
278:
279:
2.2 timbl 280: /* Transform URL into WAIS document identifier
281: ** -------------------------------------------
2.1 timbl 282: **
283: ** On entry,
284: ** docname points to valid name produced originally by
285: ** WWW_from_WAIS
286: ** On exit,
287: ** docid->size is valid
288: ** docid->bytes is malloced and must later be freed.
289: */
2.42 frystyk 290: PRIVATE any * WAIS_from_WWW (any * docid, char * docname)
2.1 timbl 291: {
292: char *z; /* Output pointer */
293: char *sor; /* Start of record - points to size field. */
294: char *p; /* Input pointer */
295: char *q; /* Poisition of "=" */
296: char *s; /* Position of semicolon */
297: int n; /* size */
2.25 frystyk 298: if (PROT_TRACE)
2.45 eric 299: HTTrace("HTLoadWAIS.. WWW id (to become WAIS id): %s\n",
2.24 frystyk 300: docname);
2.1 timbl 301: for(n=0, p = docname; *p; p++) { /* Count sizes of strings */
302: n++;
303: if (*p == ';') n--; /* Not converted */
304: else if (*p == HEX_ESCAPE) n=n-2; /* Save two bytes */
305: docid->size = n;
306: }
307:
2.44 frystyk 308: /* result record */
309: if ((docid->bytes = (char *) HT_MALLOC(docid->size+32)) == NULL)
310: HT_OUTOFMEM("docid->bytes");
2.1 timbl 311: z = docid->bytes;
312:
2.21 frystyk 313: for(p = docname; *p; ) {
314: q = strchr(p, '=');
315: if (!q)
316: return 0;
317: *q = '\0';
318: *z++ = atoi(p);
319: *q = '=';
320: s = strchr(q, ';'); /* (Check only) */
321: if (!s)
322: return 0; /* Bad! No ';'; */
323: sor = z; /* Remember where the size field was */
324: z++; /* Skip record size for now */
325:
326: {
327: int len;
328: int tmp;
329: for(p=q+1; *p!=';' ; ) {
330: if (*p == HEX_ESCAPE) {
331: char c;
332: unsigned int b;
333: p++;
334: c = *p++;
335: b = from_hex(c);
336: c = *p++;
337: if (!c)
338: break; /* Odd number of chars! */
339: *z++ = (b<<4) + from_hex(c);
340: } else {
341: *z++ = *p++; /* Record */
342: }
343: }
344: len = (z-sor-1);
345:
346: z = sor;
347: if (len > 127) {
348: tmp = (len / 128);
349: len = len - (tmp * 128);
350: tmp = tmp + 128;
351: *z++ = (char)tmp;
352: *z = (char)len;
353: } else {
354: *z = (char)len;
355: }
356: z++;
357: }
358:
359: for(p=q+1; *p!=';' ; ) {
360: if (*p == HEX_ESCAPE) {
361: char c;
362: unsigned int b;
363: p++;
364: c = *p++;
365: b = from_hex(c);
366: c = *p++;
367: if (!c)
368: break; /* Odd number of chars! */
369: *z++ = (b<<4) + from_hex(c);
370: } else {
371: *z++ = *p++; /* Record */
372: }
373: }
374: p++; /* After semicolon: start of next record */
375: }
376:
377: #ifdef OLD_CODE
2.1 timbl 378: for(p = docname; *p; ) { /* Convert of strings */
2.2 timbl 379: /* Record type */
380:
381: *z = 0; /* Initialize record type */
382: while (*p >= '0' && *p <= '9') {
383: *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */
384: }
385: z++;
386: if (*p != '=') return 0;
387: q = p;
388:
389: /* *z++ = *p++ - '0';
2.1 timbl 390: q = strchr(p , '=');
391: if (!q) return 0;
2.2 timbl 392: */
2.1 timbl 393: s = strchr(q, ';'); /* (Check only) */
394: if (!s) return 0; /* Bad! No ';'; */
395: sor = z; /* Remember where the size field was */
396: z++; /* Skip record size for now */
397: for(p=q+1; *p!=';' ; ) {
398: if (*p == HEX_ESCAPE) {
399: char c;
400: unsigned int b;
401: p++;
402: c = *p++;
403: b = from_hex(c);
404: c = *p++;
405: if (!c) break; /* Odd number of chars! */
406: *z++ = (b<<4) + from_hex(c);
407: } else {
408: *z++ = *p++; /* Record */
409: }
410: }
411: *sor = (z-sor-1); /* Fill in size -- not counting size itself */
412: p++; /* After semicolon: start of next record */
413: }
2.21 frystyk 414: #endif /* OLD_CODE */
2.25 frystyk 415: if (PROT_TRACE) {
2.1 timbl 416: char *p;
2.45 eric 417: HTTrace("WAIS........ id (%d bytes) is ", (int)docid->size);
2.1 timbl 418: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
419: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
2.45 eric 420: HTTrace("%c", *p);
2.1 timbl 421: else
2.45 eric 422: HTTrace("<%x>", (unsigned)*p);
2.1 timbl 423: }
2.45 eric 424: HTTrace("\n");
2.1 timbl 425: }
426: return docid; /* Ok */
427:
428: } /* WAIS_from_WWW */
429:
430:
431: /* Send a plain text record to the client output_text_record()
432: ** --------------------------------------
433: */
2.2 timbl 434:
2.42 frystyk 435: PRIVATE void output_text_record (
436: HTStream * target,
437: WAISDocumentText * record,
438: boolean quote_string_quotes,
439: boolean binary)
2.1 timbl 440: {
441: long count;
2.9 timbl 442: if (binary) {
443: (*target->isa->put_block)(target,
444: record->DocumentText->bytes,
445: record->DocumentText->size);
446: return;
447: }
448:
2.1 timbl 449: for(count = 0; count < record->DocumentText->size; count++){
450: long ch = (unsigned char)record->DocumentText->bytes[count];
2.2 timbl 451: if (ch == 27) { /* What is this in for? Tim */
2.1 timbl 452:
453: /* then we have an escape code */
454: /* if the next letter is '(' or ')', then ignore two letters */
455: if('(' == record->DocumentText->bytes[count + 1] ||
456: ')' == record->DocumentText->bytes[count + 1])
457: count += 1; /* it is a term marker */
458: else count += 4; /* it is a paragraph marker */
459: } else if (ch == '\n' || ch == '\r') {
2.2 timbl 460: PUTC('\n');
2.1 timbl 461: } else if ((ch=='\t') || isprint(ch)){
2.2 timbl 462: PUTC(ch);
2.1 timbl 463: }
464: }
465: } /* output text record */
466:
467:
2.2 timbl 468:
2.1 timbl 469: /* Format A Search response for the client display_search_response
470: ** ---------------------------------------
2.42 frystyk 471: ** modified from tracy shen's version in wutil.c
472: ** displays either a text record or a set of headlines.
2.1 timbl 473: */
2.42 frystyk 474: void display_search_response (HTStructured * target,
475: SearchResponseAPDU * response,
476: char * database,
477: char * keywords)
2.1 timbl 478: {
479: WAISSearchResponse *info;
480: long i, k;
481:
482: BOOL archie = strstr(database, "archie")!=0; /* Specical handling */
483:
2.45 eric 484: if (PROT_TRACE) HTTrace("WAIS........ Displaying search response\n");
2.1 timbl 485: sprintf(line,
2.21 frystyk 486: "Index %s contains the following %d item%s relevant to '%s'.\n",
487: database,
488: (int)(response->NumberOfRecordsReturned),
489: response->NumberOfRecordsReturned ==1 ? "" : "s",
490: keywords);
2.2 timbl 491: PUTS(line);
492: PUTS("The first figure for each entry is its relative score, ");
493: PUTS("the second the number of lines in the item.");
494: START(HTML_MENU);
495:
2.1 timbl 496: if ( response->DatabaseDiagnosticRecords != 0 ) {
497: info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
498: i =0;
499:
500: if (info->Diagnostics != NULL)
2.2 timbl 501: showDiags((HTStream*)target, info->Diagnostics);
2.1 timbl 502:
503: if ( info->DocHeaders != 0 ) {
504: for (k=0; info->DocHeaders[k] != 0; k++ ) {
505: WAISDocumentHeader* head = info->DocHeaders[k];
506: char * headline = trim_junk(head->Headline);
507: any * docid = head->DocumentID;
508: char * docname; /* printable version of docid */
509: i++;
510:
511: /* Make a printable string out of the document id.
512: */
2.25 frystyk 513: if (PROT_TRACE)
2.45 eric 514: HTTrace("HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n", i,
2.24 frystyk 515: (long int)(info->DocHeaders[k]->Score),
516: (long int)(info->DocHeaders[k]->Lines),
517: headline);
2.1 timbl 518:
2.2 timbl 519: START(HTML_LI);
520: sprintf(line, "%4ld %4ld ",
521: head->Score,
522: head->Lines);
2.23 frystyk 523: PUTS(line);
2.2 timbl 524:
2.1 timbl 525: if (archie) {
526: char * www_name = WWW_from_archie(headline);
527: if (www_name) {
2.2 timbl 528: HTStartAnchor(target, NULL, www_name);
2.1 timbl 529: PUTS(headline);
2.2 timbl 530:
531: END(HTML_A);
2.44 frystyk 532: HT_FREE(www_name);
2.1 timbl 533: } else {
2.2 timbl 534: PUTS(headline);
535: PUTS(" (bad file name)");
2.1 timbl 536: }
537: } else { /* Not archie */
538: docname = WWW_from_WAIS(docid);
539: if (docname) {
2.6 timbl 540: char * dbname = HTEscape(database, URL_XPALPHAS);
2.21 frystyk 541: char types_array[1000]; /* bad */
542: char *type_escaped;
543: types_array[0] = 0;
544: if (head->Types) {
545: int i;
546: for (i = 0; head->Types[i]; i++)
547: {
548: if (i)
549: strcat (types_array, ",");
550:
551: type_escaped = HTEscape (head->Types[i], URL_XALPHAS);
552: strcat (types_array, type_escaped);
2.44 frystyk 553: HT_FREE(type_escaped);
2.21 frystyk 554: }
2.25 frystyk 555: if (PROT_TRACE)
2.45 eric 556: HTTrace("WAIS........ Types_array `%s\'\n",
2.24 frystyk 557: types_array);
2.21 frystyk 558: } else {
559: strcat (types_array, "TEXT");
560: }
561: sprintf(line, "%s/%s/%d/%s",
562: dbname,
563: types_array,
564: (int)(head->DocumentLength),
565: docname);
566: #ifdef OLD_CODE
2.1 timbl 567: sprintf(line, "%s/%s/%d/%s", /* W3 address */
568: dbname,
569: head->Types ? head->Types[0] : "TEXT",
2.2 timbl 570: (int)(head->DocumentLength),
2.1 timbl 571: docname);
2.21 frystyk 572: #endif /* OLD_CODE */
2.11 secret 573: HTStartAnchor(target, NULL, ( (head->Types)
574: && (!strcmp(head->Types[0], "URL"))) ?
575: headline : line); /* NT, Sep 93 */
2.2 timbl 576: PUTS(headline);
577: END(HTML_A);
2.44 frystyk 578: HT_FREE(dbname);
579: HT_FREE(docname);
2.1 timbl 580: } else {
2.2 timbl 581: PUTS("(bad doc id)");
2.1 timbl 582: }
583: }
584: } /* next document header */
585: } /* if there were any document headers */
586:
587: if ( info->ShortHeaders != 0 ) {
588: k =0;
589: while (info->ShortHeaders[k] != 0 ) {
590: i++;
2.2 timbl 591: PUTS( "(Short Header record, can't display)");
2.1 timbl 592: }
593: }
594: if ( info->LongHeaders != 0 ) {
595: k =0;
596: while (info->LongHeaders[k] != 0) {
597: i++;
598: PUTS( "\nLong Header record, can't display\n");
599: }
600: }
601: if ( info->Text != 0 ) {
602: k =0;
603: while (info->Text[k] != 0) {
604: i++;
605: PUTS( "\nText record\n");
2.9 timbl 606: output_text_record((HTStream*)target, info->Text[k++], false, false);
2.1 timbl 607: }
608: }
609: if ( info->Headlines != 0 ) {
610: k =0;
611: while (info->Headlines[k] != 0) {
612: i++;
613: PUTS( "\nHeadline record, can't display\n");
614: /* dsply_headline_record( info->Headlines[k++]); */
615: }
616: }
617: if ( info->Codes != 0 ) {
618: k =0;
619: while (info->Codes[k] != 0) {
620: i++;
621: PUTS( "\nCode record, can't display\n");
622: /* dsply_code_record( info->Codes[k++]); */
623: }
624: }
625: } /* Loop: display user info */
2.2 timbl 626: END(HTML_MENU);
2.1 timbl 627: PUTC('\n'); ;
628: }
629:
630:
631:
2.2 timbl 632:
2.20 frystyk 633: /* Load Document from WAIS Server HTLoadWAIS()
634: ** ------------------------------
2.2 timbl 635: **
2.20 frystyk 636: ** On entry,
637: ** request This is the request structure
638: ** On exit,
639: ** returns <0 Error has occured
640: ** HT_LOADED OK
2.1 timbl 641: */
2.51 frystyk 642: PUBLIC int HTLoadWAIS (SOCKET soc, HTRequest * request, HTEventType type)
2.1 timbl 643:
2.21 frystyk 644: #define MAX_KEYWORDS_LENGTH 4000
2.1 timbl 645: #define MAX_SERVER_LENGTH 1000
646: #define MAX_DATABASE_LENGTH 1000
647: #define MAX_SERVICE_LENGTH 1000
648:
649: {
2.48 frystyk 650: HTParentAnchor * anchor = HTRequest_anchor(request);
651: const char * arg = HTAnchor_physical(anchor);
652: HTFormat format_out = HTRequest_outputFormat(request);
653: HTStream* sink = HTRequest_outputStream(request);
2.20 frystyk 654: #if 0
2.46 frystyk 655: static const char * error_header =
2.7 timbl 656: "<h1>Access error</h1>\nThe following error occured in accesing a WAIS server:<P>\n";
2.20 frystyk 657: #endif
2.2 timbl 658: char * key; /* pointer to keywords in URL */
2.1 timbl 659: char* request_message = NULL; /* arbitrary message limit */
660: char* response_message = NULL; /* arbitrary message limit */
661: long request_buffer_length; /* how of the request is left */
662: SearchResponseAPDU *retrieval_response = 0;
663: char keywords[MAX_KEYWORDS_LENGTH + 1];
664: char *server_name;
2.6 timbl 665: char *wais_database = NULL; /* name of current database */
666: char *www_database; /* Same name escaped */
2.1 timbl 667: char *service;
668: char *doctype;
669: char *doclength;
670: long document_length;
671: char *docname;
672: FILE *connection = 0;
673: char * names; /* Copy of arg to be hacked up */
674: BOOL ok = NO;
2.20 frystyk 675: int status = -1;
2.21 frystyk 676: char *basetitle = NULL;
2.1 timbl 677:
2.31 frystyk 678: #if 0
2.1 timbl 679: extern FILE * connect_to_server();
2.31 frystyk 680: #endif
2.1 timbl 681:
2.27 frystyk 682: if (PROT_TRACE)
2.45 eric 683: HTTrace("HTLoadWAIS.. Looking for `%s\'\n", arg);
2.27 frystyk 684:
2.1 timbl 685: if (!acceptable_inited) init_acceptable();
686:
687:
688: /* Decipher and check syntax of WWW address:
689: ** ----------------------------------------
690: **
691: ** First we remove the "wais:" if it was spcified. 920110
692: */
693: names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2 timbl 694: key = strchr(names, '?');
695:
696: if (key) {
697: char * p;
698: *key++ = 0; /* Split off keywords */
699: for (p=key; *p; p++) if (*p == '+') *p = ' ';
700: HTUnEscape(key);
701: }
2.1 timbl 702: if (names[0]== '/') {
703: server_name = names+1;
2.18 luotonen 704: if ((as_gate =(*server_name == '/')))
2.1 timbl 705: server_name++; /* Accept one or two */
706: www_database = strchr(server_name,'/');
707: if (www_database) {
708: *www_database++ = 0; /* Separate database name */
709: doctype = strchr(www_database, '/');
710: if (key) ok = YES; /* Don't need doc details */
711: else if (doctype) { /* If not search parse doc details */
712: *doctype++ = 0; /* Separate rest of doc address */
713: doclength = strchr(doctype, '/');
714: if(doclength) {
715: *doclength++ = 0;
716: document_length = atol(doclength);
717: if (document_length) {
718: docname=strchr(doclength, '/');
719: if (docname) {
720: *docname++ = 0;
721: ok = YES; /* To avoid a goto! */
722: } /* if docname */
723: } /* if document_length valid */
724: } /* if doclength */
725: } else { /* no doctype? Assume index required */
726: if (!key) key = "";
727: ok = YES;
728: } /* if doctype */
729: } /* if database */
730: }
731:
2.21 frystyk 732: if (!ok) {
733: char *unescaped = NULL;
734: StrAllocCopy(unescaped, arg);
735: HTUnEscape(unescaped);
2.40 frystyk 736: HTRequest_addError(request, ERR_FATAL, NO, HTERR_BAD_REQUEST,
2.21 frystyk 737: (void *) unescaped, (int) strlen(unescaped),
738: "HTLoadWAIS");
2.44 frystyk 739: HT_FREE(unescaped);
740: HT_FREE(names);
2.21 frystyk 741: return -1;
742: }
743:
2.45 eric 744: if (PROT_TRACE) HTTrace("HTLoadWAIS.. URL Parsed OK\n");
2.1 timbl 745:
746: service = strchr(names, ':');
747: if (service) *service++ = 0;
748: else service = "210";
749:
750: if (server_name[0] == 0)
751: connection = NULL;
752:
753: else if (!(key && !*key))
2.20 frystyk 754: if ((connection=connect_to_server(server_name,atoi(service))) == NULL) {
2.21 frystyk 755: char *host = HTParse(arg, "", PARSE_HOST);
2.25 frystyk 756: if (PROT_TRACE)
2.45 eric 757: HTTrace("HTLoadWAIS.. Can't open connection to %s via service %s.\n",
2.24 frystyk 758: server_name, service);
2.40 frystyk 759: HTRequest_addError(request, ERR_FATAL, NO, HTERR_WAIS_NO_CONNECT,
2.20 frystyk 760: (void *) host, (int) strlen(host), "HTLoadWAIS");
761: goto cleanup;
762: }
2.1 timbl 763:
2.6 timbl 764: StrAllocCopy(wais_database,www_database);
765: HTUnEscape(wais_database);
2.21 frystyk 766:
767: /* Make title name without the .src */
768: {
769: char *srcstr;
770: StrAllocCopy(basetitle, wais_database);
771: if ((srcstr = strstr(basetitle, ".src")) != NULL)
772: *srcstr = '\0';
773: }
2.6 timbl 774:
2.21 frystyk 775: /* This below fixed size stuff is terrible */
2.44 frystyk 776: if ((request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char))) == NULL)
777: HT_OUTOFMEM("WAIS request message");
778: if ((response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char))) == NULL)
779: HT_OUTOFMEM("WAIS response message");
2.1 timbl 780:
781: /* If keyword search is performed but there are no keywords,
782: ** the user has followed a link to the index itself. It would be
783: ** appropriate at this point to send him the .SRC file - how?
784: */
785:
786: if (key && !*key) { /* I N D E X */
787:
2.7 timbl 788: #ifdef CACHE_FILE_PREFIX
789: char filename[256];
790: FILE * fp;
791: #endif
2.37 frystyk 792: HTStructured * target = HTMLGenerator(request, NULL,
2.13 timbl 793: WWW_HTML, format_out, sink);
2.1 timbl 794:
2.8 timbl 795: {
2.27 frystyk 796: START(HTML_HTML);
797: START(HTML_HEAD);
2.8 timbl 798: START(HTML_TITLE);
2.21 frystyk 799: PUTS(basetitle);
800: PUTS(" Index");
2.8 timbl 801: END(HTML_TITLE);
2.27 frystyk 802: END(HTML_HEAD);
2.8 timbl 803:
2.27 frystyk 804: START(HTML_BODY);
2.8 timbl 805: START(HTML_H1);
2.21 frystyk 806: PUTS("WAIS Index: ");
807: PUTS(basetitle);
2.8 timbl 808: END(HTML_H1);
809:
810: }
2.21 frystyk 811: START(HTML_ISINDEX);
812:
813: /* If we have seen a source file for this database, use that: */
2.2 timbl 814:
2.7 timbl 815: #ifdef CACHE_FILE_PREFIX
2.8 timbl 816: sprintf(filename, "%sWSRC-%s:%s:%.100s.txt",
2.7 timbl 817: CACHE_FILE_PREFIX,
2.1 timbl 818: server_name, service, www_database);
819:
820: fp = fopen(filename, "r"); /* Have we found this already? */
2.45 eric 821: if (PROT_TRACE) HTTrace(
2.21 frystyk 822: "HTLoadWAIS.. Description of server %s %s.\n",
2.1 timbl 823: filename,
824: fp ? "exists already" : "does NOT exist!");
2.2 timbl 825:
2.1 timbl 826: if (fp) {
2.24 frystyk 827: int c;
828: START(HTML_PRE); /* Preformatted description */
829: while((c=getc(fp)) != EOF)
830: PUTC(c); /* Transfer file */
2.7 timbl 831: END(HTML_PRE);
2.1 timbl 832: fclose(fp);
2.8 timbl 833: }
2.2 timbl 834: #endif
2.27 frystyk 835: END(HTML_BODY);
836: END(HTML_HTML);
2.2 timbl 837: FREE_TARGET;
2.1 timbl 838:
839: } else if (key) { /* S E A R C H */
840: char *p;
2.2 timbl 841: HTStructured * target;
842:
2.1 timbl 843: strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
2.18 luotonen 844: while ((p = strchr(keywords,'+'))) *p = ' ';
2.1 timbl 845:
846: /* Send advance title to get something fast to the other end */
847:
2.37 frystyk 848: target = HTMLGenerator(request, NULL, WWW_HTML, format_out, sink);
2.2 timbl 849:
2.23 frystyk 850: START(HTML_HTML);
851: START(HTML_HEAD);
2.2 timbl 852: START(HTML_TITLE);
853: PUTS(keywords);
2.21 frystyk 854: PUTS(" in ");
855: PUTS(basetitle);
2.2 timbl 856: END(HTML_TITLE);
2.23 frystyk 857: END(HTML_HEAD);
2.2 timbl 858:
2.23 frystyk 859: START(HTML_BODY);
2.2 timbl 860: START(HTML_H1);
2.21 frystyk 861: PUTS("WAIS Search of \"");
2.2 timbl 862: PUTS(keywords);
2.21 frystyk 863: PUTS("\" in ");
864: PUTS(basetitle);
2.2 timbl 865: END(HTML_H1);
2.1 timbl 866:
2.21 frystyk 867: START(HTML_ISINDEX);
868:
2.1 timbl 869: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.25 frystyk 870: if (PROT_TRACE)
2.45 eric 871: HTTrace("HTLoadWAIS.. Search for `%s' in `%s'\n",
2.24 frystyk 872: keywords, wais_database);
2.21 frystyk 873: if(generate_search_apdu(request_message + HEADER_LENGTH,
2.1 timbl 874: &request_buffer_length,
2.21 frystyk 875: keywords, wais_database, NULL,
876: HTMaxWAISLines) == NULL) {
2.25 frystyk 877: if (PROT_TRACE)
2.45 eric 878: HTTrace("WAIS Search. Too many lines in response\n");
2.40 frystyk 879: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 880: NULL, 0, "HTLoadWAIS");
881: }
2.1 timbl 882:
883: if(!interpret_message(request_message,
884: MAX_MESSAGE_LEN - request_buffer_length,
885: response_message,
886: MAX_MESSAGE_LEN,
887: connection,
888: false /* true verbose */
889: )) {
2.25 frystyk 890: if (PROT_TRACE)
2.45 eric 891: HTTrace("WAIS Search. Too many lines in response\n");
2.40 frystyk 892: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 893: NULL, 0, "HTLoadWAIS");
2.1 timbl 894: } else { /* returned message ok */
895: SearchResponseAPDU *query_response = 0;
896: readSearchResponseAPDU(&query_response,
897: response_message + HEADER_LENGTH);
2.2 timbl 898: display_search_response(target,
899: query_response, wais_database, keywords);
2.1 timbl 900: if (query_response->DatabaseDiagnosticRecords)
901: freeWAISSearchResponse(
902: query_response->DatabaseDiagnosticRecords);
903: freeSearchResponseAPDU( query_response);
904: } /* returned message not too large */
905:
2.23 frystyk 906: END(HTML_BODY);
907: END(HTML_HTML);
2.2 timbl 908: FREE_TARGET;
909:
2.1 timbl 910: } else { /* D O C U M E N T F E T C H */
911:
2.9 timbl 912: boolean binary; /* how to transfer stuff coming over */
2.2 timbl 913: HTStream * target;
2.1 timbl 914: long count;
915: any doc_chunk;
916: any * docid = &doc_chunk;
2.25 frystyk 917: if (PROT_TRACE)
2.45 eric 918: HTTrace(
2.24 frystyk 919: "HTLoadWAIS.. Retrieve document `%s'\n............ type `%s' length %ld\n", docname, doctype, document_length);
2.2 timbl 920:
2.48 frystyk 921: HTAnchor_setFormat(anchor,
2.2 timbl 922: !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
2.23 frystyk 923: !strcmp(doctype, "TEXT") ? WWW_UNKNOWN :
2.32 frystyk 924: !strcmp(doctype, "HTML") ? WWW_HTML:
925: !strcmp(doctype, "GIF") ? WWW_GIF:
926: HTAtom_for("application/octet-stream"));
2.9 timbl 927: binary =
928: 0 != strcmp(doctype, "WSRC") &&
929: 0 != strcmp(doctype, "TEXT") &&
930: 0 != strcmp(doctype, "HTML") ;
931:
2.23 frystyk 932: /* Guess on TEXT format as it might be HTML */
2.48 frystyk 933: if ((target = HTStreamStack(HTAnchor_format(anchor),
934: HTRequest_outputFormat(request),
935: HTRequest_outputStream(request),
2.28 frystyk 936: request, YES)) == NULL) {
2.21 frystyk 937: status = -1;
938: goto cleanup;
939: }
2.2 timbl 940:
2.21 frystyk 941: /* Decode hex or litteral format for document ID */
2.1 timbl 942: WAIS_from_WWW(docid, docname);
943:
2.21 frystyk 944: /* Loop over slices of the document */
945: for (count = 0; count * CHARS_PER_PAGE < document_length; count++) {
946: char *type = s_strdup(doctype);
947: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.45 eric 948: if (PROT_TRACE) HTTrace("HTLoadWAIS.. Slice number %ld\n",
2.21 frystyk 949: count);
950: if (generate_retrieval_apdu(request_message + HEADER_LENGTH,
951: &request_buffer_length,
952: docid, CT_byte,
953: count * CHARS_PER_PAGE,
954: HTMIN((count + 1) * CHARS_PER_PAGE,
955: document_length),
956: type,
957: wais_database) == 0) {
2.40 frystyk 958: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 959: NULL, 0, "HTLoadWAIS");
960: }
2.44 frystyk 961: HT_FREE(type);
2.21 frystyk 962:
963: /* Actually do the transaction given by request_message */
964: if (interpret_message(request_message,
965: MAX_MESSAGE_LEN - request_buffer_length,
966: response_message,
967: MAX_MESSAGE_LEN,
968: connection,
969: false /* true verbose */
970: ) == 0) {
2.40 frystyk 971: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 972: NULL, 0, "HTLoadWAIS");
973: }
974:
975: /* Parse the result which came back into memory. */
976: readSearchResponseAPDU(&retrieval_response,
977: response_message + HEADER_LENGTH);
978: {
979: WAISSearchResponse *searchres = (WAISSearchResponse *) retrieval_response->DatabaseDiagnosticRecords;
980: if (!searchres->Text) {
981: if (searchres->Diagnostics && *searchres->Diagnostics &&
982: (*searchres->Diagnostics)->ADDINFO) {
983: char *errmsg = (*searchres->Diagnostics)->ADDINFO;
2.40 frystyk 984: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
2.21 frystyk 985: (void *) errmsg, (int) strlen(errmsg),
986: "HTLoadWAIS");
987: } else {
2.40 frystyk 988: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
2.21 frystyk 989: NULL, 0, "HTLoadWAIS");
990: }
2.23 frystyk 991: (*target->isa->_free)(target);
2.48 frystyk 992: HTRequest_setOutputStream(request, NULL);
2.44 frystyk 993: HT_FREE(docid->bytes);
2.22 frystyk 994: freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords);
995: freeSearchResponseAPDU( retrieval_response);
996: goto cleanup;
2.21 frystyk 997: } else {
998: output_text_record(target, *searchres->Text,
999: false, binary);
2.22 frystyk 1000: freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords);
1001: freeSearchResponseAPDU( retrieval_response);
2.21 frystyk 1002: } /* If text existed */
1003: }
1004:
1005: } /* Loop over slices */
2.1 timbl 1006:
2.23 frystyk 1007: (*target->isa->_free)(target);
2.48 frystyk 1008: HTRequest_setOutputStream(request, NULL);
2.44 frystyk 1009: HT_FREE(docid->bytes);
2.1 timbl 1010: } /* If document rather than search */
2.20 frystyk 1011: status = HT_LOADED;
2.2 timbl 1012:
2.20 frystyk 1013: cleanup:
2.34 frystyk 1014: if (connection) close_connection_to_server(connection);
2.44 frystyk 1015: if (wais_database) HT_FREE(wais_database);
2.20 frystyk 1016: if (request_message) s_free(request_message);
1017: if (response_message) s_free(response_message);
2.44 frystyk 1018: HT_FREE(names);
1019: HT_FREE(basetitle);
2.20 frystyk 1020: if (status < 0) {
1021: char *unescaped = NULL;
1022: StrAllocCopy(unescaped, arg);
1023: HTUnEscape(unescaped);
2.40 frystyk 1024: HTRequest_addError(request, ERR_FATAL, NO, HTERR_INTERNAL, (void *) unescaped,
2.20 frystyk 1025: (int) strlen(unescaped), "HTLoadWAIS");
2.44 frystyk 1026: HT_FREE(unescaped);
2.20 frystyk 1027: }
1028: return status;
2.1 timbl 1029: }
1030:
Webmaster