Annotation of libwww/Library/src/HTWAIS.c, revision 2.34
2.29 frystyk 1: /* HTWAIS.c
2: ** WORLDWIDEWEB - WIDE AREA INFORMATION SERVER ACCESS
3: **
2.33 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.29 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This module allows a WWW server or client to read data from a
2.29 frystyk 8: ** remote WAIS server, and provide that data to a WWW client in
9: ** hypertext form. Source files, once retrieved, are stored and used
10: ** to provide information about the index when that is accessed.
2.1 timbl 11: **
12: ** Authors
13: ** BK Brewster Kahle, Thinking Machines, <Brewster@think.com>
14: ** TBL Tim Berners-Lee, CERN <timbl@info.cern.ch>
15: **
16: ** History
17: ** Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
18: ** Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping)
19: ** Refers to lists of sources.
2.2 timbl 20: ** Mar 93 TBL Lib 2.0 compatible module made.
2.34 ! frystyk 21: ** May 95 CHJ modified for freeWAIS-0.5
2.1 timbl 22: **
23: ** Bugs
24: ** Uses C stream i/o to read and write sockets, which won't work
25: ** on VMS TCP systems.
26: **
27: ** Should cache connections.
28: **
29: ** ANSI C only as written
30: **
2.11 secret 31: ** Bugs fixed
32: ** NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
33: **
2.1 timbl 34: ** WAIS comments:
35: **
36: ** 1. Separate directories for different system's .o would help
37: ** 2. Document ids are rather long!
38: **
39: ** WWW Address mapping convention:
40: **
41: ** /servername/database/type/length/document-id
42: **
43: ** /servername/database?word+word+word
44: */
45: /* WIDE AREA INFORMATION SERVER SOFTWARE:
46: No guarantees or restrictions. See the readme file for the full standard
47: disclaimer.
48:
49: Brewster@think.com
50: */
51:
52:
2.8 timbl 53: #define DIRECTORY "/cnidr.org:210/directory-of-servers"
54: /* define DIRECTORY "/quake.think.com:210/directory-of-servers" */
2.1 timbl 55:
56: #define BIG 1024 /* identifier size limit @@@@@ */
57:
2.2 timbl 58: /* From WAIS
59: ** ---------
2.1 timbl 60: */
61:
62: #include <ui.h>
2.31 frystyk 63: #include <sockets.h>
2.1 timbl 64:
65: #define MAX_MESSAGE_LEN 100000
66: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
67: #define WAISSEARCH_DATE "Fri Jul 19 1991"
68:
69:
2.2 timbl 70: /* FROM WWW
71: ** --------
2.1 timbl 72: */
73: #define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
74:
2.2 timbl 75: #define HEX_ESCAPE '%'
2.24 frystyk 76:
2.31 frystyk 77: /* Library include files */
78: #include "tcp.h"
2.1 timbl 79: #include "HTUtils.h"
2.31 frystyk 80: #include "HTString.h"
2.1 timbl 81: #include "HTParse.h"
2.2 timbl 82: #include "HTAccess.h" /* We implement a protocol */
2.21 frystyk 83: #include "HTError.h"
2.2 timbl 84: #include "HTML.h" /* The object we will generate */
2.25 frystyk 85: #include "HTParse.h"
86: #include "HTFormat.h"
87: #include "HTTCP.h"
2.2 timbl 88:
2.1 timbl 89: extern FILE * logfile; /* Log file output */
90:
2.27 frystyk 91: PUBLIC int HTMaxWAISLines = 200;/* Max number of entries from a search */
2.21 frystyk 92:
2.1 timbl 93: PRIVATE BOOL as_gate; /* Client is using us as gateway */
94:
95: PRIVATE char line[2048]; /* For building strings to display */
96: /* Must be able to take id */
2.2 timbl 97:
2.25 frystyk 98: /* Hypertext object building machinery */
2.2 timbl 99: #include "HTML.h"
100:
/* Shorthand for calling the methods of the stream named `target' that is
** in scope at the point of use.  Each macro dispatches through the
** stream's class record (target->isa).
*/
101: #define PUTC(c) (*target->isa->put_character)(target, c)
102: #define PUTS(s) (*target->isa->put_string)(target, s)
103: #define START(e) (*target->isa->start_element)(target, e, 0, 0)
104: #define END(e) (*target->isa->end_element)(target, e)
2.23 frystyk 105: #define FREE_TARGET (*target->isa->_free)(target)
2.2 timbl 106:
/* Only the leading class pointer of each stream object is declared here;
** that is all the macros above need.  NOTE(review): the remaining fields
** are presumably private to the implementing modules -- confirm.
*/
107: struct _HTStructured {
108: CONST HTStructuredClass * isa;
109: /* ... */
110: };
111:
112: struct _HTStream {
113: CONST HTStreamClass * isa;
114: /* ... */
115: };
116:
117:
2.1 timbl 118: /* showDiags
119: */
120: /* modified from Jonny G's version in ui/question.c */
121:
2.2 timbl 122: void showDiags ARGS2(
123: HTStream *, target,
124: diagnosticRecord **, d)
2.1 timbl 125: {
126: long i;
127:
128: for (i = 0; d[i] != NULL; i++) {
129: if (d[i]->ADDINFO != NULL) {
130: PUTS("Diagnostic code is ");
131: PUTS(d[i]->DIAG);
132: PUTC(' ');
133: PUTS(d[i]->ADDINFO);
134: PUTC('\n'); ;
135: }
136: }
137: }
138:
139: /* Matrix of allowed characters in filenames
140: ** -----------------------------------------
141: */
142:
143: PRIVATE BOOL acceptable[256];
144: PRIVATE BOOL acceptable_inited = NO;
145:
146: PRIVATE void init_acceptable NOARGS
147: {
148: unsigned int i;
149: char * good =
150: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
151: for(i=0; i<256; i++) acceptable[i] = NO;
152: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
153: acceptable_inited = YES;
154: }
155:
156: /* Transform file identifier into WWW address
157: ** ------------------------------------------
158: **
159: **
160: ** On exit,
161: ** returns nil if error
162: ** pointer to malloced string (must be freed) if ok
163: */
164: char * WWW_from_archie ARGS1 (char *, file)
165: {
166: char * end;
167: char * result;
168: char * colon;
169: for(end=file; *end > ' '; end++); /* assumes ASCII encoding*/
170: result = (char *)malloc(10 + (end-file));
171: if (!result) return result; /* Malloc error */
172: strcpy(result, "file://");
173: strncat(result, file, end-file);
174: colon = strchr(result+7, ':'); /* Expect colon after host */
175: if (colon) {
176: for(; colon[0]; colon[0]=colon[1], colon++); /* move down */
177: }
178: return result;
179: } /* WWW_from_archie */
180:
2.2 timbl 181: /* Transform document identifier into URL
182: ** --------------------------------------
2.1 timbl 183: **
184: ** Bugs: A static buffer of finite size is used!
185: ** The format of the docid MUST be good!
186: **
187: ** On exit,
188: ** returns nil if error
189: ** pointer to malloced string (must be freed) if ok
190: */
2.9 timbl 191: PRIVATE char hex [17] = "0123456789ABCDEF";
2.2 timbl 192: extern char from_hex PARAMS((char a)); /* In HTWSRC @@ */
193:
194: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
195:
2.1 timbl 196: {
2.21 frystyk 197: static unsigned char buf[BIG];
198: char num[10];
199: unsigned char * q = buf;
2.1 timbl 200: char * p = (docid->bytes);
201: int i, l;
2.25 frystyk 202: if (PROT_TRACE) {
2.1 timbl 203: char *p;
2.31 frystyk 204: fprintf(TDEST, "HTLoadWAIS.. id (%d bytes) is ", (int)docid->size);
2.1 timbl 205: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
206: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
2.31 frystyk 207: fprintf(TDEST, "%c", *p);
2.1 timbl 208: else
2.31 frystyk 209: fprintf(TDEST, "<%x>", (unsigned)*p);
2.1 timbl 210: }
2.31 frystyk 211: fprintf(TDEST, "\n");
2.1 timbl 212: }
213: for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
2.25 frystyk 214: if (PROT_TRACE)
2.31 frystyk 215: fprintf(TDEST, "............ Record type %d, length %d\n",
2.24 frystyk 216: (unsigned char) p[0], (unsigned char) p[1]);
2.21 frystyk 217: sprintf(num, "%d", (int)*p);
218: memcpy(q, num, strlen(num));
219: q += strlen(num);
220: p++;
221: *q++ = '='; /* Separate */
222: l = (int)((unsigned char)*p);
223: p++;
224: if (l > 127)
225: {
226: l = (l - 128) * 128;
227: l = l + (int)((unsigned char)*p);
228: p++;
229: }
230:
231: for (i = 0; i < l; i++, p++)
232: {
233: if (!acceptable[(unsigned char)*p])
234: {
235: *q++ = HEX_ESCAPE;
236: *q++ = hex[((unsigned char)*p) >> 4];
237: *q++ = hex[((unsigned char)*p) & 15];
238: }
239: else *q++ = (unsigned char)*p;
240: }
241: *q++= ';'; /* Terminate field */
242: #ifdef OLD_CODE
2.1 timbl 243: if (*p>10) {
2.25 frystyk 244: if (PROT_TRACE)
2.31 frystyk 245: fprintf(TDEST, "WAIS........ DOCID record type of %d!\n", *p);
2.1 timbl 246: return 0;
247: }
2.2 timbl 248: { /* Bug fix -- allow any byte value 15 Apr 93 */
249: unsigned int i = (unsigned) *p++;
250:
251: if (i > 99) {
252: *q++ = (i/100) + '0';
253: i = i % 100;
254: }
255: if (i > 9) {
256: *q++ = (i/10) + '0';
257: i = i % 10;
258: }
259: *q++ = i + '0'; /* Record type */
260: }
2.1 timbl 261: *q++ = '='; /* Separate */
262: l = *p++; /* Length */
263: for(i=0; i<l; i++, p++){
2.18 luotonen 264: if (!acceptable[(int)*p]) {
2.1 timbl 265: *q++ = HEX_ESCAPE; /* Means hex commming */
266: *q++ = hex[(*p) >> 4];
267: *q++ = hex[(*p) & 15];
268: }
269: else *q++ = *p;
270: }
271: *q++= ';'; /* Terminate field */
2.21 frystyk 272: #endif /* OLD_CODE */
2.1 timbl 273: }
274: *q++ = 0; /* Terminate string */
2.31 frystyk 275: if (PROT_TRACE) fprintf(TDEST, "HTLoadWAIS.. WWW form of id: %s\n", buf);
2.1 timbl 276: {
2.24 frystyk 277: char *result;
278: if ((result = (char *) malloc((int) strlen(buf)+1)) == NULL)
279: outofmem(__FILE__, "WWW_from_WAIS");
2.1 timbl 280: strcpy(result, buf);
281: return result;
282: }
283: } /* WWW_from_WAIS */
284:
285:
2.2 timbl 286: /* Transform URL into WAIS document identifier
287: ** -------------------------------------------
2.1 timbl 288: **
289: ** On entry,
290: ** docname points to valid name produced originally by
291: ** WWW_from_WAIS
292: ** On exit,
293: ** docid->size is valid
294: ** docid->bytes is malloced and must later be freed.
295: */
296: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
297: {
298: char *z; /* Output pointer */
299: char *sor; /* Start of record - points to size field. */
300: char *p; /* Input pointer */
301: char *q; /* Poisition of "=" */
302: char *s; /* Position of semicolon */
303: int n; /* size */
2.25 frystyk 304: if (PROT_TRACE)
2.31 frystyk 305: fprintf(TDEST, "HTLoadWAIS.. WWW id (to become WAIS id): %s\n",
2.24 frystyk 306: docname);
2.1 timbl 307: for(n=0, p = docname; *p; p++) { /* Count sizes of strings */
308: n++;
309: if (*p == ';') n--; /* Not converted */
310: else if (*p == HEX_ESCAPE) n=n-2; /* Save two bytes */
311: docid->size = n;
312: }
313:
2.21 frystyk 314: docid->bytes = (char *) malloc(docid->size+32); /* result record */
2.1 timbl 315: z = docid->bytes;
316:
2.21 frystyk 317: for(p = docname; *p; ) {
318: q = strchr(p, '=');
319: if (!q)
320: return 0;
321: *q = '\0';
322: *z++ = atoi(p);
323: *q = '=';
324: s = strchr(q, ';'); /* (Check only) */
325: if (!s)
326: return 0; /* Bad! No ';'; */
327: sor = z; /* Remember where the size field was */
328: z++; /* Skip record size for now */
329:
330: {
331: int len;
332: int tmp;
333: for(p=q+1; *p!=';' ; ) {
334: if (*p == HEX_ESCAPE) {
335: char c;
336: unsigned int b;
337: p++;
338: c = *p++;
339: b = from_hex(c);
340: c = *p++;
341: if (!c)
342: break; /* Odd number of chars! */
343: *z++ = (b<<4) + from_hex(c);
344: } else {
345: *z++ = *p++; /* Record */
346: }
347: }
348: len = (z-sor-1);
349:
350: z = sor;
351: if (len > 127) {
352: tmp = (len / 128);
353: len = len - (tmp * 128);
354: tmp = tmp + 128;
355: *z++ = (char)tmp;
356: *z = (char)len;
357: } else {
358: *z = (char)len;
359: }
360: z++;
361: }
362:
363: for(p=q+1; *p!=';' ; ) {
364: if (*p == HEX_ESCAPE) {
365: char c;
366: unsigned int b;
367: p++;
368: c = *p++;
369: b = from_hex(c);
370: c = *p++;
371: if (!c)
372: break; /* Odd number of chars! */
373: *z++ = (b<<4) + from_hex(c);
374: } else {
375: *z++ = *p++; /* Record */
376: }
377: }
378: p++; /* After semicolon: start of next record */
379: }
380:
381: #ifdef OLD_CODE
2.1 timbl 382: for(p = docname; *p; ) { /* Convert of strings */
2.2 timbl 383: /* Record type */
384:
385: *z = 0; /* Initialize record type */
386: while (*p >= '0' && *p <= '9') {
387: *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */
388: }
389: z++;
390: if (*p != '=') return 0;
391: q = p;
392:
393: /* *z++ = *p++ - '0';
2.1 timbl 394: q = strchr(p , '=');
395: if (!q) return 0;
2.2 timbl 396: */
2.1 timbl 397: s = strchr(q, ';'); /* (Check only) */
398: if (!s) return 0; /* Bad! No ';'; */
399: sor = z; /* Remember where the size field was */
400: z++; /* Skip record size for now */
401: for(p=q+1; *p!=';' ; ) {
402: if (*p == HEX_ESCAPE) {
403: char c;
404: unsigned int b;
405: p++;
406: c = *p++;
407: b = from_hex(c);
408: c = *p++;
409: if (!c) break; /* Odd number of chars! */
410: *z++ = (b<<4) + from_hex(c);
411: } else {
412: *z++ = *p++; /* Record */
413: }
414: }
415: *sor = (z-sor-1); /* Fill in size -- not counting size itself */
416: p++; /* After semicolon: start of next record */
417: }
2.21 frystyk 418: #endif /* OLD_CODE */
2.25 frystyk 419: if (PROT_TRACE) {
2.1 timbl 420: char *p;
2.31 frystyk 421: fprintf(TDEST, "WAIS........ id (%d bytes) is ", (int)docid->size);
2.1 timbl 422: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
423: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
2.31 frystyk 424: fprintf(TDEST, "%c", *p);
2.1 timbl 425: else
2.31 frystyk 426: fprintf(TDEST, "<%x>", (unsigned)*p);
2.1 timbl 427: }
2.31 frystyk 428: fprintf(TDEST, "\n");
2.1 timbl 429: }
430: return docid; /* Ok */
431:
432: } /* WAIS_from_WWW */
433:
434:
435: /* Send a plain text record to the client output_text_record()
436: ** --------------------------------------
437: */
2.2 timbl 438:
2.9 timbl 439: PRIVATE void output_text_record ARGS4(
2.2 timbl 440: HTStream *, target,
441: WAISDocumentText *, record,
2.9 timbl 442: boolean, quote_string_quotes,
443: boolean, binary)
2.1 timbl 444: {
445: long count;
446: /* printf(" Text\n");
447: print_any(" DocumentID: ", record->DocumentID);
448: printf(" VersionNumber: %d\n", record->VersionNumber);
449: */
2.9 timbl 450:
451: if (binary) {
452: (*target->isa->put_block)(target,
453: record->DocumentText->bytes,
454: record->DocumentText->size);
455: return;
456: }
457:
2.1 timbl 458: for(count = 0; count < record->DocumentText->size; count++){
459: long ch = (unsigned char)record->DocumentText->bytes[count];
2.2 timbl 460: if (ch == 27) { /* What is this in for? Tim */
2.1 timbl 461:
462: /* then we have an escape code */
463: /* if the next letter is '(' or ')', then ignore two letters */
464: if('(' == record->DocumentText->bytes[count + 1] ||
465: ')' == record->DocumentText->bytes[count + 1])
466: count += 1; /* it is a term marker */
467: else count += 4; /* it is a paragraph marker */
468: } else if (ch == '\n' || ch == '\r') {
2.2 timbl 469: PUTC('\n');
2.1 timbl 470: } else if ((ch=='\t') || isprint(ch)){
2.2 timbl 471: PUTC(ch);
2.1 timbl 472: }
473: }
474: } /* output text record */
475:
476:
2.2 timbl 477:
2.1 timbl 478: /* Format A Search response for the client display_search_response
479: ** ---------------------------------------
480: */
481: /* modified from tracy shen's version in wutil.c
482: * displays either a text record or a set of headlines.
483: */
484: void
2.2 timbl 485: display_search_response ARGS4(
486: HTStructured *, target,
2.1 timbl 487: SearchResponseAPDU *, response,
488: char *, database,
489: char *, keywords)
490: {
491: WAISSearchResponse *info;
492: long i, k;
493:
494: BOOL archie = strstr(database, "archie")!=0; /* Specical handling */
495:
2.31 frystyk 496: if (PROT_TRACE) fprintf(TDEST, "WAIS........ Displaying search response\n");
2.1 timbl 497: sprintf(line,
2.21 frystyk 498: "Index %s contains the following %d item%s relevant to '%s'.\n",
499: database,
500: (int)(response->NumberOfRecordsReturned),
501: response->NumberOfRecordsReturned ==1 ? "" : "s",
502: keywords);
2.2 timbl 503: PUTS(line);
504: PUTS("The first figure for each entry is its relative score, ");
505: PUTS("the second the number of lines in the item.");
506: START(HTML_MENU);
507:
2.1 timbl 508: if ( response->DatabaseDiagnosticRecords != 0 ) {
509: info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
510: i =0;
511:
512: if (info->Diagnostics != NULL)
2.2 timbl 513: showDiags((HTStream*)target, info->Diagnostics);
2.1 timbl 514:
515: if ( info->DocHeaders != 0 ) {
516: for (k=0; info->DocHeaders[k] != 0; k++ ) {
517: WAISDocumentHeader* head = info->DocHeaders[k];
518: char * headline = trim_junk(head->Headline);
519: any * docid = head->DocumentID;
520: char * docname; /* printable version of docid */
521: i++;
522:
523: /* Make a printable string out of the document id.
524: */
2.25 frystyk 525: if (PROT_TRACE)
2.31 frystyk 526: fprintf(TDEST, "HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n", i,
2.24 frystyk 527: (long int)(info->DocHeaders[k]->Score),
528: (long int)(info->DocHeaders[k]->Lines),
529: headline);
2.1 timbl 530:
2.2 timbl 531: START(HTML_LI);
532: sprintf(line, "%4ld %4ld ",
533: head->Score,
534: head->Lines);
2.23 frystyk 535: PUTS(line);
2.2 timbl 536:
2.1 timbl 537: if (archie) {
538: char * www_name = WWW_from_archie(headline);
539: if (www_name) {
2.2 timbl 540: HTStartAnchor(target, NULL, www_name);
2.1 timbl 541: PUTS(headline);
2.2 timbl 542:
543: END(HTML_A);
2.1 timbl 544: free(www_name);
545: } else {
2.2 timbl 546: PUTS(headline);
547: PUTS(" (bad file name)");
2.1 timbl 548: }
549: } else { /* Not archie */
550: docname = WWW_from_WAIS(docid);
551: if (docname) {
2.6 timbl 552: char * dbname = HTEscape(database, URL_XPALPHAS);
2.21 frystyk 553: char types_array[1000]; /* bad */
554: char *type_escaped;
555: types_array[0] = 0;
556: if (head->Types) {
557: int i;
558: for (i = 0; head->Types[i]; i++)
559: {
560: if (i)
561: strcat (types_array, ",");
562:
563: type_escaped = HTEscape (head->Types[i], URL_XALPHAS);
564: strcat (types_array, type_escaped);
565: free (type_escaped);
566: }
2.25 frystyk 567: if (PROT_TRACE)
2.31 frystyk 568: fprintf (TDEST, "WAIS........ Types_array `%s\'\n",
2.24 frystyk 569: types_array);
2.21 frystyk 570: } else {
571: strcat (types_array, "TEXT");
572: }
573: sprintf(line, "%s/%s/%d/%s",
574: dbname,
575: types_array,
576: (int)(head->DocumentLength),
577: docname);
578: #ifdef OLD_CODE
2.1 timbl 579: sprintf(line, "%s/%s/%d/%s", /* W3 address */
580: dbname,
581: head->Types ? head->Types[0] : "TEXT",
2.2 timbl 582: (int)(head->DocumentLength),
2.1 timbl 583: docname);
2.21 frystyk 584: #endif /* OLD_CODE */
2.11 secret 585: HTStartAnchor(target, NULL, ( (head->Types)
586: && (!strcmp(head->Types[0], "URL"))) ?
587: headline : line); /* NT, Sep 93 */
2.2 timbl 588: PUTS(headline);
589: END(HTML_A);
2.1 timbl 590: free(dbname);
591: free(docname);
592: } else {
2.2 timbl 593: PUTS("(bad doc id)");
2.1 timbl 594: }
595: }
596: } /* next document header */
597: } /* if there were any document headers */
598:
599: if ( info->ShortHeaders != 0 ) {
600: k =0;
601: while (info->ShortHeaders[k] != 0 ) {
602: i++;
2.2 timbl 603: PUTS( "(Short Header record, can't display)");
2.1 timbl 604: }
605: }
606: if ( info->LongHeaders != 0 ) {
607: k =0;
608: while (info->LongHeaders[k] != 0) {
609: i++;
610: PUTS( "\nLong Header record, can't display\n");
611: }
612: }
613: if ( info->Text != 0 ) {
614: k =0;
615: while (info->Text[k] != 0) {
616: i++;
617: PUTS( "\nText record\n");
2.9 timbl 618: output_text_record((HTStream*)target, info->Text[k++], false, false);
2.1 timbl 619: }
620: }
621: if ( info->Headlines != 0 ) {
622: k =0;
623: while (info->Headlines[k] != 0) {
624: i++;
625: PUTS( "\nHeadline record, can't display\n");
626: /* dsply_headline_record( info->Headlines[k++]); */
627: }
628: }
629: if ( info->Codes != 0 ) {
630: k =0;
631: while (info->Codes[k] != 0) {
632: i++;
633: PUTS( "\nCode record, can't display\n");
634: /* dsply_code_record( info->Codes[k++]); */
635: }
636: }
637: } /* Loop: display user info */
2.2 timbl 638: END(HTML_MENU);
2.1 timbl 639: PUTC('\n'); ;
640: }
641:
642:
643:
2.2 timbl 644:
2.20 frystyk 645: /* Load Document from WAIS Server HTLoadWAIS()
646: ** ------------------------------
2.2 timbl 647: **
2.20 frystyk 648: ** On entry,
649: ** request This is the request structure
650: ** On exit,
651: ** returns <0 Error has occured
652: ** HT_LOADED OK
2.1 timbl 653: */
2.13 timbl 654: PUBLIC int HTLoadWAIS ARGS1(HTRequest * , request)
2.1 timbl 655:
2.21 frystyk 656: #define MAX_KEYWORDS_LENGTH 4000
2.1 timbl 657: #define MAX_SERVER_LENGTH 1000
658: #define MAX_DATABASE_LENGTH 1000
659: #define MAX_SERVICE_LENGTH 1000
660:
661: {
2.13 timbl 662: CONST char * arg = HTAnchor_physical(request->anchor);
663: HTFormat format_out = request->output_format;
664: HTStream* sink = request->output_stream;
2.20 frystyk 665: #if 0
2.1 timbl 666: static CONST char * error_header =
2.7 timbl 667: "<h1>Access error</h1>\nThe following error occured in accesing a WAIS server:<P>\n";
2.20 frystyk 668: #endif
2.2 timbl 669: char * key; /* pointer to keywords in URL */
2.1 timbl 670: char* request_message = NULL; /* arbitrary message limit */
671: char* response_message = NULL; /* arbitrary message limit */
672: long request_buffer_length; /* how of the request is left */
673: SearchResponseAPDU *retrieval_response = 0;
674: char keywords[MAX_KEYWORDS_LENGTH + 1];
675: char *server_name;
2.6 timbl 676: char *wais_database = NULL; /* name of current database */
677: char *www_database; /* Same name escaped */
2.1 timbl 678: char *service;
679: char *doctype;
680: char *doclength;
681: long document_length;
682: char *docname;
683: FILE *connection = 0;
684: char * names; /* Copy of arg to be hacked up */
685: BOOL ok = NO;
2.20 frystyk 686: int status = -1;
2.21 frystyk 687: char *basetitle = NULL;
2.1 timbl 688:
2.31 frystyk 689: #if 0
2.1 timbl 690: extern FILE * connect_to_server();
2.31 frystyk 691: #endif
2.1 timbl 692:
2.27 frystyk 693: if (PROT_TRACE)
2.31 frystyk 694: fprintf(TDEST, "HTLoadWAIS.. Looking for `%s\'\n", arg);
2.27 frystyk 695:
2.1 timbl 696: if (!acceptable_inited) init_acceptable();
697:
698:
699: /* Decipher and check syntax of WWW address:
700: ** ----------------------------------------
701: **
702: ** First we remove the "wais:" if it was spcified. 920110
703: */
704: names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2 timbl 705: key = strchr(names, '?');
706:
707: if (key) {
708: char * p;
709: *key++ = 0; /* Split off keywords */
710: for (p=key; *p; p++) if (*p == '+') *p = ' ';
711: HTUnEscape(key);
712: }
2.1 timbl 713: if (names[0]== '/') {
714: server_name = names+1;
2.18 luotonen 715: if ((as_gate =(*server_name == '/')))
2.1 timbl 716: server_name++; /* Accept one or two */
717: www_database = strchr(server_name,'/');
718: if (www_database) {
719: *www_database++ = 0; /* Separate database name */
720: doctype = strchr(www_database, '/');
721: if (key) ok = YES; /* Don't need doc details */
722: else if (doctype) { /* If not search parse doc details */
723: *doctype++ = 0; /* Separate rest of doc address */
724: doclength = strchr(doctype, '/');
725: if(doclength) {
726: *doclength++ = 0;
727: document_length = atol(doclength);
728: if (document_length) {
729: docname=strchr(doclength, '/');
730: if (docname) {
731: *docname++ = 0;
732: ok = YES; /* To avoid a goto! */
733: } /* if docname */
734: } /* if document_length valid */
735: } /* if doclength */
736: } else { /* no doctype? Assume index required */
737: if (!key) key = "";
738: ok = YES;
739: } /* if doctype */
740: } /* if database */
741: }
742:
2.21 frystyk 743: if (!ok) {
744: char *unescaped = NULL;
745: StrAllocCopy(unescaped, arg);
746: HTUnEscape(unescaped);
747: HTErrorAdd(request, ERR_FATAL, NO, HTERR_BAD_REQUEST,
748: (void *) unescaped, (int) strlen(unescaped),
749: "HTLoadWAIS");
750: free(unescaped);
751: free(names);
752: return -1;
753: }
754:
2.31 frystyk 755: if (PROT_TRACE) fprintf(TDEST, "HTLoadWAIS.. URL Parsed OK\n");
2.1 timbl 756:
757: service = strchr(names, ':');
758: if (service) *service++ = 0;
759: else service = "210";
760:
761: if (server_name[0] == 0)
762: connection = NULL;
763:
764: else if (!(key && !*key))
2.20 frystyk 765: if ((connection=connect_to_server(server_name,atoi(service))) == NULL) {
2.21 frystyk 766: char *host = HTParse(arg, "", PARSE_HOST);
2.25 frystyk 767: if (PROT_TRACE)
2.31 frystyk 768: fprintf (TDEST, "HTLoadWAIS.. Can't open connection to %s via service %s.\n",
2.24 frystyk 769: server_name, service);
2.21 frystyk 770: HTErrorAdd(request, ERR_FATAL, NO, HTERR_WAIS_NO_CONNECT,
2.20 frystyk 771: (void *) host, (int) strlen(host), "HTLoadWAIS");
772: goto cleanup;
773: }
2.1 timbl 774:
2.6 timbl 775: StrAllocCopy(wais_database,www_database);
776: HTUnEscape(wais_database);
2.21 frystyk 777:
778: /* Make title name without the .src */
779: {
780: char *srcstr;
781: StrAllocCopy(basetitle, wais_database);
782: if ((srcstr = strstr(basetitle, ".src")) != NULL)
783: *srcstr = '\0';
784: }
2.6 timbl 785:
2.21 frystyk 786: /* This below fixed size stuff is terrible */
2.1 timbl 787: request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
788: response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
789:
790: /* If keyword search is performed but there are no keywords,
791: ** the user has followed a link to the index itself. It would be
792: ** appropriate at this point to send him the .SRC file - how?
793: */
794:
795: if (key && !*key) { /* I N D E X */
796:
2.7 timbl 797: #ifdef CACHE_FILE_PREFIX
798: char filename[256];
799: FILE * fp;
800: #endif
2.13 timbl 801: HTStructured * target = HTML_new(request, NULL,
802: WWW_HTML, format_out, sink);
2.1 timbl 803:
2.8 timbl 804: {
2.27 frystyk 805: START(HTML_HTML);
806: START(HTML_HEAD);
2.8 timbl 807: START(HTML_TITLE);
2.21 frystyk 808: PUTS(basetitle);
809: PUTS(" Index");
2.8 timbl 810: END(HTML_TITLE);
2.27 frystyk 811: END(HTML_HEAD);
2.8 timbl 812:
2.27 frystyk 813: START(HTML_BODY);
2.8 timbl 814: START(HTML_H1);
2.21 frystyk 815: PUTS("WAIS Index: ");
816: PUTS(basetitle);
2.8 timbl 817: END(HTML_H1);
818:
819: }
2.21 frystyk 820: START(HTML_ISINDEX);
821:
822: /* If we have seen a source file for this database, use that: */
2.2 timbl 823:
2.7 timbl 824: #ifdef CACHE_FILE_PREFIX
2.8 timbl 825: sprintf(filename, "%sWSRC-%s:%s:%.100s.txt",
2.7 timbl 826: CACHE_FILE_PREFIX,
2.1 timbl 827: server_name, service, www_database);
828:
829: fp = fopen(filename, "r"); /* Have we found this already? */
2.31 frystyk 830: if (PROT_TRACE) fprintf(TDEST,
2.21 frystyk 831: "HTLoadWAIS.. Description of server %s %s.\n",
2.1 timbl 832: filename,
833: fp ? "exists already" : "does NOT exist!");
2.2 timbl 834:
2.1 timbl 835: if (fp) {
2.24 frystyk 836: int c;
837: START(HTML_PRE); /* Preformatted description */
838: while((c=getc(fp)) != EOF)
839: PUTC(c); /* Transfer file */
2.7 timbl 840: END(HTML_PRE);
2.1 timbl 841: fclose(fp);
2.8 timbl 842: }
2.2 timbl 843: #endif
2.27 frystyk 844: END(HTML_BODY);
845: END(HTML_HTML);
2.2 timbl 846: FREE_TARGET;
2.1 timbl 847:
848: } else if (key) { /* S E A R C H */
849: char *p;
2.2 timbl 850: HTStructured * target;
851:
2.1 timbl 852: strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
2.18 luotonen 853: while ((p = strchr(keywords,'+'))) *p = ' ';
2.1 timbl 854:
855: /* Send advance title to get something fast to the other end */
856:
2.13 timbl 857: target = HTML_new(request, NULL, WWW_HTML, format_out, sink);
2.2 timbl 858:
2.23 frystyk 859: START(HTML_HTML);
860: START(HTML_HEAD);
2.2 timbl 861: START(HTML_TITLE);
862: PUTS(keywords);
2.21 frystyk 863: PUTS(" in ");
864: PUTS(basetitle);
2.2 timbl 865: END(HTML_TITLE);
2.23 frystyk 866: END(HTML_HEAD);
2.2 timbl 867:
2.23 frystyk 868: START(HTML_BODY);
2.2 timbl 869: START(HTML_H1);
2.21 frystyk 870: PUTS("WAIS Search of \"");
2.2 timbl 871: PUTS(keywords);
2.21 frystyk 872: PUTS("\" in ");
873: PUTS(basetitle);
2.2 timbl 874: END(HTML_H1);
2.1 timbl 875:
2.21 frystyk 876: START(HTML_ISINDEX);
877:
2.1 timbl 878: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.25 frystyk 879: if (PROT_TRACE)
2.31 frystyk 880: fprintf(TDEST, "HTLoadWAIS.. Search for `%s' in `%s'\n",
2.24 frystyk 881: keywords, wais_database);
2.21 frystyk 882: if(generate_search_apdu(request_message + HEADER_LENGTH,
2.1 timbl 883: &request_buffer_length,
2.21 frystyk 884: keywords, wais_database, NULL,
885: HTMaxWAISLines) == NULL) {
2.25 frystyk 886: if (PROT_TRACE)
2.31 frystyk 887: fprintf(TDEST, "WAIS Search. Too many lines in response\n");
888: HTErrorAdd(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 889: NULL, 0, "HTLoadWAIS");
890: }
2.1 timbl 891:
892: if(!interpret_message(request_message,
893: MAX_MESSAGE_LEN - request_buffer_length,
894: response_message,
895: MAX_MESSAGE_LEN,
896: connection,
897: false /* true verbose */
898: )) {
2.25 frystyk 899: if (PROT_TRACE)
2.31 frystyk 900: fprintf(TDEST, "WAIS Search. Too many lines in response\n");
901: HTErrorAdd(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 902: NULL, 0, "HTLoadWAIS");
2.1 timbl 903: } else { /* returned message ok */
904: SearchResponseAPDU *query_response = 0;
905: readSearchResponseAPDU(&query_response,
906: response_message + HEADER_LENGTH);
2.2 timbl 907: display_search_response(target,
908: query_response, wais_database, keywords);
2.1 timbl 909: if (query_response->DatabaseDiagnosticRecords)
910: freeWAISSearchResponse(
911: query_response->DatabaseDiagnosticRecords);
912: freeSearchResponseAPDU( query_response);
913: } /* returned message not too large */
914:
2.23 frystyk 915: END(HTML_BODY);
916: END(HTML_HTML);
2.2 timbl 917: FREE_TARGET;
918:
2.1 timbl 919: } else { /* D O C U M E N T F E T C H */
920:
2.9 timbl 921: boolean binary; /* how to transfer stuff coming over */
2.2 timbl 922: HTStream * target;
2.1 timbl 923: long count;
924: any doc_chunk;
925: any * docid = &doc_chunk;
2.25 frystyk 926: if (PROT_TRACE)
2.31 frystyk 927: fprintf(TDEST,
2.24 frystyk 928: "HTLoadWAIS.. Retrieve document `%s'\n............ type `%s' length %ld\n", docname, doctype, document_length);
2.2 timbl 929:
2.32 frystyk 930: HTAnchor_setFormat(request->anchor,
2.2 timbl 931: !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
2.23 frystyk 932: !strcmp(doctype, "TEXT") ? WWW_UNKNOWN :
2.32 frystyk 933: !strcmp(doctype, "HTML") ? WWW_HTML:
934: !strcmp(doctype, "GIF") ? WWW_GIF:
935: HTAtom_for("application/octet-stream"));
2.9 timbl 936: binary =
937: 0 != strcmp(doctype, "WSRC") &&
938: 0 != strcmp(doctype, "TEXT") &&
939: 0 != strcmp(doctype, "HTML") ;
940:
2.23 frystyk 941: /* Guess on TEXT format as it might be HTML */
2.32 frystyk 942: if ((target = HTStreamStack(HTAnchor_format(request->anchor),
2.31 frystyk 943: request->output_format,
2.28 frystyk 944: request->output_stream,
945: request, YES)) == NULL) {
2.21 frystyk 946: status = -1;
947: goto cleanup;
948: }
2.2 timbl 949:
2.21 frystyk 950: /* Decode hex or litteral format for document ID */
2.1 timbl 951: WAIS_from_WWW(docid, docname);
952:
2.21 frystyk 953: /* Loop over slices of the document */
954: for (count = 0; count * CHARS_PER_PAGE < document_length; count++) {
955: char *type = s_strdup(doctype);
956: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.31 frystyk 957: if (PROT_TRACE) fprintf(TDEST, "HTLoadWAIS.. Slice number %ld\n",
2.21 frystyk 958: count);
959: if (generate_retrieval_apdu(request_message + HEADER_LENGTH,
960: &request_buffer_length,
961: docid, CT_byte,
962: count * CHARS_PER_PAGE,
963: HTMIN((count + 1) * CHARS_PER_PAGE,
964: document_length),
965: type,
966: wais_database) == 0) {
2.31 frystyk 967: HTErrorAdd(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 968: NULL, 0, "HTLoadWAIS");
969: }
970: FREE(type);
971:
972: /* Actually do the transaction given by request_message */
973: if (interpret_message(request_message,
974: MAX_MESSAGE_LEN - request_buffer_length,
975: response_message,
976: MAX_MESSAGE_LEN,
977: connection,
978: false /* true verbose */
979: ) == 0) {
2.31 frystyk 980: HTErrorAdd(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 981: NULL, 0, "HTLoadWAIS");
982: }
983:
984: /* Parse the result which came back into memory. */
985: readSearchResponseAPDU(&retrieval_response,
986: response_message + HEADER_LENGTH);
987: {
988: WAISSearchResponse *searchres = (WAISSearchResponse *) retrieval_response->DatabaseDiagnosticRecords;
989: if (!searchres->Text) {
990: if (searchres->Diagnostics && *searchres->Diagnostics &&
991: (*searchres->Diagnostics)->ADDINFO) {
992: char *errmsg = (*searchres->Diagnostics)->ADDINFO;
2.31 frystyk 993: HTErrorAdd(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
2.21 frystyk 994: (void *) errmsg, (int) strlen(errmsg),
995: "HTLoadWAIS");
996: } else {
2.31 frystyk 997: HTErrorAdd(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
2.21 frystyk 998: NULL, 0, "HTLoadWAIS");
999: }
2.23 frystyk 1000: (*target->isa->_free)(target);
2.26 frystyk 1001: request->output_stream = NULL;
2.22 frystyk 1002: free (docid->bytes);
1003: freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords);
1004: freeSearchResponseAPDU( retrieval_response);
1005: goto cleanup;
2.21 frystyk 1006: } else {
1007: output_text_record(target, *searchres->Text,
1008: false, binary);
2.22 frystyk 1009: freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords);
1010: freeSearchResponseAPDU( retrieval_response);
2.21 frystyk 1011: } /* If text existed */
1012: }
1013:
1014: } /* Loop over slices */
2.1 timbl 1015:
2.23 frystyk 1016: (*target->isa->_free)(target);
2.26 frystyk 1017: request->output_stream = NULL;
2.1 timbl 1018: free (docid->bytes);
1019: } /* If document rather than search */
2.20 frystyk 1020: status = HT_LOADED;
2.2 timbl 1021:
2.20 frystyk 1022: cleanup:
2.34 ! frystyk 1023: if (connection) close_connection_to_server(connection);
2.1 timbl 1024: if (wais_database) free(wais_database);
2.20 frystyk 1025: if (request_message) s_free(request_message);
1026: if (response_message) s_free(response_message);
1027: FREE(names);
2.21 frystyk 1028: FREE(basetitle);
2.20 frystyk 1029: if (status < 0) {
1030: char *unescaped = NULL;
1031: StrAllocCopy(unescaped, arg);
1032: HTUnEscape(unescaped);
1033: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL, (void *) unescaped,
1034: (int) strlen(unescaped), "HTLoadWAIS");
1035: free(unescaped);
1036: }
1037: return status;
2.1 timbl 1038: }
1039:
/* Protocol descriptor for this module: names the scheme "wais" and gives
** HTLoadWAIS as its handler.  NOTE(review): SOC_BLOCK presumably selects
** blocking socket mode and the two NULL members are unused hooks --
** confirm against the HTProtocol declaration.
*/
2.28 frystyk 1040: GLOBALDEF PUBLIC HTProtocol HTWAIS = {
1041: "wais", SOC_BLOCK, HTLoadWAIS, NULL, NULL
1042: };
2.1 timbl 1043:
1044:
Webmaster