Annotation of libwww/Library/src/HTWAIS.c, revision 2.41
2.29 frystyk 1: /* HTWAIS.c
2: ** WORLDWIDEWEB - WIDE AREA INFORMATION SERVER ACCESS
3: **
2.33 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.29 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This module allows a WWW server or client to read data from a
2.29 frystyk 8: ** remote WAIS server, and provide that data to a WWW client in
9: ** hypertext form. Source files, once retrieved, are stored and used
10: ** to provide information about the index when that is accessed.
2.1 timbl 11: **
12: ** Authors
13: ** BK Brewster Kahle, Thinking Machines, <Brewster@think.com>
2.35 frystyk 14: ** TBL Tim Berners-Lee, CERN <timbl@w3.org>
2.1 timbl 15: **
16: ** History
17: ** Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
18: ** Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping)
19: ** Refers to lists of sources.
2.2 timbl 20: ** Mar 93 TBL Lib 2.0 compatible module made.
2.34 frystyk 21: ** May 95 CHJ modified for freeWAIS-0.5
2.1 timbl 22: **
23: ** Bugs
24: ** Uses C stream i/o to read and write sockets, which won't work
25: ** on VMS TCP systems.
26: **
27: ** Should cache connections.
28: **
29: ** ANSI C only as written
30: **
2.11 secret 31: ** Bugs fixed
32: ** NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
33: **
2.1 timbl 34: ** WAIS comments:
35: **
36: ** 1. Separate directories for different system's .o would help
37: ** 2. Document ids are rather long!
38: **
39: ** WWW Address mapping convention:
40: **
41: ** /servername/database/type/length/document-id
42: **
43: ** /servername/database?word+word+word
44: */
45: /* WIDE AREA INFORMATION SERVER SOFTWARE:
46: No guarantees or restrictions. See the readme file for the full standard
47: disclaimer.
48:
49: Brewster@think.com
50: */
51:
52:
2.8 timbl 53: #define DIRECTORY "/cnidr.org:210/directory-of-servers"
54: /* define DIRECTORY "/quake.think.com:210/directory-of-servers" */
2.1 timbl 55:
56: #define BIG 1024 /* identifier size limit @@@@@ */
57:
2.2 timbl 58: /* From WAIS
59: ** ---------
2.1 timbl 60: */
61:
62: #include <ui.h>
2.31 frystyk 63: #include <sockets.h>
2.1 timbl 64:
65: #define MAX_MESSAGE_LEN 100000
66: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
67: #define WAISSEARCH_DATE "Fri Jul 19 1991"
68:
69:
2.2 timbl 70: /* FROM WWW
71: ** --------
2.1 timbl 72: */
73: #define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
74:
2.2 timbl 75: #define HEX_ESCAPE '%'
2.24 frystyk 76:
2.31 frystyk 77: /* Library include files */
78: #include "tcp.h"
2.1 timbl 79: #include "HTUtils.h"
2.31 frystyk 80: #include "HTString.h"
2.1 timbl 81: #include "HTParse.h"
2.38 frystyk 82: #include "HTReqMan.h"
2.21 frystyk 83: #include "HTError.h"
2.37 frystyk 84: #include "HTMLGen.h"
2.25 frystyk 85: #include "HTParse.h"
86: #include "HTFormat.h"
87: #include "HTTCP.h"
2.2 timbl 88:
2.1 timbl 89: extern FILE * logfile; /* Log file output */
90:
2.27 frystyk 91: PUBLIC int HTMaxWAISLines = 200;/* Max number of entries from a search */
2.21 frystyk 92:
2.1 timbl 93: PRIVATE BOOL as_gate; /* Client is using us as gateway */
94:
95: PRIVATE char line[2048]; /* For building strings to display */
96: /* Must be able to take id */
2.2 timbl 97:
2.25 frystyk 98: /* Hypertext object building machinery */
2.2 timbl 99: #define PUTC(c) (*target->isa->put_character)(target, c)
100: #define PUTS(s) (*target->isa->put_string)(target, s)
101: #define START(e) (*target->isa->start_element)(target, e, 0, 0)
102: #define END(e) (*target->isa->end_element)(target, e)
2.23 frystyk 103: #define FREE_TARGET (*target->isa->_free)(target)
2.2 timbl 104:
105: struct _HTStructured {
106: CONST HTStructuredClass * isa;
107: /* ... */
108: };
109:
110: struct _HTStream {
111: CONST HTStreamClass * isa;
112: /* ... */
113: };
114:
115:
2.1 timbl 116: /* showDiags
117: */
118: /* modified from Jonny G's version in ui/question.c */
119:
2.2 timbl 120: void showDiags ARGS2(
121: HTStream *, target,
122: diagnosticRecord **, d)
2.1 timbl 123: {
124: long i;
125:
126: for (i = 0; d[i] != NULL; i++) {
127: if (d[i]->ADDINFO != NULL) {
128: PUTS("Diagnostic code is ");
129: PUTS(d[i]->DIAG);
130: PUTC(' ');
131: PUTS(d[i]->ADDINFO);
132: PUTC('\n'); ;
133: }
134: }
135: }
136:
137: /* Matrix of allowed characters in filenames
138: ** -----------------------------------------
139: */
140:
141: PRIVATE BOOL acceptable[256];
142: PRIVATE BOOL acceptable_inited = NO;
143:
144: PRIVATE void init_acceptable NOARGS
145: {
146: unsigned int i;
147: char * good =
148: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
149: for(i=0; i<256; i++) acceptable[i] = NO;
150: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
151: acceptable_inited = YES;
152: }
153:
/*	Transform file identifier into WWW address
**	------------------------------------------
**
** On entry,
**	file	text of the form "host:/path ...".  Only the leading run of
**		characters above ' ' is used; bytes are compared as unsigned
**		so a name containing 8-bit characters is not cut short.
**
** On exit,
**	returns		nil if file is nil or memory could not be allocated
**			pointer to malloced string (must be freed) if ok.
**			The string is "file://" followed by the name with
**			its first ':' (expected after the host) removed.
*/
char * WWW_from_archie (char * file)
{
    static const char scheme[] = "file://";
    size_t len;
    size_t i;
    int colon_dropped = 0;
    char * result;
    char * out;

    if (!file) return NULL;

    for (len = 0; (unsigned char) file[len] > ' '; len++)
	;

    result = (char *) malloc(sizeof(scheme) + len);	/* scheme incl. NUL */
    if (!result) return NULL;				/* Malloc error */

    memcpy(result, scheme, sizeof(scheme) - 1);
    out = result + sizeof(scheme) - 1;
    for (i = 0; i < len; i++) {
	if (!colon_dropped && file[i] == ':') {		/* Colon after host */
	    colon_dropped = 1;
	    continue;
	}
	*out++ = file[i];
    }
    *out = '\0';
    return result;
} /* WWW_from_archie */
178:
2.2 timbl 179: /* Transform document identifier into URL
180: ** --------------------------------------
2.1 timbl 181: **
182: ** Bugs: A static buffer of finite size is used!
183: ** The format of the docid MUST be good!
184: **
185: ** On exit,
186: ** returns nil if error
187: ** pointer to malloced string (must be freed) if ok
188: */
2.9 timbl 189: PRIVATE char hex [17] = "0123456789ABCDEF";
2.41 ! frystyk 190: extern char from_hex (char a); /* In HTWSRC @@ */
2.2 timbl 191:
192: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
193:
2.1 timbl 194: {
2.21 frystyk 195: static unsigned char buf[BIG];
196: char num[10];
197: unsigned char * q = buf;
2.1 timbl 198: char * p = (docid->bytes);
199: int i, l;
2.25 frystyk 200: if (PROT_TRACE) {
2.1 timbl 201: char *p;
2.39 frystyk 202: TTYPrint(TDEST, "HTLoadWAIS.. id (%d bytes) is ", (int)docid->size);
2.1 timbl 203: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
204: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
2.39 frystyk 205: TTYPrint(TDEST, "%c", *p);
2.1 timbl 206: else
2.39 frystyk 207: TTYPrint(TDEST, "<%x>", (unsigned)*p);
2.1 timbl 208: }
2.39 frystyk 209: TTYPrint(TDEST, "\n");
2.1 timbl 210: }
211: for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
2.25 frystyk 212: if (PROT_TRACE)
2.39 frystyk 213: TTYPrint(TDEST, "............ Record type %d, length %d\n",
2.24 frystyk 214: (unsigned char) p[0], (unsigned char) p[1]);
2.21 frystyk 215: sprintf(num, "%d", (int)*p);
216: memcpy(q, num, strlen(num));
217: q += strlen(num);
218: p++;
219: *q++ = '='; /* Separate */
220: l = (int)((unsigned char)*p);
221: p++;
222: if (l > 127)
223: {
224: l = (l - 128) * 128;
225: l = l + (int)((unsigned char)*p);
226: p++;
227: }
228:
229: for (i = 0; i < l; i++, p++)
230: {
231: if (!acceptable[(unsigned char)*p])
232: {
233: *q++ = HEX_ESCAPE;
234: *q++ = hex[((unsigned char)*p) >> 4];
235: *q++ = hex[((unsigned char)*p) & 15];
236: }
237: else *q++ = (unsigned char)*p;
238: }
239: *q++= ';'; /* Terminate field */
240: #ifdef OLD_CODE
2.1 timbl 241: if (*p>10) {
2.25 frystyk 242: if (PROT_TRACE)
2.39 frystyk 243: TTYPrint(TDEST, "WAIS........ DOCID record type of %d!\n", *p);
2.1 timbl 244: return 0;
245: }
2.2 timbl 246: { /* Bug fix -- allow any byte value 15 Apr 93 */
247: unsigned int i = (unsigned) *p++;
248:
249: if (i > 99) {
250: *q++ = (i/100) + '0';
251: i = i % 100;
252: }
253: if (i > 9) {
254: *q++ = (i/10) + '0';
255: i = i % 10;
256: }
257: *q++ = i + '0'; /* Record type */
258: }
2.1 timbl 259: *q++ = '='; /* Separate */
260: l = *p++; /* Length */
261: for(i=0; i<l; i++, p++){
2.18 luotonen 262: if (!acceptable[(int)*p]) {
2.1 timbl 263: *q++ = HEX_ESCAPE; /* Means hex commming */
264: *q++ = hex[(*p) >> 4];
265: *q++ = hex[(*p) & 15];
266: }
267: else *q++ = *p;
268: }
269: *q++= ';'; /* Terminate field */
2.21 frystyk 270: #endif /* OLD_CODE */
2.1 timbl 271: }
272: *q++ = 0; /* Terminate string */
2.39 frystyk 273: if (PROT_TRACE) TTYPrint(TDEST, "HTLoadWAIS.. WWW form of id: %s\n", buf);
2.1 timbl 274: {
2.24 frystyk 275: char *result;
276: if ((result = (char *) malloc((int) strlen(buf)+1)) == NULL)
277: outofmem(__FILE__, "WWW_from_WAIS");
2.1 timbl 278: strcpy(result, buf);
279: return result;
280: }
281: } /* WWW_from_WAIS */
282:
283:
2.2 timbl 284: /* Transform URL into WAIS document identifier
285: ** -------------------------------------------
2.1 timbl 286: **
287: ** On entry,
288: ** docname points to valid name produced originally by
289: ** WWW_from_WAIS
290: ** On exit,
291: ** docid->size is valid
292: ** docid->bytes is malloced and must later be freed.
293: */
294: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
295: {
296: char *z; /* Output pointer */
297: char *sor; /* Start of record - points to size field. */
298: char *p; /* Input pointer */
299: char *q; /* Poisition of "=" */
300: char *s; /* Position of semicolon */
301: int n; /* size */
2.25 frystyk 302: if (PROT_TRACE)
2.39 frystyk 303: TTYPrint(TDEST, "HTLoadWAIS.. WWW id (to become WAIS id): %s\n",
2.24 frystyk 304: docname);
2.1 timbl 305: for(n=0, p = docname; *p; p++) { /* Count sizes of strings */
306: n++;
307: if (*p == ';') n--; /* Not converted */
308: else if (*p == HEX_ESCAPE) n=n-2; /* Save two bytes */
309: docid->size = n;
310: }
311:
2.21 frystyk 312: docid->bytes = (char *) malloc(docid->size+32); /* result record */
2.1 timbl 313: z = docid->bytes;
314:
2.21 frystyk 315: for(p = docname; *p; ) {
316: q = strchr(p, '=');
317: if (!q)
318: return 0;
319: *q = '\0';
320: *z++ = atoi(p);
321: *q = '=';
322: s = strchr(q, ';'); /* (Check only) */
323: if (!s)
324: return 0; /* Bad! No ';'; */
325: sor = z; /* Remember where the size field was */
326: z++; /* Skip record size for now */
327:
328: {
329: int len;
330: int tmp;
331: for(p=q+1; *p!=';' ; ) {
332: if (*p == HEX_ESCAPE) {
333: char c;
334: unsigned int b;
335: p++;
336: c = *p++;
337: b = from_hex(c);
338: c = *p++;
339: if (!c)
340: break; /* Odd number of chars! */
341: *z++ = (b<<4) + from_hex(c);
342: } else {
343: *z++ = *p++; /* Record */
344: }
345: }
346: len = (z-sor-1);
347:
348: z = sor;
349: if (len > 127) {
350: tmp = (len / 128);
351: len = len - (tmp * 128);
352: tmp = tmp + 128;
353: *z++ = (char)tmp;
354: *z = (char)len;
355: } else {
356: *z = (char)len;
357: }
358: z++;
359: }
360:
361: for(p=q+1; *p!=';' ; ) {
362: if (*p == HEX_ESCAPE) {
363: char c;
364: unsigned int b;
365: p++;
366: c = *p++;
367: b = from_hex(c);
368: c = *p++;
369: if (!c)
370: break; /* Odd number of chars! */
371: *z++ = (b<<4) + from_hex(c);
372: } else {
373: *z++ = *p++; /* Record */
374: }
375: }
376: p++; /* After semicolon: start of next record */
377: }
378:
379: #ifdef OLD_CODE
2.1 timbl 380: for(p = docname; *p; ) { /* Convert of strings */
2.2 timbl 381: /* Record type */
382:
383: *z = 0; /* Initialize record type */
384: while (*p >= '0' && *p <= '9') {
385: *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */
386: }
387: z++;
388: if (*p != '=') return 0;
389: q = p;
390:
391: /* *z++ = *p++ - '0';
2.1 timbl 392: q = strchr(p , '=');
393: if (!q) return 0;
2.2 timbl 394: */
2.1 timbl 395: s = strchr(q, ';'); /* (Check only) */
396: if (!s) return 0; /* Bad! No ';'; */
397: sor = z; /* Remember where the size field was */
398: z++; /* Skip record size for now */
399: for(p=q+1; *p!=';' ; ) {
400: if (*p == HEX_ESCAPE) {
401: char c;
402: unsigned int b;
403: p++;
404: c = *p++;
405: b = from_hex(c);
406: c = *p++;
407: if (!c) break; /* Odd number of chars! */
408: *z++ = (b<<4) + from_hex(c);
409: } else {
410: *z++ = *p++; /* Record */
411: }
412: }
413: *sor = (z-sor-1); /* Fill in size -- not counting size itself */
414: p++; /* After semicolon: start of next record */
415: }
2.21 frystyk 416: #endif /* OLD_CODE */
2.25 frystyk 417: if (PROT_TRACE) {
2.1 timbl 418: char *p;
2.39 frystyk 419: TTYPrint(TDEST, "WAIS........ id (%d bytes) is ", (int)docid->size);
2.1 timbl 420: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
421: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
2.39 frystyk 422: TTYPrint(TDEST, "%c", *p);
2.1 timbl 423: else
2.39 frystyk 424: TTYPrint(TDEST, "<%x>", (unsigned)*p);
2.1 timbl 425: }
2.39 frystyk 426: TTYPrint(TDEST, "\n");
2.1 timbl 427: }
428: return docid; /* Ok */
429:
430: } /* WAIS_from_WWW */
431:
432:
433: /* Send a plain text record to the client output_text_record()
434: ** --------------------------------------
435: */
2.2 timbl 436:
2.9 timbl 437: PRIVATE void output_text_record ARGS4(
2.2 timbl 438: HTStream *, target,
439: WAISDocumentText *, record,
2.9 timbl 440: boolean, quote_string_quotes,
441: boolean, binary)
2.1 timbl 442: {
443: long count;
2.9 timbl 444: if (binary) {
445: (*target->isa->put_block)(target,
446: record->DocumentText->bytes,
447: record->DocumentText->size);
448: return;
449: }
450:
2.1 timbl 451: for(count = 0; count < record->DocumentText->size; count++){
452: long ch = (unsigned char)record->DocumentText->bytes[count];
2.2 timbl 453: if (ch == 27) { /* What is this in for? Tim */
2.1 timbl 454:
455: /* then we have an escape code */
456: /* if the next letter is '(' or ')', then ignore two letters */
457: if('(' == record->DocumentText->bytes[count + 1] ||
458: ')' == record->DocumentText->bytes[count + 1])
459: count += 1; /* it is a term marker */
460: else count += 4; /* it is a paragraph marker */
461: } else if (ch == '\n' || ch == '\r') {
2.2 timbl 462: PUTC('\n');
2.1 timbl 463: } else if ((ch=='\t') || isprint(ch)){
2.2 timbl 464: PUTC(ch);
2.1 timbl 465: }
466: }
467: } /* output text record */
468:
469:
2.2 timbl 470:
2.1 timbl 471: /* Format A Search response for the client display_search_response
472: ** ---------------------------------------
473: */
474: /* modified from tracy shen's version in wutil.c
475: * displays either a text record or a set of headlines.
476: */
477: void
2.2 timbl 478: display_search_response ARGS4(
479: HTStructured *, target,
2.1 timbl 480: SearchResponseAPDU *, response,
481: char *, database,
482: char *, keywords)
483: {
484: WAISSearchResponse *info;
485: long i, k;
486:
487: BOOL archie = strstr(database, "archie")!=0; /* Specical handling */
488:
2.39 frystyk 489: if (PROT_TRACE) TTYPrint(TDEST, "WAIS........ Displaying search response\n");
2.1 timbl 490: sprintf(line,
2.21 frystyk 491: "Index %s contains the following %d item%s relevant to '%s'.\n",
492: database,
493: (int)(response->NumberOfRecordsReturned),
494: response->NumberOfRecordsReturned ==1 ? "" : "s",
495: keywords);
2.2 timbl 496: PUTS(line);
497: PUTS("The first figure for each entry is its relative score, ");
498: PUTS("the second the number of lines in the item.");
499: START(HTML_MENU);
500:
2.1 timbl 501: if ( response->DatabaseDiagnosticRecords != 0 ) {
502: info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
503: i =0;
504:
505: if (info->Diagnostics != NULL)
2.2 timbl 506: showDiags((HTStream*)target, info->Diagnostics);
2.1 timbl 507:
508: if ( info->DocHeaders != 0 ) {
509: for (k=0; info->DocHeaders[k] != 0; k++ ) {
510: WAISDocumentHeader* head = info->DocHeaders[k];
511: char * headline = trim_junk(head->Headline);
512: any * docid = head->DocumentID;
513: char * docname; /* printable version of docid */
514: i++;
515:
516: /* Make a printable string out of the document id.
517: */
2.25 frystyk 518: if (PROT_TRACE)
2.39 frystyk 519: TTYPrint(TDEST, "HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n", i,
2.24 frystyk 520: (long int)(info->DocHeaders[k]->Score),
521: (long int)(info->DocHeaders[k]->Lines),
522: headline);
2.1 timbl 523:
2.2 timbl 524: START(HTML_LI);
525: sprintf(line, "%4ld %4ld ",
526: head->Score,
527: head->Lines);
2.23 frystyk 528: PUTS(line);
2.2 timbl 529:
2.1 timbl 530: if (archie) {
531: char * www_name = WWW_from_archie(headline);
532: if (www_name) {
2.2 timbl 533: HTStartAnchor(target, NULL, www_name);
2.1 timbl 534: PUTS(headline);
2.2 timbl 535:
536: END(HTML_A);
2.1 timbl 537: free(www_name);
538: } else {
2.2 timbl 539: PUTS(headline);
540: PUTS(" (bad file name)");
2.1 timbl 541: }
542: } else { /* Not archie */
543: docname = WWW_from_WAIS(docid);
544: if (docname) {
2.6 timbl 545: char * dbname = HTEscape(database, URL_XPALPHAS);
2.21 frystyk 546: char types_array[1000]; /* bad */
547: char *type_escaped;
548: types_array[0] = 0;
549: if (head->Types) {
550: int i;
551: for (i = 0; head->Types[i]; i++)
552: {
553: if (i)
554: strcat (types_array, ",");
555:
556: type_escaped = HTEscape (head->Types[i], URL_XALPHAS);
557: strcat (types_array, type_escaped);
558: free (type_escaped);
559: }
2.25 frystyk 560: if (PROT_TRACE)
2.39 frystyk 561: TTYPrint(TDEST, "WAIS........ Types_array `%s\'\n",
2.24 frystyk 562: types_array);
2.21 frystyk 563: } else {
564: strcat (types_array, "TEXT");
565: }
566: sprintf(line, "%s/%s/%d/%s",
567: dbname,
568: types_array,
569: (int)(head->DocumentLength),
570: docname);
571: #ifdef OLD_CODE
2.1 timbl 572: sprintf(line, "%s/%s/%d/%s", /* W3 address */
573: dbname,
574: head->Types ? head->Types[0] : "TEXT",
2.2 timbl 575: (int)(head->DocumentLength),
2.1 timbl 576: docname);
2.21 frystyk 577: #endif /* OLD_CODE */
2.11 secret 578: HTStartAnchor(target, NULL, ( (head->Types)
579: && (!strcmp(head->Types[0], "URL"))) ?
580: headline : line); /* NT, Sep 93 */
2.2 timbl 581: PUTS(headline);
582: END(HTML_A);
2.1 timbl 583: free(dbname);
584: free(docname);
585: } else {
2.2 timbl 586: PUTS("(bad doc id)");
2.1 timbl 587: }
588: }
589: } /* next document header */
590: } /* if there were any document headers */
591:
592: if ( info->ShortHeaders != 0 ) {
593: k =0;
594: while (info->ShortHeaders[k] != 0 ) {
595: i++;
2.2 timbl 596: PUTS( "(Short Header record, can't display)");
2.1 timbl 597: }
598: }
599: if ( info->LongHeaders != 0 ) {
600: k =0;
601: while (info->LongHeaders[k] != 0) {
602: i++;
603: PUTS( "\nLong Header record, can't display\n");
604: }
605: }
606: if ( info->Text != 0 ) {
607: k =0;
608: while (info->Text[k] != 0) {
609: i++;
610: PUTS( "\nText record\n");
2.9 timbl 611: output_text_record((HTStream*)target, info->Text[k++], false, false);
2.1 timbl 612: }
613: }
614: if ( info->Headlines != 0 ) {
615: k =0;
616: while (info->Headlines[k] != 0) {
617: i++;
618: PUTS( "\nHeadline record, can't display\n");
619: /* dsply_headline_record( info->Headlines[k++]); */
620: }
621: }
622: if ( info->Codes != 0 ) {
623: k =0;
624: while (info->Codes[k] != 0) {
625: i++;
626: PUTS( "\nCode record, can't display\n");
627: /* dsply_code_record( info->Codes[k++]); */
628: }
629: }
630: } /* Loop: display user info */
2.2 timbl 631: END(HTML_MENU);
2.1 timbl 632: PUTC('\n'); ;
633: }
634:
635:
636:
2.2 timbl 637:
2.20 frystyk 638: /* Load Document from WAIS Server HTLoadWAIS()
639: ** ------------------------------
2.2 timbl 640: **
2.20 frystyk 641: ** On entry,
642: ** request This is the request structure
643: ** On exit,
644: ** returns <0 Error has occured
645: ** HT_LOADED OK
2.1 timbl 646: */
2.38 frystyk 647: PUBLIC int HTLoadWAIS ARGS3(SOCKET, soc, HTRequest *, request, SockOps, ops)
2.1 timbl 648:
2.21 frystyk 649: #define MAX_KEYWORDS_LENGTH 4000
2.1 timbl 650: #define MAX_SERVER_LENGTH 1000
651: #define MAX_DATABASE_LENGTH 1000
652: #define MAX_SERVICE_LENGTH 1000
653:
654: {
2.13 timbl 655: CONST char * arg = HTAnchor_physical(request->anchor);
656: HTFormat format_out = request->output_format;
657: HTStream* sink = request->output_stream;
2.20 frystyk 658: #if 0
2.1 timbl 659: static CONST char * error_header =
2.7 timbl 660: "<h1>Access error</h1>\nThe following error occured in accesing a WAIS server:<P>\n";
2.20 frystyk 661: #endif
2.2 timbl 662: char * key; /* pointer to keywords in URL */
2.1 timbl 663: char* request_message = NULL; /* arbitrary message limit */
664: char* response_message = NULL; /* arbitrary message limit */
665: long request_buffer_length; /* how of the request is left */
666: SearchResponseAPDU *retrieval_response = 0;
667: char keywords[MAX_KEYWORDS_LENGTH + 1];
668: char *server_name;
2.6 timbl 669: char *wais_database = NULL; /* name of current database */
670: char *www_database; /* Same name escaped */
2.1 timbl 671: char *service;
672: char *doctype;
673: char *doclength;
674: long document_length;
675: char *docname;
676: FILE *connection = 0;
677: char * names; /* Copy of arg to be hacked up */
678: BOOL ok = NO;
2.20 frystyk 679: int status = -1;
2.21 frystyk 680: char *basetitle = NULL;
2.1 timbl 681:
2.31 frystyk 682: #if 0
2.1 timbl 683: extern FILE * connect_to_server();
2.31 frystyk 684: #endif
2.1 timbl 685:
2.27 frystyk 686: if (PROT_TRACE)
2.39 frystyk 687: TTYPrint(TDEST, "HTLoadWAIS.. Looking for `%s\'\n", arg);
2.27 frystyk 688:
2.1 timbl 689: if (!acceptable_inited) init_acceptable();
690:
691:
692: /* Decipher and check syntax of WWW address:
693: ** ----------------------------------------
694: **
695: ** First we remove the "wais:" if it was spcified. 920110
696: */
697: names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2 timbl 698: key = strchr(names, '?');
699:
700: if (key) {
701: char * p;
702: *key++ = 0; /* Split off keywords */
703: for (p=key; *p; p++) if (*p == '+') *p = ' ';
704: HTUnEscape(key);
705: }
2.1 timbl 706: if (names[0]== '/') {
707: server_name = names+1;
2.18 luotonen 708: if ((as_gate =(*server_name == '/')))
2.1 timbl 709: server_name++; /* Accept one or two */
710: www_database = strchr(server_name,'/');
711: if (www_database) {
712: *www_database++ = 0; /* Separate database name */
713: doctype = strchr(www_database, '/');
714: if (key) ok = YES; /* Don't need doc details */
715: else if (doctype) { /* If not search parse doc details */
716: *doctype++ = 0; /* Separate rest of doc address */
717: doclength = strchr(doctype, '/');
718: if(doclength) {
719: *doclength++ = 0;
720: document_length = atol(doclength);
721: if (document_length) {
722: docname=strchr(doclength, '/');
723: if (docname) {
724: *docname++ = 0;
725: ok = YES; /* To avoid a goto! */
726: } /* if docname */
727: } /* if document_length valid */
728: } /* if doclength */
729: } else { /* no doctype? Assume index required */
730: if (!key) key = "";
731: ok = YES;
732: } /* if doctype */
733: } /* if database */
734: }
735:
2.21 frystyk 736: if (!ok) {
737: char *unescaped = NULL;
738: StrAllocCopy(unescaped, arg);
739: HTUnEscape(unescaped);
2.40 frystyk 740: HTRequest_addError(request, ERR_FATAL, NO, HTERR_BAD_REQUEST,
2.21 frystyk 741: (void *) unescaped, (int) strlen(unescaped),
742: "HTLoadWAIS");
743: free(unescaped);
744: free(names);
745: return -1;
746: }
747:
2.39 frystyk 748: if (PROT_TRACE) TTYPrint(TDEST, "HTLoadWAIS.. URL Parsed OK\n");
2.1 timbl 749:
750: service = strchr(names, ':');
751: if (service) *service++ = 0;
752: else service = "210";
753:
754: if (server_name[0] == 0)
755: connection = NULL;
756:
757: else if (!(key && !*key))
2.20 frystyk 758: if ((connection=connect_to_server(server_name,atoi(service))) == NULL) {
2.21 frystyk 759: char *host = HTParse(arg, "", PARSE_HOST);
2.25 frystyk 760: if (PROT_TRACE)
2.39 frystyk 761: TTYPrint(TDEST, "HTLoadWAIS.. Can't open connection to %s via service %s.\n",
2.24 frystyk 762: server_name, service);
2.40 frystyk 763: HTRequest_addError(request, ERR_FATAL, NO, HTERR_WAIS_NO_CONNECT,
2.20 frystyk 764: (void *) host, (int) strlen(host), "HTLoadWAIS");
765: goto cleanup;
766: }
2.1 timbl 767:
2.6 timbl 768: StrAllocCopy(wais_database,www_database);
769: HTUnEscape(wais_database);
2.21 frystyk 770:
771: /* Make title name without the .src */
772: {
773: char *srcstr;
774: StrAllocCopy(basetitle, wais_database);
775: if ((srcstr = strstr(basetitle, ".src")) != NULL)
776: *srcstr = '\0';
777: }
2.6 timbl 778:
2.21 frystyk 779: /* This below fixed size stuff is terrible */
2.1 timbl 780: request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
781: response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
782:
783: /* If keyword search is performed but there are no keywords,
784: ** the user has followed a link to the index itself. It would be
785: ** appropriate at this point to send him the .SRC file - how?
786: */
787:
788: if (key && !*key) { /* I N D E X */
789:
2.7 timbl 790: #ifdef CACHE_FILE_PREFIX
791: char filename[256];
792: FILE * fp;
793: #endif
2.37 frystyk 794: HTStructured * target = HTMLGenerator(request, NULL,
2.13 timbl 795: WWW_HTML, format_out, sink);
2.1 timbl 796:
2.8 timbl 797: {
2.27 frystyk 798: START(HTML_HTML);
799: START(HTML_HEAD);
2.8 timbl 800: START(HTML_TITLE);
2.21 frystyk 801: PUTS(basetitle);
802: PUTS(" Index");
2.8 timbl 803: END(HTML_TITLE);
2.27 frystyk 804: END(HTML_HEAD);
2.8 timbl 805:
2.27 frystyk 806: START(HTML_BODY);
2.8 timbl 807: START(HTML_H1);
2.21 frystyk 808: PUTS("WAIS Index: ");
809: PUTS(basetitle);
2.8 timbl 810: END(HTML_H1);
811:
812: }
2.21 frystyk 813: START(HTML_ISINDEX);
814:
815: /* If we have seen a source file for this database, use that: */
2.2 timbl 816:
2.7 timbl 817: #ifdef CACHE_FILE_PREFIX
2.8 timbl 818: sprintf(filename, "%sWSRC-%s:%s:%.100s.txt",
2.7 timbl 819: CACHE_FILE_PREFIX,
2.1 timbl 820: server_name, service, www_database);
821:
822: fp = fopen(filename, "r"); /* Have we found this already? */
2.39 frystyk 823: if (PROT_TRACE) TTYPrint(TDEST,
2.21 frystyk 824: "HTLoadWAIS.. Description of server %s %s.\n",
2.1 timbl 825: filename,
826: fp ? "exists already" : "does NOT exist!");
2.2 timbl 827:
2.1 timbl 828: if (fp) {
2.24 frystyk 829: int c;
830: START(HTML_PRE); /* Preformatted description */
831: while((c=getc(fp)) != EOF)
832: PUTC(c); /* Transfer file */
2.7 timbl 833: END(HTML_PRE);
2.1 timbl 834: fclose(fp);
2.8 timbl 835: }
2.2 timbl 836: #endif
2.27 frystyk 837: END(HTML_BODY);
838: END(HTML_HTML);
2.2 timbl 839: FREE_TARGET;
2.1 timbl 840:
841: } else if (key) { /* S E A R C H */
842: char *p;
2.2 timbl 843: HTStructured * target;
844:
2.1 timbl 845: strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
2.18 luotonen 846: while ((p = strchr(keywords,'+'))) *p = ' ';
2.1 timbl 847:
848: /* Send advance title to get something fast to the other end */
849:
2.37 frystyk 850: target = HTMLGenerator(request, NULL, WWW_HTML, format_out, sink);
2.2 timbl 851:
2.23 frystyk 852: START(HTML_HTML);
853: START(HTML_HEAD);
2.2 timbl 854: START(HTML_TITLE);
855: PUTS(keywords);
2.21 frystyk 856: PUTS(" in ");
857: PUTS(basetitle);
2.2 timbl 858: END(HTML_TITLE);
2.23 frystyk 859: END(HTML_HEAD);
2.2 timbl 860:
2.23 frystyk 861: START(HTML_BODY);
2.2 timbl 862: START(HTML_H1);
2.21 frystyk 863: PUTS("WAIS Search of \"");
2.2 timbl 864: PUTS(keywords);
2.21 frystyk 865: PUTS("\" in ");
866: PUTS(basetitle);
2.2 timbl 867: END(HTML_H1);
2.1 timbl 868:
2.21 frystyk 869: START(HTML_ISINDEX);
870:
2.1 timbl 871: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.25 frystyk 872: if (PROT_TRACE)
2.39 frystyk 873: TTYPrint(TDEST, "HTLoadWAIS.. Search for `%s' in `%s'\n",
2.24 frystyk 874: keywords, wais_database);
2.21 frystyk 875: if(generate_search_apdu(request_message + HEADER_LENGTH,
2.1 timbl 876: &request_buffer_length,
2.21 frystyk 877: keywords, wais_database, NULL,
878: HTMaxWAISLines) == NULL) {
2.25 frystyk 879: if (PROT_TRACE)
2.39 frystyk 880: TTYPrint(TDEST, "WAIS Search. Too many lines in response\n");
2.40 frystyk 881: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 882: NULL, 0, "HTLoadWAIS");
883: }
2.1 timbl 884:
885: if(!interpret_message(request_message,
886: MAX_MESSAGE_LEN - request_buffer_length,
887: response_message,
888: MAX_MESSAGE_LEN,
889: connection,
890: false /* true verbose */
891: )) {
2.25 frystyk 892: if (PROT_TRACE)
2.39 frystyk 893: TTYPrint(TDEST, "WAIS Search. Too many lines in response\n");
2.40 frystyk 894: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 895: NULL, 0, "HTLoadWAIS");
2.1 timbl 896: } else { /* returned message ok */
897: SearchResponseAPDU *query_response = 0;
898: readSearchResponseAPDU(&query_response,
899: response_message + HEADER_LENGTH);
2.2 timbl 900: display_search_response(target,
901: query_response, wais_database, keywords);
2.1 timbl 902: if (query_response->DatabaseDiagnosticRecords)
903: freeWAISSearchResponse(
904: query_response->DatabaseDiagnosticRecords);
905: freeSearchResponseAPDU( query_response);
906: } /* returned message not too large */
907:
2.23 frystyk 908: END(HTML_BODY);
909: END(HTML_HTML);
2.2 timbl 910: FREE_TARGET;
911:
2.1 timbl 912: } else { /* D O C U M E N T F E T C H */
913:
2.9 timbl 914: boolean binary; /* how to transfer stuff coming over */
2.2 timbl 915: HTStream * target;
2.1 timbl 916: long count;
917: any doc_chunk;
918: any * docid = &doc_chunk;
2.25 frystyk 919: if (PROT_TRACE)
2.39 frystyk 920: TTYPrint(TDEST,
2.24 frystyk 921: "HTLoadWAIS.. Retrieve document `%s'\n............ type `%s' length %ld\n", docname, doctype, document_length);
2.2 timbl 922:
2.32 frystyk 923: HTAnchor_setFormat(request->anchor,
2.2 timbl 924: !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
2.23 frystyk 925: !strcmp(doctype, "TEXT") ? WWW_UNKNOWN :
2.32 frystyk 926: !strcmp(doctype, "HTML") ? WWW_HTML:
927: !strcmp(doctype, "GIF") ? WWW_GIF:
928: HTAtom_for("application/octet-stream"));
2.9 timbl 929: binary =
930: 0 != strcmp(doctype, "WSRC") &&
931: 0 != strcmp(doctype, "TEXT") &&
932: 0 != strcmp(doctype, "HTML") ;
933:
2.23 frystyk 934: /* Guess on TEXT format as it might be HTML */
2.32 frystyk 935: if ((target = HTStreamStack(HTAnchor_format(request->anchor),
2.31 frystyk 936: request->output_format,
2.28 frystyk 937: request->output_stream,
938: request, YES)) == NULL) {
2.21 frystyk 939: status = -1;
940: goto cleanup;
941: }
2.2 timbl 942:
2.21 frystyk 943: /* Decode hex or litteral format for document ID */
2.1 timbl 944: WAIS_from_WWW(docid, docname);
945:
2.21 frystyk 946: /* Loop over slices of the document */
947: for (count = 0; count * CHARS_PER_PAGE < document_length; count++) {
948: char *type = s_strdup(doctype);
949: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.39 frystyk 950: if (PROT_TRACE) TTYPrint(TDEST, "HTLoadWAIS.. Slice number %ld\n",
2.21 frystyk 951: count);
952: if (generate_retrieval_apdu(request_message + HEADER_LENGTH,
953: &request_buffer_length,
954: docid, CT_byte,
955: count * CHARS_PER_PAGE,
956: HTMIN((count + 1) * CHARS_PER_PAGE,
957: document_length),
958: type,
959: wais_database) == 0) {
2.40 frystyk 960: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 961: NULL, 0, "HTLoadWAIS");
962: }
963: FREE(type);
964:
965: /* Actually do the transaction given by request_message */
966: if (interpret_message(request_message,
967: MAX_MESSAGE_LEN - request_buffer_length,
968: response_message,
969: MAX_MESSAGE_LEN,
970: connection,
971: false /* true verbose */
972: ) == 0) {
2.40 frystyk 973: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW,
2.21 frystyk 974: NULL, 0, "HTLoadWAIS");
975: }
976:
977: /* Parse the result which came back into memory. */
978: readSearchResponseAPDU(&retrieval_response,
979: response_message + HEADER_LENGTH);
980: {
981: WAISSearchResponse *searchres = (WAISSearchResponse *) retrieval_response->DatabaseDiagnosticRecords;
982: if (!searchres->Text) {
983: if (searchres->Diagnostics && *searchres->Diagnostics &&
984: (*searchres->Diagnostics)->ADDINFO) {
985: char *errmsg = (*searchres->Diagnostics)->ADDINFO;
2.40 frystyk 986: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
2.21 frystyk 987: (void *) errmsg, (int) strlen(errmsg),
988: "HTLoadWAIS");
989: } else {
2.40 frystyk 990: HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
2.21 frystyk 991: NULL, 0, "HTLoadWAIS");
992: }
2.23 frystyk 993: (*target->isa->_free)(target);
2.26 frystyk 994: request->output_stream = NULL;
2.22 frystyk 995: free (docid->bytes);
996: freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords);
997: freeSearchResponseAPDU( retrieval_response);
998: goto cleanup;
2.21 frystyk 999: } else {
1000: output_text_record(target, *searchres->Text,
1001: false, binary);
2.22 frystyk 1002: freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords);
1003: freeSearchResponseAPDU( retrieval_response);
2.21 frystyk 1004: } /* If text existed */
1005: }
1006:
1007: } /* Loop over slices */
2.1 timbl 1008:
2.23 frystyk 1009: (*target->isa->_free)(target);
2.26 frystyk 1010: request->output_stream = NULL;
2.1 timbl 1011: free (docid->bytes);
1012: } /* If document rather than search */
2.20 frystyk 1013: status = HT_LOADED;
2.2 timbl 1014:
2.20 frystyk 1015: cleanup:
2.34 frystyk 1016: if (connection) close_connection_to_server(connection);
2.1 timbl 1017: if (wais_database) free(wais_database);
2.20 frystyk 1018: if (request_message) s_free(request_message);
1019: if (response_message) s_free(response_message);
1020: FREE(names);
2.21 frystyk 1021: FREE(basetitle);
2.20 frystyk 1022: if (status < 0) {
1023: char *unescaped = NULL;
1024: StrAllocCopy(unescaped, arg);
1025: HTUnEscape(unescaped);
2.40 frystyk 1026: HTRequest_addError(request, ERR_FATAL, NO, HTERR_INTERNAL, (void *) unescaped,
2.20 frystyk 1027: (int) strlen(unescaped), "HTLoadWAIS");
1028: free(unescaped);
1029: }
1030: return status;
2.1 timbl 1031: }
1032:
Webmaster