Annotation of libwww/Library/src/HTWAIS.c, revision 2.13
2.1 timbl 1: /* WorldWideWeb - Wide Area Information Server Access HTWAIS.c
2: ** ==================================================
3: **
4: ** This module allows a WWW server or client to read data from a
5: ** remote WAIS
6: ** server, and provide that data to a WWW client in hypertext form.
7: ** Source files, once retrieved, are stored and used to provide
8: ** information about the index when that is accessed.
9: **
10: ** Authors
11: ** BK Brewster Kahle, Thinking Machines, <Brewster@think.com>
12: ** TBL Tim Berners-Lee, CERN <timbl@info.cern.ch>
13: **
14: ** History
15: ** Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
16: ** Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping)
17: ** Refers to lists of sources.
2.2 timbl 18: ** Mar 93 TBL Lib 2.0 compatible module made.
2.1 timbl 19: **
20: ** Bugs
21: ** Uses C stream i/o to read and write sockets, which won't work
22: ** on VMS TCP systems.
23: **
24: ** Should cache connections.
25: **
26: ** ANSI C only as written
27: **
2.11 secret 28: ** Bugs fixed
29: ** NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
30: **
2.1 timbl 31: ** WAIS comments:
32: **
33: ** 1. Separate directories for different system's .o would help
34: ** 2. Document ids are rather long!
35: **
36: ** WWW Address mapping convention:
37: **
38: ** /servername/database/type/length/document-id
39: **
40: ** /servername/database?word+word+word
41: */
42: /* WIDE AREA INFORMATION SERVER SOFTWARE:
43: No guarantees or restrictions. See the readme file for the full standard
44: disclaimer.
45:
46: Brewster@think.com
47: */
48:
49:
2.8 timbl 50: #define DIRECTORY "/cnidr.org:210/directory-of-servers"
51: /* define DIRECTORY "/quake.think.com:210/directory-of-servers" */
2.1 timbl 52:
53: #define BIG 1024 /* identifier size limit @@@@@ */
54:
2.2 timbl 55: /* From WAIS
56: ** ---------
2.1 timbl 57: */
58:
59: #include <ui.h>
60:
61: #define MAX_MESSAGE_LEN 100000
62: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
63:
64: #define WAISSEARCH_DATE "Fri Jul 19 1991"
65:
66:
2.2 timbl 67: /* FROM WWW
68: ** --------
2.1 timbl 69: */
70: #define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
71:
2.2 timbl 72: #define HEX_ESCAPE '%'
73:
2.1 timbl 74: #include "HTUtils.h"
75: #include "tcp.h"
76: #include "HTParse.h"
2.2 timbl 77: #include "HTAccess.h" /* We implement a protocol */
78: #include "HTML.h" /* The object we will generate */
79:
80: /* #include "ParseWSRC.h" */
2.1 timbl 81:
82: extern int WWW_TraceFlag; /* Control diagnostic output */
83: extern FILE * logfile; /* Log file output */
84:
85: PRIVATE BOOL as_gate; /* Client is using us as gateway */
86:
87: PRIVATE char line[2048]; /* For building strings to display */
88: /* Must be able to take id */
2.2 timbl 89:
90:
91: #include "HTParse.h"
92: #include "HTFormat.h"
93: #include "HTTCP.h"
2.6 timbl 94: /* #include "HTWSRC.h" */ /* Need some bits from here */
2.2 timbl 95:
96: /* Hypertext object building machinery
97: */
98: #include "HTML.h"
99:
100: #define PUTC(c) (*target->isa->put_character)(target, c)
101: #define PUTS(s) (*target->isa->put_string)(target, s)
102: #define START(e) (*target->isa->start_element)(target, e, 0, 0)
103: #define END(e) (*target->isa->end_element)(target, e)
104: #define FREE_TARGET (*target->isa->free)(target)
105:
/* Local, partial declaration of the structured-stream object.
** Only the leading `isa' class pointer is spelled out: it is the one
** member that the PUTC/PUTS/START/END/FREE_TARGET macros in this file
** dereference.  The remaining members are elided here.
*/
106: struct _HTStructured {
107: CONST HTStructuredClass * isa;
108: /* ... */
109: };
110:
/* Local, partial declaration of the plain stream object.
** Only the leading `isa' class pointer is spelled out; this file reaches
** the stream's put_character, put_string, put_block and free methods
** through it.  The remaining members are elided here.
*/
111: struct _HTStream {
112: CONST HTStreamClass * isa;
113: /* ... */
114: };
115:
116:
2.1 timbl 117: /* showDiags
118: */
119: /* modified from Jonny G's version in ui/question.c */
120:
2.2 timbl 121: void showDiags ARGS2(
122: HTStream *, target,
123: diagnosticRecord **, d)
2.1 timbl 124: {
125: long i;
126:
127: for (i = 0; d[i] != NULL; i++) {
128: if (d[i]->ADDINFO != NULL) {
129: PUTS("Diagnostic code is ");
130: PUTS(d[i]->DIAG);
131: PUTC(' ');
132: PUTS(d[i]->ADDINFO);
133: PUTC('\n'); ;
134: }
135: }
136: }
137:
138: /* Matrix of allowed characters in filenames
139: ** -----------------------------------------
140: */
141:
142: PRIVATE BOOL acceptable[256];
143: PRIVATE BOOL acceptable_inited = NO;
144:
145: PRIVATE void init_acceptable NOARGS
146: {
147: unsigned int i;
148: char * good =
149: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
150: for(i=0; i<256; i++) acceptable[i] = NO;
151: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
152: acceptable_inited = YES;
153: }
154:
155: /* Transform file identifier into WWW address
156: ** ------------------------------------------
157: **
158: **
159: ** On exit,
160: ** returns nil if error
161: ** pointer to malloced string (must be freed) if ok
162: */
163: char * WWW_from_archie ARGS1 (char *, file)
164: {
165: char * end;
166: char * result;
167: char * colon;
168: for(end=file; *end > ' '; end++); /* assumes ASCII encoding*/
169: result = (char *)malloc(10 + (end-file));
170: if (!result) return result; /* Malloc error */
171: strcpy(result, "file://");
172: strncat(result, file, end-file);
173: colon = strchr(result+7, ':'); /* Expect colon after host */
174: if (colon) {
175: for(; colon[0]; colon[0]=colon[1], colon++); /* move down */
176: }
177: return result;
178: } /* WWW_from_archie */
179:
2.2 timbl 180: /* Transform document identifier into URL
181: ** --------------------------------------
2.1 timbl 182: **
183: ** Bugs: A static buffer of finite size is used!
184: ** The format of the docid MUST be good!
185: **
186: ** On exit,
187: ** returns nil if error
188: ** pointer to malloced string (must be freed) if ok
189: */
2.9 timbl 190: PRIVATE char hex [17] = "0123456789ABCDEF";
2.2 timbl 191: extern char from_hex PARAMS((char a)); /* In HTWSRC @@ */
192:
193: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
194:
2.1 timbl 195: {
196: static char buf[BIG];
197: char * q = buf;
198: char * p = (docid->bytes);
199: int i, l;
200: if (TRACE) {
201: char *p;
2.2 timbl 202: fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1 timbl 203: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
204: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
205: fprintf(stderr, "%c", *p);
206: else
2.2 timbl 207: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 208: }
209: fprintf(stderr, "\n");
210: }
211: for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
212: if (TRACE) fprintf(stderr, " Record type %d, length %d\n",
213: p[0], p[1]);
214: if (*p>10) {
215: fprintf(stderr, "Eh? DOCID record type of %d!\n", *p);
216: return 0;
217: }
2.2 timbl 218: { /* Bug fix -- allow any byte value 15 Apr 93 */
219: unsigned int i = (unsigned) *p++;
220:
221: if (i > 99) {
222: *q++ = (i/100) + '0';
223: i = i % 100;
224: }
225: if (i > 9) {
226: *q++ = (i/10) + '0';
227: i = i % 10;
228: }
229: *q++ = i + '0'; /* Record type */
230: }
2.1 timbl 231: *q++ = '='; /* Separate */
232: l = *p++; /* Length */
233: for(i=0; i<l; i++, p++){
234: if (!acceptable[*p]) {
235: *q++ = HEX_ESCAPE; /* Means hex commming */
236: *q++ = hex[(*p) >> 4];
237: *q++ = hex[(*p) & 15];
238: }
239: else *q++ = *p;
240: }
241: *q++= ';'; /* Terminate field */
242: }
243: *q++ = 0; /* Terminate string */
244: if (TRACE) fprintf(stderr, "WWW form of id: %s\n", buf);
245: {
246: char * result = (char *)malloc(strlen(buf)+1);
247: strcpy(result, buf);
248: return result;
249: }
250: } /* WWW_from_WAIS */
251:
252:
2.2 timbl 253: /* Transform URL into WAIS document identifier
254: ** -------------------------------------------
2.1 timbl 255: **
256: ** On entry,
257: ** docname points to valid name produced originally by
258: ** WWW_from_WAIS
259: ** On exit,
260: ** docid->size is valid
261: ** docid->bytes is malloced and must later be freed.
262: */
263: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
264: {
265: char *z; /* Output pointer */
266: char *sor; /* Start of record - points to size field. */
267: char *p; /* Input pointer */
268: char *q; /* Poisition of "=" */
269: char *s; /* Position of semicolon */
270: int n; /* size */
271: if (TRACE) fprintf(stderr, "WWW id (to become WAIS id): %s\n", docname);
272: for(n=0, p = docname; *p; p++) { /* Count sizes of strings */
273: n++;
274: if (*p == ';') n--; /* Not converted */
275: else if (*p == HEX_ESCAPE) n=n-2; /* Save two bytes */
276: docid->size = n;
277: }
278:
279: docid->bytes = (char *) malloc(docid->size); /* result record */
280: z = docid->bytes;
281:
282: for(p = docname; *p; ) { /* Convert of strings */
2.2 timbl 283: /* Record type */
284:
285: *z = 0; /* Initialize record type */
286: while (*p >= '0' && *p <= '9') {
287: *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */
288: }
289: z++;
290: if (*p != '=') return 0;
291: q = p;
292:
293: /* *z++ = *p++ - '0';
2.1 timbl 294: q = strchr(p , '=');
295: if (!q) return 0;
2.2 timbl 296: */
2.1 timbl 297: s = strchr(q, ';'); /* (Check only) */
298: if (!s) return 0; /* Bad! No ';'; */
299: sor = z; /* Remember where the size field was */
300: z++; /* Skip record size for now */
301: for(p=q+1; *p!=';' ; ) {
302: if (*p == HEX_ESCAPE) {
303: char c;
304: unsigned int b;
305: p++;
306: c = *p++;
307: b = from_hex(c);
308: c = *p++;
309: if (!c) break; /* Odd number of chars! */
310: *z++ = (b<<4) + from_hex(c);
311: } else {
312: *z++ = *p++; /* Record */
313: }
314: }
315: *sor = (z-sor-1); /* Fill in size -- not counting size itself */
316: p++; /* After semicolon: start of next record */
317: }
318:
319: if (TRACE) {
320: char *p;
2.2 timbl 321: fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1 timbl 322: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
323: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
324: fprintf(stderr, "%c", *p);
325: else
2.2 timbl 326: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 327: }
328: fprintf(stderr, "\n");
329: }
330: return docid; /* Ok */
331:
332: } /* WAIS_from_WWW */
333:
334:
335: /* Send a plain text record to the client output_text_record()
336: ** --------------------------------------
337: */
2.2 timbl 338:
2.9 timbl 339: PRIVATE void output_text_record ARGS4(
2.2 timbl 340: HTStream *, target,
341: WAISDocumentText *, record,
2.9 timbl 342: boolean, quote_string_quotes,
343: boolean, binary)
2.1 timbl 344: {
345: long count;
346: /* printf(" Text\n");
347: print_any(" DocumentID: ", record->DocumentID);
348: printf(" VersionNumber: %d\n", record->VersionNumber);
349: */
2.9 timbl 350:
351: if (binary) {
352: (*target->isa->put_block)(target,
353: record->DocumentText->bytes,
354: record->DocumentText->size);
355: return;
356: }
357:
2.1 timbl 358: for(count = 0; count < record->DocumentText->size; count++){
359: long ch = (unsigned char)record->DocumentText->bytes[count];
2.2 timbl 360: if (ch == 27) { /* What is this in for? Tim */
2.1 timbl 361:
362: /* then we have an escape code */
363: /* if the next letter is '(' or ')', then ignore two letters */
364: if('(' == record->DocumentText->bytes[count + 1] ||
365: ')' == record->DocumentText->bytes[count + 1])
366: count += 1; /* it is a term marker */
367: else count += 4; /* it is a paragraph marker */
368: } else if (ch == '\n' || ch == '\r') {
2.2 timbl 369: PUTC('\n');
2.1 timbl 370: } else if ((ch=='\t') || isprint(ch)){
2.2 timbl 371: PUTC(ch);
2.1 timbl 372: }
373: }
374: } /* output text record */
375:
376:
2.2 timbl 377:
2.1 timbl 378: /* Format A Search response for the client display_search_response
379: ** ---------------------------------------
380: */
381: /* modified from tracy shen's version in wutil.c
382: * displays either a text record or a set of headlines.
383: */
384: void
2.2 timbl 385: display_search_response ARGS4(
386: HTStructured *, target,
2.1 timbl 387: SearchResponseAPDU *, response,
388: char *, database,
389: char *, keywords)
390: {
391: WAISSearchResponse *info;
392: long i, k;
393:
394: BOOL archie = strstr(database, "archie")!=0; /* Specical handling */
395:
2.7 timbl 396: if (TRACE) fprintf(stderr, "HTWAIS: Displaying search response\n");
2.1 timbl 397: sprintf(line,
398: "Index %s contains the following %d item%s relevant to '%s'.\n",
399: database,
2.2 timbl 400: (int)(response->NumberOfRecordsReturned),
2.1 timbl 401: response->NumberOfRecordsReturned ==1 ? "" : "s",
402: keywords);
2.2 timbl 403:
404: PUTS(line);
405: PUTS("The first figure for each entry is its relative score, ");
406: PUTS("the second the number of lines in the item.");
407: START(HTML_MENU);
408:
2.1 timbl 409: if ( response->DatabaseDiagnosticRecords != 0 ) {
410: info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
411: i =0;
412:
413: if (info->Diagnostics != NULL)
2.2 timbl 414: showDiags((HTStream*)target, info->Diagnostics);
2.1 timbl 415:
416: if ( info->DocHeaders != 0 ) {
417: for (k=0; info->DocHeaders[k] != 0; k++ ) {
418: WAISDocumentHeader* head = info->DocHeaders[k];
419: char * headline = trim_junk(head->Headline);
420: any * docid = head->DocumentID;
421: char * docname; /* printable version of docid */
422: i++;
423:
424: /* Make a printable string out of the document id.
425: */
426: if (TRACE) fprintf(stderr,
2.7 timbl 427: "HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n",
2.1 timbl 428: i,
2.2 timbl 429: (long int)(info->DocHeaders[k]->Score),
430: (long int)(info->DocHeaders[k]->Lines),
2.1 timbl 431: headline);
432:
2.2 timbl 433: START(HTML_LI);
434: sprintf(line, "%4ld %4ld ",
435: head->Score,
436: head->Lines);
437: PUTS( line);
438:
2.1 timbl 439: if (archie) {
440: char * www_name = WWW_from_archie(headline);
441: if (www_name) {
2.2 timbl 442: HTStartAnchor(target, NULL, www_name);
2.1 timbl 443: PUTS(headline);
2.2 timbl 444:
445: END(HTML_A);
2.1 timbl 446: free(www_name);
447: } else {
2.2 timbl 448: PUTS(headline);
449: PUTS(" (bad file name)");
2.1 timbl 450: }
451: } else { /* Not archie */
452: docname = WWW_from_WAIS(docid);
453: if (docname) {
2.6 timbl 454: char * dbname = HTEscape(database, URL_XPALPHAS);
2.1 timbl 455: sprintf(line, "%s/%s/%d/%s", /* W3 address */
456: dbname,
457: head->Types ? head->Types[0] : "TEXT",
2.2 timbl 458: (int)(head->DocumentLength),
2.1 timbl 459: docname);
2.11 secret 460: HTStartAnchor(target, NULL, ( (head->Types)
461: && (!strcmp(head->Types[0], "URL"))) ?
462: headline : line); /* NT, Sep 93 */
2.2 timbl 463: PUTS(headline);
464: END(HTML_A);
2.1 timbl 465: free(dbname);
466: free(docname);
467: } else {
2.2 timbl 468: PUTS("(bad doc id)");
2.1 timbl 469: }
470: }
471: } /* next document header */
472: } /* if there were any document headers */
473:
474: if ( info->ShortHeaders != 0 ) {
475: k =0;
476: while (info->ShortHeaders[k] != 0 ) {
477: i++;
2.2 timbl 478: PUTS( "(Short Header record, can't display)");
2.1 timbl 479: }
480: }
481: if ( info->LongHeaders != 0 ) {
482: k =0;
483: while (info->LongHeaders[k] != 0) {
484: i++;
485: PUTS( "\nLong Header record, can't display\n");
486: }
487: }
488: if ( info->Text != 0 ) {
489: k =0;
490: while (info->Text[k] != 0) {
491: i++;
492: PUTS( "\nText record\n");
2.9 timbl 493: output_text_record((HTStream*)target, info->Text[k++], false, false);
2.1 timbl 494: }
495: }
496: if ( info->Headlines != 0 ) {
497: k =0;
498: while (info->Headlines[k] != 0) {
499: i++;
500: PUTS( "\nHeadline record, can't display\n");
501: /* dsply_headline_record( info->Headlines[k++]); */
502: }
503: }
504: if ( info->Codes != 0 ) {
505: k =0;
506: while (info->Codes[k] != 0) {
507: i++;
508: PUTS( "\nCode record, can't display\n");
509: /* dsply_code_record( info->Codes[k++]); */
510: }
511: }
512: } /* Loop: display user info */
2.2 timbl 513: END(HTML_MENU);
2.1 timbl 514: PUTC('\n'); ;
515: }
516:
517:
518:
2.2 timbl 519:
520: /* Load by name HTLoadWAIS
521: ** ============
522: **
523: ** This renders any object or search as required
2.1 timbl 524: */
2.13 ! timbl 525: PUBLIC int HTLoadWAIS ARGS1(HTRequest * , request)
2.1 timbl 526:
527: #define MAX_KEYWORDS_LENGTH 1000
528: #define MAX_SERVER_LENGTH 1000
529: #define MAX_DATABASE_LENGTH 1000
530: #define MAX_SERVICE_LENGTH 1000
531: #define MAXDOCS 40
532:
533: {
2.13 ! timbl 534: CONST char * arg = HTAnchor_physical(request->anchor);
! 535: HTParentAnchor * anAnchor = request->anchor;
! 536: HTFormat format_out = request->output_format;
! 537: HTStream* sink = request->output_stream;
! 538:
2.1 timbl 539: static CONST char * error_header =
2.7 timbl 540: "<h1>Access error</h1>\nThe following error occured in accesing a WAIS server:<P>\n";
2.2 timbl 541: char * key; /* pointer to keywords in URL */
2.1 timbl 542: char* request_message = NULL; /* arbitrary message limit */
543: char* response_message = NULL; /* arbitrary message limit */
544: long request_buffer_length; /* how of the request is left */
545: SearchResponseAPDU *retrieval_response = 0;
546: char keywords[MAX_KEYWORDS_LENGTH + 1];
547: char *server_name;
2.6 timbl 548: char *wais_database = NULL; /* name of current database */
549: char *www_database; /* Same name escaped */
2.1 timbl 550: char *service;
551: char *doctype;
552: char *doclength;
553: long document_length;
554: char *docname;
555: FILE *connection = 0;
556: char * names; /* Copy of arg to be hacked up */
557: BOOL ok = NO;
558:
559: extern FILE * connect_to_server();
560:
561: if (!acceptable_inited) init_acceptable();
562:
563:
564: /* Decipher and check syntax of WWW address:
565: ** ----------------------------------------
566: **
567: ** First we remove the "wais:" if it was spcified. 920110
568: */
569: names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2 timbl 570: key = strchr(names, '?');
571:
572: if (key) {
573: char * p;
574: *key++ = 0; /* Split off keywords */
575: for (p=key; *p; p++) if (*p == '+') *p = ' ';
576: HTUnEscape(key);
577: }
2.1 timbl 578: if (names[0]== '/') {
579: server_name = names+1;
580: if (as_gate =(*server_name == '/'))
581: server_name++; /* Accept one or two */
582: www_database = strchr(server_name,'/');
583: if (www_database) {
584: *www_database++ = 0; /* Separate database name */
585: doctype = strchr(www_database, '/');
586: if (key) ok = YES; /* Don't need doc details */
587: else if (doctype) { /* If not search parse doc details */
588: *doctype++ = 0; /* Separate rest of doc address */
589: doclength = strchr(doctype, '/');
590: if(doclength) {
591: *doclength++ = 0;
592: document_length = atol(doclength);
593: if (document_length) {
594: docname=strchr(doclength, '/');
595: if (docname) {
596: *docname++ = 0;
597: ok = YES; /* To avoid a goto! */
598: } /* if docname */
599: } /* if document_length valid */
600: } /* if doclength */
601: } else { /* no doctype? Assume index required */
602: if (!key) key = "";
603: ok = YES;
604: } /* if doctype */
605: } /* if database */
606: }
607:
2.2 timbl 608: if (!ok)
609: return HTLoadError(sink, 500, "Syntax error in WAIS URL");
610:
2.7 timbl 611: if (TRACE) fprintf(stderr, "HTWAIS: Parsed OK\n");
2.1 timbl 612:
613: service = strchr(names, ':');
614: if (service) *service++ = 0;
615: else service = "210";
616:
617: if (server_name[0] == 0)
618: connection = NULL;
619:
620: else if (!(key && !*key))
621: if ((connection=connect_to_server(server_name,atoi(service)))
622: == NULL) {
2.2 timbl 623: if (TRACE) fprintf (stderr,
2.1 timbl 624: "%sCan't open connection to %s via service %s.\n",
625: error_header, server_name, service);
626: free(names);
2.2 timbl 627: return HTLoadError(sink, 500, "Can't open connection to WAIS server");
2.1 timbl 628: }
629:
2.6 timbl 630: StrAllocCopy(wais_database,www_database);
631: HTUnEscape(wais_database);
632:
2.2 timbl 633: /* This below fixed size stuff is terrible */
2.1 timbl 634: request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
635: response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
636:
637: /* If keyword search is performed but there are no keywords,
638: ** the user has followed a link to the index itself. It would be
639: ** appropriate at this point to send him the .SRC file - how?
640: */
641:
642: if (key && !*key) { /* I N D E X */
643:
2.7 timbl 644: #ifdef CACHE_FILE_PREFIX
645: char filename[256];
646: FILE * fp;
647: #endif
2.13 ! timbl 648: HTStructured * target = HTML_new(request, NULL,
! 649: WWW_HTML, format_out, sink);
2.1 timbl 650:
2.2 timbl 651: START(HTML_ISINDEX);
652:
2.8 timbl 653: {
654: START(HTML_TITLE);
655: PUTS(wais_database);
656: PUTS(" index");
657: END(HTML_TITLE);
658:
659: START(HTML_H1);
660: PUTS(wais_database);
661: END(HTML_H1);
662:
663: }
2.1 timbl 664: /* If we have seen a source file for this database, use that:
665: */
2.2 timbl 666:
2.7 timbl 667: #ifdef CACHE_FILE_PREFIX
2.8 timbl 668: sprintf(filename, "%sWSRC-%s:%s:%.100s.txt",
2.7 timbl 669: CACHE_FILE_PREFIX,
2.1 timbl 670: server_name, service, www_database);
671:
672: fp = fopen(filename, "r"); /* Have we found this already? */
673: if (TRACE) fprintf(stderr,
2.7 timbl 674: "HTWAIS: Description of server %s %s.\n",
2.1 timbl 675: filename,
676: fp ? "exists already" : "does NOT exist!");
2.2 timbl 677:
2.1 timbl 678: if (fp) {
679: char c;
2.7 timbl 680: START(HTML_PRE); /* Preformatted description */
681: while((c=getc(fp))!=EOF) PUTC(c); /* Transfer file */
682: END(HTML_PRE);
2.1 timbl 683: fclose(fp);
2.8 timbl 684: }
2.2 timbl 685: #endif
2.7 timbl 686: START(HTML_P);
2.2 timbl 687: PUTS("Specify search words.");
2.1 timbl 688:
2.2 timbl 689: FREE_TARGET;
2.1 timbl 690:
691: } else if (key) { /* S E A R C H */
692: char *p;
2.2 timbl 693: HTStructured * target;
694:
2.1 timbl 695: strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
696: while(p=strchr(keywords, '+')) *p = ' ';
697:
698: /* Send advance title to get something fast to the other end */
699:
2.13 ! timbl 700: target = HTML_new(request, NULL, WWW_HTML, format_out, sink);
2.2 timbl 701:
702: START(HTML_ISINDEX);
703: START(HTML_TITLE);
704: PUTS(keywords);
705: PUTS(" (in ");
706: PUTS(wais_database);
707: PUTS(")");
708: END(HTML_TITLE);
709:
710: START(HTML_H1);
711: PUTS(keywords);
712: END(HTML_H1);
2.1 timbl 713:
714: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.7 timbl 715: if (TRACE) fprintf(stderr, "HTWAIS: Search for `%s' in `%s'\n",
2.1 timbl 716: keywords, wais_database);
717: if(NULL ==
718: generate_search_apdu(request_message + HEADER_LENGTH,
719: &request_buffer_length,
720: keywords, wais_database, NULL, MAXDOCS))
721: panic("request too large");
722:
723:
724: if(!interpret_message(request_message,
725: MAX_MESSAGE_LEN - request_buffer_length,
726: response_message,
727: MAX_MESSAGE_LEN,
728: connection,
729: false /* true verbose */
730: )) {
731: panic("returned message too large");
732:
733: } else { /* returned message ok */
734:
735: SearchResponseAPDU *query_response = 0;
736: readSearchResponseAPDU(&query_response,
737: response_message + HEADER_LENGTH);
2.2 timbl 738: display_search_response(target,
739: query_response, wais_database, keywords);
2.1 timbl 740: if (query_response->DatabaseDiagnosticRecords)
741: freeWAISSearchResponse(
742: query_response->DatabaseDiagnosticRecords);
743: freeSearchResponseAPDU( query_response);
744: } /* returned message not too large */
745:
2.2 timbl 746: FREE_TARGET;
747:
2.1 timbl 748: } else { /* D O C U M E N T F E T C H */
749:
2.2 timbl 750: HTFormat format_in;
2.9 timbl 751: boolean binary; /* how to transfer stuff coming over */
2.2 timbl 752: HTStream * target;
2.1 timbl 753: long count;
754: any doc_chunk;
755: any * docid = &doc_chunk;
756: if (TRACE) printf(
2.7 timbl 757: "HTWAIS: Retrieve document id `%s' type `%s' length %ld\n",
2.1 timbl 758: docname, doctype, document_length);
2.2 timbl 759:
760: format_in =
761: !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
762: !strcmp(doctype, "TEXT") ? HTAtom_for("text/plain") :
2.9 timbl 763: !strcmp(doctype, "HTML") ? HTAtom_for("text/html") :
2.2 timbl 764: !strcmp(doctype, "GIF") ? HTAtom_for("image/gif") :
2.11 secret 765: HTAtom_for("application/octet-stream");
2.9 timbl 766: binary =
767: 0 != strcmp(doctype, "WSRC") &&
768: 0 != strcmp(doctype, "TEXT") &&
769: 0 != strcmp(doctype, "HTML") ;
770:
2.2 timbl 771:
2.13 ! timbl 772: target = HTStreamStack(format_in, request);
2.2 timbl 773: if (!target) return HTLoadError(sink, 500,
774: "Can't convert format of WAIS document");
2.1 timbl 775: /* Decode hex or litteral format for document ID
776: */
777: WAIS_from_WWW(docid, docname);
778:
2.2 timbl 779:
2.1 timbl 780: /* Loop over slices of the document
781: */
782: for(count = 0;
783: count * CHARS_PER_PAGE < document_length;
784: count++){
785: char *type = s_strdup(doctype); /* Gets freed I guess */
786: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.2 timbl 787: if (TRACE) fprintf(stderr, "HTWAIS: Slice number %ld\n", count);
2.1 timbl 788: if(0 ==
789: generate_retrieval_apdu(request_message + HEADER_LENGTH,
790: &request_buffer_length,
791: docid,
792: CT_byte,
793: count * CHARS_PER_PAGE,
794: MIN((count + 1) * CHARS_PER_PAGE,document_length),
795: type,
796: wais_database
797: ))
798: panic("request too long");
2.2 timbl 799:
800: /* Actually do the transaction given by request_message */
2.1 timbl 801: if(0 ==
802: interpret_message(request_message,
803: MAX_MESSAGE_LEN - request_buffer_length,
804: response_message,
805: MAX_MESSAGE_LEN,
806: connection,
807: false /* true verbose */
808: ))
809: panic("Returned message too large");
810:
2.2 timbl 811: /* Parse the result which came back into memory.
812: */
2.1 timbl 813: readSearchResponseAPDU(&retrieval_response,
814: response_message + HEADER_LENGTH);
815:
816: if(NULL == ((WAISSearchResponse *)
817: retrieval_response->DatabaseDiagnosticRecords)->Text){
2.2 timbl 818: /* display_search_response(target, retrieval_response,
819: wais_database, keywords); */
820: PUTS("No text was returned!\n");
2.1 timbl 821: /* panic("No text was returned"); */
822: } else {
823:
2.2 timbl 824: output_text_record(target,
825: ((WAISSearchResponse *)
2.1 timbl 826: retrieval_response->DatabaseDiagnosticRecords)->Text[0],
2.9 timbl 827: false, binary);
2.1 timbl 828:
829: } /* If text existed */
830:
831: } /* Loop over slices */
832:
2.2 timbl 833: (*target->isa->free)(target);
2.1 timbl 834:
835: free (docid->bytes);
836:
837: freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords);
838: freeSearchResponseAPDU( retrieval_response);
839:
840: } /* If document rather than search */
841:
2.2 timbl 842:
843:
2.1 timbl 844:
2.2 timbl 845: /* (This postponed until later, after a timeout:)
2.1 timbl 846: */
847: if (connection) close_connection(connection);
848: if (wais_database) free(wais_database);
849: s_free(request_message);
850: s_free(response_message);
851:
852: free(names);
2.2 timbl 853: return HT_LOADED;
2.1 timbl 854: }
855:
/* Protocol descriptor for this module: it pairs the access scheme name
** "wais" with the HTLoadWAIS function.  The third member is left NULL.
** NOTE(review): the meaning of the third member is defined with
** HTProtocol, which is not visible in this file - confirm there.
*/
2.12 duns 856: GLOBALDEF PUBLIC HTProtocol HTWAIS = { "wais", HTLoadWAIS, NULL };
2.1 timbl 857:
858:
Webmaster