Annotation of libwww/Library/src/HTWAIS.c, revision 2.6
2.1 timbl 1: /* WorldWideWeb - Wide Area Information Server Access HTWAIS.c
2: ** ==================================================
3: **
4: ** This module allows a WWW server or client to read data from a
5: ** remote WAIS
6: ** server, and provide that data to a WWW client in hypertext form.
7: ** Source files, once retrieved, are stored and used to provide
8: ** information about the index when that is accessed.
9: **
10: ** Authors
11: ** BK Brewster Kahle, Thinking Machines, <Brewster@think.com>
12: ** TBL Tim Berners-Lee, CERN <timbl@info.cern.ch>
13: **
14: ** History
15: ** Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
16: ** Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping)
17: ** Refers to lists of sources.
2.2 timbl 18: ** Mar 93 TBL Lib 2.0 compatible module made.
2.1 timbl 19: **
20: ** Bugs
21: ** Uses C stream i/o to read and write sockets, which won't work
22: ** on VMS TCP systems.
23: **
24: ** Should cache connections.
25: **
26: ** ANSI C only as written
27: **
28: ** WAIS comments:
29: **
30: ** 1. Separate directories for different system's .o would help
31: ** 2. Document ids are rather long!
32: **
33: ** WWW Address mapping convention:
34: **
35: ** /servername/database/type/length/document-id
36: **
37: ** /servername/database?word+word+word
38: */
39: /* WIDE AREA INFORMATION SERVER SOFTWARE:
40: No guarantees or restrictions. See the readme file for the full standard
41: disclaimer.
42:
43: Brewster@think.com
44: */
45:
46:
47: #define DIRECTORY "/quake.think.com:210/directory-of-servers"
48:
49: #define BIG 1024 /* identifier size limit @@@@@ */
50:
2.2 timbl 51: /* From WAIS
52: ** ---------
2.1 timbl 53: */
54:
55: #include <ui.h>
56:
57: #define MAX_MESSAGE_LEN 100000
58: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
59:
60: #define WAISSEARCH_DATE "Fri Jul 19 1991"
61:
62:
2.2 timbl 63: /* FROM WWW
64: ** --------
2.1 timbl 65: */
66: #define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
67:
2.2 timbl 68: #define HEX_ESCAPE '%'
69:
2.1 timbl 70: #include "HTUtils.h"
71: #include "tcp.h"
72: #include "HTParse.h"
2.2 timbl 73: #include "HTAccess.h" /* We implement a protocol */
74: #include "HTML.h" /* The object we will generate */
75:
76: /* #include "ParseWSRC.h" */
2.1 timbl 77:
78: extern int WWW_TraceFlag; /* Control diagnostic output */
79: extern FILE * logfile; /* Log file output */
80:
81: PRIVATE BOOL as_gate; /* Client is using us as gateway */
82:
83: PRIVATE char line[2048]; /* For building strings to display */
84: /* Must be able to take id */
2.2 timbl 85:
86:
87: #include "HTParse.h"
88: #include "HTFormat.h"
89: #include "HTTCP.h"
2.6 ! timbl 90: /* #include "HTWSRC.h" */ /* Need some bits from here */
2.2 timbl 91:
92: /* Hypertext object building machinery
93: */
94: #include "HTML.h"
95:
96: #define PUTC(c) (*target->isa->put_character)(target, c)
97: #define PUTS(s) (*target->isa->put_string)(target, s)
98: #define START(e) (*target->isa->start_element)(target, e, 0, 0)
99: #define END(e) (*target->isa->end_element)(target, e)
100: #define END_TARGET (*target->isa->end_document)(target)
101: #define FREE_TARGET (*target->isa->free)(target)
102:
103: struct _HTStructured {
104: CONST HTStructuredClass * isa;
105: /* ... */
106: };
107:
108: struct _HTStream {
109: CONST HTStreamClass * isa;
110: /* ... */
111: };
112:
113:
2.1 timbl 114: /* showDiags
115: */
116: /* modified from Jonny G's version in ui/question.c */
117:
2.2 timbl 118: void showDiags ARGS2(
119: HTStream *, target,
120: diagnosticRecord **, d)
2.1 timbl 121: {
122: long i;
123:
124: for (i = 0; d[i] != NULL; i++) {
125: if (d[i]->ADDINFO != NULL) {
126: PUTS("Diagnostic code is ");
127: PUTS(d[i]->DIAG);
128: PUTC(' ');
129: PUTS(d[i]->ADDINFO);
130: PUTC('\n'); ;
131: }
132: }
133: }
134:
135: /* Matrix of allowed characters in filenames
136: ** -----------------------------------------
137: */
138:
139: PRIVATE BOOL acceptable[256];
140: PRIVATE BOOL acceptable_inited = NO;
141:
142: PRIVATE void init_acceptable NOARGS
143: {
144: unsigned int i;
145: char * good =
146: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
147: for(i=0; i<256; i++) acceptable[i] = NO;
148: for(;*good; good++) acceptable[(unsigned int)*good] = YES;
149: acceptable_inited = YES;
150: }
151:
152: /* Transform file identifier into WWW address
153: ** ------------------------------------------
154: **
155: **
156: ** On exit,
157: ** returns nil if error
158: ** pointer to malloced string (must be freed) if ok
159: */
160: char * WWW_from_archie ARGS1 (char *, file)
161: {
162: char * end;
163: char * result;
164: char * colon;
165: for(end=file; *end > ' '; end++); /* assumes ASCII encoding*/
166: result = (char *)malloc(10 + (end-file));
167: if (!result) return result; /* Malloc error */
168: strcpy(result, "file://");
169: strncat(result, file, end-file);
170: colon = strchr(result+7, ':'); /* Expect colon after host */
171: if (colon) {
172: for(; colon[0]; colon[0]=colon[1], colon++); /* move down */
173: }
174: return result;
175: } /* WWW_from_archie */
176:
2.2 timbl 177: /* Transform document identifier into URL
178: ** --------------------------------------
2.1 timbl 179: **
180: ** Bugs: A static buffer of finite size is used!
181: ** The format of the docid MUST be good!
182: **
183: ** On exit,
184: ** returns nil if error
185: ** pointer to malloced string (must be freed) if ok
186: */
2.2 timbl 187: PRIVATE char hex [16] = "0123456789ABCDEF";
188: extern char from_hex PARAMS((char a)); /* In HTWSRC @@ */
189:
190: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
191:
2.1 timbl 192: {
193: static char buf[BIG];
194: char * q = buf;
195: char * p = (docid->bytes);
196: int i, l;
197: if (TRACE) {
198: char *p;
2.2 timbl 199: fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1 timbl 200: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
201: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
202: fprintf(stderr, "%c", *p);
203: else
2.2 timbl 204: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 205: }
206: fprintf(stderr, "\n");
207: }
208: for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
209: if (TRACE) fprintf(stderr, " Record type %d, length %d\n",
210: p[0], p[1]);
211: if (*p>10) {
212: fprintf(stderr, "Eh? DOCID record type of %d!\n", *p);
213: return 0;
214: }
2.2 timbl 215: { /* Bug fix -- allow any byte value 15 Apr 93 */
216: unsigned int i = (unsigned) *p++;
217:
218: if (i > 99) {
219: *q++ = (i/100) + '0';
220: i = i % 100;
221: }
222: if (i > 9) {
223: *q++ = (i/10) + '0';
224: i = i % 10;
225: }
226: *q++ = i + '0'; /* Record type */
227: }
2.1 timbl 228: *q++ = '='; /* Separate */
229: l = *p++; /* Length */
230: for(i=0; i<l; i++, p++){
231: if (!acceptable[*p]) {
232: *q++ = HEX_ESCAPE; /* Means hex commming */
233: *q++ = hex[(*p) >> 4];
234: *q++ = hex[(*p) & 15];
235: }
236: else *q++ = *p;
237: }
238: *q++= ';'; /* Terminate field */
239: }
240: *q++ = 0; /* Terminate string */
241: if (TRACE) fprintf(stderr, "WWW form of id: %s\n", buf);
242: {
243: char * result = (char *)malloc(strlen(buf)+1);
244: strcpy(result, buf);
245: return result;
246: }
247: } /* WWW_from_WAIS */
248:
249:
2.2 timbl 250: /* Transform URL into WAIS document identifier
251: ** -------------------------------------------
2.1 timbl 252: **
253: ** On entry,
254: ** docname points to valid name produced originally by
255: ** WWW_from_WAIS
256: ** On exit,
257: ** docid->size is valid
258: ** docid->bytes is malloced and must later be freed.
259: */
260: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
261: {
262: char *z; /* Output pointer */
263: char *sor; /* Start of record - points to size field. */
264: char *p; /* Input pointer */
265: char *q; /* Poisition of "=" */
266: char *s; /* Position of semicolon */
267: int n; /* size */
268: if (TRACE) fprintf(stderr, "WWW id (to become WAIS id): %s\n", docname);
269: for(n=0, p = docname; *p; p++) { /* Count sizes of strings */
270: n++;
271: if (*p == ';') n--; /* Not converted */
272: else if (*p == HEX_ESCAPE) n=n-2; /* Save two bytes */
273: docid->size = n;
274: }
275:
276: docid->bytes = (char *) malloc(docid->size); /* result record */
277: z = docid->bytes;
278:
279: for(p = docname; *p; ) { /* Convert of strings */
2.2 timbl 280: /* Record type */
281:
282: *z = 0; /* Initialize record type */
283: while (*p >= '0' && *p <= '9') {
284: *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */
285: }
286: z++;
287: if (*p != '=') return 0;
288: q = p;
289:
290: /* *z++ = *p++ - '0';
2.1 timbl 291: q = strchr(p , '=');
292: if (!q) return 0;
2.2 timbl 293: */
2.1 timbl 294: s = strchr(q, ';'); /* (Check only) */
295: if (!s) return 0; /* Bad! No ';'; */
296: sor = z; /* Remember where the size field was */
297: z++; /* Skip record size for now */
298: for(p=q+1; *p!=';' ; ) {
299: if (*p == HEX_ESCAPE) {
300: char c;
301: unsigned int b;
302: p++;
303: c = *p++;
304: b = from_hex(c);
305: c = *p++;
306: if (!c) break; /* Odd number of chars! */
307: *z++ = (b<<4) + from_hex(c);
308: } else {
309: *z++ = *p++; /* Record */
310: }
311: }
312: *sor = (z-sor-1); /* Fill in size -- not counting size itself */
313: p++; /* After semicolon: start of next record */
314: }
315:
316: if (TRACE) {
317: char *p;
2.2 timbl 318: fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1 timbl 319: for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
320: if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
321: fprintf(stderr, "%c", *p);
322: else
2.2 timbl 323: fprintf(stderr, "<%x>", (unsigned)*p);
2.1 timbl 324: }
325: fprintf(stderr, "\n");
326: }
327: return docid; /* Ok */
328:
329: } /* WAIS_from_WWW */
330:
331:
332: /* Send a plain text record to the client output_text_record()
333: ** --------------------------------------
334: */
2.2 timbl 335:
2.1 timbl 336: PRIVATE void output_text_record ARGS3(
2.2 timbl 337: HTStream *, target,
338: WAISDocumentText *, record,
339: boolean, quote_string_quotes)
2.1 timbl 340: {
341: long count;
342: /* printf(" Text\n");
343: print_any(" DocumentID: ", record->DocumentID);
344: printf(" VersionNumber: %d\n", record->VersionNumber);
345: */
346: for(count = 0; count < record->DocumentText->size; count++){
347: long ch = (unsigned char)record->DocumentText->bytes[count];
2.2 timbl 348: if (ch == 27) { /* What is this in for? Tim */
2.1 timbl 349:
350: /* then we have an escape code */
351: /* if the next letter is '(' or ')', then ignore two letters */
352: if('(' == record->DocumentText->bytes[count + 1] ||
353: ')' == record->DocumentText->bytes[count + 1])
354: count += 1; /* it is a term marker */
355: else count += 4; /* it is a paragraph marker */
356: } else if (ch == '\n' || ch == '\r') {
2.2 timbl 357: PUTC('\n');
2.1 timbl 358: } else if ((ch=='\t') || isprint(ch)){
2.2 timbl 359: PUTC(ch);
2.1 timbl 360: }
361: }
362: } /* output text record */
363:
364:
2.2 timbl 365:
2.1 timbl 366: /* Format A Search response for the client display_search_response
367: ** ---------------------------------------
368: */
369: /* modified from tracy shen's version in wutil.c
370: * displays either a text record or a set of headlines.
371: */
372: void
2.2 timbl 373: display_search_response ARGS4(
374: HTStructured *, target,
2.1 timbl 375: SearchResponseAPDU *, response,
376: char *, database,
377: char *, keywords)
378: {
379: WAISSearchResponse *info;
380: long i, k;
381:
382: BOOL archie = strstr(database, "archie")!=0; /* Specical handling */
383:
384: if (TRACE) fprintf(stderr, "WAISGate: Displaying search response\n");
385: sprintf(line,
386: "Index %s contains the following %d item%s relevant to '%s'.\n",
387: database,
2.2 timbl 388: (int)(response->NumberOfRecordsReturned),
2.1 timbl 389: response->NumberOfRecordsReturned ==1 ? "" : "s",
390: keywords);
2.2 timbl 391:
392: PUTS(line);
393: PUTS("The first figure for each entry is its relative score, ");
394: PUTS("the second the number of lines in the item.");
395: START(HTML_MENU);
396:
2.1 timbl 397: if ( response->DatabaseDiagnosticRecords != 0 ) {
398: info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
399: i =0;
400:
401: if (info->Diagnostics != NULL)
2.2 timbl 402: showDiags((HTStream*)target, info->Diagnostics);
2.1 timbl 403:
404: if ( info->DocHeaders != 0 ) {
405: for (k=0; info->DocHeaders[k] != 0; k++ ) {
406: WAISDocumentHeader* head = info->DocHeaders[k];
407: char * headline = trim_junk(head->Headline);
408: any * docid = head->DocumentID;
409: char * docname; /* printable version of docid */
410: i++;
411:
412: /* Make a printable string out of the document id.
413: */
414: if (TRACE) fprintf(stderr,
2.2 timbl 415: "WAISGate: %2ld: Score: %4ld, lines:%4ld '%s'\n",
2.1 timbl 416: i,
2.2 timbl 417: (long int)(info->DocHeaders[k]->Score),
418: (long int)(info->DocHeaders[k]->Lines),
2.1 timbl 419: headline);
420:
2.2 timbl 421: START(HTML_LI);
422: sprintf(line, "%4ld %4ld ",
423: head->Score,
424: head->Lines);
425: PUTS( line);
426:
2.1 timbl 427: if (archie) {
428: char * www_name = WWW_from_archie(headline);
429: if (www_name) {
2.2 timbl 430: HTStartAnchor(target, NULL, www_name);
2.1 timbl 431: PUTS(headline);
2.2 timbl 432:
433: END(HTML_A);
2.1 timbl 434: free(www_name);
435: } else {
2.2 timbl 436: PUTS(headline);
437: PUTS(" (bad file name)");
2.1 timbl 438: }
439: } else { /* Not archie */
440: docname = WWW_from_WAIS(docid);
441: if (docname) {
2.6 ! timbl 442: char * dbname = HTEscape(database, URL_XPALPHAS);
2.1 timbl 443: sprintf(line, "%s/%s/%d/%s", /* W3 address */
444: dbname,
445: head->Types ? head->Types[0] : "TEXT",
2.2 timbl 446: (int)(head->DocumentLength),
2.1 timbl 447: docname);
2.2 timbl 448: HTStartAnchor(target, NULL, line);
449: PUTS(headline);
450: END(HTML_A);
2.1 timbl 451: free(dbname);
452: free(docname);
453: } else {
2.2 timbl 454: PUTS("(bad doc id)");
2.1 timbl 455: }
456: }
457: } /* next document header */
458: } /* if there were any document headers */
459:
460: if ( info->ShortHeaders != 0 ) {
461: k =0;
462: while (info->ShortHeaders[k] != 0 ) {
463: i++;
2.2 timbl 464: PUTS( "(Short Header record, can't display)");
2.1 timbl 465: }
466: }
467: if ( info->LongHeaders != 0 ) {
468: k =0;
469: while (info->LongHeaders[k] != 0) {
470: i++;
471: PUTS( "\nLong Header record, can't display\n");
472: }
473: }
474: if ( info->Text != 0 ) {
475: k =0;
476: while (info->Text[k] != 0) {
477: i++;
478: PUTS( "\nText record\n");
2.2 timbl 479: output_text_record((HTStream*)target, info->Text[k++], false);
2.1 timbl 480: }
481: }
482: if ( info->Headlines != 0 ) {
483: k =0;
484: while (info->Headlines[k] != 0) {
485: i++;
486: PUTS( "\nHeadline record, can't display\n");
487: /* dsply_headline_record( info->Headlines[k++]); */
488: }
489: }
490: if ( info->Codes != 0 ) {
491: k =0;
492: while (info->Codes[k] != 0) {
493: i++;
494: PUTS( "\nCode record, can't display\n");
495: /* dsply_code_record( info->Codes[k++]); */
496: }
497: }
498: } /* Loop: display user info */
2.2 timbl 499: END(HTML_MENU);
2.1 timbl 500: PUTC('\n'); ;
501: }
502:
503:
504:
2.2 timbl 505:
506: /* Load by name HTLoadWAIS
507: ** ============
508: **
509: ** This renders any object or search as required
2.1 timbl 510: */
2.2 timbl 511: PUBLIC int HTLoadWAIS ARGS4(
512: CONST char *, arg,
513: HTParentAnchor *, anAnchor,
514: HTFormat, format_out,
515: HTStream*, sink)
2.1 timbl 516:
517: #define MAX_KEYWORDS_LENGTH 1000
518: #define MAX_SERVER_LENGTH 1000
519: #define MAX_DATABASE_LENGTH 1000
520: #define MAX_SERVICE_LENGTH 1000
521: #define MAXDOCS 40
522:
523: {
524: static CONST char * error_header =
525: "<h1>Access error</h1>\nThe WWW-WAIS gateway reports the following error:<P>\n";
2.2 timbl 526: char * key; /* pointer to keywords in URL */
2.1 timbl 527: char* request_message = NULL; /* arbitrary message limit */
528: char* response_message = NULL; /* arbitrary message limit */
529: long request_buffer_length; /* how of the request is left */
530: SearchResponseAPDU *retrieval_response = 0;
531: char keywords[MAX_KEYWORDS_LENGTH + 1];
532: char *server_name;
2.6 ! timbl 533: char *wais_database = NULL; /* name of current database */
! 534: char *www_database; /* Same name escaped */
2.1 timbl 535: char *service;
536: char *doctype;
537: char *doclength;
538: long document_length;
539: char *docname;
540: FILE *connection = 0;
541: char * names; /* Copy of arg to be hacked up */
542: BOOL ok = NO;
543:
544: extern FILE * connect_to_server();
545:
546: if (!acceptable_inited) init_acceptable();
547:
548:
549: /* Decipher and check syntax of WWW address:
550: ** ----------------------------------------
551: **
552: ** First we remove the "wais:" if it was spcified. 920110
553: */
554: names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2 timbl 555: key = strchr(names, '?');
556:
557: if (key) {
558: char * p;
559: *key++ = 0; /* Split off keywords */
560: for (p=key; *p; p++) if (*p == '+') *p = ' ';
561: HTUnEscape(key);
562: }
2.1 timbl 563: if (names[0]== '/') {
564: server_name = names+1;
565: if (as_gate =(*server_name == '/'))
566: server_name++; /* Accept one or two */
567: www_database = strchr(server_name,'/');
568: if (www_database) {
569: *www_database++ = 0; /* Separate database name */
570: doctype = strchr(www_database, '/');
571: if (key) ok = YES; /* Don't need doc details */
572: else if (doctype) { /* If not search parse doc details */
573: *doctype++ = 0; /* Separate rest of doc address */
574: doclength = strchr(doctype, '/');
575: if(doclength) {
576: *doclength++ = 0;
577: document_length = atol(doclength);
578: if (document_length) {
579: docname=strchr(doclength, '/');
580: if (docname) {
581: *docname++ = 0;
582: ok = YES; /* To avoid a goto! */
583: } /* if docname */
584: } /* if document_length valid */
585: } /* if doclength */
586: } else { /* no doctype? Assume index required */
587: if (!key) key = "";
588: ok = YES;
589: } /* if doctype */
590: } /* if database */
591: }
592:
2.2 timbl 593: if (!ok)
594: return HTLoadError(sink, 500, "Syntax error in WAIS URL");
595:
2.1 timbl 596: if (TRACE) fprintf(stderr, "WAISGate: Parsed OK\n");
597:
598: service = strchr(names, ':');
599: if (service) *service++ = 0;
600: else service = "210";
601:
602: if (server_name[0] == 0)
603: connection = NULL;
604:
605: else if (!(key && !*key))
606: if ((connection=connect_to_server(server_name,atoi(service)))
607: == NULL) {
2.2 timbl 608: if (TRACE) fprintf (stderr,
2.1 timbl 609: "%sCan't open connection to %s via service %s.\n",
610: error_header, server_name, service);
611: free(names);
2.2 timbl 612: return HTLoadError(sink, 500, "Can't open connection to WAIS server");
2.1 timbl 613: }
614:
2.6 ! timbl 615: StrAllocCopy(wais_database,www_database);
! 616: HTUnEscape(wais_database);
! 617:
2.2 timbl 618: /* This below fixed size stuff is terrible */
2.1 timbl 619: request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
620: response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
621:
622: /* If keyword search is performed but there are no keywords,
623: ** the user has followed a link to the index itself. It would be
624: ** appropriate at this point to send him the .SRC file - how?
625: */
626:
627: if (key && !*key) { /* I N D E X */
628:
2.2 timbl 629:
630: HTStructured * target = HTML_new(anAnchor, format_out, sink);
2.1 timbl 631:
2.2 timbl 632: START(HTML_ISINDEX);
633:
2.1 timbl 634: /* If we have seen a source file for this database, use that:
635: */
2.2 timbl 636:
637: #ifdef CACHING /* old code ... do it this way now? */
638:
639: char filename[256];
640: FILE * fp;
2.1 timbl 641: sprintf(filename, "%s%s:%s:%s.html",
642: WAIS_CACHE_ROOT,
643: server_name, service, www_database);
644:
645: fp = fopen(filename, "r"); /* Have we found this already? */
646: if (TRACE) fprintf(stderr,
647: "WAISGate: Description of server %s %s.\n",
648: filename,
649: fp ? "exists already" : "does NOT exist!");
2.2 timbl 650:
2.1 timbl 651: if (fp) {
652: char c;
2.2 timbl 653: while((c=getc(fp))!=EOF) PUT(c); /* Transfer file */
2.1 timbl 654: fclose(fp);
2.2 timbl 655: } else
656: #endif
657: {
658: START(HTML_TITLE);
659: PUTS(wais_database);
660: PUTS(" index");
661: END(HTML_TITLE);
662:
663: START(HTML_H1);
664: PUTS(wais_database);
665: END(HTML_H1);
666:
2.1 timbl 667: }
2.2 timbl 668: PUTS("Specify search words.");
2.1 timbl 669:
2.2 timbl 670: END_TARGET;
671: FREE_TARGET;
2.1 timbl 672:
673: } else if (key) { /* S E A R C H */
674: char *p;
2.2 timbl 675: HTStructured * target;
676:
2.1 timbl 677: strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
678: while(p=strchr(keywords, '+')) *p = ' ';
679:
680: /* Send advance title to get something fast to the other end */
681:
2.2 timbl 682: target = HTML_new(anAnchor, format_out, sink);
683:
684: START(HTML_ISINDEX);
685: START(HTML_TITLE);
686: PUTS(keywords);
687: PUTS(" (in ");
688: PUTS(wais_database);
689: PUTS(")");
690: END(HTML_TITLE);
691:
692: START(HTML_H1);
693: PUTS(keywords);
694: END(HTML_H1);
2.1 timbl 695:
696: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
697: if (TRACE) fprintf(stderr, "WAISGate: Search for `%s' in `%s'\n",
698: keywords, wais_database);
699: if(NULL ==
700: generate_search_apdu(request_message + HEADER_LENGTH,
701: &request_buffer_length,
702: keywords, wais_database, NULL, MAXDOCS))
703: panic("request too large");
704:
705:
706: if(!interpret_message(request_message,
707: MAX_MESSAGE_LEN - request_buffer_length,
708: response_message,
709: MAX_MESSAGE_LEN,
710: connection,
711: false /* true verbose */
712: )) {
713: panic("returned message too large");
714:
715: } else { /* returned message ok */
716:
717: SearchResponseAPDU *query_response = 0;
718: readSearchResponseAPDU(&query_response,
719: response_message + HEADER_LENGTH);
2.2 timbl 720: display_search_response(target,
721: query_response, wais_database, keywords);
2.1 timbl 722: if (query_response->DatabaseDiagnosticRecords)
723: freeWAISSearchResponse(
724: query_response->DatabaseDiagnosticRecords);
725: freeSearchResponseAPDU( query_response);
726: } /* returned message not too large */
727:
2.2 timbl 728: END_TARGET;
729: FREE_TARGET;
730:
2.1 timbl 731: } else { /* D O C U M E N T F E T C H */
732:
2.2 timbl 733: HTFormat format_in;
734: HTStream * target;
2.1 timbl 735: long count;
736: any doc_chunk;
737: any * docid = &doc_chunk;
738: if (TRACE) printf(
2.2 timbl 739: "WAISGate: Retrieve document id `%s' type `%s' length %ld\n",
2.1 timbl 740: docname, doctype, document_length);
2.2 timbl 741:
742: format_in =
743: !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
744: !strcmp(doctype, "TEXT") ? HTAtom_for("text/plain") :
745: !strcmp(doctype, "GIF") ? HTAtom_for("image/gif") :
746: HTAtom_for("text/plain");
747:
748: target = HTStreamStack(format_in, format_out, sink, anAnchor);
749: if (!target) return HTLoadError(sink, 500,
750: "Can't convert format of WAIS document");
2.1 timbl 751: /* Decode hex or litteral format for document ID
752: */
753: WAIS_from_WWW(docid, docname);
754:
2.2 timbl 755:
2.1 timbl 756: /* Loop over slices of the document
757: */
758: for(count = 0;
759: count * CHARS_PER_PAGE < document_length;
760: count++){
761: char *type = s_strdup(doctype); /* Gets freed I guess */
762: request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.2 timbl 763: if (TRACE) fprintf(stderr, "HTWAIS: Slice number %ld\n", count);
2.1 timbl 764: if(0 ==
765: generate_retrieval_apdu(request_message + HEADER_LENGTH,
766: &request_buffer_length,
767: docid,
768: CT_byte,
769: count * CHARS_PER_PAGE,
770: MIN((count + 1) * CHARS_PER_PAGE,document_length),
771: type,
772: wais_database
773: ))
774: panic("request too long");
2.2 timbl 775:
776: /* Actually do the transaction given by request_message */
2.1 timbl 777: if(0 ==
778: interpret_message(request_message,
779: MAX_MESSAGE_LEN - request_buffer_length,
780: response_message,
781: MAX_MESSAGE_LEN,
782: connection,
783: false /* true verbose */
784: ))
785: panic("Returned message too large");
786:
2.2 timbl 787: /* Parse the result which came back into memory.
788: */
2.1 timbl 789: readSearchResponseAPDU(&retrieval_response,
790: response_message + HEADER_LENGTH);
791:
792: if(NULL == ((WAISSearchResponse *)
793: retrieval_response->DatabaseDiagnosticRecords)->Text){
2.2 timbl 794: /* display_search_response(target, retrieval_response,
795: wais_database, keywords); */
796: PUTS("No text was returned!\n");
2.1 timbl 797: /* panic("No text was returned"); */
798: } else {
799:
2.2 timbl 800: output_text_record(target,
801: ((WAISSearchResponse *)
2.1 timbl 802: retrieval_response->DatabaseDiagnosticRecords)->Text[0],
803: false);
804:
805: } /* If text existed */
806:
807: } /* Loop over slices */
808:
2.2 timbl 809: (*target->isa->end_document)(target);
810: (*target->isa->free)(target);
2.1 timbl 811:
812: free (docid->bytes);
813:
814: freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords);
815: freeSearchResponseAPDU( retrieval_response);
816:
817: } /* If document rather than search */
818:
2.2 timbl 819:
820:
2.1 timbl 821:
2.2 timbl 822: /* (This postponed until later, after a timeout:)
2.1 timbl 823: */
824: if (connection) close_connection(connection);
825: if (wais_database) free(wais_database);
826: s_free(request_message);
827: s_free(response_message);
828:
829: free(names);
2.2 timbl 830: return HT_LOADED;
2.1 timbl 831: }
832:
2.2 timbl 833: PUBLIC HTProtocol HTWAIS = { "wais", HTLoadWAIS, NULL };
2.1 timbl 834:
835:
Webmaster