Annotation of libwww/Library/src/HTWAIS.c, revision 2.6

2.1       timbl       1: /*     WorldWideWeb - Wide Area Information Server Access      HTWAIS.c
                      2: **     ==================================================
                      3: **
                      4: **     This module allows a WWW server or client to read data from a
                      5: **     remote  WAIS
                      6: **  server, and provide that data to a WWW client in hypertext form.
                      7: **  Source files, once retrieved, are stored and used to provide
                      8: **  information about the index when that is accessed.
                      9: **
                     10: ** Authors
                     11: **     BK      Brewster Kahle, Thinking Machines, <Brewster@think.com>
                     12: **     TBL     Tim Berners-Lee, CERN <timbl@info.cern.ch>
                     13: **
                     14: ** History
                     15: **        Sep 91       TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
                     16: **        Feb 91       TBL Generated HTML cleaned up a bit (quotes, escaping)
                     17: **                         Refers to lists of sources. 
2.2       timbl      18: **        Mar 93       TBL   Lib 2.0 compatible module made.   
2.1       timbl      19: **
                     20: ** Bugs
                     21: **     Uses C stream i/o to read and write sockets, which won't work
                     22: **     on VMS TCP systems.
                     23: **
                     24: **     Should cache connections.
                     25: **
                     26: **     ANSI C only as written
                     27: **
                     28: ** WAIS comments:
                     29: **
                     30: **     1.      Separate directories for different system's .o would help
                     31: **     2.      Document ids are rather long!
                     32: **
                     33: ** WWW Address mapping convention:
                     34: **
                     35: **     /servername/database/type/length/document-id
                     36: **
                     37: **     /servername/database?word+word+word
                     38: */
                     39: /* WIDE AREA INFORMATION SERVER SOFTWARE:
                     40:    No guarantees or restrictions.  See the readme file for the full standard
                     41:    disclaimer.
                     42: 
                     43:    Brewster@think.com
                     44: */
                     45: 
                     46: 
                     47: #define DIRECTORY "/quake.think.com:210/directory-of-servers"
                     48: 
                     49: #define BIG 1024       /* identifier size limit  @@@@@ */
                     50: 
2.2       timbl      51: /*                     From WAIS
                     52: **                     ---------
2.1       timbl      53: */
                     54: 
                     55: #include <ui.h>
                     56: 
                     57: #define MAX_MESSAGE_LEN 100000
                     58: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
                     59: 
                     60: #define WAISSEARCH_DATE "Fri Jul 19 1991"
                     61: 
                     62: 
2.2       timbl      63: /*                     FROM WWW
                     64: **                     --------
2.1       timbl      65: */
                     66: #define BUFFER_SIZE 4096       /* Arbitrary size for efficiency */
                     67: 
2.2       timbl      68: #define HEX_ESCAPE '%'
                     69: 
2.1       timbl      70: #include "HTUtils.h"
                     71: #include "tcp.h"
                     72: #include "HTParse.h"
2.2       timbl      73: #include "HTAccess.h"          /* We implement a protocol */
                     74: #include "HTML.h"              /* The object we will generate */
                     75:  
                     76: /* #include "ParseWSRC.h" */
2.1       timbl      77: 
                     78: extern int WWW_TraceFlag;      /* Control diagnostic output */
                     79: extern FILE * logfile;         /* Log file output */
                     80: 
                     81: PRIVATE BOOL   as_gate;        /* Client is using us as gateway */
                     82: 
                     83: PRIVATE char   line[2048];     /* For building strings to display */
                     84:                                /* Must be able to take id */
2.2       timbl      85: 
                     86: 
                     87: #include "HTParse.h"
                     88: #include "HTFormat.h"
                     89: #include "HTTCP.h"
2.6     ! timbl      90: /* #include "HTWSRC.h" */      /* Need some bits from here */
2.2       timbl      91: 
                     92: /*             Hypertext object building machinery
                     93: */
                     94: #include "HTML.h"
                     95: 
/*	Shorthand for calling the output object's methods through its class
**	structure.  Every macro refers to a variable called `target', which
**	must be in scope wherever the macro is used.
*/
#define PUTC(c) (*target->isa->put_character)(target, c)
#define PUTS(s) (*target->isa->put_string)(target, s)
/* START passes 0 for both of the extra start_element arguments */
#define START(e) (*target->isa->start_element)(target, e, 0, 0)
#define END(e) (*target->isa->end_element)(target, e)
#define END_TARGET (*target->isa->end_document)(target)
#define FREE_TARGET (*target->isa->free)(target)

/*	Local view of the structured object: only the class pointer is
**	declared in this file.
**	NOTE(review): presumably the real object has further members after
**	isa -- confirm against the module that creates it.
*/
struct _HTStructured {
       CONST HTStructuredClass *       isa;
       /* ... */
};

/*	Local view of the stream object: again only the class pointer is
**	declared in this file.
*/
struct _HTStream {
       CONST HTStreamClass *   isa;
       /* ... */
};
                    112: 
                    113: 
2.1       timbl     114: /*                                                             showDiags
                    115: */
                    116: /* modified from Jonny G's version in ui/question.c */
                    117: 
2.2       timbl     118: void showDiags ARGS2(
                    119:        HTStream *,             target,
                    120:        diagnosticRecord **,    d)
2.1       timbl     121: {
                    122:   long i;
                    123: 
                    124:   for (i = 0; d[i] != NULL; i++) {
                    125:     if (d[i]->ADDINFO != NULL) {
                    126:       PUTS("Diagnostic code is ");
                    127:       PUTS(d[i]->DIAG);
                    128:       PUTC(' ');
                    129:       PUTS(d[i]->ADDINFO);
                    130:       PUTC('\n'); ;
                    131:     }
                    132:   }
                    133: }
                    134: 
                    135: /*     Matrix of allowed characters in filenames
                    136: **     -----------------------------------------
                    137: */
                    138: 
                    139: PRIVATE BOOL acceptable[256];
                    140: PRIVATE BOOL acceptable_inited = NO;
                    141: 
                    142: PRIVATE void init_acceptable NOARGS
                    143: {
                    144:     unsigned int i;
                    145:     char * good = 
                    146:       "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
                    147:     for(i=0; i<256; i++) acceptable[i] = NO;
                    148:     for(;*good; good++) acceptable[(unsigned int)*good] = YES;
                    149:     acceptable_inited = YES;
                    150: }
                    151: 
                    152: /*     Transform file identifier into WWW address
                    153: **     ------------------------------------------
                    154: **
                    155: **
                    156: ** On exit,
                    157: **     returns         nil if error
                    158: **                     pointer to malloced string (must be freed) if ok
                    159: */
                    160: char * WWW_from_archie ARGS1 (char *, file)
                    161: {
                    162:     char * end;
                    163:     char * result;
                    164:     char * colon;
                    165:     for(end=file; *end > ' '; end++);  /* assumes ASCII encoding*/
                    166:     result = (char *)malloc(10 + (end-file));
                    167:     if (!result) return result;                /* Malloc error */
                    168:     strcpy(result, "file://");
                    169:     strncat(result, file, end-file);
                    170:     colon = strchr(result+7, ':');     /* Expect colon after host */
                    171:     if (colon) {
                    172:        for(; colon[0]; colon[0]=colon[1], colon++);    /* move down */
                    173:     }
                    174:     return result;
                    175: } /* WWW_from_archie */
                    176: 
2.2       timbl     177: /*     Transform document identifier into URL
                    178: **     --------------------------------------
2.1       timbl     179: **
                    180: ** Bugs: A static buffer of finite size is used!
                    181: **     The format of the docid MUST be good!
                    182: **
                    183: ** On exit,
                    184: **     returns         nil if error
                    185: **                     pointer to malloced string (must be freed) if ok
                    186: */
2.2       timbl     187: PRIVATE char hex [16] = "0123456789ABCDEF";
                    188: extern char from_hex PARAMS((char a));                 /* In HTWSRC @@ */
                    189: 
                    190: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
                    191: 
2.1       timbl     192: {
                    193:     static char buf[BIG];
                    194:     char * q = buf;
                    195:     char * p = (docid->bytes);
                    196:     int i, l;
                    197:     if (TRACE) {
                    198:        char *p;
2.2       timbl     199:        fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1       timbl     200:        for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
                    201:            if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
                    202:                fprintf(stderr, "%c", *p);
                    203:            else
2.2       timbl     204:                fprintf(stderr, "<%x>", (unsigned)*p);
2.1       timbl     205:        }
                    206:        fprintf(stderr, "\n");
                    207:     }   
                    208:     for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
                    209:        if (TRACE) fprintf(stderr, "    Record type %d, length %d\n",
                    210:                p[0], p[1]);
                    211:         if (*p>10) {
                    212:            fprintf(stderr, "Eh? DOCID record type of %d!\n", *p);
                    213:            return 0;
                    214:        }
2.2       timbl     215:        {       /* Bug fix -- allow any byte value 15 Apr 93 */
                    216:            unsigned int i = (unsigned) *p++;
                    217:            
                    218:            if (i > 99) {
                    219:                *q++ = (i/100) + '0';
                    220:                i = i % 100;
                    221:            }
                    222:            if (i > 9) {
                    223:                *q++ = (i/10) + '0';
                    224:                i = i % 10;
                    225:            }
                    226:            *q++ = i + '0';     /* Record type */
                    227:        }
2.1       timbl     228:        *q++ = '=';             /* Separate */
                    229:        l = *p++;               /* Length */
                    230:        for(i=0; i<l; i++, p++){
                    231:            if (!acceptable[*p]) {
                    232:                *q++ = HEX_ESCAPE;      /* Means hex commming */
                    233:                *q++ = hex[(*p) >> 4];
                    234:                *q++ = hex[(*p) & 15];
                    235:            }
                    236:            else *q++ = *p;
                    237:        }
                    238:        *q++= ';';              /* Terminate field */
                    239:     }
                    240:     *q++ = 0;                  /* Terminate string */
                    241:     if (TRACE) fprintf(stderr, "WWW form of id: %s\n", buf); 
                    242:     {
                    243:         char * result = (char *)malloc(strlen(buf)+1);
                    244:        strcpy(result, buf);
                    245:        return result;
                    246:     }
                    247: } /* WWW_from_WAIS */
                    248: 
                    249: 
2.2       timbl     250: /*     Transform URL into WAIS document identifier
                    251: **     -------------------------------------------
2.1       timbl     252: **
                    253: ** On entry,
                    254: **     docname         points to valid name produced originally by
                    255: **                     WWW_from_WAIS
                    256: ** On exit,
                    257: **     docid->size     is valid
                    258: **     docid->bytes    is malloced and must later be freed.
                    259: */
                    260: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
                    261: {
                    262:     char *z;   /* Output pointer */
                    263:     char *sor; /* Start of record - points to size field. */
                    264:     char *p;   /* Input pointer */
                    265:     char *q;   /* Poisition of "=" */
                    266:     char *s;   /* Position of semicolon */
                    267:     int n;     /* size */
                    268:     if (TRACE) fprintf(stderr, "WWW id (to become WAIS id): %s\n", docname); 
                    269:     for(n=0, p = docname; *p; p++) {   /* Count sizes of strings */
                    270:         n++;
                    271:        if (*p == ';')  n--;            /* Not converted */
                    272:        else if (*p == HEX_ESCAPE) n=n-2;       /* Save two bytes */
                    273:         docid->size = n;
                    274:     }
                    275:     
                    276:     docid->bytes = (char *) malloc(docid->size); /* result record */
                    277:     z = docid->bytes;
                    278:     
                    279:     for(p = docname; *p; ) {   /* Convert of strings */
2.2       timbl     280:                                /* Record type */
                    281:                                
                    282:        *z = 0;                 /* Initialize record type */
                    283:        while (*p >= '0' && *p <= '9') {
                    284:            *z = *z*10 + (*p++ - '0');  /* Decode decimal record type */
                    285:        }
                    286:        z++;
                    287:        if (*p != '=') return 0;
                    288:        q = p;
                    289:        
                    290: /*        *z++ = *p++ - '0';
2.1       timbl     291:        q = strchr(p , '=');
                    292:        if (!q) return 0;
2.2       timbl     293: */
2.1       timbl     294:        s = strchr(q, ';');     /* (Check only) */
                    295:        if (!s) return 0;       /* Bad! No ';'; */
                    296:         sor = z;               /* Remember where the size field was */
                    297:        z++;                    /* Skip record size for now     */
                    298:        for(p=q+1; *p!=';' ; ) {
                    299:           if (*p == HEX_ESCAPE) {
                    300:                char c;
                    301:                unsigned int b;
                    302:                p++;
                    303:                c = *p++;
                    304:                b =   from_hex(c);
                    305:                c = *p++;
                    306:                if (!c) break;  /* Odd number of chars! */
                    307:                *z++ = (b<<4) + from_hex(c);
                    308:            } else {
                    309:                *z++ = *p++;    /* Record */
                    310:            }
                    311:        }
                    312:        *sor = (z-sor-1);       /* Fill in size -- not counting size itself */
                    313:        p++;                    /* After semicolon: start of next record */
                    314:     }
                    315:     
                    316:     if (TRACE) {
                    317:        char *p;
2.2       timbl     318:        fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1       timbl     319:        for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
                    320:            if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
                    321:                fprintf(stderr, "%c", *p);
                    322:            else
2.2       timbl     323:                fprintf(stderr, "<%x>", (unsigned)*p);
2.1       timbl     324:        }
                    325:        fprintf(stderr, "\n");
                    326:     }   
                    327:     return docid;              /* Ok */
                    328:     
                    329: } /* WAIS_from_WWW */
                    330: 
                    331: 
                    332: /*     Send a plain text record to the client          output_text_record()
                    333: **     --------------------------------------
                    334: */
2.2       timbl     335: 
2.1       timbl     336: PRIVATE void output_text_record ARGS3(
2.2       timbl     337:     HTStream *,                        target,
                    338:     WAISDocumentText *,                record,
                    339:     boolean,                   quote_string_quotes)
2.1       timbl     340: {
                    341:   long count;
                    342:   /* printf(" Text\n");
                    343:      print_any("     DocumentID:  ", record->DocumentID);
                    344:      printf("     VersionNumber:  %d\n", record->VersionNumber);
                    345:      */
                    346:   for(count = 0; count < record->DocumentText->size; count++){
                    347:     long ch = (unsigned char)record->DocumentText->bytes[count];
2.2       timbl     348:     if (ch == 27) {    /* What is this in for? Tim */
2.1       timbl     349: 
                    350:            /* then we have an escape code */
                    351:            /* if the next letter is '(' or ')', then ignore two letters */
                    352:            if('(' == record->DocumentText->bytes[count + 1] ||
                    353:                ')' == record->DocumentText->bytes[count + 1])
                    354:            count += 1;             /* it is a term marker */
                    355:            else count += 4;            /* it is a paragraph marker */
                    356:     } else if (ch == '\n' || ch == '\r') {
2.2       timbl     357:            PUTC('\n');
2.1       timbl     358:     } else if ((ch=='\t') || isprint(ch)){
2.2       timbl     359:            PUTC(ch);
2.1       timbl     360:     } 
                    361:   }
                    362: } /* output text record */
                    363: 
                    364: 
2.2       timbl     365: 
2.1       timbl     366: /*     Format A Search response for the client         display_search_response
                    367: **     ---------------------------------------
                    368: */
                    369: /* modified from tracy shen's version in wutil.c
                    370:  * displays either a text record or a set of headlines.
                    371:  */
                    372: void
2.2       timbl     373: display_search_response ARGS4(
                    374:     HTStructured *,            target,
2.1       timbl     375:     SearchResponseAPDU *,      response,
                    376:     char *,                    database,
                    377:     char *,                    keywords)
                    378: {
                    379:   WAISSearchResponse  *info;
                    380:   long i, k;
                    381:   
                    382:   BOOL archie =  strstr(database, "archie")!=0;        /* Specical handling */
                    383:   
                    384:   if (TRACE) fprintf(stderr, "WAISGate: Displaying search response\n");
                    385:   sprintf(line,
                    386:        "Index %s contains the following %d item%s relevant to '%s'.\n",
                    387:         database,
2.2       timbl     388:         (int)(response->NumberOfRecordsReturned),
2.1       timbl     389:         response->NumberOfRecordsReturned ==1 ? "" : "s",
                    390:         keywords);
2.2       timbl     391: 
                    392:   PUTS(line);
                    393:   PUTS("The first figure for each entry is its relative score, ");
                    394:   PUTS("the second the number of lines in the item.");
                    395:   START(HTML_MENU);
                    396: 
2.1       timbl     397:   if ( response->DatabaseDiagnosticRecords != 0 ) {
                    398:     info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
                    399:     i =0; 
                    400: 
                    401:     if (info->Diagnostics != NULL)
2.2       timbl     402:       showDiags((HTStream*)target, info->Diagnostics);
2.1       timbl     403: 
                    404:     if ( info->DocHeaders != 0 ) {
                    405:       for (k=0; info->DocHeaders[k] != 0; k++ ) {
                    406:        WAISDocumentHeader* head = info->DocHeaders[k];
                    407:        char * headline = trim_junk(head->Headline);
                    408:        any * docid = head->DocumentID;
                    409:        char * docname;                 /* printable version of docid */
                    410:        i++;
                    411: 
                    412: /*     Make a printable string out of the document id.
                    413: */
                    414:        if (TRACE) fprintf(stderr, 
2.2       timbl     415:                "WAISGate:  %2ld: Score: %4ld, lines:%4ld '%s'\n", 
2.1       timbl     416:               i,
2.2       timbl     417:               (long int)(info->DocHeaders[k]->Score),
                    418:               (long int)(info->DocHeaders[k]->Lines),
2.1       timbl     419:               headline);
                    420: 
2.2       timbl     421:        START(HTML_LI);
                    422:        sprintf(line, "%4ld  %4ld  ",
                    423:            head->Score,
                    424:            head->Lines);
                    425:        PUTS( line);
                    426: 
2.1       timbl     427:        if (archie) {
                    428:            char * www_name = WWW_from_archie(headline);
                    429:            if (www_name) {
2.2       timbl     430:                HTStartAnchor(target, NULL, www_name);
2.1       timbl     431:                PUTS(headline);
2.2       timbl     432:                
                    433:                END(HTML_A);
2.1       timbl     434:                free(www_name);
                    435:            } else {
2.2       timbl     436:                 PUTS(headline);
                    437:                 PUTS(" (bad file name)");
2.1       timbl     438:            }
                    439:        } else { /* Not archie */
                    440:            docname =  WWW_from_WAIS(docid);
                    441:            if (docname) {
2.6     ! timbl     442:                char * dbname = HTEscape(database, URL_XPALPHAS);
2.1       timbl     443:                sprintf(line, "%s/%s/%d/%s",            /* W3 address */
                    444:                                    dbname,
                    445:                    head->Types ? head->Types[0] : "TEXT",
2.2       timbl     446:                    (int)(head->DocumentLength),
2.1       timbl     447:                    docname);
2.2       timbl     448:                HTStartAnchor(target, NULL, line);
                    449:                PUTS(headline);
                    450:                END(HTML_A);
2.1       timbl     451:                free(dbname);
                    452:                free(docname);
                    453:            } else {
2.2       timbl     454:                 PUTS("(bad doc id)");
2.1       timbl     455:            }
                    456:          }
                    457:       } /* next document header */
                    458:     } /* if there were any document headers */
                    459:     
                    460:     if ( info->ShortHeaders != 0 ) {
                    461:       k =0;
                    462:       while (info->ShortHeaders[k] != 0 ) {
                    463:        i++;
2.2       timbl     464:        PUTS( "(Short Header record, can't display)");
2.1       timbl     465:       }
                    466:     }
                    467:     if ( info->LongHeaders != 0 ) {
                    468:       k =0;
                    469:       while (info->LongHeaders[k] != 0) {
                    470:        i++;
                    471:        PUTS( "\nLong Header record, can't display\n");
                    472:       }
                    473:     }
                    474:     if ( info->Text != 0 ) {
                    475:       k =0;
                    476:       while (info->Text[k] != 0) {
                    477:        i++;
                    478:        PUTS( "\nText record\n");
2.2       timbl     479:        output_text_record((HTStream*)target, info->Text[k++], false);
2.1       timbl     480:       }
                    481:     }
                    482:     if ( info->Headlines != 0 ) {
                    483:       k =0;
                    484:       while (info->Headlines[k] != 0) {
                    485:        i++;
                    486:        PUTS( "\nHeadline record, can't display\n");
                    487:        /* dsply_headline_record( info->Headlines[k++]); */
                    488:       }
                    489:     }
                    490:     if ( info->Codes != 0 ) {
                    491:       k =0;
                    492:       while (info->Codes[k] != 0) {
                    493:        i++;
                    494:        PUTS( "\nCode record, can't display\n");
                    495:        /* dsply_code_record( info->Codes[k++]); */
                    496:       }
                    497:     }
                    498:   }                            /* Loop: display user info */
2.2       timbl     499:   END(HTML_MENU);
2.1       timbl     500:   PUTC('\n'); ;
                    501: }
                    502: 
                    503: 
                    504: 
2.2       timbl     505: 
                    506: /*             Load by name                                    HTLoadWAIS
                    507: **             ============
                    508: **
                    509: **     This renders any object or search as required
2.1       timbl     510: */
2.2       timbl     511: PUBLIC int HTLoadWAIS ARGS4(
                    512:        CONST char *,           arg,
                    513:        HTParentAnchor *,       anAnchor,
                    514:        HTFormat,               format_out,
                    515:        HTStream*,              sink)
2.1       timbl     516: 
                    517: #define MAX_KEYWORDS_LENGTH 1000
                    518: #define MAX_SERVER_LENGTH 1000
                    519: #define MAX_DATABASE_LENGTH 1000
                    520: #define MAX_SERVICE_LENGTH 1000
                    521: #define MAXDOCS 40
                    522: 
                    523: {
                    524:     static CONST char * error_header =
                    525: "<h1>Access error</h1>\nThe WWW-WAIS gateway reports the following error:<P>\n";
2.2       timbl     526:     char * key;                          /* pointer to keywords in URL */
2.1       timbl     527:     char* request_message = NULL; /* arbitrary message limit */
                    528:     char* response_message = NULL; /* arbitrary message limit */
                    529:     long request_buffer_length;        /* how of the request is left */
                    530:     SearchResponseAPDU  *retrieval_response = 0;
                    531:     char keywords[MAX_KEYWORDS_LENGTH + 1];
                    532:     char *server_name; 
2.6     ! timbl     533:     char *wais_database = NULL;                /* name of current database */
        !           534:     char *www_database;                        /* Same name escaped */
2.1       timbl     535:     char *service;
                    536:     char *doctype;
                    537:     char *doclength;
                    538:     long document_length;
                    539:     char *docname;
                    540:     FILE *connection = 0;
                    541:     char * names;              /* Copy of arg to be hacked up */
                    542:     BOOL ok = NO;
                    543:     
                    544:     extern FILE * connect_to_server();
                    545:     
                    546:     if (!acceptable_inited) init_acceptable();
                    547:     
                    548:         
                    549: /*     Decipher and check syntax of WWW address:
                    550: **     ----------------------------------------
                    551: **
                    552: **     First we remove the "wais:" if it was spcified.  920110
                    553: */  
                    554:     names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2       timbl     555:     key = strchr(names, '?');
                    556:     
                    557:     if (key) {
                    558:        char * p;
                    559:        *key++ = 0;     /* Split off keywords */
                    560:        for (p=key; *p; p++) if (*p == '+') *p = ' ';
                    561:        HTUnEscape(key);
                    562:     }
2.1       timbl     563:     if (names[0]== '/') {
                    564:        server_name = names+1;
                    565:        if (as_gate =(*server_name == '/'))
                    566:            server_name++;      /* Accept one or two */
                    567:        www_database = strchr(server_name,'/');
                    568:        if (www_database) {
                    569:            *www_database++ = 0;                /* Separate database name */
                    570:            doctype = strchr(www_database, '/');
                    571:            if (key) ok = YES;  /* Don't need doc details */
                    572:            else if (doctype) { /* If not search parse doc details */
                    573:                *doctype++ = 0; /* Separate rest of doc address */
                    574:                doclength = strchr(doctype, '/');
                    575:                if(doclength) {
                    576:                    *doclength++ = 0;
                    577:                    document_length = atol(doclength);
                    578:                    if (document_length) {
                    579:                        docname=strchr(doclength, '/');
                    580:                        if (docname) {
                    581:                            *docname++ = 0;
                    582:                            ok = YES;   /* To avoid a goto! */
                    583:                        } /* if docname */
                    584:                    } /* if document_length valid */
                    585:                } /* if doclength */
                    586:            } else { /* no doctype?  Assume index required */
                    587:                if (!key) key = "";
                    588:                ok = YES;
                    589:            } /* if doctype */
                    590:        } /* if database */
                    591:      }
                    592:      
2.2       timbl     593:      if (!ok)
                    594:         return HTLoadError(sink, 500, "Syntax error in WAIS URL");
                    595: 
2.1       timbl     596:      if (TRACE) fprintf(stderr, "WAISGate: Parsed OK\n");
                    597:      
                    598:      service = strchr(names, ':');
                    599:      if (service)  *service++ = 0;
                    600:      else service = "210";
                    601:      
                    602:      if (server_name[0] == 0)
                    603:         connection = NULL;
                    604: 
                    605:      else if (!(key && !*key))
                    606:       if ((connection=connect_to_server(server_name,atoi(service)))
                    607:         == NULL)  {
2.2       timbl     608:         if (TRACE) fprintf (stderr,
2.1       timbl     609:             "%sCan't open connection to %s via service %s.\n",
                    610:             error_header, server_name, service);
                    611:         free(names);
2.2       timbl     612:         return HTLoadError(sink, 500, "Can't open connection to WAIS server");
2.1       timbl     613:     }
                    614: 
2.6     ! timbl     615:     StrAllocCopy(wais_database,www_database);
        !           616:     HTUnEscape(wais_database);
        !           617:     
2.2       timbl     618:        /* This below fixed size stuff is terrible */
2.1       timbl     619:     request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
                    620:     response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
                    621: 
                    622: /*     If keyword search is performed but there are no keywords,
                    623: **     the user has followed a link to the index itself. It would be
                    624: **     appropriate at this point to send him the .SRC file - how?
                    625: */
                    626: 
                    627:     if (key && !*key) {                                /* I N D E X */
                    628:     
2.2       timbl     629:        
                    630:        HTStructured * target = HTML_new(anAnchor, format_out, sink);
2.1       timbl     631:        
2.2       timbl     632:        START(HTML_ISINDEX);
                    633: 
2.1       timbl     634:        /* If we have seen a source file for this database, use that:
                    635:        */
2.2       timbl     636: 
                    637: #ifdef CACHING                 /* old code ... do it this way now? */
                    638: 
                    639:        char filename[256];
                    640:        FILE * fp;
2.1       timbl     641:        sprintf(filename, "%s%s:%s:%s.html",
                    642:                WAIS_CACHE_ROOT,
                    643:                server_name, service, www_database);
                    644: 
                    645:        fp = fopen(filename, "r");      /* Have we found this already? */
                    646:        if (TRACE) fprintf(stderr,
                    647:                "WAISGate: Description of server %s %s.\n",
                    648:                filename,
                    649:                fp ? "exists already" : "does NOT exist!");
2.2       timbl     650: 
2.1       timbl     651:        if (fp) {
                    652:            char c;
2.2       timbl     653:            while((c=getc(fp))!=EOF) PUT(c);    /* Transfer file */
2.1       timbl     654:            fclose(fp);
2.2       timbl     655:        } else
                    656: #endif
                    657:        {
                    658:            START(HTML_TITLE);
                    659:            PUTS(wais_database);
                    660:            PUTS(" index");
                    661:            END(HTML_TITLE);
                    662:            
                    663:            START(HTML_H1);
                    664:            PUTS(wais_database);
                    665:            END(HTML_H1);
                    666:            
2.1       timbl     667:        }
2.2       timbl     668:        PUTS("Specify search words.");
2.1       timbl     669:        
2.2       timbl     670:        END_TARGET;
                    671:        FREE_TARGET;
2.1       timbl     672:        
                    673:     } else if (key) {                                  /* S E A R C H */
                    674:        char *p;
2.2       timbl     675:        HTStructured * target;
                    676:        
2.1       timbl     677:        strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
                    678:        while(p=strchr(keywords, '+')) *p = ' ';
                    679:     
                    680:         /* Send advance title to get something fast to the other end */
                    681:        
2.2       timbl     682:        target = HTML_new(anAnchor, format_out, sink);
                    683:        
                    684:        START(HTML_ISINDEX);
                    685:        START(HTML_TITLE);
                    686:        PUTS(keywords);
                    687:        PUTS(" (in ");
                    688:        PUTS(wais_database);
                    689:        PUTS(")");
                    690:        END(HTML_TITLE);
                    691:        
                    692:        START(HTML_H1);
                    693:        PUTS(keywords);
                    694:        END(HTML_H1);
2.1       timbl     695: 
                    696:        request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
                    697:        if (TRACE) fprintf(stderr, "WAISGate: Search for `%s' in `%s'\n",
                    698:                keywords, wais_database);
                    699:        if(NULL ==
                    700:        generate_search_apdu(request_message + HEADER_LENGTH, 
                    701:                                &request_buffer_length, 
                    702:                                keywords, wais_database, NULL, MAXDOCS))
                    703:        panic("request too large");
                    704:        
                    705: 
                    706:        if(!interpret_message(request_message, 
                    707:                                MAX_MESSAGE_LEN - request_buffer_length, 
                    708:                                response_message,
                    709:                                MAX_MESSAGE_LEN,
                    710:                                connection,
                    711:                                false   /* true verbose */
                    712:                                )) {
                    713:            panic("returned message too large");
                    714:     
                    715:         } else {       /* returned message ok */
                    716:        
                    717:            SearchResponseAPDU  *query_response = 0;
                    718:            readSearchResponseAPDU(&query_response,
                    719:                response_message + HEADER_LENGTH);
2.2       timbl     720:            display_search_response(target, 
                    721:                query_response, wais_database, keywords);
2.1       timbl     722:            if (query_response->DatabaseDiagnosticRecords)
                    723:                freeWAISSearchResponse(
                    724:                        query_response->DatabaseDiagnosticRecords);         
                    725:            freeSearchResponseAPDU( query_response);
                    726:        }       /* returned message not too large */
                    727:     
2.2       timbl     728:        END_TARGET;
                    729:        FREE_TARGET;
                    730: 
2.1       timbl     731:     } else {                   /* D O C U M E N T    F E T C H */
                    732:     
2.2       timbl     733:        HTFormat format_in;
                    734:        HTStream * target;
2.1       timbl     735:        long count;
                    736:        any   doc_chunk;
                    737:        any * docid = &doc_chunk;
                    738:        if (TRACE) printf(
2.2       timbl     739:                "WAISGate: Retrieve document id `%s' type `%s' length %ld\n",
2.1       timbl     740:                docname, doctype, document_length);
2.2       timbl     741:                
                    742:        format_in = 
                    743:          !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
                    744:          !strcmp(doctype, "TEXT") ? HTAtom_for("text/plain") :
                    745:          !strcmp(doctype, "GIF")  ? HTAtom_for("image/gif") :
                    746:                                     HTAtom_for("text/plain");
                    747: 
                    748:        target = HTStreamStack(format_in, format_out, sink, anAnchor);
                    749:        if (!target) return HTLoadError(sink, 500,
                    750:                "Can't convert format of WAIS document");
2.1       timbl     751: /*     Decode hex or literal format for document ID
                    752: */     
                    753:        WAIS_from_WWW(docid, docname);
                    754: 
2.2       timbl     755:        
2.1       timbl     756: /*     Loop over slices of the document
                    757: */     
                    758:        for(count = 0; 
                    759:            count * CHARS_PER_PAGE < document_length;
                    760:            count++){
                    761:          char *type = s_strdup(doctype);       /* Gets freed I guess */
                    762:          request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.2       timbl     763:          if (TRACE) fprintf(stderr, "HTWAIS: Slice number %ld\n", count);
2.1       timbl     764:          if(0 ==
                    765:              generate_retrieval_apdu(request_message + HEADER_LENGTH,
                    766:                    &request_buffer_length, 
                    767:                    docid, 
                    768:                    CT_byte,
                    769:                    count * CHARS_PER_PAGE,
                    770:                    MIN((count + 1) * CHARS_PER_PAGE,document_length),
                    771:                    type,
                    772:                    wais_database
                    773:                    ))
                    774:                panic("request too long");
2.2       timbl     775:          
                    776:          /*    Actually do the transaction given by request_message */   
2.1       timbl     777:          if(0 ==
                    778:             interpret_message(request_message, 
                    779:                               MAX_MESSAGE_LEN - request_buffer_length, 
                    780:                               response_message,
                    781:                               MAX_MESSAGE_LEN,
                    782:                               connection,
                    783:                               false /* true verbose */ 
                    784:                               ))
                    785:            panic("Returned message too large");
                    786: 
2.2       timbl     787:          /*    Parse the result which came back into memory.
                    788:          */
2.1       timbl     789:          readSearchResponseAPDU(&retrieval_response, 
                    790:                                 response_message + HEADER_LENGTH);
                    791: 
                    792:          if(NULL == ((WAISSearchResponse *)
                    793:                retrieval_response->DatabaseDiagnosticRecords)->Text){
2.2       timbl     794:                /* display_search_response(target, retrieval_response,
                    795:                                        wais_database, keywords); */
                    796:                PUTS("No text was returned!\n");
2.1       timbl     797:                /* panic("No text was returned"); */
                    798:          } else {
                    799:          
2.2       timbl     800:                output_text_record(target,
                    801:                   ((WAISSearchResponse *)
2.1       timbl     802:                    retrieval_response->DatabaseDiagnosticRecords)->Text[0],
                    803:                false);
                    804:          
                    805:          } /* If text existed */
                    806:          
                    807:        }       /* Loop over slices */
                    808: 
2.2       timbl     809:        (*target->isa->end_document)(target);
                    810:        (*target->isa->free)(target);
2.1       timbl     811: 
                    812:        free (docid->bytes);
                    813:        
                    814:        freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords); 
                    815:        freeSearchResponseAPDU( retrieval_response);
                    816: 
                    817:     } /* If document rather than search */
                    818: 
2.2       timbl     819: 
                    820: 
2.1       timbl     821: 
2.2       timbl     822: /*     (This postponed until later,  after a timeout:)
2.1       timbl     823: */
                    824:     if (connection) close_connection(connection);
                    825:     if (wais_database) free(wais_database);
                    826:     s_free(request_message);
                    827:     s_free(response_message);
                    828: 
                    829:     free(names);
2.2       timbl     830:     return HT_LOADED;
2.1       timbl     831: }
                    832: 
2.2       timbl     833: PUBLIC HTProtocol HTWAIS = { "wais", HTLoadWAIS, NULL };	/* Protocol descriptor exported by this module: names the "wais" scheme, points at HTLoadWAIS (presumably the load routine), and leaves the third member NULL.  NOTE(review): member meanings come from the HTProtocol declaration elsewhere -- confirm. */
2.1       timbl     834: 
                    835: 

Webmaster