Annotation of libwww/Library/src/HTWAIS.c, revision 2.13

2.1       timbl       1: /*     WorldWideWeb - Wide Area Information Server Access       HTWAIS.c
                      2: **     ==================================================
                      3: **
                      4: **     This module allows a WWW server or client to read data from a
                      5: **     remote  WAIS
                      6: **  server, and provide that data to a WWW client in hypertext form.
                      7: **  Source files, once retrieved, are stored and used to provide
                      8: **  information about the index when that is accessed.
                      9: **
                     10: ** Authors
                     11: **     BK      Brewster Kahle, Thinking Machines, <Brewster@think.com>
                     12: **     TBL     Tim Berners-Lee, CERN <timbl@info.cern.ch>
                     13: **
                     14: ** History
                     15: **        Sep 91       TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
                     16: **        Feb 91       TBL Generated HTML cleaned up a bit (quotes, escaping)
                     17: **                         Refers to lists of sources. 
2.2       timbl      18: **        Mar 93       TBL   Lib 2.0 compatible module made.   
2.1       timbl      19: **
                     20: ** Bugs
                     21: **     Uses C stream i/o to read and write sockets, which won't work
                     22: **     on VMS TCP systems.
                     23: **
                     24: **     Should cache connections.
                     25: **
                     26: **     ANSI C only as written
                     27: **
2.11      secret     28: ** Bugs fixed
                     29: **      NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
                     30: **
2.1       timbl      31: ** WAIS comments:
                     32: **
                     33: **     1.      Separate directories for different system's .o would help
                     34: **     2.      Document ids are rather long!
                     35: **
                     36: ** WWW Address mapping convention:
                     37: **
                     38: **     /servername/database/type/length/document-id
                     39: **
                     40: **     /servername/database?word+word+word
                     41: */
                     42: /* WIDE AREA INFORMATION SERVER SOFTWARE:
                     43:    No guarantees or restrictions.  See the readme file for the full standard
                     44:    disclaimer.
                     45: 
                     46:    Brewster@think.com
                     47: */
                     48: 
                     49: 
2.8       timbl      50: #define DIRECTORY "/cnidr.org:210/directory-of-servers"
                     51: /* define DIRECTORY "/quake.think.com:210/directory-of-servers" */
2.1       timbl      52: 
                     53: #define BIG 1024       /* identifier size limit  @@@@@ */
                     54: 
2.2       timbl      55: /*                     From WAIS
                     56: **                     ---------
2.1       timbl      57: */
                     58: 
                     59: #include <ui.h>
                     60: 
                     61: #define MAX_MESSAGE_LEN 100000
                     62: #define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */
                     63: 
                     64: #define WAISSEARCH_DATE "Fri Jul 19 1991"
                     65: 
                     66: 
2.2       timbl      67: /*                     FROM WWW
                     68: **                     --------
2.1       timbl      69: */
                     70: #define BUFFER_SIZE 4096       /* Arbitrary size for efficiency */
                     71: 
2.2       timbl      72: #define HEX_ESCAPE '%'
                     73: 
2.1       timbl      74: #include "HTUtils.h"
                     75: #include "tcp.h"
                     76: #include "HTParse.h"
2.2       timbl      77: #include "HTAccess.h"          /* We implement a protocol */
                     78: #include "HTML.h"              /* The object we will generate */
                     79:  
                     80: /* #include "ParseWSRC.h" */
2.1       timbl      81: 
                     82: extern int WWW_TraceFlag;      /* Control diagnostic output */
                     83: extern FILE * logfile;         /* Log file output */
                     84: 
                     85: PRIVATE BOOL   as_gate;        /* Client is using us as gateway */
                     86: 
                     87: PRIVATE char   line[2048];     /* For building strings to display */
                     88:                                /* Must be able to take id */
2.2       timbl      89: 
                     90: 
                     91: #include "HTParse.h"
                     92: #include "HTFormat.h"
                     93: #include "HTTCP.h"
2.6       timbl      94: /* #include "HTWSRC.h" */      /* Need some bits from here */
2.2       timbl      95: 
                     96: /*             Hypertext object building machinery
                     97: */
                     98: #include "HTML.h"
                     99: 
                    100: #define PUTC(c) (*target->isa->put_character)(target, c)
                    101: #define PUTS(s) (*target->isa->put_string)(target, s)
                    102: #define START(e) (*target->isa->start_element)(target, e, 0, 0)
                    103: #define END(e) (*target->isa->end_element)(target, e)
                    104: #define FREE_TARGET (*target->isa->free)(target)
                    105: 
                    106: struct _HTStructured {
                    107:        CONST HTStructuredClass *       isa;
                    108:        /* ... */
                    109: };
                    110: 
                    111: struct _HTStream {
                    112:        CONST HTStreamClass *   isa;
                    113:        /* ... */
                    114: };
                    115: 
                    116: 
2.1       timbl     117: /*                                                             showDiags
                    118: */
                    119: /* modified from Jonny G's version in ui/question.c */
                    120: 
2.2       timbl     121: void showDiags ARGS2(
                    122:        HTStream *,             target,
                    123:        diagnosticRecord **,    d)
2.1       timbl     124: {
                    125:   long i;
                    126: 
                    127:   for (i = 0; d[i] != NULL; i++) {
                    128:     if (d[i]->ADDINFO != NULL) {
                    129:       PUTS("Diagnostic code is ");
                    130:       PUTS(d[i]->DIAG);
                    131:       PUTC(' ');
                    132:       PUTS(d[i]->ADDINFO);
                    133:       PUTC('\n'); ;
                    134:     }
                    135:   }
                    136: }
                    137: 
                    138: /*     Matrix of allowed characters in filenames
                    139: **     -----------------------------------------
                    140: */
                    141: 
                    142: PRIVATE BOOL acceptable[256];
                    143: PRIVATE BOOL acceptable_inited = NO;
                    144: 
                    145: PRIVATE void init_acceptable NOARGS
                    146: {
                    147:     unsigned int i;
                    148:     char * good = 
                    149:       "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
                    150:     for(i=0; i<256; i++) acceptable[i] = NO;
                    151:     for(;*good; good++) acceptable[(unsigned int)*good] = YES;
                    152:     acceptable_inited = YES;
                    153: }
                    154: 
                    155: /*     Transform file identifier into WWW address
                    156: **     ------------------------------------------
                    157: **
                    158: **
                    159: ** On exit,
                    160: **     returns         nil if error
                    161: **                     pointer to malloced string (must be freed) if ok
                    162: */
                    163: char * WWW_from_archie ARGS1 (char *, file)
                    164: {
                    165:     char * end;
                    166:     char * result;
                    167:     char * colon;
                    168:     for(end=file; *end > ' '; end++);  /* assumes ASCII encoding*/
                    169:     result = (char *)malloc(10 + (end-file));
                    170:     if (!result) return result;                /* Malloc error */
                    171:     strcpy(result, "file://");
                    172:     strncat(result, file, end-file);
                    173:     colon = strchr(result+7, ':');     /* Expect colon after host */
                    174:     if (colon) {
                    175:        for(; colon[0]; colon[0]=colon[1], colon++);    /* move down */
                    176:     }
                    177:     return result;
                    178: } /* WWW_from_archie */
                    179: 
2.2       timbl     180: /*     Transform document identifier into URL
                    181: **     --------------------------------------
2.1       timbl     182: **
                    183: ** Bugs: A static buffer of finite size is used!
                    184: **     The format of the docid MUST be good!
                    185: **
                    186: ** On exit,
                    187: **     returns         nil if error
                    188: **                     pointer to malloced string (must be freed) if ok
                    189: */
2.9       timbl     190: PRIVATE char hex [17] = "0123456789ABCDEF";
2.2       timbl     191: extern char from_hex PARAMS((char a));                 /* In HTWSRC @@ */
                    192: 
                    193: PRIVATE char * WWW_from_WAIS ARGS1(any *, docid)
                    194: 
2.1       timbl     195: {
                    196:     static char buf[BIG];
                    197:     char * q = buf;
                    198:     char * p = (docid->bytes);
                    199:     int i, l;
                    200:     if (TRACE) {
                    201:        char *p;
2.2       timbl     202:        fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1       timbl     203:        for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
                    204:            if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
                    205:                fprintf(stderr, "%c", *p);
                    206:            else
2.2       timbl     207:                fprintf(stderr, "<%x>", (unsigned)*p);
2.1       timbl     208:        }
                    209:        fprintf(stderr, "\n");
                    210:     }   
                    211:     for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
                    212:        if (TRACE) fprintf(stderr, "    Record type %d, length %d\n",
                    213:                p[0], p[1]);
                    214:         if (*p>10) {
                    215:            fprintf(stderr, "Eh? DOCID record type of %d!\n", *p);
                    216:            return 0;
                    217:        }
2.2       timbl     218:        {       /* Bug fix -- allow any byte value 15 Apr 93 */
                    219:            unsigned int i = (unsigned) *p++;
                    220:            
                    221:            if (i > 99) {
                    222:                *q++ = (i/100) + '0';
                    223:                i = i % 100;
                    224:            }
                    225:            if (i > 9) {
                    226:                *q++ = (i/10) + '0';
                    227:                i = i % 10;
                    228:            }
                    229:            *q++ = i + '0';     /* Record type */
                    230:        }
2.1       timbl     231:        *q++ = '=';             /* Separate */
                    232:        l = *p++;               /* Length */
                    233:        for(i=0; i<l; i++, p++){
                    234:            if (!acceptable[*p]) {
                    235:                *q++ = HEX_ESCAPE;      /* Means hex commming */
                    236:                *q++ = hex[(*p) >> 4];
                    237:                *q++ = hex[(*p) & 15];
                    238:            }
                    239:            else *q++ = *p;
                    240:        }
                    241:        *q++= ';';              /* Terminate field */
                    242:     }
                    243:     *q++ = 0;                  /* Terminate string */
                    244:     if (TRACE) fprintf(stderr, "WWW form of id: %s\n", buf); 
                    245:     {
                    246:         char * result = (char *)malloc(strlen(buf)+1);
                    247:        strcpy(result, buf);
                    248:        return result;
                    249:     }
                    250: } /* WWW_from_WAIS */
                    251: 
                    252: 
2.2       timbl     253: /*     Transform URL into WAIS document identifier
                    254: **     -------------------------------------------
2.1       timbl     255: **
                    256: ** On entry,
                    257: **     docname         points to valid name produced originally by
                    258: **                     WWW_from_WAIS
                    259: ** On exit,
                    260: **     docid->size     is valid
                    261: **     docid->bytes    is malloced and must later be freed.
                    262: */
                    263: PRIVATE any * WAIS_from_WWW ARGS2 (any *, docid, char *, docname)
                    264: {
                    265:     char *z;   /* Output pointer */
                    266:     char *sor; /* Start of record - points to size field. */
                    267:     char *p;   /* Input pointer */
                    268:     char *q;   /* Poisition of "=" */
                    269:     char *s;   /* Position of semicolon */
                    270:     int n;     /* size */
                    271:     if (TRACE) fprintf(stderr, "WWW id (to become WAIS id): %s\n", docname); 
                    272:     for(n=0, p = docname; *p; p++) {   /* Count sizes of strings */
                    273:         n++;
                    274:        if (*p == ';')  n--;            /* Not converted */
                    275:        else if (*p == HEX_ESCAPE) n=n-2;       /* Save two bytes */
                    276:         docid->size = n;
                    277:     }
                    278:     
                    279:     docid->bytes = (char *) malloc(docid->size); /* result record */
                    280:     z = docid->bytes;
                    281:     
                    282:     for(p = docname; *p; ) {   /* Convert of strings */
2.2       timbl     283:                                /* Record type */
                    284:                                
                    285:        *z = 0;                 /* Initialize record type */
                    286:        while (*p >= '0' && *p <= '9') {
                    287:            *z = *z*10 + (*p++ - '0');  /* Decode decimal record type */
                    288:        }
                    289:        z++;
                    290:        if (*p != '=') return 0;
                    291:        q = p;
                    292:        
                    293: /*        *z++ = *p++ - '0';
2.1       timbl     294:        q = strchr(p , '=');
                    295:        if (!q) return 0;
2.2       timbl     296: */
2.1       timbl     297:        s = strchr(q, ';');     /* (Check only) */
                    298:        if (!s) return 0;       /* Bad! No ';'; */
                    299:         sor = z;               /* Remember where the size field was */
                    300:        z++;                    /* Skip record size for now     */
                    301:        for(p=q+1; *p!=';' ; ) {
                    302:           if (*p == HEX_ESCAPE) {
                    303:                char c;
                    304:                unsigned int b;
                    305:                p++;
                    306:                c = *p++;
                    307:                b =   from_hex(c);
                    308:                c = *p++;
                    309:                if (!c) break;  /* Odd number of chars! */
                    310:                *z++ = (b<<4) + from_hex(c);
                    311:            } else {
                    312:                *z++ = *p++;    /* Record */
                    313:            }
                    314:        }
                    315:        *sor = (z-sor-1);       /* Fill in size -- not counting size itself */
                    316:        p++;                    /* After semicolon: start of next record */
                    317:     }
                    318:     
                    319:     if (TRACE) {
                    320:        char *p;
2.2       timbl     321:        fprintf(stderr, "WAIS id (%d bytes) is ", (int)docid->size);
2.1       timbl     322:        for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
                    323:            if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
                    324:                fprintf(stderr, "%c", *p);
                    325:            else
2.2       timbl     326:                fprintf(stderr, "<%x>", (unsigned)*p);
2.1       timbl     327:        }
                    328:        fprintf(stderr, "\n");
                    329:     }   
                    330:     return docid;              /* Ok */
                    331:     
                    332: } /* WAIS_from_WWW */
                    333: 
                    334: 
                    335: /*     Send a plain text record to the client          output_text_record()
                    336: **     --------------------------------------
                    337: */
2.2       timbl     338: 
2.9       timbl     339: PRIVATE void output_text_record ARGS4(
2.2       timbl     340:     HTStream *,                        target,
                    341:     WAISDocumentText *,                record,
2.9       timbl     342:     boolean,                   quote_string_quotes,
                    343:     boolean,                    binary)
2.1       timbl     344: {
                    345:   long count;
                    346:   /* printf(" Text\n");
                    347:      print_any("     DocumentID:  ", record->DocumentID);
                    348:      printf("     VersionNumber:  %d\n", record->VersionNumber);
                    349:      */
2.9       timbl     350: 
                    351:   if (binary) {
                    352:     (*target->isa->put_block)(target,
                    353:                              record->DocumentText->bytes,
                    354:                              record->DocumentText->size);
                    355:     return;
                    356:   }
                    357: 
2.1       timbl     358:   for(count = 0; count < record->DocumentText->size; count++){
                    359:     long ch = (unsigned char)record->DocumentText->bytes[count];
2.2       timbl     360:     if (ch == 27) {    /* What is this in for? Tim */
2.1       timbl     361: 
                    362:            /* then we have an escape code */
                    363:            /* if the next letter is '(' or ')', then ignore two letters */
                    364:            if('(' == record->DocumentText->bytes[count + 1] ||
                    365:                ')' == record->DocumentText->bytes[count + 1])
                    366:            count += 1;             /* it is a term marker */
                    367:            else count += 4;            /* it is a paragraph marker */
                    368:     } else if (ch == '\n' || ch == '\r') {
2.2       timbl     369:            PUTC('\n');
2.1       timbl     370:     } else if ((ch=='\t') || isprint(ch)){
2.2       timbl     371:            PUTC(ch);
2.1       timbl     372:     } 
                    373:   }
                    374: } /* output text record */
                    375: 
                    376: 
2.2       timbl     377: 
2.1       timbl     378: /*     Format A Search response for the client         display_search_response
                    379: **     ---------------------------------------
                    380: */
                    381: /* modified from tracy shen's version in wutil.c
                    382:  * displays either a text record or a set of headlines.
                    383:  */
                    384: void
2.2       timbl     385: display_search_response ARGS4(
                    386:     HTStructured *,            target,
2.1       timbl     387:     SearchResponseAPDU *,      response,
                    388:     char *,                    database,
                    389:     char *,                    keywords)
                    390: {
                    391:   WAISSearchResponse  *info;
                    392:   long i, k;
                    393:   
                    394:   BOOL archie =  strstr(database, "archie")!=0;        /* Specical handling */
                    395:   
2.7       timbl     396:   if (TRACE) fprintf(stderr, "HTWAIS: Displaying search response\n");
2.1       timbl     397:   sprintf(line,
                    398:        "Index %s contains the following %d item%s relevant to '%s'.\n",
                    399:         database,
2.2       timbl     400:         (int)(response->NumberOfRecordsReturned),
2.1       timbl     401:         response->NumberOfRecordsReturned ==1 ? "" : "s",
                    402:         keywords);
2.2       timbl     403: 
                    404:   PUTS(line);
                    405:   PUTS("The first figure for each entry is its relative score, ");
                    406:   PUTS("the second the number of lines in the item.");
                    407:   START(HTML_MENU);
                    408: 
2.1       timbl     409:   if ( response->DatabaseDiagnosticRecords != 0 ) {
                    410:     info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
                    411:     i =0; 
                    412: 
                    413:     if (info->Diagnostics != NULL)
2.2       timbl     414:       showDiags((HTStream*)target, info->Diagnostics);
2.1       timbl     415: 
                    416:     if ( info->DocHeaders != 0 ) {
                    417:       for (k=0; info->DocHeaders[k] != 0; k++ ) {
                    418:        WAISDocumentHeader* head = info->DocHeaders[k];
                    419:        char * headline = trim_junk(head->Headline);
                    420:        any * docid = head->DocumentID;
                    421:        char * docname;                 /* printable version of docid */
                    422:        i++;
                    423: 
                    424: /*     Make a printable string out of the document id.
                    425: */
                    426:        if (TRACE) fprintf(stderr, 
2.7       timbl     427:                "HTWAIS:  %2ld: Score: %4ld, lines:%4ld '%s'\n", 
2.1       timbl     428:               i,
2.2       timbl     429:               (long int)(info->DocHeaders[k]->Score),
                    430:               (long int)(info->DocHeaders[k]->Lines),
2.1       timbl     431:               headline);
                    432: 
2.2       timbl     433:        START(HTML_LI);
                    434:        sprintf(line, "%4ld  %4ld  ",
                    435:            head->Score,
                    436:            head->Lines);
                    437:        PUTS( line);
                    438: 
2.1       timbl     439:        if (archie) {
                    440:            char * www_name = WWW_from_archie(headline);
                    441:            if (www_name) {
2.2       timbl     442:                HTStartAnchor(target, NULL, www_name);
2.1       timbl     443:                PUTS(headline);
2.2       timbl     444:                
                    445:                END(HTML_A);
2.1       timbl     446:                free(www_name);
                    447:            } else {
2.2       timbl     448:                 PUTS(headline);
                    449:                 PUTS(" (bad file name)");
2.1       timbl     450:            }
                    451:        } else { /* Not archie */
                    452:            docname =  WWW_from_WAIS(docid);
                    453:            if (docname) {
2.6       timbl     454:                char * dbname = HTEscape(database, URL_XPALPHAS);
2.1       timbl     455:                sprintf(line, "%s/%s/%d/%s",            /* W3 address */
                    456:                                    dbname,
                    457:                    head->Types ? head->Types[0] : "TEXT",
2.2       timbl     458:                    (int)(head->DocumentLength),
2.1       timbl     459:                    docname);
2.11      secret    460:                HTStartAnchor(target, NULL, ( (head->Types) 
                    461:                      && (!strcmp(head->Types[0], "URL"))) ? 
                    462:                              headline : line); /* NT, Sep 93 */
2.2       timbl     463:                PUTS(headline);
                    464:                END(HTML_A);
2.1       timbl     465:                free(dbname);
                    466:                free(docname);
                    467:            } else {
2.2       timbl     468:                 PUTS("(bad doc id)");
2.1       timbl     469:            }
                    470:          }
                    471:       } /* next document header */
                    472:     } /* if there were any document headers */
                    473:     
                    474:     if ( info->ShortHeaders != 0 ) {
                    475:       k =0;
                    476:       while (info->ShortHeaders[k] != 0 ) {
                    477:        i++;
2.2       timbl     478:        PUTS( "(Short Header record, can't display)");
2.1       timbl     479:       }
                    480:     }
                    481:     if ( info->LongHeaders != 0 ) {
                    482:       k =0;
                    483:       while (info->LongHeaders[k] != 0) {
                    484:        i++;
                    485:        PUTS( "\nLong Header record, can't display\n");
                    486:       }
                    487:     }
                    488:     if ( info->Text != 0 ) {
                    489:       k =0;
                    490:       while (info->Text[k] != 0) {
                    491:        i++;
                    492:        PUTS( "\nText record\n");
2.9       timbl     493:        output_text_record((HTStream*)target, info->Text[k++], false, false);
2.1       timbl     494:       }
                    495:     }
                    496:     if ( info->Headlines != 0 ) {
                    497:       k =0;
                    498:       while (info->Headlines[k] != 0) {
                    499:        i++;
                    500:        PUTS( "\nHeadline record, can't display\n");
                    501:        /* dsply_headline_record( info->Headlines[k++]); */
                    502:       }
                    503:     }
                    504:     if ( info->Codes != 0 ) {
                    505:       k =0;
                    506:       while (info->Codes[k] != 0) {
                    507:        i++;
                    508:        PUTS( "\nCode record, can't display\n");
                    509:        /* dsply_code_record( info->Codes[k++]); */
                    510:       }
                    511:     }
                    512:   }                            /* Loop: display user info */
2.2       timbl     513:   END(HTML_MENU);
2.1       timbl     514:   PUTC('\n'); ;
                    515: }
                    516: 
                    517: 
                    518: 
2.2       timbl     519: 
                    520: /*             Load by name                                    HTLoadWAIS
                    521: **             ============
                    522: **
                    523: **     This renders any object or search as required
2.1       timbl     524: */
2.13    ! timbl     525: PUBLIC int HTLoadWAIS ARGS1(HTRequest * , request)
2.1       timbl     526: 
                    527: #define MAX_KEYWORDS_LENGTH 1000
                    528: #define MAX_SERVER_LENGTH 1000
                    529: #define MAX_DATABASE_LENGTH 1000
                    530: #define MAX_SERVICE_LENGTH 1000
                    531: #define MAXDOCS 40
                    532: 
                    533: {
2.13    ! timbl     534:     CONST char * arg = HTAnchor_physical(request->anchor);
        !           535:     HTParentAnchor *   anAnchor = request->anchor;
        !           536:     HTFormat           format_out = request->output_format;
        !           537:     HTStream*          sink = request->output_stream;
        !           538:     
2.1       timbl     539:     static CONST char * error_header =
2.7       timbl     540: "<h1>Access error</h1>\nThe following error occured in accesing a WAIS server:<P>\n";
2.2       timbl     541:     char * key;                          /* pointer to keywords in URL */
2.1       timbl     542:     char* request_message = NULL; /* arbitrary message limit */
                    543:     char* response_message = NULL; /* arbitrary message limit */
                    544:     long request_buffer_length;        /* how of the request is left */
                    545:     SearchResponseAPDU  *retrieval_response = 0;
                    546:     char keywords[MAX_KEYWORDS_LENGTH + 1];
                    547:     char *server_name; 
2.6       timbl     548:     char *wais_database = NULL;                /* name of current database */
                    549:     char *www_database;                        /* Same name escaped */
2.1       timbl     550:     char *service;
                    551:     char *doctype;
                    552:     char *doclength;
                    553:     long document_length;
                    554:     char *docname;
                    555:     FILE *connection = 0;
                    556:     char * names;              /* Copy of arg to be hacked up */
                    557:     BOOL ok = NO;
                    558:     
                    559:     extern FILE * connect_to_server();
                    560:     
                    561:     if (!acceptable_inited) init_acceptable();
                    562:     
                    563:         
                    564: /*     Decipher and check syntax of WWW address:
                    565: **     ----------------------------------------
                    566: **
                    567: **     First we remove the "wais:" if it was spcified.  920110
                    568: */  
                    569:     names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
2.2       timbl     570:     key = strchr(names, '?');
                    571:     
                    572:     if (key) {
                    573:        char * p;
                    574:        *key++ = 0;     /* Split off keywords */
                    575:        for (p=key; *p; p++) if (*p == '+') *p = ' ';
                    576:        HTUnEscape(key);
                    577:     }
2.1       timbl     578:     if (names[0]== '/') {
                    579:        server_name = names+1;
                    580:        if (as_gate =(*server_name == '/'))
                    581:            server_name++;      /* Accept one or two */
                    582:        www_database = strchr(server_name,'/');
                    583:        if (www_database) {
                    584:            *www_database++ = 0;                /* Separate database name */
                    585:            doctype = strchr(www_database, '/');
                    586:            if (key) ok = YES;  /* Don't need doc details */
                    587:            else if (doctype) { /* If not search parse doc details */
                    588:                *doctype++ = 0; /* Separate rest of doc address */
                    589:                doclength = strchr(doctype, '/');
                    590:                if(doclength) {
                    591:                    *doclength++ = 0;
                    592:                    document_length = atol(doclength);
                    593:                    if (document_length) {
                    594:                        docname=strchr(doclength, '/');
                    595:                        if (docname) {
                    596:                            *docname++ = 0;
                    597:                            ok = YES;   /* To avoid a goto! */
                    598:                        } /* if docname */
                    599:                    } /* if document_length valid */
                    600:                } /* if doclength */
                    601:            } else { /* no doctype?  Assume index required */
                    602:                if (!key) key = "";
                    603:                ok = YES;
                    604:            } /* if doctype */
                    605:        } /* if database */
                    606:      }
                    607:      
2.2       timbl     608:      if (!ok)
                    609:         return HTLoadError(sink, 500, "Syntax error in WAIS URL");
                    610: 
2.7       timbl     611:      if (TRACE) fprintf(stderr, "HTWAIS: Parsed OK\n");
2.1       timbl     612:      
                    613:      service = strchr(names, ':');
                    614:      if (service)  *service++ = 0;
                    615:      else service = "210";
                    616:      
                    617:      if (server_name[0] == 0)
                    618:         connection = NULL;
                    619: 
                    620:      else if (!(key && !*key))
                    621:       if ((connection=connect_to_server(server_name,atoi(service)))
                    622:         == NULL)  {
2.2       timbl     623:         if (TRACE) fprintf (stderr,
2.1       timbl     624:             "%sCan't open connection to %s via service %s.\n",
                    625:             error_header, server_name, service);
                    626:         free(names);
2.2       timbl     627:         return HTLoadError(sink, 500, "Can't open connection to WAIS server");
2.1       timbl     628:     }
                    629: 
2.6       timbl     630:     StrAllocCopy(wais_database,www_database);
                    631:     HTUnEscape(wais_database);
                    632:     
2.2       timbl     633:        /* This below fixed size stuff is terrible */
2.1       timbl     634:     request_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
                    635:     response_message = (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char));
                    636: 
                    637: /*     If keyword search is performed but there are no keywords,
                    638: **     the user has followed a link to the index itself. It would be
                    639: **     appropriate at this point to send him the .SRC file - how?
                    640: */
                    641: 
                    642:     if (key && !*key) {                                /* I N D E X */
                    643:     
2.7       timbl     644: #ifdef CACHE_FILE_PREFIX
                    645:        char filename[256];
                    646:        FILE * fp;
                    647: #endif
2.13    ! timbl     648:        HTStructured * target = HTML_new(request, NULL,
        !           649:                                        WWW_HTML, format_out, sink);
2.1       timbl     650:        
2.2       timbl     651:        START(HTML_ISINDEX);
                    652: 
2.8       timbl     653:        {
                    654:            START(HTML_TITLE);
                    655:            PUTS(wais_database);
                    656:            PUTS(" index");
                    657:            END(HTML_TITLE);
                    658:            
                    659:            START(HTML_H1);
                    660:            PUTS(wais_database);
                    661:            END(HTML_H1);
                    662:            
                    663:        }
2.1       timbl     664:        /* If we have seen a source file for this database, use that:
                    665:        */
2.2       timbl     666: 
2.7       timbl     667: #ifdef CACHE_FILE_PREFIX
2.8       timbl     668:        sprintf(filename, "%sWSRC-%s:%s:%.100s.txt",
2.7       timbl     669:                CACHE_FILE_PREFIX,
2.1       timbl     670:                server_name, service, www_database);
                    671: 
                    672:        fp = fopen(filename, "r");      /* Have we found this already? */
                    673:        if (TRACE) fprintf(stderr,
2.7       timbl     674:                "HTWAIS: Description of server %s %s.\n",
2.1       timbl     675:                filename,
                    676:                fp ? "exists already" : "does NOT exist!");
2.2       timbl     677: 
2.1       timbl     678:        if (fp) {
                    679:            char c;
2.7       timbl     680:            START(HTML_PRE);            /* Preformatted description */
                    681:            while((c=getc(fp))!=EOF) PUTC(c);   /* Transfer file */
                    682:            END(HTML_PRE);
2.1       timbl     683:            fclose(fp);
2.8       timbl     684:        }
2.2       timbl     685: #endif
2.7       timbl     686:        START(HTML_P);
2.2       timbl     687:        PUTS("Specify search words.");
2.1       timbl     688:        
2.2       timbl     689:        FREE_TARGET;
2.1       timbl     690:        
                    691:     } else if (key) {                                  /* S E A R C H */
                    692:        char *p;
2.2       timbl     693:        HTStructured * target;
                    694:        
2.1       timbl     695:        strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
                    696:        while(p=strchr(keywords, '+')) *p = ' ';
                    697:     
                    698:         /* Send advance title to get something fast to the other end */
                    699:        
2.13    ! timbl     700:        target = HTML_new(request, NULL, WWW_HTML, format_out, sink);
2.2       timbl     701:        
                    702:        START(HTML_ISINDEX);
                    703:        START(HTML_TITLE);
                    704:        PUTS(keywords);
                    705:        PUTS(" (in ");
                    706:        PUTS(wais_database);
                    707:        PUTS(")");
                    708:        END(HTML_TITLE);
                    709:        
                    710:        START(HTML_H1);
                    711:        PUTS(keywords);
                    712:        END(HTML_H1);
2.1       timbl     713: 
                    714:        request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.7       timbl     715:        if (TRACE) fprintf(stderr, "HTWAIS: Search for `%s' in `%s'\n",
2.1       timbl     716:                keywords, wais_database);
                    717:        if(NULL ==
                    718:        generate_search_apdu(request_message + HEADER_LENGTH, 
                    719:                                &request_buffer_length, 
                    720:                                keywords, wais_database, NULL, MAXDOCS))
                    721:        panic("request too large");
                    722:        
                    723: 
                    724:        if(!interpret_message(request_message, 
                    725:                                MAX_MESSAGE_LEN - request_buffer_length, 
                    726:                                response_message,
                    727:                                MAX_MESSAGE_LEN,
                    728:                                connection,
                    729:                                false   /* true verbose */
                    730:                                )) {
                    731:            panic("returned message too large");
                    732:     
                    733:         } else {       /* returned message ok */
                    734:        
                    735:            SearchResponseAPDU  *query_response = 0;
                    736:            readSearchResponseAPDU(&query_response,
                    737:                response_message + HEADER_LENGTH);
2.2       timbl     738:            display_search_response(target, 
                    739:                query_response, wais_database, keywords);
2.1       timbl     740:            if (query_response->DatabaseDiagnosticRecords)
                    741:                freeWAISSearchResponse(
                    742:                        query_response->DatabaseDiagnosticRecords);         
                    743:            freeSearchResponseAPDU( query_response);
                    744:        }       /* returned message not too large */
                    745:     
2.2       timbl     746:        FREE_TARGET;
                    747: 
2.1       timbl     748:     } else {                   /* D O C U M E N T    F E T C H */
                    749:     
2.2       timbl     750:        HTFormat format_in;
2.9       timbl     751:        boolean binary;     /* how to transfer stuff coming over */
2.2       timbl     752:        HTStream * target;
2.1       timbl     753:        long count;
                    754:        any   doc_chunk;
                    755:        any * docid = &doc_chunk;
                    756:        if (TRACE) printf(
2.7       timbl     757:                "HTWAIS: Retrieve document id `%s' type `%s' length %ld\n",
2.1       timbl     758:                docname, doctype, document_length);
2.2       timbl     759:                
                    760:        format_in = 
                    761:          !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") :
                    762:          !strcmp(doctype, "TEXT") ? HTAtom_for("text/plain") :
2.9       timbl     763:          !strcmp(doctype, "HTML") ? HTAtom_for("text/html") :
2.2       timbl     764:          !strcmp(doctype, "GIF")  ? HTAtom_for("image/gif") :
2.11      secret    765:                                     HTAtom_for("application/octet-stream");
2.9       timbl     766:        binary = 
                    767:          0 != strcmp(doctype, "WSRC") &&
                    768:          0 != strcmp(doctype, "TEXT") &&
                    769:          0 != strcmp(doctype, "HTML") ;
                    770: 
2.2       timbl     771: 
2.13    ! timbl     772:        target = HTStreamStack(format_in, request);
2.2       timbl     773:        if (!target) return HTLoadError(sink, 500,
                    774:                "Can't convert format of WAIS document");
2.1       timbl     775: /*     Decode hex or litteral format for document ID
                    776: */     
                    777:        WAIS_from_WWW(docid, docname);
                    778: 
2.2       timbl     779:        
2.1       timbl     780: /*     Loop over slices of the document
                    781: */     
                    782:        for(count = 0; 
                    783:            count * CHARS_PER_PAGE < document_length;
                    784:            count++){
                    785:          char *type = s_strdup(doctype);       /* Gets freed I guess */
                    786:          request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
2.2       timbl     787:          if (TRACE) fprintf(stderr, "HTWAIS: Slice number %ld\n", count);
2.1       timbl     788:          if(0 ==
                    789:              generate_retrieval_apdu(request_message + HEADER_LENGTH,
                    790:                    &request_buffer_length, 
                    791:                    docid, 
                    792:                    CT_byte,
                    793:                    count * CHARS_PER_PAGE,
                    794:                    MIN((count + 1) * CHARS_PER_PAGE,document_length),
                    795:                    type,
                    796:                    wais_database
                    797:                    ))
                    798:                panic("request too long");
2.2       timbl     799:          
                    800:          /*    Actually do the transaction given by request_message */   
2.1       timbl     801:          if(0 ==
                    802:             interpret_message(request_message, 
                    803:                               MAX_MESSAGE_LEN - request_buffer_length, 
                    804:                               response_message,
                    805:                               MAX_MESSAGE_LEN,
                    806:                               connection,
                    807:                               false /* true verbose */ 
                    808:                               ))
                    809:            panic("Returned message too large");
                    810: 
2.2       timbl     811:          /*    Parse the result which came back into memory.
                    812:          */
2.1       timbl     813:          readSearchResponseAPDU(&retrieval_response, 
                    814:                                 response_message + HEADER_LENGTH);
                    815: 
                    816:          if(NULL == ((WAISSearchResponse *)
                    817:                retrieval_response->DatabaseDiagnosticRecords)->Text){
2.2       timbl     818:                /* display_search_response(target, retrieval_response,
                    819:                                        wais_database, keywords); */
                    820:                PUTS("No text was returned!\n");
2.1       timbl     821:                /* panic("No text was returned"); */
                    822:          } else {
                    823:          
2.2       timbl     824:                output_text_record(target,
                    825:                   ((WAISSearchResponse *)
2.1       timbl     826:                    retrieval_response->DatabaseDiagnosticRecords)->Text[0],
2.9       timbl     827:                false, binary);
2.1       timbl     828:          
                    829:          } /* If text existed */
                    830:          
                    831:        }       /* Loop over slices */
                    832: 
2.2       timbl     833:        (*target->isa->free)(target);
2.1       timbl     834: 
                    835:        free (docid->bytes);
                    836:        
                    837:        freeWAISSearchResponse( retrieval_response->DatabaseDiagnosticRecords); 
                    838:        freeSearchResponseAPDU( retrieval_response);
                    839: 
                    840:     } /* If document rather than search */
                    841: 
2.2       timbl     842: 
                    843: 
2.1       timbl     844: 
2.2       timbl     845: /*     (This postponed until later,  after a timeout:)
2.1       timbl     846: */
                    847:     if (connection) close_connection(connection);
                    848:     if (wais_database) free(wais_database);
                    849:     s_free(request_message);
                    850:     s_free(response_message);
                    851: 
                    852:     free(names);
2.2       timbl     853:     return HT_LOADED;
2.1       timbl     854: }
                    855: 
2.12      duns      856: GLOBALDEF PUBLIC HTProtocol HTWAIS = { "wais", HTLoadWAIS, NULL }; /* Globally visible HTProtocol entry for WAIS access, initialised with the name "wais", HTLoadWAIS, and a NULL third member. NOTE(review): the meaning of each member comes from the HTProtocol declaration, which is not visible here -- presumably scheme name and load routine; confirm. */
2.1       timbl     857: 
                    858: 

Webmaster