Annotation of libwww/Library/src/HTNews.c, revision 1.2

1.1       timbl       1: /*                     NEWS ACCESS                             HTNews.c
                      2: **                     ===========
                      3: **
                      4: ** History:
                      5: **     26 Sep 90       Written TBL
                      6: **     29 Nov 91       Downgraded to C, for portable implementation.
                      7: */
1.2     ! timbl       8: /* Implements:
        !             9: */
        !            10: #include "HTNews.h"
1.1       timbl      11: 
                     12: #define NEWS_PORT 119          /* See rfc977 */
                     13: #define APPEND                 /* Use append methods */
                     14: #define MAX_CHUNK      40      /* Largest number of articles in one window */
                     15: #define CHUNK_SIZE     20      /* Number of articles for quick display */
                     16: 
                     17: #ifndef DEFAULT_NEWS_HOST
                     18: #define DEFAULT_NEWS_HOST "news"
                     19: #endif
                     20: #ifndef SERVER_FILE
                     21: #define SERVER_FILE "/usr/local/lib/rn/server"
                     22: #endif
                     23: 
                     24: #include <ctype.h>
                     25: #include "HTUtils.h"           /* Coding convention macros */
                     26: #include "tcp.h"
                     27: 
1.2     ! timbl      28: #include "HTML.h"
1.1       timbl      29: #include "HTParse.h"
                     30: #include "HTFormat.h"
                     31: 
1.2     ! timbl      32: struct _HTStructured {
        !            33:        CONST HTStructuredClass *       isa;
        !            34:        /* ... */
        !            35: };
        !            36: 
1.1       timbl      37: #ifdef NeXTStep
                     38: #include <appkit/defaults.h>
                     39: #define NEWS_PROGRESS(foo)
                     40: #else
                     41: #define NEWS_PROGRESS(foo) fprintf(stderr, "%s\n", (foo))
                     42: #endif
                     43: 
                     44: 
                     45: #define NEXT_CHAR HTGetChararcter()
                     46: #define LINE_LENGTH 512                        /* Maximum length of line of ARTICLE etc */
                     47: #define GROUP_NAME_LENGTH      256     /* Maximum length of group name */
                     48: 
                     49: 
                     50: /*     Module-wide variables
                     51: */
1.2     ! timbl      52: PUBLIC char * HTNewsHost;
1.1       timbl      53: PRIVATE struct sockaddr_in soc_address;                /* Binary network address */
                     54: PRIVATE int s;                                 /* Socket for NewsHost */
                     55: PRIVATE char response_text[LINE_LENGTH+1];     /* Last response */
1.2     ! timbl      56: /* PRIVATE HText *     HT;     */              /* the new hypertext */
        !            57: PRIVATE HTStructured * target;                 /* The output sink */
        !            58: PRIVATE HTStructuredClass targetClass;         /* Copy of fn addresses */
1.1       timbl      59: PRIVATE HTParentAnchor *node_anchor;           /* Its anchor */
                     60: PRIVATE int    diagnostic;                     /* level: 0=none 2=source */
                     61: 
1.2     ! timbl      62: 
        !            63: #define PUTC(c) (*targetClass.put_character)(target, c)
        !            64: #define PUTS(s) (*targetClass.put_string)(target, s)
        !            65: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
        !            66: #define END(e) (*targetClass.end_element)(target, e)
        !            67: 
        !            68: PUBLIC CONST char * HTGetNewsHost NOARGS
        !            69: {
        !            70:        return HTNewsHost;
        !            71: }
1.1       timbl      72: 
1.2     ! timbl      73: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
        !            74: {
        !            75:        StrAllocCopy(HTNewsHost, value);
        !            76: }
1.1       timbl      77: 
                     78: /*     Initialisation for this module
                     79: **     ------------------------------
                     80: **
                     81: **     Except on the NeXT, we pick up the NewsHost name from
                     82: **
                     83: **     1.      Environment variable NNTPSERVER
                     84: **     2.      File SERVER_FILE
                     85: **     3.      Compilation time macro DEFAULT_NEWS_HOST
                     86: **     4.      Default to "news"
                     87: **
                     88: **     On the NeXT, we pick up the NewsHost name from, in order:
                     89: **
                     90: **     1.      WorldWideWeb default "NewsHost"
                     91: **     2.      Global default "NewsHost"
                     92: **     3.      News default "NewsHost"
                     93: **     4.      Compilation time macro DEFAULT_NEWS_HOST
                     94: **     5.      Default to "news"
                     95: */
                     96: PRIVATE BOOL initialized = NO;
                     97: PRIVATE BOOL initialize NOARGS
                     98: {
                     99:     CONST struct hostent  *phost;        /* Pointer to host - See netdb.h */
                    100:     struct sockaddr_in* sin = &soc_address;
                    101: 
                    102:         
                    103: /*  Set up defaults:
                    104: */
                    105:     sin->sin_family = AF_INET;         /* Family = internet, host order  */
                    106:     sin->sin_port = htons(NEWS_PORT);   /* Default: new port,    */
                    107: 
                    108: /*   Get name of Host
                    109: */
                    110: #ifdef NeXTStep
1.2     ! timbl     111:     if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
        !           112:         if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
        !           113:            HTNewsHost = DEFAULT_NEWS_HOST;
1.1       timbl     114: #else
                    115:     if (getenv("NNTPSERVER")) {
1.2     ! timbl     116:         StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
1.1       timbl     117:        if (TRACE) fprintf(stderr, "HTNews: NNTPSERVER defined as `%s'\n",
1.2     ! timbl     118:                HTNewsHost);
1.1       timbl     119:     } else {
                    120:         char server_name[256];
                    121:         FILE* fp = fopen(SERVER_FILE, "r");
                    122:         if (fp) {
                    123:            if (fscanf(fp, "%s", server_name)==1) {
1.2     ! timbl     124:                StrAllocCopy(HTNewsHost, server_name);
1.1       timbl     125:                if (TRACE) fprintf(stderr,
                    126:                "HTNews: File %s defines news host as `%s'\n",
1.2     ! timbl     127:                        SERVER_FILE, HTNewsHost);
1.1       timbl     128:            }
                    129:            fclose(fp);
                    130:        }
                    131:     }
1.2     ! timbl     132:     if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1       timbl     133: #endif
                    134: 
1.2     ! timbl     135:     if (*HTNewsHost>='0' && *HTNewsHost<='9') {   /* Numeric node address: */
        !           136:        sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1       timbl     137: 
                    138:     } else {               /* Alphanumeric node name: */
1.2     ! timbl     139:        phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1       timbl     140:        if (!phost) {
                    141: #ifdef NeXTStep
                    142:            NXRunAlertPanel(NULL, "Can't find news host name `%s'.",
1.2     ! timbl     143:                NULL, NULL, NULL, HTNewsHost);
1.1       timbl     144: #else
                    145:            fprintf(stderr,
1.2     ! timbl     146:              "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1       timbl     147:            fprintf(stderr,
                    148: "  Please see online documentation for instructions to set the news host.\n");
                    149: #endif
                    150:            CTRACE(tfp,
1.2     ! timbl     151:              "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1       timbl     152:            return NO;  /* Fail */
                    153:        }
                    154:        memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
                    155:     }
                    156: 
                    157:     if (TRACE) fprintf(stderr,  
                    158:        "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
                    159:                (unsigned int)ntohs(sin->sin_port),
                    160:                (int)*((unsigned char *)(&sin->sin_addr)+0),
                    161:                (int)*((unsigned char *)(&sin->sin_addr)+1),
                    162:                (int)*((unsigned char *)(&sin->sin_addr)+2),
                    163:                (int)*((unsigned char *)(&sin->sin_addr)+3));
                    164: 
                    165:     s = -1;            /* Disconnected */
                    166:     
                    167:     return YES;
                    168: }
                    169: 
                    170: 
                    171: 
                    172: /*     Send NNTP Command line to remote host & Check Response
                    173: **     ------------------------------------------------------
                    174: **
                    175: ** On entry,
                    176: **     command points to the command to be sent, including CRLF, or is null
                    177: **             pointer if no command to be sent.
                    178: ** On exit,
                    179: **     Negative status indicates transmission error, socket closed.
                    180: **     Positive status is an NNTP status.
                    181: */
                    182: 
                    183: 
                    184: PRIVATE int response ARGS1(CONST char *,command)
                    185: {
                    186:     int result;    
                    187:     char * p = response_text;
                    188:     if (command) {
                    189:         int status;
                    190:        int length = strlen(command);
                    191:        if (TRACE) fprintf(stderr, "NNTP command to be sent: %s", command);
                    192: #ifdef NOT_ASCII
                    193:        {
                    194:            CONST char  * p;
                    195:            char        * q;
                    196:            char ascii[LINE_LENGTH+1];
                    197:            for(p = command, q=ascii; *p; p++, q++) {
                    198:                *q = TOASCII(*p);
                    199:            }
                    200:             status = NETWRITE(s, ascii, length);
                    201:        }
                    202: #else
                    203:         status = NETWRITE(s, command, length);
                    204: #endif
                    205:        if (status<0){
                    206:            if (TRACE) fprintf(stderr,
                    207:                "HTNews: Unable to send command. Disconnecting.\n");
                    208:            NETCLOSE(s);
                    209:            s = -1;
                    210:            return status;
                    211:        } /* if bad status */
                    212:     } /* if command to be sent */
                    213:     
                    214:     for(;;) {  
                    215:        if (((*p++=NEXT_CHAR) == '\n') || (p == &response_text[LINE_LENGTH])) {
                    216:            *p++=0;                             /* Terminate the string */
                    217:            if (TRACE) fprintf(stderr, "NNTP Response: %s\n", response_text);
                    218:            sscanf(response_text, "%d", &result);
                    219:            return result;          
                    220:        } /* if end of line */
                    221:        
                    222:        if (*(p-1) < 0) {
                    223:            if (TRACE) fprintf(stderr,
                    224:                "HTNews: EOF on read, closing socket %d\n", s);
                    225:            NETCLOSE(s);        /* End of file, close socket */
                    226:            return s = -1;      /* End of file on response */
                    227:        }
                    228:     } /* Loop over characters */
                    229: }
                    230: 
                    231: 
                    232: /*     Case insensitive string comparisons
                    233: **     -----------------------------------
                    234: **
                    235: ** On entry,
                    236: **     template must be already un upper case.
                    237: **     unknown may be in upper or lower or mixed case to match.
                    238: */
                    239: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,template)
                    240: {
                    241:     CONST char * u = unknown;
                    242:     CONST char * t = template;
                    243:     for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
                    244:     return (BOOL)(*t==0);              /* OK if end of template */
                    245: }
                    246: 
                    247: /*     Find Author's name in mail address
                    248: **     ----------------------------------
                    249: **
                    250: ** On exit,
                    251: **     THE EMAIL ADDRESS IS CORRUPTED
                    252: **
                    253: ** For example, returns "Tim Berners-Lee" if given any of
                    254: **     " Tim Berners-Lee <tim@online.cern.ch> "
                    255: **  or " tim@online.cern.ch ( Tim Berners-Lee ) "
                    256: */
                    257: PRIVATE char * author_name ARGS1 (char *,email)
                    258: {
                    259:     char *s, *e;
                    260:     
                    261:     if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
                    262:         if (e>s) {
                    263:            *e=0;                       /* Chop off everything after the ')'  */
                    264:            return HTStrip(s+1);        /* Remove leading and trailing spaces */
                    265:        }
                    266:        
                    267:     if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
                    268:         if (e>s) {
                    269:            strcpy(s, e+1);             /* Remove <...> */
                    270:            return HTStrip(email);      /* Remove leading and trailing spaces */
                    271:        }
                    272:        
                    273:     return HTStrip(email);             /* Default to the whole thing */
                    274: 
                    275: }
                    276: 
1.2     ! timbl     277: /*     Start anchor element
        !           278: **     --------------------
        !           279: */
        !           280: PRIVATE void start_anchor ARGS1(CONST char *,  href)
        !           281: {
        !           282:     BOOL               present[HTML_A_ATTRIBUTES];
        !           283:     CONST char*                value[HTML_A_ATTRIBUTES];
        !           284:     
        !           285:     {
        !           286:        int i;
        !           287:        for(i=0; i<HTML_A_ATTRIBUTES; i++)
        !           288:            present[i] = (i==HTML_A_HREF);
        !           289:     }
        !           290:     value[HTML_A_HREF] = href;
        !           291:     (*targetClass.start_element)(target, HTML_A , present, value);
        !           292: 
        !           293: }
1.1       timbl     294: 
                    295: /*     Paste in an Anchor
                    296: **     ------------------
                    297: **
                    298: **
                    299: ** On entry,
                    300: **     HT      has a selection of zero length at the end.
                    301: **     text    points to the text to be put into the file, 0 terminated.
                    302: **     addr    points to the hypertext refernce address,
                    303: **             terminated by white space, comma, NULL or '>' 
                    304: */
                    305: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
                    306: {
                    307:     char href[LINE_LENGTH+1];
                    308:                
                    309:     {
                    310:        CONST char * p;
                    311:        strcpy(href,"news:");
                    312:        for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
                    313:         strncat(href, addr, p-addr);   /* Make complete hypertext reference */
                    314:     }
                    315:     
1.2     ! timbl     316:     start_anchor(href);
        !           317:     PUTS(text);
        !           318:     END(HTML_A);
1.1       timbl     319: }
                    320: 
                    321: 
                    322: /*     Write list of anchors
                    323: **     ---------------------
                    324: **
                    325: **     We take a pointer to a list of objects, and write out each,
                    326: **     generating an anchor for each.
                    327: **
                    328: ** On entry,
                    329: **     HT      has a selection of zero length at the end.
                    330: **     text    points to a comma or space separated list of addresses.
                    331: ** On exit,
                    332: **     *text   is NOT any more chopped up into substrings.
                    333: */
                    334: PRIVATE void write_anchors ARGS1 (char *,text)
                    335: {
                    336:     char * start = text;
                    337:     char * end;
                    338:     char c;
                    339:     for (;;) {
                    340:         for(;*start && (WHITE(*start)); start++);  /* Find start */
                    341:        if (!*start) return;                    /* (Done) */
                    342:         for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
                    343:        if (*end) end++;        /* Include comma or space but not NULL */
                    344:        c = *end;
                    345:        *end = 0;
                    346:        write_anchor(start, start);
                    347:        *end = c;
                    348:        start = end;                    /* Point to next one */
                    349:     }
                    350: }
                    351: 
                    352: /*     Abort the connection                                    abort_socket
                    353: **     --------------------
                    354: */
                    355: PRIVATE void abort_socket NOARGS
                    356: {
                    357:     if (TRACE) fprintf(stderr,
                    358:            "HTNews: EOF on read, closing socket %d\n", s);
                    359:     NETCLOSE(s);       /* End of file, close socket */
1.2     ! timbl     360:     PUTS("Network Error: connection lost");
        !           361:     PUTC('\n');
1.1       timbl     362:     s = -1;            /* End of file on response */
                    363:     return;
                    364: }
                    365: 
                    366: /*     Read in an Article                                      read_article
                    367: **     ------------------
                    368: **
                    369: **
                    370: **     Note the termination condition of a single dot on a line by itself.
                    371: **     RFC 977 specifies that the line "folding" of RFC850 is not used, so we
                    372: **     do not handle it here.
                    373: **
                    374: ** On entry,
                    375: **     s       Global socket number is OK
                    376: **     HT      Global hypertext object is ready for appending text
                    377: */       
                    378: PRIVATE void read_article NOARGS
                    379: {
                    380: 
                    381:     char line[LINE_LENGTH+1];
                    382:     char *references=NULL;                     /* Hrefs for other articles */
                    383:     char *newsgroups=NULL;                     /* Newsgroups list */
                    384:     char *p = line;
                    385:     BOOL done = NO;
                    386:     
                    387: /*     Read in the HEADer of the article:
                    388: **
                    389: **     The header fields are either ignored, or formatted and put into the
                    390: **      Text.
                    391: */
                    392:     if (!diagnostic) {
1.2     ! timbl     393:         (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1       timbl     394:        while(!done){
                    395:            char ch = *p++ = NEXT_CHAR;
                    396:            if (ch==(char)EOF) {
                    397:                abort_socket(); /* End of file, close socket */
                    398:                return;         /* End of file on response */
                    399:            }
                    400:            if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
                    401:                *--p=0;                         /* Terminate the string */
                    402:                if (TRACE) fprintf(stderr, "H %s\n", line);
                    403: 
                    404:                if (line[0]=='.') {     
                    405:                    if (line[1]<' ') {          /* End of article? */
                    406:                        done = YES;
                    407:                        break;
                    408:                    }
                    409:                
                    410:                } else if (line[0]<' ') {
                    411:                    break;              /* End of Header? */
                    412:                } else if (match(line, "SUBJECT:")) {
1.2     ! timbl     413:                    END(HTML_ADDRESS);
        !           414:                    START(HTML_TITLE);                  /** Uuugh! @@@ */
        !           415:                    PUTS(line+8);
        !           416:                    END(HTML_TITLE);
        !           417:                    START(HTML_ADDRESS);
        !           418:                    (*targetClass.start_element)(target, HTML_H1 , 0, 0);
        !           419:                    PUTS(line+8);
        !           420:                    (*targetClass.end_element)(target, HTML_H1);
        !           421:                    (*targetClass.start_element)(target, HTML_ADDRESS , 0, 0);
1.1       timbl     422:                } else if (match(line, "DATE:")
                    423:                        || match(line, "FROM:")
                    424:                        || match(line, "ORGANIZATION:")) {
                    425:                    strcat(line, "\n");
1.2     ! timbl     426:                    PUTS(strchr(line,':')+1);
1.1       timbl     427:                } else if (match(line, "NEWSGROUPS:")) {
                    428:                    StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
                    429:                    
                    430:                } else if (match(line, "REFERENCES:")) {
                    431:                    StrAllocCopy(references, HTStrip(strchr(line,':')+1));
                    432:                    
                    433:                } /* end if match */
                    434:                p = line;                       /* Restart at beginning */
                    435:            } /* if end of line */
                    436:        } /* Loop over characters */
1.2     ! timbl     437:        (*targetClass.end_element)(target, HTML_ADDRESS);
1.1       timbl     438:     
1.2     ! timbl     439:        if (newsgroups || references) {
        !           440:            (*targetClass.start_element)(target, HTML_DLC , 0, 0);
        !           441:            if (newsgroups) {
        !           442:                (*targetClass.start_element)(target, HTML_DT , 0, 0);
        !           443:                PUTS("Newsgroups:");
        !           444:                (*targetClass.start_element)(target, HTML_DD , 0, 0);
        !           445:                write_anchors(newsgroups);
        !           446:                free(newsgroups);
        !           447:            }
        !           448:            
        !           449:            if (references) {
        !           450:                (*targetClass.start_element)(target, HTML_DT , 0, 0);
        !           451:                PUTS("References:");
        !           452:                (*targetClass.start_element)(target, HTML_DD , 0, 0);
        !           453:                write_anchors(references);
        !           454:                free(references);
        !           455:            }
        !           456:            (*targetClass.end_element)(target, HTML_DLC);
1.1       timbl     457:        }
1.2     ! timbl     458:        PUTS("\n\n\n");
1.1       timbl     459:        
                    460:     }
                    461:     
                    462: /*     Read in the BODY of the Article:
                    463: */
1.2     ! timbl     464:     (*targetClass.start_element)(target, HTML_PRE , 0, 0);
        !           465: 
1.1       timbl     466:     p = line;
                    467:     while(!done){
                    468:        char ch = *p++ = NEXT_CHAR;
                    469:        if (ch==(char)EOF) {
                    470:            abort_socket();     /* End of file, close socket */
                    471:            return;             /* End of file on response */
                    472:        }
                    473:        if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
                    474:            *p++=0;                             /* Terminate the string */
                    475:            if (TRACE) fprintf(stderr, "B %s", line);
                    476:            if (line[0]=='.') {
                    477:                if (line[1]<' ') {              /* End of article? */
                    478:                    done = YES;
                    479:                    break;
                    480:                } else {                        /* Line starts with dot */
1.2     ! timbl     481:                    PUTS(&line[1]);     /* Ignore first dot */
1.1       timbl     482:                }
                    483:            } else {
                    484: 
                    485: /*     Normal lines are scanned for buried references to other articles.
                    486: **     Unfortunately, it will pick up mail addresses as well!
                    487: */
                    488:                char *l = line;
                    489:                char * p;
                    490:                while (p=strchr(l, '<')) {
                    491:                    char *q  = strchr(p,'>');
                    492:                    char *at = strchr(p, '@');
                    493:                    if (q && at && at<q) {
                    494:                        char c = q[1];
                    495:                        q[1] = 0;               /* chop up */
                    496:                        *p = 0;
1.2     ! timbl     497:                        PUTS(l);
1.1       timbl     498:                        *p = '<';               /* again */
                    499:                        *q = 0;
1.2     ! timbl     500:                        start_anchor(p+1);
1.1       timbl     501:                        *q = '>';               /* again */
1.2     ! timbl     502:                        PUTS(p);
        !           503:                        (*targetClass.end_element)(target, HTML_A);
1.1       timbl     504:                        q[1] = c;               /* again */
                    505:                        l=q+1;
                    506:                    } else break;               /* line has unmatched <> */
                    507:                } 
1.2     ! timbl     508:                PUTS( l);       /* Last bit of the line */
1.1       timbl     509:            } /* if not dot */
                    510:            p = line;                           /* Restart at beginning */
                    511:        } /* if end of line */
                    512:     } /* Loop over characters */
1.2     ! timbl     513:     
        !           514:     (*targetClass.end_element)(target, HTML_PRE);
1.1       timbl     515: }
                    516: 
                    517: 
                    518: /*     Read in a List of Newsgroups
                    519: **     ----------------------------
                    520: */
                    521: /*
                    522: **     Note the termination condition of a single dot on a line by itself.
                    523: **     RFC 977 specifies that the line "folding" of RFC850 is not used, so we
                    524: **     do not handle it here.
                    525: */        
                    526: PRIVATE void read_list NOARGS
                    527: {
                    528: 
                    529:     char line[LINE_LENGTH+1];
                    530:     char *p;
                    531:     BOOL done = NO;
                    532:     
                    533: /*     Read in the HEADer of the article:
                    534: **
                    535: **     The header fields are either ignored, or formatted and put into the
                    536: **     Text.
                    537: */
1.2     ! timbl     538:     (*targetClass.start_element)(target, HTML_H1 , 0, 0);
        !           539:     PUTS( "Newsgroups");
        !           540:     (*targetClass.end_element)(target, HTML_PRE);
1.1       timbl     541:     p = line;
1.2     ! timbl     542:     (*targetClass.start_element)(target, HTML_MENU , 0, 0);
1.1       timbl     543:     while(!done){
                    544:        char ch = *p++ = NEXT_CHAR;
                    545:        if (ch==(char)EOF) {
                    546:            abort_socket();     /* End of file, close socket */
                    547:            return;             /* End of file on response */
                    548:        }
                    549:        if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
                    550:            *p++=0;                             /* Terminate the string */
                    551:            if (TRACE) fprintf(stderr, "B %s", line);
1.2     ! timbl     552:            (*targetClass.start_element)(target, HTML_LI , 0, 0);
1.1       timbl     553:            if (line[0]=='.') {
                    554:                if (line[1]<' ') {              /* End of article? */
                    555:                    done = YES;
                    556:                    break;
                    557:                } else {                        /* Line starts with dot */
1.2     ! timbl     558:                    PUTS( &line[1]);
1.1       timbl     559:                }
                    560:            } else {
                    561: 
                    562: /*     Normal lines are scanned for references to newsgroups.
                    563: */
                    564:                char group[LINE_LENGTH];
                    565:                int first, last;
                    566:                char postable;
                    567:                if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
                    568:                    write_anchor(line, group);
                    569:                else
1.2     ! timbl     570:                    PUTS(line);
1.1       timbl     571:            } /* if not dot */
                    572:            p = line;                   /* Restart at beginning */
                    573:        } /* if end of line */
                    574:     } /* Loop over characters */
1.2     ! timbl     575:     (*targetClass.end_element)(target, HTML_MENU);
1.1       timbl     576: }
                    577: 
                    578: 
                    579: /*     Read in a Newsgroup
                    580: **     -------------------
                    581: **     Unfortunately, we have to ask for each article one by one if we
                    582: **     want more than one field.
                    583: **
                    584: */
                    585: PRIVATE void read_group ARGS3(
                    586:   CONST char *,groupName,
                    587:   int,first_required,
                    588:   int,last_required
                    589: )
                    590: {
                    591:     char line[LINE_LENGTH+1];
                    592:     char author[LINE_LENGTH+1];
                    593:     char subject[LINE_LENGTH+1];
                    594:     char *p;
                    595:     BOOL done;
                    596: 
                    597:     char buffer[LINE_LENGTH];
                    598:     char *reference=0;                 /* Href for article */
                    599:     int art;                           /* Article number WITHIN GROUP */
                    600:     int status, count, first, last;    /* Response fields */
                    601:                                        /* count is only an upper limit */
                    602: 
                    603:     sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
                    604:     if(TRACE) printf("Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
                    605:                        status, count, first, last, first_required, last_required);
                    606:     if (last==0) {
1.2     ! timbl     607:         PUTS( "\nNo articles in this group.\n");
1.1       timbl     608:        return;
                    609:     }
                    610:     
                    611: #define FAST_THRESHOLD 100     /* Above this, read IDs fast */
                    612: #define CHOP_THRESHOLD 50      /* Above this, chop off the rest */
                    613: 
                    614:     if (first_required<first) first_required = first;          /* clip */
                    615:     if ((last_required==0) || (last_required > last)) last_required = last;
                    616:     
                    617:     if (last_required<=first_required) {
1.2     ! timbl     618:         PUTS( "\nNo articles in this range.\n");
1.1       timbl     619:        return;
                    620:     }
                    621: 
                    622:     if (last_required-first_required+1 > MAX_CHUNK) {  /* Trim this block */
                    623:         first_required = last_required-CHUNK_SIZE+1;
                    624:     }
                    625:     if (TRACE) printf (
                    626:     "    Chunk will be (%d-%d)\n", first_required, last_required);
                    627: 
1.2     ! timbl     628: /*     Set window title
        !           629: */
        !           630:     sprintf(buffer, "Newsgroup %s,  Articles %d-%d",
        !           631:                groupName, first_required, last_required);
        !           632:     START(HTML_TITLE);
        !           633:     PUTS(buffer);
        !           634:     END(HTML_TITLE);
        !           635: 
1.1       timbl     636: /*     Link to earlier articles
                    637: */
                    638:     if (first_required>first) {
                    639:        int before;                     /* Start of one before */
                    640:        if (first_required-MAX_CHUNK <= first) before = first;
                    641:        else before = first_required-CHUNK_SIZE;
                    642:        sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
                    643:        if (TRACE) fprintf(stderr, "    Block before is %s\n", buffer);
1.2     ! timbl     644:        PUTS( " (");
        !           645:        start_anchor(buffer);
        !           646:        PUTS("Earlier articles");
        !           647:        END(HTML_A);
        !           648:        PUTS( "...)\n");
1.1       timbl     649:     }
                    650:     
                    651:     done = NO;
                    652: 
                    653: /*#define USE_XHDR*/
                    654: #ifdef USE_XHDR
                    655:     if (count>FAST_THRESHOLD)  {
                    656:         sprintf(buffer,
                    657:        "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
                    658:                count, groupName); 
1.2     ! timbl     659:         PUTS(buffer);
1.1       timbl     660:         sprintf(buffer, "XHDR Message-ID %d-%d\n", first, last);
                    661:        status = response(buffer);
                    662:        if (status==221) {
                    663: 
                    664:            p = line;
                    665:            while(!done){
                    666:                char ch = *p++ = NEXT_CHAR;
                    667:                if (ch==(char)EOF) {
                    668:                    abort_socket();     /* End of file, close socket */
                    669:                    return;             /* End of file on response */
                    670:                }
                    671:                if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
                    672:                    *p++=0;                             /* Terminate the string */
                    673:                    if (TRACE) fprintf(stderr, "X %s", line);
                    674:                    if (line[0]=='.') {
                    675:                        if (line[1]<' ') {              /* End of article? */
                    676:                            done = YES;
                    677:                            break;
                    678:                        } else {                        /* Line starts with dot */
                    679:                                /* Ignore strange line */
                    680:                        }
                    681:                    } else {
                    682:        
                    683:        /*      Normal lines are scanned for references to articles.
                    684:        */
                    685:                        char * space = strchr(line, ' ');
                    686:                        if (space++)
                    687:                            write_anchor(space, space);
                    688:                    } /* if not dot */
                    689:                    p = line;                   /* Restart at beginning */
                    690:                } /* if end of line */
                    691:            } /* Loop over characters */
                    692: 
                    693:            /* leaving loop with "done" set */
                    694:        } /* Good status */
                    695:     };
                    696: #endif
                    697: 
                    698: /*     Read newsgroup using individual fields:
                    699: */
                    700:     if (!done) {
                    701:         if (first==first_required && last==last_required)
1.2     ! timbl     702:                PUTS("\nAll available articles in ");
        !           703:         else PUTS( "\nArticles in ");
        !           704:        PUTS(groupName);
        !           705:        START(HTML_MENU);
1.1       timbl     706:        for(art=first_required; art<=last_required; art++) {
                    707:     
                    708: /*#define OVERLAP*/
                    709: #ifdef OVERLAP
                    710: /* With this code we try to keep the server running flat out by queuing just
                    711: ** one extra command ahead of time. We assume (1) that the server won't abort
                    712: ** if it gets input during output, and (2) that TCP buffering is enough for the
                    713: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
                    714: ** had a hangup with a loaded server.
                    715: */
                    716:            if (art==first_required) {
                    717:                if (art==last_required) {
                    718:                        sprintf(buffer, "HEAD %d\n", art);      /* Only one */
                    719:                        status = response(buffer);
                    720:                    } else {                                    /* First of many */
                    721:                        sprintf(buffer, "HEAD %d\nHEAD %d\n", art, art+1);
                    722:                        status = response(buffer);
                    723:                    }
                    724:            } else if (art==last_required) {                    /* Last of many */
                    725:                    status = response(NULL);
                    726:            } else {                                            /* Middle of many */
                    727:                    sprintf(buffer, "HEAD %d\n", art+1);
                    728:                    status = response(buffer);
                    729:            }
                    730:            
                    731: #else  /* NOT OVERLAP */
                    732:            sprintf(buffer, "HEAD %d\n", art);
                    733:            status = response(buffer);
                    734: #endif /* NOT OVERLAP */
                    735: 
                    736:            if (status == 221) {        /* Head follows - parse it:*/
                    737:     
                    738:                p = line;                               /* Write pointer */
                    739:                done = NO;
                    740:                while(!done){
                    741:                    char ch = *p++ = NEXT_CHAR;
                    742:                    if (ch==(char)EOF) {
                    743:                        abort_socket(); /* End of file, close socket */
                    744:                        return;         /* End of file on response */
                    745:                    }
                    746:                    if ((ch == '\n')
                    747:                        || (p == &line[LINE_LENGTH]) ) {
                    748:                    
                    749:                        *--p=0;         /* Terminate  & chop LF*/
                    750:                        p = line;               /* Restart at beginning */
                    751:                        if (TRACE) fprintf(stderr, "G %s\n", line);
                    752:                        switch(line[0]) {
                    753:     
                    754:                        case '.':
                    755:                            done = (line[1]<' ');       /* End of article? */
                    756:                            break;
                    757:     
                    758:                        case 'S':
                    759:                        case 's':
                    760:                            if (match(line, "SUBJECT:"))
                    761:                                strcpy(subject, line+9);/* Save subject */
                    762:                            break;
                    763:     
                    764:                        case 'M':
                    765:                        case 'm':
                    766:                            if (match(line, "MESSAGE-ID:")) {
                    767:                                char * addr = HTStrip(line+11) +1; /* Chop < */
                    768:                                addr[strlen(addr)-1]=0;         /* Chop > */
                    769:                                StrAllocCopy(reference, addr);
                    770:                            }
                    771:                            break;
                    772:     
                    773:                        case 'f':
                    774:                        case 'F':
                    775:                            if (match(line, "FROM:")) {
                    776:                                char * p;
                    777:                                strcpy(author,
                    778:                                        author_name(strchr(line,':')+1));
                    779:                                p = author + strlen(author) - 1;
                    780:                                if (*p=='\n') *p = 0;   /* Chop off newline */
                    781:                            }
                    782:                            break;
                    783:                                    
                    784:                        } /* end switch on first character */
                    785:                    } /* if end of line */
                    786:                } /* Loop over characters */
                    787:     
1.2     ! timbl     788:                START(HTML_LI);
1.1       timbl     789:                sprintf(buffer, "\"%s\" - %s", subject, author);
                    790:                if (reference) {
                    791:                    write_anchor(buffer, reference);
                    792:                    free(reference);
                    793:                    reference=0;
                    794:                } else {
1.2     ! timbl     795:                    PUTS(buffer);
1.1       timbl     796:                }
                    797:                
                    798:     
1.2     ! timbl     799: /*      indicate progress!   @@@@@@
1.1       timbl     800: */
                    801:     
                    802:            } /* If good response */
                    803:        } /* Loop over article */           
                    804:     } /* If read headers */
1.2     ! timbl     805:     END(HTML_MENU);
        !           806:     START(HTML_P);
1.1       timbl     807:     
                    808: /*     Link to later articles
                    809: */
                    810:     if (last_required<last) {
                    811:        int after;                      /* End of article after */
                    812:        after = last_required+CHUNK_SIZE;
                    813:        if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
                    814:        else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
                    815:        if (TRACE) fprintf(stderr, "    Block after is %s\n", buffer);
1.2     ! timbl     816:        PUTS( "(");
        !           817:        start_anchor(buffer);
        !           818:        PUTS( "Later articles");
        !           819:        END(HTML_A);
        !           820:        PUTS( "...)\n");
1.1       timbl     821:     }
                    822:     
                    823: 
                    824: }
                    825: 
                    826: 
                    827: /*             Load by name                                    HTLoadNews
                    828: **             ============
                    829: */
1.2     ! timbl     830: PUBLIC int HTLoadNews ARGS4(
        !           831:        CONST char *,           arg,
        !           832:        HTParentAnchor *,       anAnchor,
        !           833:        HTFormat,               format_out,
        !           834:        HTStream*,              stream)
1.1       timbl     835: {
                    836:     char command[257];                 /* The whole command */
                    837:     char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
                    838:     int status;                                /* tcp return */
                    839:     int retries;                       /* A count of how hard we have tried */ 
                    840:     BOOL group_wanted;                 /* Flag: group was asked for, not article */
                    841:     BOOL list_wanted;                  /* Flag: group was asked for, not article */
                    842:     int first, last;                   /* First and last articles asked for */
                    843: 
1.2     ! timbl     844:     diagnostic = (format_out == WWW_SOURCE);   /* set global flag */
1.1       timbl     845:     
                    846:     if (TRACE) fprintf(stderr, "HTNews: Looking for %s\n", arg);
                    847:     
                    848:     if (!initialized) initialized = initialize();
                    849:     if (!initialized) return -1;       /* FAIL */
                    850:     
                    851:     {
                    852:         CONST char * p1=arg;
                    853: 
                    854: /*     We will ask for the document, omitting the host name & anchor.
                    855: **
                    856: **     Syntax of address is
                    857: **             xxx@yyy                 Article
                    858: **             <xxx@yyy>               Same article
                    859: **             xxxxx                   News group (no "@")
                    860: **             group/n1-n2             Articles n1 to n2 in group
                    861: */        
                    862:        group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
                    863:        list_wanted  = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
                    864: 
                    865:        /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
                    866:        /* Don't use HTParse because news: access doesn't follow traditional
                    867:           rules. For instance, if the article reference contains a '#',
                    868:           the rest of it is lost -- JFG 10/7/92, from a bug report */
                    869:        if (!strncasecomp (arg, "news:", 5))
                    870:          p1 = arg + 5;  /* Skip "news:" prefix */
                    871:        if (list_wanted) {
                    872:            strcpy(command, "LIST ");
                    873:        } else if (group_wanted) {
                    874:            char * slash = strchr(p1, '/');
                    875:            strcpy(command, "GROUP ");
                    876:            first = 0;
                    877:            last = 0;
                    878:            if (slash) {
                    879:                *slash = 0;
                    880:                strcpy(groupName, p1);
                    881:                *slash = '/';
                    882:                (void) sscanf(slash+1, "%d-%d", &first, &last);
                    883:            } else {
                    884:                strcpy(groupName, p1);
                    885:            }
                    886:            strcat(command, groupName);
                    887:        } else {
                    888:            strcpy(command, "ARTICLE ");
                    889:            if (strchr(p1, '<')==0) strcat(command,"<");
                    890:            strcat(command, p1);
                    891:            if (strchr(p1, '>')==0) strcat(command,">");
                    892:        }
                    893: /*     free(p1); * bug fix TBL 5 Aug 92 */
                    894: 
                    895:         strcat(command, "\r\n");               /* CR LF, as in rfc 977 */
                    896:        
                    897:     } /* scope of p1 */
                    898:     
                    899:     if (!*arg) return NO;                      /* Ignore if no name */
                    900: 
                    901:     
                    902: /*     Make a hypertext object with an anchor list.
                    903: */       
                    904:     node_anchor = anAnchor;
1.2     ! timbl     905:     target = HTML_new(anAnchor, stream);
        !           906:     targetClass = *target->isa;        /* Copy routine entry points */
        !           907:     
1.1       timbl     908:        
                    909: /*     Now, let's get a stream setup up from the NewsHost:
                    910: */       
                    911:     for(retries=0;retries<2; retries++){
                    912:     
                    913:         if (s<0) {
                    914:             NEWS_PROGRESS("Connecting to NewsHost ...");
                    915:            s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
                    916:            status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
                    917:            if (status<0){
                    918:                char message[256];
                    919:                NETCLOSE(s);
                    920:                s = -1;
                    921:                if (TRACE) fprintf(stderr, "HTNews: Unable to connect to news host.\n");
                    922: /*             if (retries<=1) continue;   WHY TRY AGAIN ?     */
                    923: #ifdef NeXTStep
                    924:                NXRunAlertPanel(NULL,
                    925:                    "Could not access newshost %s.",
                    926:                    NULL,NULL,NULL,
1.2     ! timbl     927:                    HTNewsHost);
1.1       timbl     928: #else
                    929:                fprintf(stderr, "Could not access newshost %s\n",
1.2     ! timbl     930:                    HTNewsHost);
1.1       timbl     931: #endif
                    932:                sprintf(message,
                    933: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2     ! timbl     934:                    HTNewsHost);
        !           935:                
        !           936:                PUTS(message);
        !           937:                (*targetClass.end_document)(target);
1.1       timbl     938:                return YES;
                    939:            } else {
                    940:                if (TRACE) fprintf(stderr, "HTNews: Connected to news host %s.\n",
1.2     ! timbl     941:                                HTNewsHost);
1.1       timbl     942:                HTInitInput(s);         /* set up buffering */
                    943:                if ((response(NULL) / 100) !=2) {
                    944:                        NETCLOSE(s);
                    945:                        s = -1;
                    946: #ifdef NeXTStep
                    947:                        NXRunAlertPanel("News access",
                    948:                            "Could not retrieve information:\n   %s.",
                    949:                            NULL,NULL,NULL,
                    950:                            response_text);
                    951: #endif
1.2     ! timbl     952:                        START(HTML_TITLE);
        !           953:                        PUTS("News host response");
        !           954:                        END(HTML_TITLE);
        !           955:                        PUTS("Sorry, could not retrieve information: ");
        !           956:                        PUTS(response_text);
        !           957:                        (*targetClass.end_document)(target);
1.1       timbl     958:                        return YES;
                    959:                }
                    960:            }
                    961:        } /* If needed opening */
                    962:        
1.2     ! timbl     963:        /* @@@@@@@@@@@@@@Tell user something's happening */
        !           964:        
1.1       timbl     965:        status = response(command);
                    966:        if (status<0) break;
                    967:        if ((status/ 100) !=2) {
                    968: /*         NXRunAlertPanel("News access", response_text,
                    969:                NULL,NULL,NULL);
                    970: */
1.2     ! timbl     971:            
        !           972:            PUTS(response_text);
        !           973:            (*targetClass.end_document)(target);
1.1       timbl     974:            NETCLOSE(s);
                    975:            s = -1;
                    976: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
                    977:            continue;   /*      Try again */
                    978:        }
                    979:   
                    980: /*     Load a group, article, etc
                    981: */
1.2     ! timbl     982:         
1.1       timbl     983:        
                    984:        if (list_wanted) read_list();
                    985:        else if (group_wanted) read_group(groupName, first, last);
                    986:         else read_article();
                    987: 
1.2     ! timbl     988:        (*targetClass.end_document)(target);
        !           989:        return HT_LOADED;
1.1       timbl     990:        
                    991:     } /* Retry loop */
                    992:     
1.2     ! timbl     993:     
        !           994:     PUTS("Sorry, could not load requested news.\n");
        !           995:     (*targetClass.end_document)(target);
1.1       timbl     996:     
                    997: /*    NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
                    998:            NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
                    999: 
1.2     ! timbl    1000:     return HT_LOADED;
1.1       timbl    1001: }
                   1002: 
1.2     ! timbl    1003: PUBLIC HTProtocol HTNews = { "news", HTLoadNews, NULL };

Webmaster