Annotation of libwww/Library/src/HTNews.c, revision 2.18

1.1       timbl       1: /*                     NEWS ACCESS                             HTNews.c
                      2: **                     ===========
                      3: **
                      4: ** History:
                      5: **     26 Sep 90       Written TBL
                      6: **     29 Nov 91       Downgraded to C, for portable implementation.
2.16      luotonen    7: **     16 Feb 94       Added Lou Montulli's Lynx & LIST NEWSGROUPS diffs.
1.1       timbl       8: */
1.2       timbl       9: /* Implements:
                     10: */
                     11: #include "HTNews.h"
1.1       timbl      12: 
1.3       timbl      13: #define CR   FROMASCII('\015') /* Must be converted to ^M for transmission */
                     14: #define LF   FROMASCII('\012') /* Must be converted to ^J for transmission */
                     15: 
1.1       timbl      16: #define NEWS_PORT 119          /* See rfc977 */
                     17: #define APPEND                 /* Use append methods */
                     18: #define MAX_CHUNK      40      /* Largest number of articles in one window */
                     19: #define CHUNK_SIZE     20      /* Number of articles for quick display */
                     20: 
                     21: #ifndef DEFAULT_NEWS_HOST
                     22: #define DEFAULT_NEWS_HOST "news"
                     23: #endif
                     24: #ifndef SERVER_FILE
                     25: #define SERVER_FILE "/usr/local/lib/rn/server"
                     26: #endif
                     27: 
                     28: #include <ctype.h>
                     29: #include "HTUtils.h"           /* Coding convention macros */
                     30: #include "tcp.h"
                     31: 
1.2       timbl      32: #include "HTML.h"
1.1       timbl      33: #include "HTParse.h"
                     34: #include "HTFormat.h"
2.8       timbl      35: #include "HTAlert.h"
1.1       timbl      36: 
2.8       timbl      37: #define BIG 1024 /* @@@ */
                     38: 
1.2       timbl      39: struct _HTStructured {
                     40:        CONST HTStructuredClass *       isa;
                     41:        /* ... */
                     42: };
                     43: 
2.7       timbl      44: #define NEWS_PROGRESS(foo) HTProgress(foo)
1.1       timbl      45: 
                     46: 
2.12      timbl      47: #define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1       timbl      48: #define LINE_LENGTH 512                        /* Maximum length of line of ARTICLE etc */
                     49: #define GROUP_NAME_LENGTH      256     /* Maximum length of group name */
                     50: 
                     51: 
                     52: /*     Module-wide variables
                     53: */
1.2       timbl      54: PUBLIC char * HTNewsHost;                      /* News host name; set by HTSetNewsHost() and initialize() */
1.1       timbl      55: PRIVATE struct sockaddr_in soc_address;                /* Binary network address of the news host, filled in by initialize() */
                     56: PRIVATE int s;                                 /* Socket for NewsHost; -1 when disconnected */
                     57: PRIVATE char response_text[LINE_LENGTH+1];     /* Last response line read by response(), NUL-terminated */
1.2       timbl      58: /* PRIVATE HText *     HT;     */              /* the new hypertext */
                     59: PRIVATE HTStructured * target;                 /* The output sink written to by PUTC/PUTS/START/END */
                     60: PRIVATE HTStructuredClass targetClass;         /* Copy of the sink's class, i.e. of its fn addresses */
1.1       timbl      61: PRIVATE HTParentAnchor *node_anchor;           /* Its anchor */
                     62: PRIVATE int    diagnostic;                     /* level: 0=none 2=source; non-zero makes read_article() skip header formatting */
                     63: 
1.2       timbl      64: 
/* Shorthands that call the output sink's methods through targetClass */
                     65: #define PUTC(c) (*targetClass.put_character)(target, c)
                     66: #define PUTS(s) (*targetClass.put_string)(target, s)
                     67: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
                     68: #define END(e) (*targetClass.end_element)(target, e)
                     69: 
2.11      timbl      70: PUBLIC HTInputSocket *isoc;            /* Input buffer read by NEXT_CHAR; one shared instance, so @@@ non-reentrant */
                     71: 
1.2       timbl      72: PUBLIC CONST char * HTGetNewsHost NOARGS
                     73: {
                     74:        return HTNewsHost;
                     75: }
1.1       timbl      76: 
1.2       timbl      77: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
                     78: {
                     79:        StrAllocCopy(HTNewsHost, value);
                     80: }
1.1       timbl      81: 
                     82: /*     Initialisation for this module
                     83: **     ------------------------------
                     84: **
                     85: **     Except on the NeXT, we pick up the NewsHost name from
                     86: **
                     87: **     1.      Environment variable NNTPSERVER
                     88: **     2.      File SERVER_FILE
                     89: **     3.      Compilation time macro DEFAULT_NEWS_HOST
                     90: **     4.      Default to "news"
                     91: **
                     92: **     On the NeXT, we pick up the NewsHost name from, in order:
                     93: **
                     94: **     1.      WorldWideWeb default "NewsHost"
                     95: **     2.      Global default "NewsHost"
                     96: **     3.      News default "NewsHost"
                     97: **     4.      Compilation time macro DEFAULT_NEWS_HOST
                     98: **     5.      Default to "news"
                     99: */
                    100: PRIVATE BOOL initialized = NO;
                    101: PRIVATE BOOL initialize NOARGS
                    102: {
                    103:     CONST struct hostent  *phost;        /* Pointer to host - See netdb.h */
                    104:     struct sockaddr_in* sin = &soc_address;
                    105: 
                    106:         
                    107: /*  Set up defaults:
                    108: */
                    109:     sin->sin_family = AF_INET;         /* Family = internet, host order  */
                    110:     sin->sin_port = htons(NEWS_PORT);   /* Default: new port,    */
                    111: 
                    112: /*   Get name of Host
                    113: */
                    114: #ifdef NeXTStep
1.2       timbl     115:     if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
                    116:         if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
                    117:            HTNewsHost = DEFAULT_NEWS_HOST;
1.1       timbl     118: #else
                    119:     if (getenv("NNTPSERVER")) {
1.2       timbl     120:         StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
1.1       timbl     121:        if (TRACE) fprintf(stderr, "HTNews: NNTPSERVER defined as `%s'\n",
1.2       timbl     122:                HTNewsHost);
1.1       timbl     123:     } else {
                    124:         char server_name[256];
                    125:         FILE* fp = fopen(SERVER_FILE, "r");
                    126:         if (fp) {
                    127:            if (fscanf(fp, "%s", server_name)==1) {
1.2       timbl     128:                StrAllocCopy(HTNewsHost, server_name);
1.1       timbl     129:                if (TRACE) fprintf(stderr,
                    130:                "HTNews: File %s defines news host as `%s'\n",
1.2       timbl     131:                        SERVER_FILE, HTNewsHost);
1.1       timbl     132:            }
                    133:            fclose(fp);
                    134:        }
                    135:     }
1.2       timbl     136:     if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1       timbl     137: #endif
                    138: 
1.2       timbl     139:     if (*HTNewsHost>='0' && *HTNewsHost<='9') {   /* Numeric node address: */
                    140:        sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1       timbl     141: 
                    142:     } else {               /* Alphanumeric node name: */
1.2       timbl     143:        phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1       timbl     144:        if (!phost) {
2.7       timbl     145:            char message[150];          /* @@@ */
                    146:            sprintf(message, 
                    147:            "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
                    148:            "Please define your NNTP server");
                    149:            HTAlert(message);
1.1       timbl     150:            CTRACE(tfp,
1.2       timbl     151:              "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1       timbl     152:            return NO;  /* Fail */
                    153:        }
                    154:        memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
                    155:     }
                    156: 
                    157:     if (TRACE) fprintf(stderr,  
                    158:        "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
                    159:                (unsigned int)ntohs(sin->sin_port),
                    160:                (int)*((unsigned char *)(&sin->sin_addr)+0),
                    161:                (int)*((unsigned char *)(&sin->sin_addr)+1),
                    162:                (int)*((unsigned char *)(&sin->sin_addr)+2),
                    163:                (int)*((unsigned char *)(&sin->sin_addr)+3));
                    164: 
                    165:     s = -1;            /* Disconnected */
                    166:     
                    167:     return YES;
                    168: }
                    169: 
                    170: 
                    171: 
                    172: /*     Send NNTP Command line to remote host & Check Response
                    173: **     ------------------------------------------------------
                    174: **
                    175: ** On entry,
                    176: **     command points to the command to be sent, including CRLF, or is null
                    177: **             pointer if no command to be sent.
                    178: ** On exit,
                    179: **     Negative status indicates transmission error, socket closed.
                    180: **     Positive status is an NNTP status.
                    181: */
                    182: 
                    183: 
                    184: PRIVATE int response ARGS1(CONST char *,command)
                    185: {
                    186:     int result;    
                    187:     char * p = response_text;
                    188:     if (command) {
                    189:         int status;
                    190:        int length = strlen(command);
                    191:        if (TRACE) fprintf(stderr, "NNTP command to be sent: %s", command);
                    192: #ifdef NOT_ASCII
                    193:        {
                    194:            CONST char  * p;
                    195:            char        * q;
                    196:            char ascii[LINE_LENGTH+1];
                    197:            for(p = command, q=ascii; *p; p++, q++) {
                    198:                *q = TOASCII(*p);
                    199:            }
                    200:             status = NETWRITE(s, ascii, length);
                    201:        }
                    202: #else
                    203:         status = NETWRITE(s, command, length);
                    204: #endif
                    205:        if (status<0){
                    206:            if (TRACE) fprintf(stderr,
                    207:                "HTNews: Unable to send command. Disconnecting.\n");
                    208:            NETCLOSE(s);
2.11      timbl     209:            HTInputSocket_free(isoc);
1.1       timbl     210:            s = -1;
                    211:            return status;
                    212:        } /* if bad status */
                    213:     } /* if command to be sent */
                    214:     
                    215:     for(;;) {  
1.3       timbl     216:        if (((*p++=NEXT_CHAR) == LF)
                    217:                        || (p == &response_text[LINE_LENGTH])) {
1.1       timbl     218:            *p++=0;                             /* Terminate the string */
                    219:            if (TRACE) fprintf(stderr, "NNTP Response: %s\n", response_text);
                    220:            sscanf(response_text, "%d", &result);
                    221:            return result;          
                    222:        } /* if end of line */
                    223:        
                    224:        if (*(p-1) < 0) {
                    225:            if (TRACE) fprintf(stderr,
                    226:                "HTNews: EOF on read, closing socket %d\n", s);
                    227:            NETCLOSE(s);        /* End of file, close socket */
2.11      timbl     228:            HTInputSocket_free(isoc);
1.1       timbl     229:            return s = -1;      /* End of file on response */
                    230:        }
                    231:     } /* Loop over characters */
                    232: }
                    233: 
                    234: 
                    235: /*     Case insensitive string comparisons
                    236: **     -----------------------------------
                    237: **
                    238: ** On entry,
                    239: **     template must be already un upper case.
                    240: **     unknown may be in upper or lower or mixed case to match.
                    241: */
                    242: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,template)
                    243: {
                    244:     CONST char * u = unknown;
                    245:     CONST char * t = template;
                    246:     for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
                    247:     return (BOOL)(*t==0);              /* OK if end of template */
                    248: }
                    249: 
                    250: /*     Find Author's name in mail address
                    251: **     ----------------------------------
                    252: **
                    253: ** On exit,
                    254: **     THE EMAIL ADDRESS IS CORRUPTED
                    255: **
                    256: ** For example, returns "Tim Berners-Lee" if given any of
                    257: **     " Tim Berners-Lee <tim@online.cern.ch> "
                    258: **  or " tim@online.cern.ch ( Tim Berners-Lee ) "
                    259: */
                    260: PRIVATE char * author_name ARGS1 (char *,email)
                    261: {
                    262:     char *s, *e;
                    263:     
                    264:     if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
                    265:         if (e>s) {
                    266:            *e=0;                       /* Chop off everything after the ')'  */
                    267:            return HTStrip(s+1);        /* Remove leading and trailing spaces */
                    268:        }
                    269:        
                    270:     if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
                    271:         if (e>s) {
                    272:            strcpy(s, e+1);             /* Remove <...> */
                    273:            return HTStrip(email);      /* Remove leading and trailing spaces */
                    274:        }
                    275:        
                    276:     return HTStrip(email);             /* Default to the whole thing */
                    277: 
                    278: }
                    279: 
1.2       timbl     280: /*     Start anchor element
                    281: **     --------------------
                    282: */
                    283: PRIVATE void start_anchor ARGS1(CONST char *,  href)
                    284: {
                    285:     BOOL               present[HTML_A_ATTRIBUTES];
                    286:     CONST char*                value[HTML_A_ATTRIBUTES];
                    287:     
                    288:     {
                    289:        int i;
                    290:        for(i=0; i<HTML_A_ATTRIBUTES; i++)
                    291:            present[i] = (i==HTML_A_HREF);
                    292:     }
                    293:     value[HTML_A_HREF] = href;
                    294:     (*targetClass.start_element)(target, HTML_A , present, value);
                    295: 
                    296: }
1.1       timbl     297: 
2.16      luotonen  298: 
                    299: /*      Start link element
                    300: **      --------------------
                    301: */
                    302: PRIVATE void start_link ARGS2(CONST char *,  href, CONST char *, rev)
                    303: {
                    304: #ifdef WHEN_WE_HAVE_HTMLPLUS
                    305: 
                    306:     BOOL                present[HTML_LINK_ATTRIBUTES];
                    307:     CONST char*         value[HTML_LINK_ATTRIBUTES];
                    308:    
                    309:     {
                    310:         int i;
                    311:         for(i=0; i<HTML_LINK_ATTRIBUTES; i++)
                    312:             present[i] = (i==HTML_LINK_HREF || i==HTML_LINK_REV);
                    313:     }
                    314:     value[HTML_LINK_HREF] = href;
                    315:     value[HTML_LINK_REV]  = rev;
                    316:     (*targetClass.start_element)(target, HTML_LINK , present, value);
                    317: 
                    318: #endif
                    319: }
                    320: 
                    321: 
                    322: 
                    323: 
1.1       timbl     324: /*     Paste in an Anchor
                    325: **     ------------------
                    326: **
                    327: **
                    328: ** On entry,
                    329: **     HT      has a selection of zero length at the end.
                    330: **     text    points to the text to be put into the file, 0 terminated.
                    331: **     addr    points to the hypertext refernce address,
                    332: **             terminated by white space, comma, NULL or '>' 
                    333: */
                    334: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
                    335: {
                    336:     char href[LINE_LENGTH+1];
                    337:                
                    338:     {
                    339:        CONST char * p;
                    340:        strcpy(href,"news:");
                    341:        for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
                    342:         strncat(href, addr, p-addr);   /* Make complete hypertext reference */
                    343:     }
                    344:     
1.2       timbl     345:     start_anchor(href);
                    346:     PUTS(text);
                    347:     END(HTML_A);
1.1       timbl     348: }
                    349: 
                    350: 
                    351: /*     Write list of anchors
                    352: **     ---------------------
                    353: **
                    354: **     We take a pointer to a list of objects, and write out each,
                    355: **     generating an anchor for each.
                    356: **
                    357: ** On entry,
                    358: **     HT      has a selection of zero length at the end.
                    359: **     text    points to a comma or space separated list of addresses.
                    360: ** On exit,
                    361: **     *text   is NOT any more chopped up into substrings.
                    362: */
                    363: PRIVATE void write_anchors ARGS1 (char *,text)
                    364: {
                    365:     char * start = text;
                    366:     char * end;
                    367:     char c;
                    368:     for (;;) {
                    369:         for(;*start && (WHITE(*start)); start++);  /* Find start */
                    370:        if (!*start) return;                    /* (Done) */
                    371:         for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
                    372:        if (*end) end++;        /* Include comma or space but not NULL */
                    373:        c = *end;
                    374:        *end = 0;
                    375:        write_anchor(start, start);
2.16      luotonen  376:        START(HTML_BR);
1.1       timbl     377:        *end = c;
                    378:        start = end;                    /* Point to next one */
                    379:     }
                    380: }
                    381: 
                    382: /*     Abort the connection                                    abort_socket
                    383: **     --------------------
                    384: */
                    385: PRIVATE void abort_socket NOARGS
                    386: {
                    387:     if (TRACE) fprintf(stderr,
                    388:            "HTNews: EOF on read, closing socket %d\n", s);
                    389:     NETCLOSE(s);       /* End of file, close socket */
2.11      timbl     390:     HTInputSocket_free(isoc);
1.2       timbl     391:     PUTS("Network Error: connection lost");
                    392:     PUTC('\n');
1.1       timbl     393:     s = -1;            /* End of file on response */
                    394:     return;
                    395: }
                    396: 
                    397: /*     Read in an Article                                      read_article
                    398: **     ------------------
                    399: **
                    400: **
                    401: **     Note the termination condition of a single dot on a line by itself.
                    402: **     RFC 977 specifies that the line "folding" of RFC850 is not used, so we
                    403: **     do not handle it here.
                    404: **
                    405: ** On entry,
                    406: **     s       Global socket number is OK
                    407: **     HT      Global hypertext object is ready for appending text
                    408: */       
                    409: PRIVATE void read_article NOARGS
                    410: {
                    411: 
                    412:     char line[LINE_LENGTH+1];
                    413:     char *references=NULL;                     /* Hrefs for other articles */
                    414:     char *newsgroups=NULL;                     /* Newsgroups list */
                    415:     char *p = line;
                    416:     BOOL done = NO;
                    417:     
                    418: /*     Read in the HEADer of the article:
                    419: **
                    420: **     The header fields are either ignored, or formatted and put into the
                    421: **      Text.
                    422: */
                    423:     if (!diagnostic) {
1.2       timbl     424:         (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1       timbl     425:        while(!done){
                    426:            char ch = *p++ = NEXT_CHAR;
                    427:            if (ch==(char)EOF) {
                    428:                abort_socket(); /* End of file, close socket */
                    429:                return;         /* End of file on response */
                    430:            }
1.3       timbl     431:            if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1       timbl     432:                *--p=0;                         /* Terminate the string */
                    433:                if (TRACE) fprintf(stderr, "H %s\n", line);
                    434: 
                    435:                if (line[0]=='.') {     
                    436:                    if (line[1]<' ') {          /* End of article? */
                    437:                        done = YES;
                    438:                        break;
                    439:                    }
                    440:                
                    441:                } else if (line[0]<' ') {
                    442:                    break;              /* End of Header? */
2.16      luotonen  443: 
1.1       timbl     444:                } else if (match(line, "SUBJECT:")) {
1.2       timbl     445:                    END(HTML_ADDRESS);
                    446:                    START(HTML_TITLE);                  /** Uuugh! @@@ */
2.16      luotonen  447:                    PUTS(line+9);
                    448:                    END(HTML_TITLE);
                    449:                    START(HTML_H1);
1.2       timbl     450:                    PUTS(line+8);
2.16      luotonen  451:                    END(HTML_H1);
1.2       timbl     452:                    START(HTML_ADDRESS);
2.16      luotonen  453: 
1.1       timbl     454:                } else if (match(line, "DATE:")
                    455:                        || match(line, "ORGANIZATION:")) {
2.16      luotonen  456:                    PUTS(strchr(line,':')+2);
                    457:                    START(HTML_BR);
                    458: 
                    459:                } else if(match(line, "FROM:")) {
                    460:                   char * temp=0;
                    461:                   char * href=0;
                    462:                   char *cp1, *cp2;
                    463: 
                    464:                   /* copy into temporary storage */
                    465:                   StrAllocCopy(temp, strchr(line,':')+1);
                    466: 
                    467:                   cp1=temp;
                    468:                   while(isspace(*cp1)) cp1++;
                    469:                   /* remove space and stuff after */
                    470:                   if((cp2 = strchr(cp1,' ')) != NULL)
                    471:                      *cp2 = '\0';
                    472: 
                    473:                   StrAllocCopy(href,"mailto:");
                    474:                   StrAllocCat(href,cp1);
                    475: 
                    476:                   start_anchor(href);
                    477:                   PUTS("Reply to ");
                    478:                   PUTS(strchr(line,':')+1);
                    479:                   END(HTML_A);
                    480:                   START(HTML_BR);
                    481: 
                    482:                   /* put in the owner as a link rel. as well */
                    483:                   start_link(href, "made");
                    484:                
                    485:                   /* free of temp vars */
                    486:                   free(temp);
                    487:                   free(href);
                    488: 
1.1       timbl     489:                } else if (match(line, "NEWSGROUPS:")) {
                    490:                    StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
                    491:                    
                    492:                } else if (match(line, "REFERENCES:")) {
                    493:                    StrAllocCopy(references, HTStrip(strchr(line,':')+1));
                    494:                    
                    495:                } /* end if match */
                    496:                p = line;                       /* Restart at beginning */
                    497:            } /* if end of line */
                    498:        } /* Loop over characters */
2.16      luotonen  499:        END(HTML_ADDRESS);
1.1       timbl     500:     
1.2       timbl     501:        if (newsgroups || references) {
2.16      luotonen  502:            START(HTML_DL);
1.2       timbl     503:            if (newsgroups) {
2.16      luotonen  504: #ifdef POSTING
                    505:                char *href=0;
                    506: #endif
                    507: 
1.2       timbl     508:                (*targetClass.start_element)(target, HTML_DT , 0, 0);
                    509:                PUTS("Newsgroups:");
                    510:                (*targetClass.start_element)(target, HTML_DD , 0, 0);
                    511:                write_anchors(newsgroups);
2.16      luotonen  512: 
                    513: #ifdef POSTING
                    514:                /* make posting possible */
                    515:                StrAllocCopy(href,"newspost:");
                    516:                StrAllocCat(href,newsgroups);
                    517:                START(HTML_DT);
                    518:                 start_anchor(href);
                    519:                 PUTS("Reply to newsgroup(s)");
                    520:                 END(HTML_A);
                    521: #endif
                    522: 
1.2       timbl     523:                free(newsgroups);
                    524:            }
                    525:            
                    526:            if (references) {
                    527:                (*targetClass.start_element)(target, HTML_DT , 0, 0);
                    528:                PUTS("References:");
                    529:                (*targetClass.start_element)(target, HTML_DD , 0, 0);
                    530:                write_anchors(references);
                    531:                free(references);
                    532:            }
2.16      luotonen  533: #ifdef WHEN_WE_HAVE_HTMLPLUS
                    534:            (*targetClass.end_element)(target, HTML_DLC);
                    535: #else
2.10      timbl     536:            (*targetClass.end_element)(target, HTML_DL);
2.16      luotonen  537: #endif
1.1       timbl     538:        }
1.2       timbl     539:        PUTS("\n\n\n");
1.1       timbl     540:        
                    541:     }
                    542:     
                    543: /*     Read in the BODY of the Article:
                    544: */
1.2       timbl     545:     (*targetClass.start_element)(target, HTML_PRE , 0, 0);
                    546: 
1.1       timbl     547:     p = line;
                    548:     while(!done){
                    549:        char ch = *p++ = NEXT_CHAR;
                    550:        if (ch==(char)EOF) {
                    551:            abort_socket();     /* End of file, close socket */
                    552:            return;             /* End of file on response */
                    553:        }
1.3       timbl     554:        if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1       timbl     555:            *p++=0;                             /* Terminate the string */
                    556:            if (TRACE) fprintf(stderr, "B %s", line);
                    557:            if (line[0]=='.') {
                    558:                if (line[1]<' ') {              /* End of article? */
                    559:                    done = YES;
                    560:                    break;
                    561:                } else {                        /* Line starts with dot */
1.2       timbl     562:                    PUTS(&line[1]);     /* Ignore first dot */
1.1       timbl     563:                }
                    564:            } else {
                    565: 
                    566: /*     Normal lines are scanned for buried references to other articles.
                    567: **     Unfortunately, it will pick up mail addresses as well!
                    568: */
                    569:                char *l = line;
                    570:                char * p;
2.14      luotonen  571:                while ((p=strchr(l, '<'))) {
1.1       timbl     572:                    char *q  = strchr(p,'>');
                    573:                    char *at = strchr(p, '@');
                    574:                    if (q && at && at<q) {
                    575:                        char c = q[1];
                    576:                        q[1] = 0;               /* chop up */
                    577:                        *p = 0;
1.2       timbl     578:                        PUTS(l);
1.1       timbl     579:                        *p = '<';               /* again */
                    580:                        *q = 0;
1.2       timbl     581:                        start_anchor(p+1);
1.1       timbl     582:                        *q = '>';               /* again */
1.2       timbl     583:                        PUTS(p);
                    584:                        (*targetClass.end_element)(target, HTML_A);
1.1       timbl     585:                        q[1] = c;               /* again */
                    586:                        l=q+1;
                    587:                    } else break;               /* line has unmatched <> */
                    588:                } 
1.2       timbl     589:                PUTS( l);       /* Last bit of the line */
1.1       timbl     590:            } /* if not dot */
                    591:            p = line;                           /* Restart at beginning */
                    592:        } /* if end of line */
                    593:     } /* Loop over characters */
1.2       timbl     594:     
                    595:     (*targetClass.end_element)(target, HTML_PRE);
1.1       timbl     596: }
                    597: 
                    598: 
                    599: /*     Read in a List of Newsgroups
                    600: **     ----------------------------
                    601: */
                    602: /*
                    603: **     Note the termination condition of a single dot on a line by itself.
                    604: **     RFC 977 specifies that the line "folding" of RFC850 is not used, so we
                    605: **     do not handle it here.
                    606: */        
                    607: PRIVATE void read_list NOARGS
                    608: {
                    609: 
                    610:     char line[LINE_LENGTH+1];
                    611:     char *p;
                    612:     BOOL done = NO;
                    613:     
                    614: /*     Read in the HEADer of the article:
                    615: **
                    616: **     The header fields are either ignored, or formatted and put into the
                    617: **     Text.
                    618: */
1.2       timbl     619:     (*targetClass.start_element)(target, HTML_H1 , 0, 0);
                    620:     PUTS( "Newsgroups");
                    621:     (*targetClass.end_element)(target, HTML_PRE);
1.1       timbl     622:     p = line;
2.16      luotonen  623:     (*targetClass.start_element)(target, HTML_DL, 0, 0);
1.1       timbl     624:     while(!done){
                    625:        char ch = *p++ = NEXT_CHAR;
                    626:        if (ch==(char)EOF) {
                    627:            abort_socket();     /* End of file, close socket */
                    628:            return;             /* End of file on response */
                    629:        }
1.3       timbl     630:        if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1       timbl     631:            *p++=0;                             /* Terminate the string */
                    632:            if (TRACE) fprintf(stderr, "B %s", line);
2.16      luotonen  633:            (*targetClass.start_element)(target, HTML_DT , 0, 0);
1.1       timbl     634:            if (line[0]=='.') {
                    635:                if (line[1]<' ') {              /* End of article? */
                    636:                    done = YES;
                    637:                    break;
                    638:                } else {                        /* Line starts with dot */
1.2       timbl     639:                    PUTS( &line[1]);
1.1       timbl     640:                }
                    641:            } else {
                    642: 
                    643: /*     Normal lines are scanned for references to newsgroups.
                    644: */
2.16      luotonen  645:                int i=0;
                    646: 
                    647:                /* find whitespace if it exits */
                    648:                for(; line[i] != '\0' && !WHITE(line[i]); i++)
                    649:                    ;  /* null body */
                    650:        
                    651:                if(line[i] != '\0') {
                    652:                    line[i] = '\0';
                    653:                    write_anchor(line, line);
                    654:                    (*targetClass.start_element)(target, HTML_DD , 0, 0);
                    655:                    PUTS(&line[i+1]); /* put description */
                    656:                } else {
                    657:                    write_anchor(line, line);
                    658:                }
                    659: 
                    660: #ifdef OLD_CODE
1.1       timbl     661:                char group[LINE_LENGTH];
                    662:                int first, last;
                    663:                char postable;
                    664:                if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
                    665:                    write_anchor(line, group);
                    666:                else
1.2       timbl     667:                    PUTS(line);
2.16      luotonen  668: #endif /*OLD_CODE*/
                    669: 
1.1       timbl     670:            } /* if not dot */
                    671:            p = line;                   /* Restart at beginning */
                    672:        } /* if end of line */
                    673:     } /* Loop over characters */
2.16      luotonen  674:     (*targetClass.end_element)(target, HTML_DL);
1.1       timbl     675: }
                    676: 
                    677: 
                    678: /*     Read in a Newsgroup
                    679: **     -------------------
                    680: **     Unfortunately, we have to ask for each article one by one if we
                    681: **     want more than one field.
                    682: **
                    683: */
                    684: PRIVATE void read_group ARGS3(
                    685:   CONST char *,groupName,
                    686:   int,first_required,
                    687:   int,last_required
                    688: )
                    689: {
                    690:     char line[LINE_LENGTH+1];
                    691:     char author[LINE_LENGTH+1];
                    692:     char subject[LINE_LENGTH+1];
                    693:     char *p;
                    694:     BOOL done;
                    695: 
                    696:     char buffer[LINE_LENGTH];
                    697:     char *reference=0;                 /* Href for article */
                    698:     int art;                           /* Article number WITHIN GROUP */
                    699:     int status, count, first, last;    /* Response fields */
                    700:                                        /* count is only an upper limit */
                    701: 
                    702:     sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
2.17      frystyk   703:     if(TRACE)
                    704:        fprintf(stderr, 
                    705:                "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
                    706:                status, count, first, last, first_required, last_required);
1.1       timbl     707:     if (last==0) {
1.2       timbl     708:         PUTS( "\nNo articles in this group.\n");
2.16      luotonen  709: #ifdef POSTING
                    710:        goto add_post;
                    711: #endif
1.1       timbl     712:        return;
                    713:     }
                    714:     
                    715: #define FAST_THRESHOLD 100     /* Above this, read IDs fast */
                    716: #define CHOP_THRESHOLD 50      /* Above this, chop off the rest */
                    717: 
                    718:     if (first_required<first) first_required = first;          /* clip */
                    719:     if ((last_required==0) || (last_required > last)) last_required = last;
                    720:     
                    721:     if (last_required<=first_required) {
1.2       timbl     722:         PUTS( "\nNo articles in this range.\n");
2.16      luotonen  723: #ifdef POSTING
                    724:        goto add_post;
                    725: #endif
1.1       timbl     726:        return;
                    727:     }
                    728: 
                    729:     if (last_required-first_required+1 > MAX_CHUNK) {  /* Trim this block */
                    730:         first_required = last_required-CHUNK_SIZE+1;
                    731:     }
2.18    ! frystyk   732:     if (TRACE) fprintf (stderr, "    Chunk will be (%d-%d)\n",
2.16      luotonen  733:                       first_required, last_required);
1.1       timbl     734: 
1.2       timbl     735: /*     Set window title
                    736: */
                    737:     sprintf(buffer, "Newsgroup %s,  Articles %d-%d",
                    738:                groupName, first_required, last_required);
                    739:     START(HTML_TITLE);
                    740:     PUTS(buffer);
                    741:     END(HTML_TITLE);
                    742: 
1.1       timbl     743: /*     Link to earlier articles
                    744: */
                    745:     if (first_required>first) {
                    746:        int before;                     /* Start of one before */
                    747:        if (first_required-MAX_CHUNK <= first) before = first;
                    748:        else before = first_required-CHUNK_SIZE;
                    749:        sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
                    750:        if (TRACE) fprintf(stderr, "    Block before is %s\n", buffer);
1.2       timbl     751:        PUTS( " (");
                    752:        start_anchor(buffer);
                    753:        PUTS("Earlier articles");
                    754:        END(HTML_A);
                    755:        PUTS( "...)\n");
1.1       timbl     756:     }
                    757:     
                    758:     done = NO;
                    759: 
                    760: /*#define USE_XHDR*/
                    761: #ifdef USE_XHDR
                    762:     if (count>FAST_THRESHOLD)  {
                    763:         sprintf(buffer,
                    764:        "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
                    765:                count, groupName); 
1.2       timbl     766:         PUTS(buffer);
1.3       timbl     767:         sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1       timbl     768:        status = response(buffer);
                    769:        if (status==221) {
                    770: 
                    771:            p = line;
                    772:            while(!done){
                    773:                char ch = *p++ = NEXT_CHAR;
                    774:                if (ch==(char)EOF) {
                    775:                    abort_socket();     /* End of file, close socket */
                    776:                    return;             /* End of file on response */
                    777:                }
                    778:                if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
                    779:                    *p++=0;                             /* Terminate the string */
                    780:                    if (TRACE) fprintf(stderr, "X %s", line);
                    781:                    if (line[0]=='.') {
                    782:                        if (line[1]<' ') {              /* End of article? */
                    783:                            done = YES;
                    784:                            break;
                    785:                        } else {                        /* Line starts with dot */
                    786:                                /* Ignore strange line */
                    787:                        }
                    788:                    } else {
                    789:        
                    790:        /*      Normal lines are scanned for references to articles.
                    791:        */
                    792:                        char * space = strchr(line, ' ');
                    793:                        if (space++)
                    794:                            write_anchor(space, space);
                    795:                    } /* if not dot */
                    796:                    p = line;                   /* Restart at beginning */
                    797:                } /* if end of line */
                    798:            } /* Loop over characters */
                    799: 
                    800:            /* leaving loop with "done" set */
                    801:        } /* Good status */
                    802:     };
                    803: #endif
                    804: 
                    805: /*     Read newsgroup using individual fields:
                    806: */
                    807:     if (!done) {
                    808:         if (first==first_required && last==last_required)
1.2       timbl     809:                PUTS("\nAll available articles in ");
                    810:         else PUTS( "\nArticles in ");
                    811:        PUTS(groupName);
                    812:        START(HTML_MENU);
1.1       timbl     813:        for(art=first_required; art<=last_required; art++) {
                    814:     
                    815: /*#define OVERLAP*/
                    816: #ifdef OVERLAP
                    817: /* With this code we try to keep the server running flat out by queuing just
                    818: ** one extra command ahead of time. We assume (1) that the server won't abort
                    819: ** if it gets input during output, and (2) that TCP buffering is enough for the
                    820: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
                    821: ** had a hangup with a loaded server.
                    822: */
                    823:            if (art==first_required) {
                    824:                if (art==last_required) {
1.3       timbl     825:                        sprintf(buffer, "HEAD %d%c%c", art, CR, LF);    /* Only one */
1.1       timbl     826:                        status = response(buffer);
                    827:                    } else {                                    /* First of many */
1.3       timbl     828:                        sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
                    829:                                art, CR, LF, art+1, CR, LF);
1.1       timbl     830:                        status = response(buffer);
                    831:                    }
                    832:            } else if (art==last_required) {                    /* Last of many */
                    833:                    status = response(NULL);
                    834:            } else {                                            /* Middle of many */
1.3       timbl     835:                    sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1       timbl     836:                    status = response(buffer);
                    837:            }
                    838:            
                    839: #else  /* NOT OVERLAP */
1.3       timbl     840:            sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1       timbl     841:            status = response(buffer);
                    842: #endif /* NOT OVERLAP */
                    843: 
                    844:            if (status == 221) {        /* Head follows - parse it:*/
2.17      frystyk   845: 
1.1       timbl     846:                p = line;                               /* Write pointer */
                    847:                done = NO;
                    848:                while(!done){
                    849:                    char ch = *p++ = NEXT_CHAR;
                    850:                    if (ch==(char)EOF) {
                    851:                        abort_socket(); /* End of file, close socket */
                    852:                        return;         /* End of file on response */
                    853:                    }
1.3       timbl     854:                    if ((ch == LF)
1.1       timbl     855:                        || (p == &line[LINE_LENGTH]) ) {
                    856:                    
                    857:                        *--p=0;         /* Terminate  & chop LF*/
                    858:                        p = line;               /* Restart at beginning */
                    859:                        if (TRACE) fprintf(stderr, "G %s\n", line);
                    860:                        switch(line[0]) {
                    861:     
                    862:                        case '.':
                    863:                            done = (line[1]<' ');       /* End of article? */
                    864:                            break;
                    865:     
                    866:                        case 'S':
                    867:                        case 's':
                    868:                            if (match(line, "SUBJECT:"))
                    869:                                strcpy(subject, line+9);/* Save subject */
                    870:                            break;
                    871:     
                    872:                        case 'M':
                    873:                        case 'm':
                    874:                            if (match(line, "MESSAGE-ID:")) {
                    875:                                char * addr = HTStrip(line+11) +1; /* Chop < */
                    876:                                addr[strlen(addr)-1]=0;         /* Chop > */
                    877:                                StrAllocCopy(reference, addr);
                    878:                            }
                    879:                            break;
                    880:     
                    881:                        case 'f':
                    882:                        case 'F':
                    883:                            if (match(line, "FROM:")) {
                    884:                                char * p;
                    885:                                strcpy(author,
                    886:                                        author_name(strchr(line,':')+1));
2.17      frystyk   887:                                if (*author) {          /* Not always there! */
                    888:                                    p = author + strlen(author) - 1;
                    889:                                    if (*p==LF) *p = 0; /* Chop off newline */
                    890:                                }
1.1       timbl     891:                            }
                    892:                            break;
                    893:                                    
                    894:                        } /* end switch on first character */
                    895:                    } /* if end of line */
                    896:                } /* Loop over characters */
                    897:     
1.2       timbl     898:                START(HTML_LI);
1.1       timbl     899:                sprintf(buffer, "\"%s\" - %s", subject, author);
                    900:                if (reference) {
                    901:                    write_anchor(buffer, reference);
                    902:                    free(reference);
                    903:                    reference=0;
                    904:                } else {
1.2       timbl     905:                    PUTS(buffer);
1.1       timbl     906:                }
                    907:                
                    908:     
1.2       timbl     909: /*      indicate progress!   @@@@@@
1.1       timbl     910: */
                    911:     
                    912:            } /* If good response */
                    913:        } /* Loop over article */           
                    914:     } /* If read headers */
1.2       timbl     915:     END(HTML_MENU);
                    916:     START(HTML_P);
1.1       timbl     917:     
                    918: /*     Link to later articles
                    919: */
                    920:     if (last_required<last) {
                    921:        int after;                      /* End of article after */
                    922:        after = last_required+CHUNK_SIZE;
                    923:        if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
                    924:        else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
                    925:        if (TRACE) fprintf(stderr, "    Block after is %s\n", buffer);
1.2       timbl     926:        PUTS( "(");
                    927:        start_anchor(buffer);
                    928:        PUTS( "Later articles");
                    929:        END(HTML_A);
                    930:        PUTS( "...)\n");
1.1       timbl     931:     }
2.16      luotonen  932: 
                    933: add_post:
                    934:     {
                    935:        char *href=0;
                    936:        START(HTML_HR);
                    937:        
                    938:        StrAllocCopy(href,"newspost:");
                    939:        StrAllocCat(href,groupName);
                    940:        start_anchor(href);
                    941:        PUTS("Post to ");
                    942:        PUTS(groupName);
                    943:        END(HTML_A);
                    944: 
                    945:        free(href);
                    946:     }
1.1       timbl     947:     
                    948: 
                    949: }
                    950: 
                    951: 
                    952: /*             Load by name                                    HTLoadNews
                    953: **             ============
                    954: */
2.13      timbl     955: PUBLIC int HTLoadNews ARGS1(HTRequest *,               request)
1.1       timbl     956: {
2.13      timbl     957:     CONST char * arg = HTAnchor_physical(request->anchor);
1.1       timbl     958:     char command[257];                 /* The whole command */
                    959:     char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
                    960:     int status;                                /* tcp return */
                    961:     int retries;                       /* A count of how hard we have tried */ 
                    962:     BOOL group_wanted;                 /* Flag: group was asked for, not article */
                    963:     BOOL list_wanted;                  /* Flag: group was asked for, not article */
                    964:     int first, last;                   /* First and last articles asked for */
                    965: 
2.10      timbl     966:     diagnostic = (request->output_format == WWW_SOURCE);       /* set global flag */
1.1       timbl     967:     
                    968:     if (TRACE) fprintf(stderr, "HTNews: Looking for %s\n", arg);
                    969:     
                    970:     if (!initialized) initialized = initialize();
                    971:     if (!initialized) return -1;       /* FAIL */
                    972:     
                    973:     {
                    974:         CONST char * p1=arg;
                    975: 
                    976: /*     We will ask for the document, omitting the host name & anchor.
                    977: **
                    978: **     Syntax of address is
                    979: **             xxx@yyy                 Article
                    980: **             <xxx@yyy>               Same article
                    981: **             xxxxx                   News group (no "@")
                    982: **             group/n1-n2             Articles n1 to n2 in group
                    983: */        
                    984:        group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
                    985:        list_wanted  = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
                    986: 
                    987:        /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
                    988:        /* Don't use HTParse because news: access doesn't follow traditional
                    989:           rules. For instance, if the article reference contains a '#',
                    990:           the rest of it is lost -- JFG 10/7/92, from a bug report */
                    991:        if (!strncasecomp (arg, "news:", 5))
                    992:          p1 = arg + 5;  /* Skip "news:" prefix */
                    993:        if (list_wanted) {
2.16      luotonen  994:            strcpy(command, "LIST NEWSGROUPS");
1.1       timbl     995:        } else if (group_wanted) {
                    996:            char * slash = strchr(p1, '/');
                    997:            strcpy(command, "GROUP ");
                    998:            first = 0;
                    999:            last = 0;
                   1000:            if (slash) {
                   1001:                *slash = 0;
                   1002:                strcpy(groupName, p1);
                   1003:                *slash = '/';
                   1004:                (void) sscanf(slash+1, "%d-%d", &first, &last);
                   1005:            } else {
                   1006:                strcpy(groupName, p1);
                   1007:            }
                   1008:            strcat(command, groupName);
                   1009:        } else {
                   1010:            strcpy(command, "ARTICLE ");
                   1011:            if (strchr(p1, '<')==0) strcat(command,"<");
                   1012:            strcat(command, p1);
                   1013:            if (strchr(p1, '>')==0) strcat(command,">");
                   1014:        }
                   1015: 
1.3       timbl    1016:         {
                   1017:            char * p = command + strlen(command);
                   1018:            *p++ = CR;          /* Macros to be correct on Mac */
                   1019:            *p++ = LF;
                   1020:            *p++ = 0;
                   1021:            /* strcat(command, "\r\n"); */      /* CR LF, as in rfc 977 */
                   1022:        }
1.1       timbl    1023:     } /* scope of p1 */
                   1024:     
                   1025:     if (!*arg) return NO;                      /* Ignore if no name */
                   1026: 
                   1027:     
                   1028: /*     Make a hypertext object with an anchor list.
                   1029: */       
2.10      timbl    1030:     node_anchor = request->anchor;
2.11      timbl    1031:     target = HTML_new(request, NULL, WWW_HTML,
                   1032:                request->output_format, request->output_stream);
1.2       timbl    1033:     targetClass = *target->isa;        /* Copy routine entry points */
                   1034:     
1.1       timbl    1035:        
                   1036: /*     Now, let's get a stream setup up from the NewsHost:
                   1037: */       
                   1038:     for(retries=0;retries<2; retries++){
                   1039:     
                   1040:         if (s<0) {
                   1041:             NEWS_PROGRESS("Connecting to NewsHost ...");
                   1042:            s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
                   1043:            status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
                   1044:            if (status<0){
                   1045:                char message[256];
                   1046:                NETCLOSE(s);
                   1047:                s = -1;
                   1048:                if (TRACE) fprintf(stderr, "HTNews: Unable to connect to news host.\n");
                   1049: /*             if (retries<=1) continue;   WHY TRY AGAIN ?     */
                   1050:                sprintf(message,
                   1051: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2       timbl    1052:                    HTNewsHost);
2.15      luotonen 1053:                return HTLoadError(request, 500, message);
1.1       timbl    1054:            } else {
                   1055:                if (TRACE) fprintf(stderr, "HTNews: Connected to news host %s.\n",
1.2       timbl    1056:                                HTNewsHost);
2.11      timbl    1057:                isoc = HTInputSocket_new(s);    /* set up buffering */
1.1       timbl    1058:                if ((response(NULL) / 100) !=2) {
2.8       timbl    1059:                        char message[BIG];
1.1       timbl    1060:                        NETCLOSE(s);
2.11      timbl    1061:                        HTInputSocket_free(isoc);
1.1       timbl    1062:                        s = -1;
2.8       timbl    1063:                        sprintf(message, 
                   1064:                  "Can't read news info. News host %.20s responded: %.200s",
                   1065:                            HTNewsHost, response_text);
2.15      luotonen 1066:                        return HTLoadError(request, 500, message);
1.1       timbl    1067:                }
                   1068:            }
                   1069:        } /* If needed opening */
                   1070:        
1.2       timbl    1071:        /* @@@@@@@@@@@@@@Tell user something's happening */
                   1072:        
1.1       timbl    1073:        status = response(command);
                   1074:        if (status<0) break;
                   1075:        if ((status/ 100) !=2) {
2.8       timbl    1076:            HTProgress(response_text);
1.1       timbl    1077: /*         NXRunAlertPanel("News access", response_text,
                   1078:                NULL,NULL,NULL);
                   1079: */
                   1080:            NETCLOSE(s);
2.11      timbl    1081:            HTInputSocket_free(isoc);
1.1       timbl    1082:            s = -1;
                   1083: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
                   1084:            continue;   /*      Try again */
                   1085:        }
                   1086:   
                   1087: /*     Load a group, article, etc
                   1088: */
1.2       timbl    1089:         
1.1       timbl    1090:        
                   1091:        if (list_wanted) read_list();
                   1092:        else if (group_wanted) read_group(groupName, first, last);
                   1093:         else read_article();
                   1094: 
2.6       timbl    1095:        (*targetClass.free)(target);
1.2       timbl    1096:        return HT_LOADED;
1.1       timbl    1097:        
                   1098:     } /* Retry loop */
                   1099:     
1.2       timbl    1100:     
2.8       timbl    1101:     /* HTAlert("Sorry, could not load requested news.\n"); */
                   1102:         
1.1       timbl    1103: /*    NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
                   1104:            NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
                   1105: 
1.2       timbl    1106:     return HT_LOADED;
1.1       timbl    1107: }
                   1108: 
2.10      timbl    1109: GLOBALDEF PUBLIC HTProtocol HTNews = { "news", HTLoadNews, NULL, NULL};

Webmaster