Annotation of libwww/Library/src/HTNews.c, revision 2.37
2.26 frystyk 1: /* HTNews.c
2: ** NEWS ACCESS
3: **
2.29 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.26 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
7: ** History:
8: ** 26 Sep 90 Written TBL
9: ** 29 Nov 91 Downgraded to C, for portable implementation.
2.19 luotonen 10: ** 16 Feb 94 AL Added Lou Montulli's Lynx & LIST NEWSGROUPS diffs.
11: ** 2 May 94 AL Added HTUnEscape() to HTLoadNews(), and
12: ** fixed a possible security hole when the URL contains
13: ** a newline, that could cause multiple commands to be
14: ** sent to an NNTP server.
2.23 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 16: */
2.27 roeber 17:
2.28 frystyk 18: #include "tcp.h"
19: #include "HTUtils.h" /* Coding convention macros */
20: #include "HTString.h"
2.37 ! frystyk 21: #include "HTMLPDTD.h"
2.34 frystyk 22: #include "HTMLGen.h"
2.28 frystyk 23: #include "HTParse.h"
24: #include "HTFormat.h"
25: #include "HTAlert.h"
2.36 frystyk 26: #include "HTReqMan.h"
2.30 frystyk 27: #include "HTSocket.h"
2.28 frystyk 28: #include "HTError.h"
29: #include "HTNews.h" /* Implemented here */
1.3 timbl 30:
1.1 timbl 31: #define NEWS_PORT 119 /* See rfc977 */
32: #define APPEND /* Use append methods */
33: #define MAX_CHUNK 40 /* Largest number of articles in one window */
34: #define CHUNK_SIZE 20 /* Number of articles for quick display */
35:
36: #ifndef DEFAULT_NEWS_HOST
37: #define DEFAULT_NEWS_HOST "news"
38: #endif
39: #ifndef SERVER_FILE
40: #define SERVER_FILE "/usr/local/lib/rn/server"
41: #endif
42:
2.8 timbl 43: #define BIG 1024 /* @@@ */
44:
1.2 timbl 45: struct _HTStructured {
46: CONST HTStructuredClass * isa;
47: /* ... */
48: };
49:
1.1 timbl 50:
2.12 timbl 51: #define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1 timbl 52: #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */
53: #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */
54:
55:
56: /* Module-wide variables
57: */
1.2 timbl 58: PUBLIC char * HTNewsHost;
1.1 timbl 59: PRIVATE struct sockaddr_in soc_address; /* Binary network address */
2.28 frystyk 60: PRIVATE SOCKFD s; /* Socket for NewsHost */
1.1 timbl 61: PRIVATE char response_text[LINE_LENGTH+1]; /* Last response */
1.2 timbl 62: PRIVATE HTStructured * target; /* The output sink */
63: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses */
1.1 timbl 64: PRIVATE HTParentAnchor *node_anchor; /* Its anchor */
65: PRIVATE int diagnostic; /* level: 0=none 2=source */
66:
1.2 timbl 67:
68: #define PUTC(c) (*targetClass.put_character)(target, c)
69: #define PUTS(s) (*targetClass.put_string)(target, s)
70: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
71: #define END(e) (*targetClass.end_element)(target, e)
72:
2.11 timbl 73: PUBLIC HTInputSocket *isoc; /* @@@ non-reentrant */
74:
1.2 timbl 75: PUBLIC CONST char * HTGetNewsHost NOARGS
76: {
77: return HTNewsHost;
78: }
1.1 timbl 79:
1.2 timbl 80: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
81: {
82: StrAllocCopy(HTNewsHost, value);
83: }
1.1 timbl 84:
85: /* Initialisation for this module
86: ** ------------------------------
87: **
88: ** Except on the NeXT, we pick up the NewsHost name from
89: **
90: ** 1. Environment variable NNTPSERVER
91: ** 2. File SERVER_FILE
92: ** 3. Compilation time macro DEFAULT_NEWS_HOST
93: **
94: ** On the NeXT, we pick up the NewsHost name from, in order:
95: **
96: ** 1. WorldWideWeb default "NewsHost"
97: ** 2. Global default "NewsHost"
98: ** 3. News default "NewsHost"
99: ** 4. Compilation time macro DEFAULT_NEWS_HOST
100: */
101: PRIVATE BOOL initialized = NO;
102: PRIVATE BOOL initialize NOARGS
103: {
104: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
105: struct sockaddr_in* sin = &soc_address;
106:
107:
108: /* Set up defaults:
109: */
110: sin->sin_family = AF_INET; /* Family = internet, host order */
111: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
112:
113: /* Get name of Host
114: */
2.28 frystyk 115: #ifdef NeXTStep
1.2 timbl 116: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
117: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
118: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 119: #else
120: if (getenv("NNTPSERVER")) {
1.2 timbl 121: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
2.28 frystyk 122: if (TRACE) fprintf(TDEST, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 123: HTNewsHost);
1.1 timbl 124: } else {
125: char server_name[256];
126: FILE* fp = fopen(SERVER_FILE, "r");
127: if (fp) {
128: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 129: StrAllocCopy(HTNewsHost, server_name);
2.28 frystyk 130: if (TRACE) fprintf(TDEST,
1.1 timbl 131: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 132: SERVER_FILE, HTNewsHost);
1.1 timbl 133: }
134: fclose(fp);
135: }
136: }
1.2 timbl 137: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 138: #endif
139:
1.2 timbl 140: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
2.35 frystyk 141: #ifdef GUSI
142: sin->sin_addr = inet_addr((char *)HTNewsHost); /* STR (GUSI) */
143: #else
144: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* arpa/inet.h */
145: #endif
1.1 timbl 146: } else { /* Alphanumeric node name: */
1.2 timbl 147: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 148: if (!phost) {
2.7 timbl 149: char message[150]; /* @@@ */
150: sprintf(message,
151: "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
152: "Please define your NNTP server");
2.33 frystyk 153: /* HTAlert(message); */
2.28 frystyk 154: if (PROT_TRACE)
155: fprintf(TDEST, "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 156: return NO; /* Fail */
157: }
158: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
159: }
160:
2.28 frystyk 161: if (TRACE) fprintf(TDEST,
1.1 timbl 162: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
163: (unsigned int)ntohs(sin->sin_port),
164: (int)*((unsigned char *)(&sin->sin_addr)+0),
165: (int)*((unsigned char *)(&sin->sin_addr)+1),
166: (int)*((unsigned char *)(&sin->sin_addr)+2),
167: (int)*((unsigned char *)(&sin->sin_addr)+3));
168:
2.28 frystyk 169: s = INVSOC; /* Disconnected */
1.1 timbl 170:
171: return YES;
172: }
173:
174:
175:
176: /* Send NNTP Command line to remote host & Check Response
177: ** ------------------------------------------------------
178: **
179: ** On entry,
180: ** command points to the command to be sent, including CRLF, or is null
181: ** pointer if no command to be sent.
182: ** On exit,
183: ** Negative status indicates transmission error, socket closed.
184: ** Positive status is an NNTP status.
185: */
186:
187:
188: PRIVATE int response ARGS1(CONST char *,command)
189: {
190: int result;
191: char * p = response_text;
192: if (command) {
193: int status;
194: int length = strlen(command);
2.28 frystyk 195: if (TRACE) fprintf(TDEST, "NNTP command to be sent: %s", command);
1.1 timbl 196: #ifdef NOT_ASCII
197: {
198: CONST char * p;
199: char * q;
200: char ascii[LINE_LENGTH+1];
201: for(p = command, q=ascii; *p; p++, q++) {
202: *q = TOASCII(*p);
203: }
204: status = NETWRITE(s, ascii, length);
205: }
206: #else
207: status = NETWRITE(s, command, length);
208: #endif
209: if (status<0){
2.28 frystyk 210: if (TRACE) fprintf(TDEST,
1.1 timbl 211: "HTNews: Unable to send command. Disconnecting.\n");
212: NETCLOSE(s);
2.11 timbl 213: HTInputSocket_free(isoc);
2.28 frystyk 214: s = INVSOC;
1.1 timbl 215: return status;
216: } /* if bad status */
217: } /* if command to be sent */
218:
219: for(;;) {
1.3 timbl 220: if (((*p++=NEXT_CHAR) == LF)
221: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 222: *p++=0; /* Terminate the string */
2.28 frystyk 223: if (TRACE) fprintf(TDEST, "NNTP Response: %s\n", response_text);
1.1 timbl 224: sscanf(response_text, "%d", &result);
2.19 luotonen 225: if (result >= 411 && result <= 430) { /* no such article/group */
226: char * msg = strchr(response_text,' ');
227: if (!msg) msg = response_text;
228: PUTS("<H1>News error</H1>\n");
229: PUTS(msg);
2.28 frystyk 230: if (PROT_TRACE)
231: fprintf(TDEST, "News error.. %s", response_text);
2.19 luotonen 232: }
1.1 timbl 233: return result;
234: } /* if end of line */
235:
236: if (*(p-1) < 0) {
2.28 frystyk 237: if (TRACE) fprintf(TDEST,
1.1 timbl 238: "HTNews: EOF on read, closing socket %d\n", s);
239: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 240: HTInputSocket_free(isoc);
2.28 frystyk 241: return s = INVSOC; /* End of file on response */
1.1 timbl 242: }
243: } /* Loop over characters */
244: }
245:
246:
247: /* Case insensitive string comparisons
248: ** -----------------------------------
249: **
250: ** On entry,
251: ** template must be already un upper case.
252: ** unknown may be in upper or lower or mixed case to match.
253: */
2.24 frystyk 254: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,tmplate)
1.1 timbl 255: {
256: CONST char * u = unknown;
2.24 frystyk 257: CONST char * t = tmplate;
1.1 timbl 258: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
259: return (BOOL)(*t==0); /* OK if end of template */
260: }
261:
262: /* Find Author's name in mail address
263: ** ----------------------------------
264: **
265: ** On exit,
266: ** THE EMAIL ADDRESS IS CORRUPTED
267: **
268: ** For example, returns "Tim Berners-Lee" if given any of
2.31 frystyk 269: ** " Tim Berners-Lee <tim@w3.org> "
270: ** or " tim@w3.org ( Tim Berners-Lee ) "
1.1 timbl 271: */
272: PRIVATE char * author_name ARGS1 (char *,email)
273: {
274: char *s, *e;
275:
276: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
277: if (e>s) {
278: *e=0; /* Chop off everything after the ')' */
279: return HTStrip(s+1); /* Remove leading and trailing spaces */
280: }
281:
282: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
283: if (e>s) {
284: strcpy(s, e+1); /* Remove <...> */
285: return HTStrip(email); /* Remove leading and trailing spaces */
286: }
287:
288: return HTStrip(email); /* Default to the whole thing */
289:
290: }
291:
1.2 timbl 292: /* Start anchor element
293: ** --------------------
294: */
295: PRIVATE void start_anchor ARGS1(CONST char *, href)
296: {
297: BOOL present[HTML_A_ATTRIBUTES];
298: CONST char* value[HTML_A_ATTRIBUTES];
299:
300: {
301: int i;
302: for(i=0; i<HTML_A_ATTRIBUTES; i++)
303: present[i] = (i==HTML_A_HREF);
304: }
305: value[HTML_A_HREF] = href;
306: (*targetClass.start_element)(target, HTML_A , present, value);
307:
308: }
1.1 timbl 309:
2.16 luotonen 310:
311: /* Start link element
312: ** --------------------
313: */
314: PRIVATE void start_link ARGS2(CONST char *, href, CONST char *, rev)
315: {
316: #ifdef WHEN_WE_HAVE_HTMLPLUS
317:
318: BOOL present[HTML_LINK_ATTRIBUTES];
319: CONST char* value[HTML_LINK_ATTRIBUTES];
320:
321: {
322: int i;
323: for(i=0; i<HTML_LINK_ATTRIBUTES; i++)
324: present[i] = (i==HTML_LINK_HREF || i==HTML_LINK_REV);
325: }
326: value[HTML_LINK_HREF] = href;
327: value[HTML_LINK_REV] = rev;
328: (*targetClass.start_element)(target, HTML_LINK , present, value);
329:
330: #endif
331: }
332:
333:
334:
335:
1.1 timbl 336: /* Paste in an Anchor
337: ** ------------------
338: **
339: **
340: ** On entry,
341: ** HT has a selection of zero length at the end.
342: ** text points to the text to be put into the file, 0 terminated.
343: ** addr points to the hypertext refernce address,
344: ** terminated by white space, comma, NULL or '>'
345: */
346: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
347: {
348: char href[LINE_LENGTH+1];
349:
350: {
351: CONST char * p;
352: strcpy(href,"news:");
353: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
354: strncat(href, addr, p-addr); /* Make complete hypertext reference */
355: }
356:
1.2 timbl 357: start_anchor(href);
358: PUTS(text);
359: END(HTML_A);
1.1 timbl 360: }
361:
362:
363: /* Write list of anchors
364: ** ---------------------
365: **
366: ** We take a pointer to a list of objects, and write out each,
367: ** generating an anchor for each.
368: **
369: ** On entry,
370: ** HT has a selection of zero length at the end.
371: ** text points to a comma or space separated list of addresses.
372: ** On exit,
373: ** *text is NOT any more chopped up into substrings.
374: */
375: PRIVATE void write_anchors ARGS1 (char *,text)
376: {
377: char * start = text;
378: char * end;
379: char c;
380: for (;;) {
381: for(;*start && (WHITE(*start)); start++); /* Find start */
382: if (!*start) return; /* (Done) */
383: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
384: if (*end) end++; /* Include comma or space but not NULL */
385: c = *end;
386: *end = 0;
387: write_anchor(start, start);
2.16 luotonen 388: START(HTML_BR);
1.1 timbl 389: *end = c;
390: start = end; /* Point to next one */
391: }
392: }
393:
394: /* Abort the connection abort_socket
395: ** --------------------
396: */
397: PRIVATE void abort_socket NOARGS
398: {
2.28 frystyk 399: if (TRACE) fprintf(TDEST,
1.1 timbl 400: "HTNews: EOF on read, closing socket %d\n", s);
401: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 402: HTInputSocket_free(isoc);
1.2 timbl 403: PUTS("Network Error: connection lost");
404: PUTC('\n');
2.28 frystyk 405: s = INVSOC; /* End of file on response */
1.1 timbl 406: return;
407: }
408:
409: /* Read in an Article read_article
410: ** ------------------
411: **
412: **
413: ** Note the termination condition of a single dot on a line by itself.
414: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
415: ** do not handle it here.
416: **
417: ** On entry,
418: ** s Global socket number is OK
419: ** HT Global hypertext object is ready for appending text
420: */
421: PRIVATE void read_article NOARGS
422: {
423:
424: char line[LINE_LENGTH+1];
425: char *references=NULL; /* Hrefs for other articles */
426: char *newsgroups=NULL; /* Newsgroups list */
427: char *p = line;
428: BOOL done = NO;
429:
430: /* Read in the HEADer of the article:
431: **
432: ** The header fields are either ignored, or formatted and put into the
433: ** Text.
434: */
435: if (!diagnostic) {
1.2 timbl 436: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 437: while(!done){
438: char ch = *p++ = NEXT_CHAR;
439: if (ch==(char)EOF) {
440: abort_socket(); /* End of file, close socket */
441: return; /* End of file on response */
442: }
1.3 timbl 443: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 444: *--p=0; /* Terminate the string */
2.28 frystyk 445: if (TRACE) fprintf(TDEST, "H %s\n", line);
1.1 timbl 446:
447: if (line[0]=='.') {
448: if (line[1]<' ') { /* End of article? */
449: done = YES;
450: break;
451: }
452:
453: } else if (line[0]<' ') {
454: break; /* End of Header? */
2.16 luotonen 455:
1.1 timbl 456: } else if (match(line, "SUBJECT:")) {
1.2 timbl 457: END(HTML_ADDRESS);
458: START(HTML_TITLE); /** Uuugh! @@@ */
2.16 luotonen 459: PUTS(line+9);
460: END(HTML_TITLE);
461: START(HTML_H1);
1.2 timbl 462: PUTS(line+8);
2.16 luotonen 463: END(HTML_H1);
1.2 timbl 464: START(HTML_ADDRESS);
2.16 luotonen 465:
1.1 timbl 466: } else if (match(line, "DATE:")
467: || match(line, "ORGANIZATION:")) {
2.16 luotonen 468: PUTS(strchr(line,':')+2);
469: START(HTML_BR);
470:
471: } else if(match(line, "FROM:")) {
472: char * temp=0;
473: char * href=0;
474: char *cp1, *cp2;
475:
476: /* copy into temporary storage */
477: StrAllocCopy(temp, strchr(line,':')+1);
478:
479: cp1=temp;
480: while(isspace(*cp1)) cp1++;
481: /* remove space and stuff after */
482: if((cp2 = strchr(cp1,' ')) != NULL)
483: *cp2 = '\0';
484:
485: StrAllocCopy(href,"mailto:");
486: StrAllocCat(href,cp1);
487:
488: start_anchor(href);
489: PUTS("Reply to ");
490: PUTS(strchr(line,':')+1);
491: END(HTML_A);
492: START(HTML_BR);
493:
494: /* put in the owner as a link rel. as well */
495: start_link(href, "made");
496:
497: /* free of temp vars */
498: free(temp);
499: free(href);
500:
1.1 timbl 501: } else if (match(line, "NEWSGROUPS:")) {
502: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
503:
504: } else if (match(line, "REFERENCES:")) {
505: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
506:
507: } /* end if match */
508: p = line; /* Restart at beginning */
509: } /* if end of line */
510: } /* Loop over characters */
2.16 luotonen 511: END(HTML_ADDRESS);
1.1 timbl 512:
1.2 timbl 513: if (newsgroups || references) {
2.16 luotonen 514: START(HTML_DL);
1.2 timbl 515: if (newsgroups) {
2.16 luotonen 516: #ifdef POSTING
517: char *href=0;
518: #endif
519:
1.2 timbl 520: (*targetClass.start_element)(target, HTML_DT , 0, 0);
521: PUTS("Newsgroups:");
522: (*targetClass.start_element)(target, HTML_DD , 0, 0);
523: write_anchors(newsgroups);
2.16 luotonen 524:
525: #ifdef POSTING
526: /* make posting possible */
527: StrAllocCopy(href,"newspost:");
528: StrAllocCat(href,newsgroups);
529: START(HTML_DT);
530: start_anchor(href);
531: PUTS("Reply to newsgroup(s)");
532: END(HTML_A);
533: #endif
534:
1.2 timbl 535: free(newsgroups);
536: }
537:
538: if (references) {
539: (*targetClass.start_element)(target, HTML_DT , 0, 0);
540: PUTS("References:");
541: (*targetClass.start_element)(target, HTML_DD , 0, 0);
542: write_anchors(references);
543: free(references);
544: }
2.16 luotonen 545: #ifdef WHEN_WE_HAVE_HTMLPLUS
546: (*targetClass.end_element)(target, HTML_DLC);
547: #else
2.10 timbl 548: (*targetClass.end_element)(target, HTML_DL);
2.16 luotonen 549: #endif
1.1 timbl 550: }
1.2 timbl 551: PUTS("\n\n\n");
1.1 timbl 552:
553: }
554:
555: /* Read in the BODY of the Article:
556: */
1.2 timbl 557: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
558:
1.1 timbl 559: p = line;
560: while(!done){
561: char ch = *p++ = NEXT_CHAR;
562: if (ch==(char)EOF) {
563: abort_socket(); /* End of file, close socket */
564: return; /* End of file on response */
565: }
1.3 timbl 566: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 567: *p++=0; /* Terminate the string */
2.28 frystyk 568: if (TRACE) fprintf(TDEST, "B %s", line);
1.1 timbl 569: if (line[0]=='.') {
570: if (line[1]<' ') { /* End of article? */
571: done = YES;
572: break;
573: } else { /* Line starts with dot */
1.2 timbl 574: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 575: }
576: } else {
577:
578: /* Normal lines are scanned for buried references to other articles.
579: ** Unfortunately, it will pick up mail addresses as well!
580: */
581: char *l = line;
582: char * p;
2.14 luotonen 583: while ((p=strchr(l, '<'))) {
1.1 timbl 584: char *q = strchr(p,'>');
585: char *at = strchr(p, '@');
586: if (q && at && at<q) {
587: char c = q[1];
588: q[1] = 0; /* chop up */
589: *p = 0;
1.2 timbl 590: PUTS(l);
1.1 timbl 591: *p = '<'; /* again */
592: *q = 0;
1.2 timbl 593: start_anchor(p+1);
1.1 timbl 594: *q = '>'; /* again */
1.2 timbl 595: PUTS(p);
596: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 597: q[1] = c; /* again */
598: l=q+1;
599: } else break; /* line has unmatched <> */
600: }
1.2 timbl 601: PUTS( l); /* Last bit of the line */
1.1 timbl 602: } /* if not dot */
603: p = line; /* Restart at beginning */
604: } /* if end of line */
605: } /* Loop over characters */
1.2 timbl 606:
607: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 608: }
609:
610:
611: /* Read in a List of Newsgroups
612: ** ----------------------------
613: */
614: /*
615: ** Note the termination condition of a single dot on a line by itself.
616: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
617: ** do not handle it here.
618: */
619: PRIVATE void read_list NOARGS
620: {
621:
622: char line[LINE_LENGTH+1];
623: char *p;
624: BOOL done = NO;
625:
626: /* Read in the HEADer of the article:
627: **
628: ** The header fields are either ignored, or formatted and put into the
629: ** Text.
630: */
1.2 timbl 631: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
632: PUTS( "Newsgroups");
633: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 634: p = line;
2.16 luotonen 635: (*targetClass.start_element)(target, HTML_DL, 0, 0);
1.1 timbl 636: while(!done){
637: char ch = *p++ = NEXT_CHAR;
638: if (ch==(char)EOF) {
639: abort_socket(); /* End of file, close socket */
640: return; /* End of file on response */
641: }
1.3 timbl 642: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 643: *p++=0; /* Terminate the string */
2.28 frystyk 644: if (TRACE) fprintf(TDEST, "B %s", line);
2.16 luotonen 645: (*targetClass.start_element)(target, HTML_DT , 0, 0);
1.1 timbl 646: if (line[0]=='.') {
647: if (line[1]<' ') { /* End of article? */
648: done = YES;
649: break;
650: } else { /* Line starts with dot */
1.2 timbl 651: PUTS( &line[1]);
1.1 timbl 652: }
653: } else {
654:
655: /* Normal lines are scanned for references to newsgroups.
656: */
2.16 luotonen 657: int i=0;
658:
659: /* find whitespace if it exits */
660: for(; line[i] != '\0' && !WHITE(line[i]); i++)
661: ; /* null body */
662:
663: if(line[i] != '\0') {
664: line[i] = '\0';
665: write_anchor(line, line);
666: (*targetClass.start_element)(target, HTML_DD , 0, 0);
667: PUTS(&line[i+1]); /* put description */
668: } else {
669: write_anchor(line, line);
670: }
671:
672: #ifdef OLD_CODE
1.1 timbl 673: char group[LINE_LENGTH];
674: int first, last;
675: char postable;
676: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
677: write_anchor(line, group);
678: else
1.2 timbl 679: PUTS(line);
2.16 luotonen 680: #endif /*OLD_CODE*/
681:
1.1 timbl 682: } /* if not dot */
683: p = line; /* Restart at beginning */
684: } /* if end of line */
685: } /* Loop over characters */
2.16 luotonen 686: (*targetClass.end_element)(target, HTML_DL);
1.1 timbl 687: }
688:
689:
690: /* Read in a Newsgroup
691: ** -------------------
692: ** Unfortunately, we have to ask for each article one by one if we
693: ** want more than one field.
694: **
695: */
696: PRIVATE void read_group ARGS3(
697: CONST char *,groupName,
698: int,first_required,
699: int,last_required
700: )
701: {
702: char line[LINE_LENGTH+1];
703: char author[LINE_LENGTH+1];
704: char subject[LINE_LENGTH+1];
705: char *p;
706: BOOL done;
707:
708: char buffer[LINE_LENGTH];
709: char *reference=0; /* Href for article */
710: int art; /* Article number WITHIN GROUP */
711: int status, count, first, last; /* Response fields */
712: /* count is only an upper limit */
713:
714: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
2.17 frystyk 715: if(TRACE)
2.28 frystyk 716: fprintf(TDEST,
2.17 frystyk 717: "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
718: status, count, first, last, first_required, last_required);
1.1 timbl 719: if (last==0) {
1.2 timbl 720: PUTS( "\nNo articles in this group.\n");
2.16 luotonen 721: #ifdef POSTING
722: goto add_post;
723: #endif
1.1 timbl 724: return;
725: }
726:
727: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
728: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
729:
730: if (first_required<first) first_required = first; /* clip */
731: if ((last_required==0) || (last_required > last)) last_required = last;
732:
733: if (last_required<=first_required) {
1.2 timbl 734: PUTS( "\nNo articles in this range.\n");
2.16 luotonen 735: #ifdef POSTING
736: goto add_post;
737: #endif
1.1 timbl 738: return;
739: }
740:
741: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
742: first_required = last_required-CHUNK_SIZE+1;
743: }
2.28 frystyk 744: if (TRACE) fprintf (TDEST, " Chunk will be (%d-%d)\n",
2.16 luotonen 745: first_required, last_required);
1.1 timbl 746:
1.2 timbl 747: /* Set window title
748: */
749: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
750: groupName, first_required, last_required);
751: START(HTML_TITLE);
752: PUTS(buffer);
753: END(HTML_TITLE);
754:
1.1 timbl 755: /* Link to earlier articles
756: */
757: if (first_required>first) {
758: int before; /* Start of one before */
759: if (first_required-MAX_CHUNK <= first) before = first;
760: else before = first_required-CHUNK_SIZE;
761: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
2.28 frystyk 762: if (TRACE) fprintf(TDEST, " Block before is %s\n", buffer);
1.2 timbl 763: PUTS( " (");
764: start_anchor(buffer);
765: PUTS("Earlier articles");
766: END(HTML_A);
767: PUTS( "...)\n");
1.1 timbl 768: }
769:
770: done = NO;
771:
772: /*#define USE_XHDR*/
773: #ifdef USE_XHDR
774: if (count>FAST_THRESHOLD) {
775: sprintf(buffer,
776: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
777: count, groupName);
1.2 timbl 778: PUTS(buffer);
1.3 timbl 779: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 780: status = response(buffer);
781: if (status==221) {
782:
783: p = line;
784: while(!done){
785: char ch = *p++ = NEXT_CHAR;
786: if (ch==(char)EOF) {
787: abort_socket(); /* End of file, close socket */
788: return; /* End of file on response */
789: }
790: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
791: *p++=0; /* Terminate the string */
2.28 frystyk 792: if (TRACE) fprintf(TDEST, "X %s", line);
1.1 timbl 793: if (line[0]=='.') {
794: if (line[1]<' ') { /* End of article? */
795: done = YES;
796: break;
797: } else { /* Line starts with dot */
798: /* Ignore strange line */
799: }
800: } else {
801:
802: /* Normal lines are scanned for references to articles.
803: */
804: char * space = strchr(line, ' ');
805: if (space++)
806: write_anchor(space, space);
807: } /* if not dot */
808: p = line; /* Restart at beginning */
809: } /* if end of line */
810: } /* Loop over characters */
811:
812: /* leaving loop with "done" set */
813: } /* Good status */
814: };
815: #endif
816:
817: /* Read newsgroup using individual fields:
818: */
819: if (!done) {
820: if (first==first_required && last==last_required)
1.2 timbl 821: PUTS("\nAll available articles in ");
822: else PUTS( "\nArticles in ");
823: PUTS(groupName);
824: START(HTML_MENU);
1.1 timbl 825: for(art=first_required; art<=last_required; art++) {
826:
827: /*#define OVERLAP*/
828: #ifdef OVERLAP
829: /* With this code we try to keep the server running flat out by queuing just
830: ** one extra command ahead of time. We assume (1) that the server won't abort
831: ** if it gets input during output, and (2) that TCP buffering is enough for the
832: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
833: ** had a hangup with a loaded server.
834: */
835: if (art==first_required) {
836: if (art==last_required) {
1.3 timbl 837: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 838: status = response(buffer);
839: } else { /* First of many */
1.3 timbl 840: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
841: art, CR, LF, art+1, CR, LF);
1.1 timbl 842: status = response(buffer);
843: }
844: } else if (art==last_required) { /* Last of many */
845: status = response(NULL);
846: } else { /* Middle of many */
1.3 timbl 847: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 848: status = response(buffer);
849: }
850:
851: #else /* NOT OVERLAP */
1.3 timbl 852: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 853: status = response(buffer);
854: #endif /* NOT OVERLAP */
855:
856: if (status == 221) { /* Head follows - parse it:*/
2.20 frystyk 857: int ch;
1.1 timbl 858: p = line; /* Write pointer */
859: done = NO;
860: while(!done){
2.20 frystyk 861: if ((ch = HTInputSocket_getCharacter(isoc)) < 0) {
1.1 timbl 862: abort_socket(); /* End of file, close socket */
863: return; /* End of file on response */
864: }
2.20 frystyk 865: *p++ = (unsigned char) ch;
1.3 timbl 866: if ((ch == LF)
1.1 timbl 867: || (p == &line[LINE_LENGTH]) ) {
868:
869: *--p=0; /* Terminate & chop LF*/
870: p = line; /* Restart at beginning */
2.28 frystyk 871: if (TRACE) fprintf(TDEST, "G %s\n", line);
1.1 timbl 872: switch(line[0]) {
873:
874: case '.':
875: done = (line[1]<' '); /* End of article? */
876: break;
877:
878: case 'S':
879: case 's':
880: if (match(line, "SUBJECT:"))
881: strcpy(subject, line+9);/* Save subject */
882: break;
883:
884: case 'M':
885: case 'm':
886: if (match(line, "MESSAGE-ID:")) {
887: char * addr = HTStrip(line+11) +1; /* Chop < */
888: addr[strlen(addr)-1]=0; /* Chop > */
889: StrAllocCopy(reference, addr);
890: }
891: break;
892:
893: case 'f':
894: case 'F':
895: if (match(line, "FROM:")) {
896: char * p;
897: strcpy(author,
898: author_name(strchr(line,':')+1));
2.17 frystyk 899: if (*author) { /* Not always there! */
900: p = author + strlen(author) - 1;
901: if (*p==LF) *p = 0; /* Chop off newline */
902: }
1.1 timbl 903: }
904: break;
905:
906: } /* end switch on first character */
907: } /* if end of line */
908: } /* Loop over characters */
909:
1.2 timbl 910: START(HTML_LI);
1.1 timbl 911: sprintf(buffer, "\"%s\" - %s", subject, author);
912: if (reference) {
913: write_anchor(buffer, reference);
914: free(reference);
915: reference=0;
916: } else {
1.2 timbl 917: PUTS(buffer);
1.1 timbl 918: }
919:
920:
1.2 timbl 921: /* indicate progress! @@@@@@
1.1 timbl 922: */
923:
924: } /* If good response */
925: } /* Loop over article */
926: } /* If read headers */
1.2 timbl 927: END(HTML_MENU);
928: START(HTML_P);
1.1 timbl 929:
930: /* Link to later articles
931: */
932: if (last_required<last) {
933: int after; /* End of article after */
934: after = last_required+CHUNK_SIZE;
935: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
936: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
2.28 frystyk 937: if (TRACE) fprintf(TDEST, " Block after is %s\n", buffer);
1.2 timbl 938: PUTS( "(");
939: start_anchor(buffer);
940: PUTS( "Later articles");
941: END(HTML_A);
942: PUTS( "...)\n");
1.1 timbl 943: }
2.16 luotonen 944:
2.19 luotonen 945: #ifdef POSTING
946: add_post:
947: #endif
2.16 luotonen 948: {
949: char *href=0;
950: START(HTML_HR);
951:
952: StrAllocCopy(href,"newspost:");
953: StrAllocCat(href,groupName);
954: start_anchor(href);
955: PUTS("Post to ");
956: PUTS(groupName);
957: END(HTML_A);
958:
959: free(href);
960: }
1.1 timbl 961:
962:
963: }
964:
965:
966: /* Load by name HTLoadNews
967: ** ============
968: */
2.36 frystyk 969: PUBLIC int HTLoadNews ARGS3(SOCKET, soc, HTRequest *, request, SockOps, ops)
1.1 timbl 970: {
2.19 luotonen 971: char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 972: char command[257]; /* The whole command */
973: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
974: int status; /* tcp return */
975: int retries; /* A count of how hard we have tried */
976: BOOL group_wanted; /* Flag: group was asked for, not article */
977: BOOL list_wanted; /* Flag: group was asked for, not article */
978: int first, last; /* First and last articles asked for */
979:
2.10 timbl 980: diagnostic = (request->output_format == WWW_SOURCE); /* set global flag */
1.1 timbl 981:
2.36 frystyk 982: if (ops == FD_NONE) {
983: if (PROT_TRACE) fprintf(TDEST, "News........ Looking for `%s\'\n",arg);
984: #if 0
985: if ((news = (news_info *) calloc(1, sizeof(news_info))) == NULL)
986: outofmem(__FILE__, "HTLoadNews");
987: news->state = NEWS_BEGIN;
988: net->context = news;
989: #endif
990: } if (ops == FD_CLOSE) { /* Interrupted */
991: HTNet_delete(request->net, HT_INTERRUPTED);
992: return HT_OK;
993: } else
994: HTNet_delete(request->net, HT_ERROR);
995:
1.1 timbl 996: if (!initialized) initialized = initialize();
2.36 frystyk 997: if (!initialized) return HT_ERROR; /* FAIL */
1.1 timbl 998:
999: {
2.19 luotonen 1000: char * p1=arg;
1.1 timbl 1001:
1002: /* We will ask for the document, omitting the host name & anchor.
1003: **
1004: ** Syntax of address is
1005: ** xxx@yyy Article
1006: ** <xxx@yyy> Same article
1007: ** xxxxx News group (no "@")
1008: ** group/n1-n2 Articles n1 to n2 in group
1009: */
1010: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
1011: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
1012:
1013: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
1014: /* Don't use HTParse because news: access doesn't follow traditional
1015: rules. For instance, if the article reference contains a '#',
1016: the rest of it is lost -- JFG 10/7/92, from a bug report */
1017: if (!strncasecomp (arg, "news:", 5))
1018: p1 = arg + 5; /* Skip "news:" prefix */
2.19 luotonen 1019: HTUnEscape(p1); /* AL May 2, 1994 */
1020: HTCleanTelnetString(p1); /* Prevent security holes */
1.1 timbl 1021: if (list_wanted) {
2.16 luotonen 1022: strcpy(command, "LIST NEWSGROUPS");
1.1 timbl 1023: } else if (group_wanted) {
1024: char * slash = strchr(p1, '/');
1025: strcpy(command, "GROUP ");
1026: first = 0;
1027: last = 0;
1028: if (slash) {
1029: *slash = 0;
1030: strcpy(groupName, p1);
1031: *slash = '/';
1032: (void) sscanf(slash+1, "%d-%d", &first, &last);
1033: } else {
1034: strcpy(groupName, p1);
1035: }
1036: strcat(command, groupName);
1037: } else {
1038: strcpy(command, "ARTICLE ");
1039: if (strchr(p1, '<')==0) strcat(command,"<");
1040: strcat(command, p1);
1041: if (strchr(p1, '>')==0) strcat(command,">");
1042: }
1043:
1.3 timbl 1044: {
1045: char * p = command + strlen(command);
1046: *p++ = CR; /* Macros to be correct on Mac */
1047: *p++ = LF;
1048: *p++ = 0;
1049: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
1050: }
1.1 timbl 1051: } /* scope of p1 */
1052:
2.36 frystyk 1053: if (!*arg) return HT_ERROR; /* Ignore if no name */
1.1 timbl 1054:
1055:
1056: /* Make a hypertext object with an anchor list.
1057: */
2.10 timbl 1058: node_anchor = request->anchor;
2.34 frystyk 1059: target = HTMLGenerator(request, NULL, WWW_HTML,
2.11 timbl 1060: request->output_format, request->output_stream);
1.2 timbl 1061: targetClass = *target->isa; /* Copy routine entry points */
1062:
1.1 timbl 1063:
1064: /* Now, let's get a stream setup up from the NewsHost:
1065: */
1066: for(retries=0;retries<2; retries++){
1067:
1068: if (s<0) {
1069: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1070: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
1071: if (status<0){
1072: NETCLOSE(s);
2.28 frystyk 1073: s = INVSOC;
2.21 frystyk 1074: #ifdef OLD_CODE
1075: char message[256];
2.28 frystyk 1076: if (TRACE) fprintf(TDEST, "HTNews: Unable to connect to news host.\n");
1.1 timbl 1077: /* if (retries<=1) continue; WHY TRY AGAIN ? */
1078: sprintf(message,
1079: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 1080: HTNewsHost);
2.34 frystyk 1081: return HT_ERROR;
2.21 frystyk 1082: #endif /* OLD_CODE */
1083: {
1084: char *unescaped = NULL;
1085: StrAllocCopy(unescaped, arg);
1086: HTUnEscape(unescaped);
1087: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1088: (void *) unescaped,
1089: (int) strlen(unescaped), "HTLoadNews");
1090: free(unescaped);
2.36 frystyk 1091: return HT_OK;
2.21 frystyk 1092: }
1.1 timbl 1093: } else {
2.28 frystyk 1094: if (TRACE) fprintf(TDEST, "HTNews: Connected to news host %s.\n",
1.2 timbl 1095: HTNewsHost);
2.11 timbl 1096: isoc = HTInputSocket_new(s); /* set up buffering */
1.1 timbl 1097: if ((response(NULL) / 100) !=2) {
2.21 frystyk 1098: int length = strlen(response_text);
1099: NETCLOSE(s);
1100: HTInputSocket_free(isoc);
2.28 frystyk 1101: s = INVSOC;
2.21 frystyk 1102: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NEWS_SERVER,
1103: (void *) response_text, length < 50 ?
1104: length : 50, "HTLoadNews");
1105: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1106: (void *) HTNewsHost,
1107: (int) strlen(HTNewsHost), "HTLoadNews");
2.36 frystyk 1108: return HT_OK;
2.21 frystyk 1109: }
1110: #ifdef OLD_CODE
2.8 timbl 1111: char message[BIG];
1112: sprintf(message,
1113: "Can't read news info. News host %.20s responded: %.200s",
1114: HTNewsHost, response_text);
2.34 frystyk 1115: return HT_ERROR;
2.21 frystyk 1116: }
1117: #endif /* OLD_CODE */
1.1 timbl 1118: }
1119: } /* If needed opening */
1120:
1.2 timbl 1121: /* @@@@@@@@@@@@@@Tell user something's happening */
1122:
1.1 timbl 1123: status = response(command);
1124: if (status<0) break;
2.19 luotonen 1125: if (status >= 411 && status <= 430) break; /* no such article/group */
1.1 timbl 1126: if ((status/ 100) !=2) {
1127: /* NXRunAlertPanel("News access", response_text,
1128: NULL,NULL,NULL);
1129: */
1130: NETCLOSE(s);
2.11 timbl 1131: HTInputSocket_free(isoc);
2.28 frystyk 1132: s = INVSOC;
1.1 timbl 1133: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
1134: continue; /* Try again */
1135: }
1136:
1137: /* Load a group, article, etc
1138: */
1.2 timbl 1139:
1.1 timbl 1140:
1141: if (list_wanted) read_list();
1142: else if (group_wanted) read_group(groupName, first, last);
1143: else read_article();
1144:
2.23 duns 1145: (*targetClass._free)(target);
2.36 frystyk 1146: return HT_OK;
1.1 timbl 1147:
1148: } /* Retry loop */
1149:
1.2 timbl 1150:
2.8 timbl 1151: /* HTAlert("Sorry, could not load requested news.\n"); */
1152:
1.1 timbl 1153: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
1154: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
1155:
2.23 duns 1156: (*targetClass._free)(target); /* AL May 2, 1994 */
2.36 frystyk 1157: return HT_OK;
1.1 timbl 1158: }
Webmaster