Annotation of libwww/Library/src/HTNews.c, revision 2.22
1.1 timbl 1: /* NEWS ACCESS HTNews.c
2: ** ===========
3: **
4: ** History:
5: ** 26 Sep 90 Written TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
2.19 luotonen 7: ** 16 Feb 94 AL Added Lou Montulli's Lynx & LIST NEWSGROUPS diffs.
8: ** 2 May 94 AL Added HTUnEscape() to HTLoadNews(), and
9: ** fixed a possible security hole when the URL contains
10: ** a newline, that could cause multiple commands to be
11: ** sent to an NNTP server.
1.1 timbl 12: */
1.2 timbl 13: /* Implements:
14: */
15: #include "HTNews.h"
1.1 timbl 16:
/* Line terminators used when building NNTP commands and when detecting
** the end of a line read from the server. */
1.3 timbl 17: #define CR FROMASCII('\015') /* Must be converted to ^M for transmission */
18: #define LF FROMASCII('\012') /* Must be converted to ^J for transmission */
19:
1.1 timbl 20: #define NEWS_PORT 119 /* TCP port used for the news host, see rfc977 */
21: #define APPEND /* Use append methods (not referenced in the visible part of this file) */
22: #define MAX_CHUNK 40 /* Largest number of articles in one window; larger requests are trimmed */
23: #define CHUNK_SIZE 20 /* Number of articles kept when a request is trimmed */
24:
/* Fallback host name used by initialize() when no other source names one. */
25: #ifndef DEFAULT_NEWS_HOST
26: #define DEFAULT_NEWS_HOST "news"
27: #endif
/* File whose first whitespace-delimited word is read as the news host name. */
28: #ifndef SERVER_FILE
29: #define SERVER_FILE "/usr/local/lib/rn/server"
30: #endif
31:
32: #include <ctype.h>
33: #include "HTUtils.h" /* Coding convention macros */
34: #include "tcp.h"
35:
1.2 timbl 36: #include "HTML.h"
1.1 timbl 37: #include "HTParse.h"
38: #include "HTFormat.h"
2.8 timbl 39: #include "HTAlert.h"
2.21 frystyk 40: #include "HTError.h"
1.1 timbl 41:
2.8 timbl 42: #define BIG 1024 /* @@@ (not referenced in the visible part of this file) */
43:
/* Minimal view of the structured output sink: only the class pointer is
** needed here, so that its function table can be copied into targetClass. */
1.2 timbl 44: struct _HTStructured {
45: CONST HTStructuredClass * isa;
46: /* ... */
47: };
48:
/* Progress reporting hook; forwards its argument to HTProgress(). */
2.7 timbl 49: #define NEWS_PROGRESS(foo) HTProgress(foo)
1.1 timbl 50:
51:
/* Fetch the next character from the module-wide input socket `isoc'. */
2.12 timbl 52: #define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1 timbl 53: #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */
54: #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */
55:
56:
57: /* Module-wide variables
58: */
/* Name of the news host; set by initialize() or HTSetNewsHost(). */
1.2 timbl 59: PUBLIC char * HTNewsHost;
1.1 timbl 60: PRIVATE struct sockaddr_in soc_address; /* Binary network address */
61: PRIVATE int s; /* Socket for NewsHost, -1 when disconnected */
62: PRIVATE char response_text[LINE_LENGTH+1]; /* Last response line read by response() */
1.2 timbl 63: /* PRIVATE HText * HT; */ /* the new hypertext */
64: PRIVATE HTStructured * target; /* The output sink */
65: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses */
1.1 timbl 66: PRIVATE HTParentAnchor *node_anchor; /* Its anchor */
67: PRIVATE int diagnostic; /* level: 0=none 2=source */
68:
1.2 timbl 69:
/* Shorthands that emit a character, a string, or a start/end tag
** (without attributes) to the current output sink. */
70: #define PUTC(c) (*targetClass.put_character)(target, c)
71: #define PUTS(s) (*targetClass.put_string)(target, s)
72: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
73: #define END(e) (*targetClass.end_element)(target, e)
74:
/* Buffered reader used by NEXT_CHAR; shared by every routine in this file. */
2.11 timbl 75: PUBLIC HTInputSocket *isoc; /* @@@ non-reentrant */
76:
1.2 timbl 77: PUBLIC CONST char * HTGetNewsHost NOARGS
78: {
79: return HTNewsHost;
80: }
1.1 timbl 81:
1.2 timbl 82: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
83: {
84: StrAllocCopy(HTNewsHost, value);
85: }
1.1 timbl 86:
87: /* Initialisation for this module
88: ** ------------------------------
89: **
90: ** Except on the NeXT, we pick up the NewsHost name from
91: **
92: ** 1. Environment variable NNTPSERVER
93: ** 2. File SERVER_FILE
94: ** 3. Compilation time macro DEFAULT_NEWS_HOST
95: **
96: ** On the NeXT, we pick up the NewsHost name from, in order:
97: **
98: ** 1. WorldWideWeb default "NewsHost"
99: ** 2. Global default "NewsHost"
100: ** 3. News default "NewsHost"
101: ** 4. Compilation time macro DEFAULT_NEWS_HOST
102: */
103: PRIVATE BOOL initialized = NO;
104: PRIVATE BOOL initialize NOARGS
105: {
106: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
107: struct sockaddr_in* sin = &soc_address;
108:
109:
110: /* Set up defaults:
111: */
112: sin->sin_family = AF_INET; /* Family = internet, host order */
113: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
114:
115: /* Get name of Host
116: */
117: #ifdef NeXTStep
1.2 timbl 118: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
119: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
120: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 121: #else
122: if (getenv("NNTPSERVER")) {
1.2 timbl 123: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
1.1 timbl 124: if (TRACE) fprintf(stderr, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 125: HTNewsHost);
1.1 timbl 126: } else {
127: char server_name[256];
128: FILE* fp = fopen(SERVER_FILE, "r");
129: if (fp) {
130: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 131: StrAllocCopy(HTNewsHost, server_name);
1.1 timbl 132: if (TRACE) fprintf(stderr,
133: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 134: SERVER_FILE, HTNewsHost);
1.1 timbl 135: }
136: fclose(fp);
137: }
138: }
1.2 timbl 139: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 140: #endif
141:
1.2 timbl 142: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
143: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1 timbl 144:
145: } else { /* Alphanumeric node name: */
1.2 timbl 146: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 147: if (!phost) {
2.7 timbl 148: char message[150]; /* @@@ */
149: sprintf(message,
150: "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
151: "Please define your NNTP server");
152: HTAlert(message);
1.1 timbl 153: CTRACE(tfp,
1.2 timbl 154: "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 155: return NO; /* Fail */
156: }
157: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
158: }
159:
160: if (TRACE) fprintf(stderr,
161: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
162: (unsigned int)ntohs(sin->sin_port),
163: (int)*((unsigned char *)(&sin->sin_addr)+0),
164: (int)*((unsigned char *)(&sin->sin_addr)+1),
165: (int)*((unsigned char *)(&sin->sin_addr)+2),
166: (int)*((unsigned char *)(&sin->sin_addr)+3));
167:
168: s = -1; /* Disconnected */
169:
170: return YES;
171: }
172:
173:
174:
175: /* Send NNTP Command line to remote host & Check Response
176: ** ------------------------------------------------------
177: **
178: ** On entry,
179: ** command points to the command to be sent, including CRLF, or is null
180: ** pointer if no command to be sent.
181: ** On exit,
182: ** Negative status indicates transmission error, socket closed.
183: ** Positive status is an NNTP status.
184: */
185:
186:
187: PRIVATE int response ARGS1(CONST char *,command)
188: {
189: int result;
190: char * p = response_text;
191: if (command) {
192: int status;
193: int length = strlen(command);
194: if (TRACE) fprintf(stderr, "NNTP command to be sent: %s", command);
195: #ifdef NOT_ASCII
196: {
197: CONST char * p;
198: char * q;
199: char ascii[LINE_LENGTH+1];
200: for(p = command, q=ascii; *p; p++, q++) {
201: *q = TOASCII(*p);
202: }
203: status = NETWRITE(s, ascii, length);
204: }
205: #else
206: status = NETWRITE(s, command, length);
207: #endif
208: if (status<0){
209: if (TRACE) fprintf(stderr,
210: "HTNews: Unable to send command. Disconnecting.\n");
211: NETCLOSE(s);
2.11 timbl 212: HTInputSocket_free(isoc);
1.1 timbl 213: s = -1;
214: return status;
215: } /* if bad status */
216: } /* if command to be sent */
217:
218: for(;;) {
1.3 timbl 219: if (((*p++=NEXT_CHAR) == LF)
220: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 221: *p++=0; /* Terminate the string */
222: if (TRACE) fprintf(stderr, "NNTP Response: %s\n", response_text);
223: sscanf(response_text, "%d", &result);
2.19 luotonen 224: if (result >= 411 && result <= 430) { /* no such article/group */
225: char * msg = strchr(response_text,' ');
226: if (!msg) msg = response_text;
227: PUTS("<H1>News error</H1>\n");
228: PUTS(msg);
229: CTRACE(stderr, "News error.. %s", response_text);
230: }
1.1 timbl 231: return result;
232: } /* if end of line */
233:
234: if (*(p-1) < 0) {
235: if (TRACE) fprintf(stderr,
236: "HTNews: EOF on read, closing socket %d\n", s);
237: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 238: HTInputSocket_free(isoc);
1.1 timbl 239: return s = -1; /* End of file on response */
240: }
241: } /* Loop over characters */
242: }
243:
244:
245: /* Case insensitive string comparisons
246: ** -----------------------------------
247: **
248: ** On entry,
249: ** template must be already un upper case.
250: ** unknown may be in upper or lower or mixed case to match.
251: */
252: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,template)
253: {
254: CONST char * u = unknown;
255: CONST char * t = template;
256: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
257: return (BOOL)(*t==0); /* OK if end of template */
258: }
259:
260: /* Find Author's name in mail address
261: ** ----------------------------------
262: **
263: ** On exit,
264: ** THE EMAIL ADDRESS IS CORRUPTED
265: **
266: ** For example, returns "Tim Berners-Lee" if given any of
267: ** " Tim Berners-Lee <tim@online.cern.ch> "
268: ** or " tim@online.cern.ch ( Tim Berners-Lee ) "
269: */
270: PRIVATE char * author_name ARGS1 (char *,email)
271: {
272: char *s, *e;
273:
274: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
275: if (e>s) {
276: *e=0; /* Chop off everything after the ')' */
277: return HTStrip(s+1); /* Remove leading and trailing spaces */
278: }
279:
280: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
281: if (e>s) {
282: strcpy(s, e+1); /* Remove <...> */
283: return HTStrip(email); /* Remove leading and trailing spaces */
284: }
285:
286: return HTStrip(email); /* Default to the whole thing */
287:
288: }
289:
1.2 timbl 290: /* Start anchor element
291: ** --------------------
292: */
293: PRIVATE void start_anchor ARGS1(CONST char *, href)
294: {
295: BOOL present[HTML_A_ATTRIBUTES];
296: CONST char* value[HTML_A_ATTRIBUTES];
297:
298: {
299: int i;
300: for(i=0; i<HTML_A_ATTRIBUTES; i++)
301: present[i] = (i==HTML_A_HREF);
302: }
303: value[HTML_A_HREF] = href;
304: (*targetClass.start_element)(target, HTML_A , present, value);
305:
306: }
1.1 timbl 307:
2.16 luotonen 308:
309: /* Start link element
310: ** --------------------
311: */
312: PRIVATE void start_link ARGS2(CONST char *, href, CONST char *, rev)
313: {
314: #ifdef WHEN_WE_HAVE_HTMLPLUS
315:
316: BOOL present[HTML_LINK_ATTRIBUTES];
317: CONST char* value[HTML_LINK_ATTRIBUTES];
318:
319: {
320: int i;
321: for(i=0; i<HTML_LINK_ATTRIBUTES; i++)
322: present[i] = (i==HTML_LINK_HREF || i==HTML_LINK_REV);
323: }
324: value[HTML_LINK_HREF] = href;
325: value[HTML_LINK_REV] = rev;
326: (*targetClass.start_element)(target, HTML_LINK , present, value);
327:
328: #endif
329: }
330:
331:
332:
333:
1.1 timbl 334: /* Paste in an Anchor
335: ** ------------------
336: **
337: **
338: ** On entry,
339: ** HT has a selection of zero length at the end.
340: ** text points to the text to be put into the file, 0 terminated.
341: ** addr points to the hypertext refernce address,
342: ** terminated by white space, comma, NULL or '>'
343: */
344: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
345: {
346: char href[LINE_LENGTH+1];
347:
348: {
349: CONST char * p;
350: strcpy(href,"news:");
351: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
352: strncat(href, addr, p-addr); /* Make complete hypertext reference */
353: }
354:
1.2 timbl 355: start_anchor(href);
356: PUTS(text);
357: END(HTML_A);
1.1 timbl 358: }
359:
360:
361: /* Write list of anchors
362: ** ---------------------
363: **
364: ** We take a pointer to a list of objects, and write out each,
365: ** generating an anchor for each.
366: **
367: ** On entry,
368: ** HT has a selection of zero length at the end.
369: ** text points to a comma or space separated list of addresses.
370: ** On exit,
371: ** *text is NOT any more chopped up into substrings.
372: */
373: PRIVATE void write_anchors ARGS1 (char *,text)
374: {
375: char * start = text;
376: char * end;
377: char c;
378: for (;;) {
379: for(;*start && (WHITE(*start)); start++); /* Find start */
380: if (!*start) return; /* (Done) */
381: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
382: if (*end) end++; /* Include comma or space but not NULL */
383: c = *end;
384: *end = 0;
385: write_anchor(start, start);
2.16 luotonen 386: START(HTML_BR);
1.1 timbl 387: *end = c;
388: start = end; /* Point to next one */
389: }
390: }
391:
392: /* Abort the connection abort_socket
393: ** --------------------
394: */
395: PRIVATE void abort_socket NOARGS
396: {
397: if (TRACE) fprintf(stderr,
398: "HTNews: EOF on read, closing socket %d\n", s);
399: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 400: HTInputSocket_free(isoc);
1.2 timbl 401: PUTS("Network Error: connection lost");
402: PUTC('\n');
1.1 timbl 403: s = -1; /* End of file on response */
404: return;
405: }
406:
407: /* Read in an Article read_article
408: ** ------------------
409: **
410: **
411: ** Note the termination condition of a single dot on a line by itself.
412: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
413: ** do not handle it here.
414: **
415: ** On entry,
416: ** s Global socket number is OK
417: ** HT Global hypertext object is ready for appending text
418: */
/* Reads one article from the news socket and writes it to the output sink.
** Unless `diagnostic' is set, the header is parsed first: the subject
** becomes the title and heading, date/organization/from lines are shown,
** and newsgroups/references are collected and listed as anchors.
** The body is then copied inside <PRE>, turning <...@...> into anchors.
** NOTE(review): the early returns after abort_socket() do not free
** `newsgroups' or `references' if they were already allocated. */
419: PRIVATE void read_article NOARGS
420: {
421:
422: char line[LINE_LENGTH+1];
423: char *references=NULL; /* Hrefs for other articles */
424: char *newsgroups=NULL; /* Newsgroups list */
425: char *p = line;
426: BOOL done = NO;
427:
428: /* Read in the HEADer of the article:
429: **
430: ** The header fields are either ignored, or formatted and put into the
431: ** Text.
432: */
433: if (!diagnostic) {
1.2 timbl 434: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 435: while(!done){
/* NOTE(review): the character is narrowed to char before the EOF test, so a
** data byte equal to (char)EOF cannot be told apart from end of file. */
436: char ch = *p++ = NEXT_CHAR;
437: if (ch==(char)EOF) {
438: abort_socket(); /* End of file, close socket */
439: return; /* End of file on response */
440: }
1.3 timbl 441: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
/* The last character stored (normally the LF) is overwritten here. */
1.1 timbl 442: *--p=0; /* Terminate the string */
443: if (TRACE) fprintf(stderr, "H %s\n", line);
444:
445: if (line[0]=='.') {
446: if (line[1]<' ') { /* End of article? */
447: done = YES;
448: break;
449: }
450:
451: } else if (line[0]<' ') {
452: break; /* End of Header? */
2.16 luotonen 453:
1.1 timbl 454: } else if (match(line, "SUBJECT:")) {
1.2 timbl 455: END(HTML_ADDRESS);
456: START(HTML_TITLE); /** Uuugh! @@@ */
/* NOTE(review): "SUBJECT:" is 8 characters; line+9 assumes at least one
** more character follows the colon. The heading below uses line+8. */
2.16 luotonen 457: PUTS(line+9);
458: END(HTML_TITLE);
459: START(HTML_H1);
1.2 timbl 460: PUTS(line+8);
2.16 luotonen 461: END(HTML_H1);
1.2 timbl 462: START(HTML_ADDRESS);
2.16 luotonen 463:
1.1 timbl 464: } else if (match(line, "DATE:")
465: || match(line, "ORGANIZATION:")) {
/* NOTE(review): +2 assumes a character follows the colon. */
2.16 luotonen 466: PUTS(strchr(line,':')+2);
467: START(HTML_BR);
468:
469: } else if(match(line, "FROM:")) {
470: char * temp=0;
471: char * href=0;
472: char *cp1, *cp2;
473:
474: /* copy into temporary storage */
475: StrAllocCopy(temp, strchr(line,':')+1);
476:
477: cp1=temp;
478: while(isspace(*cp1)) cp1++;
479: /* remove space and stuff after */
480: if((cp2 = strchr(cp1,' ')) != NULL)
481: *cp2 = '\0';
482:
/* The first word after "From:" is used as the mailto: address. */
483: StrAllocCopy(href,"mailto:");
484: StrAllocCat(href,cp1);
485:
486: start_anchor(href);
487: PUTS("Reply to ");
488: PUTS(strchr(line,':')+1);
489: END(HTML_A);
490: START(HTML_BR);
491:
492: /* put in the owner as a link rel. as well */
493: start_link(href, "made");
494:
495: /* free of temp vars */
496: free(temp);
497: free(href);
498:
1.1 timbl 499: } else if (match(line, "NEWSGROUPS:")) {
500: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
501:
502: } else if (match(line, "REFERENCES:")) {
503: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
504:
505: } /* end if match */
506: p = line; /* Restart at beginning */
507: } /* if end of line */
508: } /* Loop over characters */
2.16 luotonen 509: END(HTML_ADDRESS);
1.1 timbl 510:
/* List the collected newsgroups and references as a definition list. */
1.2 timbl 511: if (newsgroups || references) {
2.16 luotonen 512: START(HTML_DL);
1.2 timbl 513: if (newsgroups) {
2.16 luotonen 514: #ifdef POSTING
515: char *href=0;
516: #endif
517:
1.2 timbl 518: (*targetClass.start_element)(target, HTML_DT , 0, 0);
519: PUTS("Newsgroups:");
520: (*targetClass.start_element)(target, HTML_DD , 0, 0);
521: write_anchors(newsgroups);
2.16 luotonen 522:
523: #ifdef POSTING
524: /* make posting possible */
525: StrAllocCopy(href,"newspost:");
526: StrAllocCat(href,newsgroups);
527: START(HTML_DT);
528: start_anchor(href);
529: PUTS("Reply to newsgroup(s)");
530: END(HTML_A);
531: #endif
532:
1.2 timbl 533: free(newsgroups);
534: }
535:
536: if (references) {
537: (*targetClass.start_element)(target, HTML_DT , 0, 0);
538: PUTS("References:");
539: (*targetClass.start_element)(target, HTML_DD , 0, 0);
540: write_anchors(references);
541: free(references);
542: }
2.16 luotonen 543: #ifdef WHEN_WE_HAVE_HTMLPLUS
544: (*targetClass.end_element)(target, HTML_DLC);
545: #else
2.10 timbl 546: (*targetClass.end_element)(target, HTML_DL);
2.16 luotonen 547: #endif
1.1 timbl 548: }
1.2 timbl 549: PUTS("\n\n\n");
1.1 timbl 550:
551: }
552:
553: /* Read in the BODY of the Article:
554: */
1.2 timbl 555: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
556:
1.1 timbl 557: p = line;
558: while(!done){
559: char ch = *p++ = NEXT_CHAR;
560: if (ch==(char)EOF) {
561: abort_socket(); /* End of file, close socket */
562: return; /* End of file on response */
563: }
1.3 timbl 564: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
/* Unlike the header loop, the line terminator is kept in the line. */
1.1 timbl 565: *p++=0; /* Terminate the string */
566: if (TRACE) fprintf(stderr, "B %s", line);
567: if (line[0]=='.') {
568: if (line[1]<' ') { /* End of article? */
569: done = YES;
570: break;
571: } else { /* Line starts with dot */
1.2 timbl 572: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 573: }
574: } else {
575:
576: /* Normal lines are scanned for buried references to other articles.
577: ** Unfortunately, it will pick up mail addresses as well!
578: */
579: char *l = line;
580: char * p;
2.14 luotonen 581: while ((p=strchr(l, '<'))) {
1.1 timbl 582: char *q = strchr(p,'>');
583: char *at = strchr(p, '@');
584: if (q && at && at<q) {
/* Temporarily cut the line in place: emit the text before '<', then the
** <...> part as an anchor whose address is the text between the brackets;
** every overwritten character is put back afterwards. */
585: char c = q[1];
586: q[1] = 0; /* chop up */
587: *p = 0;
1.2 timbl 588: PUTS(l);
1.1 timbl 589: *p = '<'; /* again */
590: *q = 0;
1.2 timbl 591: start_anchor(p+1);
1.1 timbl 592: *q = '>'; /* again */
1.2 timbl 593: PUTS(p);
594: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 595: q[1] = c; /* again */
596: l=q+1;
597: } else break; /* line has unmatched <> */
598: }
1.2 timbl 599: PUTS( l); /* Last bit of the line */
1.1 timbl 600: } /* if not dot */
601: p = line; /* Restart at beginning */
602: } /* if end of line */
603: } /* Loop over characters */
1.2 timbl 604:
605: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 606: }
607:
608:
609: /* Read in a List of Newsgroups
610: ** ----------------------------
611: */
612: /*
613: ** Note the termination condition of a single dot on a line by itself.
614: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
615: ** do not handle it here.
616: */
617: PRIVATE void read_list NOARGS
618: {
619:
620: char line[LINE_LENGTH+1];
621: char *p;
622: BOOL done = NO;
623:
624: /* Read in the HEADer of the article:
625: **
626: ** The header fields are either ignored, or formatted and put into the
627: ** Text.
628: */
1.2 timbl 629: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
630: PUTS( "Newsgroups");
631: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 632: p = line;
2.16 luotonen 633: (*targetClass.start_element)(target, HTML_DL, 0, 0);
1.1 timbl 634: while(!done){
635: char ch = *p++ = NEXT_CHAR;
636: if (ch==(char)EOF) {
637: abort_socket(); /* End of file, close socket */
638: return; /* End of file on response */
639: }
1.3 timbl 640: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 641: *p++=0; /* Terminate the string */
642: if (TRACE) fprintf(stderr, "B %s", line);
2.16 luotonen 643: (*targetClass.start_element)(target, HTML_DT , 0, 0);
1.1 timbl 644: if (line[0]=='.') {
645: if (line[1]<' ') { /* End of article? */
646: done = YES;
647: break;
648: } else { /* Line starts with dot */
1.2 timbl 649: PUTS( &line[1]);
1.1 timbl 650: }
651: } else {
652:
653: /* Normal lines are scanned for references to newsgroups.
654: */
2.16 luotonen 655: int i=0;
656:
657: /* find whitespace if it exits */
658: for(; line[i] != '\0' && !WHITE(line[i]); i++)
659: ; /* null body */
660:
661: if(line[i] != '\0') {
662: line[i] = '\0';
663: write_anchor(line, line);
664: (*targetClass.start_element)(target, HTML_DD , 0, 0);
665: PUTS(&line[i+1]); /* put description */
666: } else {
667: write_anchor(line, line);
668: }
669:
670: #ifdef OLD_CODE
1.1 timbl 671: char group[LINE_LENGTH];
672: int first, last;
673: char postable;
674: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
675: write_anchor(line, group);
676: else
1.2 timbl 677: PUTS(line);
2.16 luotonen 678: #endif /*OLD_CODE*/
679:
1.1 timbl 680: } /* if not dot */
681: p = line; /* Restart at beginning */
682: } /* if end of line */
683: } /* Loop over characters */
2.16 luotonen 684: (*targetClass.end_element)(target, HTML_DL);
1.1 timbl 685: }
686:
687:
688: /* Read in a Newsgroup
689: ** -------------------
690: ** Unfortunately, we have to ask for each article one by one if we
691: ** want more than one field.
692: **
693: */
/* Lists the articles of a newsgroup as a menu of anchors.
** The article range is taken from the GROUP reply still held in
** response_text, clipped to first_required..last_required and, if it is
** larger than MAX_CHUNK, trimmed to the last CHUNK_SIZE articles. For each
** article a HEAD command is sent and the Subject, From and Message-ID
** fields are picked out. Links to earlier/later blocks and a posting link
** are added around the list.
**
** groupName       name of the group, used in titles and links
** first_required  first article number wanted
** last_required   last article number wanted, 0 meaning up to the last one */
694: PRIVATE void read_group ARGS3(
695: CONST char *,groupName,
696: int,first_required,
697: int,last_required
698: )
699: {
700: char line[LINE_LENGTH+1];
/* NOTE(review): author and subject are not initialised; they are only
** written when a From:/Subject: line is seen, yet both are always used. */
701: char author[LINE_LENGTH+1];
702: char subject[LINE_LENGTH+1];
703: char *p;
704: BOOL done;
705:
706: char buffer[LINE_LENGTH];
707: char *reference=0; /* Href for article */
708: int art; /* Article number WITHIN GROUP */
709: int status, count, first, last; /* Response fields */
710: /* count is only an upper limit */
711:
/* NOTE(review): the sscanf result is not checked; the four fields are
** used even if fewer were converted. */
712: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
2.17 frystyk 713: if(TRACE)
714: fprintf(stderr,
715: "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
716: status, count, first, last, first_required, last_required);
1.1 timbl 717: if (last==0) {
1.2 timbl 718: PUTS( "\nNo articles in this group.\n");
2.16 luotonen 719: #ifdef POSTING
720: goto add_post;
721: #endif
1.1 timbl 722: return;
723: }
724:
725: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
726: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
727:
728: if (first_required<first) first_required = first; /* clip */
729: if ((last_required==0) || (last_required > last)) last_required = last;
730:
/* NOTE(review): `<=' also rejects a range holding exactly one article. */
731: if (last_required<=first_required) {
1.2 timbl 732: PUTS( "\nNo articles in this range.\n");
2.16 luotonen 733: #ifdef POSTING
734: goto add_post;
735: #endif
1.1 timbl 736: return;
737: }
738:
739: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
740: first_required = last_required-CHUNK_SIZE+1;
741: }
2.18 frystyk 742: if (TRACE) fprintf (stderr, " Chunk will be (%d-%d)\n",
2.16 luotonen 743: first_required, last_required);
1.1 timbl 744:
1.2 timbl 745: /* Set window title
746: */
747: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
748: groupName, first_required, last_required);
749: START(HTML_TITLE);
750: PUTS(buffer);
751: END(HTML_TITLE);
752:
1.1 timbl 753: /* Link to earlier articles
754: */
755: if (first_required>first) {
756: int before; /* Start of one before */
757: if (first_required-MAX_CHUNK <= first) before = first;
758: else before = first_required-CHUNK_SIZE;
/* Unlike the "later articles" link below, this address has no "news:" prefix. */
759: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
760: if (TRACE) fprintf(stderr, " Block before is %s\n", buffer);
1.2 timbl 761: PUTS( " (");
762: start_anchor(buffer);
763: PUTS("Earlier articles");
764: END(HTML_A);
765: PUTS( "...)\n");
1.1 timbl 766: }
767:
768: done = NO;
769:
770: /*#define USE_XHDR*/
771: #ifdef USE_XHDR
772: if (count>FAST_THRESHOLD) {
773: sprintf(buffer,
774: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
775: count, groupName);
1.2 timbl 776: PUTS(buffer);
1.3 timbl 777: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 778: status = response(buffer);
779: if (status==221) {
780:
781: p = line;
782: while(!done){
783: char ch = *p++ = NEXT_CHAR;
784: if (ch==(char)EOF) {
785: abort_socket(); /* End of file, close socket */
786: return; /* End of file on response */
787: }
788: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
789: *p++=0; /* Terminate the string */
790: if (TRACE) fprintf(stderr, "X %s", line);
791: if (line[0]=='.') {
792: if (line[1]<' ') { /* End of article? */
793: done = YES;
794: break;
795: } else { /* Line starts with dot */
796: /* Ignore strange line */
797: }
798: } else {
799:
800: /* Normal lines are scanned for references to articles.
801: */
802: char * space = strchr(line, ' ');
803: if (space++)
804: write_anchor(space, space);
805: } /* if not dot */
806: p = line; /* Restart at beginning */
807: } /* if end of line */
808: } /* Loop over characters */
809:
810: /* leaving loop with "done" set */
811: } /* Good status */
812: };
813: #endif
814:
815: /* Read newsgroup using individual fields:
816: */
817: if (!done) {
818: if (first==first_required && last==last_required)
1.2 timbl 819: PUTS("\nAll available articles in ");
820: else PUTS( "\nArticles in ");
821: PUTS(groupName);
822: START(HTML_MENU);
1.1 timbl 823: for(art=first_required; art<=last_required; art++) {
824:
825: /*#define OVERLAP*/
826: #ifdef OVERLAP
827: /* With this code we try to keep the server running flat out by queuing just
828: ** one extra command ahead of time. We assume (1) that the server won't abort
829: ** if it gets input during output, and (2) that TCP buffering is enough for the
830: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
831: ** had a hangup with a loaded server.
832: */
833: if (art==first_required) {
834: if (art==last_required) {
1.3 timbl 835: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 836: status = response(buffer);
837: } else { /* First of many */
1.3 timbl 838: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
839: art, CR, LF, art+1, CR, LF);
1.1 timbl 840: status = response(buffer);
841: }
842: } else if (art==last_required) { /* Last of many */
843: status = response(NULL);
844: } else { /* Middle of many */
1.3 timbl 845: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 846: status = response(buffer);
847: }
848:
849: #else /* NOT OVERLAP */
1.3 timbl 850: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 851: status = response(buffer);
852: #endif /* NOT OVERLAP */
853:
854: if (status == 221) { /* Head follows - parse it:*/
/* The character is kept as an int here, so a negative return from the
** reader is recognised as end of file before it is stored. */
2.20 frystyk 855: int ch;
1.1 timbl 856: p = line; /* Write pointer */
857: done = NO;
858: while(!done){
2.20 frystyk 859: if ((ch = HTInputSocket_getCharacter(isoc)) < 0) {
1.1 timbl 860: abort_socket(); /* End of file, close socket */
861: return; /* End of file on response */
862: }
2.20 frystyk 863: *p++ = (unsigned char) ch;
1.3 timbl 864: if ((ch == LF)
1.1 timbl 865: || (p == &line[LINE_LENGTH]) ) {
866:
867: *--p=0; /* Terminate & chop LF*/
868: p = line; /* Restart at beginning */
869: if (TRACE) fprintf(stderr, "G %s\n", line);
/* Dispatch on the first letter before doing the full field-name match. */
870: switch(line[0]) {
871:
872: case '.':
873: done = (line[1]<' '); /* End of article? */
874: break;
875:
876: case 'S':
877: case 's':
878: if (match(line, "SUBJECT:"))
/* NOTE(review): line+9 assumes a character follows "Subject:". */
879: strcpy(subject, line+9);/* Save subject */
880: break;
881:
882: case 'M':
883: case 'm':
884: if (match(line, "MESSAGE-ID:")) {
/* NOTE(review): assumes the stripped value is non-empty and is wrapped
** in '<' and '>'; an empty value would index before the string. */
885: char * addr = HTStrip(line+11) +1; /* Chop < */
886: addr[strlen(addr)-1]=0; /* Chop > */
887: StrAllocCopy(reference, addr);
888: }
889: break;
890:
891: case 'f':
892: case 'F':
893: if (match(line, "FROM:")) {
894: char * p;
895: strcpy(author,
896: author_name(strchr(line,':')+1));
2.17 frystyk 897: if (*author) { /* Not always there! */
898: p = author + strlen(author) - 1;
899: if (*p==LF) *p = 0; /* Chop off newline */
900: }
1.1 timbl 901: }
902: break;
903:
904: } /* end switch on first character */
905: } /* if end of line */
906: } /* Loop over characters */
907:
1.2 timbl 908: START(HTML_LI);
/* NOTE(review): subject and author can each be up to LINE_LENGTH
** characters, while buffer is only LINE_LENGTH bytes. */
1.1 timbl 909: sprintf(buffer, "\"%s\" - %s", subject, author);
910: if (reference) {
911: write_anchor(buffer, reference);
912: free(reference);
913: reference=0;
914: } else {
1.2 timbl 915: PUTS(buffer);
1.1 timbl 916: }
917:
918:
1.2 timbl 919: /* indicate progress! @@@@@@
1.1 timbl 920: */
921:
922: } /* If good response */
923: } /* Loop over article */
924: } /* If read headers */
1.2 timbl 925: END(HTML_MENU);
926: START(HTML_P);
1.1 timbl 927:
928: /* Link to later articles
929: */
930: if (last_required<last) {
931: int after; /* End of article after */
932: after = last_required+CHUNK_SIZE;
933: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
934: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
935: if (TRACE) fprintf(stderr, " Block after is %s\n", buffer);
1.2 timbl 936: PUTS( "(");
937: start_anchor(buffer);
938: PUTS( "Later articles");
939: END(HTML_A);
940: PUTS( "...)\n");
1.1 timbl 941: }
2.16 luotonen 942:
/* Only the label depends on POSTING; the block below is always compiled,
** but without POSTING the early returns above never reach it. */
2.19 luotonen 943: #ifdef POSTING
944: add_post:
945: #endif
2.16 luotonen 946: {
947: char *href=0;
948: START(HTML_HR);
949:
950: StrAllocCopy(href,"newspost:");
951: StrAllocCat(href,groupName);
952: start_anchor(href);
953: PUTS("Post to ");
954: PUTS(groupName);
955: END(HTML_A);
956:
957: free(href);
958: }
1.1 timbl 959:
960:
961: }
962:
963:
964: /* Load by name HTLoadNews
965: ** ============
966: */
2.13 timbl 967: PUBLIC int HTLoadNews ARGS1(HTRequest *, request)
1.1 timbl 968: {
2.19 luotonen 969: char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 970: char command[257]; /* The whole command */
971: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
972: int status; /* tcp return */
973: int retries; /* A count of how hard we have tried */
974: BOOL group_wanted; /* Flag: group was asked for, not article */
975: BOOL list_wanted; /* Flag: group was asked for, not article */
976: int first, last; /* First and last articles asked for */
977:
2.10 timbl 978: diagnostic = (request->output_format == WWW_SOURCE); /* set global flag */
1.1 timbl 979:
980: if (TRACE) fprintf(stderr, "HTNews: Looking for %s\n", arg);
981:
982: if (!initialized) initialized = initialize();
983: if (!initialized) return -1; /* FAIL */
984:
985: {
2.19 luotonen 986: char * p1=arg;
1.1 timbl 987:
988: /* We will ask for the document, omitting the host name & anchor.
989: **
990: ** Syntax of address is
991: ** xxx@yyy Article
992: ** <xxx@yyy> Same article
993: ** xxxxx News group (no "@")
994: ** group/n1-n2 Articles n1 to n2 in group
995: */
996: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
997: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
998:
999: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
1000: /* Don't use HTParse because news: access doesn't follow traditional
1001: rules. For instance, if the article reference contains a '#',
1002: the rest of it is lost -- JFG 10/7/92, from a bug report */
1003: if (!strncasecomp (arg, "news:", 5))
1004: p1 = arg + 5; /* Skip "news:" prefix */
2.19 luotonen 1005: HTUnEscape(p1); /* AL May 2, 1994 */
1006: HTCleanTelnetString(p1); /* Prevent security holes */
1.1 timbl 1007: if (list_wanted) {
2.16 luotonen 1008: strcpy(command, "LIST NEWSGROUPS");
1.1 timbl 1009: } else if (group_wanted) {
1010: char * slash = strchr(p1, '/');
1011: strcpy(command, "GROUP ");
1012: first = 0;
1013: last = 0;
1014: if (slash) {
1015: *slash = 0;
1016: strcpy(groupName, p1);
1017: *slash = '/';
1018: (void) sscanf(slash+1, "%d-%d", &first, &last);
1019: } else {
1020: strcpy(groupName, p1);
1021: }
1022: strcat(command, groupName);
1023: } else {
1024: strcpy(command, "ARTICLE ");
1025: if (strchr(p1, '<')==0) strcat(command,"<");
1026: strcat(command, p1);
1027: if (strchr(p1, '>')==0) strcat(command,">");
1028: }
1029:
1.3 timbl 1030: {
1031: char * p = command + strlen(command);
1032: *p++ = CR; /* Macros to be correct on Mac */
1033: *p++ = LF;
1034: *p++ = 0;
1035: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
1036: }
1.1 timbl 1037: } /* scope of p1 */
1038:
1039: if (!*arg) return NO; /* Ignore if no name */
1040:
1041:
1042: /* Make a hypertext object with an anchor list.
1043: */
2.10 timbl 1044: node_anchor = request->anchor;
2.11 timbl 1045: target = HTML_new(request, NULL, WWW_HTML,
1046: request->output_format, request->output_stream);
1.2 timbl 1047: targetClass = *target->isa; /* Copy routine entry points */
1048:
1.1 timbl 1049:
1050: /* Now, let's get a stream setup up from the NewsHost:
1051: */
1052: for(retries=0;retries<2; retries++){
1053:
1054: if (s<0) {
1055: NEWS_PROGRESS("Connecting to NewsHost ...");
1056: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1057: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
1058: if (status<0){
1059: NETCLOSE(s);
1060: s = -1;
2.21 frystyk 1061: #ifdef OLD_CODE
1062: char message[256];
1.1 timbl 1063: if (TRACE) fprintf(stderr, "HTNews: Unable to connect to news host.\n");
1064: /* if (retries<=1) continue; WHY TRY AGAIN ? */
1065: sprintf(message,
1066: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 1067: HTNewsHost);
2.15 luotonen 1068: return HTLoadError(request, 500, message);
2.21 frystyk 1069: #endif /* OLD_CODE */
1070: {
1071: char *unescaped = NULL;
1072: StrAllocCopy(unescaped, arg);
1073: HTUnEscape(unescaped);
1074: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1075: (void *) unescaped,
1076: (int) strlen(unescaped), "HTLoadNews");
1077: free(unescaped);
1078: return -1;
1079: }
1.1 timbl 1080: } else {
1081: if (TRACE) fprintf(stderr, "HTNews: Connected to news host %s.\n",
1.2 timbl 1082: HTNewsHost);
2.11 timbl 1083: isoc = HTInputSocket_new(s); /* set up buffering */
1.1 timbl 1084: if ((response(NULL) / 100) !=2) {
2.21 frystyk 1085: int length = strlen(response_text);
1086: NETCLOSE(s);
1087: HTInputSocket_free(isoc);
1088: s = -1;
1089: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NEWS_SERVER,
1090: (void *) response_text, length < 50 ?
1091: length : 50, "HTLoadNews");
1092: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1093: (void *) HTNewsHost,
1094: (int) strlen(HTNewsHost), "HTLoadNews");
1095: return -1;
1096: }
1097: #ifdef OLD_CODE
2.8 timbl 1098: char message[BIG];
1099: sprintf(message,
1100: "Can't read news info. News host %.20s responded: %.200s",
1101: HTNewsHost, response_text);
2.15 luotonen 1102: return HTLoadError(request, 500, message);
2.21 frystyk 1103: }
1104: #endif /* OLD_CODE */
1.1 timbl 1105: }
1106: } /* If needed opening */
1107:
1.2 timbl 1108: /* @@@@@@@@@@@@@@Tell user something's happening */
1109:
1.1 timbl 1110: status = response(command);
1111: if (status<0) break;
2.19 luotonen 1112: if (status >= 411 && status <= 430) break; /* no such article/group */
1.1 timbl 1113: if ((status/ 100) !=2) {
2.8 timbl 1114: HTProgress(response_text);
1.1 timbl 1115: /* NXRunAlertPanel("News access", response_text,
1116: NULL,NULL,NULL);
1117: */
1118: NETCLOSE(s);
2.11 timbl 1119: HTInputSocket_free(isoc);
1.1 timbl 1120: s = -1;
1121: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
1122: continue; /* Try again */
1123: }
1124:
1125: /* Load a group, article, etc
1126: */
1.2 timbl 1127:
1.1 timbl 1128:
1129: if (list_wanted) read_list();
1130: else if (group_wanted) read_group(groupName, first, last);
1131: else read_article();
1132:
2.6 timbl 1133: (*targetClass.free)(target);
1.2 timbl 1134: return HT_LOADED;
1.1 timbl 1135:
1136: } /* Retry loop */
1137:
1.2 timbl 1138:
2.8 timbl 1139: /* HTAlert("Sorry, could not load requested news.\n"); */
1140:
1.1 timbl 1141: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
1142: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
1143:
2.19 luotonen 1144: (*targetClass.free)(target); /* AL May 2, 1994 */
1.2 timbl 1145: return HT_LOADED;
1.1 timbl 1146: }
1147:
/* Protocol table entry for news access: initialised with the name "news",
** the HTLoadNews function, and two NULL slots.
** NOTE(review): the meaning of each slot is fixed by the HTProtocol
** declaration, which is not visible in this file section -- confirm there.
*/
2.10 timbl 1148: GLOBALDEF PUBLIC HTProtocol HTNews = { "news", HTLoadNews, NULL, NULL};
Webmaster