Annotation of libwww/Library/src/HTNews.c, revision 2.26
2.26 ! frystyk 1: /* HTNews.c
! 2: ** NEWS ACCESS
! 3: **
! 4: ** (c) COPYRIGHT CERN 1994.
! 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
7: ** History:
8: ** 26 Sep 90 Written TBL
9: ** 29 Nov 91 Downgraded to C, for portable implementation.
2.19 luotonen 10: ** 16 Feb 94 AL Added Lou Montulli's Lynx & LIST NEWSGROUPS diffs.
11: ** 2 May 94 AL Added HTUnEscape() to HTLoadNews(), and
12: ** fixed a possible security hole when the URL contains
13: ** a newline, that could cause multiple commands to be
14: ** sent to an NNTP server.
2.23 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 16: */
1.2 timbl 17: /* Implements:
18: */
19: #include "HTNews.h"
1.1 timbl 20:
1.3 timbl 21: #define CR FROMASCII('\015') /* Must be converted to ^M for transmission */
22: #define LF FROMASCII('\012') /* Must be converted to ^J for transmission */
23:
1.1 timbl 24: #define NEWS_PORT 119 /* See rfc977 */
25: #define APPEND /* Use append methods */
26: #define MAX_CHUNK 40 /* Largest number of articles in one window */
27: #define CHUNK_SIZE 20 /* Number of articles for quick display */
28:
29: #ifndef DEFAULT_NEWS_HOST
30: #define DEFAULT_NEWS_HOST "news"
31: #endif
32: #ifndef SERVER_FILE
33: #define SERVER_FILE "/usr/local/lib/rn/server"
34: #endif
35:
36: #include <ctype.h>
37: #include "HTUtils.h" /* Coding convention macros */
38: #include "tcp.h"
39:
1.2 timbl 40: #include "HTML.h"
1.1 timbl 41: #include "HTParse.h"
42: #include "HTFormat.h"
2.8 timbl 43: #include "HTAlert.h"
2.21 frystyk 44: #include "HTError.h"
1.1 timbl 45:
2.8 timbl 46: #define BIG 1024 /* @@@ */
47:
1.2 timbl 48: struct _HTStructured {
49: CONST HTStructuredClass * isa;
50: /* ... */
51: };
52:
2.7 timbl 53: #define NEWS_PROGRESS(foo) HTProgress(foo)
1.1 timbl 54:
55:
2.12 timbl 56: #define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1 timbl 57: #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */
58: #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */
59:
60:
61: /* Module-wide variables
62: */
1.2 timbl 63: PUBLIC char * HTNewsHost;
1.1 timbl 64: PRIVATE struct sockaddr_in soc_address; /* Binary network address */
65: PRIVATE int s; /* Socket for NewsHost */
66: PRIVATE char response_text[LINE_LENGTH+1]; /* Last response */
1.2 timbl 67: /* PRIVATE HText * HT; */ /* the new hypertext */
68: PRIVATE HTStructured * target; /* The output sink */
69: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses */
1.1 timbl 70: PRIVATE HTParentAnchor *node_anchor; /* Its anchor */
71: PRIVATE int diagnostic; /* level: 0=none 2=source */
72:
1.2 timbl 73:
74: #define PUTC(c) (*targetClass.put_character)(target, c)
75: #define PUTS(s) (*targetClass.put_string)(target, s)
76: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
77: #define END(e) (*targetClass.end_element)(target, e)
78:
2.11 timbl 79: PUBLIC HTInputSocket *isoc; /* @@@ non-reentrant */
80:
1.2 timbl 81: PUBLIC CONST char * HTGetNewsHost NOARGS
82: {
83: return HTNewsHost;
84: }
1.1 timbl 85:
1.2 timbl 86: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
87: {
88: StrAllocCopy(HTNewsHost, value);
89: }
1.1 timbl 90:
91: /* Initialisation for this module
92: ** ------------------------------
93: **
94: ** Except on the NeXT, we pick up the NewsHost name from
95: **
96: ** 1. Environment variable NNTPSERVER
97: ** 2. File SERVER_FILE
98: ** 3. Compilation time macro DEFAULT_NEWS_HOST
99: **
100: ** On the NeXT, we pick up the NewsHost name from, in order:
101: **
102: ** 1. WorldWideWeb default "NewsHost"
103: ** 2. Global default "NewsHost"
104: ** 3. News default "NewsHost"
105: ** 4. Compilation time macro DEFAULT_NEWS_HOST
106: */
107: PRIVATE BOOL initialized = NO;
108: PRIVATE BOOL initialize NOARGS
109: {
110: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
111: struct sockaddr_in* sin = &soc_address;
112:
113:
114: /* Set up defaults:
115: */
116: sin->sin_family = AF_INET; /* Family = internet, host order */
117: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
118:
119: /* Get name of Host
120: */
121: #ifdef NeXTStep
1.2 timbl 122: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
123: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
124: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 125: #else
126: if (getenv("NNTPSERVER")) {
1.2 timbl 127: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
1.1 timbl 128: if (TRACE) fprintf(stderr, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 129: HTNewsHost);
1.1 timbl 130: } else {
131: char server_name[256];
132: FILE* fp = fopen(SERVER_FILE, "r");
133: if (fp) {
134: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 135: StrAllocCopy(HTNewsHost, server_name);
1.1 timbl 136: if (TRACE) fprintf(stderr,
137: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 138: SERVER_FILE, HTNewsHost);
1.1 timbl 139: }
140: fclose(fp);
141: }
142: }
1.2 timbl 143: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 144: #endif
145:
1.2 timbl 146: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
147: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1 timbl 148:
149: } else { /* Alphanumeric node name: */
1.2 timbl 150: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 151: if (!phost) {
2.7 timbl 152: char message[150]; /* @@@ */
153: sprintf(message,
154: "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
155: "Please define your NNTP server");
156: HTAlert(message);
1.1 timbl 157: CTRACE(tfp,
1.2 timbl 158: "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 159: return NO; /* Fail */
160: }
161: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
162: }
163:
164: if (TRACE) fprintf(stderr,
165: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
166: (unsigned int)ntohs(sin->sin_port),
167: (int)*((unsigned char *)(&sin->sin_addr)+0),
168: (int)*((unsigned char *)(&sin->sin_addr)+1),
169: (int)*((unsigned char *)(&sin->sin_addr)+2),
170: (int)*((unsigned char *)(&sin->sin_addr)+3));
171:
172: s = -1; /* Disconnected */
173:
174: return YES;
175: }
176:
177:
178:
179: /* Send NNTP Command line to remote host & Check Response
180: ** ------------------------------------------------------
181: **
182: ** On entry,
183: ** command points to the command to be sent, including CRLF, or is null
184: ** pointer if no command to be sent.
185: ** On exit,
186: ** Negative status indicates transmission error, socket closed.
187: ** Positive status is an NNTP status.
188: */
189:
190:
191: PRIVATE int response ARGS1(CONST char *,command)
192: {
193: int result;
194: char * p = response_text;
195: if (command) {
196: int status;
197: int length = strlen(command);
198: if (TRACE) fprintf(stderr, "NNTP command to be sent: %s", command);
199: #ifdef NOT_ASCII
200: {
201: CONST char * p;
202: char * q;
203: char ascii[LINE_LENGTH+1];
204: for(p = command, q=ascii; *p; p++, q++) {
205: *q = TOASCII(*p);
206: }
207: status = NETWRITE(s, ascii, length);
208: }
209: #else
210: status = NETWRITE(s, command, length);
211: #endif
212: if (status<0){
213: if (TRACE) fprintf(stderr,
214: "HTNews: Unable to send command. Disconnecting.\n");
215: NETCLOSE(s);
2.11 timbl 216: HTInputSocket_free(isoc);
1.1 timbl 217: s = -1;
218: return status;
219: } /* if bad status */
220: } /* if command to be sent */
221:
222: for(;;) {
1.3 timbl 223: if (((*p++=NEXT_CHAR) == LF)
224: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 225: *p++=0; /* Terminate the string */
226: if (TRACE) fprintf(stderr, "NNTP Response: %s\n", response_text);
227: sscanf(response_text, "%d", &result);
2.19 luotonen 228: if (result >= 411 && result <= 430) { /* no such article/group */
229: char * msg = strchr(response_text,' ');
230: if (!msg) msg = response_text;
231: PUTS("<H1>News error</H1>\n");
232: PUTS(msg);
233: CTRACE(stderr, "News error.. %s", response_text);
234: }
1.1 timbl 235: return result;
236: } /* if end of line */
237:
238: if (*(p-1) < 0) {
239: if (TRACE) fprintf(stderr,
240: "HTNews: EOF on read, closing socket %d\n", s);
241: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 242: HTInputSocket_free(isoc);
1.1 timbl 243: return s = -1; /* End of file on response */
244: }
245: } /* Loop over characters */
246: }
247:
248:
249: /* Case insensitive string comparisons
250: ** -----------------------------------
251: **
252: ** On entry,
253: ** template must be already un upper case.
254: ** unknown may be in upper or lower or mixed case to match.
255: */
2.24 frystyk 256: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,tmplate)
1.1 timbl 257: {
258: CONST char * u = unknown;
2.24 frystyk 259: CONST char * t = tmplate;
1.1 timbl 260: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
261: return (BOOL)(*t==0); /* OK if end of template */
262: }
263:
264: /* Find Author's name in mail address
265: ** ----------------------------------
266: **
267: ** On exit,
268: ** THE EMAIL ADDRESS IS CORRUPTED
269: **
270: ** For example, returns "Tim Berners-Lee" if given any of
271: ** " Tim Berners-Lee <tim@online.cern.ch> "
272: ** or " tim@online.cern.ch ( Tim Berners-Lee ) "
273: */
274: PRIVATE char * author_name ARGS1 (char *,email)
275: {
276: char *s, *e;
277:
278: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
279: if (e>s) {
280: *e=0; /* Chop off everything after the ')' */
281: return HTStrip(s+1); /* Remove leading and trailing spaces */
282: }
283:
284: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
285: if (e>s) {
286: strcpy(s, e+1); /* Remove <...> */
287: return HTStrip(email); /* Remove leading and trailing spaces */
288: }
289:
290: return HTStrip(email); /* Default to the whole thing */
291:
292: }
293:
1.2 timbl 294: /* Start anchor element
295: ** --------------------
296: */
297: PRIVATE void start_anchor ARGS1(CONST char *, href)
298: {
299: BOOL present[HTML_A_ATTRIBUTES];
300: CONST char* value[HTML_A_ATTRIBUTES];
301:
302: {
303: int i;
304: for(i=0; i<HTML_A_ATTRIBUTES; i++)
305: present[i] = (i==HTML_A_HREF);
306: }
307: value[HTML_A_HREF] = href;
308: (*targetClass.start_element)(target, HTML_A , present, value);
309:
310: }
1.1 timbl 311:
2.16 luotonen 312:
313: /* Start link element
314: ** --------------------
315: */
316: PRIVATE void start_link ARGS2(CONST char *, href, CONST char *, rev)
317: {
318: #ifdef WHEN_WE_HAVE_HTMLPLUS
319:
320: BOOL present[HTML_LINK_ATTRIBUTES];
321: CONST char* value[HTML_LINK_ATTRIBUTES];
322:
323: {
324: int i;
325: for(i=0; i<HTML_LINK_ATTRIBUTES; i++)
326: present[i] = (i==HTML_LINK_HREF || i==HTML_LINK_REV);
327: }
328: value[HTML_LINK_HREF] = href;
329: value[HTML_LINK_REV] = rev;
330: (*targetClass.start_element)(target, HTML_LINK , present, value);
331:
332: #endif
333: }
334:
335:
336:
337:
1.1 timbl 338: /* Paste in an Anchor
339: ** ------------------
340: **
341: **
342: ** On entry,
343: ** HT has a selection of zero length at the end.
344: ** text points to the text to be put into the file, 0 terminated.
345: ** addr points to the hypertext refernce address,
346: ** terminated by white space, comma, NULL or '>'
347: */
348: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
349: {
350: char href[LINE_LENGTH+1];
351:
352: {
353: CONST char * p;
354: strcpy(href,"news:");
355: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
356: strncat(href, addr, p-addr); /* Make complete hypertext reference */
357: }
358:
1.2 timbl 359: start_anchor(href);
360: PUTS(text);
361: END(HTML_A);
1.1 timbl 362: }
363:
364:
365: /* Write list of anchors
366: ** ---------------------
367: **
368: ** We take a pointer to a list of objects, and write out each,
369: ** generating an anchor for each.
370: **
371: ** On entry,
372: ** HT has a selection of zero length at the end.
373: ** text points to a comma or space separated list of addresses.
374: ** On exit,
375: ** *text is NOT any more chopped up into substrings.
376: */
377: PRIVATE void write_anchors ARGS1 (char *,text)
378: {
379: char * start = text;
380: char * end;
381: char c;
382: for (;;) {
383: for(;*start && (WHITE(*start)); start++); /* Find start */
384: if (!*start) return; /* (Done) */
385: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
386: if (*end) end++; /* Include comma or space but not NULL */
387: c = *end;
388: *end = 0;
389: write_anchor(start, start);
2.16 luotonen 390: START(HTML_BR);
1.1 timbl 391: *end = c;
392: start = end; /* Point to next one */
393: }
394: }
395:
396: /* Abort the connection abort_socket
397: ** --------------------
398: */
399: PRIVATE void abort_socket NOARGS
400: {
401: if (TRACE) fprintf(stderr,
402: "HTNews: EOF on read, closing socket %d\n", s);
403: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 404: HTInputSocket_free(isoc);
1.2 timbl 405: PUTS("Network Error: connection lost");
406: PUTC('\n');
1.1 timbl 407: s = -1; /* End of file on response */
408: return;
409: }
410:
411: /* Read in an Article read_article
412: ** ------------------
413: **
414: **
415: ** Note the termination condition of a single dot on a line by itself.
416: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
417: ** do not handle it here.
418: **
419: ** On entry,
420: ** s Global socket number is OK
421: ** HT Global hypertext object is ready for appending text
422: */
423: PRIVATE void read_article NOARGS
424: {
425:
426: char line[LINE_LENGTH+1];
427: char *references=NULL; /* Hrefs for other articles */
428: char *newsgroups=NULL; /* Newsgroups list */
429: char *p = line;
430: BOOL done = NO;
431:
432: /* Read in the HEADer of the article:
433: **
434: ** The header fields are either ignored, or formatted and put into the
435: ** Text.
436: */
437: if (!diagnostic) {
1.2 timbl 438: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 439: while(!done){
440: char ch = *p++ = NEXT_CHAR;
441: if (ch==(char)EOF) {
442: abort_socket(); /* End of file, close socket */
443: return; /* End of file on response */
444: }
1.3 timbl 445: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 446: *--p=0; /* Terminate the string */
447: if (TRACE) fprintf(stderr, "H %s\n", line);
448:
449: if (line[0]=='.') {
450: if (line[1]<' ') { /* End of article? */
451: done = YES;
452: break;
453: }
454:
455: } else if (line[0]<' ') {
456: break; /* End of Header? */
2.16 luotonen 457:
1.1 timbl 458: } else if (match(line, "SUBJECT:")) {
1.2 timbl 459: END(HTML_ADDRESS);
460: START(HTML_TITLE); /** Uuugh! @@@ */
2.16 luotonen 461: PUTS(line+9);
462: END(HTML_TITLE);
463: START(HTML_H1);
1.2 timbl 464: PUTS(line+8);
2.16 luotonen 465: END(HTML_H1);
1.2 timbl 466: START(HTML_ADDRESS);
2.16 luotonen 467:
1.1 timbl 468: } else if (match(line, "DATE:")
469: || match(line, "ORGANIZATION:")) {
2.16 luotonen 470: PUTS(strchr(line,':')+2);
471: START(HTML_BR);
472:
473: } else if(match(line, "FROM:")) {
474: char * temp=0;
475: char * href=0;
476: char *cp1, *cp2;
477:
478: /* copy into temporary storage */
479: StrAllocCopy(temp, strchr(line,':')+1);
480:
481: cp1=temp;
482: while(isspace(*cp1)) cp1++;
483: /* remove space and stuff after */
484: if((cp2 = strchr(cp1,' ')) != NULL)
485: *cp2 = '\0';
486:
487: StrAllocCopy(href,"mailto:");
488: StrAllocCat(href,cp1);
489:
490: start_anchor(href);
491: PUTS("Reply to ");
492: PUTS(strchr(line,':')+1);
493: END(HTML_A);
494: START(HTML_BR);
495:
496: /* put in the owner as a link rel. as well */
497: start_link(href, "made");
498:
499: /* free of temp vars */
500: free(temp);
501: free(href);
502:
1.1 timbl 503: } else if (match(line, "NEWSGROUPS:")) {
504: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
505:
506: } else if (match(line, "REFERENCES:")) {
507: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
508:
509: } /* end if match */
510: p = line; /* Restart at beginning */
511: } /* if end of line */
512: } /* Loop over characters */
2.16 luotonen 513: END(HTML_ADDRESS);
1.1 timbl 514:
1.2 timbl 515: if (newsgroups || references) {
2.16 luotonen 516: START(HTML_DL);
1.2 timbl 517: if (newsgroups) {
2.16 luotonen 518: #ifdef POSTING
519: char *href=0;
520: #endif
521:
1.2 timbl 522: (*targetClass.start_element)(target, HTML_DT , 0, 0);
523: PUTS("Newsgroups:");
524: (*targetClass.start_element)(target, HTML_DD , 0, 0);
525: write_anchors(newsgroups);
2.16 luotonen 526:
527: #ifdef POSTING
528: /* make posting possible */
529: StrAllocCopy(href,"newspost:");
530: StrAllocCat(href,newsgroups);
531: START(HTML_DT);
532: start_anchor(href);
533: PUTS("Reply to newsgroup(s)");
534: END(HTML_A);
535: #endif
536:
1.2 timbl 537: free(newsgroups);
538: }
539:
540: if (references) {
541: (*targetClass.start_element)(target, HTML_DT , 0, 0);
542: PUTS("References:");
543: (*targetClass.start_element)(target, HTML_DD , 0, 0);
544: write_anchors(references);
545: free(references);
546: }
2.16 luotonen 547: #ifdef WHEN_WE_HAVE_HTMLPLUS
548: (*targetClass.end_element)(target, HTML_DLC);
549: #else
2.10 timbl 550: (*targetClass.end_element)(target, HTML_DL);
2.16 luotonen 551: #endif
1.1 timbl 552: }
1.2 timbl 553: PUTS("\n\n\n");
1.1 timbl 554:
555: }
556:
557: /* Read in the BODY of the Article:
558: */
1.2 timbl 559: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
560:
1.1 timbl 561: p = line;
562: while(!done){
563: char ch = *p++ = NEXT_CHAR;
564: if (ch==(char)EOF) {
565: abort_socket(); /* End of file, close socket */
566: return; /* End of file on response */
567: }
1.3 timbl 568: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 569: *p++=0; /* Terminate the string */
570: if (TRACE) fprintf(stderr, "B %s", line);
571: if (line[0]=='.') {
572: if (line[1]<' ') { /* End of article? */
573: done = YES;
574: break;
575: } else { /* Line starts with dot */
1.2 timbl 576: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 577: }
578: } else {
579:
580: /* Normal lines are scanned for buried references to other articles.
581: ** Unfortunately, it will pick up mail addresses as well!
582: */
583: char *l = line;
584: char * p;
2.14 luotonen 585: while ((p=strchr(l, '<'))) {
1.1 timbl 586: char *q = strchr(p,'>');
587: char *at = strchr(p, '@');
588: if (q && at && at<q) {
589: char c = q[1];
590: q[1] = 0; /* chop up */
591: *p = 0;
1.2 timbl 592: PUTS(l);
1.1 timbl 593: *p = '<'; /* again */
594: *q = 0;
1.2 timbl 595: start_anchor(p+1);
1.1 timbl 596: *q = '>'; /* again */
1.2 timbl 597: PUTS(p);
598: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 599: q[1] = c; /* again */
600: l=q+1;
601: } else break; /* line has unmatched <> */
602: }
1.2 timbl 603: PUTS( l); /* Last bit of the line */
1.1 timbl 604: } /* if not dot */
605: p = line; /* Restart at beginning */
606: } /* if end of line */
607: } /* Loop over characters */
1.2 timbl 608:
609: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 610: }
611:
612:
613: /* Read in a List of Newsgroups
614: ** ----------------------------
615: */
616: /*
617: ** Note the termination condition of a single dot on a line by itself.
618: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
619: ** do not handle it here.
620: */
621: PRIVATE void read_list NOARGS
622: {
623:
624: char line[LINE_LENGTH+1];
625: char *p;
626: BOOL done = NO;
627:
628: /* Read in the HEADer of the article:
629: **
630: ** The header fields are either ignored, or formatted and put into the
631: ** Text.
632: */
1.2 timbl 633: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
634: PUTS( "Newsgroups");
635: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 636: p = line;
2.16 luotonen 637: (*targetClass.start_element)(target, HTML_DL, 0, 0);
1.1 timbl 638: while(!done){
639: char ch = *p++ = NEXT_CHAR;
640: if (ch==(char)EOF) {
641: abort_socket(); /* End of file, close socket */
642: return; /* End of file on response */
643: }
1.3 timbl 644: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 645: *p++=0; /* Terminate the string */
646: if (TRACE) fprintf(stderr, "B %s", line);
2.16 luotonen 647: (*targetClass.start_element)(target, HTML_DT , 0, 0);
1.1 timbl 648: if (line[0]=='.') {
649: if (line[1]<' ') { /* End of article? */
650: done = YES;
651: break;
652: } else { /* Line starts with dot */
1.2 timbl 653: PUTS( &line[1]);
1.1 timbl 654: }
655: } else {
656:
657: /* Normal lines are scanned for references to newsgroups.
658: */
2.16 luotonen 659: int i=0;
660:
661: /* find whitespace if it exits */
662: for(; line[i] != '\0' && !WHITE(line[i]); i++)
663: ; /* null body */
664:
665: if(line[i] != '\0') {
666: line[i] = '\0';
667: write_anchor(line, line);
668: (*targetClass.start_element)(target, HTML_DD , 0, 0);
669: PUTS(&line[i+1]); /* put description */
670: } else {
671: write_anchor(line, line);
672: }
673:
674: #ifdef OLD_CODE
1.1 timbl 675: char group[LINE_LENGTH];
676: int first, last;
677: char postable;
678: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
679: write_anchor(line, group);
680: else
1.2 timbl 681: PUTS(line);
2.16 luotonen 682: #endif /*OLD_CODE*/
683:
1.1 timbl 684: } /* if not dot */
685: p = line; /* Restart at beginning */
686: } /* if end of line */
687: } /* Loop over characters */
2.16 luotonen 688: (*targetClass.end_element)(target, HTML_DL);
1.1 timbl 689: }
690:
691:
692: /* Read in a Newsgroup
693: ** -------------------
694: ** Unfortunately, we have to ask for each article one by one if we
695: ** want more than one field.
696: **
697: */
698: PRIVATE void read_group ARGS3(
699: CONST char *,groupName,
700: int,first_required,
701: int,last_required
702: )
703: {
704: char line[LINE_LENGTH+1];
705: char author[LINE_LENGTH+1];
706: char subject[LINE_LENGTH+1];
707: char *p;
708: BOOL done;
709:
710: char buffer[LINE_LENGTH];
711: char *reference=0; /* Href for article */
712: int art; /* Article number WITHIN GROUP */
713: int status, count, first, last; /* Response fields */
714: /* count is only an upper limit */
715:
716: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
2.17 frystyk 717: if(TRACE)
718: fprintf(stderr,
719: "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
720: status, count, first, last, first_required, last_required);
1.1 timbl 721: if (last==0) {
1.2 timbl 722: PUTS( "\nNo articles in this group.\n");
2.16 luotonen 723: #ifdef POSTING
724: goto add_post;
725: #endif
1.1 timbl 726: return;
727: }
728:
729: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
730: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
731:
732: if (first_required<first) first_required = first; /* clip */
733: if ((last_required==0) || (last_required > last)) last_required = last;
734:
735: if (last_required<=first_required) {
1.2 timbl 736: PUTS( "\nNo articles in this range.\n");
2.16 luotonen 737: #ifdef POSTING
738: goto add_post;
739: #endif
1.1 timbl 740: return;
741: }
742:
743: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
744: first_required = last_required-CHUNK_SIZE+1;
745: }
2.18 frystyk 746: if (TRACE) fprintf (stderr, " Chunk will be (%d-%d)\n",
2.16 luotonen 747: first_required, last_required);
1.1 timbl 748:
1.2 timbl 749: /* Set window title
750: */
751: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
752: groupName, first_required, last_required);
753: START(HTML_TITLE);
754: PUTS(buffer);
755: END(HTML_TITLE);
756:
1.1 timbl 757: /* Link to earlier articles
758: */
759: if (first_required>first) {
760: int before; /* Start of one before */
761: if (first_required-MAX_CHUNK <= first) before = first;
762: else before = first_required-CHUNK_SIZE;
763: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
764: if (TRACE) fprintf(stderr, " Block before is %s\n", buffer);
1.2 timbl 765: PUTS( " (");
766: start_anchor(buffer);
767: PUTS("Earlier articles");
768: END(HTML_A);
769: PUTS( "...)\n");
1.1 timbl 770: }
771:
772: done = NO;
773:
774: /*#define USE_XHDR*/
775: #ifdef USE_XHDR
776: if (count>FAST_THRESHOLD) {
777: sprintf(buffer,
778: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
779: count, groupName);
1.2 timbl 780: PUTS(buffer);
1.3 timbl 781: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 782: status = response(buffer);
783: if (status==221) {
784:
785: p = line;
786: while(!done){
787: char ch = *p++ = NEXT_CHAR;
788: if (ch==(char)EOF) {
789: abort_socket(); /* End of file, close socket */
790: return; /* End of file on response */
791: }
792: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
793: *p++=0; /* Terminate the string */
794: if (TRACE) fprintf(stderr, "X %s", line);
795: if (line[0]=='.') {
796: if (line[1]<' ') { /* End of article? */
797: done = YES;
798: break;
799: } else { /* Line starts with dot */
800: /* Ignore strange line */
801: }
802: } else {
803:
804: /* Normal lines are scanned for references to articles.
805: */
806: char * space = strchr(line, ' ');
807: if (space++)
808: write_anchor(space, space);
809: } /* if not dot */
810: p = line; /* Restart at beginning */
811: } /* if end of line */
812: } /* Loop over characters */
813:
814: /* leaving loop with "done" set */
815: } /* Good status */
816: };
817: #endif
818:
819: /* Read newsgroup using individual fields:
820: */
821: if (!done) {
822: if (first==first_required && last==last_required)
1.2 timbl 823: PUTS("\nAll available articles in ");
824: else PUTS( "\nArticles in ");
825: PUTS(groupName);
826: START(HTML_MENU);
1.1 timbl 827: for(art=first_required; art<=last_required; art++) {
828:
829: /*#define OVERLAP*/
830: #ifdef OVERLAP
831: /* With this code we try to keep the server running flat out by queuing just
832: ** one extra command ahead of time. We assume (1) that the server won't abort
833: ** if it gets input during output, and (2) that TCP buffering is enough for the
834: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
835: ** had a hangup with a loaded server.
836: */
837: if (art==first_required) {
838: if (art==last_required) {
1.3 timbl 839: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 840: status = response(buffer);
841: } else { /* First of many */
1.3 timbl 842: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
843: art, CR, LF, art+1, CR, LF);
1.1 timbl 844: status = response(buffer);
845: }
846: } else if (art==last_required) { /* Last of many */
847: status = response(NULL);
848: } else { /* Middle of many */
1.3 timbl 849: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 850: status = response(buffer);
851: }
852:
853: #else /* NOT OVERLAP */
1.3 timbl 854: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 855: status = response(buffer);
856: #endif /* NOT OVERLAP */
857:
858: if (status == 221) { /* Head follows - parse it:*/
2.20 frystyk 859: int ch;
1.1 timbl 860: p = line; /* Write pointer */
861: done = NO;
862: while(!done){
2.20 frystyk 863: if ((ch = HTInputSocket_getCharacter(isoc)) < 0) {
1.1 timbl 864: abort_socket(); /* End of file, close socket */
865: return; /* End of file on response */
866: }
2.20 frystyk 867: *p++ = (unsigned char) ch;
1.3 timbl 868: if ((ch == LF)
1.1 timbl 869: || (p == &line[LINE_LENGTH]) ) {
870:
871: *--p=0; /* Terminate & chop LF*/
872: p = line; /* Restart at beginning */
873: if (TRACE) fprintf(stderr, "G %s\n", line);
874: switch(line[0]) {
875:
876: case '.':
877: done = (line[1]<' '); /* End of article? */
878: break;
879:
880: case 'S':
881: case 's':
882: if (match(line, "SUBJECT:"))
883: strcpy(subject, line+9);/* Save subject */
884: break;
885:
886: case 'M':
887: case 'm':
888: if (match(line, "MESSAGE-ID:")) {
889: char * addr = HTStrip(line+11) +1; /* Chop < */
890: addr[strlen(addr)-1]=0; /* Chop > */
891: StrAllocCopy(reference, addr);
892: }
893: break;
894:
895: case 'f':
896: case 'F':
897: if (match(line, "FROM:")) {
898: char * p;
899: strcpy(author,
900: author_name(strchr(line,':')+1));
2.17 frystyk 901: if (*author) { /* Not always there! */
902: p = author + strlen(author) - 1;
903: if (*p==LF) *p = 0; /* Chop off newline */
904: }
1.1 timbl 905: }
906: break;
907:
908: } /* end switch on first character */
909: } /* if end of line */
910: } /* Loop over characters */
911:
1.2 timbl 912: START(HTML_LI);
1.1 timbl 913: sprintf(buffer, "\"%s\" - %s", subject, author);
914: if (reference) {
915: write_anchor(buffer, reference);
916: free(reference);
917: reference=0;
918: } else {
1.2 timbl 919: PUTS(buffer);
1.1 timbl 920: }
921:
922:
1.2 timbl 923: /* indicate progress! @@@@@@
1.1 timbl 924: */
925:
926: } /* If good response */
927: } /* Loop over article */
928: } /* If read headers */
1.2 timbl 929: END(HTML_MENU);
930: START(HTML_P);
1.1 timbl 931:
932: /* Link to later articles
933: */
934: if (last_required<last) {
935: int after; /* End of article after */
936: after = last_required+CHUNK_SIZE;
937: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
938: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
939: if (TRACE) fprintf(stderr, " Block after is %s\n", buffer);
1.2 timbl 940: PUTS( "(");
941: start_anchor(buffer);
942: PUTS( "Later articles");
943: END(HTML_A);
944: PUTS( "...)\n");
1.1 timbl 945: }
2.16 luotonen 946:
2.19 luotonen 947: #ifdef POSTING
948: add_post:
949: #endif
2.16 luotonen 950: {
951: char *href=0;
952: START(HTML_HR);
953:
954: StrAllocCopy(href,"newspost:");
955: StrAllocCat(href,groupName);
956: start_anchor(href);
957: PUTS("Post to ");
958: PUTS(groupName);
959: END(HTML_A);
960:
961: free(href);
962: }
1.1 timbl 963:
964:
965: }
966:
967:
968: /* Load by name HTLoadNews
969: ** ============
970: */
2.13 timbl 971: PUBLIC int HTLoadNews ARGS1(HTRequest *, request)
1.1 timbl 972: {
2.19 luotonen 973: char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 974: char command[257]; /* The whole command */
975: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
976: int status; /* tcp return */
977: int retries; /* A count of how hard we have tried */
978: BOOL group_wanted; /* Flag: group was asked for, not article */
979: BOOL list_wanted; /* Flag: group was asked for, not article */
980: int first, last; /* First and last articles asked for */
981:
2.10 timbl 982: diagnostic = (request->output_format == WWW_SOURCE); /* set global flag */
1.1 timbl 983:
984: if (TRACE) fprintf(stderr, "HTNews: Looking for %s\n", arg);
985:
986: if (!initialized) initialized = initialize();
987: if (!initialized) return -1; /* FAIL */
988:
989: {
2.19 luotonen 990: char * p1=arg;
1.1 timbl 991:
992: /* We will ask for the document, omitting the host name & anchor.
993: **
994: ** Syntax of address is
995: ** xxx@yyy Article
996: ** <xxx@yyy> Same article
997: ** xxxxx News group (no "@")
998: ** group/n1-n2 Articles n1 to n2 in group
999: */
1000: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
1001: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
1002:
1003: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
1004: /* Don't use HTParse because news: access doesn't follow traditional
1005: rules. For instance, if the article reference contains a '#',
1006: the rest of it is lost -- JFG 10/7/92, from a bug report */
1007: if (!strncasecomp (arg, "news:", 5))
1008: p1 = arg + 5; /* Skip "news:" prefix */
2.19 luotonen 1009: HTUnEscape(p1); /* AL May 2, 1994 */
1010: HTCleanTelnetString(p1); /* Prevent security holes */
1.1 timbl 1011: if (list_wanted) {
2.16 luotonen 1012: strcpy(command, "LIST NEWSGROUPS");
1.1 timbl 1013: } else if (group_wanted) {
1014: char * slash = strchr(p1, '/');
1015: strcpy(command, "GROUP ");
1016: first = 0;
1017: last = 0;
1018: if (slash) {
1019: *slash = 0;
1020: strcpy(groupName, p1);
1021: *slash = '/';
1022: (void) sscanf(slash+1, "%d-%d", &first, &last);
1023: } else {
1024: strcpy(groupName, p1);
1025: }
1026: strcat(command, groupName);
1027: } else {
1028: strcpy(command, "ARTICLE ");
1029: if (strchr(p1, '<')==0) strcat(command,"<");
1030: strcat(command, p1);
1031: if (strchr(p1, '>')==0) strcat(command,">");
1032: }
1033:
1.3 timbl 1034: {
1035: char * p = command + strlen(command);
1036: *p++ = CR; /* Macros to be correct on Mac */
1037: *p++ = LF;
1038: *p++ = 0;
1039: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
1040: }
1.1 timbl 1041: } /* scope of p1 */
1042:
1043: if (!*arg) return NO; /* Ignore if no name */
1044:
1045:
1046: /* Make a hypertext object with an anchor list.
1047: */
2.10 timbl 1048: node_anchor = request->anchor;
2.11 timbl 1049: target = HTML_new(request, NULL, WWW_HTML,
1050: request->output_format, request->output_stream);
1.2 timbl 1051: targetClass = *target->isa; /* Copy routine entry points */
1052:
1.1 timbl 1053:
1054: /* Now, let's get a stream set up from the NewsHost:
1055: */
1056: for(retries=0;retries<2; retries++){
1057:
1058: if (s<0) {
1059: NEWS_PROGRESS("Connecting to NewsHost ...");
1060: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1061: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
1062: if (status<0){
1063: NETCLOSE(s);
1064: s = -1;
2.21 frystyk 1065: #ifdef OLD_CODE
1066: char message[256];
1.1 timbl 1067: if (TRACE) fprintf(stderr, "HTNews: Unable to connect to news host.\n");
1068: /* if (retries<=1) continue; WHY TRY AGAIN ? */
1069: sprintf(message,
1070: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 1071: HTNewsHost);
2.15 luotonen 1072: return HTLoadError(request, 500, message);
2.21 frystyk 1073: #endif /* OLD_CODE */
1074: {
1075: char *unescaped = NULL;
1076: StrAllocCopy(unescaped, arg);
1077: HTUnEscape(unescaped);
1078: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1079: (void *) unescaped,
1080: (int) strlen(unescaped), "HTLoadNews");
1081: free(unescaped);
1082: return -1;
1083: }
1.1 timbl 1084: } else {
1085: if (TRACE) fprintf(stderr, "HTNews: Connected to news host %s.\n",
1.2 timbl 1086: HTNewsHost);
2.11 timbl 1087: isoc = HTInputSocket_new(s); /* set up buffering */
1.1 timbl 1088: if ((response(NULL) / 100) !=2) {
2.21 frystyk 1089: int length = strlen(response_text);
1090: NETCLOSE(s);
1091: HTInputSocket_free(isoc);
1092: s = -1;
1093: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NEWS_SERVER,
1094: (void *) response_text, length < 50 ?
1095: length : 50, "HTLoadNews");
1096: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1097: (void *) HTNewsHost,
1098: (int) strlen(HTNewsHost), "HTLoadNews");
1099: return -1;
1100: }
1101: #ifdef OLD_CODE
2.8 timbl 1102: char message[BIG];
1103: sprintf(message,
1104: "Can't read news info. News host %.20s responded: %.200s",
1105: HTNewsHost, response_text);
2.15 luotonen 1106: return HTLoadError(request, 500, message);
2.21 frystyk 1107: }
1108: #endif /* OLD_CODE */
1.1 timbl 1109: }
1110: } /* If needed opening */
1111:
1.2 timbl 1112: /* @@@@@@@@@@@@@@Tell user something's happening */
1113:
1.1 timbl 1114: status = response(command);
1115: if (status<0) break;
2.19 luotonen 1116: if (status >= 411 && status <= 430) break; /* no such article/group */
1.1 timbl 1117: if ((status/ 100) !=2) {
2.8 timbl 1118: HTProgress(response_text);
1.1 timbl 1119: /* NXRunAlertPanel("News access", response_text,
1120: NULL,NULL,NULL);
1121: */
1122: NETCLOSE(s);
2.11 timbl 1123: HTInputSocket_free(isoc);
1.1 timbl 1124: s = -1;
1125: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
1126: continue; /* Try again */
1127: }
1128:
1129: /* Load a group, article, etc
1130: */
1.2 timbl 1131:
1.1 timbl 1132:
1133: if (list_wanted) read_list();
1134: else if (group_wanted) read_group(groupName, first, last);
1135: else read_article();
1136:
2.23 duns 1137: (*targetClass._free)(target);
1.2 timbl 1138: return HT_LOADED;
1.1 timbl 1139:
1140: } /* Retry loop */
1141:
1.2 timbl 1142:
2.8 timbl 1143: /* HTAlert("Sorry, could not load requested news.\n"); */
1144:
1.1 timbl 1145: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
1146: NULL,NULL,NULL, arg);No -- message earlier will have covered it */
1147:
2.23 duns 1148: (*targetClass._free)(target); /* AL May 2, 1994 */
1.2 timbl 1149: return HT_LOADED;
1.1 timbl 1150: }
1151:
/* Protocol descriptor for news access.
**
** The initializer names the "news" scheme and supplies HTLoadNews as a
** function entry. NOTE(review): the meaning of SOC_BLOCK and of the two
** trailing NULL slots depends on the field order of HTProtocol, which is
** declared outside this file -- confirm against its definition.
*/
2.25 frystyk 1152: GLOBALDEF PUBLIC HTProtocol HTNews = {
1153: "news", SOC_BLOCK, HTLoadNews, NULL, NULL
1154: };
Webmaster