Annotation of libwww/Library/src/HTNews.c, revision 2.6
1.1 timbl 1: /* NEWS ACCESS HTNews.c
2: ** ===========
3: **
4: ** History:
5: ** 26 Sep 90 Written TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
7: */
1.2 timbl 8: /* Implements:
9: */
10: #include "HTNews.h"
1.1 timbl 11:
/*	Line terminators, expressed through FROMASCII so that the value sent
**	on the wire is the ASCII control code (^M / ^J).
*/
#define CR   FROMASCII('\015')	/* Carriage return (^M) */
#define LF   FROMASCII('\012')	/* Line feed (^J) */

/*	Protocol and display parameters
*/
#define NEWS_PORT  119		/* See rfc977 */
#define APPEND			/* Use append methods */
#define MAX_CHUNK  40		/* Largest number of articles in one window */
#define CHUNK_SIZE 20		/* Number of articles for quick display */

/*	Fallbacks which the build may override
*/
#ifndef DEFAULT_NEWS_HOST
#define DEFAULT_NEWS_HOST "news"
#endif
#ifndef SERVER_FILE
#define SERVER_FILE "/usr/local/lib/rn/server"
#endif
26:
27: #include <ctype.h>
28: #include "HTUtils.h" /* Coding convention macros */
29: #include "tcp.h"
30:
1.2 timbl 31: #include "HTML.h"
1.1 timbl 32: #include "HTParse.h"
33: #include "HTFormat.h"
34:
1.2 timbl 35: struct _HTStructured {
36: CONST HTStructuredClass * isa;
37: /* ... */
38: };
39:
/*	Progress reporting: a no-op on NeXTStep, a line on stderr elsewhere.
*/
#ifdef NeXTStep
#include <appkit/defaults.h>
#define NEWS_PROGRESS(foo)
#else
#define NEWS_PROGRESS(foo) fprintf(stderr, "%s\n", (foo))
#endif


/*	Input and buffer sizing
*/
#define NEXT_CHAR HTGetChararcter()	/* Next character from the input */
#define LINE_LENGTH 512			/* Maximum length of line of ARTICLE etc */
#define GROUP_NAME_LENGTH 256		/* Maximum length of group name */
51:
52:
53: /* Module-wide variables
54: */
1.2 timbl 55: PUBLIC char * HTNewsHost;
1.1 timbl 56: PRIVATE struct sockaddr_in soc_address; /* Binary network address */
57: PRIVATE int s; /* Socket for NewsHost */
58: PRIVATE char response_text[LINE_LENGTH+1]; /* Last response */
1.2 timbl 59: /* PRIVATE HText * HT; */ /* the new hypertext */
60: PRIVATE HTStructured * target; /* The output sink */
61: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses */
1.1 timbl 62: PRIVATE HTParentAnchor *node_anchor; /* Its anchor */
63: PRIVATE int diagnostic; /* level: 0=none 2=source */
64:
1.2 timbl 65:
/*	Shorthand for the output methods of the current target.
**
**	Arguments are parenthesised so that an expression argument (for
**	example one containing a comma operator or a conditional) is passed
**	to the method as a single argument.
*/
#define PUTC(c) (*targetClass.put_character)(target, (c))
#define PUTS(s) (*targetClass.put_string)(target, (s))
#define START(e) (*targetClass.start_element)(target, (e), 0, 0)
#define END(e) (*targetClass.end_element)(target, (e))
70:
71: PUBLIC CONST char * HTGetNewsHost NOARGS
72: {
73: return HTNewsHost;
74: }
1.1 timbl 75:
1.2 timbl 76: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
77: {
78: StrAllocCopy(HTNewsHost, value);
79: }
1.1 timbl 80:
81: /* Initialisation for this module
82: ** ------------------------------
83: **
84: ** Except on the NeXT, we pick up the NewsHost name from
85: **
86: ** 1. Environment variable NNTPSERVER
87: ** 2. File SERVER_FILE
88: ** 3. Compilation time macro DEFAULT_NEWS_HOST
89: ** 4. Default to "news"
90: **
91: ** On the NeXT, we pick up the NewsHost name from, in order:
92: **
93: ** 1. WorldWideWeb default "NewsHost"
94: ** 2. Global default "NewsHost"
95: ** 3. News default "NewsHost"
96: ** 4. Compilation time macro DEFAULT_NEWS_HOST
97: ** 5. Default to "news"
98: */
99: PRIVATE BOOL initialized = NO;
100: PRIVATE BOOL initialize NOARGS
101: {
102: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
103: struct sockaddr_in* sin = &soc_address;
104:
105:
106: /* Set up defaults:
107: */
108: sin->sin_family = AF_INET; /* Family = internet, host order */
109: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
110:
111: /* Get name of Host
112: */
113: #ifdef NeXTStep
1.2 timbl 114: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
115: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
116: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 117: #else
118: if (getenv("NNTPSERVER")) {
1.2 timbl 119: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
1.1 timbl 120: if (TRACE) fprintf(stderr, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 121: HTNewsHost);
1.1 timbl 122: } else {
123: char server_name[256];
124: FILE* fp = fopen(SERVER_FILE, "r");
125: if (fp) {
126: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 127: StrAllocCopy(HTNewsHost, server_name);
1.1 timbl 128: if (TRACE) fprintf(stderr,
129: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 130: SERVER_FILE, HTNewsHost);
1.1 timbl 131: }
132: fclose(fp);
133: }
134: }
1.2 timbl 135: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 136: #endif
137:
1.2 timbl 138: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
139: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1 timbl 140:
141: } else { /* Alphanumeric node name: */
1.2 timbl 142: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 143: if (!phost) {
144: #ifdef NeXTStep
145: NXRunAlertPanel(NULL, "Can't find news host name `%s'.",
1.2 timbl 146: NULL, NULL, NULL, HTNewsHost);
1.1 timbl 147: #else
148: fprintf(stderr,
1.2 timbl 149: "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 150: fprintf(stderr,
151: " Please see online documentation for instructions to set the news host.\n");
152: #endif
153: CTRACE(tfp,
1.2 timbl 154: "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 155: return NO; /* Fail */
156: }
157: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
158: }
159:
160: if (TRACE) fprintf(stderr,
161: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
162: (unsigned int)ntohs(sin->sin_port),
163: (int)*((unsigned char *)(&sin->sin_addr)+0),
164: (int)*((unsigned char *)(&sin->sin_addr)+1),
165: (int)*((unsigned char *)(&sin->sin_addr)+2),
166: (int)*((unsigned char *)(&sin->sin_addr)+3));
167:
168: s = -1; /* Disconnected */
169:
170: return YES;
171: }
172:
173:
174:
175: /* Send NNTP Command line to remote host & Check Response
176: ** ------------------------------------------------------
177: **
178: ** On entry,
179: ** command points to the command to be sent, including CRLF, or is null
180: ** pointer if no command to be sent.
181: ** On exit,
182: ** Negative status indicates transmission error, socket closed.
183: ** Positive status is an NNTP status.
184: */
185:
186:
187: PRIVATE int response ARGS1(CONST char *,command)
188: {
189: int result;
190: char * p = response_text;
191: if (command) {
192: int status;
193: int length = strlen(command);
194: if (TRACE) fprintf(stderr, "NNTP command to be sent: %s", command);
195: #ifdef NOT_ASCII
196: {
197: CONST char * p;
198: char * q;
199: char ascii[LINE_LENGTH+1];
200: for(p = command, q=ascii; *p; p++, q++) {
201: *q = TOASCII(*p);
202: }
203: status = NETWRITE(s, ascii, length);
204: }
205: #else
206: status = NETWRITE(s, command, length);
207: #endif
208: if (status<0){
209: if (TRACE) fprintf(stderr,
210: "HTNews: Unable to send command. Disconnecting.\n");
211: NETCLOSE(s);
212: s = -1;
213: return status;
214: } /* if bad status */
215: } /* if command to be sent */
216:
217: for(;;) {
1.3 timbl 218: if (((*p++=NEXT_CHAR) == LF)
219: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 220: *p++=0; /* Terminate the string */
221: if (TRACE) fprintf(stderr, "NNTP Response: %s\n", response_text);
222: sscanf(response_text, "%d", &result);
223: return result;
224: } /* if end of line */
225:
226: if (*(p-1) < 0) {
227: if (TRACE) fprintf(stderr,
228: "HTNews: EOF on read, closing socket %d\n", s);
229: NETCLOSE(s); /* End of file, close socket */
230: return s = -1; /* End of file on response */
231: }
232: } /* Loop over characters */
233: }
234:
235:
236: /* Case insensitive string comparisons
237: ** -----------------------------------
238: **
239: ** On entry,
240: ** template must already be in upper case.
241: ** unknown may be in upper or lower or mixed case to match.
242: */
243: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,template)
244: {
245: CONST char * u = unknown;
246: CONST char * t = template;
247: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
248: return (BOOL)(*t==0); /* OK if end of template */
249: }
250:
251: /* Find Author's name in mail address
252: ** ----------------------------------
253: **
254: ** On exit,
255: ** THE EMAIL ADDRESS IS CORRUPTED
256: **
257: ** For example, returns "Tim Berners-Lee" if given any of
258: ** " Tim Berners-Lee <tim@online.cern.ch> "
259: ** or " tim@online.cern.ch ( Tim Berners-Lee ) "
260: */
261: PRIVATE char * author_name ARGS1 (char *,email)
262: {
263: char *s, *e;
264:
265: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
266: if (e>s) {
267: *e=0; /* Chop off everything after the ')' */
268: return HTStrip(s+1); /* Remove leading and trailing spaces */
269: }
270:
271: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
272: if (e>s) {
273: strcpy(s, e+1); /* Remove <...> */
274: return HTStrip(email); /* Remove leading and trailing spaces */
275: }
276:
277: return HTStrip(email); /* Default to the whole thing */
278:
279: }
280:
1.2 timbl 281: /* Start anchor element
282: ** --------------------
283: */
284: PRIVATE void start_anchor ARGS1(CONST char *, href)
285: {
286: BOOL present[HTML_A_ATTRIBUTES];
287: CONST char* value[HTML_A_ATTRIBUTES];
288:
289: {
290: int i;
291: for(i=0; i<HTML_A_ATTRIBUTES; i++)
292: present[i] = (i==HTML_A_HREF);
293: }
294: value[HTML_A_HREF] = href;
295: (*targetClass.start_element)(target, HTML_A , present, value);
296:
297: }
1.1 timbl 298:
299: /* Paste in an Anchor
300: ** ------------------
301: **
302: **
303: ** On entry,
304: ** HT has a selection of zero length at the end.
305: ** text points to the text to be put into the file, 0 terminated.
306: ** addr points to the hypertext reference address,
307: ** terminated by white space, comma, NULL or '>'
308: */
309: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
310: {
311: char href[LINE_LENGTH+1];
312:
313: {
314: CONST char * p;
315: strcpy(href,"news:");
316: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
317: strncat(href, addr, p-addr); /* Make complete hypertext reference */
318: }
319:
1.2 timbl 320: start_anchor(href);
321: PUTS(text);
322: END(HTML_A);
1.1 timbl 323: }
324:
325:
326: /* Write list of anchors
327: ** ---------------------
328: **
329: ** We take a pointer to a list of objects, and write out each,
330: ** generating an anchor for each.
331: **
332: ** On entry,
333: ** HT has a selection of zero length at the end.
334: ** text points to a comma or space separated list of addresses.
335: ** On exit,
336: ** *text is NOT any more chopped up into substrings.
337: */
338: PRIVATE void write_anchors ARGS1 (char *,text)
339: {
340: char * start = text;
341: char * end;
342: char c;
343: for (;;) {
344: for(;*start && (WHITE(*start)); start++); /* Find start */
345: if (!*start) return; /* (Done) */
346: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
347: if (*end) end++; /* Include comma or space but not NULL */
348: c = *end;
349: *end = 0;
350: write_anchor(start, start);
351: *end = c;
352: start = end; /* Point to next one */
353: }
354: }
355:
356: /* Abort the connection abort_socket
357: ** --------------------
358: */
359: PRIVATE void abort_socket NOARGS
360: {
361: if (TRACE) fprintf(stderr,
362: "HTNews: EOF on read, closing socket %d\n", s);
363: NETCLOSE(s); /* End of file, close socket */
1.2 timbl 364: PUTS("Network Error: connection lost");
365: PUTC('\n');
1.1 timbl 366: s = -1; /* End of file on response */
367: return;
368: }
369:
370: /* Read in an Article read_article
371: ** ------------------
372: **
373: **
374: ** Note the termination condition of a single dot on a line by itself.
375: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
376: ** do not handle it here.
377: **
378: ** On entry,
379: ** s Global socket number is OK
380: ** HT Global hypertext object is ready for appending text
381: */
382: PRIVATE void read_article NOARGS
383: {
384:
385: char line[LINE_LENGTH+1];
386: char *references=NULL; /* Hrefs for other articles */
387: char *newsgroups=NULL; /* Newsgroups list */
388: char *p = line;
389: BOOL done = NO;
390:
391: /* Read in the HEADer of the article:
392: **
393: ** The header fields are either ignored, or formatted and put into the
394: ** Text.
395: */
396: if (!diagnostic) {
1.2 timbl 397: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 398: while(!done){
399: char ch = *p++ = NEXT_CHAR;
400: if (ch==(char)EOF) {
401: abort_socket(); /* End of file, close socket */
402: return; /* End of file on response */
403: }
1.3 timbl 404: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 405: *--p=0; /* Terminate the string */
406: if (TRACE) fprintf(stderr, "H %s\n", line);
407:
408: if (line[0]=='.') {
409: if (line[1]<' ') { /* End of article? */
410: done = YES;
411: break;
412: }
413:
414: } else if (line[0]<' ') {
415: break; /* End of Header? */
416: } else if (match(line, "SUBJECT:")) {
1.2 timbl 417: END(HTML_ADDRESS);
418: START(HTML_TITLE); /** Uuugh! @@@ */
419: PUTS(line+8);
420: END(HTML_TITLE);
421: START(HTML_ADDRESS);
422: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
423: PUTS(line+8);
424: (*targetClass.end_element)(target, HTML_H1);
425: (*targetClass.start_element)(target, HTML_ADDRESS , 0, 0);
1.1 timbl 426: } else if (match(line, "DATE:")
427: || match(line, "FROM:")
428: || match(line, "ORGANIZATION:")) {
429: strcat(line, "\n");
1.2 timbl 430: PUTS(strchr(line,':')+1);
1.1 timbl 431: } else if (match(line, "NEWSGROUPS:")) {
432: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
433:
434: } else if (match(line, "REFERENCES:")) {
435: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
436:
437: } /* end if match */
438: p = line; /* Restart at beginning */
439: } /* if end of line */
440: } /* Loop over characters */
1.2 timbl 441: (*targetClass.end_element)(target, HTML_ADDRESS);
1.1 timbl 442:
1.2 timbl 443: if (newsgroups || references) {
444: (*targetClass.start_element)(target, HTML_DLC , 0, 0);
445: if (newsgroups) {
446: (*targetClass.start_element)(target, HTML_DT , 0, 0);
447: PUTS("Newsgroups:");
448: (*targetClass.start_element)(target, HTML_DD , 0, 0);
449: write_anchors(newsgroups);
450: free(newsgroups);
451: }
452:
453: if (references) {
454: (*targetClass.start_element)(target, HTML_DT , 0, 0);
455: PUTS("References:");
456: (*targetClass.start_element)(target, HTML_DD , 0, 0);
457: write_anchors(references);
458: free(references);
459: }
460: (*targetClass.end_element)(target, HTML_DLC);
1.1 timbl 461: }
1.2 timbl 462: PUTS("\n\n\n");
1.1 timbl 463:
464: }
465:
466: /* Read in the BODY of the Article:
467: */
1.2 timbl 468: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
469:
1.1 timbl 470: p = line;
471: while(!done){
472: char ch = *p++ = NEXT_CHAR;
473: if (ch==(char)EOF) {
474: abort_socket(); /* End of file, close socket */
475: return; /* End of file on response */
476: }
1.3 timbl 477: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 478: *p++=0; /* Terminate the string */
479: if (TRACE) fprintf(stderr, "B %s", line);
480: if (line[0]=='.') {
481: if (line[1]<' ') { /* End of article? */
482: done = YES;
483: break;
484: } else { /* Line starts with dot */
1.2 timbl 485: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 486: }
487: } else {
488:
489: /* Normal lines are scanned for buried references to other articles.
490: ** Unfortunately, it will pick up mail addresses as well!
491: */
492: char *l = line;
493: char * p;
494: while (p=strchr(l, '<')) {
495: char *q = strchr(p,'>');
496: char *at = strchr(p, '@');
497: if (q && at && at<q) {
498: char c = q[1];
499: q[1] = 0; /* chop up */
500: *p = 0;
1.2 timbl 501: PUTS(l);
1.1 timbl 502: *p = '<'; /* again */
503: *q = 0;
1.2 timbl 504: start_anchor(p+1);
1.1 timbl 505: *q = '>'; /* again */
1.2 timbl 506: PUTS(p);
507: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 508: q[1] = c; /* again */
509: l=q+1;
510: } else break; /* line has unmatched <> */
511: }
1.2 timbl 512: PUTS( l); /* Last bit of the line */
1.1 timbl 513: } /* if not dot */
514: p = line; /* Restart at beginning */
515: } /* if end of line */
516: } /* Loop over characters */
1.2 timbl 517:
518: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 519: }
520:
521:
522: /* Read in a List of Newsgroups
523: ** ----------------------------
524: */
525: /*
526: ** Note the termination condition of a single dot on a line by itself.
527: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
528: ** do not handle it here.
529: */
530: PRIVATE void read_list NOARGS
531: {
532:
533: char line[LINE_LENGTH+1];
534: char *p;
535: BOOL done = NO;
536:
537: /* Read in the HEADer of the article:
538: **
539: ** The header fields are either ignored, or formatted and put into the
540: ** Text.
541: */
1.2 timbl 542: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
543: PUTS( "Newsgroups");
544: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 545: p = line;
1.2 timbl 546: (*targetClass.start_element)(target, HTML_MENU , 0, 0);
1.1 timbl 547: while(!done){
548: char ch = *p++ = NEXT_CHAR;
549: if (ch==(char)EOF) {
550: abort_socket(); /* End of file, close socket */
551: return; /* End of file on response */
552: }
1.3 timbl 553: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 554: *p++=0; /* Terminate the string */
555: if (TRACE) fprintf(stderr, "B %s", line);
1.2 timbl 556: (*targetClass.start_element)(target, HTML_LI , 0, 0);
1.1 timbl 557: if (line[0]=='.') {
558: if (line[1]<' ') { /* End of article? */
559: done = YES;
560: break;
561: } else { /* Line starts with dot */
1.2 timbl 562: PUTS( &line[1]);
1.1 timbl 563: }
564: } else {
565:
566: /* Normal lines are scanned for references to newsgroups.
567: */
568: char group[LINE_LENGTH];
569: int first, last;
570: char postable;
571: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
572: write_anchor(line, group);
573: else
1.2 timbl 574: PUTS(line);
1.1 timbl 575: } /* if not dot */
576: p = line; /* Restart at beginning */
577: } /* if end of line */
578: } /* Loop over characters */
1.2 timbl 579: (*targetClass.end_element)(target, HTML_MENU);
1.1 timbl 580: }
581:
582:
583: /* Read in a Newsgroup
584: ** -------------------
585: ** Unfortunately, we have to ask for each article one by one if we
586: ** want more than one field.
587: **
588: */
589: PRIVATE void read_group ARGS3(
590: CONST char *,groupName,
591: int,first_required,
592: int,last_required
593: )
594: {
595: char line[LINE_LENGTH+1];
596: char author[LINE_LENGTH+1];
597: char subject[LINE_LENGTH+1];
598: char *p;
599: BOOL done;
600:
601: char buffer[LINE_LENGTH];
602: char *reference=0; /* Href for article */
603: int art; /* Article number WITHIN GROUP */
604: int status, count, first, last; /* Response fields */
605: /* count is only an upper limit */
606:
607: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
608: if(TRACE) printf("Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
609: status, count, first, last, first_required, last_required);
610: if (last==0) {
1.2 timbl 611: PUTS( "\nNo articles in this group.\n");
1.1 timbl 612: return;
613: }
614:
615: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
616: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
617:
618: if (first_required<first) first_required = first; /* clip */
619: if ((last_required==0) || (last_required > last)) last_required = last;
620:
621: if (last_required<=first_required) {
1.2 timbl 622: PUTS( "\nNo articles in this range.\n");
1.1 timbl 623: return;
624: }
625:
626: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
627: first_required = last_required-CHUNK_SIZE+1;
628: }
629: if (TRACE) printf (
630: " Chunk will be (%d-%d)\n", first_required, last_required);
631:
1.2 timbl 632: /* Set window title
633: */
634: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
635: groupName, first_required, last_required);
636: START(HTML_TITLE);
637: PUTS(buffer);
638: END(HTML_TITLE);
639:
1.1 timbl 640: /* Link to earlier articles
641: */
642: if (first_required>first) {
643: int before; /* Start of one before */
644: if (first_required-MAX_CHUNK <= first) before = first;
645: else before = first_required-CHUNK_SIZE;
646: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
647: if (TRACE) fprintf(stderr, " Block before is %s\n", buffer);
1.2 timbl 648: PUTS( " (");
649: start_anchor(buffer);
650: PUTS("Earlier articles");
651: END(HTML_A);
652: PUTS( "...)\n");
1.1 timbl 653: }
654:
655: done = NO;
656:
657: /*#define USE_XHDR*/
658: #ifdef USE_XHDR
659: if (count>FAST_THRESHOLD) {
660: sprintf(buffer,
661: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
662: count, groupName);
1.2 timbl 663: PUTS(buffer);
1.3 timbl 664: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 665: status = response(buffer);
666: if (status==221) {
667:
668: p = line;
669: while(!done){
670: char ch = *p++ = NEXT_CHAR;
671: if (ch==(char)EOF) {
672: abort_socket(); /* End of file, close socket */
673: return; /* End of file on response */
674: }
675: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
676: *p++=0; /* Terminate the string */
677: if (TRACE) fprintf(stderr, "X %s", line);
678: if (line[0]=='.') {
679: if (line[1]<' ') { /* End of article? */
680: done = YES;
681: break;
682: } else { /* Line starts with dot */
683: /* Ignore strange line */
684: }
685: } else {
686:
687: /* Normal lines are scanned for references to articles.
688: */
689: char * space = strchr(line, ' ');
690: if (space++)
691: write_anchor(space, space);
692: } /* if not dot */
693: p = line; /* Restart at beginning */
694: } /* if end of line */
695: } /* Loop over characters */
696:
697: /* leaving loop with "done" set */
698: } /* Good status */
699: };
700: #endif
701:
702: /* Read newsgroup using individual fields:
703: */
704: if (!done) {
705: if (first==first_required && last==last_required)
1.2 timbl 706: PUTS("\nAll available articles in ");
707: else PUTS( "\nArticles in ");
708: PUTS(groupName);
709: START(HTML_MENU);
1.1 timbl 710: for(art=first_required; art<=last_required; art++) {
711:
712: /*#define OVERLAP*/
713: #ifdef OVERLAP
714: /* With this code we try to keep the server running flat out by queuing just
715: ** one extra command ahead of time. We assume (1) that the server won't abort
716: ** if it gets input during output, and (2) that TCP buffering is enough for the
717: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
718: ** had a hangup with a loaded server.
719: */
720: if (art==first_required) {
721: if (art==last_required) {
1.3 timbl 722: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 723: status = response(buffer);
724: } else { /* First of many */
1.3 timbl 725: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
726: art, CR, LF, art+1, CR, LF);
1.1 timbl 727: status = response(buffer);
728: }
729: } else if (art==last_required) { /* Last of many */
730: status = response(NULL);
731: } else { /* Middle of many */
1.3 timbl 732: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 733: status = response(buffer);
734: }
735:
736: #else /* NOT OVERLAP */
1.3 timbl 737: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 738: status = response(buffer);
739: #endif /* NOT OVERLAP */
740:
741: if (status == 221) { /* Head follows - parse it:*/
742:
743: p = line; /* Write pointer */
744: done = NO;
745: while(!done){
746: char ch = *p++ = NEXT_CHAR;
747: if (ch==(char)EOF) {
748: abort_socket(); /* End of file, close socket */
749: return; /* End of file on response */
750: }
1.3 timbl 751: if ((ch == LF)
1.1 timbl 752: || (p == &line[LINE_LENGTH]) ) {
753:
754: *--p=0; /* Terminate & chop LF*/
755: p = line; /* Restart at beginning */
756: if (TRACE) fprintf(stderr, "G %s\n", line);
757: switch(line[0]) {
758:
759: case '.':
760: done = (line[1]<' '); /* End of article? */
761: break;
762:
763: case 'S':
764: case 's':
765: if (match(line, "SUBJECT:"))
766: strcpy(subject, line+9);/* Save subject */
767: break;
768:
769: case 'M':
770: case 'm':
771: if (match(line, "MESSAGE-ID:")) {
772: char * addr = HTStrip(line+11) +1; /* Chop < */
773: addr[strlen(addr)-1]=0; /* Chop > */
774: StrAllocCopy(reference, addr);
775: }
776: break;
777:
778: case 'f':
779: case 'F':
780: if (match(line, "FROM:")) {
781: char * p;
782: strcpy(author,
783: author_name(strchr(line,':')+1));
784: p = author + strlen(author) - 1;
1.3 timbl 785: if (*p==LF) *p = 0; /* Chop off newline */
1.1 timbl 786: }
787: break;
788:
789: } /* end switch on first character */
790: } /* if end of line */
791: } /* Loop over characters */
792:
1.2 timbl 793: START(HTML_LI);
1.1 timbl 794: sprintf(buffer, "\"%s\" - %s", subject, author);
795: if (reference) {
796: write_anchor(buffer, reference);
797: free(reference);
798: reference=0;
799: } else {
1.2 timbl 800: PUTS(buffer);
1.1 timbl 801: }
802:
803:
1.2 timbl 804: /* indicate progress! @@@@@@
1.1 timbl 805: */
806:
807: } /* If good response */
808: } /* Loop over article */
809: } /* If read headers */
1.2 timbl 810: END(HTML_MENU);
811: START(HTML_P);
1.1 timbl 812:
813: /* Link to later articles
814: */
815: if (last_required<last) {
816: int after; /* End of article after */
817: after = last_required+CHUNK_SIZE;
818: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
819: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
820: if (TRACE) fprintf(stderr, " Block after is %s\n", buffer);
1.2 timbl 821: PUTS( "(");
822: start_anchor(buffer);
823: PUTS( "Later articles");
824: END(HTML_A);
825: PUTS( "...)\n");
1.1 timbl 826: }
827:
828:
829: }
830:
831:
832: /* Load by name HTLoadNews
833: ** ============
834: */
1.2 timbl 835: PUBLIC int HTLoadNews ARGS4(
836: CONST char *, arg,
837: HTParentAnchor *, anAnchor,
838: HTFormat, format_out,
839: HTStream*, stream)
1.1 timbl 840: {
841: char command[257]; /* The whole command */
842: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
843: int status; /* tcp return */
844: int retries; /* A count of how hard we have tried */
845: BOOL group_wanted; /* Flag: group was asked for, not article */
846: BOOL list_wanted; /* Flag: group was asked for, not article */
847: int first, last; /* First and last articles asked for */
848:
1.2 timbl 849: diagnostic = (format_out == WWW_SOURCE); /* set global flag */
1.1 timbl 850:
851: if (TRACE) fprintf(stderr, "HTNews: Looking for %s\n", arg);
852:
853: if (!initialized) initialized = initialize();
854: if (!initialized) return -1; /* FAIL */
855:
856: {
857: CONST char * p1=arg;
858:
859: /* We will ask for the document, omitting the host name & anchor.
860: **
861: ** Syntax of address is
862: ** xxx@yyy Article
863: ** <xxx@yyy> Same article
864: ** xxxxx News group (no "@")
865: ** group/n1-n2 Articles n1 to n2 in group
866: */
867: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
868: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
869:
870: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
871: /* Don't use HTParse because news: access doesn't follow traditional
872: rules. For instance, if the article reference contains a '#',
873: the rest of it is lost -- JFG 10/7/92, from a bug report */
874: if (!strncasecomp (arg, "news:", 5))
875: p1 = arg + 5; /* Skip "news:" prefix */
876: if (list_wanted) {
877: strcpy(command, "LIST ");
878: } else if (group_wanted) {
879: char * slash = strchr(p1, '/');
880: strcpy(command, "GROUP ");
881: first = 0;
882: last = 0;
883: if (slash) {
884: *slash = 0;
885: strcpy(groupName, p1);
886: *slash = '/';
887: (void) sscanf(slash+1, "%d-%d", &first, &last);
888: } else {
889: strcpy(groupName, p1);
890: }
891: strcat(command, groupName);
892: } else {
893: strcpy(command, "ARTICLE ");
894: if (strchr(p1, '<')==0) strcat(command,"<");
895: strcat(command, p1);
896: if (strchr(p1, '>')==0) strcat(command,">");
897: }
898:
1.3 timbl 899: {
900: char * p = command + strlen(command);
901: *p++ = CR; /* Macros to be correct on Mac */
902: *p++ = LF;
903: *p++ = 0;
904: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
905: }
1.1 timbl 906: } /* scope of p1 */
907:
908: if (!*arg) return NO; /* Ignore if no name */
909:
910:
911: /* Make a hypertext object with an anchor list.
912: */
913: node_anchor = anAnchor;
1.3 timbl 914: target = HTML_new(anAnchor, format_out, stream);
1.2 timbl 915: targetClass = *target->isa; /* Copy routine entry points */
916:
1.1 timbl 917:
918: /* Now, let's get a stream setup up from the NewsHost:
919: */
920: for(retries=0;retries<2; retries++){
921:
922: if (s<0) {
923: NEWS_PROGRESS("Connecting to NewsHost ...");
924: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
925: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
926: if (status<0){
927: char message[256];
928: NETCLOSE(s);
929: s = -1;
930: if (TRACE) fprintf(stderr, "HTNews: Unable to connect to news host.\n");
931: /* if (retries<=1) continue; WHY TRY AGAIN ? */
932: #ifdef NeXTStep
933: NXRunAlertPanel(NULL,
934: "Could not access newshost %s.",
935: NULL,NULL,NULL,
1.2 timbl 936: HTNewsHost);
1.1 timbl 937: #else
938: fprintf(stderr, "Could not access newshost %s\n",
1.2 timbl 939: HTNewsHost);
1.1 timbl 940: #endif
941: sprintf(message,
942: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 943: HTNewsHost);
944:
945: PUTS(message);
946: (*targetClass.end_document)(target);
1.1 timbl 947: return YES;
948: } else {
949: if (TRACE) fprintf(stderr, "HTNews: Connected to news host %s.\n",
1.2 timbl 950: HTNewsHost);
1.1 timbl 951: HTInitInput(s); /* set up buffering */
952: if ((response(NULL) / 100) !=2) {
953: NETCLOSE(s);
954: s = -1;
955: #ifdef NeXTStep
956: NXRunAlertPanel("News access",
957: "Could not retrieve information:\n %s.",
958: NULL,NULL,NULL,
959: response_text);
960: #endif
1.2 timbl 961: START(HTML_TITLE);
962: PUTS("News host response");
963: END(HTML_TITLE);
964: PUTS("Sorry, could not retrieve information: ");
965: PUTS(response_text);
966: (*targetClass.end_document)(target);
1.1 timbl 967: return YES;
968: }
969: }
970: } /* If needed opening */
971:
1.2 timbl 972: /* @@@@@@@@@@@@@@Tell user something's happening */
973:
1.1 timbl 974: status = response(command);
975: if (status<0) break;
976: if ((status/ 100) !=2) {
977: /* NXRunAlertPanel("News access", response_text,
978: NULL,NULL,NULL);
979: */
1.2 timbl 980:
981: PUTS(response_text);
982: (*targetClass.end_document)(target);
1.1 timbl 983: NETCLOSE(s);
984: s = -1;
985: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
986: continue; /* Try again */
987: }
988:
989: /* Load a group, article, etc
990: */
1.2 timbl 991:
1.1 timbl 992:
993: if (list_wanted) read_list();
994: else if (group_wanted) read_group(groupName, first, last);
995: else read_article();
996:
1.2 timbl 997: (*targetClass.end_document)(target);
2.6 ! timbl 998: (*targetClass.free)(target);
1.2 timbl 999: return HT_LOADED;
1.1 timbl 1000:
1001: } /* Retry loop */
1002:
1.2 timbl 1003:
1004: PUTS("Sorry, could not load requested news.\n");
1005: (*targetClass.end_document)(target);
1.1 timbl 1006:
1007: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
1008: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
1009:
1.2 timbl 1010: return HT_LOADED;
1.1 timbl 1011: }
1012:
1.2 timbl 1013: PUBLIC HTProtocol HTNews = { "news", HTLoadNews, NULL };
Webmaster