Annotation of libwww/Library/src/HTNews.c, revision 2.10
1.1 timbl 1: /* NEWS ACCESS HTNews.c
2: ** ===========
3: **
4: ** History:
5: ** 26 Sep 90 Written TBL
6: ** 29 Nov 91 Downgraded to C, for portable implementation.
7: */
1.2 timbl 8: /* Implements:
9: */
10: #include "HTNews.h"
1.1 timbl 11:
/* Line terminators, expressed in the local character set.
** FROMASCII maps the ASCII code to whatever the host uses, so that the
** bytes put on the wire are still ^M and ^J.
*/
#define CR   FROMASCII('\015')	/* Carriage return, transmitted as ^M */
#define LF   FROMASCII('\012')	/* Line feed, transmitted as ^J */

/* Protocol and paging parameters
*/
#define NEWS_PORT 119		/* NNTP service port, see rfc977 */
#define APPEND			/* Use append methods */
#define MAX_CHUNK	40	/* Largest number of articles in one window */
#define CHUNK_SIZE	20	/* Number of articles for quick display */

/* Compile-time fallbacks, overridable from the build command line
*/
#ifndef DEFAULT_NEWS_HOST
#define DEFAULT_NEWS_HOST "news"
#endif
#ifndef SERVER_FILE
#define SERVER_FILE "/usr/local/lib/rn/server"
#endif
26:
27: #include <ctype.h>
28: #include "HTUtils.h" /* Coding convention macros */
29: #include "tcp.h"
30:
1.2 timbl 31: #include "HTML.h"
1.1 timbl 32: #include "HTParse.h"
33: #include "HTFormat.h"
2.8 timbl 34: #include "HTAlert.h"
1.1 timbl 35:
2.8 timbl 36: #define BIG 1024 /* @@@ */
37:
1.2 timbl 38: struct _HTStructured {
39: CONST HTStructuredClass * isa;
40: /* ... */
41: };
42:
2.7 timbl 43: #define NEWS_PROGRESS(foo) HTProgress(foo)
1.1 timbl 44:
45:
46: #define NEXT_CHAR HTGetChararcter()
47: #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */
48: #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */
49:
50:
51: /* Module-wide variables
52: */
1.2 timbl 53: PUBLIC char * HTNewsHost;
1.1 timbl 54: PRIVATE struct sockaddr_in soc_address; /* Binary network address */
55: PRIVATE int s; /* Socket for NewsHost */
56: PRIVATE char response_text[LINE_LENGTH+1]; /* Last response */
1.2 timbl 57: /* PRIVATE HText * HT; */ /* the new hypertext */
58: PRIVATE HTStructured * target; /* The output sink */
59: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses */
1.1 timbl 60: PRIVATE HTParentAnchor *node_anchor; /* Its anchor */
61: PRIVATE int diagnostic; /* level: 0=none 2=source */
62:
1.2 timbl 63:
64: #define PUTC(c) (*targetClass.put_character)(target, c)
65: #define PUTS(s) (*targetClass.put_string)(target, s)
66: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
67: #define END(e) (*targetClass.end_element)(target, e)
68:
69: PUBLIC CONST char * HTGetNewsHost NOARGS
70: {
71: return HTNewsHost;
72: }
1.1 timbl 73:
1.2 timbl 74: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
75: {
76: StrAllocCopy(HTNewsHost, value);
77: }
1.1 timbl 78:
79: /* Initialisation for this module
80: ** ------------------------------
81: **
82: ** Except on the NeXT, we pick up the NewsHost name from
83: **
84: ** 1. Environment variable NNTPSERVER
85: ** 2. File SERVER_FILE
86: ** 3. Compilation time macro DEFAULT_NEWS_HOST
87: ** 4. Default to "news"
88: **
89: ** On the NeXT, we pick up the NewsHost name from, in order:
90: **
91: ** 1. WorldWideWeb default "NewsHost"
92: ** 2. Global default "NewsHost"
93: ** 3. News default "NewsHost"
94: ** 4. Compilation time macro DEFAULT_NEWS_HOST
95: ** 5. Default to "news"
96: */
97: PRIVATE BOOL initialized = NO;
98: PRIVATE BOOL initialize NOARGS
99: {
100: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
101: struct sockaddr_in* sin = &soc_address;
102:
103:
104: /* Set up defaults:
105: */
106: sin->sin_family = AF_INET; /* Family = internet, host order */
107: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
108:
109: /* Get name of Host
110: */
111: #ifdef NeXTStep
1.2 timbl 112: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
113: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
114: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 115: #else
116: if (getenv("NNTPSERVER")) {
1.2 timbl 117: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
1.1 timbl 118: if (TRACE) fprintf(stderr, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 119: HTNewsHost);
1.1 timbl 120: } else {
121: char server_name[256];
122: FILE* fp = fopen(SERVER_FILE, "r");
123: if (fp) {
124: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 125: StrAllocCopy(HTNewsHost, server_name);
1.1 timbl 126: if (TRACE) fprintf(stderr,
127: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 128: SERVER_FILE, HTNewsHost);
1.1 timbl 129: }
130: fclose(fp);
131: }
132: }
1.2 timbl 133: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 134: #endif
135:
1.2 timbl 136: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
137: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1 timbl 138:
139: } else { /* Alphanumeric node name: */
1.2 timbl 140: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 141: if (!phost) {
2.7 timbl 142: char message[150]; /* @@@ */
143: sprintf(message,
144: "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
145: "Please define your NNTP server");
146: HTAlert(message);
1.1 timbl 147: CTRACE(tfp,
1.2 timbl 148: "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 149: return NO; /* Fail */
150: }
151: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
152: }
153:
154: if (TRACE) fprintf(stderr,
155: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
156: (unsigned int)ntohs(sin->sin_port),
157: (int)*((unsigned char *)(&sin->sin_addr)+0),
158: (int)*((unsigned char *)(&sin->sin_addr)+1),
159: (int)*((unsigned char *)(&sin->sin_addr)+2),
160: (int)*((unsigned char *)(&sin->sin_addr)+3));
161:
162: s = -1; /* Disconnected */
163:
164: return YES;
165: }
166:
167:
168:
169: /* Send NNTP Command line to remote host & Check Response
170: ** ------------------------------------------------------
171: **
172: ** On entry,
173: ** command points to the command to be sent, including CRLF, or is null
174: ** pointer if no command to be sent.
175: ** On exit,
176: ** Negative status indicates transmission error, socket closed.
177: ** Positive status is an NNTP status.
178: */
179:
180:
181: PRIVATE int response ARGS1(CONST char *,command)
182: {
183: int result;
184: char * p = response_text;
185: if (command) {
186: int status;
187: int length = strlen(command);
188: if (TRACE) fprintf(stderr, "NNTP command to be sent: %s", command);
189: #ifdef NOT_ASCII
190: {
191: CONST char * p;
192: char * q;
193: char ascii[LINE_LENGTH+1];
194: for(p = command, q=ascii; *p; p++, q++) {
195: *q = TOASCII(*p);
196: }
197: status = NETWRITE(s, ascii, length);
198: }
199: #else
200: status = NETWRITE(s, command, length);
201: #endif
202: if (status<0){
203: if (TRACE) fprintf(stderr,
204: "HTNews: Unable to send command. Disconnecting.\n");
205: NETCLOSE(s);
206: s = -1;
207: return status;
208: } /* if bad status */
209: } /* if command to be sent */
210:
211: for(;;) {
1.3 timbl 212: if (((*p++=NEXT_CHAR) == LF)
213: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 214: *p++=0; /* Terminate the string */
215: if (TRACE) fprintf(stderr, "NNTP Response: %s\n", response_text);
216: sscanf(response_text, "%d", &result);
217: return result;
218: } /* if end of line */
219:
220: if (*(p-1) < 0) {
221: if (TRACE) fprintf(stderr,
222: "HTNews: EOF on read, closing socket %d\n", s);
223: NETCLOSE(s); /* End of file, close socket */
224: return s = -1; /* End of file on response */
225: }
226: } /* Loop over characters */
227: }
228:
229:
230: /* Case insensitive string comparisons
231: ** -----------------------------------
232: **
233: ** On entry,
234: ** template must be already un upper case.
235: ** unknown may be in upper or lower or mixed case to match.
236: */
237: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,template)
238: {
239: CONST char * u = unknown;
240: CONST char * t = template;
241: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
242: return (BOOL)(*t==0); /* OK if end of template */
243: }
244:
245: /* Find Author's name in mail address
246: ** ----------------------------------
247: **
248: ** On exit,
249: ** THE EMAIL ADDRESS IS CORRUPTED
250: **
251: ** For example, returns "Tim Berners-Lee" if given any of
252: ** " Tim Berners-Lee <tim@online.cern.ch> "
253: ** or " tim@online.cern.ch ( Tim Berners-Lee ) "
254: */
255: PRIVATE char * author_name ARGS1 (char *,email)
256: {
257: char *s, *e;
258:
259: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
260: if (e>s) {
261: *e=0; /* Chop off everything after the ')' */
262: return HTStrip(s+1); /* Remove leading and trailing spaces */
263: }
264:
265: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
266: if (e>s) {
267: strcpy(s, e+1); /* Remove <...> */
268: return HTStrip(email); /* Remove leading and trailing spaces */
269: }
270:
271: return HTStrip(email); /* Default to the whole thing */
272:
273: }
274:
1.2 timbl 275: /* Start anchor element
276: ** --------------------
277: */
278: PRIVATE void start_anchor ARGS1(CONST char *, href)
279: {
280: BOOL present[HTML_A_ATTRIBUTES];
281: CONST char* value[HTML_A_ATTRIBUTES];
282:
283: {
284: int i;
285: for(i=0; i<HTML_A_ATTRIBUTES; i++)
286: present[i] = (i==HTML_A_HREF);
287: }
288: value[HTML_A_HREF] = href;
289: (*targetClass.start_element)(target, HTML_A , present, value);
290:
291: }
1.1 timbl 292:
293: /* Paste in an Anchor
294: ** ------------------
295: **
296: **
297: ** On entry,
298: ** HT has a selection of zero length at the end.
299: ** text points to the text to be put into the file, 0 terminated.
300: ** addr points to the hypertext refernce address,
301: ** terminated by white space, comma, NULL or '>'
302: */
303: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
304: {
305: char href[LINE_LENGTH+1];
306:
307: {
308: CONST char * p;
309: strcpy(href,"news:");
310: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
311: strncat(href, addr, p-addr); /* Make complete hypertext reference */
312: }
313:
1.2 timbl 314: start_anchor(href);
315: PUTS(text);
316: END(HTML_A);
1.1 timbl 317: }
318:
319:
320: /* Write list of anchors
321: ** ---------------------
322: **
323: ** We take a pointer to a list of objects, and write out each,
324: ** generating an anchor for each.
325: **
326: ** On entry,
327: ** HT has a selection of zero length at the end.
328: ** text points to a comma or space separated list of addresses.
329: ** On exit,
330: ** *text is NOT any more chopped up into substrings.
331: */
332: PRIVATE void write_anchors ARGS1 (char *,text)
333: {
334: char * start = text;
335: char * end;
336: char c;
337: for (;;) {
338: for(;*start && (WHITE(*start)); start++); /* Find start */
339: if (!*start) return; /* (Done) */
340: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
341: if (*end) end++; /* Include comma or space but not NULL */
342: c = *end;
343: *end = 0;
344: write_anchor(start, start);
345: *end = c;
346: start = end; /* Point to next one */
347: }
348: }
349:
350: /* Abort the connection abort_socket
351: ** --------------------
352: */
353: PRIVATE void abort_socket NOARGS
354: {
355: if (TRACE) fprintf(stderr,
356: "HTNews: EOF on read, closing socket %d\n", s);
357: NETCLOSE(s); /* End of file, close socket */
1.2 timbl 358: PUTS("Network Error: connection lost");
359: PUTC('\n');
1.1 timbl 360: s = -1; /* End of file on response */
361: return;
362: }
363:
364: /* Read in an Article read_article
365: ** ------------------
366: **
367: **
368: ** Note the termination condition of a single dot on a line by itself.
369: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
370: ** do not handle it here.
371: **
372: ** On entry,
373: ** s Global socket number is OK
374: ** HT Global hypertext object is ready for appending text
375: */
376: PRIVATE void read_article NOARGS
377: {
378:
379: char line[LINE_LENGTH+1];
380: char *references=NULL; /* Hrefs for other articles */
381: char *newsgroups=NULL; /* Newsgroups list */
382: char *p = line;
383: BOOL done = NO;
384:
385: /* Read in the HEADer of the article:
386: **
387: ** The header fields are either ignored, or formatted and put into the
388: ** Text.
389: */
390: if (!diagnostic) {
1.2 timbl 391: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 392: while(!done){
393: char ch = *p++ = NEXT_CHAR;
394: if (ch==(char)EOF) {
395: abort_socket(); /* End of file, close socket */
396: return; /* End of file on response */
397: }
1.3 timbl 398: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 399: *--p=0; /* Terminate the string */
400: if (TRACE) fprintf(stderr, "H %s\n", line);
401:
402: if (line[0]=='.') {
403: if (line[1]<' ') { /* End of article? */
404: done = YES;
405: break;
406: }
407:
408: } else if (line[0]<' ') {
409: break; /* End of Header? */
410: } else if (match(line, "SUBJECT:")) {
1.2 timbl 411: END(HTML_ADDRESS);
412: START(HTML_TITLE); /** Uuugh! @@@ */
413: PUTS(line+8);
414: END(HTML_TITLE);
415: START(HTML_ADDRESS);
416: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
417: PUTS(line+8);
418: (*targetClass.end_element)(target, HTML_H1);
419: (*targetClass.start_element)(target, HTML_ADDRESS , 0, 0);
1.1 timbl 420: } else if (match(line, "DATE:")
421: || match(line, "FROM:")
422: || match(line, "ORGANIZATION:")) {
423: strcat(line, "\n");
1.2 timbl 424: PUTS(strchr(line,':')+1);
1.1 timbl 425: } else if (match(line, "NEWSGROUPS:")) {
426: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
427:
428: } else if (match(line, "REFERENCES:")) {
429: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
430:
431: } /* end if match */
432: p = line; /* Restart at beginning */
433: } /* if end of line */
434: } /* Loop over characters */
1.2 timbl 435: (*targetClass.end_element)(target, HTML_ADDRESS);
1.1 timbl 436:
1.2 timbl 437: if (newsgroups || references) {
2.10 ! timbl 438: (*targetClass.start_element)(target, HTML_DL , 0, 0);
! 439: /* @@@@@@@@@@ SHOULD BE COMPACT */
1.2 timbl 440: if (newsgroups) {
441: (*targetClass.start_element)(target, HTML_DT , 0, 0);
442: PUTS("Newsgroups:");
443: (*targetClass.start_element)(target, HTML_DD , 0, 0);
444: write_anchors(newsgroups);
445: free(newsgroups);
446: }
447:
448: if (references) {
449: (*targetClass.start_element)(target, HTML_DT , 0, 0);
450: PUTS("References:");
451: (*targetClass.start_element)(target, HTML_DD , 0, 0);
452: write_anchors(references);
453: free(references);
454: }
2.10 ! timbl 455: (*targetClass.end_element)(target, HTML_DL);
1.1 timbl 456: }
1.2 timbl 457: PUTS("\n\n\n");
1.1 timbl 458:
459: }
460:
461: /* Read in the BODY of the Article:
462: */
1.2 timbl 463: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
464:
1.1 timbl 465: p = line;
466: while(!done){
467: char ch = *p++ = NEXT_CHAR;
468: if (ch==(char)EOF) {
469: abort_socket(); /* End of file, close socket */
470: return; /* End of file on response */
471: }
1.3 timbl 472: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 473: *p++=0; /* Terminate the string */
474: if (TRACE) fprintf(stderr, "B %s", line);
475: if (line[0]=='.') {
476: if (line[1]<' ') { /* End of article? */
477: done = YES;
478: break;
479: } else { /* Line starts with dot */
1.2 timbl 480: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 481: }
482: } else {
483:
484: /* Normal lines are scanned for buried references to other articles.
485: ** Unfortunately, it will pick up mail addresses as well!
486: */
487: char *l = line;
488: char * p;
489: while (p=strchr(l, '<')) {
490: char *q = strchr(p,'>');
491: char *at = strchr(p, '@');
492: if (q && at && at<q) {
493: char c = q[1];
494: q[1] = 0; /* chop up */
495: *p = 0;
1.2 timbl 496: PUTS(l);
1.1 timbl 497: *p = '<'; /* again */
498: *q = 0;
1.2 timbl 499: start_anchor(p+1);
1.1 timbl 500: *q = '>'; /* again */
1.2 timbl 501: PUTS(p);
502: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 503: q[1] = c; /* again */
504: l=q+1;
505: } else break; /* line has unmatched <> */
506: }
1.2 timbl 507: PUTS( l); /* Last bit of the line */
1.1 timbl 508: } /* if not dot */
509: p = line; /* Restart at beginning */
510: } /* if end of line */
511: } /* Loop over characters */
1.2 timbl 512:
513: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 514: }
515:
516:
517: /* Read in a List of Newsgroups
518: ** ----------------------------
519: */
520: /*
521: ** Note the termination condition of a single dot on a line by itself.
522: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
523: ** do not handle it here.
524: */
525: PRIVATE void read_list NOARGS
526: {
527:
528: char line[LINE_LENGTH+1];
529: char *p;
530: BOOL done = NO;
531:
532: /* Read in the HEADer of the article:
533: **
534: ** The header fields are either ignored, or formatted and put into the
535: ** Text.
536: */
1.2 timbl 537: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
538: PUTS( "Newsgroups");
539: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 540: p = line;
1.2 timbl 541: (*targetClass.start_element)(target, HTML_MENU , 0, 0);
1.1 timbl 542: while(!done){
543: char ch = *p++ = NEXT_CHAR;
544: if (ch==(char)EOF) {
545: abort_socket(); /* End of file, close socket */
546: return; /* End of file on response */
547: }
1.3 timbl 548: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 549: *p++=0; /* Terminate the string */
550: if (TRACE) fprintf(stderr, "B %s", line);
1.2 timbl 551: (*targetClass.start_element)(target, HTML_LI , 0, 0);
1.1 timbl 552: if (line[0]=='.') {
553: if (line[1]<' ') { /* End of article? */
554: done = YES;
555: break;
556: } else { /* Line starts with dot */
1.2 timbl 557: PUTS( &line[1]);
1.1 timbl 558: }
559: } else {
560:
561: /* Normal lines are scanned for references to newsgroups.
562: */
563: char group[LINE_LENGTH];
564: int first, last;
565: char postable;
566: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
567: write_anchor(line, group);
568: else
1.2 timbl 569: PUTS(line);
1.1 timbl 570: } /* if not dot */
571: p = line; /* Restart at beginning */
572: } /* if end of line */
573: } /* Loop over characters */
1.2 timbl 574: (*targetClass.end_element)(target, HTML_MENU);
1.1 timbl 575: }
576:
577:
578: /* Read in a Newsgroup
579: ** -------------------
580: ** Unfortunately, we have to ask for each article one by one if we
581: ** want more than one field.
582: **
583: */
584: PRIVATE void read_group ARGS3(
585: CONST char *,groupName,
586: int,first_required,
587: int,last_required
588: )
589: {
590: char line[LINE_LENGTH+1];
591: char author[LINE_LENGTH+1];
592: char subject[LINE_LENGTH+1];
593: char *p;
594: BOOL done;
595:
596: char buffer[LINE_LENGTH];
597: char *reference=0; /* Href for article */
598: int art; /* Article number WITHIN GROUP */
599: int status, count, first, last; /* Response fields */
600: /* count is only an upper limit */
601:
602: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
603: if(TRACE) printf("Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
604: status, count, first, last, first_required, last_required);
605: if (last==0) {
1.2 timbl 606: PUTS( "\nNo articles in this group.\n");
1.1 timbl 607: return;
608: }
609:
610: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
611: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
612:
613: if (first_required<first) first_required = first; /* clip */
614: if ((last_required==0) || (last_required > last)) last_required = last;
615:
616: if (last_required<=first_required) {
1.2 timbl 617: PUTS( "\nNo articles in this range.\n");
1.1 timbl 618: return;
619: }
620:
621: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
622: first_required = last_required-CHUNK_SIZE+1;
623: }
624: if (TRACE) printf (
625: " Chunk will be (%d-%d)\n", first_required, last_required);
626:
1.2 timbl 627: /* Set window title
628: */
629: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
630: groupName, first_required, last_required);
631: START(HTML_TITLE);
632: PUTS(buffer);
633: END(HTML_TITLE);
634:
1.1 timbl 635: /* Link to earlier articles
636: */
637: if (first_required>first) {
638: int before; /* Start of one before */
639: if (first_required-MAX_CHUNK <= first) before = first;
640: else before = first_required-CHUNK_SIZE;
641: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
642: if (TRACE) fprintf(stderr, " Block before is %s\n", buffer);
1.2 timbl 643: PUTS( " (");
644: start_anchor(buffer);
645: PUTS("Earlier articles");
646: END(HTML_A);
647: PUTS( "...)\n");
1.1 timbl 648: }
649:
650: done = NO;
651:
652: /*#define USE_XHDR*/
653: #ifdef USE_XHDR
654: if (count>FAST_THRESHOLD) {
655: sprintf(buffer,
656: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
657: count, groupName);
1.2 timbl 658: PUTS(buffer);
1.3 timbl 659: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 660: status = response(buffer);
661: if (status==221) {
662:
663: p = line;
664: while(!done){
665: char ch = *p++ = NEXT_CHAR;
666: if (ch==(char)EOF) {
667: abort_socket(); /* End of file, close socket */
668: return; /* End of file on response */
669: }
670: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
671: *p++=0; /* Terminate the string */
672: if (TRACE) fprintf(stderr, "X %s", line);
673: if (line[0]=='.') {
674: if (line[1]<' ') { /* End of article? */
675: done = YES;
676: break;
677: } else { /* Line starts with dot */
678: /* Ignore strange line */
679: }
680: } else {
681:
682: /* Normal lines are scanned for references to articles.
683: */
684: char * space = strchr(line, ' ');
685: if (space++)
686: write_anchor(space, space);
687: } /* if not dot */
688: p = line; /* Restart at beginning */
689: } /* if end of line */
690: } /* Loop over characters */
691:
692: /* leaving loop with "done" set */
693: } /* Good status */
694: };
695: #endif
696:
697: /* Read newsgroup using individual fields:
698: */
699: if (!done) {
700: if (first==first_required && last==last_required)
1.2 timbl 701: PUTS("\nAll available articles in ");
702: else PUTS( "\nArticles in ");
703: PUTS(groupName);
704: START(HTML_MENU);
1.1 timbl 705: for(art=first_required; art<=last_required; art++) {
706:
707: /*#define OVERLAP*/
708: #ifdef OVERLAP
709: /* With this code we try to keep the server running flat out by queuing just
710: ** one extra command ahead of time. We assume (1) that the server won't abort
711: ** if it gets input during output, and (2) that TCP buffering is enough for the
712: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
713: ** had a hangup with a loaded server.
714: */
715: if (art==first_required) {
716: if (art==last_required) {
1.3 timbl 717: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 718: status = response(buffer);
719: } else { /* First of many */
1.3 timbl 720: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
721: art, CR, LF, art+1, CR, LF);
1.1 timbl 722: status = response(buffer);
723: }
724: } else if (art==last_required) { /* Last of many */
725: status = response(NULL);
726: } else { /* Middle of many */
1.3 timbl 727: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 728: status = response(buffer);
729: }
730:
731: #else /* NOT OVERLAP */
1.3 timbl 732: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 733: status = response(buffer);
734: #endif /* NOT OVERLAP */
735:
736: if (status == 221) { /* Head follows - parse it:*/
737:
738: p = line; /* Write pointer */
739: done = NO;
740: while(!done){
741: char ch = *p++ = NEXT_CHAR;
742: if (ch==(char)EOF) {
743: abort_socket(); /* End of file, close socket */
744: return; /* End of file on response */
745: }
1.3 timbl 746: if ((ch == LF)
1.1 timbl 747: || (p == &line[LINE_LENGTH]) ) {
748:
749: *--p=0; /* Terminate & chop LF*/
750: p = line; /* Restart at beginning */
751: if (TRACE) fprintf(stderr, "G %s\n", line);
752: switch(line[0]) {
753:
754: case '.':
755: done = (line[1]<' '); /* End of article? */
756: break;
757:
758: case 'S':
759: case 's':
760: if (match(line, "SUBJECT:"))
761: strcpy(subject, line+9);/* Save subject */
762: break;
763:
764: case 'M':
765: case 'm':
766: if (match(line, "MESSAGE-ID:")) {
767: char * addr = HTStrip(line+11) +1; /* Chop < */
768: addr[strlen(addr)-1]=0; /* Chop > */
769: StrAllocCopy(reference, addr);
770: }
771: break;
772:
773: case 'f':
774: case 'F':
775: if (match(line, "FROM:")) {
776: char * p;
777: strcpy(author,
778: author_name(strchr(line,':')+1));
779: p = author + strlen(author) - 1;
1.3 timbl 780: if (*p==LF) *p = 0; /* Chop off newline */
1.1 timbl 781: }
782: break;
783:
784: } /* end switch on first character */
785: } /* if end of line */
786: } /* Loop over characters */
787:
1.2 timbl 788: START(HTML_LI);
1.1 timbl 789: sprintf(buffer, "\"%s\" - %s", subject, author);
790: if (reference) {
791: write_anchor(buffer, reference);
792: free(reference);
793: reference=0;
794: } else {
1.2 timbl 795: PUTS(buffer);
1.1 timbl 796: }
797:
798:
1.2 timbl 799: /* indicate progress! @@@@@@
1.1 timbl 800: */
801:
802: } /* If good response */
803: } /* Loop over article */
804: } /* If read headers */
1.2 timbl 805: END(HTML_MENU);
806: START(HTML_P);
1.1 timbl 807:
808: /* Link to later articles
809: */
810: if (last_required<last) {
811: int after; /* End of article after */
812: after = last_required+CHUNK_SIZE;
813: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
814: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
815: if (TRACE) fprintf(stderr, " Block after is %s\n", buffer);
1.2 timbl 816: PUTS( "(");
817: start_anchor(buffer);
818: PUTS( "Later articles");
819: END(HTML_A);
820: PUTS( "...)\n");
1.1 timbl 821: }
822:
823:
824: }
825:
826:
827: /* Load by name HTLoadNews
828: ** ============
829: */
2.10 ! timbl 830: PUBLIC int HTLoadNews ARGS2(
1.2 timbl 831: CONST char *, arg,
2.10 ! timbl 832: HTRequest *, request)
1.1 timbl 833: {
834: char command[257]; /* The whole command */
835: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
836: int status; /* tcp return */
837: int retries; /* A count of how hard we have tried */
838: BOOL group_wanted; /* Flag: group was asked for, not article */
839: BOOL list_wanted; /* Flag: group was asked for, not article */
840: int first, last; /* First and last articles asked for */
841:
2.10 ! timbl 842: diagnostic = (request->output_format == WWW_SOURCE); /* set global flag */
1.1 timbl 843:
844: if (TRACE) fprintf(stderr, "HTNews: Looking for %s\n", arg);
845:
846: if (!initialized) initialized = initialize();
847: if (!initialized) return -1; /* FAIL */
848:
849: {
850: CONST char * p1=arg;
851:
852: /* We will ask for the document, omitting the host name & anchor.
853: **
854: ** Syntax of address is
855: ** xxx@yyy Article
856: ** <xxx@yyy> Same article
857: ** xxxxx News group (no "@")
858: ** group/n1-n2 Articles n1 to n2 in group
859: */
860: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
861: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
862:
863: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
864: /* Don't use HTParse because news: access doesn't follow traditional
865: rules. For instance, if the article reference contains a '#',
866: the rest of it is lost -- JFG 10/7/92, from a bug report */
867: if (!strncasecomp (arg, "news:", 5))
868: p1 = arg + 5; /* Skip "news:" prefix */
869: if (list_wanted) {
870: strcpy(command, "LIST ");
871: } else if (group_wanted) {
872: char * slash = strchr(p1, '/');
873: strcpy(command, "GROUP ");
874: first = 0;
875: last = 0;
876: if (slash) {
877: *slash = 0;
878: strcpy(groupName, p1);
879: *slash = '/';
880: (void) sscanf(slash+1, "%d-%d", &first, &last);
881: } else {
882: strcpy(groupName, p1);
883: }
884: strcat(command, groupName);
885: } else {
886: strcpy(command, "ARTICLE ");
887: if (strchr(p1, '<')==0) strcat(command,"<");
888: strcat(command, p1);
889: if (strchr(p1, '>')==0) strcat(command,">");
890: }
891:
1.3 timbl 892: {
893: char * p = command + strlen(command);
894: *p++ = CR; /* Macros to be correct on Mac */
895: *p++ = LF;
896: *p++ = 0;
897: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
898: }
1.1 timbl 899: } /* scope of p1 */
900:
901: if (!*arg) return NO; /* Ignore if no name */
902:
903:
904: /* Make a hypertext object with an anchor list.
905: */
2.10 ! timbl 906: node_anchor = request->anchor;
! 907: target = HTML_new(request->anchor, request->output_format, request->output_stream);
1.2 timbl 908: targetClass = *target->isa; /* Copy routine entry points */
909:
1.1 timbl 910:
911: /* Now, let's get a stream setup up from the NewsHost:
912: */
913: for(retries=0;retries<2; retries++){
914:
915: if (s<0) {
916: NEWS_PROGRESS("Connecting to NewsHost ...");
917: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
918: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
919: if (status<0){
920: char message[256];
921: NETCLOSE(s);
922: s = -1;
923: if (TRACE) fprintf(stderr, "HTNews: Unable to connect to news host.\n");
924: /* if (retries<=1) continue; WHY TRY AGAIN ? */
925: sprintf(message,
926: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 927: HTNewsHost);
2.10 ! timbl 928: return HTLoadError(request->output_stream, 500, message);
1.1 timbl 929: } else {
930: if (TRACE) fprintf(stderr, "HTNews: Connected to news host %s.\n",
1.2 timbl 931: HTNewsHost);
1.1 timbl 932: HTInitInput(s); /* set up buffering */
933: if ((response(NULL) / 100) !=2) {
2.8 timbl 934: char message[BIG];
1.1 timbl 935: NETCLOSE(s);
936: s = -1;
2.8 timbl 937: sprintf(message,
938: "Can't read news info. News host %.20s responded: %.200s",
939: HTNewsHost, response_text);
2.10 ! timbl 940: return HTLoadError(request->output_stream, 500, message);
1.1 timbl 941: }
942: }
943: } /* If needed opening */
944:
1.2 timbl 945: /* @@@@@@@@@@@@@@Tell user something's happening */
946:
1.1 timbl 947: status = response(command);
948: if (status<0) break;
949: if ((status/ 100) !=2) {
2.8 timbl 950: HTProgress(response_text);
1.1 timbl 951: /* NXRunAlertPanel("News access", response_text,
952: NULL,NULL,NULL);
953: */
954: NETCLOSE(s);
955: s = -1;
956: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
957: continue; /* Try again */
958: }
959:
960: /* Load a group, article, etc
961: */
1.2 timbl 962:
1.1 timbl 963:
964: if (list_wanted) read_list();
965: else if (group_wanted) read_group(groupName, first, last);
966: else read_article();
967:
2.6 timbl 968: (*targetClass.free)(target);
1.2 timbl 969: return HT_LOADED;
1.1 timbl 970:
971: } /* Retry loop */
972:
1.2 timbl 973:
2.8 timbl 974: /* HTAlert("Sorry, could not load requested news.\n"); */
975:
1.1 timbl 976: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
977: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
978:
1.2 timbl 979: return HT_LOADED;
1.1 timbl 980: }
981:
2.10 ! timbl 982: GLOBALDEF PUBLIC HTProtocol HTNews = { "news", HTLoadNews, NULL, NULL};
Webmaster