Annotation of libwww/Library/src/HTNews.c, revision 2.31
2.26 frystyk 1: /* HTNews.c
2: ** NEWS ACCESS
3: **
2.29 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.26 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
7: ** History:
8: ** 26 Sep 90 Written TBL
9: ** 29 Nov 91 Downgraded to C, for portable implementation.
2.19 luotonen 10: ** 16 Feb 94 AL Added Lou Montulli's Lynx & LIST NEWSGROUPS diffs.
11: ** 2 May 94 AL Added HTUnEscape() to HTLoadNews(), and
12: ** fixed a possible security hole when the URL contains
13: ** a newline, that could cause multiple commands to be
14: ** sent to an NNTP server.
2.23 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 16: */
2.27 roeber 17:
2.28 frystyk 18: #include "tcp.h"
19: #include "HTUtils.h" /* Coding convention macros */
20: #include "HTString.h"
21: #include "HTML.h"
22: #include "HTParse.h"
23: #include "HTFormat.h"
24: #include "HTAlert.h"
2.30 frystyk 25: #include "HTSocket.h"
2.28 frystyk 26: #include "HTError.h"
27: #include "HTNews.h" /* Implemented here */
1.3 timbl 28:
1.1 timbl 29: #define NEWS_PORT 119 /* Default NNTP port (see RFC 977); stored into soc_address by initialize() */
30: #define APPEND /* Use append methods (not referenced in the visible part of this file) */
31: #define MAX_CHUNK 40 /* Largest number of articles in one window; read_group() trims larger ranges */
32: #define CHUNK_SIZE 20 /* Number of articles for quick display; size of a trimmed range in read_group() */
33:
34: #ifndef DEFAULT_NEWS_HOST
35: #define DEFAULT_NEWS_HOST "news"
36: #endif
37: #ifndef SERVER_FILE
38: #define SERVER_FILE "/usr/local/lib/rn/server"
39: #endif
40:
2.8 timbl 41: #define BIG 1024 /* @@@ purpose not evident from the visible code */
42:
1.2 timbl 43: struct _HTStructured {
44: CONST HTStructuredClass * isa;
45: /* ... */
46: };
47:
2.7 timbl 48: #define NEWS_PROGRESS(foo) HTProgress(foo)
1.1 timbl 49:
50:
2.12 timbl 51: #define NEXT_CHAR HTInputSocket_getCharacter(isoc) /* Next character from isoc; read_group() treats a negative value as end of file */
1.1 timbl 52: #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */
53: #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */
54:
55:
56: /* Module-wide variables (all shared between requests, so this module is not reentrant)
57: */
1.2 timbl 58: PUBLIC char * HTNewsHost;
1.1 timbl 59: PRIVATE struct sockaddr_in soc_address; /* Binary network address, filled in by initialize() */
2.28 frystyk 60: PRIVATE SOCKFD s; /* Socket for NewsHost; INVSOC while disconnected */
1.1 timbl 61: PRIVATE char response_text[LINE_LENGTH+1]; /* Last response line read by response() */
1.2 timbl 62: PRIVATE HTStructured * target; /* The output sink */
63: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses, used by PUTC/PUTS/START/END */
1.1 timbl 64: PRIVATE HTParentAnchor *node_anchor; /* Its anchor */
65: PRIVATE int diagnostic; /* Non-zero when HTLoadNews saw output format WWW_SOURCE; read_article() then skips header formatting */
66:
1.2 timbl 67:
68: #define PUTC(c) (*targetClass.put_character)(target, c)
69: #define PUTS(s) (*targetClass.put_string)(target, s)
70: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
71: #define END(e) (*targetClass.end_element)(target, e)
72:
2.11 timbl 73: PUBLIC HTInputSocket *isoc; /* Input buffer read by NEXT_CHAR; @@@ non-reentrant */
74:
1.2 timbl 75: PUBLIC CONST char * HTGetNewsHost NOARGS
76: {
77: return HTNewsHost;
78: }
1.1 timbl 79:
1.2 timbl 80: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
81: {
82: StrAllocCopy(HTNewsHost, value);
83: }
1.1 timbl 84:
85: /* Initialisation for this module
86: ** ------------------------------
87: **
88: ** Except on the NeXT, we pick up the NewsHost name from
89: **
90: ** 1. Environment variable NNTPSERVER
91: ** 2. File SERVER_FILE
92: ** 3. Compilation time macro DEFAULT_NEWS_HOST
93: **
94: ** On the NeXT, we pick up the NewsHost name from, in order:
95: **
96: ** 1. WorldWideWeb default "NewsHost"
97: ** 2. Global default "NewsHost"
98: ** 3. News default "NewsHost"
99: ** 4. Compilation time macro DEFAULT_NEWS_HOST
100: */
101: PRIVATE BOOL initialized = NO;
102: PRIVATE BOOL initialize NOARGS
103: {
104: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
105: struct sockaddr_in* sin = &soc_address;
106:
107:
108: /* Set up defaults:
109: */
110: sin->sin_family = AF_INET; /* Family = internet, host order */
111: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
112:
113: /* Get name of Host
114: */
2.28 frystyk 115: #ifdef NeXTStep
1.2 timbl 116: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
117: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
118: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 119: #else
120: if (getenv("NNTPSERVER")) {
1.2 timbl 121: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
2.28 frystyk 122: if (TRACE) fprintf(TDEST, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 123: HTNewsHost);
1.1 timbl 124: } else {
125: char server_name[256];
126: FILE* fp = fopen(SERVER_FILE, "r");
127: if (fp) {
128: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 129: StrAllocCopy(HTNewsHost, server_name);
2.28 frystyk 130: if (TRACE) fprintf(TDEST,
1.1 timbl 131: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 132: SERVER_FILE, HTNewsHost);
1.1 timbl 133: }
134: fclose(fp);
135: }
136: }
1.2 timbl 137: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 138: #endif
139:
1.2 timbl 140: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
141: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1 timbl 142:
143: } else { /* Alphanumeric node name: */
1.2 timbl 144: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 145: if (!phost) {
2.7 timbl 146: char message[150]; /* @@@ */
147: sprintf(message,
148: "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
149: "Please define your NNTP server");
150: HTAlert(message);
2.28 frystyk 151: if (PROT_TRACE)
152: fprintf(TDEST, "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 153: return NO; /* Fail */
154: }
155: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
156: }
157:
2.28 frystyk 158: if (TRACE) fprintf(TDEST,
1.1 timbl 159: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
160: (unsigned int)ntohs(sin->sin_port),
161: (int)*((unsigned char *)(&sin->sin_addr)+0),
162: (int)*((unsigned char *)(&sin->sin_addr)+1),
163: (int)*((unsigned char *)(&sin->sin_addr)+2),
164: (int)*((unsigned char *)(&sin->sin_addr)+3));
165:
2.28 frystyk 166: s = INVSOC; /* Disconnected */
1.1 timbl 167:
168: return YES;
169: }
170:
171:
172:
173: /* Send NNTP Command line to remote host & Check Response
174: ** ------------------------------------------------------
175: **
176: ** On entry,
177: ** command points to the command to be sent, including CRLF, or is null
178: ** pointer if no command to be sent.
179: ** On exit,
180: ** Negative status indicates transmission error, socket closed.
181: ** Positive status is an NNTP status.
182: */
183:
184:
185: PRIVATE int response ARGS1(CONST char *,command)
186: {
187: int result;
188: char * p = response_text;
189: if (command) {
190: int status;
191: int length = strlen(command);
2.28 frystyk 192: if (TRACE) fprintf(TDEST, "NNTP command to be sent: %s", command);
1.1 timbl 193: #ifdef NOT_ASCII
194: {
195: CONST char * p;
196: char * q;
197: char ascii[LINE_LENGTH+1];
198: for(p = command, q=ascii; *p; p++, q++) {
199: *q = TOASCII(*p);
200: }
201: status = NETWRITE(s, ascii, length);
202: }
203: #else
204: status = NETWRITE(s, command, length);
205: #endif
206: if (status<0){
2.28 frystyk 207: if (TRACE) fprintf(TDEST,
1.1 timbl 208: "HTNews: Unable to send command. Disconnecting.\n");
209: NETCLOSE(s);
2.11 timbl 210: HTInputSocket_free(isoc);
2.28 frystyk 211: s = INVSOC;
1.1 timbl 212: return status;
213: } /* if bad status */
214: } /* if command to be sent */
215:
216: for(;;) {
1.3 timbl 217: if (((*p++=NEXT_CHAR) == LF)
218: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 219: *p++=0; /* Terminate the string */
2.28 frystyk 220: if (TRACE) fprintf(TDEST, "NNTP Response: %s\n", response_text);
1.1 timbl 221: sscanf(response_text, "%d", &result);
2.19 luotonen 222: if (result >= 411 && result <= 430) { /* no such article/group */
223: char * msg = strchr(response_text,' ');
224: if (!msg) msg = response_text;
225: PUTS("<H1>News error</H1>\n");
226: PUTS(msg);
2.28 frystyk 227: if (PROT_TRACE)
228: fprintf(TDEST, "News error.. %s", response_text);
2.19 luotonen 229: }
1.1 timbl 230: return result;
231: } /* if end of line */
232:
233: if (*(p-1) < 0) {
2.28 frystyk 234: if (TRACE) fprintf(TDEST,
1.1 timbl 235: "HTNews: EOF on read, closing socket %d\n", s);
236: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 237: HTInputSocket_free(isoc);
2.28 frystyk 238: return s = INVSOC; /* End of file on response */
1.1 timbl 239: }
240: } /* Loop over characters */
241: }
242:
243:
244: /* Case insensitive string comparisons
245: ** -----------------------------------
246: **
247: ** On entry,
248: ** template must be already un upper case.
249: ** unknown may be in upper or lower or mixed case to match.
250: */
2.24 frystyk 251: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,tmplate)
1.1 timbl 252: {
253: CONST char * u = unknown;
2.24 frystyk 254: CONST char * t = tmplate;
1.1 timbl 255: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
256: return (BOOL)(*t==0); /* OK if end of template */
257: }
258:
259: /* Find Author's name in mail address
260: ** ----------------------------------
261: **
262: ** On exit,
263: ** THE EMAIL ADDRESS IS CORRUPTED
264: **
265: ** For example, returns "Tim Berners-Lee" if given any of
2.31 ! frystyk 266: ** " Tim Berners-Lee <tim@w3.org> "
! 267: ** or " tim@w3.org ( Tim Berners-Lee ) "
1.1 timbl 268: */
269: PRIVATE char * author_name ARGS1 (char *,email)
270: {
271: char *s, *e;
272:
273: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
274: if (e>s) {
275: *e=0; /* Chop off everything after the ')' */
276: return HTStrip(s+1); /* Remove leading and trailing spaces */
277: }
278:
279: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
280: if (e>s) {
281: strcpy(s, e+1); /* Remove <...> */
282: return HTStrip(email); /* Remove leading and trailing spaces */
283: }
284:
285: return HTStrip(email); /* Default to the whole thing */
286:
287: }
288:
1.2 timbl 289: /* Start anchor element
290: ** --------------------
291: */
292: PRIVATE void start_anchor ARGS1(CONST char *, href)
293: {
294: BOOL present[HTML_A_ATTRIBUTES];
295: CONST char* value[HTML_A_ATTRIBUTES];
296:
297: {
298: int i;
299: for(i=0; i<HTML_A_ATTRIBUTES; i++)
300: present[i] = (i==HTML_A_HREF);
301: }
302: value[HTML_A_HREF] = href;
303: (*targetClass.start_element)(target, HTML_A , present, value);
304:
305: }
1.1 timbl 306:
2.16 luotonen 307:
308: /* Start link element
309: ** --------------------
310: */
311: PRIVATE void start_link ARGS2(CONST char *, href, CONST char *, rev)
312: {
313: #ifdef WHEN_WE_HAVE_HTMLPLUS
314:
315: BOOL present[HTML_LINK_ATTRIBUTES];
316: CONST char* value[HTML_LINK_ATTRIBUTES];
317:
318: {
319: int i;
320: for(i=0; i<HTML_LINK_ATTRIBUTES; i++)
321: present[i] = (i==HTML_LINK_HREF || i==HTML_LINK_REV);
322: }
323: value[HTML_LINK_HREF] = href;
324: value[HTML_LINK_REV] = rev;
325: (*targetClass.start_element)(target, HTML_LINK , present, value);
326:
327: #endif
328: }
329:
330:
331:
332:
1.1 timbl 333: /* Paste in an Anchor
334: ** ------------------
335: **
336: **
337: ** On entry,
338: ** HT has a selection of zero length at the end.
339: ** text points to the text to be put into the file, 0 terminated.
340: ** addr points to the hypertext refernce address,
341: ** terminated by white space, comma, NULL or '>'
342: */
343: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
344: {
345: char href[LINE_LENGTH+1];
346:
347: {
348: CONST char * p;
349: strcpy(href,"news:");
350: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
351: strncat(href, addr, p-addr); /* Make complete hypertext reference */
352: }
353:
1.2 timbl 354: start_anchor(href);
355: PUTS(text);
356: END(HTML_A);
1.1 timbl 357: }
358:
359:
360: /* Write list of anchors
361: ** ---------------------
362: **
363: ** We take a pointer to a list of objects, and write out each,
364: ** generating an anchor for each.
365: **
366: ** On entry,
367: ** HT has a selection of zero length at the end.
368: ** text points to a comma or space separated list of addresses.
369: ** On exit,
370: ** *text is NOT any more chopped up into substrings.
371: */
372: PRIVATE void write_anchors ARGS1 (char *,text)
373: {
374: char * start = text;
375: char * end;
376: char c;
377: for (;;) {
378: for(;*start && (WHITE(*start)); start++); /* Find start */
379: if (!*start) return; /* (Done) */
380: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
381: if (*end) end++; /* Include comma or space but not NULL */
382: c = *end;
383: *end = 0;
384: write_anchor(start, start);
2.16 luotonen 385: START(HTML_BR);
1.1 timbl 386: *end = c;
387: start = end; /* Point to next one */
388: }
389: }
390:
391: /* Abort the connection abort_socket
392: ** --------------------
393: */
394: PRIVATE void abort_socket NOARGS
395: {
2.28 frystyk 396: if (TRACE) fprintf(TDEST,
1.1 timbl 397: "HTNews: EOF on read, closing socket %d\n", s);
398: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 399: HTInputSocket_free(isoc);
1.2 timbl 400: PUTS("Network Error: connection lost");
401: PUTC('\n');
2.28 frystyk 402: s = INVSOC; /* End of file on response */
1.1 timbl 403: return;
404: }
405:
406: /* Read in an Article read_article
407: ** ------------------
408: **
409: **
410: ** Note the termination condition of a single dot on a line by itself.
411: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
412: ** do not handle it here.
413: **
414: ** On entry,
415: ** s Global socket number is OK
416: ** HT Global hypertext object is ready for appending text
417: */
418: PRIVATE void read_article NOARGS
419: {
420:
421: char line[LINE_LENGTH+1];
422: char *references=NULL; /* Hrefs for other articles */
423: char *newsgroups=NULL; /* Newsgroups list */
424: char *p = line;
425: BOOL done = NO;
426:
427: /* Read in the HEADer of the article:
428: **
429: ** The header fields are either ignored, or formatted and put into the
430: ** Text.
431: */
432: if (!diagnostic) {
1.2 timbl 433: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 434: while(!done){
435: char ch = *p++ = NEXT_CHAR;
436: if (ch==(char)EOF) {
437: abort_socket(); /* End of file, close socket */
438: return; /* End of file on response */
439: }
1.3 timbl 440: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 441: *--p=0; /* Terminate the string */
2.28 frystyk 442: if (TRACE) fprintf(TDEST, "H %s\n", line);
1.1 timbl 443:
444: if (line[0]=='.') {
445: if (line[1]<' ') { /* End of article? */
446: done = YES;
447: break;
448: }
449:
450: } else if (line[0]<' ') {
451: break; /* End of Header? */
2.16 luotonen 452:
1.1 timbl 453: } else if (match(line, "SUBJECT:")) {
1.2 timbl 454: END(HTML_ADDRESS);
455: START(HTML_TITLE); /** Uuugh! @@@ */
2.16 luotonen 456: PUTS(line+9);
457: END(HTML_TITLE);
458: START(HTML_H1);
1.2 timbl 459: PUTS(line+8);
2.16 luotonen 460: END(HTML_H1);
1.2 timbl 461: START(HTML_ADDRESS);
2.16 luotonen 462:
1.1 timbl 463: } else if (match(line, "DATE:")
464: || match(line, "ORGANIZATION:")) {
2.16 luotonen 465: PUTS(strchr(line,':')+2);
466: START(HTML_BR);
467:
468: } else if(match(line, "FROM:")) {
469: char * temp=0;
470: char * href=0;
471: char *cp1, *cp2;
472:
473: /* copy into temporary storage */
474: StrAllocCopy(temp, strchr(line,':')+1);
475:
476: cp1=temp;
477: while(isspace(*cp1)) cp1++;
478: /* remove space and stuff after */
479: if((cp2 = strchr(cp1,' ')) != NULL)
480: *cp2 = '\0';
481:
482: StrAllocCopy(href,"mailto:");
483: StrAllocCat(href,cp1);
484:
485: start_anchor(href);
486: PUTS("Reply to ");
487: PUTS(strchr(line,':')+1);
488: END(HTML_A);
489: START(HTML_BR);
490:
491: /* put in the owner as a link rel. as well */
492: start_link(href, "made");
493:
494: /* free of temp vars */
495: free(temp);
496: free(href);
497:
1.1 timbl 498: } else if (match(line, "NEWSGROUPS:")) {
499: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
500:
501: } else if (match(line, "REFERENCES:")) {
502: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
503:
504: } /* end if match */
505: p = line; /* Restart at beginning */
506: } /* if end of line */
507: } /* Loop over characters */
2.16 luotonen 508: END(HTML_ADDRESS);
1.1 timbl 509:
1.2 timbl 510: if (newsgroups || references) {
2.16 luotonen 511: START(HTML_DL);
1.2 timbl 512: if (newsgroups) {
2.16 luotonen 513: #ifdef POSTING
514: char *href=0;
515: #endif
516:
1.2 timbl 517: (*targetClass.start_element)(target, HTML_DT , 0, 0);
518: PUTS("Newsgroups:");
519: (*targetClass.start_element)(target, HTML_DD , 0, 0);
520: write_anchors(newsgroups);
2.16 luotonen 521:
522: #ifdef POSTING
523: /* make posting possible */
524: StrAllocCopy(href,"newspost:");
525: StrAllocCat(href,newsgroups);
526: START(HTML_DT);
527: start_anchor(href);
528: PUTS("Reply to newsgroup(s)");
529: END(HTML_A);
530: #endif
531:
1.2 timbl 532: free(newsgroups);
533: }
534:
535: if (references) {
536: (*targetClass.start_element)(target, HTML_DT , 0, 0);
537: PUTS("References:");
538: (*targetClass.start_element)(target, HTML_DD , 0, 0);
539: write_anchors(references);
540: free(references);
541: }
2.16 luotonen 542: #ifdef WHEN_WE_HAVE_HTMLPLUS
543: (*targetClass.end_element)(target, HTML_DLC);
544: #else
2.10 timbl 545: (*targetClass.end_element)(target, HTML_DL);
2.16 luotonen 546: #endif
1.1 timbl 547: }
1.2 timbl 548: PUTS("\n\n\n");
1.1 timbl 549:
550: }
551:
552: /* Read in the BODY of the Article:
553: */
1.2 timbl 554: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
555:
1.1 timbl 556: p = line;
557: while(!done){
558: char ch = *p++ = NEXT_CHAR;
559: if (ch==(char)EOF) {
560: abort_socket(); /* End of file, close socket */
561: return; /* End of file on response */
562: }
1.3 timbl 563: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 564: *p++=0; /* Terminate the string */
2.28 frystyk 565: if (TRACE) fprintf(TDEST, "B %s", line);
1.1 timbl 566: if (line[0]=='.') {
567: if (line[1]<' ') { /* End of article? */
568: done = YES;
569: break;
570: } else { /* Line starts with dot */
1.2 timbl 571: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 572: }
573: } else {
574:
575: /* Normal lines are scanned for buried references to other articles.
576: ** Unfortunately, it will pick up mail addresses as well!
577: */
578: char *l = line;
579: char * p;
2.14 luotonen 580: while ((p=strchr(l, '<'))) {
1.1 timbl 581: char *q = strchr(p,'>');
582: char *at = strchr(p, '@');
583: if (q && at && at<q) {
584: char c = q[1];
585: q[1] = 0; /* chop up */
586: *p = 0;
1.2 timbl 587: PUTS(l);
1.1 timbl 588: *p = '<'; /* again */
589: *q = 0;
1.2 timbl 590: start_anchor(p+1);
1.1 timbl 591: *q = '>'; /* again */
1.2 timbl 592: PUTS(p);
593: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 594: q[1] = c; /* again */
595: l=q+1;
596: } else break; /* line has unmatched <> */
597: }
1.2 timbl 598: PUTS( l); /* Last bit of the line */
1.1 timbl 599: } /* if not dot */
600: p = line; /* Restart at beginning */
601: } /* if end of line */
602: } /* Loop over characters */
1.2 timbl 603:
604: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 605: }
606:
607:
608: /* Read in a List of Newsgroups
609: ** ----------------------------
610: */
611: /*
612: ** Note the termination condition of a single dot on a line by itself.
613: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
614: ** do not handle it here.
615: */
616: PRIVATE void read_list NOARGS
617: {
618:
619: char line[LINE_LENGTH+1];
620: char *p;
621: BOOL done = NO;
622:
623: /* Read in the HEADer of the article:
624: **
625: ** The header fields are either ignored, or formatted and put into the
626: ** Text.
627: */
1.2 timbl 628: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
629: PUTS( "Newsgroups");
630: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 631: p = line;
2.16 luotonen 632: (*targetClass.start_element)(target, HTML_DL, 0, 0);
1.1 timbl 633: while(!done){
634: char ch = *p++ = NEXT_CHAR;
635: if (ch==(char)EOF) {
636: abort_socket(); /* End of file, close socket */
637: return; /* End of file on response */
638: }
1.3 timbl 639: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 640: *p++=0; /* Terminate the string */
2.28 frystyk 641: if (TRACE) fprintf(TDEST, "B %s", line);
2.16 luotonen 642: (*targetClass.start_element)(target, HTML_DT , 0, 0);
1.1 timbl 643: if (line[0]=='.') {
644: if (line[1]<' ') { /* End of article? */
645: done = YES;
646: break;
647: } else { /* Line starts with dot */
1.2 timbl 648: PUTS( &line[1]);
1.1 timbl 649: }
650: } else {
651:
652: /* Normal lines are scanned for references to newsgroups.
653: */
2.16 luotonen 654: int i=0;
655:
656: /* find whitespace if it exits */
657: for(; line[i] != '\0' && !WHITE(line[i]); i++)
658: ; /* null body */
659:
660: if(line[i] != '\0') {
661: line[i] = '\0';
662: write_anchor(line, line);
663: (*targetClass.start_element)(target, HTML_DD , 0, 0);
664: PUTS(&line[i+1]); /* put description */
665: } else {
666: write_anchor(line, line);
667: }
668:
669: #ifdef OLD_CODE
1.1 timbl 670: char group[LINE_LENGTH];
671: int first, last;
672: char postable;
673: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
674: write_anchor(line, group);
675: else
1.2 timbl 676: PUTS(line);
2.16 luotonen 677: #endif /*OLD_CODE*/
678:
1.1 timbl 679: } /* if not dot */
680: p = line; /* Restart at beginning */
681: } /* if end of line */
682: } /* Loop over characters */
2.16 luotonen 683: (*targetClass.end_element)(target, HTML_DL);
1.1 timbl 684: }
685:
686:
687: /* Read in a Newsgroup
688: ** -------------------
689: ** Unfortunately, we have to ask for each article one by one if we
690: ** want more than one field.
691: **
692: */
693: PRIVATE void read_group ARGS3(
694: CONST char *,groupName,
695: int,first_required,
696: int,last_required
697: )
698: {
699: char line[LINE_LENGTH+1];
700: char author[LINE_LENGTH+1];
701: char subject[LINE_LENGTH+1];
702: char *p;
703: BOOL done;
704:
705: char buffer[LINE_LENGTH];
706: char *reference=0; /* Href for article */
707: int art; /* Article number WITHIN GROUP */
708: int status, count, first, last; /* Response fields */
709: /* count is only an upper limit */
710:
711: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
2.17 frystyk 712: if(TRACE)
2.28 frystyk 713: fprintf(TDEST,
2.17 frystyk 714: "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
715: status, count, first, last, first_required, last_required);
1.1 timbl 716: if (last==0) {
1.2 timbl 717: PUTS( "\nNo articles in this group.\n");
2.16 luotonen 718: #ifdef POSTING
719: goto add_post;
720: #endif
1.1 timbl 721: return;
722: }
723:
724: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
725: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
726:
727: if (first_required<first) first_required = first; /* clip */
728: if ((last_required==0) || (last_required > last)) last_required = last;
729:
730: if (last_required<=first_required) {
1.2 timbl 731: PUTS( "\nNo articles in this range.\n");
2.16 luotonen 732: #ifdef POSTING
733: goto add_post;
734: #endif
1.1 timbl 735: return;
736: }
737:
738: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
739: first_required = last_required-CHUNK_SIZE+1;
740: }
2.28 frystyk 741: if (TRACE) fprintf (TDEST, " Chunk will be (%d-%d)\n",
2.16 luotonen 742: first_required, last_required);
1.1 timbl 743:
1.2 timbl 744: /* Set window title
745: */
746: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
747: groupName, first_required, last_required);
748: START(HTML_TITLE);
749: PUTS(buffer);
750: END(HTML_TITLE);
751:
1.1 timbl 752: /* Link to earlier articles
753: */
754: if (first_required>first) {
755: int before; /* Start of one before */
756: if (first_required-MAX_CHUNK <= first) before = first;
757: else before = first_required-CHUNK_SIZE;
758: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
2.28 frystyk 759: if (TRACE) fprintf(TDEST, " Block before is %s\n", buffer);
1.2 timbl 760: PUTS( " (");
761: start_anchor(buffer);
762: PUTS("Earlier articles");
763: END(HTML_A);
764: PUTS( "...)\n");
1.1 timbl 765: }
766:
767: done = NO;
768:
769: /*#define USE_XHDR*/
770: #ifdef USE_XHDR
771: if (count>FAST_THRESHOLD) {
772: sprintf(buffer,
773: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
774: count, groupName);
1.2 timbl 775: PUTS(buffer);
1.3 timbl 776: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 777: status = response(buffer);
778: if (status==221) {
779:
780: p = line;
781: while(!done){
782: char ch = *p++ = NEXT_CHAR;
783: if (ch==(char)EOF) {
784: abort_socket(); /* End of file, close socket */
785: return; /* End of file on response */
786: }
787: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
788: *p++=0; /* Terminate the string */
2.28 frystyk 789: if (TRACE) fprintf(TDEST, "X %s", line);
1.1 timbl 790: if (line[0]=='.') {
791: if (line[1]<' ') { /* End of article? */
792: done = YES;
793: break;
794: } else { /* Line starts with dot */
795: /* Ignore strange line */
796: }
797: } else {
798:
799: /* Normal lines are scanned for references to articles.
800: */
801: char * space = strchr(line, ' ');
802: if (space++)
803: write_anchor(space, space);
804: } /* if not dot */
805: p = line; /* Restart at beginning */
806: } /* if end of line */
807: } /* Loop over characters */
808:
809: /* leaving loop with "done" set */
810: } /* Good status */
811: };
812: #endif
813:
814: /* Read newsgroup using individual fields:
815: */
816: if (!done) {
817: if (first==first_required && last==last_required)
1.2 timbl 818: PUTS("\nAll available articles in ");
819: else PUTS( "\nArticles in ");
820: PUTS(groupName);
821: START(HTML_MENU);
1.1 timbl 822: for(art=first_required; art<=last_required; art++) {
823:
824: /*#define OVERLAP*/
825: #ifdef OVERLAP
826: /* With this code we try to keep the server running flat out by queuing just
827: ** one extra command ahead of time. We assume (1) that the server won't abort
828: ** if it gets input during output, and (2) that TCP buffering is enough for the
829: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
830: ** had a hangup with a loaded server.
831: */
832: if (art==first_required) {
833: if (art==last_required) {
1.3 timbl 834: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 835: status = response(buffer);
836: } else { /* First of many */
1.3 timbl 837: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
838: art, CR, LF, art+1, CR, LF);
1.1 timbl 839: status = response(buffer);
840: }
841: } else if (art==last_required) { /* Last of many */
842: status = response(NULL);
843: } else { /* Middle of many */
1.3 timbl 844: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 845: status = response(buffer);
846: }
847:
848: #else /* NOT OVERLAP */
1.3 timbl 849: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 850: status = response(buffer);
851: #endif /* NOT OVERLAP */
852:
853: if (status == 221) { /* Head follows - parse it:*/
2.20 frystyk 854: int ch;
1.1 timbl 855: p = line; /* Write pointer */
856: done = NO;
857: while(!done){
2.20 frystyk 858: if ((ch = HTInputSocket_getCharacter(isoc)) < 0) {
1.1 timbl 859: abort_socket(); /* End of file, close socket */
860: return; /* End of file on response */
861: }
2.20 frystyk 862: *p++ = (unsigned char) ch;
1.3 timbl 863: if ((ch == LF)
1.1 timbl 864: || (p == &line[LINE_LENGTH]) ) {
865:
866: *--p=0; /* Terminate & chop LF*/
867: p = line; /* Restart at beginning */
2.28 frystyk 868: if (TRACE) fprintf(TDEST, "G %s\n", line);
1.1 timbl 869: switch(line[0]) {
870:
871: case '.':
872: done = (line[1]<' '); /* End of article? */
873: break;
874:
875: case 'S':
876: case 's':
877: if (match(line, "SUBJECT:"))
878: strcpy(subject, line+9);/* Save subject */
879: break;
880:
881: case 'M':
882: case 'm':
883: if (match(line, "MESSAGE-ID:")) {
884: char * addr = HTStrip(line+11) +1; /* Chop < */
885: addr[strlen(addr)-1]=0; /* Chop > */
886: StrAllocCopy(reference, addr);
887: }
888: break;
889:
890: case 'f':
891: case 'F':
892: if (match(line, "FROM:")) {
893: char * p;
894: strcpy(author,
895: author_name(strchr(line,':')+1));
2.17 frystyk 896: if (*author) { /* Not always there! */
897: p = author + strlen(author) - 1;
898: if (*p==LF) *p = 0; /* Chop off newline */
899: }
1.1 timbl 900: }
901: break;
902:
903: } /* end switch on first character */
904: } /* if end of line */
905: } /* Loop over characters */
906:
1.2 timbl 907: START(HTML_LI);
1.1 timbl 908: sprintf(buffer, "\"%s\" - %s", subject, author);
909: if (reference) {
910: write_anchor(buffer, reference);
911: free(reference);
912: reference=0;
913: } else {
1.2 timbl 914: PUTS(buffer);
1.1 timbl 915: }
916:
917:
1.2 timbl 918: /* indicate progress! @@@@@@
1.1 timbl 919: */
920:
921: } /* If good response */
922: } /* Loop over article */
923: } /* If read headers */
1.2 timbl 924: END(HTML_MENU);
925: START(HTML_P);
1.1 timbl 926:
927: /* Link to later articles
928: */
929: if (last_required<last) {
930: int after; /* End of article after */
931: after = last_required+CHUNK_SIZE;
932: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
933: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
2.28 frystyk 934: if (TRACE) fprintf(TDEST, " Block after is %s\n", buffer);
1.2 timbl 935: PUTS( "(");
936: start_anchor(buffer);
937: PUTS( "Later articles");
938: END(HTML_A);
939: PUTS( "...)\n");
1.1 timbl 940: }
2.16 luotonen 941:
2.19 luotonen 942: #ifdef POSTING
943: add_post:
944: #endif
2.16 luotonen 945: {
946: char *href=0;
947: START(HTML_HR);
948:
949: StrAllocCopy(href,"newspost:");
950: StrAllocCat(href,groupName);
951: start_anchor(href);
952: PUTS("Post to ");
953: PUTS(groupName);
954: END(HTML_A);
955:
956: free(href);
957: }
1.1 timbl 958:
959:
960: }
961:
962:
963: /* Load by name HTLoadNews
964: ** ============
965: */
2.13 timbl 966: PUBLIC int HTLoadNews ARGS1(HTRequest *, request)
1.1 timbl 967: {
2.19 luotonen 968: char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 969: char command[257]; /* The whole command */
970: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
971: int status; /* tcp return */
972: int retries; /* A count of how hard we have tried */
973: BOOL group_wanted; /* Flag: group was asked for, not article */
974: BOOL list_wanted; /* Flag: group was asked for, not article */
975: int first, last; /* First and last articles asked for */
976:
2.10 timbl 977: diagnostic = (request->output_format == WWW_SOURCE); /* set global flag */
1.1 timbl 978:
2.28 frystyk 979: if (TRACE) fprintf(TDEST, "HTNews: Looking for %s\n", arg);
1.1 timbl 980:
981: if (!initialized) initialized = initialize();
982: if (!initialized) return -1; /* FAIL */
983:
984: {
2.19 luotonen 985: char * p1=arg;
1.1 timbl 986:
987: /* We will ask for the document, omitting the host name & anchor.
988: **
989: ** Syntax of address is
990: ** xxx@yyy Article
991: ** <xxx@yyy> Same article
992: ** xxxxx News group (no "@")
993: ** group/n1-n2 Articles n1 to n2 in group
994: */
995: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
996: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
997:
998: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
999: /* Don't use HTParse because news: access doesn't follow traditional
1000: rules. For instance, if the article reference contains a '#',
1001: the rest of it is lost -- JFG 10/7/92, from a bug report */
1002: if (!strncasecomp (arg, "news:", 5))
1003: p1 = arg + 5; /* Skip "news:" prefix */
2.19 luotonen 1004: HTUnEscape(p1); /* AL May 2, 1994 */
1005: HTCleanTelnetString(p1); /* Prevent security holes */
1.1 timbl 1006: if (list_wanted) {
2.16 luotonen 1007: strcpy(command, "LIST NEWSGROUPS");
1.1 timbl 1008: } else if (group_wanted) {
1009: char * slash = strchr(p1, '/');
1010: strcpy(command, "GROUP ");
1011: first = 0;
1012: last = 0;
1013: if (slash) {
1014: *slash = 0;
1015: strcpy(groupName, p1);
1016: *slash = '/';
1017: (void) sscanf(slash+1, "%d-%d", &first, &last);
1018: } else {
1019: strcpy(groupName, p1);
1020: }
1021: strcat(command, groupName);
1022: } else {
1023: strcpy(command, "ARTICLE ");
1024: if (strchr(p1, '<')==0) strcat(command,"<");
1025: strcat(command, p1);
1026: if (strchr(p1, '>')==0) strcat(command,">");
1027: }
1028:
1.3 timbl 1029: {
1030: char * p = command + strlen(command);
1031: *p++ = CR; /* Macros to be correct on Mac */
1032: *p++ = LF;
1033: *p++ = 0;
1034: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
1035: }
1.1 timbl 1036: } /* scope of p1 */
1037:
1038: if (!*arg) return NO; /* Ignore if no name */
1039:
1040:
1041: /* Make a hypertext object with an anchor list.
1042: */
2.10 timbl 1043: node_anchor = request->anchor;
2.11 timbl 1044: target = HTML_new(request, NULL, WWW_HTML,
1045: request->output_format, request->output_stream);
1.2 timbl 1046: targetClass = *target->isa; /* Copy routine entry points */
1047:
1.1 timbl 1048:
1049: /* Now, let's get a stream setup up from the NewsHost:
1050: */
1051: for(retries=0;retries<2; retries++){
1052:
1053: if (s<0) {
1054: NEWS_PROGRESS("Connecting to NewsHost ...");
1055: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1056: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
1057: if (status<0){
1058: NETCLOSE(s);
2.28 frystyk 1059: s = INVSOC;
2.21 frystyk 1060: #ifdef OLD_CODE
1061: char message[256];
2.28 frystyk 1062: if (TRACE) fprintf(TDEST, "HTNews: Unable to connect to news host.\n");
1.1 timbl 1063: /* if (retries<=1) continue; WHY TRY AGAIN ? */
1064: sprintf(message,
1065: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 1066: HTNewsHost);
2.15 luotonen 1067: return HTLoadError(request, 500, message);
2.21 frystyk 1068: #endif /* OLD_CODE */
1069: {
1070: char *unescaped = NULL;
1071: StrAllocCopy(unescaped, arg);
1072: HTUnEscape(unescaped);
1073: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1074: (void *) unescaped,
1075: (int) strlen(unescaped), "HTLoadNews");
1076: free(unescaped);
1077: return -1;
1078: }
1.1 timbl 1079: } else {
2.28 frystyk 1080: if (TRACE) fprintf(TDEST, "HTNews: Connected to news host %s.\n",
1.2 timbl 1081: HTNewsHost);
2.11 timbl 1082: isoc = HTInputSocket_new(s); /* set up buffering */
1.1 timbl 1083: if ((response(NULL) / 100) !=2) {
2.21 frystyk 1084: int length = strlen(response_text);
1085: NETCLOSE(s);
1086: HTInputSocket_free(isoc);
2.28 frystyk 1087: s = INVSOC;
2.21 frystyk 1088: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NEWS_SERVER,
1089: (void *) response_text, length < 50 ?
1090: length : 50, "HTLoadNews");
1091: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1092: (void *) HTNewsHost,
1093: (int) strlen(HTNewsHost), "HTLoadNews");
1094: return -1;
1095: }
1096: #ifdef OLD_CODE
2.8 timbl 1097: char message[BIG];
1098: sprintf(message,
1099: "Can't read news info. News host %.20s responded: %.200s",
1100: HTNewsHost, response_text);
2.15 luotonen 1101: return HTLoadError(request, 500, message);
2.21 frystyk 1102: }
1103: #endif /* OLD_CODE */
1.1 timbl 1104: }
1105: } /* If needed opening */
1106:
1.2 timbl 1107: /* @@@@@@@@@@@@@@Tell user something's happening */
1108:
1.1 timbl 1109: status = response(command);
1110: if (status<0) break;
2.19 luotonen 1111: if (status >= 411 && status <= 430) break; /* no such article/group */
1.1 timbl 1112: if ((status/ 100) !=2) {
2.8 timbl 1113: HTProgress(response_text);
1.1 timbl 1114: /* NXRunAlertPanel("News access", response_text,
1115: NULL,NULL,NULL);
1116: */
1117: NETCLOSE(s);
2.11 timbl 1118: HTInputSocket_free(isoc);
2.28 frystyk 1119: s = INVSOC;
1.1 timbl 1120: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
1121: continue; /* Try again */
1122: }
1123:
1124: /* Load a group, article, etc
1125: */
1.2 timbl 1126:
1.1 timbl 1127:
1128: if (list_wanted) read_list();
1129: else if (group_wanted) read_group(groupName, first, last);
1130: else read_article();
1131:
2.23 duns 1132: (*targetClass._free)(target);
1.2 timbl 1133: return HT_LOADED;
1.1 timbl 1134:
1135: } /* Retry loop */
1136:
1.2 timbl 1137:
2.8 timbl 1138: /* HTAlert("Sorry, could not load requested news.\n"); */
1139:
1.1 timbl 1140: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
1141: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
1142:
2.23 duns 1143: (*targetClass._free)(target); /* AL May 2, 1994 */
1.2 timbl 1144: return HT_LOADED;
1.1 timbl 1145: }
1146:
2.25 frystyk 1147: GLOBALDEF PUBLIC HTProtocol HTNews = {
1148: "news", SOC_BLOCK, HTLoadNews, NULL, NULL
1149: };
Webmaster