Annotation of libwww/Library/src/HTNews.c, revision 2.32
2.26 frystyk 1: /* HTNews.c
2: ** NEWS ACCESS
3: **
2.29 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.26 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
7: ** History:
8: ** 26 Sep 90 Written TBL
9: ** 29 Nov 91 Downgraded to C, for portable implementation.
2.19 luotonen 10: ** 16 Feb 94 AL Added Lou Montulli's Lynx & LIST NEWSGROUPS diffs.
11: ** 2 May 94 AL Added HTUnEscape() to HTLoadNews(), and
12: ** fixed a possible security hole when the URL contains
13: ** a newline, that could cause multiple commands to be
14: ** sent to an NNTP server.
2.23 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 16: */
2.27 roeber 17:
2.28 frystyk 18: #include "tcp.h"
19: #include "HTUtils.h" /* Coding convention macros */
20: #include "HTString.h"
21: #include "HTML.h"
22: #include "HTParse.h"
23: #include "HTFormat.h"
24: #include "HTAlert.h"
2.30 frystyk 25: #include "HTSocket.h"
2.28 frystyk 26: #include "HTError.h"
27: #include "HTNews.h" /* Implemented here */
1.3 timbl 28:
1.1 timbl 29: #define NEWS_PORT 119 /* Default NNTP TCP port, see RFC 977 */
30: #define APPEND /* Use append methods (not referenced in the visible part of this file) */
31: #define MAX_CHUNK 40 /* Largest number of articles in one window */
32: #define CHUNK_SIZE 20 /* Number of articles for quick display */
33:
34: #ifndef DEFAULT_NEWS_HOST
35: #define DEFAULT_NEWS_HOST "news"
36: #endif
37: #ifndef SERVER_FILE
38: #define SERVER_FILE "/usr/local/lib/rn/server"
39: #endif
40:
2.8 timbl 41: #define BIG 1024 /* @@@ not referenced in the visible part of this file */
42:
1.2 timbl 43: struct _HTStructured {
44: CONST HTStructuredClass * isa; /* Method table; copied into targetClass by HTLoadNews */
45: /* ... */
46: };
47:
1.1 timbl 48:
2.12 timbl 49: #define NEXT_CHAR HTInputSocket_getCharacter(isoc)
1.1 timbl 50: #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */
51: #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */
52:
53:
54: /* Module-wide variables
55: */
1.2 timbl 56: PUBLIC char * HTNewsHost;
1.1 timbl 57: PRIVATE struct sockaddr_in soc_address; /* Binary network address */
2.28 frystyk 58: PRIVATE SOCKFD s; /* Socket for NewsHost */
1.1 timbl 59: PRIVATE char response_text[LINE_LENGTH+1]; /* Last response */
1.2 timbl 60: PRIVATE HTStructured * target; /* The output sink */
61: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses */
1.1 timbl 62: PRIVATE HTParentAnchor *node_anchor; /* Its anchor */
63: PRIVATE int diagnostic; /* Non-zero when WWW_SOURCE output was requested (set in HTLoadNews) */
64:
1.2 timbl 65:
66: #define PUTC(c) (*targetClass.put_character)(target, c)
67: #define PUTS(s) (*targetClass.put_string)(target, s)
68: #define START(e) (*targetClass.start_element)(target, e, 0, 0)
69: #define END(e) (*targetClass.end_element)(target, e)
70:
2.11 timbl 71: PUBLIC HTInputSocket *isoc; /* @@@ non-reentrant: single input buffer shared by every reader in this module */
72:
1.2 timbl 73: PUBLIC CONST char * HTGetNewsHost NOARGS
74: {
75: return HTNewsHost;
76: }
1.1 timbl 77:
1.2 timbl 78: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
79: {
80: StrAllocCopy(HTNewsHost, value);
81: }
1.1 timbl 82:
83: /* Initialisation for this module
84: ** ------------------------------
85: **
86: ** Except on the NeXT, we pick up the NewsHost name from
87: **
88: ** 1. Environment variable NNTPSERVER
89: ** 2. File SERVER_FILE
90: ** 3. Compilation time macro DEFAULT_NEWS_HOST
91: **
92: ** On the NeXT, we pick up the NewsHost name from, in order:
93: **
94: ** 1. WorldWideWeb default "NewsHost"
95: ** 2. Global default "NewsHost"
96: ** 3. News default "NewsHost"
97: ** 4. Compilation time macro DEFAULT_NEWS_HOST
98: */
99: PRIVATE BOOL initialized = NO;
100: PRIVATE BOOL initialize NOARGS
101: {
102: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
103: struct sockaddr_in* sin = &soc_address;
104:
105:
106: /* Set up defaults:
107: */
108: sin->sin_family = AF_INET; /* Family = internet, host order */
109: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
110:
111: /* Get name of Host
112: */
2.28 frystyk 113: #ifdef NeXTStep
1.2 timbl 114: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
115: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
116: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 117: #else
118: if (getenv("NNTPSERVER")) {
1.2 timbl 119: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
2.28 frystyk 120: if (TRACE) fprintf(TDEST, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 121: HTNewsHost);
1.1 timbl 122: } else {
123: char server_name[256];
124: FILE* fp = fopen(SERVER_FILE, "r");
125: if (fp) {
126: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 127: StrAllocCopy(HTNewsHost, server_name);
2.28 frystyk 128: if (TRACE) fprintf(TDEST,
1.1 timbl 129: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 130: SERVER_FILE, HTNewsHost);
1.1 timbl 131: }
132: fclose(fp);
133: }
134: }
1.2 timbl 135: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 136: #endif
137:
1.2 timbl 138: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
139: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1 timbl 140:
141: } else { /* Alphanumeric node name: */
1.2 timbl 142: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 143: if (!phost) {
2.7 timbl 144: char message[150]; /* @@@ */
145: sprintf(message,
146: "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
147: "Please define your NNTP server");
148: HTAlert(message);
2.28 frystyk 149: if (PROT_TRACE)
150: fprintf(TDEST, "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 151: return NO; /* Fail */
152: }
153: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
154: }
155:
2.28 frystyk 156: if (TRACE) fprintf(TDEST,
1.1 timbl 157: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
158: (unsigned int)ntohs(sin->sin_port),
159: (int)*((unsigned char *)(&sin->sin_addr)+0),
160: (int)*((unsigned char *)(&sin->sin_addr)+1),
161: (int)*((unsigned char *)(&sin->sin_addr)+2),
162: (int)*((unsigned char *)(&sin->sin_addr)+3));
163:
2.28 frystyk 164: s = INVSOC; /* Disconnected */
1.1 timbl 165:
166: return YES;
167: }
168:
169:
170:
171: /* Send NNTP Command line to remote host & Check Response
172: ** ------------------------------------------------------
173: **
174: ** On entry,
175: ** command points to the command to be sent, including CRLF, or is null
176: ** pointer if no command to be sent.
177: ** On exit,
178: ** Negative status indicates transmission error, socket closed.
179: ** Positive status is an NNTP status.
180: */
181:
182:
183: PRIVATE int response ARGS1(CONST char *,command)
184: {
185: int result;
186: char * p = response_text;
187: if (command) {
188: int status;
189: int length = strlen(command);
2.28 frystyk 190: if (TRACE) fprintf(TDEST, "NNTP command to be sent: %s", command);
1.1 timbl 191: #ifdef NOT_ASCII
192: {
193: CONST char * p;
194: char * q;
195: char ascii[LINE_LENGTH+1];
196: for(p = command, q=ascii; *p; p++, q++) {
197: *q = TOASCII(*p);
198: }
199: status = NETWRITE(s, ascii, length);
200: }
201: #else
202: status = NETWRITE(s, command, length);
203: #endif
204: if (status<0){
2.28 frystyk 205: if (TRACE) fprintf(TDEST,
1.1 timbl 206: "HTNews: Unable to send command. Disconnecting.\n");
207: NETCLOSE(s);
2.11 timbl 208: HTInputSocket_free(isoc);
2.28 frystyk 209: s = INVSOC;
1.1 timbl 210: return status;
211: } /* if bad status */
212: } /* if command to be sent */
213:
214: for(;;) {
1.3 timbl 215: if (((*p++=NEXT_CHAR) == LF)
216: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 217: *p++=0; /* Terminate the string */
2.28 frystyk 218: if (TRACE) fprintf(TDEST, "NNTP Response: %s\n", response_text);
1.1 timbl 219: sscanf(response_text, "%d", &result);
2.19 luotonen 220: if (result >= 411 && result <= 430) { /* no such article/group */
221: char * msg = strchr(response_text,' ');
222: if (!msg) msg = response_text;
223: PUTS("<H1>News error</H1>\n");
224: PUTS(msg);
2.28 frystyk 225: if (PROT_TRACE)
226: fprintf(TDEST, "News error.. %s", response_text);
2.19 luotonen 227: }
1.1 timbl 228: return result;
229: } /* if end of line */
230:
231: if (*(p-1) < 0) {
2.28 frystyk 232: if (TRACE) fprintf(TDEST,
1.1 timbl 233: "HTNews: EOF on read, closing socket %d\n", s);
234: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 235: HTInputSocket_free(isoc);
2.28 frystyk 236: return s = INVSOC; /* End of file on response */
1.1 timbl 237: }
238: } /* Loop over characters */
239: }
240:
241:
242: /* Case insensitive string comparisons
243: ** -----------------------------------
244: **
245: ** On entry,
246: ** template must be already un upper case.
247: ** unknown may be in upper or lower or mixed case to match.
248: */
2.24 frystyk 249: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,tmplate)
1.1 timbl 250: {
251: CONST char * u = unknown;
2.24 frystyk 252: CONST char * t = tmplate;
1.1 timbl 253: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
254: return (BOOL)(*t==0); /* OK if end of template */
255: }
256:
257: /* Find Author's name in mail address
258: ** ----------------------------------
259: **
260: ** On exit,
261: ** THE EMAIL ADDRESS IS CORRUPTED
262: **
263: ** For example, returns "Tim Berners-Lee" if given any of
2.31 frystyk 264: ** " Tim Berners-Lee <tim@w3.org> "
265: ** or " tim@w3.org ( Tim Berners-Lee ) "
1.1 timbl 266: */
267: PRIVATE char * author_name ARGS1 (char *,email)
268: {
269: char *s, *e;
270:
271: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
272: if (e>s) {
273: *e=0; /* Chop off everything after the ')' */
274: return HTStrip(s+1); /* Remove leading and trailing spaces */
275: }
276:
277: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
278: if (e>s) {
279: strcpy(s, e+1); /* Remove <...> */
280: return HTStrip(email); /* Remove leading and trailing spaces */
281: }
282:
283: return HTStrip(email); /* Default to the whole thing */
284:
285: }
286:
1.2 timbl 287: /* Start anchor element
288: ** --------------------
289: */
290: PRIVATE void start_anchor ARGS1(CONST char *, href)
291: {
292: BOOL present[HTML_A_ATTRIBUTES];
293: CONST char* value[HTML_A_ATTRIBUTES];
294:
295: {
296: int i;
297: for(i=0; i<HTML_A_ATTRIBUTES; i++)
298: present[i] = (i==HTML_A_HREF);
299: }
300: value[HTML_A_HREF] = href;
301: (*targetClass.start_element)(target, HTML_A , present, value);
302:
303: }
1.1 timbl 304:
2.16 luotonen 305:
306: /* Start link element
307: ** --------------------
308: */
309: PRIVATE void start_link ARGS2(CONST char *, href, CONST char *, rev)
310: {
311: #ifdef WHEN_WE_HAVE_HTMLPLUS
312:
313: BOOL present[HTML_LINK_ATTRIBUTES];
314: CONST char* value[HTML_LINK_ATTRIBUTES];
315:
316: {
317: int i;
318: for(i=0; i<HTML_LINK_ATTRIBUTES; i++)
319: present[i] = (i==HTML_LINK_HREF || i==HTML_LINK_REV);
320: }
321: value[HTML_LINK_HREF] = href;
322: value[HTML_LINK_REV] = rev;
323: (*targetClass.start_element)(target, HTML_LINK , present, value);
324:
325: #endif
326: }
327:
328:
329:
330:
1.1 timbl 331: /* Paste in an Anchor
332: ** ------------------
333: **
334: **
335: ** On entry,
336: ** HT has a selection of zero length at the end.
337: ** text points to the text to be put into the file, 0 terminated.
338: ** addr points to the hypertext refernce address,
339: ** terminated by white space, comma, NULL or '>'
340: */
341: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
342: {
343: char href[LINE_LENGTH+1];
344:
345: {
346: CONST char * p;
347: strcpy(href,"news:");
348: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
349: strncat(href, addr, p-addr); /* Make complete hypertext reference */
350: }
351:
1.2 timbl 352: start_anchor(href);
353: PUTS(text);
354: END(HTML_A);
1.1 timbl 355: }
356:
357:
358: /* Write list of anchors
359: ** ---------------------
360: **
361: ** We take a pointer to a list of objects, and write out each,
362: ** generating an anchor for each.
363: **
364: ** On entry,
365: ** HT has a selection of zero length at the end.
366: ** text points to a comma or space separated list of addresses.
367: ** On exit,
368: ** *text is NOT any more chopped up into substrings.
369: */
370: PRIVATE void write_anchors ARGS1 (char *,text)
371: {
372: char * start = text;
373: char * end;
374: char c;
375: for (;;) {
376: for(;*start && (WHITE(*start)); start++); /* Find start */
377: if (!*start) return; /* (Done) */
378: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
379: if (*end) end++; /* Include comma or space but not NULL */
380: c = *end;
381: *end = 0;
382: write_anchor(start, start);
2.16 luotonen 383: START(HTML_BR);
1.1 timbl 384: *end = c;
385: start = end; /* Point to next one */
386: }
387: }
388:
389: /* Abort the connection abort_socket
390: ** --------------------
391: */
392: PRIVATE void abort_socket NOARGS
393: {
2.28 frystyk 394: if (TRACE) fprintf(TDEST,
1.1 timbl 395: "HTNews: EOF on read, closing socket %d\n", s);
396: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 397: HTInputSocket_free(isoc);
1.2 timbl 398: PUTS("Network Error: connection lost");
399: PUTC('\n');
2.28 frystyk 400: s = INVSOC; /* End of file on response */
1.1 timbl 401: return;
402: }
403:
404: /* Read in an Article read_article
405: ** ------------------
406: **
407: **
408: ** Note the termination condition of a single dot on a line by itself.
409: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
410: ** do not handle it here.
411: **
412: ** On entry,
413: ** s Global socket number is OK
414: ** HT Global hypertext object is ready for appending text
415: */
416: PRIVATE void read_article NOARGS
417: {
418:
419: char line[LINE_LENGTH+1];
420: char *references=NULL; /* Hrefs for other articles */
421: char *newsgroups=NULL; /* Newsgroups list */
422: char *p = line;
423: BOOL done = NO;
424:
425: /* Read in the HEADer of the article:
426: **
427: ** The header fields are either ignored, or formatted and put into the
428: ** Text.
429: */
430: if (!diagnostic) {
1.2 timbl 431: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 432: while(!done){
433: char ch = *p++ = NEXT_CHAR;
434: if (ch==(char)EOF) {
435: abort_socket(); /* End of file, close socket */
436: return; /* End of file on response */
437: }
1.3 timbl 438: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 439: *--p=0; /* Terminate the string */
2.28 frystyk 440: if (TRACE) fprintf(TDEST, "H %s\n", line);
1.1 timbl 441:
442: if (line[0]=='.') {
443: if (line[1]<' ') { /* End of article? */
444: done = YES;
445: break;
446: }
447:
448: } else if (line[0]<' ') {
449: break; /* End of Header? */
2.16 luotonen 450:
1.1 timbl 451: } else if (match(line, "SUBJECT:")) {
1.2 timbl 452: END(HTML_ADDRESS);
453: START(HTML_TITLE); /** Uuugh! @@@ */
2.16 luotonen 454: PUTS(line+9);
455: END(HTML_TITLE);
456: START(HTML_H1);
1.2 timbl 457: PUTS(line+8);
2.16 luotonen 458: END(HTML_H1);
1.2 timbl 459: START(HTML_ADDRESS);
2.16 luotonen 460:
1.1 timbl 461: } else if (match(line, "DATE:")
462: || match(line, "ORGANIZATION:")) {
2.16 luotonen 463: PUTS(strchr(line,':')+2);
464: START(HTML_BR);
465:
466: } else if(match(line, "FROM:")) {
467: char * temp=0;
468: char * href=0;
469: char *cp1, *cp2;
470:
471: /* copy into temporary storage */
472: StrAllocCopy(temp, strchr(line,':')+1);
473:
474: cp1=temp;
475: while(isspace(*cp1)) cp1++;
476: /* remove space and stuff after */
477: if((cp2 = strchr(cp1,' ')) != NULL)
478: *cp2 = '\0';
479:
480: StrAllocCopy(href,"mailto:");
481: StrAllocCat(href,cp1);
482:
483: start_anchor(href);
484: PUTS("Reply to ");
485: PUTS(strchr(line,':')+1);
486: END(HTML_A);
487: START(HTML_BR);
488:
489: /* put in the owner as a link rel. as well */
490: start_link(href, "made");
491:
492: /* free of temp vars */
493: free(temp);
494: free(href);
495:
1.1 timbl 496: } else if (match(line, "NEWSGROUPS:")) {
497: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
498:
499: } else if (match(line, "REFERENCES:")) {
500: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
501:
502: } /* end if match */
503: p = line; /* Restart at beginning */
504: } /* if end of line */
505: } /* Loop over characters */
2.16 luotonen 506: END(HTML_ADDRESS);
1.1 timbl 507:
1.2 timbl 508: if (newsgroups || references) {
2.16 luotonen 509: START(HTML_DL);
1.2 timbl 510: if (newsgroups) {
2.16 luotonen 511: #ifdef POSTING
512: char *href=0;
513: #endif
514:
1.2 timbl 515: (*targetClass.start_element)(target, HTML_DT , 0, 0);
516: PUTS("Newsgroups:");
517: (*targetClass.start_element)(target, HTML_DD , 0, 0);
518: write_anchors(newsgroups);
2.16 luotonen 519:
520: #ifdef POSTING
521: /* make posting possible */
522: StrAllocCopy(href,"newspost:");
523: StrAllocCat(href,newsgroups);
524: START(HTML_DT);
525: start_anchor(href);
526: PUTS("Reply to newsgroup(s)");
527: END(HTML_A);
528: #endif
529:
1.2 timbl 530: free(newsgroups);
531: }
532:
533: if (references) {
534: (*targetClass.start_element)(target, HTML_DT , 0, 0);
535: PUTS("References:");
536: (*targetClass.start_element)(target, HTML_DD , 0, 0);
537: write_anchors(references);
538: free(references);
539: }
2.16 luotonen 540: #ifdef WHEN_WE_HAVE_HTMLPLUS
541: (*targetClass.end_element)(target, HTML_DLC);
542: #else
2.10 timbl 543: (*targetClass.end_element)(target, HTML_DL);
2.16 luotonen 544: #endif
1.1 timbl 545: }
1.2 timbl 546: PUTS("\n\n\n");
1.1 timbl 547:
548: }
549:
550: /* Read in the BODY of the Article:
551: */
1.2 timbl 552: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
553:
1.1 timbl 554: p = line;
555: while(!done){
556: char ch = *p++ = NEXT_CHAR;
557: if (ch==(char)EOF) {
558: abort_socket(); /* End of file, close socket */
559: return; /* End of file on response */
560: }
1.3 timbl 561: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 562: *p++=0; /* Terminate the string */
2.28 frystyk 563: if (TRACE) fprintf(TDEST, "B %s", line);
1.1 timbl 564: if (line[0]=='.') {
565: if (line[1]<' ') { /* End of article? */
566: done = YES;
567: break;
568: } else { /* Line starts with dot */
1.2 timbl 569: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 570: }
571: } else {
572:
573: /* Normal lines are scanned for buried references to other articles.
574: ** Unfortunately, it will pick up mail addresses as well!
575: */
576: char *l = line;
577: char * p;
2.14 luotonen 578: while ((p=strchr(l, '<'))) {
1.1 timbl 579: char *q = strchr(p,'>');
580: char *at = strchr(p, '@');
581: if (q && at && at<q) {
582: char c = q[1];
583: q[1] = 0; /* chop up */
584: *p = 0;
1.2 timbl 585: PUTS(l);
1.1 timbl 586: *p = '<'; /* again */
587: *q = 0;
1.2 timbl 588: start_anchor(p+1);
1.1 timbl 589: *q = '>'; /* again */
1.2 timbl 590: PUTS(p);
591: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 592: q[1] = c; /* again */
593: l=q+1;
594: } else break; /* line has unmatched <> */
595: }
1.2 timbl 596: PUTS( l); /* Last bit of the line */
1.1 timbl 597: } /* if not dot */
598: p = line; /* Restart at beginning */
599: } /* if end of line */
600: } /* Loop over characters */
1.2 timbl 601:
602: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 603: }
604:
605:
606: /* Read in a List of Newsgroups
607: ** ----------------------------
608: */
609: /*
610: ** Note the termination condition of a single dot on a line by itself.
611: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
612: ** do not handle it here.
613: */
614: PRIVATE void read_list NOARGS
615: {
616:
617: char line[LINE_LENGTH+1];
618: char *p;
619: BOOL done = NO;
620:
621: /* Read in the HEADer of the article:
622: **
623: ** The header fields are either ignored, or formatted and put into the
624: ** Text.
625: */
1.2 timbl 626: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
627: PUTS( "Newsgroups");
628: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 629: p = line;
2.16 luotonen 630: (*targetClass.start_element)(target, HTML_DL, 0, 0);
1.1 timbl 631: while(!done){
632: char ch = *p++ = NEXT_CHAR;
633: if (ch==(char)EOF) {
634: abort_socket(); /* End of file, close socket */
635: return; /* End of file on response */
636: }
1.3 timbl 637: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 638: *p++=0; /* Terminate the string */
2.28 frystyk 639: if (TRACE) fprintf(TDEST, "B %s", line);
2.16 luotonen 640: (*targetClass.start_element)(target, HTML_DT , 0, 0);
1.1 timbl 641: if (line[0]=='.') {
642: if (line[1]<' ') { /* End of article? */
643: done = YES;
644: break;
645: } else { /* Line starts with dot */
1.2 timbl 646: PUTS( &line[1]);
1.1 timbl 647: }
648: } else {
649:
650: /* Normal lines are scanned for references to newsgroups.
651: */
2.16 luotonen 652: int i=0;
653:
654: /* find whitespace if it exits */
655: for(; line[i] != '\0' && !WHITE(line[i]); i++)
656: ; /* null body */
657:
658: if(line[i] != '\0') {
659: line[i] = '\0';
660: write_anchor(line, line);
661: (*targetClass.start_element)(target, HTML_DD , 0, 0);
662: PUTS(&line[i+1]); /* put description */
663: } else {
664: write_anchor(line, line);
665: }
666:
667: #ifdef OLD_CODE
1.1 timbl 668: char group[LINE_LENGTH];
669: int first, last;
670: char postable;
671: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
672: write_anchor(line, group);
673: else
1.2 timbl 674: PUTS(line);
2.16 luotonen 675: #endif /*OLD_CODE*/
676:
1.1 timbl 677: } /* if not dot */
678: p = line; /* Restart at beginning */
679: } /* if end of line */
680: } /* Loop over characters */
2.16 luotonen 681: (*targetClass.end_element)(target, HTML_DL);
1.1 timbl 682: }
683:
684:
685: /* Read in a Newsgroup
686: ** -------------------
687: ** Unfortunately, we have to ask for each article one by one if we
688: ** want more than one field.
689: **
690: */
691: PRIVATE void read_group ARGS3(
692: CONST char *,groupName,
693: int,first_required,
694: int,last_required
695: )
696: {
697: char line[LINE_LENGTH+1];
698: char author[LINE_LENGTH+1];
699: char subject[LINE_LENGTH+1];
700: char *p;
701: BOOL done;
702:
703: char buffer[LINE_LENGTH];
704: char *reference=0; /* Href for article */
705: int art; /* Article number WITHIN GROUP */
706: int status, count, first, last; /* Response fields */
707: /* count is only an upper limit */
708:
709: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
2.17 frystyk 710: if(TRACE)
2.28 frystyk 711: fprintf(TDEST,
2.17 frystyk 712: "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
713: status, count, first, last, first_required, last_required);
1.1 timbl 714: if (last==0) {
1.2 timbl 715: PUTS( "\nNo articles in this group.\n");
2.16 luotonen 716: #ifdef POSTING
717: goto add_post;
718: #endif
1.1 timbl 719: return;
720: }
721:
722: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
723: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
724:
725: if (first_required<first) first_required = first; /* clip */
726: if ((last_required==0) || (last_required > last)) last_required = last;
727:
728: if (last_required<=first_required) {
1.2 timbl 729: PUTS( "\nNo articles in this range.\n");
2.16 luotonen 730: #ifdef POSTING
731: goto add_post;
732: #endif
1.1 timbl 733: return;
734: }
735:
736: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
737: first_required = last_required-CHUNK_SIZE+1;
738: }
2.28 frystyk 739: if (TRACE) fprintf (TDEST, " Chunk will be (%d-%d)\n",
2.16 luotonen 740: first_required, last_required);
1.1 timbl 741:
1.2 timbl 742: /* Set window title
743: */
744: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
745: groupName, first_required, last_required);
746: START(HTML_TITLE);
747: PUTS(buffer);
748: END(HTML_TITLE);
749:
1.1 timbl 750: /* Link to earlier articles
751: */
752: if (first_required>first) {
753: int before; /* Start of one before */
754: if (first_required-MAX_CHUNK <= first) before = first;
755: else before = first_required-CHUNK_SIZE;
756: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
2.28 frystyk 757: if (TRACE) fprintf(TDEST, " Block before is %s\n", buffer);
1.2 timbl 758: PUTS( " (");
759: start_anchor(buffer);
760: PUTS("Earlier articles");
761: END(HTML_A);
762: PUTS( "...)\n");
1.1 timbl 763: }
764:
765: done = NO;
766:
767: /*#define USE_XHDR*/
768: #ifdef USE_XHDR
769: if (count>FAST_THRESHOLD) {
770: sprintf(buffer,
771: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
772: count, groupName);
1.2 timbl 773: PUTS(buffer);
1.3 timbl 774: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 775: status = response(buffer);
776: if (status==221) {
777:
778: p = line;
779: while(!done){
780: char ch = *p++ = NEXT_CHAR;
781: if (ch==(char)EOF) {
782: abort_socket(); /* End of file, close socket */
783: return; /* End of file on response */
784: }
785: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
786: *p++=0; /* Terminate the string */
2.28 frystyk 787: if (TRACE) fprintf(TDEST, "X %s", line);
1.1 timbl 788: if (line[0]=='.') {
789: if (line[1]<' ') { /* End of article? */
790: done = YES;
791: break;
792: } else { /* Line starts with dot */
793: /* Ignore strange line */
794: }
795: } else {
796:
797: /* Normal lines are scanned for references to articles.
798: */
799: char * space = strchr(line, ' ');
800: if (space++)
801: write_anchor(space, space);
802: } /* if not dot */
803: p = line; /* Restart at beginning */
804: } /* if end of line */
805: } /* Loop over characters */
806:
807: /* leaving loop with "done" set */
808: } /* Good status */
809: };
810: #endif
811:
812: /* Read newsgroup using individual fields:
813: */
814: if (!done) {
815: if (first==first_required && last==last_required)
1.2 timbl 816: PUTS("\nAll available articles in ");
817: else PUTS( "\nArticles in ");
818: PUTS(groupName);
819: START(HTML_MENU);
1.1 timbl 820: for(art=first_required; art<=last_required; art++) {
821:
822: /*#define OVERLAP*/
823: #ifdef OVERLAP
824: /* With this code we try to keep the server running flat out by queuing just
825: ** one extra command ahead of time. We assume (1) that the server won't abort
826: ** if it gets input during output, and (2) that TCP buffering is enough for the
827: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
828: ** had a hangup with a loaded server.
829: */
830: if (art==first_required) {
831: if (art==last_required) {
1.3 timbl 832: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 833: status = response(buffer);
834: } else { /* First of many */
1.3 timbl 835: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
836: art, CR, LF, art+1, CR, LF);
1.1 timbl 837: status = response(buffer);
838: }
839: } else if (art==last_required) { /* Last of many */
840: status = response(NULL);
841: } else { /* Middle of many */
1.3 timbl 842: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 843: status = response(buffer);
844: }
845:
846: #else /* NOT OVERLAP */
1.3 timbl 847: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 848: status = response(buffer);
849: #endif /* NOT OVERLAP */
850:
851: if (status == 221) { /* Head follows - parse it:*/
2.20 frystyk 852: int ch;
1.1 timbl 853: p = line; /* Write pointer */
854: done = NO;
855: while(!done){
2.20 frystyk 856: if ((ch = HTInputSocket_getCharacter(isoc)) < 0) {
1.1 timbl 857: abort_socket(); /* End of file, close socket */
858: return; /* End of file on response */
859: }
2.20 frystyk 860: *p++ = (unsigned char) ch;
1.3 timbl 861: if ((ch == LF)
1.1 timbl 862: || (p == &line[LINE_LENGTH]) ) {
863:
864: *--p=0; /* Terminate & chop LF*/
865: p = line; /* Restart at beginning */
2.28 frystyk 866: if (TRACE) fprintf(TDEST, "G %s\n", line);
1.1 timbl 867: switch(line[0]) {
868:
869: case '.':
870: done = (line[1]<' '); /* End of article? */
871: break;
872:
873: case 'S':
874: case 's':
875: if (match(line, "SUBJECT:"))
876: strcpy(subject, line+9);/* Save subject */
877: break;
878:
879: case 'M':
880: case 'm':
881: if (match(line, "MESSAGE-ID:")) {
882: char * addr = HTStrip(line+11) +1; /* Chop < */
883: addr[strlen(addr)-1]=0; /* Chop > */
884: StrAllocCopy(reference, addr);
885: }
886: break;
887:
888: case 'f':
889: case 'F':
890: if (match(line, "FROM:")) {
891: char * p;
892: strcpy(author,
893: author_name(strchr(line,':')+1));
2.17 frystyk 894: if (*author) { /* Not always there! */
895: p = author + strlen(author) - 1;
896: if (*p==LF) *p = 0; /* Chop off newline */
897: }
1.1 timbl 898: }
899: break;
900:
901: } /* end switch on first character */
902: } /* if end of line */
903: } /* Loop over characters */
904:
1.2 timbl 905: START(HTML_LI);
1.1 timbl 906: sprintf(buffer, "\"%s\" - %s", subject, author);
907: if (reference) {
908: write_anchor(buffer, reference);
909: free(reference);
910: reference=0;
911: } else {
1.2 timbl 912: PUTS(buffer);
1.1 timbl 913: }
914:
915:
1.2 timbl 916: /* indicate progress! @@@@@@
1.1 timbl 917: */
918:
919: } /* If good response */
920: } /* Loop over article */
921: } /* If read headers */
1.2 timbl 922: END(HTML_MENU);
923: START(HTML_P);
1.1 timbl 924:
925: /* Link to later articles
926: */
927: if (last_required<last) {
928: int after; /* End of article after */
929: after = last_required+CHUNK_SIZE;
930: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
931: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
2.28 frystyk 932: if (TRACE) fprintf(TDEST, " Block after is %s\n", buffer);
1.2 timbl 933: PUTS( "(");
934: start_anchor(buffer);
935: PUTS( "Later articles");
936: END(HTML_A);
937: PUTS( "...)\n");
1.1 timbl 938: }
2.16 luotonen 939:
2.19 luotonen 940: #ifdef POSTING
941: add_post:
942: #endif
2.16 luotonen 943: {
944: char *href=0;
945: START(HTML_HR);
946:
947: StrAllocCopy(href,"newspost:");
948: StrAllocCat(href,groupName);
949: start_anchor(href);
950: PUTS("Post to ");
951: PUTS(groupName);
952: END(HTML_A);
953:
954: free(href);
955: }
1.1 timbl 956:
957:
958: }
959:
960:
961: /* Load by name HTLoadNews
962: ** ============
963: */
2.13 timbl 964: PUBLIC int HTLoadNews ARGS1(HTRequest *, request)
1.1 timbl 965: {
2.19 luotonen 966: char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 967: char command[257]; /* The whole command */
968: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
969: int status; /* tcp return */
970: int retries; /* A count of how hard we have tried */
971: BOOL group_wanted; /* Flag: group was asked for, not article */
972: BOOL list_wanted; /* Flag: group was asked for, not article */
973: int first, last; /* First and last articles asked for */
974:
2.10 timbl 975: diagnostic = (request->output_format == WWW_SOURCE); /* set global flag */
1.1 timbl 976:
2.28 frystyk 977: if (TRACE) fprintf(TDEST, "HTNews: Looking for %s\n", arg);
1.1 timbl 978:
979: if (!initialized) initialized = initialize();
980: if (!initialized) return -1; /* FAIL */
981:
982: {
2.19 luotonen 983: char * p1=arg;
1.1 timbl 984:
985: /* We will ask for the document, omitting the host name & anchor.
986: **
987: ** Syntax of address is
988: ** xxx@yyy Article
989: ** <xxx@yyy> Same article
990: ** xxxxx News group (no "@")
991: ** group/n1-n2 Articles n1 to n2 in group
992: */
993: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
994: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
995:
996: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
997: /* Don't use HTParse because news: access doesn't follow traditional
998: rules. For instance, if the article reference contains a '#',
999: the rest of it is lost -- JFG 10/7/92, from a bug report */
1000: if (!strncasecomp (arg, "news:", 5))
1001: p1 = arg + 5; /* Skip "news:" prefix */
2.19 luotonen 1002: HTUnEscape(p1); /* AL May 2, 1994 */
1003: HTCleanTelnetString(p1); /* Prevent security holes */
1.1 timbl 1004: if (list_wanted) {
2.16 luotonen 1005: strcpy(command, "LIST NEWSGROUPS");
1.1 timbl 1006: } else if (group_wanted) {
1007: char * slash = strchr(p1, '/');
1008: strcpy(command, "GROUP ");
1009: first = 0;
1010: last = 0;
1011: if (slash) {
1012: *slash = 0;
1013: strcpy(groupName, p1);
1014: *slash = '/';
1015: (void) sscanf(slash+1, "%d-%d", &first, &last);
1016: } else {
1017: strcpy(groupName, p1);
1018: }
1019: strcat(command, groupName);
1020: } else {
1021: strcpy(command, "ARTICLE ");
1022: if (strchr(p1, '<')==0) strcat(command,"<");
1023: strcat(command, p1);
1024: if (strchr(p1, '>')==0) strcat(command,">");
1025: }
1026:
1.3 timbl 1027: {
1028: char * p = command + strlen(command);
1029: *p++ = CR; /* Macros to be correct on Mac */
1030: *p++ = LF;
1031: *p++ = 0;
1032: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
1033: }
1.1 timbl 1034: } /* scope of p1 */
1035:
1036: if (!*arg) return NO; /* Ignore if no name */
1037:
1038:
1039: /* Make a hypertext object with an anchor list.
1040: */
2.10 timbl 1041: node_anchor = request->anchor;
2.11 timbl 1042: target = HTML_new(request, NULL, WWW_HTML,
1043: request->output_format, request->output_stream);
1.2 timbl 1044: targetClass = *target->isa; /* Copy routine entry points */
1045:
1.1 timbl 1046:
1047: /* Now, let's get a stream setup up from the NewsHost:
1048: */
1049: for(retries=0;retries<2; retries++){
1050:
1051: if (s<0) {
1052: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1053: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
1054: if (status<0){
1055: NETCLOSE(s);
2.28 frystyk 1056: s = INVSOC;
2.21 frystyk 1057: #ifdef OLD_CODE
1058: char message[256];
2.28 frystyk 1059: if (TRACE) fprintf(TDEST, "HTNews: Unable to connect to news host.\n");
1.1 timbl 1060: /* if (retries<=1) continue; WHY TRY AGAIN ? */
1061: sprintf(message,
1062: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 1063: HTNewsHost);
2.15 luotonen 1064: return HTLoadError(request, 500, message);
2.21 frystyk 1065: #endif /* OLD_CODE */
1066: {
1067: char *unescaped = NULL;
1068: StrAllocCopy(unescaped, arg);
1069: HTUnEscape(unescaped);
1070: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1071: (void *) unescaped,
1072: (int) strlen(unescaped), "HTLoadNews");
1073: free(unescaped);
1074: return -1;
1075: }
1.1 timbl 1076: } else {
2.28 frystyk 1077: if (TRACE) fprintf(TDEST, "HTNews: Connected to news host %s.\n",
1.2 timbl 1078: HTNewsHost);
2.11 timbl 1079: isoc = HTInputSocket_new(s); /* set up buffering */
1.1 timbl 1080: if ((response(NULL) / 100) !=2) {
2.21 frystyk 1081: int length = strlen(response_text);
1082: NETCLOSE(s);
1083: HTInputSocket_free(isoc);
2.28 frystyk 1084: s = INVSOC;
2.21 frystyk 1085: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NEWS_SERVER,
1086: (void *) response_text, length < 50 ?
1087: length : 50, "HTLoadNews");
1088: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1089: (void *) HTNewsHost,
1090: (int) strlen(HTNewsHost), "HTLoadNews");
1091: return -1;
1092: }
1093: #ifdef OLD_CODE
2.8 timbl 1094: char message[BIG];
1095: sprintf(message,
1096: "Can't read news info. News host %.20s responded: %.200s",
1097: HTNewsHost, response_text);
2.15 luotonen 1098: return HTLoadError(request, 500, message);
2.21 frystyk 1099: }
1100: #endif /* OLD_CODE */
1.1 timbl 1101: }
1102: } /* If needed opening */
1103:
1.2 timbl 1104: /* @@@@@@@@@@@@@@Tell user something's happening */
1105:
1.1 timbl 1106: status = response(command);
1107: if (status<0) break;
2.19 luotonen 1108: if (status >= 411 && status <= 430) break; /* no such article/group */
1.1 timbl 1109: if ((status/ 100) !=2) {
1110: /* NXRunAlertPanel("News access", response_text,
1111: NULL,NULL,NULL);
1112: */
1113: NETCLOSE(s);
2.11 timbl 1114: HTInputSocket_free(isoc);
2.28 frystyk 1115: s = INVSOC;
1.1 timbl 1116: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
1117: continue; /* Try again */
1118: }
1119:
1120: /* Load a group, article, etc
1121: */
1.2 timbl 1122:
1.1 timbl 1123:
1124: if (list_wanted) read_list();
1125: else if (group_wanted) read_group(groupName, first, last);
1126: else read_article();
1127:
2.23 duns 1128: (*targetClass._free)(target);
1.2 timbl 1129: return HT_LOADED;
1.1 timbl 1130:
1131: } /* Retry loop */
1132:
1.2 timbl 1133:
2.8 timbl 1134: /* HTAlert("Sorry, could not load requested news.\n"); */
1135:
1.1 timbl 1136: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
1137: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
1138:
2.23 duns 1139: (*targetClass._free)(target); /* AL May 2, 1994 */
1.2 timbl 1140: return HT_LOADED;
1.1 timbl 1141: }
1142:
2.25 frystyk 1143: GLOBALDEF PUBLIC HTProtocol HTNews = {
1144: "news", SOC_BLOCK, HTLoadNews, NULL, NULL
1145: };
Webmaster