Annotation of libwww/Library/src/HTNews.c, revision 2.29
2.26 frystyk 1: /* HTNews.c
2: ** NEWS ACCESS
3: **
2.29 ! frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.26 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
7: ** History:
8: ** 26 Sep 90 Written TBL
9: ** 29 Nov 91 Downgraded to C, for portable implementation.
2.19 luotonen 10: ** 16 Feb 94 AL Added Lou Montulli's Lynx & LIST NEWSGROUPS diffs.
11: ** 2 May 94 AL Added HTUnEscape() to HTLoadNews(), and
12: ** fixed a possible security hole when the URL contains
13: ** a newline, that could cause multiple commands to be
14: ** sent to an NNTP server.
2.23 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 16: */
2.27 roeber 17:
2.28 frystyk 18: #include "tcp.h"
19: #include "HTUtils.h" /* Coding convention macros */
20: #include "HTString.h"
21: #include "HTML.h"
22: #include "HTParse.h"
23: #include "HTFormat.h"
24: #include "HTAlert.h"
25: #include "HTError.h"
26: #include "HTNews.h" /* Implemented here */
1.3 timbl 27:
1.1 timbl 28: #define NEWS_PORT 119 /* TCP port that initialize() stores in soc_address; see RFC 977 */
29: #define APPEND /* Use append methods */
30: #define MAX_CHUNK 40 /* read_group() trims any requested range longer than this */
31: #define CHUNK_SIZE 20 /* Number of articles kept when a range is trimmed */
32:
33: #ifndef DEFAULT_NEWS_HOST
34: #define DEFAULT_NEWS_HOST "news" /* Host name used by initialize() when nothing else names one */
35: #endif
36: #ifndef SERVER_FILE
37: #define SERVER_FILE "/usr/local/lib/rn/server" /* File initialize() reads a host name from when NNTPSERVER is unset */
38: #endif
39:
2.8 timbl 40: #define BIG 1024 /* @@@ NOTE(review): not referenced in the part of the file reviewed here */
41:
1.2 timbl 42: struct _HTStructured {
43: CONST HTStructuredClass * isa; /* Method table; HTLoadNews copies *target->isa into targetClass */
44: /* ... any further members are not spelled out in this file */
45: };
46:
2.7 timbl 47: #define NEWS_PROGRESS(foo) HTProgress(foo) /* Progress message, forwarded to HTProgress() */
1.1 timbl 48:
49:
2.12 timbl 50: #define NEXT_CHAR HTInputSocket_getCharacter(isoc) /* Next byte from the global isoc; read_group() treats a negative value as end of input */
1.1 timbl 51: #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */
52: #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */
53:
54:
55: /* Module-wide variables
**
** All state of this module lives in these globals, so only one news
** request can be in progress at a time.
56: */
1.2 timbl 57: PUBLIC char * HTNewsHost; /* Name of the news server; written by HTSetNewsHost() and initialize() */
1.1 timbl 58: PRIVATE struct sockaddr_in soc_address; /* Binary network address, filled in by initialize() */
2.28 frystyk 59: PRIVATE SOCKFD s; /* Socket for NewsHost; INVSOC while disconnected */
1.1 timbl 60: PRIVATE char response_text[LINE_LENGTH+1]; /* Last status line read by response() */
1.2 timbl 61: PRIVATE HTStructured * target; /* The output sink */
62: PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses, taken from *target->isa */
1.1 timbl 63: PRIVATE HTParentAnchor *node_anchor; /* Anchor of the request being served */
64: PRIVATE int diagnostic; /* Non-zero when HTLoadNews was asked for WWW_SOURCE output */
65:
1.2 timbl 66:
67: #define PUTC(c) (*targetClass.put_character)(target, c) /* One character to the sink */
68: #define PUTS(s) (*targetClass.put_string)(target, s) /* A string to the sink */
69: #define START(e) (*targetClass.start_element)(target, e, 0, 0) /* Open element e, no attributes */
70: #define END(e) (*targetClass.end_element)(target, e) /* Close element e */
71:
2.11 timbl 72: PUBLIC HTInputSocket *isoc; /* @@@ non-reentrant: the single input buffer read through NEXT_CHAR */
73:
1.2 timbl 74: PUBLIC CONST char * HTGetNewsHost NOARGS
75: {
76: return HTNewsHost;
77: }
1.1 timbl 78:
1.2 timbl 79: PUBLIC void HTSetNewsHost ARGS1(CONST char *, value)
80: {
81: StrAllocCopy(HTNewsHost, value);
82: }
1.1 timbl 83:
84: /* Initialisation for this module
85: ** ------------------------------
86: **
87: ** Except on the NeXT, we pick up the NewsHost name from
88: **
89: ** 1. Environment variable NNTPSERVER
90: ** 2. File SERVER_FILE
91: ** 3. Compilation time macro DEFAULT_NEWS_HOST
92: **
93: ** On the NeXT, we pick up the NewsHost name from, in order:
94: **
95: ** 1. WorldWideWeb default "NewsHost"
96: ** 2. Global default "NewsHost"
97: ** 3. News default "NewsHost"
98: ** 4. Compilation time macro DEFAULT_NEWS_HOST
99: */
100: PRIVATE BOOL initialized = NO;
101: PRIVATE BOOL initialize NOARGS
102: {
103: CONST struct hostent *phost; /* Pointer to host - See netdb.h */
104: struct sockaddr_in* sin = &soc_address;
105:
106:
107: /* Set up defaults:
108: */
109: sin->sin_family = AF_INET; /* Family = internet, host order */
110: sin->sin_port = htons(NEWS_PORT); /* Default: new port, */
111:
112: /* Get name of Host
113: */
2.28 frystyk 114: #ifdef NeXTStep
1.2 timbl 115: if ((HTNewsHost = NXGetDefaultValue("WorldWideWeb","NewsHost"))==0)
116: if ((HTNewsHost = NXGetDefaultValue("News","NewsHost")) == 0)
117: HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 118: #else
119: if (getenv("NNTPSERVER")) {
1.2 timbl 120: StrAllocCopy(HTNewsHost, (char *)getenv("NNTPSERVER"));
2.28 frystyk 121: if (TRACE) fprintf(TDEST, "HTNews: NNTPSERVER defined as `%s'\n",
1.2 timbl 122: HTNewsHost);
1.1 timbl 123: } else {
124: char server_name[256];
125: FILE* fp = fopen(SERVER_FILE, "r");
126: if (fp) {
127: if (fscanf(fp, "%s", server_name)==1) {
1.2 timbl 128: StrAllocCopy(HTNewsHost, server_name);
2.28 frystyk 129: if (TRACE) fprintf(TDEST,
1.1 timbl 130: "HTNews: File %s defines news host as `%s'\n",
1.2 timbl 131: SERVER_FILE, HTNewsHost);
1.1 timbl 132: }
133: fclose(fp);
134: }
135: }
1.2 timbl 136: if (!HTNewsHost) HTNewsHost = DEFAULT_NEWS_HOST;
1.1 timbl 137: #endif
138:
1.2 timbl 139: if (*HTNewsHost>='0' && *HTNewsHost<='9') { /* Numeric node address: */
140: sin->sin_addr.s_addr = inet_addr((char *)HTNewsHost); /* See arpa/inet.h */
1.1 timbl 141:
142: } else { /* Alphanumeric node name: */
1.2 timbl 143: phost=gethostbyname((char*)HTNewsHost); /* See netdb.h */
1.1 timbl 144: if (!phost) {
2.7 timbl 145: char message[150]; /* @@@ */
146: sprintf(message,
147: "HTNews: Can't find news host `%s'.\n%s",HTNewsHost,
148: "Please define your NNTP server");
149: HTAlert(message);
2.28 frystyk 150: if (PROT_TRACE)
151: fprintf(TDEST, "HTNews: Can't find news host `%s'.\n",HTNewsHost);
1.1 timbl 152: return NO; /* Fail */
153: }
154: memcpy(&sin->sin_addr, phost->h_addr, phost->h_length);
155: }
156:
2.28 frystyk 157: if (TRACE) fprintf(TDEST,
1.1 timbl 158: "HTNews: Parsed address as port %4x, inet %d.%d.%d.%d\n",
159: (unsigned int)ntohs(sin->sin_port),
160: (int)*((unsigned char *)(&sin->sin_addr)+0),
161: (int)*((unsigned char *)(&sin->sin_addr)+1),
162: (int)*((unsigned char *)(&sin->sin_addr)+2),
163: (int)*((unsigned char *)(&sin->sin_addr)+3));
164:
2.28 frystyk 165: s = INVSOC; /* Disconnected */
1.1 timbl 166:
167: return YES;
168: }
169:
170:
171:
172: /* Send NNTP Command line to remote host & Check Response
173: ** ------------------------------------------------------
174: **
175: ** On entry,
176: ** command points to the command to be sent, including CRLF, or is null
177: ** pointer if no command to be sent.
178: ** On exit,
179: ** Negative status indicates transmission error, socket closed.
180: ** Positive status is an NNTP status.
181: */
182:
183:
184: PRIVATE int response ARGS1(CONST char *,command)
185: {
186: int result;
187: char * p = response_text;
188: if (command) {
189: int status;
190: int length = strlen(command);
2.28 frystyk 191: if (TRACE) fprintf(TDEST, "NNTP command to be sent: %s", command);
1.1 timbl 192: #ifdef NOT_ASCII
193: {
194: CONST char * p;
195: char * q;
196: char ascii[LINE_LENGTH+1];
197: for(p = command, q=ascii; *p; p++, q++) {
198: *q = TOASCII(*p);
199: }
200: status = NETWRITE(s, ascii, length);
201: }
202: #else
203: status = NETWRITE(s, command, length);
204: #endif
205: if (status<0){
2.28 frystyk 206: if (TRACE) fprintf(TDEST,
1.1 timbl 207: "HTNews: Unable to send command. Disconnecting.\n");
208: NETCLOSE(s);
2.11 timbl 209: HTInputSocket_free(isoc);
2.28 frystyk 210: s = INVSOC;
1.1 timbl 211: return status;
212: } /* if bad status */
213: } /* if command to be sent */
214:
215: for(;;) {
1.3 timbl 216: if (((*p++=NEXT_CHAR) == LF)
217: || (p == &response_text[LINE_LENGTH])) {
1.1 timbl 218: *p++=0; /* Terminate the string */
2.28 frystyk 219: if (TRACE) fprintf(TDEST, "NNTP Response: %s\n", response_text);
1.1 timbl 220: sscanf(response_text, "%d", &result);
2.19 luotonen 221: if (result >= 411 && result <= 430) { /* no such article/group */
222: char * msg = strchr(response_text,' ');
223: if (!msg) msg = response_text;
224: PUTS("<H1>News error</H1>\n");
225: PUTS(msg);
2.28 frystyk 226: if (PROT_TRACE)
227: fprintf(TDEST, "News error.. %s", response_text);
2.19 luotonen 228: }
1.1 timbl 229: return result;
230: } /* if end of line */
231:
232: if (*(p-1) < 0) {
2.28 frystyk 233: if (TRACE) fprintf(TDEST,
1.1 timbl 234: "HTNews: EOF on read, closing socket %d\n", s);
235: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 236: HTInputSocket_free(isoc);
2.28 frystyk 237: return s = INVSOC; /* End of file on response */
1.1 timbl 238: }
239: } /* Loop over characters */
240: }
241:
242:
243: /* Case insensitive string comparisons
244: ** -----------------------------------
245: **
246: ** On entry,
247: ** template must be already un upper case.
248: ** unknown may be in upper or lower or mixed case to match.
249: */
2.24 frystyk 250: PRIVATE BOOL match ARGS2 (CONST char *,unknown, CONST char *,tmplate)
1.1 timbl 251: {
252: CONST char * u = unknown;
2.24 frystyk 253: CONST char * t = tmplate;
1.1 timbl 254: for (;*u && *t && (TOUPPER(*u)==*t); u++, t++) /* Find mismatch or end */ ;
255: return (BOOL)(*t==0); /* OK if end of template */
256: }
257:
258: /* Find Author's name in mail address
259: ** ----------------------------------
260: **
261: ** On exit,
262: ** THE EMAIL ADDRESS IS CORRUPTED
263: **
264: ** For example, returns "Tim Berners-Lee" if given any of
265: ** " Tim Berners-Lee <tim@online.cern.ch> "
266: ** or " tim@online.cern.ch ( Tim Berners-Lee ) "
267: */
268: PRIVATE char * author_name ARGS1 (char *,email)
269: {
270: char *s, *e;
271:
272: if ((s=strchr(email,'(')) && (e=strchr(email, ')')))
273: if (e>s) {
274: *e=0; /* Chop off everything after the ')' */
275: return HTStrip(s+1); /* Remove leading and trailing spaces */
276: }
277:
278: if ((s=strchr(email,'<')) && (e=strchr(email, '>')))
279: if (e>s) {
280: strcpy(s, e+1); /* Remove <...> */
281: return HTStrip(email); /* Remove leading and trailing spaces */
282: }
283:
284: return HTStrip(email); /* Default to the whole thing */
285:
286: }
287:
1.2 timbl 288: /* Start anchor element
289: ** --------------------
290: */
291: PRIVATE void start_anchor ARGS1(CONST char *, href)
292: {
293: BOOL present[HTML_A_ATTRIBUTES];
294: CONST char* value[HTML_A_ATTRIBUTES];
295:
296: {
297: int i;
298: for(i=0; i<HTML_A_ATTRIBUTES; i++)
299: present[i] = (i==HTML_A_HREF);
300: }
301: value[HTML_A_HREF] = href;
302: (*targetClass.start_element)(target, HTML_A , present, value);
303:
304: }
1.1 timbl 305:
2.16 luotonen 306:
307: /* Start link element
308: ** --------------------
309: */
310: PRIVATE void start_link ARGS2(CONST char *, href, CONST char *, rev)
311: {
312: #ifdef WHEN_WE_HAVE_HTMLPLUS
313:
314: BOOL present[HTML_LINK_ATTRIBUTES];
315: CONST char* value[HTML_LINK_ATTRIBUTES];
316:
317: {
318: int i;
319: for(i=0; i<HTML_LINK_ATTRIBUTES; i++)
320: present[i] = (i==HTML_LINK_HREF || i==HTML_LINK_REV);
321: }
322: value[HTML_LINK_HREF] = href;
323: value[HTML_LINK_REV] = rev;
324: (*targetClass.start_element)(target, HTML_LINK , present, value);
325:
326: #endif
327: }
328:
329:
330:
331:
1.1 timbl 332: /* Paste in an Anchor
333: ** ------------------
334: **
335: **
336: ** On entry,
337: ** HT has a selection of zero length at the end.
338: ** text points to the text to be put into the file, 0 terminated.
339: ** addr points to the hypertext refernce address,
340: ** terminated by white space, comma, NULL or '>'
341: */
342: PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
343: {
344: char href[LINE_LENGTH+1];
345:
346: {
347: CONST char * p;
348: strcpy(href,"news:");
349: for(p=addr; *p && (*p!='>') && !WHITE(*p) && (*p!=','); p++);
350: strncat(href, addr, p-addr); /* Make complete hypertext reference */
351: }
352:
1.2 timbl 353: start_anchor(href);
354: PUTS(text);
355: END(HTML_A);
1.1 timbl 356: }
357:
358:
359: /* Write list of anchors
360: ** ---------------------
361: **
362: ** We take a pointer to a list of objects, and write out each,
363: ** generating an anchor for each.
364: **
365: ** On entry,
366: ** HT has a selection of zero length at the end.
367: ** text points to a comma or space separated list of addresses.
368: ** On exit,
369: ** *text is NOT any more chopped up into substrings.
370: */
371: PRIVATE void write_anchors ARGS1 (char *,text)
372: {
373: char * start = text;
374: char * end;
375: char c;
376: for (;;) {
377: for(;*start && (WHITE(*start)); start++); /* Find start */
378: if (!*start) return; /* (Done) */
379: for(end=start; *end && (*end!=' ') && (*end!=','); end++);/* Find end */
380: if (*end) end++; /* Include comma or space but not NULL */
381: c = *end;
382: *end = 0;
383: write_anchor(start, start);
2.16 luotonen 384: START(HTML_BR);
1.1 timbl 385: *end = c;
386: start = end; /* Point to next one */
387: }
388: }
389:
390: /* Abort the connection abort_socket
391: ** --------------------
392: */
393: PRIVATE void abort_socket NOARGS
394: {
2.28 frystyk 395: if (TRACE) fprintf(TDEST,
1.1 timbl 396: "HTNews: EOF on read, closing socket %d\n", s);
397: NETCLOSE(s); /* End of file, close socket */
2.11 timbl 398: HTInputSocket_free(isoc);
1.2 timbl 399: PUTS("Network Error: connection lost");
400: PUTC('\n');
2.28 frystyk 401: s = INVSOC; /* End of file on response */
1.1 timbl 402: return;
403: }
404:
405: /* Read in an Article read_article
406: ** ------------------
407: **
408: **
409: ** Note the termination condition of a single dot on a line by itself.
410: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
411: ** do not handle it here.
412: **
413: ** On entry,
414: ** s Global socket number is OK
415: ** HT Global hypertext object is ready for appending text
416: */
417: PRIVATE void read_article NOARGS
418: {
419:
420: char line[LINE_LENGTH+1];
421: char *references=NULL; /* Hrefs for other articles */
422: char *newsgroups=NULL; /* Newsgroups list */
423: char *p = line;
424: BOOL done = NO;
425:
426: /* Read in the HEADer of the article:
427: **
428: ** The header fields are either ignored, or formatted and put into the
429: ** Text.
430: */
431: if (!diagnostic) {
1.2 timbl 432: (*targetClass.start_element)(target, HTML_ADDRESS, 0, 0);
1.1 timbl 433: while(!done){
434: char ch = *p++ = NEXT_CHAR;
435: if (ch==(char)EOF) {
436: abort_socket(); /* End of file, close socket */
437: return; /* End of file on response */
438: }
1.3 timbl 439: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 440: *--p=0; /* Terminate the string */
2.28 frystyk 441: if (TRACE) fprintf(TDEST, "H %s\n", line);
1.1 timbl 442:
443: if (line[0]=='.') {
444: if (line[1]<' ') { /* End of article? */
445: done = YES;
446: break;
447: }
448:
449: } else if (line[0]<' ') {
450: break; /* End of Header? */
2.16 luotonen 451:
1.1 timbl 452: } else if (match(line, "SUBJECT:")) {
1.2 timbl 453: END(HTML_ADDRESS);
454: START(HTML_TITLE); /** Uuugh! @@@ */
2.16 luotonen 455: PUTS(line+9);
456: END(HTML_TITLE);
457: START(HTML_H1);
1.2 timbl 458: PUTS(line+8);
2.16 luotonen 459: END(HTML_H1);
1.2 timbl 460: START(HTML_ADDRESS);
2.16 luotonen 461:
1.1 timbl 462: } else if (match(line, "DATE:")
463: || match(line, "ORGANIZATION:")) {
2.16 luotonen 464: PUTS(strchr(line,':')+2);
465: START(HTML_BR);
466:
467: } else if(match(line, "FROM:")) {
468: char * temp=0;
469: char * href=0;
470: char *cp1, *cp2;
471:
472: /* copy into temporary storage */
473: StrAllocCopy(temp, strchr(line,':')+1);
474:
475: cp1=temp;
476: while(isspace(*cp1)) cp1++;
477: /* remove space and stuff after */
478: if((cp2 = strchr(cp1,' ')) != NULL)
479: *cp2 = '\0';
480:
481: StrAllocCopy(href,"mailto:");
482: StrAllocCat(href,cp1);
483:
484: start_anchor(href);
485: PUTS("Reply to ");
486: PUTS(strchr(line,':')+1);
487: END(HTML_A);
488: START(HTML_BR);
489:
490: /* put in the owner as a link rel. as well */
491: start_link(href, "made");
492:
493: /* free of temp vars */
494: free(temp);
495: free(href);
496:
1.1 timbl 497: } else if (match(line, "NEWSGROUPS:")) {
498: StrAllocCopy(newsgroups, HTStrip(strchr(line,':')+1));
499:
500: } else if (match(line, "REFERENCES:")) {
501: StrAllocCopy(references, HTStrip(strchr(line,':')+1));
502:
503: } /* end if match */
504: p = line; /* Restart at beginning */
505: } /* if end of line */
506: } /* Loop over characters */
2.16 luotonen 507: END(HTML_ADDRESS);
1.1 timbl 508:
1.2 timbl 509: if (newsgroups || references) {
2.16 luotonen 510: START(HTML_DL);
1.2 timbl 511: if (newsgroups) {
2.16 luotonen 512: #ifdef POSTING
513: char *href=0;
514: #endif
515:
1.2 timbl 516: (*targetClass.start_element)(target, HTML_DT , 0, 0);
517: PUTS("Newsgroups:");
518: (*targetClass.start_element)(target, HTML_DD , 0, 0);
519: write_anchors(newsgroups);
2.16 luotonen 520:
521: #ifdef POSTING
522: /* make posting possible */
523: StrAllocCopy(href,"newspost:");
524: StrAllocCat(href,newsgroups);
525: START(HTML_DT);
526: start_anchor(href);
527: PUTS("Reply to newsgroup(s)");
528: END(HTML_A);
529: #endif
530:
1.2 timbl 531: free(newsgroups);
532: }
533:
534: if (references) {
535: (*targetClass.start_element)(target, HTML_DT , 0, 0);
536: PUTS("References:");
537: (*targetClass.start_element)(target, HTML_DD , 0, 0);
538: write_anchors(references);
539: free(references);
540: }
2.16 luotonen 541: #ifdef WHEN_WE_HAVE_HTMLPLUS
542: (*targetClass.end_element)(target, HTML_DLC);
543: #else
2.10 timbl 544: (*targetClass.end_element)(target, HTML_DL);
2.16 luotonen 545: #endif
1.1 timbl 546: }
1.2 timbl 547: PUTS("\n\n\n");
1.1 timbl 548:
549: }
550:
551: /* Read in the BODY of the Article:
552: */
1.2 timbl 553: (*targetClass.start_element)(target, HTML_PRE , 0, 0);
554:
1.1 timbl 555: p = line;
556: while(!done){
557: char ch = *p++ = NEXT_CHAR;
558: if (ch==(char)EOF) {
559: abort_socket(); /* End of file, close socket */
560: return; /* End of file on response */
561: }
1.3 timbl 562: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 563: *p++=0; /* Terminate the string */
2.28 frystyk 564: if (TRACE) fprintf(TDEST, "B %s", line);
1.1 timbl 565: if (line[0]=='.') {
566: if (line[1]<' ') { /* End of article? */
567: done = YES;
568: break;
569: } else { /* Line starts with dot */
1.2 timbl 570: PUTS(&line[1]); /* Ignore first dot */
1.1 timbl 571: }
572: } else {
573:
574: /* Normal lines are scanned for buried references to other articles.
575: ** Unfortunately, it will pick up mail addresses as well!
576: */
577: char *l = line;
578: char * p;
2.14 luotonen 579: while ((p=strchr(l, '<'))) {
1.1 timbl 580: char *q = strchr(p,'>');
581: char *at = strchr(p, '@');
582: if (q && at && at<q) {
583: char c = q[1];
584: q[1] = 0; /* chop up */
585: *p = 0;
1.2 timbl 586: PUTS(l);
1.1 timbl 587: *p = '<'; /* again */
588: *q = 0;
1.2 timbl 589: start_anchor(p+1);
1.1 timbl 590: *q = '>'; /* again */
1.2 timbl 591: PUTS(p);
592: (*targetClass.end_element)(target, HTML_A);
1.1 timbl 593: q[1] = c; /* again */
594: l=q+1;
595: } else break; /* line has unmatched <> */
596: }
1.2 timbl 597: PUTS( l); /* Last bit of the line */
1.1 timbl 598: } /* if not dot */
599: p = line; /* Restart at beginning */
600: } /* if end of line */
601: } /* Loop over characters */
1.2 timbl 602:
603: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 604: }
605:
606:
607: /* Read in a List of Newsgroups
608: ** ----------------------------
609: */
610: /*
611: ** Note the termination condition of a single dot on a line by itself.
612: ** RFC 977 specifies that the line "folding" of RFC850 is not used, so we
613: ** do not handle it here.
614: */
615: PRIVATE void read_list NOARGS
616: {
617:
618: char line[LINE_LENGTH+1];
619: char *p;
620: BOOL done = NO;
621:
622: /* Read in the HEADer of the article:
623: **
624: ** The header fields are either ignored, or formatted and put into the
625: ** Text.
626: */
1.2 timbl 627: (*targetClass.start_element)(target, HTML_H1 , 0, 0);
628: PUTS( "Newsgroups");
629: (*targetClass.end_element)(target, HTML_PRE);
1.1 timbl 630: p = line;
2.16 luotonen 631: (*targetClass.start_element)(target, HTML_DL, 0, 0);
1.1 timbl 632: while(!done){
633: char ch = *p++ = NEXT_CHAR;
634: if (ch==(char)EOF) {
635: abort_socket(); /* End of file, close socket */
636: return; /* End of file on response */
637: }
1.3 timbl 638: if ((ch == LF) || (p == &line[LINE_LENGTH])) {
1.1 timbl 639: *p++=0; /* Terminate the string */
2.28 frystyk 640: if (TRACE) fprintf(TDEST, "B %s", line);
2.16 luotonen 641: (*targetClass.start_element)(target, HTML_DT , 0, 0);
1.1 timbl 642: if (line[0]=='.') {
643: if (line[1]<' ') { /* End of article? */
644: done = YES;
645: break;
646: } else { /* Line starts with dot */
1.2 timbl 647: PUTS( &line[1]);
1.1 timbl 648: }
649: } else {
650:
651: /* Normal lines are scanned for references to newsgroups.
652: */
2.16 luotonen 653: int i=0;
654:
655: /* find whitespace if it exits */
656: for(; line[i] != '\0' && !WHITE(line[i]); i++)
657: ; /* null body */
658:
659: if(line[i] != '\0') {
660: line[i] = '\0';
661: write_anchor(line, line);
662: (*targetClass.start_element)(target, HTML_DD , 0, 0);
663: PUTS(&line[i+1]); /* put description */
664: } else {
665: write_anchor(line, line);
666: }
667:
668: #ifdef OLD_CODE
1.1 timbl 669: char group[LINE_LENGTH];
670: int first, last;
671: char postable;
672: if (sscanf(line, "%s %d %d %c", group, &first, &last, &postable)==4)
673: write_anchor(line, group);
674: else
1.2 timbl 675: PUTS(line);
2.16 luotonen 676: #endif /*OLD_CODE*/
677:
1.1 timbl 678: } /* if not dot */
679: p = line; /* Restart at beginning */
680: } /* if end of line */
681: } /* Loop over characters */
2.16 luotonen 682: (*targetClass.end_element)(target, HTML_DL);
1.1 timbl 683: }
684:
685:
686: /* Read in a Newsgroup
687: ** -------------------
688: ** Unfortunately, we have to ask for each article one by one if we
689: ** want more than one field.
690: **
691: */
692: PRIVATE void read_group ARGS3(
693: CONST char *,groupName,
694: int,first_required,
695: int,last_required
696: )
697: {
698: char line[LINE_LENGTH+1];
699: char author[LINE_LENGTH+1];
700: char subject[LINE_LENGTH+1];
701: char *p;
702: BOOL done;
703:
704: char buffer[LINE_LENGTH];
705: char *reference=0; /* Href for article */
706: int art; /* Article number WITHIN GROUP */
707: int status, count, first, last; /* Response fields */
708: /* count is only an upper limit */
709:
710: sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last);
2.17 frystyk 711: if(TRACE)
2.28 frystyk 712: fprintf(TDEST,
2.17 frystyk 713: "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n",
714: status, count, first, last, first_required, last_required);
1.1 timbl 715: if (last==0) {
1.2 timbl 716: PUTS( "\nNo articles in this group.\n");
2.16 luotonen 717: #ifdef POSTING
718: goto add_post;
719: #endif
1.1 timbl 720: return;
721: }
722:
723: #define FAST_THRESHOLD 100 /* Above this, read IDs fast */
724: #define CHOP_THRESHOLD 50 /* Above this, chop off the rest */
725:
726: if (first_required<first) first_required = first; /* clip */
727: if ((last_required==0) || (last_required > last)) last_required = last;
728:
729: if (last_required<=first_required) {
1.2 timbl 730: PUTS( "\nNo articles in this range.\n");
2.16 luotonen 731: #ifdef POSTING
732: goto add_post;
733: #endif
1.1 timbl 734: return;
735: }
736:
737: if (last_required-first_required+1 > MAX_CHUNK) { /* Trim this block */
738: first_required = last_required-CHUNK_SIZE+1;
739: }
2.28 frystyk 740: if (TRACE) fprintf (TDEST, " Chunk will be (%d-%d)\n",
2.16 luotonen 741: first_required, last_required);
1.1 timbl 742:
1.2 timbl 743: /* Set window title
744: */
745: sprintf(buffer, "Newsgroup %s, Articles %d-%d",
746: groupName, first_required, last_required);
747: START(HTML_TITLE);
748: PUTS(buffer);
749: END(HTML_TITLE);
750:
1.1 timbl 751: /* Link to earlier articles
752: */
753: if (first_required>first) {
754: int before; /* Start of one before */
755: if (first_required-MAX_CHUNK <= first) before = first;
756: else before = first_required-CHUNK_SIZE;
757: sprintf(buffer, "%s/%d-%d", groupName, before, first_required-1);
2.28 frystyk 758: if (TRACE) fprintf(TDEST, " Block before is %s\n", buffer);
1.2 timbl 759: PUTS( " (");
760: start_anchor(buffer);
761: PUTS("Earlier articles");
762: END(HTML_A);
763: PUTS( "...)\n");
1.1 timbl 764: }
765:
766: done = NO;
767:
768: /*#define USE_XHDR*/
769: #ifdef USE_XHDR
770: if (count>FAST_THRESHOLD) {
771: sprintf(buffer,
772: "\nThere are about %d articles currently available in %s, IDs as follows:\n\n",
773: count, groupName);
1.2 timbl 774: PUTS(buffer);
1.3 timbl 775: sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF);
1.1 timbl 776: status = response(buffer);
777: if (status==221) {
778:
779: p = line;
780: while(!done){
781: char ch = *p++ = NEXT_CHAR;
782: if (ch==(char)EOF) {
783: abort_socket(); /* End of file, close socket */
784: return; /* End of file on response */
785: }
786: if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
787: *p++=0; /* Terminate the string */
2.28 frystyk 788: if (TRACE) fprintf(TDEST, "X %s", line);
1.1 timbl 789: if (line[0]=='.') {
790: if (line[1]<' ') { /* End of article? */
791: done = YES;
792: break;
793: } else { /* Line starts with dot */
794: /* Ignore strange line */
795: }
796: } else {
797:
798: /* Normal lines are scanned for references to articles.
799: */
800: char * space = strchr(line, ' ');
801: if (space++)
802: write_anchor(space, space);
803: } /* if not dot */
804: p = line; /* Restart at beginning */
805: } /* if end of line */
806: } /* Loop over characters */
807:
808: /* leaving loop with "done" set */
809: } /* Good status */
810: };
811: #endif
812:
813: /* Read newsgroup using individual fields:
814: */
815: if (!done) {
816: if (first==first_required && last==last_required)
1.2 timbl 817: PUTS("\nAll available articles in ");
818: else PUTS( "\nArticles in ");
819: PUTS(groupName);
820: START(HTML_MENU);
1.1 timbl 821: for(art=first_required; art<=last_required; art++) {
822:
823: /*#define OVERLAP*/
824: #ifdef OVERLAP
825: /* With this code we try to keep the server running flat out by queuing just
826: ** one extra command ahead of time. We assume (1) that the server won't abort
827: ** if it gets input during output, and (2) that TCP buffering is enough for the
828: ** two commands. Both these assumptions seem very reasonable. However, we HAVE
829: ** had a hangup with a loaded server.
830: */
831: if (art==first_required) {
832: if (art==last_required) {
1.3 timbl 833: sprintf(buffer, "HEAD %d%c%c", art, CR, LF); /* Only one */
1.1 timbl 834: status = response(buffer);
835: } else { /* First of many */
1.3 timbl 836: sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c",
837: art, CR, LF, art+1, CR, LF);
1.1 timbl 838: status = response(buffer);
839: }
840: } else if (art==last_required) { /* Last of many */
841: status = response(NULL);
842: } else { /* Middle of many */
1.3 timbl 843: sprintf(buffer, "HEAD %d%c%c", art+1, CR, LF);
1.1 timbl 844: status = response(buffer);
845: }
846:
847: #else /* NOT OVERLAP */
1.3 timbl 848: sprintf(buffer, "HEAD %d%c%c", art, CR, LF);
1.1 timbl 849: status = response(buffer);
850: #endif /* NOT OVERLAP */
851:
852: if (status == 221) { /* Head follows - parse it:*/
2.20 frystyk 853: int ch;
1.1 timbl 854: p = line; /* Write pointer */
855: done = NO;
856: while(!done){
2.20 frystyk 857: if ((ch = HTInputSocket_getCharacter(isoc)) < 0) {
1.1 timbl 858: abort_socket(); /* End of file, close socket */
859: return; /* End of file on response */
860: }
2.20 frystyk 861: *p++ = (unsigned char) ch;
1.3 timbl 862: if ((ch == LF)
1.1 timbl 863: || (p == &line[LINE_LENGTH]) ) {
864:
865: *--p=0; /* Terminate & chop LF*/
866: p = line; /* Restart at beginning */
2.28 frystyk 867: if (TRACE) fprintf(TDEST, "G %s\n", line);
1.1 timbl 868: switch(line[0]) {
869:
870: case '.':
871: done = (line[1]<' '); /* End of article? */
872: break;
873:
874: case 'S':
875: case 's':
876: if (match(line, "SUBJECT:"))
877: strcpy(subject, line+9);/* Save subject */
878: break;
879:
880: case 'M':
881: case 'm':
882: if (match(line, "MESSAGE-ID:")) {
883: char * addr = HTStrip(line+11) +1; /* Chop < */
884: addr[strlen(addr)-1]=0; /* Chop > */
885: StrAllocCopy(reference, addr);
886: }
887: break;
888:
889: case 'f':
890: case 'F':
891: if (match(line, "FROM:")) {
892: char * p;
893: strcpy(author,
894: author_name(strchr(line,':')+1));
2.17 frystyk 895: if (*author) { /* Not always there! */
896: p = author + strlen(author) - 1;
897: if (*p==LF) *p = 0; /* Chop off newline */
898: }
1.1 timbl 899: }
900: break;
901:
902: } /* end switch on first character */
903: } /* if end of line */
904: } /* Loop over characters */
905:
1.2 timbl 906: START(HTML_LI);
1.1 timbl 907: sprintf(buffer, "\"%s\" - %s", subject, author);
908: if (reference) {
909: write_anchor(buffer, reference);
910: free(reference);
911: reference=0;
912: } else {
1.2 timbl 913: PUTS(buffer);
1.1 timbl 914: }
915:
916:
1.2 timbl 917: /* indicate progress! @@@@@@
1.1 timbl 918: */
919:
920: } /* If good response */
921: } /* Loop over article */
922: } /* If read headers */
1.2 timbl 923: END(HTML_MENU);
924: START(HTML_P);
1.1 timbl 925:
926: /* Link to later articles
927: */
928: if (last_required<last) {
929: int after; /* End of article after */
930: after = last_required+CHUNK_SIZE;
931: if (after==last) sprintf(buffer, "news:%s", groupName); /* original group */
932: else sprintf(buffer, "news:%s/%d-%d", groupName, last_required+1, after);
2.28 frystyk 933: if (TRACE) fprintf(TDEST, " Block after is %s\n", buffer);
1.2 timbl 934: PUTS( "(");
935: start_anchor(buffer);
936: PUTS( "Later articles");
937: END(HTML_A);
938: PUTS( "...)\n");
1.1 timbl 939: }
2.16 luotonen 940:
2.19 luotonen 941: #ifdef POSTING
942: add_post:
943: #endif
2.16 luotonen 944: {
945: char *href=0;
946: START(HTML_HR);
947:
948: StrAllocCopy(href,"newspost:");
949: StrAllocCat(href,groupName);
950: start_anchor(href);
951: PUTS("Post to ");
952: PUTS(groupName);
953: END(HTML_A);
954:
955: free(href);
956: }
1.1 timbl 957:
958:
959: }
960:
961:
962: /* Load by name HTLoadNews
963: ** ============
964: */
2.13 timbl 965: PUBLIC int HTLoadNews ARGS1(HTRequest *, request)
1.1 timbl 966: {
2.19 luotonen 967: char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 968: char command[257]; /* The whole command */
969: char groupName[GROUP_NAME_LENGTH]; /* Just the group name */
970: int status; /* tcp return */
971: int retries; /* A count of how hard we have tried */
972: BOOL group_wanted; /* Flag: group was asked for, not article */
973: BOOL list_wanted; /* Flag: group was asked for, not article */
974: int first, last; /* First and last articles asked for */
975:
2.10 timbl 976: diagnostic = (request->output_format == WWW_SOURCE); /* set global flag */
1.1 timbl 977:
2.28 frystyk 978: if (TRACE) fprintf(TDEST, "HTNews: Looking for %s\n", arg);
1.1 timbl 979:
980: if (!initialized) initialized = initialize();
981: if (!initialized) return -1; /* FAIL */
982:
983: {
2.19 luotonen 984: char * p1=arg;
1.1 timbl 985:
986: /* We will ask for the document, omitting the host name & anchor.
987: **
988: ** Syntax of address is
989: ** xxx@yyy Article
990: ** <xxx@yyy> Same article
991: ** xxxxx News group (no "@")
992: ** group/n1-n2 Articles n1 to n2 in group
993: */
994: group_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')==0);
995: list_wanted = (strchr(arg, '@')==0) && (strchr(arg, '*')!=0);
996:
997: /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */
998: /* Don't use HTParse because news: access doesn't follow traditional
999: rules. For instance, if the article reference contains a '#',
1000: the rest of it is lost -- JFG 10/7/92, from a bug report */
1001: if (!strncasecomp (arg, "news:", 5))
1002: p1 = arg + 5; /* Skip "news:" prefix */
2.19 luotonen 1003: HTUnEscape(p1); /* AL May 2, 1994 */
1004: HTCleanTelnetString(p1); /* Prevent security holes */
1.1 timbl 1005: if (list_wanted) {
2.16 luotonen 1006: strcpy(command, "LIST NEWSGROUPS");
1.1 timbl 1007: } else if (group_wanted) {
1008: char * slash = strchr(p1, '/');
1009: strcpy(command, "GROUP ");
1010: first = 0;
1011: last = 0;
1012: if (slash) {
1013: *slash = 0;
1014: strcpy(groupName, p1);
1015: *slash = '/';
1016: (void) sscanf(slash+1, "%d-%d", &first, &last);
1017: } else {
1018: strcpy(groupName, p1);
1019: }
1020: strcat(command, groupName);
1021: } else {
1022: strcpy(command, "ARTICLE ");
1023: if (strchr(p1, '<')==0) strcat(command,"<");
1024: strcat(command, p1);
1025: if (strchr(p1, '>')==0) strcat(command,">");
1026: }
1027:
1.3 timbl 1028: {
1029: char * p = command + strlen(command);
1030: *p++ = CR; /* Macros to be correct on Mac */
1031: *p++ = LF;
1032: *p++ = 0;
1033: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
1034: }
1.1 timbl 1035: } /* scope of p1 */
1036:
1037: if (!*arg) return NO; /* Ignore if no name */
1038:
1039:
1040: /* Make a hypertext object with an anchor list.
1041: */
2.10 timbl 1042: node_anchor = request->anchor;
2.11 timbl 1043: target = HTML_new(request, NULL, WWW_HTML,
1044: request->output_format, request->output_stream);
1.2 timbl 1045: targetClass = *target->isa; /* Copy routine entry points */
1046:
1.1 timbl 1047:
1048: /* Now, let's get a stream setup up from the NewsHost:
1049: */
1050: for(retries=0;retries<2; retries++){
1051:
1052: if (s<0) {
1053: NEWS_PROGRESS("Connecting to NewsHost ...");
1054: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1055: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
1056: if (status<0){
1057: NETCLOSE(s);
2.28 frystyk 1058: s = INVSOC;
2.21 frystyk 1059: #ifdef OLD_CODE
1060: char message[256];
2.28 frystyk 1061: if (TRACE) fprintf(TDEST, "HTNews: Unable to connect to news host.\n");
1.1 timbl 1062: /* if (retries<=1) continue; WHY TRY AGAIN ? */
1063: sprintf(message,
1064: "\nCould not access %s.\n\n (Check default WorldWideWeb NewsHost ?)\n",
1.2 timbl 1065: HTNewsHost);
2.15 luotonen 1066: return HTLoadError(request, 500, message);
2.21 frystyk 1067: #endif /* OLD_CODE */
1068: {
1069: char *unescaped = NULL;
1070: StrAllocCopy(unescaped, arg);
1071: HTUnEscape(unescaped);
1072: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1073: (void *) unescaped,
1074: (int) strlen(unescaped), "HTLoadNews");
1075: free(unescaped);
1076: return -1;
1077: }
1.1 timbl 1078: } else {
2.28 frystyk 1079: if (TRACE) fprintf(TDEST, "HTNews: Connected to news host %s.\n",
1.2 timbl 1080: HTNewsHost);
2.11 timbl 1081: isoc = HTInputSocket_new(s); /* set up buffering */
1.1 timbl 1082: if ((response(NULL) / 100) !=2) {
2.21 frystyk 1083: int length = strlen(response_text);
1084: NETCLOSE(s);
1085: HTInputSocket_free(isoc);
2.28 frystyk 1086: s = INVSOC;
2.21 frystyk 1087: HTErrorAdd(request, ERR_FATAL, NO, HTERR_NEWS_SERVER,
1088: (void *) response_text, length < 50 ?
1089: length : 50, "HTLoadNews");
1090: HTErrorAdd(request, ERR_FATAL, NO, HTERR_INTERNAL,
1091: (void *) HTNewsHost,
1092: (int) strlen(HTNewsHost), "HTLoadNews");
1093: return -1;
1094: }
1095: #ifdef OLD_CODE
2.8 timbl 1096: char message[BIG];
1097: sprintf(message,
1098: "Can't read news info. News host %.20s responded: %.200s",
1099: HTNewsHost, response_text);
2.15 luotonen 1100: return HTLoadError(request, 500, message);
2.21 frystyk 1101: }
1102: #endif /* OLD_CODE */
1.1 timbl 1103: }
1104: } /* If needed opening */
1105:
1.2 timbl 1106: /* @@@@@@@@@@@@@@Tell user something's happening */
1107:
1.1 timbl 1108: status = response(command);
1109: if (status<0) break;
2.19 luotonen 1110: if (status >= 411 && status <= 430) break; /* no such article/group */
1.1 timbl 1111: if ((status/ 100) !=2) {
2.8 timbl 1112: HTProgress(response_text);
1.1 timbl 1113: /* NXRunAlertPanel("News access", response_text,
1114: NULL,NULL,NULL);
1115: */
1116: NETCLOSE(s);
2.11 timbl 1117: HTInputSocket_free(isoc);
2.28 frystyk 1118: s = INVSOC;
1.1 timbl 1119: /* return HT; -- no:the message might be "Timeout-disconnected" left over */
1120: continue; /* Try again */
1121: }
1122:
1123: /* Load a group, article, etc
1124: */
1.2 timbl 1125:
1.1 timbl 1126:
1127: if (list_wanted) read_list();
1128: else if (group_wanted) read_group(groupName, first, last);
1129: else read_article();
1130:
2.23 duns 1131: (*targetClass._free)(target);
1.2 timbl 1132: return HT_LOADED;
1.1 timbl 1133:
1134: } /* Retry loop */
1135:
1.2 timbl 1136:
2.8 timbl 1137: /* HTAlert("Sorry, could not load requested news.\n"); */
1138:
1.1 timbl 1139: /* NXRunAlertPanel(NULL, "Sorry, could not load `%s'.",
1140: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */
1141:
2.23 duns 1142: (*targetClass._free)(target); /* AL May 2, 1994 */
1.2 timbl 1143: return HT_LOADED;
1.1 timbl 1144: }
1145:
2.25 frystyk 1146: GLOBALDEF PUBLIC HTProtocol HTNews = {
1147: "news", SOC_BLOCK, HTLoadNews, NULL, NULL
1148: };
Webmaster