Annotation of libwww/Robot/src/HTRobot.c, revision 1.33
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
23: #include "HTRobot.h" /* Implemented here */
1.33 ! eric 24: #include "HTWatch.h"
1.1 frystyk 25:
1.14 frystyk 26: #ifndef W3C_VERSION
1.33 ! eric 27: #define W3C_VERSION "Unspecified" /* Fallback when no version was defined before this point */
1.1 frystyk 28: #endif
29:
30: #define APP_NAME "W3CRobot" /* Passed to HTProfile_newRobot() and printed by VersionInfo() */
1.14 frystyk 31: #define APP_VERSION W3C_VERSION /* Passed to HTProfile_newRobot() and printed by VersionInfo() */
1.1 frystyk 32:
33: #define DEFAULT_OUTPUT_FILE "robot.out" /* Used when -o is given without a file name */
34: #define DEFAULT_RULE_FILE "robot.conf" /* Used when -r is given without a file name */
35: #define DEFAULT_LOG_FILE "robot.log" /* Used when -l is given without a file name */
1.7 frystyk 36: #define DEFAULT_DEPTH 0 /* Used when -link is given without a depth */
1.1 frystyk 37:
38: #define SHOW_MSG (WWWTRACE || HTAlert_interactive()) /* Guards the robot's own HTTrace() messages */
39:
1.7 frystyk 40: #define DEFAULT_TIMEOUT 10 /* timeout in seconds; initial tv_sec and fallback for -timeout */
1.1 frystyk 41:
42: #if defined(__svr4__)
43: #define CATCH_SIG /* Compiles in SetSignal() and its call from main() */
44: #endif
45:
/* Bit flags stored in Robot.flags; each one is switched on by a command line option in main() */
46: typedef enum _MRFlags {
1.2 frystyk 47: MR_IMG = 0x1, /* -img: HText_appendImage() checks inlined images */
48: MR_LINK = 0x2, /* -link: HText_beginAnchor() follows links */
1.12 frystyk 49: MR_PREEMPTIVE= 0x4, /* -single: Thread_new() makes new requests preemptive */
50: MR_TIME = 0x8 /* -ss: Robot_delete() traces the termination time */
1.1 frystyk 51: } MRFlags;
52:
/* State of one robot run; created by Robot_new() and attached as context to every request */
53: typedef struct _Robot {
54: HTRequest * request; /* Request used for the start document */
1.7 frystyk 55: HTRequest * timeout; /* Until we get a server eventloop */
1.1 frystyk 56: HTParentAnchor * anchor; /* Start document, from the first non-option argument */
1.2 frystyk 57: int depth; /* How deep is our tree (set by -link) */
1.30 frystyk 58: int cnt; /* Count of requests: starts at 1, ++ in Thread_new(), -- in Thread_delete() */
1.2 frystyk 59: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 60: HTList * htext; /* List of our HText Objects */
1.1 frystyk 61: struct timeval * tv; /* Timeout on socket */
62: char * cwd; /* Current dir URL */
63: char * rules; /* Rule file name (-r); points at argv or a default literal, never freed */
64: char * logfile; /* Log file name (-l); points at argv or a default literal, never freed */
65: char * outputfile; /* Output file name (-o); points at argv or a default literal, never freed */
66: FILE * output; /* OUTPUT, or the stream opened from outputfile */
67: MRFlags flags; /* Combination of MR_* bits */
68: } Robot;
69:
/* Load status kept in HyperDoc.state; only L_INVALID is assigned in the code visible here */
70: typedef enum _LoadState {
71: L_INVALID = -2, /* Initial value set by HyperDoc_new() */
72: L_LOADING = -1,
73: L_SUCCESS = 0,
74: L_ERROR
75: } LoadState;
76:
77: /*
78: ** The HyperDoc object is bound to the anchor and contains information about
79: ** where we are in the search for recursive searches
80: */
81: typedef struct _HyperDoc {
82: HTParentAnchor * anchor; /* Anchor this object is bound to via HTAnchor_setDocument() */
83: LoadState state; /* Starts as L_INVALID */
84: int depth; /* Link distance from the start document (0 for the start document) */
85: } HyperDoc;
86:
87: /*
88: ** This is the HText object that is created every time we start parsing a
89: ** HTML object
90: */
1.4 frystyk 91: struct _HText {
1.1 frystyk 92: HTRequest * request; /* Request being parsed; its context is the Robot object */
1.4 frystyk 93: };
1.1 frystyk 94:
/* NOTE(review): none of these globals is referenced elsewhere in this file; presumably */
/* they satisfy external references from other modules - confirm before removing. */
95: PUBLIC HText * HTMainText = NULL;
96: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
97: PUBLIC HTStyleSheet * styleSheet = NULL;
98:
99: /* ------------------------------------------------------------------------- */
100:
1.33 ! eric 101: PUBLIC int HTWatch(int id, void * obj, const char * fmt, ...)
! 102: {
! 103: va_list pArgs;
! 104: va_start(pArgs, fmt);
! 105: fprintf(stderr, "id: %x obj: %p: ", id, obj);
! 106: return vfprintf(stderr, fmt, pArgs);
! 107: }
! 108:
1.13 eric 109: /* Standard (non-error) Output
110: ** ---------------------------
111: */
112: PUBLIC int OutputData(const char * fmt, ...)
113: {
114: int ret;
115: va_list pArgs;
116: va_start(pArgs, fmt);
117: ret = vfprintf(stdout, fmt, pArgs);
118: va_end(pArgs);
119: return ret;
120: }
121:
122: /* ------------------------------------------------------------------------- */
123:
1.2 frystyk 124: /* Create a "HyperDoc" object
125: ** --------------------------
126: ** A HyperDoc object contains information about whether we have already
127: ** started checking the anchor and the depth in our search
128: */
129: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
130: {
131: HyperDoc * hd;
1.14 frystyk 132: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
133: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 134: hd->state = L_INVALID;
135: hd->depth = depth;
136:
137: /* Bind the HyperDoc object together with the Anchor Object */
138: hd->anchor = anchor;
139: HTAnchor_setDocument(anchor, (void *) hd);
140:
141: /* Add this HyperDoc object to our list */
142: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
143: HTList_addObject(mr->hyperdoc, (void *) hd);
144: return hd;
145: }
146:
147: /* Delete a "HyperDoc" object
148: ** --------------------------
149: */
150: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
151: {
152: if (hd) {
1.11 frystyk 153: HT_FREE (hd);
1.2 frystyk 154: return YES;
155: }
156: return NO;
157: }
158:
1.1 frystyk 159: /* Create a Command Line Object
160: ** ----------------------------
161: */
162: PRIVATE Robot * Robot_new (void)
163: {
164: Robot * me;
1.14 frystyk 165: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
166: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
167: HT_OUTOFMEM("Robot_new");
1.2 frystyk 168: me->hyperdoc = HTList_new();
1.4 frystyk 169: me->htext = HTList_new();
1.1 frystyk 170: me->tv->tv_sec = DEFAULT_TIMEOUT;
1.25 frystyk 171: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 172: me->output = OUTPUT;
1.30 frystyk 173: me->cnt = 1;
1.1 frystyk 174:
1.7 frystyk 175: /* We keep an extra timeout request object for the timeout_handler */
176: me->timeout = HTRequest_new();
177: HTRequest_setContext (me->timeout, me);
178:
1.1 frystyk 179: /* Bind the Robot object together with the Request Object */
180: me->request = HTRequest_new();
181: HTRequest_setContext (me->request, me);
182: return me;
183: }
184:
185: /* Delete a Command Line Object
186: ** ----------------------------
187: */
188: PRIVATE BOOL Robot_delete (Robot * me)
189: {
190: if (me) {
1.2 frystyk 191: if (me->hyperdoc) {
192: HTList * cur = me->hyperdoc;
193: HyperDoc * pres;
194: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
195: HyperDoc_delete(pres);
196: HTList_delete(me->hyperdoc);
197: }
1.4 frystyk 198: if (me->htext) {
199: HTList * cur = me->htext;
200: HText * pres;
201: while ((pres = (HText *) HTList_nextObject(cur)))
202: HText_free(pres);
203: HTList_delete(me->htext);
204: }
1.1 frystyk 205: if (me->logfile) HTLog_close();
206: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 207: if (me->flags & MR_TIME) {
208: time_t local = time(NULL);
1.13 eric 209: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 210: }
1.11 frystyk 211: HT_FREE(me->cwd);
212: HT_FREE(me->tv);
213: HT_FREE(me);
1.1 frystyk 214: return YES;
215: }
216: return NO;
217: }
218:
1.2 frystyk 219: /*
220: ** This function creates a new request object and initializes it
221: */
222: PRIVATE HTRequest * Thread_new (Robot * mr, HTMethod method)
223: {
224: HTRequest * newreq = HTRequest_new();
225: HTRequest_setContext (newreq, mr);
1.7 frystyk 226: if (mr->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(newreq, YES);
1.5 frystyk 227: HTRequest_addRqHd(newreq, HT_C_HOST);
1.2 frystyk 228: HTRequest_setMethod(newreq, method);
1.30 frystyk 229: mr->cnt++;
1.2 frystyk 230: return newreq;
231: }
232:
233: PRIVATE BOOL Thread_delete (Robot * mr, HTRequest * request)
234: {
235: if (mr && request) {
236: HTRequest_delete(request);
1.30 frystyk 237: mr->cnt--;
1.2 frystyk 238: return YES;
239: }
240: return NO;
241: }
242:
243: /*
244: ** Cleanup and make sure we close all connections including the persistent
245: ** ones
246: */
1.1 frystyk 247: PRIVATE void Cleanup (Robot * me, int status)
248: {
249: Robot_delete(me);
1.29 eric 250: HTProfile_delete();
1.1 frystyk 251: #ifdef VMS
252: exit(status ? status : 1);
253: #else
254: exit(status ? status : 0);
255: #endif
256: }
257:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	---------
**	Installs the signal dispositions the robot needs. Applications that
**	set up their own handlers may not need to call this (lossage on SVR4).
**
**	On some systems (SYSV) SIGPIPE has to be dealt with when attempting to
**	connect to a remote host where one would normally just get
**	`connection refused' back, so the signal is set to be ignored.
*/
PRIVATE void SetSignal (void)
{
    if (signal(SIGPIPE, SIG_IGN) != SIG_ERR) {
        if (PROT_TRACE) HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    } else {
        if (PROT_TRACE) HTTrace("HTSignal.... Can't catch SIGPIPE\n");
    }
}
#endif /* CATCH_SIG */
277:
278: PRIVATE void VersionInfo (void)
279: {
1.13 eric 280: OutputData("\n\nW3C Reference Software\n\n");
281: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 282: APP_NAME, APP_VERSION);
1.13 eric 283: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
284: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 285: }
286:
287: /* terminate_handler
288: ** -----------------
1.2 frystyk 289: ** This function is registered to handle the result of the request.
290: ** If no more requests are pending then terminate program
1.1 frystyk 291: */
1.32 frystyk 292: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
293: void * param, int status)
1.1 frystyk 294: {
295: Robot * mr = (Robot *) HTRequest_context(request);
1.2 frystyk 296: Thread_delete(mr, request);
1.30 frystyk 297: if (HTNet_isEmpty()) {
298: if (SHOW_MSG) HTTrace("Robot....... Everything is finished...\n");
299: Cleanup(mr, 0);
300: }
301: if (SHOW_MSG) HTTrace("Robot....... %d outstanding requests\n", mr->cnt);
1.1 frystyk 302: return HT_OK;
303: }
304:
305: /* timeout_handler
306: ** ---------------
307: ** This function is registered to handle timeout in select eventloop
1.7 frystyk 308: **
309: ** BUG: This doesn't work as we don't get the right request object
310: ** back from the event loop
1.1 frystyk 311: */
312: PRIVATE int timeout_handler (HTRequest * request)
313: {
1.27 frystyk 314: #if 0
1.2 frystyk 315: Robot * mr = (Robot *) HTRequest_context(request);
1.27 frystyk 316: #endif
1.25 frystyk 317: if (SHOW_MSG) HTTrace("Robot....... We don't know how to handle timeout...\n");
1.7 frystyk 318: #if 0
1.1 frystyk 319: HTRequest_kill(request);
1.2 frystyk 320: Thread_delete(mr, request);
1.7 frystyk 321: #endif
1.4 frystyk 322: return HT_OK;
1.1 frystyk 323: }
324:
325: /* ------------------------------------------------------------------------- */
326: /* HTEXT INTERFACE */
327: /* ------------------------------------------------------------------------- */
328:
329: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
330: HTStream * stream)
331: {
332: HText * me;
1.4 frystyk 333: Robot * mr = (Robot *) HTRequest_context(request);
1.14 frystyk 334: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
335: HT_OUTOFMEM("HText_new2");
1.4 frystyk 336:
337: /* Bind the HText object together with the Request Object */
1.1 frystyk 338: me->request = request;
1.4 frystyk 339:
340: /* Add this HyperDoc object to our list */
341: if (!mr->htext) mr->htext = HTList_new();
342: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 343: return me;
344: }
345:
1.4 frystyk 346: PUBLIC void HText_free (HText * me) {
1.11 frystyk 347: if (me) HT_FREE (me);
1.4 frystyk 348: }
349:
1.1 frystyk 350: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
351: {
352: if (text && anchor) {
1.2 frystyk 353: Robot * mr = (Robot *) HTRequest_context(text->request);
1.1 frystyk 354: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
355: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 356: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 357: HyperDoc * hd = HTAnchor_document(dest_parent);
358:
1.13 eric 359: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 360:
1.2 frystyk 361: /* Test whether we already have a hyperdoc for this document */
362: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 363: HTParentAnchor * parent = HTRequest_parent(text->request);
364: HyperDoc * last = HTAnchor_document(parent);
365: int depth = last ? last->depth+1 : 0;
1.2 frystyk 366: HTRequest * newreq = Thread_new(mr, METHOD_GET);
367: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 368: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
369: if (depth >= mr->depth) {
370: if (SHOW_MSG)
1.13 eric 371: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 372: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 373: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 374: } else {
1.13 eric 375: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 376: }
377: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 378: if (SHOW_MSG) HTTrace("not tested!\n");
1.2 frystyk 379: Thread_delete(mr, newreq);
380: }
1.7 frystyk 381: } else {
1.18 frystyk 382: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 383: }
1.11 frystyk 384: HT_FREE(uri);
1.2 frystyk 385: }
386: }
387:
388: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 389: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 390: {
391: if (text && anchor) {
392: Robot * mr = (Robot *) HTRequest_context(text->request);
393: HTParentAnchor * dest = (HTParentAnchor *)
394: HTAnchor_followMainLink((HTAnchor *) anchor);
395: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 396:
1.2 frystyk 397: /* Test whether we already have a hyperdoc for this document */
398: if (mr->flags & MR_IMG && dest && !hd) {
399: HTParentAnchor * parent = HTRequest_parent(text->request);
400: HyperDoc * last = HTAnchor_document(parent);
401: int depth = last ? last->depth+1 : 0;
402: HTRequest * newreq = Thread_new(mr, METHOD_HEAD);
403: HyperDoc_new(mr, dest, depth);
404: if (SHOW_MSG) {
405: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 406: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 407: HT_FREE(uri);
1.2 frystyk 408: }
409: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
410: if (SHOW_MSG)
1.13 eric 411: HTTrace("Robot....... Image not tested!\n");
1.2 frystyk 412: Thread_delete(mr, newreq);
1.1 frystyk 413: }
414: }
415: }
416: }
417:
418: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 419: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 420: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
421: PUBLIC void HText_endAppend (HText * text) {}
422: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
423: PUBLIC void HText_beginAppend (HText * text) {}
424: PUBLIC void HText_appendParagraph (HText * text) {}
425:
426: /* ------------------------------------------------------------------------- */
427: /* MAIN PROGRAM */
428: /* ------------------------------------------------------------------------- */
429:
430: int main (int argc, char ** argv)
431: {
432: int status = 0;
433: int arg;
434: HTChunk * keywords = NULL; /* From command line */
435: int keycnt = 0;
1.12 frystyk 436: Robot * mr = NULL;
1.1 frystyk 437:
438: /* Starts Mac GUSI socket library */
439: #ifdef GUSI
440: GUSISetup(GUSIwithSIOUXSockets);
441: GUSISetup(GUSIwithInternetSockets);
442: #endif
443:
444: #ifdef __MWERKS__ /* STR */
445: InitGraf((Ptr) &qd.thePort);
446: InitFonts();
447: InitWindows();
448: InitMenus(); TEInit();
449: InitDialogs(nil);
450: InitCursor();
451: SIOUXSettings.asktosaveonclose = false;
452: argc=ccommand(&argv);
453: #endif
454:
1.27 frystyk 455: /* Initiate W3C Reference Library with a robot profile */
456: HTProfile_newRobot(APP_NAME, APP_VERSION);
457:
458: /* Add the default HTML parser to the set of converters */
459: {
460: HTList * converters = HTFormat_conversion();
461: HTMLInit(converters);
462: }
1.1 frystyk 463:
1.12 frystyk 464: /* Build a new robot object */
465: mr = Robot_new();
466:
1.1 frystyk 467: /* Scan command Line for parameters */
468: for (arg=1; arg<argc; arg++) {
469: if (*argv[arg] == '-') {
470:
471: /* non-interactive */
1.17 frystyk 472: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 473: HTAlert_setInteractive(NO);
474:
475: /* log file */
476: } else if (!strcmp(argv[arg], "-l")) {
477: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
478: argv[++arg] : DEFAULT_LOG_FILE;
479:
480: /* rule file */
481: } else if (!strcmp(argv[arg], "-r")) {
482: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
483: argv[++arg] : DEFAULT_RULE_FILE;
484:
485: /* output filename */
486: } else if (!strcmp(argv[arg], "-o")) {
487: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
488: argv[++arg] : DEFAULT_OUTPUT_FILE;
489:
490: /* timeout -- Change the default request timeout */
491: } else if (!strcmp(argv[arg], "-timeout")) {
492: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
493: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
494: if (timeout > 0) mr->tv->tv_sec = timeout;
495:
1.7 frystyk 496: /* preemptive or non-preemptive access */
1.1 frystyk 497: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 498: HTRequest_setPreemptive(mr->request, YES);
499: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 500:
501: /* test inlined images */
502: } else if (!strcmp(argv[arg], "-img")) {
503: mr->flags |= MR_IMG;
504:
505: /* load anchors */
506: } else if (!strcmp(argv[arg], "-link")) {
507: mr->flags |= MR_LINK;
1.7 frystyk 508: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
509: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 510:
1.7 frystyk 511: /* preemptive or non-preemptive access */
1.2 frystyk 512: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 513: HTRequest_setPreemptive(mr->request, YES);
514: mr->flags |= MR_PREEMPTIVE;
1.1 frystyk 515:
1.12 frystyk 516: /* Output start and end time */
517: } else if (!strcmp(argv[arg], "-ss")) {
518: time_t local = time(NULL);
1.13 eric 519: HTTrace("Robot started on %s\n",
1.12 frystyk 520: HTDateTimeStr(&local, YES));
521: mr->flags |= MR_TIME;
522:
1.1 frystyk 523: /* print version and exit */
524: } else if (!strcmp(argv[arg], "-version")) {
525: VersionInfo();
526: Cleanup(mr, 0);
527:
528: #ifdef WWWTRACE
529: /* trace flags */
530: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 531: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 532: #endif
533:
534: } else {
1.13 eric 535: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 536: }
1.17 frystyk 537: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 538: if (!keycnt) {
539: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
540: mr->anchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
1.7 frystyk 541: HyperDoc_new(mr, mr->anchor, 0);
1.1 frystyk 542: keycnt = 1;
1.11 frystyk 543: HT_FREE(ref);
1.1 frystyk 544: } else { /* Check for successive keyword arguments */
545: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
546: if (keycnt++ <= 1)
1.5 frystyk 547: keywords = HTChunk_new(128);
1.1 frystyk 548: else
1.5 frystyk 549: HTChunk_putc(keywords, ' ');
550: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 551: HT_FREE(escaped);
1.1 frystyk 552: }
553: }
554: }
555:
556: #ifdef CATCH_SIG
557: SetSignal();
558: #endif
559:
560: if (!keycnt) {
1.13 eric 561: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 562: Cleanup(mr, -1);
563: }
564:
1.23 manoli 565: /* Testing that HTTrace is working */
566: HTTrace ("Welcome to the W3C mini Robot\n");
567:
1.1 frystyk 568: /* Rule file specified? */
569: if (mr->rules) {
570: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 571: if (!HTLoadRules(rules))
1.13 eric 572: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 573: HT_FREE(rules);
1.1 frystyk 574: }
575:
576: /* Output file specified? */
577: if (mr->outputfile) {
578: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 579: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 580: mr->output = OUTPUT;
581: }
582: }
583:
584: /* Log file specifed? */
585: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
586:
1.27 frystyk 587: /* Register our own someterminater filter */
1.32 frystyk 588: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.1 frystyk 589:
590: /* Set timeout on sockets */
1.33 ! eric 591: HTEventList_registerTimeout(mr->tv, mr->timeout, timeout_handler, NO);
1.1 frystyk 592:
593: /* Start the request */
594: if (keywords) /* Search */
1.28 frystyk 595: status = HTSearchAnchor(keywords, (HTAnchor *)mr->anchor, mr->request);
1.1 frystyk 596: else
597: status = HTLoadAnchor((HTAnchor *) mr->anchor, mr->request);
598:
1.5 frystyk 599: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 600: if (status != YES) {
1.13 eric 601: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 602: Cleanup(mr, -1);
603: }
604:
605: /* Go into the event loop... */
1.33 ! eric 606: HTEventList_loop(mr->request);
1.1 frystyk 607:
608: /* Only gets here if event loop fails */
609: Cleanup(mr, 0);
610: return 0;
611: }
Webmaster