Annotation of libwww/Robot/src/HTRobot.c, revision 1.45
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
1.39 eric 23: #include "HTMemLog.h"
1.1 frystyk 24: #include "HTRobot.h" /* Implemented here */
25:
/* Version string: kept if already defined, otherwise a placeholder */
#ifndef W3C_VERSION
#define W3C_VERSION             "Unspecified"
#endif

/* Name and version handed to HTProfile_newRobot() and printed by VersionInfo() */
#define APP_NAME                "W3CRobot"
#define APP_VERSION             W3C_VERSION

/* Values used when an option is given on the command line without an argument */
#define DEFAULT_OUTPUT_FILE     "robot.out"
#define DEFAULT_RULE_FILE       "robot.conf"
#define DEFAULT_LOG_FILE        "robot.log"
#define DEFAULT_DEPTH           0

/* Non-zero when WWWTRACE is non-zero or HTAlert_interactive() says so */
#define SHOW_MSG                (WWWTRACE || HTAlert_interactive())

#define DEFAULT_TIMEOUT         10000   /* timeout in millis */

/* SetSignal() is only compiled and called when CATCH_SIG is defined */
#if defined(__svr4__)
#define CATCH_SIG
#endif
45:
/* Option bits kept in Robot.flags; every value is a distinct single bit */
typedef enum _MRFlags {
    MR_IMG        = 1 << 0,     /* set by -img and -saveimg */
    MR_LINK       = 1 << 1,     /* set by -link */
    MR_PREEMPTIVE = 1 << 2,     /* set by -single */
    MR_TIME       = 1 << 3,     /* set by -ss */
    MR_SAVE       = 1 << 4      /* set by -saveimg */
} MRFlags;
53:
54: typedef struct _Robot {
1.2 frystyk 55: int depth; /* How deep is our tree */
1.30 frystyk 56: int cnt; /* Count of requests */
1.2 frystyk 57: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 58: HTList * htext; /* List of our HText Objects */
1.34 eric 59: HTList * fingers;
1.40 frystyk 60: int timer;
1.1 frystyk 61: char * cwd; /* Current dir URL */
62: char * rules;
63: char * logfile;
64: char * outputfile;
65: FILE * output;
66: MRFlags flags;
67: } Robot;
1.34 eric 68:
69: typedef struct _Finger {
70: Robot * robot;
71: HTRequest * request;
72: HTParentAnchor * dest;
73: } Finger;
74:
/* Load status recorded in a HyperDoc; a new HyperDoc starts as L_INVALID */
typedef enum _LoadState {
    L_INVALID   = -2,
    L_LOADING   = -1,
    L_SUCCESS   = 0,
    L_ERROR     = 1
} LoadState;
81:
82: /*
83: ** The HyperDoc object is bound to the anchor and contains information about
84: ** where we are in the search for recursive searches
85: */
86: typedef struct _HyperDoc {
87: HTParentAnchor * anchor;
88: LoadState state;
89: int depth;
90: } HyperDoc;
91:
92: /*
93: ** This is the HText object that is created every time we start parsing a
94: ** HTML object
95: */
1.4 frystyk 96: struct _HText {
1.1 frystyk 97: HTRequest * request;
1.4 frystyk 98: };
1.1 frystyk 99:
100: PUBLIC HText * HTMainText = NULL;
101: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
102: PUBLIC HTStyleSheet * styleSheet = NULL;
103:
104: /* ------------------------------------------------------------------------- */
105:
1.13 eric 106: /* Standard (non-error) Output
107: ** ---------------------------
108: */
109: PUBLIC int OutputData(const char * fmt, ...)
110: {
111: int ret;
112: va_list pArgs;
113: va_start(pArgs, fmt);
114: ret = vfprintf(stdout, fmt, pArgs);
115: va_end(pArgs);
116: return ret;
117: }
118:
119: /* ------------------------------------------------------------------------- */
120:
1.2 frystyk 121: /* Create a "HyperDoc" object
122: ** --------------------------
123: ** A HyperDoc object contains information about whether we have already
124: ** started checking the anchor and the depth in our search
125: */
126: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
127: {
128: HyperDoc * hd;
1.14 frystyk 129: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
130: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 131: hd->state = L_INVALID;
132: hd->depth = depth;
133:
134: /* Bind the HyperDoc object together with the Anchor Object */
135: hd->anchor = anchor;
136: HTAnchor_setDocument(anchor, (void *) hd);
137:
138: /* Add this HyperDoc object to our list */
139: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
140: HTList_addObject(mr->hyperdoc, (void *) hd);
141: return hd;
142: }
143:
144: /* Delete a "HyperDoc" object
145: ** --------------------------
146: */
147: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
148: {
149: if (hd) {
1.11 frystyk 150: HT_FREE (hd);
1.2 frystyk 151: return YES;
152: }
153: return NO;
154: }
155:
1.1 frystyk 156: /* Create a Command Line Object
157: ** ----------------------------
158: */
159: PRIVATE Robot * Robot_new (void)
160: {
161: Robot * me;
1.41 frystyk 162: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL)
1.14 frystyk 163: HT_OUTOFMEM("Robot_new");
1.2 frystyk 164: me->hyperdoc = HTList_new();
1.4 frystyk 165: me->htext = HTList_new();
1.40 frystyk 166: me->timer = DEFAULT_TIMEOUT;
1.25 frystyk 167: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 168: me->output = OUTPUT;
1.35 eric 169: me->cnt = 0;
1.34 eric 170: me->fingers = HTList_new();
1.1 frystyk 171: return me;
172: }
173:
174: /* Delete a Command Line Object
175: ** ----------------------------
176: */
177: PRIVATE BOOL Robot_delete (Robot * me)
178: {
179: if (me) {
1.34 eric 180: HTList_delete(me->fingers);
1.2 frystyk 181: if (me->hyperdoc) {
182: HTList * cur = me->hyperdoc;
183: HyperDoc * pres;
184: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
185: HyperDoc_delete(pres);
186: HTList_delete(me->hyperdoc);
187: }
1.4 frystyk 188: if (me->htext) {
189: HTList * cur = me->htext;
190: HText * pres;
191: while ((pres = (HText *) HTList_nextObject(cur)))
192: HText_free(pres);
193: HTList_delete(me->htext);
194: }
1.1 frystyk 195: if (me->logfile) HTLog_close();
196: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 197: if (me->flags & MR_TIME) {
198: time_t local = time(NULL);
1.13 eric 199: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 200: }
1.11 frystyk 201: HT_FREE(me->cwd);
202: HT_FREE(me);
1.1 frystyk 203: return YES;
204: }
205: return NO;
206: }
207:
1.2 frystyk 208: /*
1.34 eric 209: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 210: */
1.34 eric 211: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 212: {
1.34 eric 213: Finger * me;
214: HTRequest * request = HTRequest_new();
215: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
216: HT_OUTOFMEM("Finger_new");
217: me->robot = robot;
218: me->request = request;
219: me->dest = dest;
220: HTList_addObject(robot->fingers, (void *)me);
221:
222: HTRequest_setContext (request, me);
223: if (robot->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(request, YES);
224: HTRequest_addRqHd(request, HT_C_HOST);
225: HTRequest_setMethod(request, method);
226: robot->cnt++;
227: return me;
1.2 frystyk 228: }
229:
1.34 eric 230: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 231: {
1.34 eric 232: HTList_removeObject(me->robot->fingers, (void *)me);
233: me->robot->cnt--;
1.37 frystyk 234:
235: /*
236: ** If we are down at one request then flush the output buffer
237: */
238: if (me->request) {
239: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 240: HTRequest_delete(me->request);
1.37 frystyk 241: }
242:
243: /*
244: ** Delete the request and free myself
245: */
1.34 eric 246: HT_FREE(me);
247: return YES;
1.2 frystyk 248: }
249:
250: /*
251: ** Cleanup and make sure we close all connections including the persistent
252: ** ones
253: */
1.1 frystyk 254: PRIVATE void Cleanup (Robot * me, int status)
255: {
256: Robot_delete(me);
1.29 eric 257: HTProfile_delete();
1.39 eric 258: HTMemLog_close();
1.1 frystyk 259: #ifdef VMS
260: exit(status ? status : 1);
261: #else
262: exit(status ? status : 0);
263: #endif
264: }
265:
#ifdef CATCH_SIG
#include <signal.h>
/*      SetSignal
**      This function sets up signal handlers. This might not be necessary to
**      call if the application has its own handlers (lossage on SVR4).
**      The outcome is traced when PROT_TRACE is on, and the memory log is
**      flushed afterwards.
*/
PRIVATE void SetSignal (void)
{
    /* On some systems (SYSV) it is necessary to catch the SIGPIPE signal
    ** when attempting to connect to a remote host where you normally should
    ** get `connection refused' back
    */
    if (signal(SIGPIPE, SIG_IGN) != SIG_ERR) {
        if (PROT_TRACE) HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    } else {
        if (PROT_TRACE) HTTrace("HTSignal.... Can't catch SIGPIPE\n");
    }

    HTMemLog_flush();
}
#endif /* CATCH_SIG */
286:
287: PRIVATE void VersionInfo (void)
288: {
1.13 eric 289: OutputData("\n\nW3C Reference Software\n\n");
290: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 291: APP_NAME, APP_VERSION);
1.13 eric 292: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
293: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 294: }
295:
296: /* terminate_handler
297: ** -----------------
1.2 frystyk 298: ** This function is registered to handle the result of the request.
299: ** If no more requests are pending then terminate program
1.1 frystyk 300: */
1.32 frystyk 301: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
302: void * param, int status)
1.1 frystyk 303: {
1.34 eric 304: Finger * finger = (Finger *) HTRequest_context(request);
305: Robot * robot = finger->robot;
306: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
307: Finger_delete(finger);
1.37 frystyk 308: if (robot->cnt <= 0) {
1.34 eric 309: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.37 frystyk 310: Cleanup(robot, 0); /* No way back from here */
1.30 frystyk 311: }
1.37 frystyk 312:
313: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", robot->cnt, robot->cnt == 1 ? "" : "s");
1.1 frystyk 314: return HT_OK;
315: }
316:
317: /* ------------------------------------------------------------------------- */
318: /* HTEXT INTERFACE */
319: /* ------------------------------------------------------------------------- */
320:
321: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
322: HTStream * stream)
323: {
324: HText * me;
1.34 eric 325: Finger * finger = (Finger *) HTRequest_context(request);
326: Robot * mr = finger->robot;
1.14 frystyk 327: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
328: HT_OUTOFMEM("HText_new2");
1.4 frystyk 329:
330: /* Bind the HText object together with the Request Object */
1.1 frystyk 331: me->request = request;
1.4 frystyk 332:
333: /* Add this HyperDoc object to our list */
334: if (!mr->htext) mr->htext = HTList_new();
335: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 336: return me;
337: }
338:
1.4 frystyk 339: PUBLIC void HText_free (HText * me) {
1.11 frystyk 340: if (me) HT_FREE (me);
1.4 frystyk 341: }
342:
1.1 frystyk 343: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
344: {
345: if (text && anchor) {
1.34 eric 346: Finger * finger = (Finger *) HTRequest_context(text->request);
347: Robot * mr = finger->robot;
1.1 frystyk 348: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
349: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 350: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 351: HyperDoc * hd = HTAnchor_document(dest_parent);
352:
1.13 eric 353: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 354:
1.2 frystyk 355: /* Test whether we already have a hyperdoc for this document */
356: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 357: HTParentAnchor * parent = HTRequest_parent(text->request);
358: HyperDoc * last = HTAnchor_document(parent);
359: int depth = last ? last->depth+1 : 0;
1.34 eric 360: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
361: HTRequest * newreq = newfinger->request;
1.2 frystyk 362: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 363: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
364: if (depth >= mr->depth) {
365: if (SHOW_MSG)
1.13 eric 366: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 367: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 368: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 369: } else {
1.13 eric 370: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 371: }
372: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 373: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 374: Finger_delete(newfinger);
1.2 frystyk 375: }
1.7 frystyk 376: } else {
1.18 frystyk 377: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 378: }
1.11 frystyk 379: HT_FREE(uri);
1.2 frystyk 380: }
381: }
382:
383: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 384: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 385: {
386: if (text && anchor) {
1.34 eric 387: Finger * finger = (Finger *) HTRequest_context(text->request);
388: Robot * mr = finger->robot;
1.2 frystyk 389: HTParentAnchor * dest = (HTParentAnchor *)
390: HTAnchor_followMainLink((HTAnchor *) anchor);
391: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 392:
1.2 frystyk 393: /* Test whether we already have a hyperdoc for this document */
394: if (mr->flags & MR_IMG && dest && !hd) {
395: HTParentAnchor * parent = HTRequest_parent(text->request);
396: HyperDoc * last = HTAnchor_document(parent);
397: int depth = last ? last->depth+1 : 0;
1.45 ! frystyk 398: Finger * newfinger = Finger_new(mr, dest,
! 399: mr->flags & MR_SAVE ?
! 400: METHOD_GET : METHOD_HEAD);
1.34 eric 401: HTRequest * newreq = newfinger->request;
1.2 frystyk 402: HyperDoc_new(mr, dest, depth);
403: if (SHOW_MSG) {
404: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 405: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 406: HT_FREE(uri);
1.2 frystyk 407: }
408: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
409: if (SHOW_MSG)
1.13 eric 410: HTTrace("Robot....... Image not tested!\n");
1.34 eric 411: Finger_delete(newfinger);
1.1 frystyk 412: }
413: }
414: }
415: }
416:
417: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 418: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 419: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
420: PUBLIC void HText_endAppend (HText * text) {}
421: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
422: PUBLIC void HText_beginAppend (HText * text) {}
423: PUBLIC void HText_appendParagraph (HText * text) {}
424:
425: /* ------------------------------------------------------------------------- */
426: /* MAIN PROGRAM */
427: /* ------------------------------------------------------------------------- */
428:
429: int main (int argc, char ** argv)
430: {
431: int status = 0;
432: int arg;
433: HTChunk * keywords = NULL; /* From command line */
434: int keycnt = 0;
1.12 frystyk 435: Robot * mr = NULL;
1.43 frystyk 436: Finger * finger = NULL;
437: HTParentAnchor * startAnchor = NULL;
1.1 frystyk 438:
439: /* Starts Mac GUSI socket library */
440: #ifdef GUSI
441: GUSISetup(GUSIwithSIOUXSockets);
442: GUSISetup(GUSIwithInternetSockets);
443: #endif
444:
445: #ifdef __MWERKS__ /* STR */
446: InitGraf((Ptr) &qd.thePort);
447: InitFonts();
448: InitWindows();
449: InitMenus(); TEInit();
450: InitDialogs(nil);
451: InitCursor();
452: SIOUXSettings.asktosaveonclose = false;
453: argc=ccommand(&argv);
454: #endif
455:
1.44 eric 456: HTMemLog_open("data.log", 8192, YES);
1.39 eric 457: HTTraceData_setCallback(HTMemLog_callback);
1.27 frystyk 458: /* Initiate W3C Reference Library with a robot profile */
459: HTProfile_newRobot(APP_NAME, APP_VERSION);
460:
461: /* Add the default HTML parser to the set of converters */
462: {
463: HTList * converters = HTFormat_conversion();
464: HTMLInit(converters);
465: }
1.1 frystyk 466:
1.12 frystyk 467: /* Build a new robot object */
468: mr = Robot_new();
469:
1.1 frystyk 470: /* Scan command Line for parameters */
471: for (arg=1; arg<argc; arg++) {
472: if (*argv[arg] == '-') {
473:
474: /* non-interactive */
1.17 frystyk 475: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 476: HTAlert_setInteractive(NO);
477:
478: /* log file */
479: } else if (!strcmp(argv[arg], "-l")) {
480: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
481: argv[++arg] : DEFAULT_LOG_FILE;
482:
483: /* rule file */
484: } else if (!strcmp(argv[arg], "-r")) {
485: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
486: argv[++arg] : DEFAULT_RULE_FILE;
487:
488: /* output filename */
489: } else if (!strcmp(argv[arg], "-o")) {
490: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
491: argv[++arg] : DEFAULT_OUTPUT_FILE;
492:
493: /* timeout -- Change the default request timeout */
494: } else if (!strcmp(argv[arg], "-timeout")) {
495: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
496: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
1.40 frystyk 497: if (timeout > 0) mr->timer = timeout;
1.1 frystyk 498:
1.7 frystyk 499: /* preemptive or non-preemptive access */
1.1 frystyk 500: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 501: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 502:
503: /* test inlined images */
504: } else if (!strcmp(argv[arg], "-img")) {
505: mr->flags |= MR_IMG;
1.45 ! frystyk 506:
! 507: /* load inlined images */
! 508: } else if (!strcmp(argv[arg], "-saveimg")) {
! 509: mr->flags |= (MR_IMG | MR_SAVE);
1.2 frystyk 510:
511: /* load anchors */
512: } else if (!strcmp(argv[arg], "-link")) {
513: mr->flags |= MR_LINK;
1.7 frystyk 514: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
515: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 516:
1.12 frystyk 517: /* Output start and end time */
518: } else if (!strcmp(argv[arg], "-ss")) {
519: time_t local = time(NULL);
1.13 eric 520: HTTrace("Robot started on %s\n",
1.12 frystyk 521: HTDateTimeStr(&local, YES));
522: mr->flags |= MR_TIME;
523:
1.1 frystyk 524: /* print version and exit */
525: } else if (!strcmp(argv[arg], "-version")) {
526: VersionInfo();
527: Cleanup(mr, 0);
528:
529: #ifdef WWWTRACE
530: /* trace flags */
531: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 532: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 533: #endif
534:
535: } else {
1.13 eric 536: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 537: }
1.17 frystyk 538: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 539: if (!keycnt) {
540: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 541: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
542: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 543: keycnt = 1;
1.11 frystyk 544: HT_FREE(ref);
1.1 frystyk 545: } else { /* Check for successive keyword arguments */
546: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
547: if (keycnt++ <= 1)
1.5 frystyk 548: keywords = HTChunk_new(128);
1.1 frystyk 549: else
1.5 frystyk 550: HTChunk_putc(keywords, ' ');
551: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 552: HT_FREE(escaped);
1.1 frystyk 553: }
554: }
555: }
556:
557: #ifdef CATCH_SIG
558: SetSignal();
559: #endif
560:
561: if (!keycnt) {
1.13 eric 562: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 563: Cleanup(mr, -1);
564: }
565:
1.23 manoli 566: /* Testing that HTTrace is working */
567: HTTrace ("Welcome to the W3C mini Robot\n");
568:
1.1 frystyk 569: /* Rule file specified? */
570: if (mr->rules) {
571: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 572: if (!HTLoadRules(rules))
1.13 eric 573: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 574: HT_FREE(rules);
1.1 frystyk 575: }
576:
577: /* Output file specified? */
578: if (mr->outputfile) {
579: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 580: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 581: mr->output = OUTPUT;
582: }
583: }
584:
585: /* Log file specifed? */
586: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
587:
1.27 frystyk 588: /* Register our own someterminater filter */
1.32 frystyk 589: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.40 frystyk 590:
591: /* Setting event timeout */
592: HTHost_setEventTimeout(mr->timer);
1.37 frystyk 593:
1.34 eric 594: /* Start the request */
595: finger = Finger_new(mr, startAnchor, METHOD_GET);
1.43 frystyk 596:
597: /*
598: ** Make sure that the first request is flushed immediately and not
599: ** buffered in the output buffer
600: */
601: HTRequest_setFlush(finger->request, YES);
602:
603: /*
604: ** Now do the load
605: */
1.34 eric 606: if (mr->flags & MR_PREEMPTIVE)
607: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 608:
609: if (keywords) /* Search */
1.34 eric 610: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 611: else
1.34 eric 612: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 613:
1.5 frystyk 614: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 615: if (status != YES) {
1.13 eric 616: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 617: Cleanup(mr, -1);
618: }
619:
620: /* Go into the event loop... */
1.34 eric 621: HTEventList_loop(finger->request);
1.1 frystyk 622:
623: /* Only gets here if event loop fails */
624: Cleanup(mr, 0);
625: return 0;
626: }
Webmaster