Annotation of libwww/Robot/src/HTRobot.c, revision 1.46
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
1.39 eric 23: #include "HTMemLog.h"
1.1 frystyk 24: #include "HTRobot.h" /* Implemented here */
25:
/* Fall back to a placeholder when the build does not supply a version */
#ifndef W3C_VERSION
#define W3C_VERSION             "Unspecified"
#endif

/* Name and version passed to HTProfile_newRobot() and printed by -version */
#define APP_NAME                "W3CRobot"
#define APP_VERSION             W3C_VERSION

/* File names used when -o, -r or -l is given without a value */
#define DEFAULT_OUTPUT_FILE     "robot.out"
#define DEFAULT_RULE_FILE       "robot.conf"
#define DEFAULT_LOG_FILE        "robot.log"

/* Depth used when -link is given without a value */
#define DEFAULT_DEPTH           0

/*
**  Progress messages are shown unless the robot runs in quiet mode (-q).
**  The macro expands to an expression on a variable named `mr', so a
**  `Robot * mr' must be in scope wherever SHOW_MSG is used.
**  Previous definition: (WWWTRACE || HTAlert_interactive())
*/
#define SHOW_MSG                (!(mr->flags & MR_QUIET))

#define DEFAULT_TIMEOUT         10000           /* timeout in millis */

/* The SIGPIPE handling in SetSignal() is only compiled in on SVR4 */
#ifdef __svr4__
#define CATCH_SIG
#endif
46:
/*
**  Option bits stored in Robot.flags. Each bit is switched on by the
**  command line option noted next to it.
*/
typedef enum _MRFlags {
    MR_IMG        = 1 << 0,     /* -img, -saveimg: check inlined images */
    MR_LINK       = 1 << 1,     /* -link: follow anchors */
    MR_PREEMPTIVE = 1 << 2,     /* -single: make requests preemptive */
    MR_TIME       = 1 << 3,     /* -ss: trace start and termination time */
    MR_SAVE       = 1 << 4,     /* -saveimg: GET images instead of HEAD */
    MR_QUIET      = 1 << 5      /* -q: suppress SHOW_MSG output */
} MRFlags;
55:
56: typedef struct _Robot {
1.2 frystyk 57: int depth; /* How deep is our tree */
1.30 frystyk 58: int cnt; /* Count of requests */
1.2 frystyk 59: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 60: HTList * htext; /* List of our HText Objects */
1.34 eric 61: HTList * fingers;
1.40 frystyk 62: int timer;
1.1 frystyk 63: char * cwd; /* Current dir URL */
64: char * rules;
65: char * logfile;
66: char * outputfile;
67: FILE * output;
68: MRFlags flags;
69: } Robot;
1.34 eric 70:
71: typedef struct _Finger {
72: Robot * robot;
73: HTRequest * request;
74: HTParentAnchor * dest;
75: } Finger;
76:
/*
**  Load state recorded in a HyperDoc. Only L_INVALID is assigned in the
**  visible part of this file (by HyperDoc_new()).
*/
typedef enum _LoadState {
    L_INVALID = -2,
    L_LOADING = -1,
    L_SUCCESS = 0,
    L_ERROR   = 1
} LoadState;
83:
84: /*
85: ** The HyperDoc object is bound to the anchor and contains information about
86: ** where we are in the search for recursive searches
87: */
88: typedef struct _HyperDoc {
89: HTParentAnchor * anchor;
90: LoadState state;
91: int depth;
92: } HyperDoc;
93:
94: /*
95: ** This is the HText object that is created every time we start parsing a
96: ** HTML object
97: */
1.4 frystyk 98: struct _HText {
1.1 frystyk 99: HTRequest * request;
1.4 frystyk 100: };
1.1 frystyk 101:
102: PUBLIC HText * HTMainText = NULL;
103: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
104: PUBLIC HTStyleSheet * styleSheet = NULL;
105:
106: /* ------------------------------------------------------------------------- */
107:
1.13 eric 108: /* Standard (non-error) Output
109: ** ---------------------------
110: */
111: PUBLIC int OutputData(const char * fmt, ...)
112: {
113: int ret;
114: va_list pArgs;
115: va_start(pArgs, fmt);
116: ret = vfprintf(stdout, fmt, pArgs);
117: va_end(pArgs);
118: return ret;
119: }
120:
121: /* ------------------------------------------------------------------------- */
122:
1.2 frystyk 123: /* Create a "HyperDoc" object
124: ** --------------------------
125: ** A HyperDoc object contains information about whether we have already
126: ** started checking the anchor and the depth in our search
127: */
128: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
129: {
130: HyperDoc * hd;
1.14 frystyk 131: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
132: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 133: hd->state = L_INVALID;
134: hd->depth = depth;
135:
136: /* Bind the HyperDoc object together with the Anchor Object */
137: hd->anchor = anchor;
138: HTAnchor_setDocument(anchor, (void *) hd);
139:
140: /* Add this HyperDoc object to our list */
141: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
142: HTList_addObject(mr->hyperdoc, (void *) hd);
143: return hd;
144: }
145:
146: /* Delete a "HyperDoc" object
147: ** --------------------------
148: */
149: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
150: {
151: if (hd) {
1.11 frystyk 152: HT_FREE (hd);
1.2 frystyk 153: return YES;
154: }
155: return NO;
156: }
157:
1.1 frystyk 158: /* Create a Command Line Object
159: ** ----------------------------
160: */
161: PRIVATE Robot * Robot_new (void)
162: {
163: Robot * me;
1.41 frystyk 164: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL)
1.14 frystyk 165: HT_OUTOFMEM("Robot_new");
1.2 frystyk 166: me->hyperdoc = HTList_new();
1.4 frystyk 167: me->htext = HTList_new();
1.40 frystyk 168: me->timer = DEFAULT_TIMEOUT;
1.25 frystyk 169: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 170: me->output = OUTPUT;
1.35 eric 171: me->cnt = 0;
1.34 eric 172: me->fingers = HTList_new();
1.1 frystyk 173: return me;
174: }
175:
176: /* Delete a Command Line Object
177: ** ----------------------------
178: */
179: PRIVATE BOOL Robot_delete (Robot * me)
180: {
181: if (me) {
1.34 eric 182: HTList_delete(me->fingers);
1.2 frystyk 183: if (me->hyperdoc) {
184: HTList * cur = me->hyperdoc;
185: HyperDoc * pres;
186: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
187: HyperDoc_delete(pres);
188: HTList_delete(me->hyperdoc);
189: }
1.4 frystyk 190: if (me->htext) {
191: HTList * cur = me->htext;
192: HText * pres;
193: while ((pres = (HText *) HTList_nextObject(cur)))
194: HText_free(pres);
195: HTList_delete(me->htext);
196: }
1.1 frystyk 197: if (me->logfile) HTLog_close();
198: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 199: if (me->flags & MR_TIME) {
200: time_t local = time(NULL);
1.13 eric 201: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 202: }
1.11 frystyk 203: HT_FREE(me->cwd);
204: HT_FREE(me);
1.1 frystyk 205: return YES;
206: }
207: return NO;
208: }
209:
1.2 frystyk 210: /*
1.34 eric 211: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 212: */
1.34 eric 213: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 214: {
1.34 eric 215: Finger * me;
216: HTRequest * request = HTRequest_new();
217: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
218: HT_OUTOFMEM("Finger_new");
219: me->robot = robot;
220: me->request = request;
221: me->dest = dest;
222: HTList_addObject(robot->fingers, (void *)me);
223:
224: HTRequest_setContext (request, me);
225: if (robot->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(request, YES);
226: HTRequest_addRqHd(request, HT_C_HOST);
227: HTRequest_setMethod(request, method);
228: robot->cnt++;
229: return me;
1.2 frystyk 230: }
231:
1.34 eric 232: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 233: {
1.34 eric 234: HTList_removeObject(me->robot->fingers, (void *)me);
235: me->robot->cnt--;
1.37 frystyk 236:
237: /*
238: ** If we are down at one request then flush the output buffer
239: */
240: if (me->request) {
241: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 242: HTRequest_delete(me->request);
1.37 frystyk 243: }
244:
245: /*
246: ** Delete the request and free myself
247: */
1.34 eric 248: HT_FREE(me);
249: return YES;
1.2 frystyk 250: }
251:
252: /*
253: ** Cleanup and make sure we close all connections including the persistent
254: ** ones
255: */
1.1 frystyk 256: PRIVATE void Cleanup (Robot * me, int status)
257: {
258: Robot_delete(me);
1.29 eric 259: HTProfile_delete();
1.39 eric 260: HTMemLog_close();
1.1 frystyk 261: #ifdef VMS
262: exit(status ? status : 1);
263: #else
264: exit(status ? status : 0);
265: #endif
266: }
267:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	This function sets up signal handlers. This might not be necessary to
**	call if the application has its own handlers (lossage on SVR4).
**
**	On some systems (SYSV) SIGPIPE has to be dealt with when attempting to
**	connect to a remote host where you normally should get
**	`connection refused' back; here the signal is set to be ignored.
*/
PRIVATE void SetSignal (void)
{
    int failed = (signal(SIGPIPE, SIG_IGN) == SIG_ERR);

    if (PROT_TRACE) {
	if (failed)
	    HTTrace("HTSignal.... Can't catch SIGPIPE\n");
	else
	    HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    }
    HTMemLog_flush();
}
#endif /* CATCH_SIG */
288:
289: PRIVATE void VersionInfo (void)
290: {
1.13 eric 291: OutputData("\n\nW3C Reference Software\n\n");
292: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 293: APP_NAME, APP_VERSION);
1.13 eric 294: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
295: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 296: }
297:
298: /* terminate_handler
299: ** -----------------
1.2 frystyk 300: ** This function is registered to handle the result of the request.
301: ** If no more requests are pending then terminate program
1.1 frystyk 302: */
1.32 frystyk 303: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
304: void * param, int status)
1.1 frystyk 305: {
1.34 eric 306: Finger * finger = (Finger *) HTRequest_context(request);
1.46 ! eric 307: Robot * mr = finger->robot;
1.34 eric 308: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
309: Finger_delete(finger);
1.46 ! eric 310: if (mr->cnt <= 0) {
1.34 eric 311: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.46 ! eric 312: Cleanup(mr, 0); /* No way back from here */
1.30 frystyk 313: }
1.37 frystyk 314:
1.46 ! eric 315: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", mr->cnt, mr->cnt == 1 ? "" : "s");
1.1 frystyk 316: return HT_OK;
317: }
318:
319: /* ------------------------------------------------------------------------- */
320: /* HTEXT INTERFACE */
321: /* ------------------------------------------------------------------------- */
322:
323: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
324: HTStream * stream)
325: {
326: HText * me;
1.34 eric 327: Finger * finger = (Finger *) HTRequest_context(request);
328: Robot * mr = finger->robot;
1.14 frystyk 329: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
330: HT_OUTOFMEM("HText_new2");
1.4 frystyk 331:
332: /* Bind the HText object together with the Request Object */
1.1 frystyk 333: me->request = request;
1.4 frystyk 334:
335: /* Add this HyperDoc object to our list */
336: if (!mr->htext) mr->htext = HTList_new();
337: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 338: return me;
339: }
340:
1.4 frystyk 341: PUBLIC void HText_free (HText * me) {
1.11 frystyk 342: if (me) HT_FREE (me);
1.4 frystyk 343: }
344:
1.1 frystyk 345: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
346: {
347: if (text && anchor) {
1.34 eric 348: Finger * finger = (Finger *) HTRequest_context(text->request);
349: Robot * mr = finger->robot;
1.1 frystyk 350: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
351: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 352: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 353: HyperDoc * hd = HTAnchor_document(dest_parent);
354:
1.13 eric 355: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 356:
1.2 frystyk 357: /* Test whether we already have a hyperdoc for this document */
358: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 359: HTParentAnchor * parent = HTRequest_parent(text->request);
360: HyperDoc * last = HTAnchor_document(parent);
361: int depth = last ? last->depth+1 : 0;
1.34 eric 362: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
363: HTRequest * newreq = newfinger->request;
1.2 frystyk 364: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 365: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
366: if (depth >= mr->depth) {
367: if (SHOW_MSG)
1.13 eric 368: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 369: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 370: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 371: } else {
1.13 eric 372: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 373: }
374: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 375: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 376: Finger_delete(newfinger);
1.2 frystyk 377: }
1.7 frystyk 378: } else {
1.18 frystyk 379: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 380: }
1.11 frystyk 381: HT_FREE(uri);
1.2 frystyk 382: }
383: }
384:
385: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 386: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 387: {
388: if (text && anchor) {
1.34 eric 389: Finger * finger = (Finger *) HTRequest_context(text->request);
390: Robot * mr = finger->robot;
1.2 frystyk 391: HTParentAnchor * dest = (HTParentAnchor *)
392: HTAnchor_followMainLink((HTAnchor *) anchor);
393: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 394:
1.2 frystyk 395: /* Test whether we already have a hyperdoc for this document */
396: if (mr->flags & MR_IMG && dest && !hd) {
397: HTParentAnchor * parent = HTRequest_parent(text->request);
398: HyperDoc * last = HTAnchor_document(parent);
399: int depth = last ? last->depth+1 : 0;
1.45 frystyk 400: Finger * newfinger = Finger_new(mr, dest,
401: mr->flags & MR_SAVE ?
402: METHOD_GET : METHOD_HEAD);
1.34 eric 403: HTRequest * newreq = newfinger->request;
1.2 frystyk 404: HyperDoc_new(mr, dest, depth);
405: if (SHOW_MSG) {
406: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 407: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 408: HT_FREE(uri);
1.2 frystyk 409: }
410: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
411: if (SHOW_MSG)
1.13 eric 412: HTTrace("Robot....... Image not tested!\n");
1.34 eric 413: Finger_delete(newfinger);
1.1 frystyk 414: }
415: }
416: }
417: }
418:
419: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 420: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 421: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
422: PUBLIC void HText_endAppend (HText * text) {}
423: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
424: PUBLIC void HText_beginAppend (HText * text) {}
425: PUBLIC void HText_appendParagraph (HText * text) {}
426:
427: /* ------------------------------------------------------------------------- */
428: /* MAIN PROGRAM */
429: /* ------------------------------------------------------------------------- */
430:
431: int main (int argc, char ** argv)
432: {
433: int status = 0;
434: int arg;
435: HTChunk * keywords = NULL; /* From command line */
436: int keycnt = 0;
1.12 frystyk 437: Robot * mr = NULL;
1.43 frystyk 438: Finger * finger = NULL;
439: HTParentAnchor * startAnchor = NULL;
1.1 frystyk 440:
441: /* Starts Mac GUSI socket library */
442: #ifdef GUSI
443: GUSISetup(GUSIwithSIOUXSockets);
444: GUSISetup(GUSIwithInternetSockets);
445: #endif
446:
447: #ifdef __MWERKS__ /* STR */
448: InitGraf((Ptr) &qd.thePort);
449: InitFonts();
450: InitWindows();
451: InitMenus(); TEInit();
452: InitDialogs(nil);
453: InitCursor();
454: SIOUXSettings.asktosaveonclose = false;
455: argc=ccommand(&argv);
456: #endif
457:
1.44 eric 458: HTMemLog_open("data.log", 8192, YES);
1.46 ! eric 459: /* HTFakeReader_init ("readz", "elements", NO); */
! 460:
1.27 frystyk 461: /* Initiate W3C Reference Library with a robot profile */
462: HTProfile_newRobot(APP_NAME, APP_VERSION);
463:
464: /* Add the default HTML parser to the set of converters */
465: {
466: HTList * converters = HTFormat_conversion();
467: HTMLInit(converters);
468: }
1.1 frystyk 469:
1.12 frystyk 470: /* Build a new robot object */
471: mr = Robot_new();
472:
1.1 frystyk 473: /* Scan command Line for parameters */
474: for (arg=1; arg<argc; arg++) {
475: if (*argv[arg] == '-') {
476:
477: /* non-interactive */
1.17 frystyk 478: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 479: HTAlert_setInteractive(NO);
480:
481: /* log file */
482: } else if (!strcmp(argv[arg], "-l")) {
483: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
484: argv[++arg] : DEFAULT_LOG_FILE;
485:
486: /* rule file */
487: } else if (!strcmp(argv[arg], "-r")) {
488: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
489: argv[++arg] : DEFAULT_RULE_FILE;
490:
491: /* output filename */
492: } else if (!strcmp(argv[arg], "-o")) {
493: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
494: argv[++arg] : DEFAULT_OUTPUT_FILE;
495:
496: /* timeout -- Change the default request timeout */
497: } else if (!strcmp(argv[arg], "-timeout")) {
498: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
499: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
1.40 frystyk 500: if (timeout > 0) mr->timer = timeout;
1.1 frystyk 501:
1.7 frystyk 502: /* preemptive or non-preemptive access */
1.1 frystyk 503: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 504: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 505:
506: /* test inlined images */
507: } else if (!strcmp(argv[arg], "-img")) {
508: mr->flags |= MR_IMG;
1.45 frystyk 509:
510: /* load inlined images */
511: } else if (!strcmp(argv[arg], "-saveimg")) {
512: mr->flags |= (MR_IMG | MR_SAVE);
1.2 frystyk 513:
514: /* load anchors */
515: } else if (!strcmp(argv[arg], "-link")) {
516: mr->flags |= MR_LINK;
1.7 frystyk 517: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
518: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 519:
1.12 frystyk 520: /* Output start and end time */
521: } else if (!strcmp(argv[arg], "-ss")) {
522: time_t local = time(NULL);
1.13 eric 523: HTTrace("Robot started on %s\n",
1.12 frystyk 524: HTDateTimeStr(&local, YES));
525: mr->flags |= MR_TIME;
526:
1.1 frystyk 527: /* print version and exit */
528: } else if (!strcmp(argv[arg], "-version")) {
529: VersionInfo();
530: Cleanup(mr, 0);
1.46 ! eric 531:
! 532: /* run in quiet mode */
! 533: } else if (!strcmp(argv[arg], "-q")) {
! 534: mr->flags |= MR_QUIET;
1.1 frystyk 535:
536: #ifdef WWWTRACE
537: /* trace flags */
538: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 539: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 540: #endif
541:
542: } else {
1.13 eric 543: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 544: }
1.17 frystyk 545: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 546: if (!keycnt) {
547: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 548: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
549: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 550: keycnt = 1;
1.11 frystyk 551: HT_FREE(ref);
1.1 frystyk 552: } else { /* Check for successive keyword arguments */
553: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
554: if (keycnt++ <= 1)
1.5 frystyk 555: keywords = HTChunk_new(128);
1.1 frystyk 556: else
1.5 frystyk 557: HTChunk_putc(keywords, ' ');
558: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 559: HT_FREE(escaped);
1.1 frystyk 560: }
561: }
562: }
563:
564: #ifdef CATCH_SIG
565: SetSignal();
566: #endif
567:
568: if (!keycnt) {
1.13 eric 569: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 570: Cleanup(mr, -1);
571: }
572:
1.23 manoli 573: /* Testing that HTTrace is working */
574: HTTrace ("Welcome to the W3C mini Robot\n");
575:
1.1 frystyk 576: /* Rule file specified? */
577: if (mr->rules) {
578: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 579: if (!HTLoadRules(rules))
1.13 eric 580: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 581: HT_FREE(rules);
1.1 frystyk 582: }
583:
584: /* Output file specified? */
585: if (mr->outputfile) {
586: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 587: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 588: mr->output = OUTPUT;
589: }
590: }
591:
592: /* Log file specifed? */
593: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
594:
1.27 frystyk 595: /* Register our own someterminater filter */
1.32 frystyk 596: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.40 frystyk 597:
598: /* Setting event timeout */
599: HTHost_setEventTimeout(mr->timer);
1.37 frystyk 600:
1.34 eric 601: /* Start the request */
602: finger = Finger_new(mr, startAnchor, METHOD_GET);
1.43 frystyk 603:
604: /*
605: ** Make sure that the first request is flushed immediately and not
606: ** buffered in the output buffer
607: */
608: HTRequest_setFlush(finger->request, YES);
609:
610: /*
611: ** Now do the load
612: */
1.34 eric 613: if (mr->flags & MR_PREEMPTIVE)
614: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 615:
616: if (keywords) /* Search */
1.34 eric 617: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 618: else
1.34 eric 619: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 620:
1.5 frystyk 621: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 622: if (status != YES) {
1.13 eric 623: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 624: Cleanup(mr, -1);
625: }
626:
627: /* Go into the event loop... */
1.34 eric 628: HTEventList_loop(finger->request);
1.1 frystyk 629:
630: /* Only gets here if event loop fails */
631: Cleanup(mr, 0);
632: return 0;
633: }
Webmaster