Annotation of libwww/Robot/src/HTRobot.c, revision 1.50
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
1.39 eric 23: #include "HTMemLog.h"
1.1 frystyk 24: #include "HTRobot.h" /* Implemented here */
25:
/* Version string; may be supplied by the build, otherwise "Unspecified" */
#ifndef W3C_VERSION
#define W3C_VERSION		"Unspecified"
#endif

/* Name and version handed to the library profile and printed by -version */
#define APP_NAME		"W3CRobot"
#define APP_VERSION		W3C_VERSION

/* Values used when -o, -r, -l or -link is given without an explicit value */
#define DEFAULT_OUTPUT_FILE	"robot.out"
#define DEFAULT_RULE_FILE	"robot.conf"
#define DEFAULT_LOG_FILE	"robot.log"
#define DEFAULT_DEPTH		0

/*
**  Progress messages are shown unless MR_QUIET is set. The macro expands to
**  an expression on `mr', so a `Robot * mr' must be in scope where it is used.
**  (An earlier definition was: WWWTRACE || HTAlert_interactive().)
*/
#define SHOW_MSG		(!(mr->flags & MR_QUIET))

#define DEFAULT_TIMEOUT		10000		     /* timeout in millis */

/* Signal handling is only compiled in on SVR4 */
#if defined(__svr4__)
#define CATCH_SIG
#endif
46:
/*
**  Bit flags selecting robot behaviour; they are OR'ed into Robot.flags
**  while the command line is parsed.
*/
typedef enum _MRFlags {
    MR_IMG		= 1 << 0,	/* -img / -saveimg: check inlined images */
    MR_LINK		= 1 << 1,	/* -link: follow anchors */
    MR_PREEMPTIVE	= 1 << 2,	/* -single: preemptive requests */
    MR_TIME		= 1 << 3,	/* -ss: report start and end time */
    MR_SAVE		= 1 << 4,	/* -saveimg: GET images instead of HEAD */
    MR_QUIET		= 1 << 5,	/* -q: suppress progress messages */
    MR_VALIDATE		= 1 << 6,	/* -validate: cache validation */
    MR_END_VALIDATE	= 1 << 7	/* -endvalidate: end-to-end validation */
} MRFlags;
57:
58: typedef struct _Robot {
1.2 frystyk 59: int depth; /* How deep is our tree */
1.30 frystyk 60: int cnt; /* Count of requests */
1.2 frystyk 61: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 62: HTList * htext; /* List of our HText Objects */
1.34 eric 63: HTList * fingers;
1.40 frystyk 64: int timer;
1.1 frystyk 65: char * cwd; /* Current dir URL */
66: char * rules;
67: char * logfile;
68: char * outputfile;
69: FILE * output;
70: MRFlags flags;
71: } Robot;
1.34 eric 72:
73: typedef struct _Finger {
74: Robot * robot;
75: HTRequest * request;
76: HTParentAnchor * dest;
77: } Finger;
78:
/*
**  Load state recorded in a HyperDoc. New HyperDoc objects start out as
**  L_INVALID.
*/
typedef enum _LoadState {
    L_INVALID	= -2,
    L_LOADING	= -1,
    L_SUCCESS	= 0,
    L_ERROR				/* == 1 */
} LoadState;
85:
86: /*
87: ** The HyperDoc object is bound to the anchor and contains information about
88: ** where we are in the search for recursive searches
89: */
90: typedef struct _HyperDoc {
91: HTParentAnchor * anchor;
92: LoadState state;
93: int depth;
94: } HyperDoc;
95:
96: /*
97: ** This is the HText object that is created every time we start parsing a
98: ** HTML object
99: */
1.4 frystyk 100: struct _HText {
1.1 frystyk 101: HTRequest * request;
1.4 frystyk 102: };
1.1 frystyk 103:
104: PUBLIC HText * HTMainText = NULL;
105: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
106: PUBLIC HTStyleSheet * styleSheet = NULL;
107:
108: /* ------------------------------------------------------------------------- */
109:
1.13 eric 110: /* Standard (non-error) Output
111: ** ---------------------------
112: */
113: PUBLIC int OutputData(const char * fmt, ...)
114: {
115: int ret;
116: va_list pArgs;
117: va_start(pArgs, fmt);
118: ret = vfprintf(stdout, fmt, pArgs);
119: va_end(pArgs);
120: return ret;
121: }
122:
123: /* ------------------------------------------------------------------------- */
124:
1.2 frystyk 125: /* Create a "HyperDoc" object
126: ** --------------------------
127: ** A HyperDoc object contains information about whether we have already
128: ** started checking the anchor and the depth in our search
129: */
130: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
131: {
132: HyperDoc * hd;
1.14 frystyk 133: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
134: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 135: hd->state = L_INVALID;
136: hd->depth = depth;
137:
138: /* Bind the HyperDoc object together with the Anchor Object */
139: hd->anchor = anchor;
140: HTAnchor_setDocument(anchor, (void *) hd);
141:
142: /* Add this HyperDoc object to our list */
143: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
144: HTList_addObject(mr->hyperdoc, (void *) hd);
145: return hd;
146: }
147:
148: /* Delete a "HyperDoc" object
149: ** --------------------------
150: */
151: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
152: {
153: if (hd) {
1.11 frystyk 154: HT_FREE (hd);
1.2 frystyk 155: return YES;
156: }
157: return NO;
158: }
159:
1.1 frystyk 160: /* Create a Command Line Object
161: ** ----------------------------
162: */
163: PRIVATE Robot * Robot_new (void)
164: {
165: Robot * me;
1.41 frystyk 166: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL)
1.14 frystyk 167: HT_OUTOFMEM("Robot_new");
1.2 frystyk 168: me->hyperdoc = HTList_new();
1.4 frystyk 169: me->htext = HTList_new();
1.40 frystyk 170: me->timer = DEFAULT_TIMEOUT;
1.25 frystyk 171: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 172: me->output = OUTPUT;
1.35 eric 173: me->cnt = 0;
1.34 eric 174: me->fingers = HTList_new();
1.1 frystyk 175: return me;
176: }
177:
178: /* Delete a Command Line Object
179: ** ----------------------------
180: */
181: PRIVATE BOOL Robot_delete (Robot * me)
182: {
183: if (me) {
1.34 eric 184: HTList_delete(me->fingers);
1.2 frystyk 185: if (me->hyperdoc) {
186: HTList * cur = me->hyperdoc;
187: HyperDoc * pres;
188: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
189: HyperDoc_delete(pres);
190: HTList_delete(me->hyperdoc);
191: }
1.4 frystyk 192: if (me->htext) {
193: HTList * cur = me->htext;
194: HText * pres;
195: while ((pres = (HText *) HTList_nextObject(cur)))
196: HText_free(pres);
197: HTList_delete(me->htext);
198: }
1.1 frystyk 199: if (me->logfile) HTLog_close();
200: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 201: if (me->flags & MR_TIME) {
202: time_t local = time(NULL);
1.13 eric 203: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 204: }
1.11 frystyk 205: HT_FREE(me->cwd);
206: HT_FREE(me);
1.1 frystyk 207: return YES;
208: }
209: return NO;
210: }
211:
1.2 frystyk 212: /*
1.34 eric 213: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 214: */
1.34 eric 215: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 216: {
1.34 eric 217: Finger * me;
218: HTRequest * request = HTRequest_new();
219: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
220: HT_OUTOFMEM("Finger_new");
221: me->robot = robot;
222: me->request = request;
223: me->dest = dest;
224: HTList_addObject(robot->fingers, (void *)me);
225:
1.48 frystyk 226: /* Set the context for this request */
1.34 eric 227: HTRequest_setContext (request, me);
1.48 frystyk 228:
229: /* Check the various flags to customize the request */
230: if (robot->flags & MR_PREEMPTIVE)
231: HTRequest_setPreemptive(request, YES);
232: if (robot->flags & MR_VALIDATE)
233: HTRequest_setReloadMode(request, HT_CACHE_VALIDATE);
234: if (robot->flags & MR_END_VALIDATE)
235: HTRequest_setReloadMode(request, HT_CACHE_END_VALIDATE);
236:
237: /* We wanna make sure that we are sending a Host header (default) */
1.34 eric 238: HTRequest_addRqHd(request, HT_C_HOST);
1.48 frystyk 239:
240: /* Set the method for this request */
1.34 eric 241: HTRequest_setMethod(request, method);
242: robot->cnt++;
243: return me;
1.2 frystyk 244: }
245:
1.34 eric 246: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 247: {
1.34 eric 248: HTList_removeObject(me->robot->fingers, (void *)me);
249: me->robot->cnt--;
1.37 frystyk 250:
251: /*
252: ** If we are down at one request then flush the output buffer
253: */
254: if (me->request) {
255: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 256: HTRequest_delete(me->request);
1.37 frystyk 257: }
258:
259: /*
260: ** Delete the request and free myself
261: */
1.34 eric 262: HT_FREE(me);
263: return YES;
1.2 frystyk 264: }
265:
266: /*
267: ** Cleanup and make sure we close all connections including the persistent
268: ** ones
269: */
1.1 frystyk 270: PRIVATE void Cleanup (Robot * me, int status)
271: {
272: Robot_delete(me);
1.29 eric 273: HTProfile_delete();
1.50 ! frystyk 274: #ifdef HT_MEMLOG
1.39 eric 275: HTMemLog_close();
1.47 frystyk 276: #endif
277:
1.1 frystyk 278: #ifdef VMS
279: exit(status ? status : 1);
280: #else
281: exit(status ? status : 0);
282: #endif
283: }
284:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	---------
**	Installs the signal dispositions used by the robot. This might not be
**	necessary to call if the application has its own handlers (lossage on
**	SVR4).
*/
PRIVATE void SetSignal (void)
{
    /*
    **  On some systems (SYSV) it is necessary to ignore the SIGPIPE signal
    **  when attempting to connect to a remote host where you normally
    **  should get `connection refused' back
    */
    BOOL failed = (signal(SIGPIPE, SIG_IGN) == SIG_ERR) ? YES : NO;

    if (PROT_TRACE) {
	if (failed)
	    HTTrace("HTSignal.... Can't catch SIGPIPE\n");
	else
	    HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    }

#ifdef HT_MEMLOG
    HTMemLog_flush();
#endif

}
#endif /* CATCH_SIG */
309:
310: PRIVATE void VersionInfo (void)
311: {
1.13 eric 312: OutputData("\n\nW3C Reference Software\n\n");
313: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 314: APP_NAME, APP_VERSION);
1.13 eric 315: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
316: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 317: }
318:
319: /* terminate_handler
320: ** -----------------
1.2 frystyk 321: ** This function is registered to handle the result of the request.
322: ** If no more requests are pending then terminate program
1.1 frystyk 323: */
1.32 frystyk 324: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
325: void * param, int status)
1.1 frystyk 326: {
1.34 eric 327: Finger * finger = (Finger *) HTRequest_context(request);
1.46 eric 328: Robot * mr = finger->robot;
1.34 eric 329: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
330: Finger_delete(finger);
1.46 eric 331: if (mr->cnt <= 0) {
1.34 eric 332: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.46 eric 333: Cleanup(mr, 0); /* No way back from here */
1.30 frystyk 334: }
1.37 frystyk 335:
1.46 eric 336: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", mr->cnt, mr->cnt == 1 ? "" : "s");
1.1 frystyk 337: return HT_OK;
338: }
339:
340: /* ------------------------------------------------------------------------- */
341: /* HTEXT INTERFACE */
342: /* ------------------------------------------------------------------------- */
343:
344: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
345: HTStream * stream)
346: {
347: HText * me;
1.34 eric 348: Finger * finger = (Finger *) HTRequest_context(request);
349: Robot * mr = finger->robot;
1.14 frystyk 350: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
351: HT_OUTOFMEM("HText_new2");
1.4 frystyk 352:
353: /* Bind the HText object together with the Request Object */
1.1 frystyk 354: me->request = request;
1.4 frystyk 355:
356: /* Add this HyperDoc object to our list */
357: if (!mr->htext) mr->htext = HTList_new();
358: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 359: return me;
360: }
361:
1.4 frystyk 362: PUBLIC void HText_free (HText * me) {
1.11 frystyk 363: if (me) HT_FREE (me);
1.4 frystyk 364: }
365:
1.1 frystyk 366: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
367: {
368: if (text && anchor) {
1.34 eric 369: Finger * finger = (Finger *) HTRequest_context(text->request);
370: Robot * mr = finger->robot;
1.1 frystyk 371: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
372: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 373: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 374: HyperDoc * hd = HTAnchor_document(dest_parent);
375:
1.13 eric 376: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 377:
1.2 frystyk 378: /* Test whether we already have a hyperdoc for this document */
379: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 380: HTParentAnchor * parent = HTRequest_parent(text->request);
381: HyperDoc * last = HTAnchor_document(parent);
382: int depth = last ? last->depth+1 : 0;
1.34 eric 383: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
384: HTRequest * newreq = newfinger->request;
1.2 frystyk 385: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 386: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
387: if (depth >= mr->depth) {
388: if (SHOW_MSG)
1.13 eric 389: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 390: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 391: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 392: } else {
1.13 eric 393: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 394: }
395: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 396: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 397: Finger_delete(newfinger);
1.2 frystyk 398: }
1.7 frystyk 399: } else {
1.18 frystyk 400: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 401: }
1.11 frystyk 402: HT_FREE(uri);
1.2 frystyk 403: }
404: }
405:
406: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 407: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 408: {
409: if (text && anchor) {
1.34 eric 410: Finger * finger = (Finger *) HTRequest_context(text->request);
411: Robot * mr = finger->robot;
1.2 frystyk 412: HTParentAnchor * dest = (HTParentAnchor *)
413: HTAnchor_followMainLink((HTAnchor *) anchor);
414: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 415:
1.2 frystyk 416: /* Test whether we already have a hyperdoc for this document */
417: if (mr->flags & MR_IMG && dest && !hd) {
418: HTParentAnchor * parent = HTRequest_parent(text->request);
419: HyperDoc * last = HTAnchor_document(parent);
420: int depth = last ? last->depth+1 : 0;
1.45 frystyk 421: Finger * newfinger = Finger_new(mr, dest,
422: mr->flags & MR_SAVE ?
423: METHOD_GET : METHOD_HEAD);
1.34 eric 424: HTRequest * newreq = newfinger->request;
1.2 frystyk 425: HyperDoc_new(mr, dest, depth);
426: if (SHOW_MSG) {
427: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 428: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 429: HT_FREE(uri);
1.2 frystyk 430: }
431: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
432: if (SHOW_MSG)
1.13 eric 433: HTTrace("Robot....... Image not tested!\n");
1.34 eric 434: Finger_delete(newfinger);
1.1 frystyk 435: }
436: }
437: }
438: }
439:
440: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 441: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 442: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
443: PUBLIC void HText_endAppend (HText * text) {}
444: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
445: PUBLIC void HText_beginAppend (HText * text) {}
446: PUBLIC void HText_appendParagraph (HText * text) {}
447:
1.48 frystyk 448: PRIVATE int RobotTrace (const char * fmt, va_list pArgs)
449: {
450: return (vfprintf(stderr, fmt, pArgs));
451: }
452:
1.1 frystyk 453: /* ------------------------------------------------------------------------- */
454: /* MAIN PROGRAM */
455: /* ------------------------------------------------------------------------- */
456:
457: int main (int argc, char ** argv)
458: {
1.48 frystyk 459: int status = 0;
1.1 frystyk 460: int arg;
1.48 frystyk 461: BOOL cache = NO; /* Use persistent cache */
462: BOOL flush = NO; /* flush the persistent cache */
1.1 frystyk 463: HTChunk * keywords = NULL; /* From command line */
464: int keycnt = 0;
1.12 frystyk 465: Robot * mr = NULL;
1.43 frystyk 466: Finger * finger = NULL;
467: HTParentAnchor * startAnchor = NULL;
1.1 frystyk 468:
469: /* Starts Mac GUSI socket library */
470: #ifdef GUSI
471: GUSISetup(GUSIwithSIOUXSockets);
472: GUSISetup(GUSIwithInternetSockets);
473: #endif
474:
475: #ifdef __MWERKS__ /* STR */
476: InitGraf((Ptr) &qd.thePort);
477: InitFonts();
478: InitWindows();
479: InitMenus(); TEInit();
480: InitDialogs(nil);
481: InitCursor();
482: SIOUXSettings.asktosaveonclose = false;
483: argc=ccommand(&argv);
1.50 ! frystyk 484: #endif /* __MWERKS__ */
1.1 frystyk 485:
1.50 ! frystyk 486: #ifdef HT_MEMLOG
! 487: HTMemLog_open(HT_MEMLOG, 8192, YES);
1.47 frystyk 488: #endif
1.46 eric 489:
1.27 frystyk 490: /* Initiate W3C Reference Library with a robot profile */
491: HTProfile_newRobot(APP_NAME, APP_VERSION);
1.48 frystyk 492: HTTrace_setCallback(RobotTrace);
1.27 frystyk 493:
494: /* Add the default HTML parser to the set of converters */
495: {
496: HTList * converters = HTFormat_conversion();
497: HTMLInit(converters);
498: }
1.1 frystyk 499:
1.12 frystyk 500: /* Build a new robot object */
501: mr = Robot_new();
502:
1.1 frystyk 503: /* Scan command Line for parameters */
504: for (arg=1; arg<argc; arg++) {
505: if (*argv[arg] == '-') {
506:
507: /* non-interactive */
1.17 frystyk 508: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 509: HTAlert_setInteractive(NO);
510:
511: /* log file */
512: } else if (!strcmp(argv[arg], "-l")) {
513: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
514: argv[++arg] : DEFAULT_LOG_FILE;
515:
516: /* rule file */
517: } else if (!strcmp(argv[arg], "-r")) {
518: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
519: argv[++arg] : DEFAULT_RULE_FILE;
520:
521: /* output filename */
522: } else if (!strcmp(argv[arg], "-o")) {
523: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
524: argv[++arg] : DEFAULT_OUTPUT_FILE;
525:
526: /* timeout -- Change the default request timeout */
527: } else if (!strcmp(argv[arg], "-timeout")) {
528: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
529: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
1.40 frystyk 530: if (timeout > 0) mr->timer = timeout;
1.1 frystyk 531:
1.48 frystyk 532: /* Start the persistent cache */
533: } else if (!strcmp(argv[arg], "-cache")) {
534: cache = YES;
535:
536: /* Persistent cache flush */
537: } else if (!strcmp(argv[arg], "-flush")) {
538: flush = YES;
539:
540: /* Do a cache validation */
541: } else if (!strcmp(argv[arg], "-validate")) {
542: mr->flags |= MR_VALIDATE;
543:
544: /* Do an end-to-end cache-validation */
545: } else if (!strcmp(argv[arg], "-endvalidate")) {
546: mr->flags |= MR_END_VALIDATE;
547:
1.7 frystyk 548: /* preemptive or non-preemptive access */
1.1 frystyk 549: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 550: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 551:
552: /* test inlined images */
553: } else if (!strcmp(argv[arg], "-img")) {
554: mr->flags |= MR_IMG;
1.45 frystyk 555:
556: /* load inlined images */
557: } else if (!strcmp(argv[arg], "-saveimg")) {
558: mr->flags |= (MR_IMG | MR_SAVE);
1.2 frystyk 559:
560: /* load anchors */
561: } else if (!strcmp(argv[arg], "-link")) {
562: mr->flags |= MR_LINK;
1.7 frystyk 563: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
564: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 565:
1.12 frystyk 566: /* Output start and end time */
567: } else if (!strcmp(argv[arg], "-ss")) {
568: time_t local = time(NULL);
1.13 eric 569: HTTrace("Robot started on %s\n",
1.12 frystyk 570: HTDateTimeStr(&local, YES));
571: mr->flags |= MR_TIME;
572:
1.1 frystyk 573: /* print version and exit */
574: } else if (!strcmp(argv[arg], "-version")) {
575: VersionInfo();
576: Cleanup(mr, 0);
1.46 eric 577:
578: /* run in quiet mode */
579: } else if (!strcmp(argv[arg], "-q")) {
580: mr->flags |= MR_QUIET;
1.1 frystyk 581:
582: #ifdef WWWTRACE
583: /* trace flags */
584: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 585: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 586: #endif
587:
588: } else {
1.13 eric 589: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 590: }
1.17 frystyk 591: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 592: if (!keycnt) {
593: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 594: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
595: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 596: keycnt = 1;
1.11 frystyk 597: HT_FREE(ref);
1.1 frystyk 598: } else { /* Check for successive keyword arguments */
599: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
600: if (keycnt++ <= 1)
1.5 frystyk 601: keywords = HTChunk_new(128);
1.1 frystyk 602: else
1.5 frystyk 603: HTChunk_putc(keywords, ' ');
604: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 605: HT_FREE(escaped);
1.1 frystyk 606: }
607: }
608: }
609:
610: #ifdef CATCH_SIG
611: SetSignal();
612: #endif
613:
614: if (!keycnt) {
1.13 eric 615: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 616: Cleanup(mr, -1);
617: }
618:
1.23 manoli 619: /* Testing that HTTrace is working */
1.47 frystyk 620: if (SHOW_MSG) HTTrace ("Welcome to the W3C mini Robot\n");
1.23 manoli 621:
1.1 frystyk 622: /* Rule file specified? */
623: if (mr->rules) {
624: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 625: if (!HTLoadRules(rules))
1.13 eric 626: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 627: HT_FREE(rules);
1.1 frystyk 628: }
629:
630: /* Output file specified? */
631: if (mr->outputfile) {
632: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 633: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 634: mr->output = OUTPUT;
635: }
636: }
637:
1.48 frystyk 638: /* Should we use persistent cache? */
639: if (cache) {
640: HTCacheInit(NULL, 20);
1.49 frystyk 641: HTNet_addBefore(HTCacheFilter, "http://*", NULL, HT_FILTER_MIDDLE);
642: HTNet_addAfter(HTCacheUpdateFilter, "http://*", NULL,
643: HT_NOT_MODIFIED, HT_FILTER_MIDDLE);
1.48 frystyk 644:
645: /* Should we start by flushing? */
646: if (flush) HTCache_flushAll();
647: }
648:
1.1 frystyk 649: /* Log file specifed? */
650: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
651:
1.27 frystyk 652: /* Register our own someterminater filter */
1.32 frystyk 653: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.40 frystyk 654:
655: /* Setting event timeout */
656: HTHost_setEventTimeout(mr->timer);
1.37 frystyk 657:
1.34 eric 658: /* Start the request */
659: finger = Finger_new(mr, startAnchor, METHOD_GET);
1.43 frystyk 660:
661: /*
662: ** Make sure that the first request is flushed immediately and not
663: ** buffered in the output buffer
664: */
665: HTRequest_setFlush(finger->request, YES);
666:
667: /*
1.48 frystyk 668: ** Check whether we should do some kind of cache validation on
669: ** the load
670: */
671: if (mr->flags & MR_VALIDATE)
672: HTRequest_setReloadMode(finger->request, HT_CACHE_VALIDATE);
673: if (mr->flags & MR_END_VALIDATE)
674: HTRequest_setReloadMode(finger->request, HT_CACHE_END_VALIDATE);
675:
676: /*
1.43 frystyk 677: ** Now do the load
678: */
1.34 eric 679: if (mr->flags & MR_PREEMPTIVE)
680: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 681:
682: if (keywords) /* Search */
1.34 eric 683: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 684: else
1.34 eric 685: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 686:
1.5 frystyk 687: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 688: if (status != YES) {
1.13 eric 689: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 690: Cleanup(mr, -1);
691: }
692:
693: /* Go into the event loop... */
1.34 eric 694: HTEventList_loop(finger->request);
1.1 frystyk 695:
696: /* Only gets here if event loop fails */
697: Cleanup(mr, 0);
698: return 0;
699: }
Webmaster