Annotation of libwww/Robot/src/HTRobot.c, revision 1.51
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
1.39 eric 23: #include "HTMemLog.h"
1.1 frystyk 24: #include "HTRobot.h" /* Implemented here */
25:
/* Version string: a placeholder is used when the build does not supply one */
#if !defined(W3C_VERSION)
#define W3C_VERSION		"Unspecified"
#endif

/* Application name and version handed to the library profile and VersionInfo() */
#define APP_NAME		"W3CRobot"
#define APP_VERSION		W3C_VERSION

/* Values used when the corresponding option is given without an argument */
#define DEFAULT_OUTPUT_FILE	"robot.out"
#define DEFAULT_RULE_FILE	"robot.conf"
#define DEFAULT_LOG_FILE	"robot.log"
#define DEFAULT_MEMLOG		"robot.mem"
#define DEFAULT_DEPTH		0

/* Memory logging is compiled out; switch this to "#if 1" to define HT_MEMLOG */
#if 0
#define HT_MEMLOG
#endif

/*
**  Messages are shown unless the quiet flag is set. The macro expects a
**  variable `Robot * mr' to be in scope wherever it is used.
**  Former definition: (WWWTRACE || HTAlert_interactive())
*/
#define SHOW_MSG		(!(mr->flags & MR_QUIET))

#define DEFAULT_TIMEOUT		10000		/* timeout in millis */

/* SetSignal() is only compiled and called when CATCH_SIG is defined */
#ifdef __svr4__
#define CATCH_SIG
#endif
51:
/*
**  Behaviour flags for the robot. Each value is a single bit so that they
**  can be OR-ed together in Robot.flags.
*/
typedef enum _MRFlags {
    MR_IMG		= 1 << 0,	/* -img, -saveimg: check inlined images */
    MR_LINK		= 1 << 1,	/* -link: follow anchors */
    MR_PREEMPTIVE	= 1 << 2,	/* -single: issue preemptive requests */
    MR_TIME		= 1 << 3,	/* -ss: print start and end time */
    MR_SAVE		= 1 << 4,	/* -saveimg: GET images instead of HEAD */
    MR_QUIET		= 1 << 5,	/* -q: suppress SHOW_MSG output */
    MR_VALIDATE		= 1 << 6,	/* -validate: cache validation */
    MR_END_VALIDATE	= 1 << 7	/* -endvalidate: end-to-end validation */
} MRFlags;
62:
63: typedef struct _Robot {
1.2 frystyk 64: int depth; /* How deep is our tree */
1.30 frystyk 65: int cnt; /* Count of requests */
1.2 frystyk 66: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 67: HTList * htext; /* List of our HText Objects */
1.34 eric 68: HTList * fingers;
1.40 frystyk 69: int timer;
1.1 frystyk 70: char * cwd; /* Current dir URL */
71: char * rules;
72: char * logfile;
73: char * outputfile;
74: FILE * output;
75: MRFlags flags;
76: } Robot;
1.34 eric 77:
78: typedef struct _Finger {
79: Robot * robot;
80: HTRequest * request;
81: HTParentAnchor * dest;
82: } Finger;
83:
/*
**  Load state recorded in a HyperDoc object. A new HyperDoc starts out as
**  L_INVALID.
*/
typedef enum _LoadState {
    L_INVALID	= -2,
    L_LOADING	= -1,
    L_SUCCESS	= 0,
    L_ERROR	= 1
} LoadState;
90:
91: /*
92: ** The HyperDoc object is bound to the anchor and contains information about
93: ** where we are in the search for recursive searches
94: */
95: typedef struct _HyperDoc {
96: HTParentAnchor * anchor;
97: LoadState state;
98: int depth;
99: } HyperDoc;
100:
101: /*
102: ** This is the HText object that is created every time we start parsing a
103: ** HTML object
104: */
1.4 frystyk 105: struct _HText {
1.1 frystyk 106: HTRequest * request;
1.4 frystyk 107: };
1.1 frystyk 108:
109: PUBLIC HText * HTMainText = NULL;
110: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
111: PUBLIC HTStyleSheet * styleSheet = NULL;
112:
113: /* ------------------------------------------------------------------------- */
114:
1.13 eric 115: /* Standard (non-error) Output
116: ** ---------------------------
117: */
118: PUBLIC int OutputData(const char * fmt, ...)
119: {
120: int ret;
121: va_list pArgs;
122: va_start(pArgs, fmt);
123: ret = vfprintf(stdout, fmt, pArgs);
124: va_end(pArgs);
125: return ret;
126: }
127:
128: /* ------------------------------------------------------------------------- */
129:
1.2 frystyk 130: /* Create a "HyperDoc" object
131: ** --------------------------
132: ** A HyperDoc object contains information about whether we have already
133: ** started checking the anchor and the depth in our search
134: */
135: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
136: {
137: HyperDoc * hd;
1.14 frystyk 138: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
139: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 140: hd->state = L_INVALID;
141: hd->depth = depth;
142:
143: /* Bind the HyperDoc object together with the Anchor Object */
144: hd->anchor = anchor;
145: HTAnchor_setDocument(anchor, (void *) hd);
146:
147: /* Add this HyperDoc object to our list */
148: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
149: HTList_addObject(mr->hyperdoc, (void *) hd);
150: return hd;
151: }
152:
153: /* Delete a "HyperDoc" object
154: ** --------------------------
155: */
156: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
157: {
158: if (hd) {
1.11 frystyk 159: HT_FREE (hd);
1.2 frystyk 160: return YES;
161: }
162: return NO;
163: }
164:
1.1 frystyk 165: /* Create a Command Line Object
166: ** ----------------------------
167: */
168: PRIVATE Robot * Robot_new (void)
169: {
170: Robot * me;
1.41 frystyk 171: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL)
1.14 frystyk 172: HT_OUTOFMEM("Robot_new");
1.2 frystyk 173: me->hyperdoc = HTList_new();
1.4 frystyk 174: me->htext = HTList_new();
1.40 frystyk 175: me->timer = DEFAULT_TIMEOUT;
1.25 frystyk 176: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 177: me->output = OUTPUT;
1.35 eric 178: me->cnt = 0;
1.34 eric 179: me->fingers = HTList_new();
1.1 frystyk 180: return me;
181: }
182:
183: /* Delete a Command Line Object
184: ** ----------------------------
185: */
186: PRIVATE BOOL Robot_delete (Robot * me)
187: {
188: if (me) {
1.34 eric 189: HTList_delete(me->fingers);
1.2 frystyk 190: if (me->hyperdoc) {
191: HTList * cur = me->hyperdoc;
192: HyperDoc * pres;
193: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
194: HyperDoc_delete(pres);
195: HTList_delete(me->hyperdoc);
196: }
1.4 frystyk 197: if (me->htext) {
198: HTList * cur = me->htext;
199: HText * pres;
200: while ((pres = (HText *) HTList_nextObject(cur)))
201: HText_free(pres);
202: HTList_delete(me->htext);
203: }
1.1 frystyk 204: if (me->logfile) HTLog_close();
205: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 206: if (me->flags & MR_TIME) {
207: time_t local = time(NULL);
1.13 eric 208: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 209: }
1.11 frystyk 210: HT_FREE(me->cwd);
211: HT_FREE(me);
1.1 frystyk 212: return YES;
213: }
214: return NO;
215: }
216:
1.2 frystyk 217: /*
1.34 eric 218: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 219: */
1.34 eric 220: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 221: {
1.34 eric 222: Finger * me;
223: HTRequest * request = HTRequest_new();
224: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
225: HT_OUTOFMEM("Finger_new");
226: me->robot = robot;
227: me->request = request;
228: me->dest = dest;
229: HTList_addObject(robot->fingers, (void *)me);
230:
1.48 frystyk 231: /* Set the context for this request */
1.34 eric 232: HTRequest_setContext (request, me);
1.48 frystyk 233:
234: /* Check the various flags to customize the request */
235: if (robot->flags & MR_PREEMPTIVE)
236: HTRequest_setPreemptive(request, YES);
237: if (robot->flags & MR_VALIDATE)
238: HTRequest_setReloadMode(request, HT_CACHE_VALIDATE);
239: if (robot->flags & MR_END_VALIDATE)
240: HTRequest_setReloadMode(request, HT_CACHE_END_VALIDATE);
241:
242: /* We wanna make sure that we are sending a Host header (default) */
1.34 eric 243: HTRequest_addRqHd(request, HT_C_HOST);
1.48 frystyk 244:
245: /* Set the method for this request */
1.34 eric 246: HTRequest_setMethod(request, method);
247: robot->cnt++;
248: return me;
1.2 frystyk 249: }
250:
1.34 eric 251: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 252: {
1.34 eric 253: HTList_removeObject(me->robot->fingers, (void *)me);
254: me->robot->cnt--;
1.37 frystyk 255:
256: /*
257: ** If we are down at one request then flush the output buffer
258: */
259: if (me->request) {
260: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 261: HTRequest_delete(me->request);
1.37 frystyk 262: }
263:
264: /*
265: ** Delete the request and free myself
266: */
1.34 eric 267: HT_FREE(me);
268: return YES;
1.2 frystyk 269: }
270:
271: /*
272: ** Cleanup and make sure we close all connections including the persistent
273: ** ones
274: */
1.1 frystyk 275: PRIVATE void Cleanup (Robot * me, int status)
276: {
277: Robot_delete(me);
1.29 eric 278: HTProfile_delete();
1.50 frystyk 279: #ifdef HT_MEMLOG
1.39 eric 280: HTMemLog_close();
1.47 frystyk 281: #endif
282:
1.1 frystyk 283: #ifdef VMS
284: exit(status ? status : 1);
285: #else
286: exit(status ? status : 0);
287: #endif
288: }
289:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	This function sets up signal handlers. This might not be necessary to
**	call if the application has its own handlers (lossage on SVR4).
**
**	On some systems (SYSV) it is necessary to ignore the SIGPIPE signal
**	when attempting to connect to a remote host where you normally should
**	get `connection refused' back. The outcome is reported when protocol
**	tracing is on. The memory log is flushed if it is compiled in.
*/
PRIVATE void SetSignal (void)
{
    int failed = (signal(SIGPIPE, SIG_IGN) == SIG_ERR);

    if (PROT_TRACE) {
	if (failed)
	    HTTrace("HTSignal.... Can't catch SIGPIPE\n");
	else
	    HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    }

#if defined(HT_MEMLOG)
    HTMemLog_flush();
#endif
}
#endif /* CATCH_SIG */
314:
315: PRIVATE void VersionInfo (void)
316: {
1.13 eric 317: OutputData("\n\nW3C Reference Software\n\n");
318: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 319: APP_NAME, APP_VERSION);
1.13 eric 320: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
321: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 322: }
323:
324: /* terminate_handler
325: ** -----------------
1.2 frystyk 326: ** This function is registered to handle the result of the request.
327: ** If no more requests are pending then terminate program
1.1 frystyk 328: */
1.32 frystyk 329: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
330: void * param, int status)
1.1 frystyk 331: {
1.34 eric 332: Finger * finger = (Finger *) HTRequest_context(request);
1.46 eric 333: Robot * mr = finger->robot;
1.34 eric 334: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
335: Finger_delete(finger);
1.46 eric 336: if (mr->cnt <= 0) {
1.34 eric 337: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.46 eric 338: Cleanup(mr, 0); /* No way back from here */
1.30 frystyk 339: }
1.37 frystyk 340:
1.46 eric 341: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", mr->cnt, mr->cnt == 1 ? "" : "s");
1.1 frystyk 342: return HT_OK;
343: }
344:
345: /* ------------------------------------------------------------------------- */
346: /* HTEXT INTERFACE */
347: /* ------------------------------------------------------------------------- */
348:
349: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
350: HTStream * stream)
351: {
352: HText * me;
1.34 eric 353: Finger * finger = (Finger *) HTRequest_context(request);
354: Robot * mr = finger->robot;
1.14 frystyk 355: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
356: HT_OUTOFMEM("HText_new2");
1.4 frystyk 357:
358: /* Bind the HText object together with the Request Object */
1.1 frystyk 359: me->request = request;
1.4 frystyk 360:
361: /* Add this HyperDoc object to our list */
362: if (!mr->htext) mr->htext = HTList_new();
363: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 364: return me;
365: }
366:
1.4 frystyk 367: PUBLIC void HText_free (HText * me) {
1.11 frystyk 368: if (me) HT_FREE (me);
1.4 frystyk 369: }
370:
1.1 frystyk 371: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
372: {
373: if (text && anchor) {
1.34 eric 374: Finger * finger = (Finger *) HTRequest_context(text->request);
375: Robot * mr = finger->robot;
1.1 frystyk 376: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
377: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 378: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 379: HyperDoc * hd = HTAnchor_document(dest_parent);
380:
1.13 eric 381: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 382:
1.2 frystyk 383: /* Test whether we already have a hyperdoc for this document */
384: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 385: HTParentAnchor * parent = HTRequest_parent(text->request);
386: HyperDoc * last = HTAnchor_document(parent);
387: int depth = last ? last->depth+1 : 0;
1.34 eric 388: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
389: HTRequest * newreq = newfinger->request;
1.2 frystyk 390: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 391: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
392: if (depth >= mr->depth) {
393: if (SHOW_MSG)
1.13 eric 394: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 395: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 396: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 397: } else {
1.13 eric 398: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 399: }
400: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 401: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 402: Finger_delete(newfinger);
1.2 frystyk 403: }
1.7 frystyk 404: } else {
1.18 frystyk 405: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 406: }
1.11 frystyk 407: HT_FREE(uri);
1.2 frystyk 408: }
409: }
410:
411: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 412: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 413: {
414: if (text && anchor) {
1.34 eric 415: Finger * finger = (Finger *) HTRequest_context(text->request);
416: Robot * mr = finger->robot;
1.2 frystyk 417: HTParentAnchor * dest = (HTParentAnchor *)
418: HTAnchor_followMainLink((HTAnchor *) anchor);
419: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 420:
1.2 frystyk 421: /* Test whether we already have a hyperdoc for this document */
422: if (mr->flags & MR_IMG && dest && !hd) {
423: HTParentAnchor * parent = HTRequest_parent(text->request);
424: HyperDoc * last = HTAnchor_document(parent);
425: int depth = last ? last->depth+1 : 0;
1.45 frystyk 426: Finger * newfinger = Finger_new(mr, dest,
427: mr->flags & MR_SAVE ?
428: METHOD_GET : METHOD_HEAD);
1.34 eric 429: HTRequest * newreq = newfinger->request;
1.2 frystyk 430: HyperDoc_new(mr, dest, depth);
431: if (SHOW_MSG) {
432: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 433: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 434: HT_FREE(uri);
1.2 frystyk 435: }
436: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
437: if (SHOW_MSG)
1.13 eric 438: HTTrace("Robot....... Image not tested!\n");
1.34 eric 439: Finger_delete(newfinger);
1.1 frystyk 440: }
441: }
442: }
443: }
444:
445: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 446: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 447: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
448: PUBLIC void HText_endAppend (HText * text) {}
449: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
450: PUBLIC void HText_beginAppend (HText * text) {}
451: PUBLIC void HText_appendParagraph (HText * text) {}
452:
1.48 frystyk 453: PRIVATE int RobotTrace (const char * fmt, va_list pArgs)
454: {
455: return (vfprintf(stderr, fmt, pArgs));
456: }
457:
1.1 frystyk 458: /* ------------------------------------------------------------------------- */
459: /* MAIN PROGRAM */
460: /* ------------------------------------------------------------------------- */
461:
/*
**  Entry point of the robot.
**
**  Sets up the library with a robot profile, creates the Robot object and
**  scans the command line. Arguments starting with `-' are options:
**
**	-n			non-interactive
**	-l [file]		log file (default DEFAULT_LOG_FILE)
**	-r [file]		rule file (default DEFAULT_RULE_FILE)
**	-o [file]		output file (default DEFAULT_OUTPUT_FILE)
**	-timeout [n]		event timeout, only taken if greater than 0
**	-cache			use the persistent cache
**	-nopipe			no pipelined requests
**	-flush			flush the persistent cache (only with -cache)
**	-validate		cache validation
**	-endvalidate		end-to-end cache validation
**	-single			preemptive requests
**	-img			check inlined images
**	-saveimg		load inlined images
**	-link [depth]		follow anchors (default depth DEFAULT_DEPTH)
**	-ss			print start and end time
**	-version		print version information and exit
**	-q			quiet mode
**	-v<flags>		trace flags (only if WWWTRACE is defined)
**
**  The first argument without a leading `-' is the start URL, any further
**  ones are keywords for a search on that URL. The optional value of an
**  option is only taken if it does not itself start with `-'.
**
**  Once the first request has been started the function enters the event
**  loop. The program normally ends in Cleanup(), which calls exit(), when
**  terminate_handler sees that no requests are outstanding.
*/
462: int main (int argc, char ** argv)
463: {
1.48 frystyk 464: int status = 0;
1.1 frystyk 465: int arg;
1.48 frystyk 466: BOOL cache = NO; /* Use persistent cache */
467: BOOL flush = NO; /* flush the persistent cache */
1.1 frystyk 468: HTChunk * keywords = NULL; /* From command line */
/* 0 until the start URL has been seen, then 1 + number of keywords */
469: int keycnt = 0;
1.12 frystyk 470: Robot * mr = NULL;
1.43 frystyk 471: Finger * finger = NULL;
472: HTParentAnchor * startAnchor = NULL;
1.1 frystyk 473:
474: /* Starts Mac GUSI socket library */
475: #ifdef GUSI
476: GUSISetup(GUSIwithSIOUXSockets);
477: GUSISetup(GUSIwithInternetSockets);
478: #endif
479:
480: #ifdef __MWERKS__ /* STR */
481: InitGraf((Ptr) &qd.thePort);
482: InitFonts();
483: InitWindows();
484: InitMenus(); TEInit();
485: InitDialogs(nil);
486: InitCursor();
487: SIOUXSettings.asktosaveonclose = false;
488: argc=ccommand(&argv);
1.50 frystyk 489: #endif /* __MWERKS__ */
1.1 frystyk 490:
1.50 frystyk 491: #ifdef HT_MEMLOG
1.51 ! frystyk 492: HTMemLog_open(DEFAULT_MEMLOG, 8192, YES);
1.47 frystyk 493: #endif
1.46 eric 494:
1.27 frystyk 495: /* Initiate W3C Reference Library with a robot profile */
496: HTProfile_newRobot(APP_NAME, APP_VERSION);
/* Send all trace messages to stderr through RobotTrace */
1.48 frystyk 497: HTTrace_setCallback(RobotTrace);
1.27 frystyk 498:
499: /* Add the default HTML parser to the set of converters */
500: {
501: HTList * converters = HTFormat_conversion();
502: HTMLInit(converters);
503: }
1.1 frystyk 504:
1.12 frystyk 505: /* Build a new robot object */
506: mr = Robot_new();
507:
1.1 frystyk 508: /* Scan command Line for parameters */
509: for (arg=1; arg<argc; arg++) {
510: if (*argv[arg] == '-') {
511:
512: /* non-interactive */
1.17 frystyk 513: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 514: HTAlert_setInteractive(NO);
515:
516: /* log file */
517: } else if (!strcmp(argv[arg], "-l")) {
518: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
519: argv[++arg] : DEFAULT_LOG_FILE;
520:
521: /* rule file */
522: } else if (!strcmp(argv[arg], "-r")) {
523: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
524: argv[++arg] : DEFAULT_RULE_FILE;
525:
526: /* output filename */
527: } else if (!strcmp(argv[arg], "-o")) {
528: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
529: argv[++arg] : DEFAULT_OUTPUT_FILE;
530:
531: /* timeout -- Change the default request timeout */
532: } else if (!strcmp(argv[arg], "-timeout")) {
533: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
534: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
/* A value that is not greater than 0 leaves the current timeout alone */
1.40 frystyk 535: if (timeout > 0) mr->timer = timeout;
1.1 frystyk 536:
1.48 frystyk 537: /* Start the persistent cache */
538: } else if (!strcmp(argv[arg], "-cache")) {
539: cache = YES;
540:
1.51 ! frystyk 541: /* Force no pipelined requests */
! 542: } else if (!strcmp(argv[arg], "-nopipe")) {
! 543: HTTP_setConnectionMode(HTTP_NO_PIPELINING);
! 544:
1.48 frystyk 545: /* Persistent cache flush */
546: } else if (!strcmp(argv[arg], "-flush")) {
547: flush = YES;
548:
549: /* Do a cache validation */
550: } else if (!strcmp(argv[arg], "-validate")) {
551: mr->flags |= MR_VALIDATE;
552:
553: /* Do an end-to-end cache-validation */
554: } else if (!strcmp(argv[arg], "-endvalidate")) {
555: mr->flags |= MR_END_VALIDATE;
556:
1.7 frystyk 557: /* preemptive or non-preemptive access */
1.1 frystyk 558: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 559: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 560:
561: /* test inlined images */
562: } else if (!strcmp(argv[arg], "-img")) {
563: mr->flags |= MR_IMG;
1.45 frystyk 564:
565: /* load inlined images */
566: } else if (!strcmp(argv[arg], "-saveimg")) {
567: mr->flags |= (MR_IMG | MR_SAVE);
1.2 frystyk 568:
569: /* load anchors */
570: } else if (!strcmp(argv[arg], "-link")) {
571: mr->flags |= MR_LINK;
1.7 frystyk 572: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
573: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 574:
1.12 frystyk 575: /* Output start and end time */
576: } else if (!strcmp(argv[arg], "-ss")) {
577: time_t local = time(NULL);
1.13 eric 578: HTTrace("Robot started on %s\n",
1.12 frystyk 579: HTDateTimeStr(&local, YES));
580: mr->flags |= MR_TIME;
581:
1.1 frystyk 582: /* print version and exit */
583: } else if (!strcmp(argv[arg], "-version")) {
584: VersionInfo();
/* Cleanup calls exit(), so the scan stops here */
585: Cleanup(mr, 0);
1.46 eric 586:
587: /* run in quiet mode */
588: } else if (!strcmp(argv[arg], "-q")) {
589: mr->flags |= MR_QUIET;
1.1 frystyk 590:
591: #ifdef WWWTRACE
592: /* trace flags */
593: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 594: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 595: #endif
596:
597: } else {
1.13 eric 598: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 599: }
1.17 frystyk 600: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 601: if (!keycnt) {
/* First plain argument: the start URL, resolved against the cwd */
602: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 603: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
604: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 605: keycnt = 1;
1.11 frystyk 606: HT_FREE(ref);
1.1 frystyk 607: } else { /* Check for successive keyword arguments */
608: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
/* The first keyword creates the chunk, later ones are separated by a space */
609: if (keycnt++ <= 1)
1.5 frystyk 610: keywords = HTChunk_new(128);
1.1 frystyk 611: else
1.5 frystyk 612: HTChunk_putc(keywords, ' ');
613: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 614: HT_FREE(escaped);
1.1 frystyk 615: }
616: }
617: }
618:
619: #ifdef CATCH_SIG
620: SetSignal();
621: #endif
622:
/* Without a start URL there is nothing to do */
623: if (!keycnt) {
1.13 eric 624: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 625: Cleanup(mr, -1);
626: }
627:
1.23 manoli 628: /* Testing that HTTrace is working */
1.47 frystyk 629: if (SHOW_MSG) HTTrace ("Welcome to the W3C mini Robot\n");
1.23 manoli 630:
1.1 frystyk 631: /* Rule file specified? */
632: if (mr->rules) {
633: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 634: if (!HTLoadRules(rules))
1.13 eric 635: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 636: HT_FREE(rules);
1.1 frystyk 637: }
638:
639: /* Output file specified? */
640: if (mr->outputfile) {
641: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 642: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
/* Fall back to the default output stream */
1.1 frystyk 643: mr->output = OUTPUT;
644: }
645: }
646:
1.48 frystyk 647: /* Should we use persistent cache? */
648: if (cache) {
649: HTCacheInit(NULL, 20);
1.49 frystyk 650: HTNet_addBefore(HTCacheFilter, "http://*", NULL, HT_FILTER_MIDDLE);
651: HTNet_addAfter(HTCacheUpdateFilter, "http://*", NULL,
652: HT_NOT_MODIFIED, HT_FILTER_MIDDLE);
1.48 frystyk 653:
654: /* Should we start by flushing? */
655: if (flush) HTCache_flushAll();
656: }
657:
1.1 frystyk 658: /* Log file specified? */
659: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
660:
1.27 frystyk 661: /* Register our own terminate filter */
1.32 frystyk 662: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.40 frystyk 663:
664: /* Setting event timeout */
665: HTHost_setEventTimeout(mr->timer);
1.37 frystyk 666:
1.34 eric 667: /* Start the request */
668: finger = Finger_new(mr, startAnchor, METHOD_GET);
1.43 frystyk 669:
670: /*
671: ** Make sure that the first request is flushed immediately and not
672: ** buffered in the output buffer
673: */
674: HTRequest_setFlush(finger->request, YES);
675:
676: /*
1.48 frystyk 677: ** Check whether we should do some kind of cache validation on
678: ** the load
679: */
/* Finger_new has already applied these two flags; they are set once more here */
680: if (mr->flags & MR_VALIDATE)
681: HTRequest_setReloadMode(finger->request, HT_CACHE_VALIDATE);
682: if (mr->flags & MR_END_VALIDATE)
683: HTRequest_setReloadMode(finger->request, HT_CACHE_END_VALIDATE);
684:
685: /*
1.43 frystyk 686: ** Now do the load
687: */
/* Finger_new has already applied the preemptive flag as well */
1.34 eric 688: if (mr->flags & MR_PREEMPTIVE)
689: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 690:
691: if (keywords) /* Search */
1.34 eric 692: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 693: else
1.34 eric 694: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 695:
1.5 frystyk 696: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 697: if (status != YES) {
1.13 eric 698: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 699: Cleanup(mr, -1);
700: }
701:
702: /* Go into the event loop... */
/*
** NOTE(review): if the first request can already have finished inside the
** load call above while other requests are still outstanding, then
** terminate_handler has freed `finger' and this reads freed memory --
** TODO confirm whether that can happen
*/
1.34 eric 703: HTEventList_loop(finger->request);
1.1 frystyk 704:
705: /* Only gets here if event loop fails */
706: Cleanup(mr, 0);
/* Not reached as Cleanup calls exit() */
707: return 0;
708: }
Webmaster