Annotation of libwww/Robot/src/HTRobot.c, revision 1.36
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
23: #include "HTRobot.h" /* Implemented here */
1.33 eric 24: #include "HTWatch.h"
1.1 frystyk 25:
1.14 frystyk 26: #ifndef W3C_VERSION
1.33 eric 27: #define W3C_VERSION "Unspecified"
1.1 frystyk 28: #endif
29:
30: #define APP_NAME "W3CRobot"
1.14 frystyk 31: #define APP_VERSION W3C_VERSION
1.1 frystyk 32:
33: #define DEFAULT_OUTPUT_FILE "robot.out"
34: #define DEFAULT_RULE_FILE "robot.conf"
35: #define DEFAULT_LOG_FILE "robot.log"
1.7 frystyk 36: #define DEFAULT_DEPTH 0
1.1 frystyk 37:
38: #define SHOW_MSG (WWWTRACE || HTAlert_interactive())
39:
1.7 frystyk 40: #define DEFAULT_TIMEOUT 10 /* timeout in seconds */
1.1 frystyk 41:
42: #if defined(__svr4__)
43: #define CATCH_SIG
44: #endif
45:
/*
**  Robot option bits, OR'ed together in Robot.flags by the command line
**  parser in main().
*/
typedef enum _MRFlags {
    MR_IMG        = 1 << 0,	/* "-img": check inlined images */
    MR_LINK       = 1 << 1,	/* "-link": follow anchors */
    MR_PREEMPTIVE = 1 << 2,	/* "-single": use preemptive requests */
    MR_TIME       = 1 << 3	/* "-ss": trace start and end time */
} MRFlags;
52:
53: typedef struct _Robot {
1.7 frystyk 54: HTRequest * timeout; /* Until we get a server eventloop */
1.2 frystyk 55: int depth; /* How deep is our tree */
1.30 frystyk 56: int cnt; /* Count of requests */
1.2 frystyk 57: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 58: HTList * htext; /* List of our HText Objects */
1.34 eric 59: HTList * fingers;
1.1 frystyk 60: struct timeval * tv; /* Timeout on socket */
61: char * cwd; /* Current dir URL */
62: char * rules;
63: char * logfile;
64: char * outputfile;
65: FILE * output;
66: MRFlags flags;
67: } Robot;
1.34 eric 68:
69: typedef struct _Finger {
70: Robot * robot;
71: HTRequest * request;
72: HTParentAnchor * dest;
73: } Finger;
74:
/*
**  Load state recorded in a HyperDoc object
*/
typedef enum _LoadState {
    L_INVALID	= -2,
    L_LOADING	= -1,
    L_SUCCESS	=  0,
    L_ERROR	=  1
} LoadState;
81:
82: /*
83: ** The HyperDoc object is bound to the anchor and contains information about
84: ** where we are in the search for recursive searches
85: */
86: typedef struct _HyperDoc {
87: HTParentAnchor * anchor;
88: LoadState state;
89: int depth;
90: } HyperDoc;
91:
92: /*
93: ** This is the HText object that is created every time we start parsing a
94: ** HTML object
95: */
1.4 frystyk 96: struct _HText {
1.1 frystyk 97: HTRequest * request;
1.4 frystyk 98: };
1.1 frystyk 99:
100: PUBLIC HText * HTMainText = NULL;
101: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
102: PUBLIC HTStyleSheet * styleSheet = NULL;
103:
104: /* ------------------------------------------------------------------------- */
105:
1.33 eric 106: PUBLIC int HTWatch(int id, void * obj, const char * fmt, ...)
107: {
108: va_list pArgs;
109: va_start(pArgs, fmt);
110: fprintf(stderr, "id: %x obj: %p: ", id, obj);
111: return vfprintf(stderr, fmt, pArgs);
112: }
113:
/*
**  State of the in-memory log used by the HTWatch_logXXX functions:
**  a buffer of LOG_BUFF_SIZE bytes of which the first LogLen are in use.
*/
#define LOG_BUFF_SIZE	65536

int	LogFile = 2;		/* Descriptor used when flushing the buffer */
char *	LogBuff = NULL;		/* Allocated by HTWatch_logOpen() */
size_t	LogLen  = 0;		/* Number of bytes pending in LogBuff */
! 118:
! 119: PUBLIC int HTWatch_logOpen (char *ident, int option, int facility)
! 120: {
! 121: #ifdef USE_SYSLOG
! 122: openlog(ident, option, facility);
! 123: #else /* USE_SYSLOG */
! 124: #if 0
! 125: if ((LogFile = open("HTRobot.log", O_CREAT|O_TRUNC)) == -1)
! 126: return HT_ERROR;
! 127: close(LogFile);
! 128: #endif
! 129: if ((LogBuff = (char *) HT_MALLOC(LOG_BUFF_SIZE)) == NULL)
! 130: HT_OUTOFMEM("HTWatch_logOpen");
! 131: LogLen = 0;
! 132: #endif /* !USE_SYSLOG */
! 133: return HT_OK;
! 134: }
! 135:
! 136: PRIVATE int HTWatch_logFlush(void)
! 137: {
! 138: if ((LogFile = open("HTRobot.log", O_APPEND)) == -1)
! 139: return HT_ERROR;
! 140: write(LogFile, LogBuff, LogLen);
! 141: LogLen = 0;
! 142: close(LogFile);
! 143: return HT_OK;
! 144: }
! 145:
! 146: PRIVATE int HTWatch_logAdd(char * buf, size_t len)
! 147: {
! 148: /*
! 149: ** Dump everything that won't fit in buffer
! 150: */
! 151: while (len + LogLen > LOG_BUFF_SIZE) {
! 152: size_t toWrite = LOG_BUFF_SIZE-LogLen;
! 153: memcpy(LogBuff+LogLen, buf, toWrite);
! 154: HTWatch_logFlush();
! 155: buf += toWrite;
! 156: len -= toWrite;
! 157: }
! 158: memcpy(LogBuff+LogLen, buf, len);
! 159: LogLen += len;
! 160: return HT_OK;
! 161: }
! 162:
! 163: PUBLIC void HTWatch_logClose (void)
! 164: {
! 165: #ifdef USE_SYSLOG
! 166: closelog();
! 167: #else /* USE_SYSLOG */
! 168: if (LogLen)
! 169: HTWatch_logFlush();
! 170: if (LogFile > 2)
! 171: close(LogFile);
! 172: if (LogBuff != NULL)
! 173: HT_FREE(LogBuff);
! 174: #endif /* !USE_SYSLOG */
! 175: }
! 176:
! 177: PUBLIC int HTWatch_logData (char * data, size_t len, const char * fmt, ...)
! 178: {
! 179: char buff[8200];
! 180: va_list pArgs;
! 181: char * tptr;
! 182: time_t now;
! 183: int ret;
! 184: va_start(pArgs, fmt);
! 185: ret = vsprintf(buff, fmt, pArgs);
! 186: #ifdef USE_SYSLOG
! 187: syslog(LOG_DEBUG, "%s\n", buff);
! 188: if (len > 8192)
! 189: len = 8192;
! 190: strncpy(buff, data, len);
! 191: buff[len] = 0;
! 192: syslog(LOG_DEBUG, "%s\n", buff);
! 193: #else /* USE_SYSLOG */
! 194: time(&now);
! 195: tptr = ctime(&now);
! 196: HTWatch_logAdd(tptr, strlen(tptr));
! 197: HTWatch_logAdd(buff, ret);
! 198: HTWatch_logAdd("\n", 1);
! 199: HTWatch_logAdd(data, len);
! 200: #endif /* !USE_SYSLOG */
! 201: return ret;
! 202: }
! 203:
1.13 eric 204: /* Standard (non-error) Output
205: ** ---------------------------
206: */
207: PUBLIC int OutputData(const char * fmt, ...)
208: {
209: int ret;
210: va_list pArgs;
211: va_start(pArgs, fmt);
212: ret = vfprintf(stdout, fmt, pArgs);
213: va_end(pArgs);
214: return ret;
215: }
216:
217: /* ------------------------------------------------------------------------- */
218:
1.2 frystyk 219: /* Create a "HyperDoc" object
220: ** --------------------------
221: ** A HyperDoc object contains information about whether we have already
222: ** started checking the anchor and the depth in our search
223: */
224: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
225: {
226: HyperDoc * hd;
1.14 frystyk 227: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
228: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 229: hd->state = L_INVALID;
230: hd->depth = depth;
231:
232: /* Bind the HyperDoc object together with the Anchor Object */
233: hd->anchor = anchor;
234: HTAnchor_setDocument(anchor, (void *) hd);
235:
236: /* Add this HyperDoc object to our list */
237: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
238: HTList_addObject(mr->hyperdoc, (void *) hd);
239: return hd;
240: }
241:
242: /* Delete a "HyperDoc" object
243: ** --------------------------
244: */
245: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
246: {
247: if (hd) {
1.11 frystyk 248: HT_FREE (hd);
1.2 frystyk 249: return YES;
250: }
251: return NO;
252: }
253:
1.1 frystyk 254: /* Create a Command Line Object
255: ** ----------------------------
256: */
257: PRIVATE Robot * Robot_new (void)
258: {
259: Robot * me;
1.14 frystyk 260: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
261: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
262: HT_OUTOFMEM("Robot_new");
1.2 frystyk 263: me->hyperdoc = HTList_new();
1.4 frystyk 264: me->htext = HTList_new();
1.1 frystyk 265: me->tv->tv_sec = DEFAULT_TIMEOUT;
1.25 frystyk 266: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 267: me->output = OUTPUT;
1.35 eric 268: me->cnt = 0;
1.34 eric 269: me->fingers = HTList_new();
1.1 frystyk 270:
1.7 frystyk 271: /* We keep an extra timeout request object for the timeout_handler */
272: me->timeout = HTRequest_new();
273: HTRequest_setContext (me->timeout, me);
274:
1.1 frystyk 275: return me;
276: }
277:
278: /* Delete a Command Line Object
279: ** ----------------------------
280: */
281: PRIVATE BOOL Robot_delete (Robot * me)
282: {
283: if (me) {
1.34 eric 284: HTList_delete(me->fingers);
1.2 frystyk 285: if (me->hyperdoc) {
286: HTList * cur = me->hyperdoc;
287: HyperDoc * pres;
288: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
289: HyperDoc_delete(pres);
290: HTList_delete(me->hyperdoc);
291: }
1.4 frystyk 292: if (me->htext) {
293: HTList * cur = me->htext;
294: HText * pres;
295: while ((pres = (HText *) HTList_nextObject(cur)))
296: HText_free(pres);
297: HTList_delete(me->htext);
298: }
1.1 frystyk 299: if (me->logfile) HTLog_close();
300: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 301: if (me->flags & MR_TIME) {
302: time_t local = time(NULL);
1.13 eric 303: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 304: }
1.11 frystyk 305: HT_FREE(me->cwd);
306: HT_FREE(me->tv);
307: HT_FREE(me);
1.1 frystyk 308: return YES;
309: }
310: return NO;
311: }
312:
1.2 frystyk 313: /*
1.34 eric 314: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 315: */
1.34 eric 316: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 317: {
1.34 eric 318: Finger * me;
319: HTRequest * request = HTRequest_new();
320: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
321: HT_OUTOFMEM("Finger_new");
322: me->robot = robot;
323: me->request = request;
324: me->dest = dest;
325: HTList_addObject(robot->fingers, (void *)me);
326:
327: HTRequest_setContext (request, me);
328: if (robot->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(request, YES);
329: HTRequest_addRqHd(request, HT_C_HOST);
330: HTRequest_setMethod(request, method);
331: robot->cnt++;
332: return me;
1.2 frystyk 333: }
334:
1.34 eric 335: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 336: {
1.34 eric 337: HTList_removeObject(me->robot->fingers, (void *)me);
338: me->robot->cnt--;
339: if (me->request)
340: HTRequest_delete(me->request);
341: HT_FREE(me);
342: return YES;
1.2 frystyk 343: }
344:
345: /*
346: ** Cleanup and make sure we close all connections including the persistent
347: ** ones
348: */
1.1 frystyk 349: PRIVATE void Cleanup (Robot * me, int status)
350: {
351: Robot_delete(me);
1.29 eric 352: HTProfile_delete();
1.36 ! eric 353: HTWatch_logClose();
1.1 frystyk 354: #ifdef VMS
355: exit(status ? status : 1);
356: #else
357: exit(status ? status : 0);
358: #endif
359: }
360:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	---------
**	Tells the system to ignore SIGPIPE and traces the outcome. On some
**	systems (SYSV) this is necessary when attempting to connect to a
**	remote host where you normally should get `connection refused' back.
**	The application may not need to call this if it has its own handlers.
*/
PRIVATE void SetSignal (void)
{
    int ignoring = (signal(SIGPIPE, SIG_IGN) != SIG_ERR);

    if (PROT_TRACE) {
	if (ignoring)
	    HTTrace("HTSignal.... Ignoring SIGPIPE\n");
	else
	    HTTrace("HTSignal.... Can't catch SIGPIPE\n");
    }
}
#endif /* CATCH_SIG */
380:
381: PRIVATE void VersionInfo (void)
382: {
1.13 eric 383: OutputData("\n\nW3C Reference Software\n\n");
384: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 385: APP_NAME, APP_VERSION);
1.13 eric 386: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
387: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 388: }
389:
390: /* terminate_handler
391: ** -----------------
1.2 frystyk 392: ** This function is registered to handle the result of the request.
393: ** If no more requests are pending then terminate program
1.1 frystyk 394: */
1.32 frystyk 395: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
396: void * param, int status)
1.1 frystyk 397: {
1.35 eric 398: /* int count = HTNet_count(); */
1.34 eric 399: Finger * finger = (Finger *) HTRequest_context(request);
400: Robot * robot = finger->robot;
401: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
402: Finger_delete(finger);
1.35 eric 403: switch (robot->cnt) {
1.34 eric 404: case 0:
405: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
406: Cleanup(robot, 0);
407: case 1:
408: HTRequest_forceFlush(request);
409: default:
1.35 eric 410: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", robot->cnt, robot->cnt == 1 ? "" : "s");
1.30 frystyk 411: }
1.1 frystyk 412: return HT_OK;
413: }
414:
415: /* timeout_handler
416: ** ---------------
417: ** This function is registered to handle timeout in select eventloop
1.7 frystyk 418: **
419: ** BUG: This doesn't work as we don't get the right request object
420: ** back from the event loop
1.1 frystyk 421: */
422: PRIVATE int timeout_handler (HTRequest * request)
423: {
1.27 frystyk 424: #if 0
1.34 eric 425: Finger * finger = (Finger *) HTRequest_context(request);
1.27 frystyk 426: #endif
1.25 frystyk 427: if (SHOW_MSG) HTTrace("Robot....... We don't know how to handle timeout...\n");
1.7 frystyk 428: #if 0
1.1 frystyk 429: HTRequest_kill(request);
1.34 eric 430: Finger_delete(finger);
1.7 frystyk 431: #endif
1.4 frystyk 432: return HT_OK;
1.1 frystyk 433: }
434:
435: /* ------------------------------------------------------------------------- */
436: /* HTEXT INTERFACE */
437: /* ------------------------------------------------------------------------- */
438:
439: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
440: HTStream * stream)
441: {
442: HText * me;
1.34 eric 443: Finger * finger = (Finger *) HTRequest_context(request);
444: Robot * mr = finger->robot;
1.14 frystyk 445: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
446: HT_OUTOFMEM("HText_new2");
1.4 frystyk 447:
448: /* Bind the HText object together with the Request Object */
1.1 frystyk 449: me->request = request;
1.4 frystyk 450:
451: /* Add this HyperDoc object to our list */
452: if (!mr->htext) mr->htext = HTList_new();
453: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 454: return me;
455: }
456:
1.4 frystyk 457: PUBLIC void HText_free (HText * me) {
1.11 frystyk 458: if (me) HT_FREE (me);
1.4 frystyk 459: }
460:
1.1 frystyk 461: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
462: {
463: if (text && anchor) {
1.34 eric 464: Finger * finger = (Finger *) HTRequest_context(text->request);
465: Robot * mr = finger->robot;
1.1 frystyk 466: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
467: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 468: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 469: HyperDoc * hd = HTAnchor_document(dest_parent);
470:
1.13 eric 471: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 472:
1.2 frystyk 473: /* Test whether we already have a hyperdoc for this document */
474: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 475: HTParentAnchor * parent = HTRequest_parent(text->request);
476: HyperDoc * last = HTAnchor_document(parent);
477: int depth = last ? last->depth+1 : 0;
1.34 eric 478: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
479: HTRequest * newreq = newfinger->request;
1.2 frystyk 480: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 481: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
482: if (depth >= mr->depth) {
483: if (SHOW_MSG)
1.13 eric 484: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 485: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 486: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 487: } else {
1.13 eric 488: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 489: }
490: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 491: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 492: Finger_delete(newfinger);
1.2 frystyk 493: }
1.7 frystyk 494: } else {
1.18 frystyk 495: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 496: }
1.11 frystyk 497: HT_FREE(uri);
1.2 frystyk 498: }
499: }
500:
501: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 502: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 503: {
504: if (text && anchor) {
1.34 eric 505: Finger * finger = (Finger *) HTRequest_context(text->request);
506: Robot * mr = finger->robot;
1.2 frystyk 507: HTParentAnchor * dest = (HTParentAnchor *)
508: HTAnchor_followMainLink((HTAnchor *) anchor);
509: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 510:
1.2 frystyk 511: /* Test whether we already have a hyperdoc for this document */
512: if (mr->flags & MR_IMG && dest && !hd) {
513: HTParentAnchor * parent = HTRequest_parent(text->request);
514: HyperDoc * last = HTAnchor_document(parent);
515: int depth = last ? last->depth+1 : 0;
1.34 eric 516: Finger * newfinger = Finger_new(mr, dest, METHOD_HEAD);
517: HTRequest * newreq = newfinger->request;
1.2 frystyk 518: HyperDoc_new(mr, dest, depth);
519: if (SHOW_MSG) {
520: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 521: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 522: HT_FREE(uri);
1.2 frystyk 523: }
524: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
525: if (SHOW_MSG)
1.13 eric 526: HTTrace("Robot....... Image not tested!\n");
1.34 eric 527: Finger_delete(newfinger);
1.1 frystyk 528: }
529: }
530: }
531: }
532:
533: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 534: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 535: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
536: PUBLIC void HText_endAppend (HText * text) {}
537: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
538: PUBLIC void HText_beginAppend (HText * text) {}
539: PUBLIC void HText_appendParagraph (HText * text) {}
540:
541: /* ------------------------------------------------------------------------- */
542: /* MAIN PROGRAM */
543: /* ------------------------------------------------------------------------- */
544:
545: int main (int argc, char ** argv)
546: {
547: int status = 0;
548: int arg;
549: HTChunk * keywords = NULL; /* From command line */
550: int keycnt = 0;
1.12 frystyk 551: Robot * mr = NULL;
1.34 eric 552: Finger * finger;
553: HTParentAnchor * startAnchor;
1.1 frystyk 554:
555: /* Starts Mac GUSI socket library */
556: #ifdef GUSI
557: GUSISetup(GUSIwithSIOUXSockets);
558: GUSISetup(GUSIwithInternetSockets);
559: #endif
560:
561: #ifdef __MWERKS__ /* STR */
562: InitGraf((Ptr) &qd.thePort);
563: InitFonts();
564: InitWindows();
565: InitMenus(); TEInit();
566: InitDialogs(nil);
567: InitCursor();
568: SIOUXSettings.asktosaveonclose = false;
569: argc=ccommand(&argv);
570: #endif
571:
1.36 ! eric 572: HTWatch_logOpen("HTRobot", LOG_NDELAY, LOG_USER);
1.27 frystyk 573: /* Initiate W3C Reference Library with a robot profile */
574: HTProfile_newRobot(APP_NAME, APP_VERSION);
575:
576: /* Add the default HTML parser to the set of converters */
577: {
578: HTList * converters = HTFormat_conversion();
579: HTMLInit(converters);
580: }
1.1 frystyk 581:
1.12 frystyk 582: /* Build a new robot object */
583: mr = Robot_new();
584:
1.1 frystyk 585: /* Scan command Line for parameters */
586: for (arg=1; arg<argc; arg++) {
587: if (*argv[arg] == '-') {
588:
589: /* non-interactive */
1.17 frystyk 590: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 591: HTAlert_setInteractive(NO);
592:
593: /* log file */
594: } else if (!strcmp(argv[arg], "-l")) {
595: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
596: argv[++arg] : DEFAULT_LOG_FILE;
597:
598: /* rule file */
599: } else if (!strcmp(argv[arg], "-r")) {
600: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
601: argv[++arg] : DEFAULT_RULE_FILE;
602:
603: /* output filename */
604: } else if (!strcmp(argv[arg], "-o")) {
605: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
606: argv[++arg] : DEFAULT_OUTPUT_FILE;
607:
608: /* timeout -- Change the default request timeout */
609: } else if (!strcmp(argv[arg], "-timeout")) {
610: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
611: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
612: if (timeout > 0) mr->tv->tv_sec = timeout;
613:
1.7 frystyk 614: /* preemptive or non-preemptive access */
1.1 frystyk 615: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 616: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 617:
618: /* test inlined images */
619: } else if (!strcmp(argv[arg], "-img")) {
620: mr->flags |= MR_IMG;
621:
622: /* load anchors */
623: } else if (!strcmp(argv[arg], "-link")) {
624: mr->flags |= MR_LINK;
1.7 frystyk 625: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
626: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 627:
1.12 frystyk 628: /* Output start and end time */
629: } else if (!strcmp(argv[arg], "-ss")) {
630: time_t local = time(NULL);
1.13 eric 631: HTTrace("Robot started on %s\n",
1.12 frystyk 632: HTDateTimeStr(&local, YES));
633: mr->flags |= MR_TIME;
634:
1.1 frystyk 635: /* print version and exit */
636: } else if (!strcmp(argv[arg], "-version")) {
637: VersionInfo();
638: Cleanup(mr, 0);
639:
640: #ifdef WWWTRACE
641: /* trace flags */
642: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 643: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 644: #endif
645:
646: } else {
1.13 eric 647: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 648: }
1.17 frystyk 649: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 650: if (!keycnt) {
651: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 652: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
653: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 654: keycnt = 1;
1.11 frystyk 655: HT_FREE(ref);
1.1 frystyk 656: } else { /* Check for successive keyword arguments */
657: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
658: if (keycnt++ <= 1)
1.5 frystyk 659: keywords = HTChunk_new(128);
1.1 frystyk 660: else
1.5 frystyk 661: HTChunk_putc(keywords, ' ');
662: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 663: HT_FREE(escaped);
1.1 frystyk 664: }
665: }
666: }
667:
668: #ifdef CATCH_SIG
669: SetSignal();
670: #endif
671:
672: if (!keycnt) {
1.13 eric 673: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 674: Cleanup(mr, -1);
675: }
676:
1.23 manoli 677: /* Testing that HTTrace is working */
678: HTTrace ("Welcome to the W3C mini Robot\n");
679:
1.1 frystyk 680: /* Rule file specified? */
681: if (mr->rules) {
682: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 683: if (!HTLoadRules(rules))
1.13 eric 684: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 685: HT_FREE(rules);
1.1 frystyk 686: }
687:
688: /* Output file specified? */
689: if (mr->outputfile) {
690: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 691: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 692: mr->output = OUTPUT;
693: }
694: }
695:
696: /* Log file specifed? */
697: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
698:
1.27 frystyk 699: /* Register our own someterminater filter */
1.32 frystyk 700: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.34 eric 701: #if 0
1.1 frystyk 702: /* Set timeout on sockets */
1.33 eric 703: HTEventList_registerTimeout(mr->tv, mr->timeout, timeout_handler, NO);
1.34 eric 704: #endif
705: /* Start the request */
706: finger = Finger_new(mr, startAnchor, METHOD_GET);
707: if (mr->flags & MR_PREEMPTIVE)
708: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 709:
710: if (keywords) /* Search */
1.34 eric 711: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 712: else
1.34 eric 713: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 714:
1.5 frystyk 715: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 716: if (status != YES) {
1.13 eric 717: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 718: Cleanup(mr, -1);
719: }
720:
721: /* Go into the event loop... */
1.34 eric 722: HTEventList_loop(finger->request);
1.1 frystyk 723:
724: /* Only gets here if event loop fails */
725: Cleanup(mr, 0);
726: return 0;
727: }
Webmaster