Annotation of libwww/Robot/src/HTRobot.c, revision 1.37
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
23: #include "HTRobot.h" /* Implemented here */
1.33 eric 24: #include "HTWatch.h"
1.1 frystyk 25:
1.14 frystyk 26: #ifndef W3C_VERSION
1.33 eric 27: #define W3C_VERSION "Unspecified"
1.1 frystyk 28: #endif
29:
30: #define APP_NAME "W3CRobot"
1.14 frystyk 31: #define APP_VERSION W3C_VERSION
1.1 frystyk 32:
33: #define DEFAULT_OUTPUT_FILE "robot.out"
34: #define DEFAULT_RULE_FILE "robot.conf"
35: #define DEFAULT_LOG_FILE "robot.log"
1.7 frystyk 36: #define DEFAULT_DEPTH 0
1.1 frystyk 37:
38: #define SHOW_MSG (WWWTRACE || HTAlert_interactive())
39:
1.7 frystyk 40: #define DEFAULT_TIMEOUT 10 /* timeout in seconds */
1.1 frystyk 41:
42: #if defined(__svr4__)
43: #define CATCH_SIG
44: #endif
45:
/*
**  Robot option flags. Each value is a distinct bit so that several options
**  can be OR'ed together in Robot.flags.
*/
typedef enum _MRFlags {
    MR_IMG        = 1 << 0,     /* set by -img: check inlined images */
    MR_LINK       = 1 << 1,     /* set by -link: load anchors */
    MR_PREEMPTIVE = 1 << 2,     /* set by -single: preemptive requests */
    MR_TIME       = 1 << 3      /* set by -ss: trace start and end time */
} MRFlags;
52:
53: typedef struct _Robot {
1.2 frystyk 54: int depth; /* How deep is our tree */
1.30 frystyk 55: int cnt; /* Count of requests */
1.2 frystyk 56: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 57: HTList * htext; /* List of our HText Objects */
1.34 eric 58: HTList * fingers;
1.1 frystyk 59: struct timeval * tv; /* Timeout on socket */
60: char * cwd; /* Current dir URL */
61: char * rules;
62: char * logfile;
63: char * outputfile;
64: FILE * output;
65: MRFlags flags;
66: } Robot;
1.34 eric 67:
68: typedef struct _Finger {
69: Robot * robot;
70: HTRequest * request;
71: HTParentAnchor * dest;
72: } Finger;
73:
/*
**  Load state recorded in a HyperDoc object.
*/
typedef enum _LoadState {
    L_INVALID   = -2,
    L_LOADING   = -1,
    L_SUCCESS   =  0,
    L_ERROR     =  1
} LoadState;
80:
81: /*
82: ** The HyperDoc object is bound to the anchor and contains information about
83: ** where we are in the search for recursive searches
84: */
85: typedef struct _HyperDoc {
86: HTParentAnchor * anchor;
87: LoadState state;
88: int depth;
89: } HyperDoc;
90:
91: /*
92: ** This is the HText object that is created every time we start parsing a
93: ** HTML object
94: */
1.4 frystyk 95: struct _HText {
1.1 frystyk 96: HTRequest * request;
1.4 frystyk 97: };
1.1 frystyk 98:
99: PUBLIC HText * HTMainText = NULL;
100: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
101: PUBLIC HTStyleSheet * styleSheet = NULL;
102:
103: /* ------------------------------------------------------------------------- */
104:
1.33 eric 105: PUBLIC int HTWatch(int id, void * obj, const char * fmt, ...)
106: {
107: va_list pArgs;
108: va_start(pArgs, fmt);
109: fprintf(stderr, "id: %x obj: %p: ", id, obj);
110: return vfprintf(stderr, fmt, pArgs);
111: }
112:
/*
**  State of the in-memory log used by the HTWatch_log* functions when
**  USE_SYSLOG is not defined.
*/
#define LOG_BUFF_SIZE 65536             /* Size of LogBuff in bytes */
int         LogFile = 2;                /* Descriptor used when flushing */
char *      LogBuff = NULL;             /* Allocated by HTWatch_logOpen() */
size_t      LogLen = 0;                 /* Bytes currently held in LogBuff */
117:
118: PUBLIC int HTWatch_logOpen (char *ident, int option, int facility)
119: {
120: #ifdef USE_SYSLOG
121: openlog(ident, option, facility);
122: #else /* USE_SYSLOG */
123: #if 0
124: if ((LogFile = open("HTRobot.log", O_CREAT|O_TRUNC)) == -1)
125: return HT_ERROR;
126: close(LogFile);
127: #endif
128: if ((LogBuff = (char *) HT_MALLOC(LOG_BUFF_SIZE)) == NULL)
129: HT_OUTOFMEM("HTWatch_logOpen");
130: LogLen = 0;
131: #endif /* !USE_SYSLOG */
132: return HT_OK;
133: }
134:
135: PRIVATE int HTWatch_logFlush(void)
136: {
137: if ((LogFile = open("HTRobot.log", O_APPEND)) == -1)
138: return HT_ERROR;
139: write(LogFile, LogBuff, LogLen);
140: LogLen = 0;
141: close(LogFile);
142: return HT_OK;
143: }
144:
145: PRIVATE int HTWatch_logAdd(char * buf, size_t len)
146: {
147: /*
148: ** Dump everything that won't fit in buffer
149: */
150: while (len + LogLen > LOG_BUFF_SIZE) {
151: size_t toWrite = LOG_BUFF_SIZE-LogLen;
152: memcpy(LogBuff+LogLen, buf, toWrite);
153: HTWatch_logFlush();
154: buf += toWrite;
155: len -= toWrite;
156: }
157: memcpy(LogBuff+LogLen, buf, len);
158: LogLen += len;
159: return HT_OK;
160: }
161:
162: PUBLIC void HTWatch_logClose (void)
163: {
164: #ifdef USE_SYSLOG
165: closelog();
166: #else /* USE_SYSLOG */
167: if (LogLen)
168: HTWatch_logFlush();
169: if (LogFile > 2)
170: close(LogFile);
171: if (LogBuff != NULL)
172: HT_FREE(LogBuff);
173: #endif /* !USE_SYSLOG */
174: }
175:
176: PUBLIC int HTWatch_logData (char * data, size_t len, const char * fmt, ...)
177: {
178: char buff[8200];
179: va_list pArgs;
180: char * tptr;
181: time_t now;
182: int ret;
183: va_start(pArgs, fmt);
184: ret = vsprintf(buff, fmt, pArgs);
185: #ifdef USE_SYSLOG
186: syslog(LOG_DEBUG, "%s\n", buff);
187: if (len > 8192)
188: len = 8192;
189: strncpy(buff, data, len);
190: buff[len] = 0;
191: syslog(LOG_DEBUG, "%s\n", buff);
192: #else /* USE_SYSLOG */
193: time(&now);
194: tptr = ctime(&now);
195: HTWatch_logAdd(tptr, strlen(tptr));
196: HTWatch_logAdd(buff, ret);
197: HTWatch_logAdd("\n", 1);
198: HTWatch_logAdd(data, len);
199: #endif /* !USE_SYSLOG */
200: return ret;
201: }
202:
1.13 eric 203: /* Standard (non-error) Output
204: ** ---------------------------
205: */
206: PUBLIC int OutputData(const char * fmt, ...)
207: {
208: int ret;
209: va_list pArgs;
210: va_start(pArgs, fmt);
211: ret = vfprintf(stdout, fmt, pArgs);
212: va_end(pArgs);
213: return ret;
214: }
215:
216: /* ------------------------------------------------------------------------- */
217:
1.2 frystyk 218: /* Create a "HyperDoc" object
219: ** --------------------------
220: ** A HyperDoc object contains information about whether we have already
221: ** started checking the anchor and the depth in our search
222: */
223: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
224: {
225: HyperDoc * hd;
1.14 frystyk 226: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
227: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 228: hd->state = L_INVALID;
229: hd->depth = depth;
230:
231: /* Bind the HyperDoc object together with the Anchor Object */
232: hd->anchor = anchor;
233: HTAnchor_setDocument(anchor, (void *) hd);
234:
235: /* Add this HyperDoc object to our list */
236: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
237: HTList_addObject(mr->hyperdoc, (void *) hd);
238: return hd;
239: }
240:
241: /* Delete a "HyperDoc" object
242: ** --------------------------
243: */
244: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
245: {
246: if (hd) {
1.11 frystyk 247: HT_FREE (hd);
1.2 frystyk 248: return YES;
249: }
250: return NO;
251: }
252:
1.1 frystyk 253: /* Create a Command Line Object
254: ** ----------------------------
255: */
256: PRIVATE Robot * Robot_new (void)
257: {
258: Robot * me;
1.14 frystyk 259: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
260: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
261: HT_OUTOFMEM("Robot_new");
1.2 frystyk 262: me->hyperdoc = HTList_new();
1.4 frystyk 263: me->htext = HTList_new();
1.1 frystyk 264: me->tv->tv_sec = DEFAULT_TIMEOUT;
1.25 frystyk 265: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 266: me->output = OUTPUT;
1.35 eric 267: me->cnt = 0;
1.34 eric 268: me->fingers = HTList_new();
1.1 frystyk 269: return me;
270: }
271:
272: /* Delete a Command Line Object
273: ** ----------------------------
274: */
275: PRIVATE BOOL Robot_delete (Robot * me)
276: {
277: if (me) {
1.34 eric 278: HTList_delete(me->fingers);
1.2 frystyk 279: if (me->hyperdoc) {
280: HTList * cur = me->hyperdoc;
281: HyperDoc * pres;
282: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
283: HyperDoc_delete(pres);
284: HTList_delete(me->hyperdoc);
285: }
1.4 frystyk 286: if (me->htext) {
287: HTList * cur = me->htext;
288: HText * pres;
289: while ((pres = (HText *) HTList_nextObject(cur)))
290: HText_free(pres);
291: HTList_delete(me->htext);
292: }
1.1 frystyk 293: if (me->logfile) HTLog_close();
294: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 295: if (me->flags & MR_TIME) {
296: time_t local = time(NULL);
1.13 eric 297: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 298: }
1.11 frystyk 299: HT_FREE(me->cwd);
300: HT_FREE(me->tv);
301: HT_FREE(me);
1.1 frystyk 302: return YES;
303: }
304: return NO;
305: }
306:
1.2 frystyk 307: /*
1.34 eric 308: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 309: */
1.34 eric 310: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 311: {
1.34 eric 312: Finger * me;
313: HTRequest * request = HTRequest_new();
314: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
315: HT_OUTOFMEM("Finger_new");
316: me->robot = robot;
317: me->request = request;
318: me->dest = dest;
319: HTList_addObject(robot->fingers, (void *)me);
320:
321: HTRequest_setContext (request, me);
322: if (robot->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(request, YES);
323: HTRequest_addRqHd(request, HT_C_HOST);
324: HTRequest_setMethod(request, method);
325: robot->cnt++;
326: return me;
1.2 frystyk 327: }
328:
1.34 eric 329: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 330: {
1.34 eric 331: HTList_removeObject(me->robot->fingers, (void *)me);
332: me->robot->cnt--;
1.37 ! frystyk 333:
! 334: /*
! 335: ** If we are down at one request then flush the output buffer
! 336: */
! 337: if (me->request) {
! 338: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 339: HTRequest_delete(me->request);
1.37 ! frystyk 340: }
! 341:
! 342: /*
! 343: ** Delete the request and free myself
! 344: */
1.34 eric 345: HT_FREE(me);
346: return YES;
1.2 frystyk 347: }
348:
349: /*
350: ** Cleanup and make sure we close all connections including the persistent
351: ** ones
352: */
1.1 frystyk 353: PRIVATE void Cleanup (Robot * me, int status)
354: {
355: Robot_delete(me);
1.29 eric 356: HTProfile_delete();
1.36 eric 357: HTWatch_logClose();
1.1 frystyk 358: #ifdef VMS
359: exit(status ? status : 1);
360: #else
361: exit(status ? status : 0);
362: #endif
363: }
364:
#ifdef CATCH_SIG
#include <signal.h>
/*      SetSignal
**      ---------
**      Installs the signal handling the robot needs. Calling it may be
**      unnecessary if the application has its own handlers (lossage on SVR4).
**
**      On some systems (SYSV) SIGPIPE has to be taken care of when attempting
**      to connect to a remote host where you would normally just get
**      `connection refused' back, so SIGPIPE is set to be ignored. The
**      outcome is traced when protocol tracing is on.
*/
PRIVATE void SetSignal (void)
{
    BOOL ignored = (signal(SIGPIPE, SIG_IGN) != SIG_ERR);

    if (!PROT_TRACE)
        return;

    if (ignored)
        HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    else
        HTTrace("HTSignal.... Can't catch SIGPIPE\n");
}
#endif /* CATCH_SIG */
384:
385: PRIVATE void VersionInfo (void)
386: {
1.13 eric 387: OutputData("\n\nW3C Reference Software\n\n");
388: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 389: APP_NAME, APP_VERSION);
1.13 eric 390: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
391: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 392: }
393:
394: /* terminate_handler
395: ** -----------------
1.2 frystyk 396: ** This function is registered to handle the result of the request.
397: ** If no more requests are pending then terminate program
1.1 frystyk 398: */
1.32 frystyk 399: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
400: void * param, int status)
1.1 frystyk 401: {
1.34 eric 402: Finger * finger = (Finger *) HTRequest_context(request);
403: Robot * robot = finger->robot;
404: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
405: Finger_delete(finger);
1.37 ! frystyk 406: if (robot->cnt <= 0) {
1.34 eric 407: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.37 ! frystyk 408: Cleanup(robot, 0); /* No way back from here */
1.30 frystyk 409: }
1.37 ! frystyk 410:
! 411: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", robot->cnt, robot->cnt == 1 ? "" : "s");
1.1 frystyk 412: return HT_OK;
413: }
414:
#if 0
/*      timeout_handler
**      ---------------
**      Meant to be registered to handle a timeout in the select event loop.
**      The whole function is compiled out.
**
**      BUG: This doesn't work as we don't get the right request object
**      back from the event loop, which is why killing the request and
**      deleting its finger is disabled below.
*/
PRIVATE int timeout_handler (HTRequest * request)
{
#if 0
    Finger * finger = (Finger *) HTRequest_context(request);
#endif

    if (SHOW_MSG)
        HTTrace("Robot....... We don't know how to handle timeout...\n");

#if 0
    HTRequest_kill(request);
    Finger_delete(finger);
#endif

    return HT_OK;
}
#endif
1.1 frystyk 436:
437: /* ------------------------------------------------------------------------- */
438: /* HTEXT INTERFACE */
439: /* ------------------------------------------------------------------------- */
440:
441: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
442: HTStream * stream)
443: {
444: HText * me;
1.34 eric 445: Finger * finger = (Finger *) HTRequest_context(request);
446: Robot * mr = finger->robot;
1.14 frystyk 447: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
448: HT_OUTOFMEM("HText_new2");
1.4 frystyk 449:
450: /* Bind the HText object together with the Request Object */
1.1 frystyk 451: me->request = request;
1.4 frystyk 452:
453: /* Add this HyperDoc object to our list */
454: if (!mr->htext) mr->htext = HTList_new();
455: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 456: return me;
457: }
458:
1.4 frystyk 459: PUBLIC void HText_free (HText * me) {
1.11 frystyk 460: if (me) HT_FREE (me);
1.4 frystyk 461: }
462:
1.1 frystyk 463: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
464: {
465: if (text && anchor) {
1.34 eric 466: Finger * finger = (Finger *) HTRequest_context(text->request);
467: Robot * mr = finger->robot;
1.1 frystyk 468: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
469: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 470: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 471: HyperDoc * hd = HTAnchor_document(dest_parent);
472:
1.13 eric 473: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 474:
1.2 frystyk 475: /* Test whether we already have a hyperdoc for this document */
476: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 477: HTParentAnchor * parent = HTRequest_parent(text->request);
478: HyperDoc * last = HTAnchor_document(parent);
479: int depth = last ? last->depth+1 : 0;
1.34 eric 480: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
481: HTRequest * newreq = newfinger->request;
1.2 frystyk 482: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 483: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
484: if (depth >= mr->depth) {
485: if (SHOW_MSG)
1.13 eric 486: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 487: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 488: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 489: } else {
1.13 eric 490: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 491: }
492: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 493: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 494: Finger_delete(newfinger);
1.2 frystyk 495: }
1.7 frystyk 496: } else {
1.18 frystyk 497: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 498: }
1.11 frystyk 499: HT_FREE(uri);
1.2 frystyk 500: }
501: }
502:
503: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 504: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 505: {
506: if (text && anchor) {
1.34 eric 507: Finger * finger = (Finger *) HTRequest_context(text->request);
508: Robot * mr = finger->robot;
1.2 frystyk 509: HTParentAnchor * dest = (HTParentAnchor *)
510: HTAnchor_followMainLink((HTAnchor *) anchor);
511: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 512:
1.2 frystyk 513: /* Test whether we already have a hyperdoc for this document */
514: if (mr->flags & MR_IMG && dest && !hd) {
515: HTParentAnchor * parent = HTRequest_parent(text->request);
516: HyperDoc * last = HTAnchor_document(parent);
517: int depth = last ? last->depth+1 : 0;
1.34 eric 518: Finger * newfinger = Finger_new(mr, dest, METHOD_HEAD);
519: HTRequest * newreq = newfinger->request;
1.2 frystyk 520: HyperDoc_new(mr, dest, depth);
521: if (SHOW_MSG) {
522: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 523: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 524: HT_FREE(uri);
1.2 frystyk 525: }
526: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
527: if (SHOW_MSG)
1.13 eric 528: HTTrace("Robot....... Image not tested!\n");
1.34 eric 529: Finger_delete(newfinger);
1.1 frystyk 530: }
531: }
532: }
533: }
534:
535: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 536: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 537: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
538: PUBLIC void HText_endAppend (HText * text) {}
539: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
540: PUBLIC void HText_beginAppend (HText * text) {}
541: PUBLIC void HText_appendParagraph (HText * text) {}
542:
543: /* ------------------------------------------------------------------------- */
544: /* MAIN PROGRAM */
545: /* ------------------------------------------------------------------------- */
546:
547: int main (int argc, char ** argv)
548: {
549: int status = 0;
550: int arg;
551: HTChunk * keywords = NULL; /* From command line */
552: int keycnt = 0;
1.12 frystyk 553: Robot * mr = NULL;
1.34 eric 554: Finger * finger;
555: HTParentAnchor * startAnchor;
1.1 frystyk 556:
557: /* Starts Mac GUSI socket library */
558: #ifdef GUSI
559: GUSISetup(GUSIwithSIOUXSockets);
560: GUSISetup(GUSIwithInternetSockets);
561: #endif
562:
563: #ifdef __MWERKS__ /* STR */
564: InitGraf((Ptr) &qd.thePort);
565: InitFonts();
566: InitWindows();
567: InitMenus(); TEInit();
568: InitDialogs(nil);
569: InitCursor();
570: SIOUXSettings.asktosaveonclose = false;
571: argc=ccommand(&argv);
572: #endif
573:
1.36 eric 574: HTWatch_logOpen("HTRobot", LOG_NDELAY, LOG_USER);
1.27 frystyk 575: /* Initiate W3C Reference Library with a robot profile */
576: HTProfile_newRobot(APP_NAME, APP_VERSION);
577:
578: /* Add the default HTML parser to the set of converters */
579: {
580: HTList * converters = HTFormat_conversion();
581: HTMLInit(converters);
582: }
1.1 frystyk 583:
1.12 frystyk 584: /* Build a new robot object */
585: mr = Robot_new();
586:
1.1 frystyk 587: /* Scan command Line for parameters */
588: for (arg=1; arg<argc; arg++) {
589: if (*argv[arg] == '-') {
590:
591: /* non-interactive */
1.17 frystyk 592: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 593: HTAlert_setInteractive(NO);
594:
595: /* log file */
596: } else if (!strcmp(argv[arg], "-l")) {
597: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
598: argv[++arg] : DEFAULT_LOG_FILE;
599:
600: /* rule file */
601: } else if (!strcmp(argv[arg], "-r")) {
602: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
603: argv[++arg] : DEFAULT_RULE_FILE;
604:
605: /* output filename */
606: } else if (!strcmp(argv[arg], "-o")) {
607: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
608: argv[++arg] : DEFAULT_OUTPUT_FILE;
609:
610: /* timeout -- Change the default request timeout */
611: } else if (!strcmp(argv[arg], "-timeout")) {
612: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
613: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
614: if (timeout > 0) mr->tv->tv_sec = timeout;
615:
1.7 frystyk 616: /* preemptive or non-preemptive access */
1.1 frystyk 617: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 618: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 619:
620: /* test inlined images */
621: } else if (!strcmp(argv[arg], "-img")) {
622: mr->flags |= MR_IMG;
623:
624: /* load anchors */
625: } else if (!strcmp(argv[arg], "-link")) {
626: mr->flags |= MR_LINK;
1.7 frystyk 627: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
628: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 629:
1.12 frystyk 630: /* Output start and end time */
631: } else if (!strcmp(argv[arg], "-ss")) {
632: time_t local = time(NULL);
1.13 eric 633: HTTrace("Robot started on %s\n",
1.12 frystyk 634: HTDateTimeStr(&local, YES));
635: mr->flags |= MR_TIME;
636:
1.1 frystyk 637: /* print version and exit */
638: } else if (!strcmp(argv[arg], "-version")) {
639: VersionInfo();
640: Cleanup(mr, 0);
641:
642: #ifdef WWWTRACE
643: /* trace flags */
644: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 645: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 646: #endif
647:
648: } else {
1.13 eric 649: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 650: }
1.17 frystyk 651: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 652: if (!keycnt) {
653: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 654: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
655: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 656: keycnt = 1;
1.11 frystyk 657: HT_FREE(ref);
1.1 frystyk 658: } else { /* Check for successive keyword arguments */
659: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
660: if (keycnt++ <= 1)
1.5 frystyk 661: keywords = HTChunk_new(128);
1.1 frystyk 662: else
1.5 frystyk 663: HTChunk_putc(keywords, ' ');
664: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 665: HT_FREE(escaped);
1.1 frystyk 666: }
667: }
668: }
669:
670: #ifdef CATCH_SIG
671: SetSignal();
672: #endif
673:
674: if (!keycnt) {
1.13 eric 675: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 676: Cleanup(mr, -1);
677: }
678:
1.23 manoli 679: /* Testing that HTTrace is working */
680: HTTrace ("Welcome to the W3C mini Robot\n");
681:
1.1 frystyk 682: /* Rule file specified? */
683: if (mr->rules) {
684: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 685: if (!HTLoadRules(rules))
1.13 eric 686: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 687: HT_FREE(rules);
1.1 frystyk 688: }
689:
690: /* Output file specified? */
691: if (mr->outputfile) {
692: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 693: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 694: mr->output = OUTPUT;
695: }
696: }
697:
698: /* Log file specifed? */
699: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
700:
1.27 frystyk 701: /* Register our own someterminater filter */
1.32 frystyk 702: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.37 ! frystyk 703:
1.34 eric 704: /* Start the request */
705: finger = Finger_new(mr, startAnchor, METHOD_GET);
706: if (mr->flags & MR_PREEMPTIVE)
707: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 708:
709: if (keywords) /* Search */
1.34 eric 710: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 711: else
1.34 eric 712: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 713:
1.5 frystyk 714: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 715: if (status != YES) {
1.13 eric 716: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 717: Cleanup(mr, -1);
718: }
719:
720: /* Go into the event loop... */
1.34 eric 721: HTEventList_loop(finger->request);
1.1 frystyk 722:
723: /* Only gets here if event loop fails */
724: Cleanup(mr, 0);
725: return 0;
726: }
Webmaster