Annotation of libwww/Robot/src/HTRobot.c, revision 1.38
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
23: #include "HTRobot.h" /* Implemented here */
1.33 eric 24: #include "HTWatch.h"
1.1 frystyk 25:
/* Use a placeholder when the build does not supply a version string */
#ifndef W3C_VERSION
#define W3C_VERSION		"Unspecified"
#endif

/* Name and version passed to HTProfile_newRobot() and shown by -version */
#define APP_NAME		"W3CRobot"
#define APP_VERSION		W3C_VERSION

/* Values used when -o, -r, -l or -link is given without an argument */
#define DEFAULT_OUTPUT_FILE	"robot.out"
#define DEFAULT_RULE_FILE	"robot.conf"
#define DEFAULT_LOG_FILE	"robot.log"
#define DEFAULT_DEPTH		0

/* Non-zero when WWWTRACE is set or alerts are interactive */
#define SHOW_MSG		(WWWTRACE || HTAlert_interactive())

#define DEFAULT_TIMEOUT		10		/* timeout in seconds */

/* On SVR4 enable the SetSignal() code further down */
#if defined(__svr4__)
#define CATCH_SIG
#endif
45:
/* Option bits kept in Robot.flags; each one is set by a command line switch */
typedef enum _MRFlags {
    MR_IMG        = 1 << 0,		/* -img    */
    MR_LINK       = 1 << 1,		/* -link   */
    MR_PREEMPTIVE = 1 << 2,		/* -single */
    MR_TIME       = 1 << 3		/* -ss     */
} MRFlags;
52:
53: typedef struct _Robot {
1.2 frystyk 54: int depth; /* How deep is our tree */
1.30 frystyk 55: int cnt; /* Count of requests */
1.2 frystyk 56: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 57: HTList * htext; /* List of our HText Objects */
1.34 eric 58: HTList * fingers;
1.1 frystyk 59: struct timeval * tv; /* Timeout on socket */
60: char * cwd; /* Current dir URL */
61: char * rules;
62: char * logfile;
63: char * outputfile;
64: FILE * output;
65: MRFlags flags;
66: } Robot;
1.34 eric 67:
68: typedef struct _Finger {
69: Robot * robot;
70: HTRequest * request;
71: HTParentAnchor * dest;
72: } Finger;
73:
/* Load status recorded in a HyperDoc */
typedef enum _LoadState {
    L_INVALID	= -2,
    L_LOADING	= -1,
    L_SUCCESS	=  0,
    L_ERROR	=  1
} LoadState;
80:
81: /*
82: ** The HyperDoc object is bound to the anchor and contains information about
83: ** where we are in the search for recursive searches
84: */
85: typedef struct _HyperDoc {
86: HTParentAnchor * anchor;
87: LoadState state;
88: int depth;
89: } HyperDoc;
90:
91: /*
92: ** This is the HText object that is created every time we start parsing a
93: ** HTML object
94: */
1.4 frystyk 95: struct _HText {
1.1 frystyk 96: HTRequest * request;
1.4 frystyk 97: };
1.1 frystyk 98:
99: PUBLIC HText * HTMainText = NULL;
100: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
101: PUBLIC HTStyleSheet * styleSheet = NULL;
102:
103: /* ------------------------------------------------------------------------- */
104:
1.33 eric 105: PUBLIC int HTWatch(int id, void * obj, const char * fmt, ...)
106: {
107: va_list pArgs;
108: va_start(pArgs, fmt);
109: fprintf(stderr, "id: %x obj: %p: ", id, obj);
110: return vfprintf(stderr, fmt, pArgs);
111: }
112:
/* In-memory log buffer used by the HTWatch_logXxx functions */
#define LOG_BUFF_SIZE 65536		/* Capacity of LogBuff in bytes */
int	LogFile = 2;			/* Log file descriptor */
char *	LogBuff = NULL;			/* Allocated by HTWatch_logOpen() */
size_t	LogLen  = 0;			/* Bytes currently held in LogBuff */
117:
118: PUBLIC int HTWatch_logOpen (char *ident, int option, int facility)
119: {
120: #ifdef USE_SYSLOG
121: openlog(ident, option, facility);
122: #else /* USE_SYSLOG */
123: #if 0
124: if ((LogFile = open("HTRobot.log", O_CREAT|O_TRUNC)) == -1)
125: return HT_ERROR;
126: close(LogFile);
127: #endif
128: if ((LogBuff = (char *) HT_MALLOC(LOG_BUFF_SIZE)) == NULL)
129: HT_OUTOFMEM("HTWatch_logOpen");
130: LogLen = 0;
131: #endif /* !USE_SYSLOG */
132: return HT_OK;
133: }
134:
135: PRIVATE int HTWatch_logFlush(void)
136: {
137: if ((LogFile = open("HTRobot.log", O_APPEND)) == -1)
138: return HT_ERROR;
139: write(LogFile, LogBuff, LogLen);
140: LogLen = 0;
141: close(LogFile);
142: return HT_OK;
143: }
144:
145: PRIVATE int HTWatch_logAdd(char * buf, size_t len)
146: {
147: /*
148: ** Dump everything that won't fit in buffer
149: */
150: while (len + LogLen > LOG_BUFF_SIZE) {
151: size_t toWrite = LOG_BUFF_SIZE-LogLen;
152: memcpy(LogBuff+LogLen, buf, toWrite);
153: HTWatch_logFlush();
154: buf += toWrite;
155: len -= toWrite;
156: }
157: memcpy(LogBuff+LogLen, buf, len);
158: LogLen += len;
159: return HT_OK;
160: }
161:
1.38 ! eric 162: #include <sys/time.h>
! 163: #include <unistd.h>
! 164:
! 165: PRIVATE int _adjustGMT(long theTime)
! 166: {
! 167: static long adjustment = -1;
! 168: if (adjustment == -1) {
! 169: tzset();
! 170: adjustment = timezone;
! 171: }
! 172: return theTime-adjustment;
! 173: }
! 174:
! 175: PRIVATE int HTWatch_logTime(void)
! 176: {
! 177: char buff[20];
! 178: int len;
! 179: struct timeval tp;
! 180: struct timezone tz = {300, DST_USA};
! 181:
! 182: gettimeofday(&tp, &tz);
! 183: tp.tv_sec = _adjustGMT(tp.tv_sec)%(24*60*60);
! 184: len = sprintf(buff, "%02d:%02d:%02d.%d ", tp.tv_sec/3600, (tp.tv_sec%3600)/60, tp.tv_sec%60, tp.tv_usec);
! 185: HTWatch_logAdd(buff, len);
! 186: return tp.tv_sec;
! 187: }
! 188:
1.36 eric 189: PUBLIC void HTWatch_logClose (void)
190: {
191: #ifdef USE_SYSLOG
192: closelog();
193: #else /* USE_SYSLOG */
194: if (LogLen)
195: HTWatch_logFlush();
196: if (LogFile > 2)
197: close(LogFile);
198: if (LogBuff != NULL)
199: HT_FREE(LogBuff);
200: #endif /* !USE_SYSLOG */
201: }
202:
203: PUBLIC int HTWatch_logData (char * data, size_t len, const char * fmt, ...)
204: {
205: char buff[8200];
206: va_list pArgs;
1.38 ! eric 207: /* char * tptr;
! 208: time_t now; */
1.36 eric 209: int ret;
210: va_start(pArgs, fmt);
211: ret = vsprintf(buff, fmt, pArgs);
212: #ifdef USE_SYSLOG
213: syslog(LOG_DEBUG, "%s\n", buff);
214: if (len > 8192)
215: len = 8192;
216: strncpy(buff, data, len);
217: buff[len] = 0;
218: syslog(LOG_DEBUG, "%s\n", buff);
219: #else /* USE_SYSLOG */
1.38 ! eric 220: /*
1.36 eric 221: time(&now);
222: tptr = ctime(&now);
223: HTWatch_logAdd(tptr, strlen(tptr));
1.38 ! eric 224: */
! 225: HTWatch_logTime();
1.36 eric 226: HTWatch_logAdd(buff, ret);
227: HTWatch_logAdd("\n", 1);
228: HTWatch_logAdd(data, len);
229: #endif /* !USE_SYSLOG */
230: return ret;
231: }
232:
1.13 eric 233: /* Standard (non-error) Output
234: ** ---------------------------
235: */
236: PUBLIC int OutputData(const char * fmt, ...)
237: {
238: int ret;
239: va_list pArgs;
240: va_start(pArgs, fmt);
241: ret = vfprintf(stdout, fmt, pArgs);
242: va_end(pArgs);
243: return ret;
244: }
245:
246: /* ------------------------------------------------------------------------- */
247:
1.2 frystyk 248: /* Create a "HyperDoc" object
249: ** --------------------------
250: ** A HyperDoc object contains information about whether we have already
251: ** started checking the anchor and the depth in our search
252: */
253: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
254: {
255: HyperDoc * hd;
1.14 frystyk 256: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
257: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 258: hd->state = L_INVALID;
259: hd->depth = depth;
260:
261: /* Bind the HyperDoc object together with the Anchor Object */
262: hd->anchor = anchor;
263: HTAnchor_setDocument(anchor, (void *) hd);
264:
265: /* Add this HyperDoc object to our list */
266: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
267: HTList_addObject(mr->hyperdoc, (void *) hd);
268: return hd;
269: }
270:
271: /* Delete a "HyperDoc" object
272: ** --------------------------
273: */
274: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
275: {
276: if (hd) {
1.11 frystyk 277: HT_FREE (hd);
1.2 frystyk 278: return YES;
279: }
280: return NO;
281: }
282:
1.1 frystyk 283: /* Create a Command Line Object
284: ** ----------------------------
285: */
286: PRIVATE Robot * Robot_new (void)
287: {
288: Robot * me;
1.14 frystyk 289: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
290: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
291: HT_OUTOFMEM("Robot_new");
1.2 frystyk 292: me->hyperdoc = HTList_new();
1.4 frystyk 293: me->htext = HTList_new();
1.1 frystyk 294: me->tv->tv_sec = DEFAULT_TIMEOUT;
1.25 frystyk 295: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 296: me->output = OUTPUT;
1.35 eric 297: me->cnt = 0;
1.34 eric 298: me->fingers = HTList_new();
1.1 frystyk 299: return me;
300: }
301:
302: /* Delete a Command Line Object
303: ** ----------------------------
304: */
305: PRIVATE BOOL Robot_delete (Robot * me)
306: {
307: if (me) {
1.34 eric 308: HTList_delete(me->fingers);
1.2 frystyk 309: if (me->hyperdoc) {
310: HTList * cur = me->hyperdoc;
311: HyperDoc * pres;
312: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
313: HyperDoc_delete(pres);
314: HTList_delete(me->hyperdoc);
315: }
1.4 frystyk 316: if (me->htext) {
317: HTList * cur = me->htext;
318: HText * pres;
319: while ((pres = (HText *) HTList_nextObject(cur)))
320: HText_free(pres);
321: HTList_delete(me->htext);
322: }
1.1 frystyk 323: if (me->logfile) HTLog_close();
324: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 325: if (me->flags & MR_TIME) {
326: time_t local = time(NULL);
1.13 eric 327: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 328: }
1.11 frystyk 329: HT_FREE(me->cwd);
330: HT_FREE(me->tv);
331: HT_FREE(me);
1.1 frystyk 332: return YES;
333: }
334: return NO;
335: }
336:
1.2 frystyk 337: /*
1.34 eric 338: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 339: */
1.34 eric 340: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 341: {
1.34 eric 342: Finger * me;
343: HTRequest * request = HTRequest_new();
344: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
345: HT_OUTOFMEM("Finger_new");
346: me->robot = robot;
347: me->request = request;
348: me->dest = dest;
349: HTList_addObject(robot->fingers, (void *)me);
350:
351: HTRequest_setContext (request, me);
352: if (robot->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(request, YES);
353: HTRequest_addRqHd(request, HT_C_HOST);
354: HTRequest_setMethod(request, method);
355: robot->cnt++;
356: return me;
1.2 frystyk 357: }
358:
1.34 eric 359: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 360: {
1.34 eric 361: HTList_removeObject(me->robot->fingers, (void *)me);
362: me->robot->cnt--;
1.37 frystyk 363:
364: /*
365: ** If we are down at one request then flush the output buffer
366: */
367: if (me->request) {
368: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 369: HTRequest_delete(me->request);
1.37 frystyk 370: }
371:
372: /*
373: ** Delete the request and free myself
374: */
1.34 eric 375: HT_FREE(me);
376: return YES;
1.2 frystyk 377: }
378:
379: /*
380: ** Cleanup and make sure we close all connections including the persistent
381: ** ones
382: */
1.1 frystyk 383: PRIVATE void Cleanup (Robot * me, int status)
384: {
385: Robot_delete(me);
1.29 eric 386: HTProfile_delete();
1.36 eric 387: HTWatch_logClose();
1.1 frystyk 388: #ifdef VMS
389: exit(status ? status : 1);
390: #else
391: exit(status ? status : 0);
392: #endif
393: }
394:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	This function sets up signal handlers. This might not be necessary to
**	call if the application has its own handlers (lossage on SVR4).
**
**	On some systems (SYSV) SIGPIPE has to be dealt with when attempting to
**	connect to a remote host where you normally should get `connection
**	refused' back, so the signal is set to be ignored here.
*/
PRIVATE void SetSignal (void)
{
    int ignored = (signal(SIGPIPE, SIG_IGN) != SIG_ERR);

    if (PROT_TRACE) {
	if (ignored)
	    HTTrace("HTSignal.... Ignoring SIGPIPE\n");
	else
	    HTTrace("HTSignal.... Can't catch SIGPIPE\n");
    }
}
#endif /* CATCH_SIG */
414:
415: PRIVATE void VersionInfo (void)
416: {
1.13 eric 417: OutputData("\n\nW3C Reference Software\n\n");
418: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 419: APP_NAME, APP_VERSION);
1.13 eric 420: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
421: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 422: }
423:
424: /* terminate_handler
425: ** -----------------
1.2 frystyk 426: ** This function is registered to handle the result of the request.
427: ** If no more requests are pending then terminate program
1.1 frystyk 428: */
1.32 frystyk 429: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
430: void * param, int status)
1.1 frystyk 431: {
1.34 eric 432: Finger * finger = (Finger *) HTRequest_context(request);
433: Robot * robot = finger->robot;
434: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
435: Finger_delete(finger);
1.37 frystyk 436: if (robot->cnt <= 0) {
1.34 eric 437: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.37 frystyk 438: Cleanup(robot, 0); /* No way back from here */
1.30 frystyk 439: }
1.37 frystyk 440:
441: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", robot->cnt, robot->cnt == 1 ? "" : "s");
1.1 frystyk 442: return HT_OK;
443: }
444:
#if 0
/*	timeout_handler
**	---------------
**	This function is registered to handle timeout in select eventloop.
**	The whole function is currently compiled out.
**
**	BUG: This doesn't work as we don't get the right request object
**	back from the event loop
*/
PRIVATE int timeout_handler (HTRequest * request)
{
    if (SHOW_MSG)
	HTTrace("Robot....... We don't know how to handle timeout...\n");
#if 0
    {
	Finger * finger = (Finger *) HTRequest_context(request);
	HTRequest_kill(request);
	Finger_delete(finger);
    }
#endif
    return HT_OK;
}
#endif
1.1 frystyk 466:
467: /* ------------------------------------------------------------------------- */
468: /* HTEXT INTERFACE */
469: /* ------------------------------------------------------------------------- */
470:
471: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
472: HTStream * stream)
473: {
474: HText * me;
1.34 eric 475: Finger * finger = (Finger *) HTRequest_context(request);
476: Robot * mr = finger->robot;
1.14 frystyk 477: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
478: HT_OUTOFMEM("HText_new2");
1.4 frystyk 479:
480: /* Bind the HText object together with the Request Object */
1.1 frystyk 481: me->request = request;
1.4 frystyk 482:
483: /* Add this HyperDoc object to our list */
484: if (!mr->htext) mr->htext = HTList_new();
485: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 486: return me;
487: }
488:
1.4 frystyk 489: PUBLIC void HText_free (HText * me) {
1.11 frystyk 490: if (me) HT_FREE (me);
1.4 frystyk 491: }
492:
1.1 frystyk 493: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
494: {
495: if (text && anchor) {
1.34 eric 496: Finger * finger = (Finger *) HTRequest_context(text->request);
497: Robot * mr = finger->robot;
1.1 frystyk 498: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
499: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 500: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 501: HyperDoc * hd = HTAnchor_document(dest_parent);
502:
1.13 eric 503: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 504:
1.2 frystyk 505: /* Test whether we already have a hyperdoc for this document */
506: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 507: HTParentAnchor * parent = HTRequest_parent(text->request);
508: HyperDoc * last = HTAnchor_document(parent);
509: int depth = last ? last->depth+1 : 0;
1.34 eric 510: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
511: HTRequest * newreq = newfinger->request;
1.2 frystyk 512: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 513: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
514: if (depth >= mr->depth) {
515: if (SHOW_MSG)
1.13 eric 516: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 517: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 518: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 519: } else {
1.13 eric 520: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 521: }
522: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 523: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 524: Finger_delete(newfinger);
1.2 frystyk 525: }
1.7 frystyk 526: } else {
1.18 frystyk 527: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 528: }
1.11 frystyk 529: HT_FREE(uri);
1.2 frystyk 530: }
531: }
532:
533: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 534: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 535: {
536: if (text && anchor) {
1.34 eric 537: Finger * finger = (Finger *) HTRequest_context(text->request);
538: Robot * mr = finger->robot;
1.2 frystyk 539: HTParentAnchor * dest = (HTParentAnchor *)
540: HTAnchor_followMainLink((HTAnchor *) anchor);
541: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 542:
1.2 frystyk 543: /* Test whether we already have a hyperdoc for this document */
544: if (mr->flags & MR_IMG && dest && !hd) {
545: HTParentAnchor * parent = HTRequest_parent(text->request);
546: HyperDoc * last = HTAnchor_document(parent);
547: int depth = last ? last->depth+1 : 0;
1.34 eric 548: Finger * newfinger = Finger_new(mr, dest, METHOD_HEAD);
549: HTRequest * newreq = newfinger->request;
1.2 frystyk 550: HyperDoc_new(mr, dest, depth);
551: if (SHOW_MSG) {
552: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 553: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 554: HT_FREE(uri);
1.2 frystyk 555: }
556: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
557: if (SHOW_MSG)
1.13 eric 558: HTTrace("Robot....... Image not tested!\n");
1.34 eric 559: Finger_delete(newfinger);
1.1 frystyk 560: }
561: }
562: }
563: }
564:
565: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 566: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 567: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
568: PUBLIC void HText_endAppend (HText * text) {}
569: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
570: PUBLIC void HText_beginAppend (HText * text) {}
571: PUBLIC void HText_appendParagraph (HText * text) {}
572:
573: /* ------------------------------------------------------------------------- */
574: /* MAIN PROGRAM */
575: /* ------------------------------------------------------------------------- */
576:
577: int main (int argc, char ** argv)
578: {
579: int status = 0;
580: int arg;
581: HTChunk * keywords = NULL; /* From command line */
582: int keycnt = 0;
1.12 frystyk 583: Robot * mr = NULL;
1.34 eric 584: Finger * finger;
585: HTParentAnchor * startAnchor;
1.1 frystyk 586:
587: /* Starts Mac GUSI socket library */
588: #ifdef GUSI
589: GUSISetup(GUSIwithSIOUXSockets);
590: GUSISetup(GUSIwithInternetSockets);
591: #endif
592:
593: #ifdef __MWERKS__ /* STR */
594: InitGraf((Ptr) &qd.thePort);
595: InitFonts();
596: InitWindows();
597: InitMenus(); TEInit();
598: InitDialogs(nil);
599: InitCursor();
600: SIOUXSettings.asktosaveonclose = false;
601: argc=ccommand(&argv);
602: #endif
603:
1.36 eric 604: HTWatch_logOpen("HTRobot", LOG_NDELAY, LOG_USER);
1.27 frystyk 605: /* Initiate W3C Reference Library with a robot profile */
606: HTProfile_newRobot(APP_NAME, APP_VERSION);
607:
608: /* Add the default HTML parser to the set of converters */
609: {
610: HTList * converters = HTFormat_conversion();
611: HTMLInit(converters);
612: }
1.1 frystyk 613:
1.12 frystyk 614: /* Build a new robot object */
615: mr = Robot_new();
616:
1.1 frystyk 617: /* Scan command Line for parameters */
618: for (arg=1; arg<argc; arg++) {
619: if (*argv[arg] == '-') {
620:
621: /* non-interactive */
1.17 frystyk 622: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 623: HTAlert_setInteractive(NO);
624:
625: /* log file */
626: } else if (!strcmp(argv[arg], "-l")) {
627: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
628: argv[++arg] : DEFAULT_LOG_FILE;
629:
630: /* rule file */
631: } else if (!strcmp(argv[arg], "-r")) {
632: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
633: argv[++arg] : DEFAULT_RULE_FILE;
634:
635: /* output filename */
636: } else if (!strcmp(argv[arg], "-o")) {
637: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
638: argv[++arg] : DEFAULT_OUTPUT_FILE;
639:
640: /* timeout -- Change the default request timeout */
641: } else if (!strcmp(argv[arg], "-timeout")) {
642: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
643: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
644: if (timeout > 0) mr->tv->tv_sec = timeout;
645:
1.7 frystyk 646: /* preemptive or non-preemptive access */
1.1 frystyk 647: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 648: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 649:
650: /* test inlined images */
651: } else if (!strcmp(argv[arg], "-img")) {
652: mr->flags |= MR_IMG;
653:
654: /* load anchors */
655: } else if (!strcmp(argv[arg], "-link")) {
656: mr->flags |= MR_LINK;
1.7 frystyk 657: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
658: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 659:
1.12 frystyk 660: /* Output start and end time */
661: } else if (!strcmp(argv[arg], "-ss")) {
662: time_t local = time(NULL);
1.13 eric 663: HTTrace("Robot started on %s\n",
1.12 frystyk 664: HTDateTimeStr(&local, YES));
665: mr->flags |= MR_TIME;
666:
1.1 frystyk 667: /* print version and exit */
668: } else if (!strcmp(argv[arg], "-version")) {
669: VersionInfo();
670: Cleanup(mr, 0);
671:
672: #ifdef WWWTRACE
673: /* trace flags */
674: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 675: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 676: #endif
677:
678: } else {
1.13 eric 679: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 680: }
1.17 frystyk 681: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 682: if (!keycnt) {
683: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 684: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
685: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 686: keycnt = 1;
1.11 frystyk 687: HT_FREE(ref);
1.1 frystyk 688: } else { /* Check for successive keyword arguments */
689: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
690: if (keycnt++ <= 1)
1.5 frystyk 691: keywords = HTChunk_new(128);
1.1 frystyk 692: else
1.5 frystyk 693: HTChunk_putc(keywords, ' ');
694: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 695: HT_FREE(escaped);
1.1 frystyk 696: }
697: }
698: }
699:
700: #ifdef CATCH_SIG
701: SetSignal();
702: #endif
703:
704: if (!keycnt) {
1.13 eric 705: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 706: Cleanup(mr, -1);
707: }
708:
1.23 manoli 709: /* Testing that HTTrace is working */
710: HTTrace ("Welcome to the W3C mini Robot\n");
711:
1.1 frystyk 712: /* Rule file specified? */
713: if (mr->rules) {
714: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 715: if (!HTLoadRules(rules))
1.13 eric 716: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 717: HT_FREE(rules);
1.1 frystyk 718: }
719:
720: /* Output file specified? */
721: if (mr->outputfile) {
722: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 723: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 724: mr->output = OUTPUT;
725: }
726: }
727:
728: /* Log file specifed? */
729: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
730:
1.27 frystyk 731: /* Register our own someterminater filter */
1.32 frystyk 732: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.37 frystyk 733:
1.34 eric 734: /* Start the request */
735: finger = Finger_new(mr, startAnchor, METHOD_GET);
736: if (mr->flags & MR_PREEMPTIVE)
737: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 738:
739: if (keywords) /* Search */
1.34 eric 740: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 741: else
1.34 eric 742: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 743:
1.5 frystyk 744: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 745: if (status != YES) {
1.13 eric 746: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 747: Cleanup(mr, -1);
748: }
749:
750: /* Go into the event loop... */
1.34 eric 751: HTEventList_loop(finger->request);
1.1 frystyk 752:
753: /* Only gets here if event loop fails */
754: Cleanup(mr, 0);
755: return 0;
756: }
Webmaster