Annotation of libwww/Robot/src/HTRobot.c, revision 1.35
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
23: #include "HTRobot.h" /* Implemented here */
1.33 eric 24: #include "HTWatch.h"
1.1 frystyk 25:
/* Version string reported by VersionInfo(); the build may predefine it */
#if !defined(W3C_VERSION)
#define W3C_VERSION		"Unspecified"
#endif

#define APP_NAME		"W3CRobot"
#define APP_VERSION		W3C_VERSION

/* Fallbacks used by main() when -o, -r, -l or -link has no argument */
#define DEFAULT_OUTPUT_FILE	"robot.out"
#define DEFAULT_RULE_FILE	"robot.conf"
#define DEFAULT_LOG_FILE	"robot.log"
#define DEFAULT_DEPTH		0

/* Non-zero when progress messages should be traced */
#define SHOW_MSG		(WWWTRACE || HTAlert_interactive())

#define DEFAULT_TIMEOUT		10		/* timeout in seconds */

/* Builds that define __svr4__ compile and call SetSignal() */
#ifdef __svr4__
#define CATCH_SIG
#endif
45:
/* Option bits stored in Robot.flags; main() sets them from the command line */
typedef enum _MRFlags {
    MR_IMG		= (1 << 0),	/* -img    : check inlined images */
    MR_LINK		= (1 << 1),	/* -link   : load anchors */
    MR_PREEMPTIVE	= (1 << 2),	/* -single : preemptive requests */
    MR_TIME		= (1 << 3)	/* -ss     : trace start and end time */
} MRFlags;
52:
53: typedef struct _Robot {
1.7 frystyk 54: HTRequest * timeout; /* Until we get a server eventloop */
1.2 frystyk 55: int depth; /* How deep is our tree */
1.30 frystyk 56: int cnt; /* Count of requests */
1.2 frystyk 57: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 58: HTList * htext; /* List of our HText Objects */
1.34 eric 59: HTList * fingers;
1.1 frystyk 60: struct timeval * tv; /* Timeout on socket */
61: char * cwd; /* Current dir URL */
62: char * rules;
63: char * logfile;
64: char * outputfile;
65: FILE * output;
66: MRFlags flags;
67: } Robot;
1.34 eric 68:
69: typedef struct _Finger {
70: Robot * robot;
71: HTRequest * request;
72: HTParentAnchor * dest;
73: } Finger;
74:
/* Load status recorded in a HyperDoc; HyperDoc_new() starts at L_INVALID */
typedef enum _LoadState {
    L_INVALID	= -2,
    L_LOADING	= -1,
    L_SUCCESS	=  0,
    L_ERROR	=  1
} LoadState;
81:
82: /*
83: ** The HyperDoc object is bound to the anchor and contains information about
84: ** where we are in the search for recursive searches
85: */
86: typedef struct _HyperDoc {
87: HTParentAnchor * anchor;
88: LoadState state;
89: int depth;
90: } HyperDoc;
91:
92: /*
93: ** This is the HText object that is created every time we start parsing a
94: ** HTML object
95: */
1.4 frystyk 96: struct _HText {
1.1 frystyk 97: HTRequest * request;
1.4 frystyk 98: };
1.1 frystyk 99:
100: PUBLIC HText * HTMainText = NULL;
101: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
102: PUBLIC HTStyleSheet * styleSheet = NULL;
103:
104: /* ------------------------------------------------------------------------- */
105:
1.33 eric 106: PUBLIC int HTWatch(int id, void * obj, const char * fmt, ...)
107: {
108: va_list pArgs;
109: va_start(pArgs, fmt);
110: fprintf(stderr, "id: %x obj: %p: ", id, obj);
111: return vfprintf(stderr, fmt, pArgs);
112: }
113:
1.13 eric 114: /* Standard (non-error) Output
115: ** ---------------------------
116: */
117: PUBLIC int OutputData(const char * fmt, ...)
118: {
119: int ret;
120: va_list pArgs;
121: va_start(pArgs, fmt);
122: ret = vfprintf(stdout, fmt, pArgs);
123: va_end(pArgs);
124: return ret;
125: }
126:
127: /* ------------------------------------------------------------------------- */
128:
1.2 frystyk 129: /* Create a "HyperDoc" object
130: ** --------------------------
131: ** A HyperDoc object contains information about whether we have already
132: ** started checking the anchor and the depth in our search
133: */
134: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
135: {
136: HyperDoc * hd;
1.14 frystyk 137: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
138: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 139: hd->state = L_INVALID;
140: hd->depth = depth;
141:
142: /* Bind the HyperDoc object together with the Anchor Object */
143: hd->anchor = anchor;
144: HTAnchor_setDocument(anchor, (void *) hd);
145:
146: /* Add this HyperDoc object to our list */
147: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
148: HTList_addObject(mr->hyperdoc, (void *) hd);
149: return hd;
150: }
151:
152: /* Delete a "HyperDoc" object
153: ** --------------------------
154: */
155: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
156: {
157: if (hd) {
1.11 frystyk 158: HT_FREE (hd);
1.2 frystyk 159: return YES;
160: }
161: return NO;
162: }
163:
1.1 frystyk 164: /* Create a Command Line Object
165: ** ----------------------------
166: */
167: PRIVATE Robot * Robot_new (void)
168: {
169: Robot * me;
1.14 frystyk 170: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
171: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
172: HT_OUTOFMEM("Robot_new");
1.2 frystyk 173: me->hyperdoc = HTList_new();
1.4 frystyk 174: me->htext = HTList_new();
1.1 frystyk 175: me->tv->tv_sec = DEFAULT_TIMEOUT;
1.25 frystyk 176: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 177: me->output = OUTPUT;
1.35 ! eric 178: me->cnt = 0;
1.34 eric 179: me->fingers = HTList_new();
1.1 frystyk 180:
1.7 frystyk 181: /* We keep an extra timeout request object for the timeout_handler */
182: me->timeout = HTRequest_new();
183: HTRequest_setContext (me->timeout, me);
184:
1.1 frystyk 185: return me;
186: }
187:
188: /* Delete a Command Line Object
189: ** ----------------------------
190: */
191: PRIVATE BOOL Robot_delete (Robot * me)
192: {
193: if (me) {
1.34 eric 194: HTList_delete(me->fingers);
1.2 frystyk 195: if (me->hyperdoc) {
196: HTList * cur = me->hyperdoc;
197: HyperDoc * pres;
198: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
199: HyperDoc_delete(pres);
200: HTList_delete(me->hyperdoc);
201: }
1.4 frystyk 202: if (me->htext) {
203: HTList * cur = me->htext;
204: HText * pres;
205: while ((pres = (HText *) HTList_nextObject(cur)))
206: HText_free(pres);
207: HTList_delete(me->htext);
208: }
1.1 frystyk 209: if (me->logfile) HTLog_close();
210: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 211: if (me->flags & MR_TIME) {
212: time_t local = time(NULL);
1.13 eric 213: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 214: }
1.11 frystyk 215: HT_FREE(me->cwd);
216: HT_FREE(me->tv);
217: HT_FREE(me);
1.1 frystyk 218: return YES;
219: }
220: return NO;
221: }
222:
1.2 frystyk 223: /*
1.34 eric 224: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 225: */
1.34 eric 226: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 227: {
1.34 eric 228: Finger * me;
229: HTRequest * request = HTRequest_new();
230: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
231: HT_OUTOFMEM("Finger_new");
232: me->robot = robot;
233: me->request = request;
234: me->dest = dest;
235: HTList_addObject(robot->fingers, (void *)me);
236:
237: HTRequest_setContext (request, me);
238: if (robot->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(request, YES);
239: HTRequest_addRqHd(request, HT_C_HOST);
240: HTRequest_setMethod(request, method);
241: robot->cnt++;
242: return me;
1.2 frystyk 243: }
244:
1.34 eric 245: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 246: {
1.34 eric 247: HTList_removeObject(me->robot->fingers, (void *)me);
248: me->robot->cnt--;
249: if (me->request)
250: HTRequest_delete(me->request);
251: HT_FREE(me);
252: return YES;
1.2 frystyk 253: }
254:
255: /*
256: ** Cleanup and make sure we close all connections including the persistent
257: ** ones
258: */
1.1 frystyk 259: PRIVATE void Cleanup (Robot * me, int status)
260: {
261: Robot_delete(me);
1.29 eric 262: HTProfile_delete();
1.1 frystyk 263: #ifdef VMS
264: exit(status ? status : 1);
265: #else
266: exit(status ? status : 0);
267: #endif
268: }
269:
270: #ifdef CATCH_SIG
271: #include <signal.h>
272: /* SetSignal
273: ** This function sets up signal handlers. This might not be necessary to
274: ** call if the application has its own handlers (lossage on SVR4)
275: */
276: PRIVATE void SetSignal (void)
277: {
278: /* On some systems (SYSV) it is necessary to catch the SIGPIPE signal
279: ** when attemting to connect to a remote host where you normally should
280: ** get `connection refused' back
281: */
282: if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
1.13 eric 283: if (PROT_TRACE) HTTrace("HTSignal.... Can't catch SIGPIPE\n");
1.1 frystyk 284: } else {
1.13 eric 285: if (PROT_TRACE) HTTrace("HTSignal.... Ignoring SIGPIPE\n");
1.1 frystyk 286: }
287: }
288: #endif /* CATCH_SIG */
289:
290: PRIVATE void VersionInfo (void)
291: {
1.13 eric 292: OutputData("\n\nW3C Reference Software\n\n");
293: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 294: APP_NAME, APP_VERSION);
1.13 eric 295: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
296: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 297: }
298:
299: /* terminate_handler
300: ** -----------------
1.2 frystyk 301: ** This function is registered to handle the result of the request.
302: ** If no more requests are pending then terminate program
1.1 frystyk 303: */
1.32 frystyk 304: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
305: void * param, int status)
1.1 frystyk 306: {
1.35 ! eric 307: /* int count = HTNet_count(); */
1.34 eric 308: Finger * finger = (Finger *) HTRequest_context(request);
309: Robot * robot = finger->robot;
310: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
311: Finger_delete(finger);
1.35 ! eric 312: switch (robot->cnt) {
1.34 eric 313: case 0:
314: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
315: Cleanup(robot, 0);
316: case 1:
317: HTRequest_forceFlush(request);
318: default:
1.35 ! eric 319: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", robot->cnt, robot->cnt == 1 ? "" : "s");
1.30 frystyk 320: }
1.1 frystyk 321: return HT_OK;
322: }
323:
324: /* timeout_handler
325: ** ---------------
326: ** This function is registered to handle timeout in select eventloop
1.7 frystyk 327: **
328: ** BUG: This doesn't work as we don't get the right request object
329: ** back from the event loop
1.1 frystyk 330: */
331: PRIVATE int timeout_handler (HTRequest * request)
332: {
1.27 frystyk 333: #if 0
1.34 eric 334: Finger * finger = (Finger *) HTRequest_context(request);
1.27 frystyk 335: #endif
1.25 frystyk 336: if (SHOW_MSG) HTTrace("Robot....... We don't know how to handle timeout...\n");
1.7 frystyk 337: #if 0
1.1 frystyk 338: HTRequest_kill(request);
1.34 eric 339: Finger_delete(finger);
1.7 frystyk 340: #endif
1.4 frystyk 341: return HT_OK;
1.1 frystyk 342: }
343:
344: /* ------------------------------------------------------------------------- */
345: /* HTEXT INTERFACE */
346: /* ------------------------------------------------------------------------- */
347:
348: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
349: HTStream * stream)
350: {
351: HText * me;
1.34 eric 352: Finger * finger = (Finger *) HTRequest_context(request);
353: Robot * mr = finger->robot;
1.14 frystyk 354: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
355: HT_OUTOFMEM("HText_new2");
1.4 frystyk 356:
357: /* Bind the HText object together with the Request Object */
1.1 frystyk 358: me->request = request;
1.4 frystyk 359:
360: /* Add this HyperDoc object to our list */
361: if (!mr->htext) mr->htext = HTList_new();
362: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 363: return me;
364: }
365:
1.4 frystyk 366: PUBLIC void HText_free (HText * me) {
1.11 frystyk 367: if (me) HT_FREE (me);
1.4 frystyk 368: }
369:
1.1 frystyk 370: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
371: {
372: if (text && anchor) {
1.34 eric 373: Finger * finger = (Finger *) HTRequest_context(text->request);
374: Robot * mr = finger->robot;
1.1 frystyk 375: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
376: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 377: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 378: HyperDoc * hd = HTAnchor_document(dest_parent);
379:
1.13 eric 380: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 381:
1.2 frystyk 382: /* Test whether we already have a hyperdoc for this document */
383: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 384: HTParentAnchor * parent = HTRequest_parent(text->request);
385: HyperDoc * last = HTAnchor_document(parent);
386: int depth = last ? last->depth+1 : 0;
1.34 eric 387: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
388: HTRequest * newreq = newfinger->request;
1.2 frystyk 389: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 390: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
391: if (depth >= mr->depth) {
392: if (SHOW_MSG)
1.13 eric 393: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 394: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 395: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 396: } else {
1.13 eric 397: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 398: }
399: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 400: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 401: Finger_delete(newfinger);
1.2 frystyk 402: }
1.7 frystyk 403: } else {
1.18 frystyk 404: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 405: }
1.11 frystyk 406: HT_FREE(uri);
1.2 frystyk 407: }
408: }
409:
410: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 411: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 412: {
413: if (text && anchor) {
1.34 eric 414: Finger * finger = (Finger *) HTRequest_context(text->request);
415: Robot * mr = finger->robot;
1.2 frystyk 416: HTParentAnchor * dest = (HTParentAnchor *)
417: HTAnchor_followMainLink((HTAnchor *) anchor);
418: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 419:
1.2 frystyk 420: /* Test whether we already have a hyperdoc for this document */
421: if (mr->flags & MR_IMG && dest && !hd) {
422: HTParentAnchor * parent = HTRequest_parent(text->request);
423: HyperDoc * last = HTAnchor_document(parent);
424: int depth = last ? last->depth+1 : 0;
1.34 eric 425: Finger * newfinger = Finger_new(mr, dest, METHOD_HEAD);
426: HTRequest * newreq = newfinger->request;
1.2 frystyk 427: HyperDoc_new(mr, dest, depth);
428: if (SHOW_MSG) {
429: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 430: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 431: HT_FREE(uri);
1.2 frystyk 432: }
433: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
434: if (SHOW_MSG)
1.13 eric 435: HTTrace("Robot....... Image not tested!\n");
1.34 eric 436: Finger_delete(newfinger);
1.1 frystyk 437: }
438: }
439: }
440: }
441:
442: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 443: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 444: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
445: PUBLIC void HText_endAppend (HText * text) {}
446: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
447: PUBLIC void HText_beginAppend (HText * text) {}
448: PUBLIC void HText_appendParagraph (HText * text) {}
449:
450: /* ------------------------------------------------------------------------- */
451: /* MAIN PROGRAM */
452: /* ------------------------------------------------------------------------- */
453:
454: int main (int argc, char ** argv)
455: {
456: int status = 0;
457: int arg;
458: HTChunk * keywords = NULL; /* From command line */
459: int keycnt = 0;
1.12 frystyk 460: Robot * mr = NULL;
1.34 eric 461: Finger * finger;
462: HTParentAnchor * startAnchor;
1.1 frystyk 463:
464: /* Starts Mac GUSI socket library */
465: #ifdef GUSI
466: GUSISetup(GUSIwithSIOUXSockets);
467: GUSISetup(GUSIwithInternetSockets);
468: #endif
469:
470: #ifdef __MWERKS__ /* STR */
471: InitGraf((Ptr) &qd.thePort);
472: InitFonts();
473: InitWindows();
474: InitMenus(); TEInit();
475: InitDialogs(nil);
476: InitCursor();
477: SIOUXSettings.asktosaveonclose = false;
478: argc=ccommand(&argv);
479: #endif
480:
1.27 frystyk 481: /* Initiate W3C Reference Library with a robot profile */
482: HTProfile_newRobot(APP_NAME, APP_VERSION);
483:
484: /* Add the default HTML parser to the set of converters */
485: {
486: HTList * converters = HTFormat_conversion();
487: HTMLInit(converters);
488: }
1.1 frystyk 489:
1.12 frystyk 490: /* Build a new robot object */
491: mr = Robot_new();
492:
1.1 frystyk 493: /* Scan command Line for parameters */
494: for (arg=1; arg<argc; arg++) {
495: if (*argv[arg] == '-') {
496:
497: /* non-interactive */
1.17 frystyk 498: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 499: HTAlert_setInteractive(NO);
500:
501: /* log file */
502: } else if (!strcmp(argv[arg], "-l")) {
503: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
504: argv[++arg] : DEFAULT_LOG_FILE;
505:
506: /* rule file */
507: } else if (!strcmp(argv[arg], "-r")) {
508: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
509: argv[++arg] : DEFAULT_RULE_FILE;
510:
511: /* output filename */
512: } else if (!strcmp(argv[arg], "-o")) {
513: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
514: argv[++arg] : DEFAULT_OUTPUT_FILE;
515:
516: /* timeout -- Change the default request timeout */
517: } else if (!strcmp(argv[arg], "-timeout")) {
518: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
519: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
520: if (timeout > 0) mr->tv->tv_sec = timeout;
521:
1.7 frystyk 522: /* preemptive or non-preemptive access */
1.1 frystyk 523: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 524: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 525:
526: /* test inlined images */
527: } else if (!strcmp(argv[arg], "-img")) {
528: mr->flags |= MR_IMG;
529:
530: /* load anchors */
531: } else if (!strcmp(argv[arg], "-link")) {
532: mr->flags |= MR_LINK;
1.7 frystyk 533: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
534: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 535:
1.12 frystyk 536: /* Output start and end time */
537: } else if (!strcmp(argv[arg], "-ss")) {
538: time_t local = time(NULL);
1.13 eric 539: HTTrace("Robot started on %s\n",
1.12 frystyk 540: HTDateTimeStr(&local, YES));
541: mr->flags |= MR_TIME;
542:
1.1 frystyk 543: /* print version and exit */
544: } else if (!strcmp(argv[arg], "-version")) {
545: VersionInfo();
546: Cleanup(mr, 0);
547:
548: #ifdef WWWTRACE
549: /* trace flags */
550: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 551: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 552: #endif
553:
554: } else {
1.13 eric 555: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 556: }
1.17 frystyk 557: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 558: if (!keycnt) {
559: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 560: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
561: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 562: keycnt = 1;
1.11 frystyk 563: HT_FREE(ref);
1.1 frystyk 564: } else { /* Check for successive keyword arguments */
565: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
566: if (keycnt++ <= 1)
1.5 frystyk 567: keywords = HTChunk_new(128);
1.1 frystyk 568: else
1.5 frystyk 569: HTChunk_putc(keywords, ' ');
570: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 571: HT_FREE(escaped);
1.1 frystyk 572: }
573: }
574: }
575:
576: #ifdef CATCH_SIG
577: SetSignal();
578: #endif
579:
580: if (!keycnt) {
1.13 eric 581: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 582: Cleanup(mr, -1);
583: }
584:
1.23 manoli 585: /* Testing that HTTrace is working */
586: HTTrace ("Welcome to the W3C mini Robot\n");
587:
1.1 frystyk 588: /* Rule file specified? */
589: if (mr->rules) {
590: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 591: if (!HTLoadRules(rules))
1.13 eric 592: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 593: HT_FREE(rules);
1.1 frystyk 594: }
595:
596: /* Output file specified? */
597: if (mr->outputfile) {
598: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 599: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 600: mr->output = OUTPUT;
601: }
602: }
603:
604: /* Log file specifed? */
605: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
606:
1.27 frystyk 607: /* Register our own someterminater filter */
1.32 frystyk 608: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.34 eric 609: #if 0
1.1 frystyk 610: /* Set timeout on sockets */
1.33 eric 611: HTEventList_registerTimeout(mr->tv, mr->timeout, timeout_handler, NO);
1.34 eric 612: #endif
613: /* Start the request */
614: finger = Finger_new(mr, startAnchor, METHOD_GET);
615: if (mr->flags & MR_PREEMPTIVE)
616: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 617:
618: if (keywords) /* Search */
1.34 eric 619: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 620: else
1.34 eric 621: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 622:
1.5 frystyk 623: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 624: if (status != YES) {
1.13 eric 625: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 626: Cleanup(mr, -1);
627: }
628:
629: /* Go into the event loop... */
1.34 eric 630: HTEventList_loop(finger->request);
1.1 frystyk 631:
632: /* Only gets here if event loop fails */
633: Cleanup(mr, 0);
634: return 0;
635: }
Webmaster