Annotation of libwww/Robot/src/HTRobot.c, revision 1.39
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
1.39 ! eric 23: #include "HTMemLog.h"
1.1 frystyk 24: #include "HTRobot.h" /* Implemented here */
25:
/* Use a placeholder when the build does not supply a library version */
#ifndef W3C_VERSION
#define W3C_VERSION             "Unspecified"
#endif

/* Name and version handed to HTProfile_newRobot() and shown by VersionInfo() */
#define APP_NAME                "W3CRobot"
#define APP_VERSION             W3C_VERSION

/* Values used when a command line option is given without an argument */
#define DEFAULT_OUTPUT_FILE     "robot.out"
#define DEFAULT_RULE_FILE       "robot.conf"
#define DEFAULT_LOG_FILE        "robot.log"
#define DEFAULT_DEPTH           0
#define DEFAULT_TIMEOUT         10              /* timeout in seconds */

/* Progress messages are traced when tracing is on or alerts are interactive */
#define SHOW_MSG                (WWWTRACE || HTAlert_interactive())

/* SetSignal() is only compiled and called when CATCH_SIG is defined */
#if defined(__svr4__)
#define CATCH_SIG
#endif
45:
/* Option bits kept in Robot.flags; each one is set by a command line switch */
typedef enum _MRFlags {
    MR_IMG        = 1 << 0,     /* -img:    test inlined images */
    MR_LINK       = 1 << 1,     /* -link:   load anchors */
    MR_PREEMPTIVE = 1 << 2,     /* -single: preemptive requests */
    MR_TIME       = 1 << 3      /* -ss:     output start and end time */
} MRFlags;
52:
53: typedef struct _Robot {
1.2 frystyk 54: int depth; /* How deep is our tree */
1.30 frystyk 55: int cnt; /* Count of requests */
1.2 frystyk 56: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 57: HTList * htext; /* List of our HText Objects */
1.34 eric 58: HTList * fingers;
1.1 frystyk 59: struct timeval * tv; /* Timeout on socket */
60: char * cwd; /* Current dir URL */
61: char * rules;
62: char * logfile;
63: char * outputfile;
64: FILE * output;
65: MRFlags flags;
66: } Robot;
1.34 eric 67:
68: typedef struct _Finger {
69: Robot * robot;
70: HTRequest * request;
71: HTParentAnchor * dest;
72: } Finger;
73:
/* Load status recorded in a HyperDoc */
typedef enum _LoadState {
    L_INVALID = -2,             /* Initial state set by HyperDoc_new() */
    L_LOADING = -1,
    L_SUCCESS = 0,
    L_ERROR   = 1
} LoadState;
80:
81: /*
82: ** The HyperDoc object is bound to the anchor and contains information about
83: ** where we are in the search for recursive searches
84: */
85: typedef struct _HyperDoc {
86: HTParentAnchor * anchor;
87: LoadState state;
88: int depth;
89: } HyperDoc;
90:
91: /*
92: ** This is the HText object that is created every time we start parsing a
93: ** HTML object
94: */
1.4 frystyk 95: struct _HText {
1.1 frystyk 96: HTRequest * request;
1.4 frystyk 97: };
1.1 frystyk 98:
99: PUBLIC HText * HTMainText = NULL;
100: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
101: PUBLIC HTStyleSheet * styleSheet = NULL;
102:
103: /* ------------------------------------------------------------------------- */
104:
1.13 eric 105: /* Standard (non-error) Output
106: ** ---------------------------
107: */
108: PUBLIC int OutputData(const char * fmt, ...)
109: {
110: int ret;
111: va_list pArgs;
112: va_start(pArgs, fmt);
113: ret = vfprintf(stdout, fmt, pArgs);
114: va_end(pArgs);
115: return ret;
116: }
117:
118: /* ------------------------------------------------------------------------- */
119:
1.2 frystyk 120: /* Create a "HyperDoc" object
121: ** --------------------------
122: ** A HyperDoc object contains information about whether we have already
123: ** started checking the anchor and the depth in our search
124: */
125: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
126: {
127: HyperDoc * hd;
1.14 frystyk 128: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
129: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 130: hd->state = L_INVALID;
131: hd->depth = depth;
132:
133: /* Bind the HyperDoc object together with the Anchor Object */
134: hd->anchor = anchor;
135: HTAnchor_setDocument(anchor, (void *) hd);
136:
137: /* Add this HyperDoc object to our list */
138: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
139: HTList_addObject(mr->hyperdoc, (void *) hd);
140: return hd;
141: }
142:
143: /* Delete a "HyperDoc" object
144: ** --------------------------
145: */
146: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
147: {
148: if (hd) {
1.11 frystyk 149: HT_FREE (hd);
1.2 frystyk 150: return YES;
151: }
152: return NO;
153: }
154:
1.1 frystyk 155: /* Create a Command Line Object
156: ** ----------------------------
157: */
158: PRIVATE Robot * Robot_new (void)
159: {
160: Robot * me;
1.14 frystyk 161: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
162: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
163: HT_OUTOFMEM("Robot_new");
1.2 frystyk 164: me->hyperdoc = HTList_new();
1.4 frystyk 165: me->htext = HTList_new();
1.1 frystyk 166: me->tv->tv_sec = DEFAULT_TIMEOUT;
1.25 frystyk 167: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 168: me->output = OUTPUT;
1.35 eric 169: me->cnt = 0;
1.34 eric 170: me->fingers = HTList_new();
1.1 frystyk 171: return me;
172: }
173:
174: /* Delete a Command Line Object
175: ** ----------------------------
176: */
177: PRIVATE BOOL Robot_delete (Robot * me)
178: {
179: if (me) {
1.34 eric 180: HTList_delete(me->fingers);
1.2 frystyk 181: if (me->hyperdoc) {
182: HTList * cur = me->hyperdoc;
183: HyperDoc * pres;
184: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
185: HyperDoc_delete(pres);
186: HTList_delete(me->hyperdoc);
187: }
1.4 frystyk 188: if (me->htext) {
189: HTList * cur = me->htext;
190: HText * pres;
191: while ((pres = (HText *) HTList_nextObject(cur)))
192: HText_free(pres);
193: HTList_delete(me->htext);
194: }
1.1 frystyk 195: if (me->logfile) HTLog_close();
196: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 197: if (me->flags & MR_TIME) {
198: time_t local = time(NULL);
1.13 eric 199: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 200: }
1.11 frystyk 201: HT_FREE(me->cwd);
202: HT_FREE(me->tv);
203: HT_FREE(me);
1.1 frystyk 204: return YES;
205: }
206: return NO;
207: }
208:
1.2 frystyk 209: /*
1.34 eric 210: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 211: */
1.34 eric 212: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 213: {
1.34 eric 214: Finger * me;
215: HTRequest * request = HTRequest_new();
216: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
217: HT_OUTOFMEM("Finger_new");
218: me->robot = robot;
219: me->request = request;
220: me->dest = dest;
221: HTList_addObject(robot->fingers, (void *)me);
222:
223: HTRequest_setContext (request, me);
224: if (robot->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(request, YES);
225: HTRequest_addRqHd(request, HT_C_HOST);
226: HTRequest_setMethod(request, method);
227: robot->cnt++;
228: return me;
1.2 frystyk 229: }
230:
1.34 eric 231: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 232: {
1.34 eric 233: HTList_removeObject(me->robot->fingers, (void *)me);
234: me->robot->cnt--;
1.37 frystyk 235:
236: /*
237: ** If we are down at one request then flush the output buffer
238: */
239: if (me->request) {
240: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 241: HTRequest_delete(me->request);
1.37 frystyk 242: }
243:
244: /*
245: ** Delete the request and free myself
246: */
1.34 eric 247: HT_FREE(me);
248: return YES;
1.2 frystyk 249: }
250:
251: /*
252: ** Cleanup and make sure we close all connections including the persistent
253: ** ones
254: */
1.1 frystyk 255: PRIVATE void Cleanup (Robot * me, int status)
256: {
257: Robot_delete(me);
1.29 eric 258: HTProfile_delete();
1.39 ! eric 259: HTMemLog_close();
1.1 frystyk 260: #ifdef VMS
261: exit(status ? status : 1);
262: #else
263: exit(status ? status : 0);
264: #endif
265: }
266:
#ifdef CATCH_SIG
#include <signal.h>
/*      SetSignal
**      This function sets up signal handlers. This might not be necessary to
**      call if the application has its own handlers (lossage on SVR4)
*/
PRIVATE void SetSignal (void)
{
    /*
    **  On some systems (SYSV) SIGPIPE must be ignored when attempting to
    **  connect to a remote host where you would normally just get
    **  `connection refused' back
    */
    if (signal(SIGPIPE, SIG_IGN) != SIG_ERR) {
        if (PROT_TRACE) HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    } else {
        if (PROT_TRACE) HTTrace("HTSignal.... Can't catch SIGPIPE\n");
    }
}
#endif /* CATCH_SIG */
286:
287: PRIVATE void VersionInfo (void)
288: {
1.13 eric 289: OutputData("\n\nW3C Reference Software\n\n");
290: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 291: APP_NAME, APP_VERSION);
1.13 eric 292: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
293: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 294: }
295:
296: /* terminate_handler
297: ** -----------------
1.2 frystyk 298: ** This function is registered to handle the result of the request.
299: ** If no more requests are pending then terminate program
1.1 frystyk 300: */
1.32 frystyk 301: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
302: void * param, int status)
1.1 frystyk 303: {
1.34 eric 304: Finger * finger = (Finger *) HTRequest_context(request);
305: Robot * robot = finger->robot;
306: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
307: Finger_delete(finger);
1.37 frystyk 308: if (robot->cnt <= 0) {
1.34 eric 309: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.37 frystyk 310: Cleanup(robot, 0); /* No way back from here */
1.30 frystyk 311: }
1.37 frystyk 312:
313: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", robot->cnt, robot->cnt == 1 ? "" : "s");
1.1 frystyk 314: return HT_OK;
315: }
316:
#if 0
/*      timeout_handler
**      ---------------
**      Meant to be registered as the timeout handler of the select based
**      event loop. The whole function is currently compiled out.
**
**      BUG: This doesn't work as we don't get the right request object
**      back from the event loop, which is why killing the request and
**      deleting the finger is disabled below.
*/
PRIVATE int timeout_handler (HTRequest * request)
{
#if 0
    Finger * finger = (Finger *) HTRequest_context(request);
#endif

    if (SHOW_MSG)
        HTTrace("Robot....... We don't know how to handle timeout...\n");

#if 0
    HTRequest_kill(request);
    Finger_delete(finger);
#endif

    return HT_OK;
}
#endif
1.1 frystyk 338:
339: /* ------------------------------------------------------------------------- */
340: /* HTEXT INTERFACE */
341: /* ------------------------------------------------------------------------- */
342:
343: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
344: HTStream * stream)
345: {
346: HText * me;
1.34 eric 347: Finger * finger = (Finger *) HTRequest_context(request);
348: Robot * mr = finger->robot;
1.14 frystyk 349: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
350: HT_OUTOFMEM("HText_new2");
1.4 frystyk 351:
352: /* Bind the HText object together with the Request Object */
1.1 frystyk 353: me->request = request;
1.4 frystyk 354:
355: /* Add this HyperDoc object to our list */
356: if (!mr->htext) mr->htext = HTList_new();
357: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 358: return me;
359: }
360:
1.4 frystyk 361: PUBLIC void HText_free (HText * me) {
1.11 frystyk 362: if (me) HT_FREE (me);
1.4 frystyk 363: }
364:
1.1 frystyk 365: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
366: {
367: if (text && anchor) {
1.34 eric 368: Finger * finger = (Finger *) HTRequest_context(text->request);
369: Robot * mr = finger->robot;
1.1 frystyk 370: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
371: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 372: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 373: HyperDoc * hd = HTAnchor_document(dest_parent);
374:
1.13 eric 375: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 376:
1.2 frystyk 377: /* Test whether we already have a hyperdoc for this document */
378: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 379: HTParentAnchor * parent = HTRequest_parent(text->request);
380: HyperDoc * last = HTAnchor_document(parent);
381: int depth = last ? last->depth+1 : 0;
1.34 eric 382: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
383: HTRequest * newreq = newfinger->request;
1.2 frystyk 384: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 385: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
386: if (depth >= mr->depth) {
387: if (SHOW_MSG)
1.13 eric 388: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 389: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 390: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 391: } else {
1.13 eric 392: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 393: }
394: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 395: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 396: Finger_delete(newfinger);
1.2 frystyk 397: }
1.7 frystyk 398: } else {
1.18 frystyk 399: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 400: }
1.11 frystyk 401: HT_FREE(uri);
1.2 frystyk 402: }
403: }
404:
405: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 406: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 407: {
408: if (text && anchor) {
1.34 eric 409: Finger * finger = (Finger *) HTRequest_context(text->request);
410: Robot * mr = finger->robot;
1.2 frystyk 411: HTParentAnchor * dest = (HTParentAnchor *)
412: HTAnchor_followMainLink((HTAnchor *) anchor);
413: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 414:
1.2 frystyk 415: /* Test whether we already have a hyperdoc for this document */
416: if (mr->flags & MR_IMG && dest && !hd) {
417: HTParentAnchor * parent = HTRequest_parent(text->request);
418: HyperDoc * last = HTAnchor_document(parent);
419: int depth = last ? last->depth+1 : 0;
1.34 eric 420: Finger * newfinger = Finger_new(mr, dest, METHOD_HEAD);
421: HTRequest * newreq = newfinger->request;
1.2 frystyk 422: HyperDoc_new(mr, dest, depth);
423: if (SHOW_MSG) {
424: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 425: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 426: HT_FREE(uri);
1.2 frystyk 427: }
428: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
429: if (SHOW_MSG)
1.13 eric 430: HTTrace("Robot....... Image not tested!\n");
1.34 eric 431: Finger_delete(newfinger);
1.1 frystyk 432: }
433: }
434: }
435: }
436:
437: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 438: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 439: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
440: PUBLIC void HText_endAppend (HText * text) {}
441: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
442: PUBLIC void HText_beginAppend (HText * text) {}
443: PUBLIC void HText_appendParagraph (HText * text) {}
444:
445: /* ------------------------------------------------------------------------- */
446: /* MAIN PROGRAM */
447: /* ------------------------------------------------------------------------- */
448:
449: int main (int argc, char ** argv)
450: {
451: int status = 0;
452: int arg;
453: HTChunk * keywords = NULL; /* From command line */
454: int keycnt = 0;
1.12 frystyk 455: Robot * mr = NULL;
1.34 eric 456: Finger * finger;
457: HTParentAnchor * startAnchor;
1.1 frystyk 458:
459: /* Starts Mac GUSI socket library */
460: #ifdef GUSI
461: GUSISetup(GUSIwithSIOUXSockets);
462: GUSISetup(GUSIwithInternetSockets);
463: #endif
464:
465: #ifdef __MWERKS__ /* STR */
466: InitGraf((Ptr) &qd.thePort);
467: InitFonts();
468: InitWindows();
469: InitMenus(); TEInit();
470: InitDialogs(nil);
471: InitCursor();
472: SIOUXSettings.asktosaveonclose = false;
473: argc=ccommand(&argv);
474: #endif
475:
1.39 ! eric 476: HTMemLog_open("/usr/local/src/WWW/the-dart/Robot/src/HTRobot.log", 65536);
! 477: HTTraceData_setCallback(HTMemLog_callback);
1.27 frystyk 478: /* Initiate W3C Reference Library with a robot profile */
479: HTProfile_newRobot(APP_NAME, APP_VERSION);
480:
481: /* Add the default HTML parser to the set of converters */
482: {
483: HTList * converters = HTFormat_conversion();
484: HTMLInit(converters);
485: }
1.1 frystyk 486:
1.12 frystyk 487: /* Build a new robot object */
488: mr = Robot_new();
489:
1.1 frystyk 490: /* Scan command Line for parameters */
491: for (arg=1; arg<argc; arg++) {
492: if (*argv[arg] == '-') {
493:
494: /* non-interactive */
1.17 frystyk 495: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 496: HTAlert_setInteractive(NO);
497:
498: /* log file */
499: } else if (!strcmp(argv[arg], "-l")) {
500: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
501: argv[++arg] : DEFAULT_LOG_FILE;
502:
503: /* rule file */
504: } else if (!strcmp(argv[arg], "-r")) {
505: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
506: argv[++arg] : DEFAULT_RULE_FILE;
507:
508: /* output filename */
509: } else if (!strcmp(argv[arg], "-o")) {
510: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
511: argv[++arg] : DEFAULT_OUTPUT_FILE;
512:
513: /* timeout -- Change the default request timeout */
514: } else if (!strcmp(argv[arg], "-timeout")) {
515: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
516: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
517: if (timeout > 0) mr->tv->tv_sec = timeout;
518:
1.7 frystyk 519: /* preemptive or non-preemptive access */
1.1 frystyk 520: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 521: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 522:
523: /* test inlined images */
524: } else if (!strcmp(argv[arg], "-img")) {
525: mr->flags |= MR_IMG;
526:
527: /* load anchors */
528: } else if (!strcmp(argv[arg], "-link")) {
529: mr->flags |= MR_LINK;
1.7 frystyk 530: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
531: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 532:
1.12 frystyk 533: /* Output start and end time */
534: } else if (!strcmp(argv[arg], "-ss")) {
535: time_t local = time(NULL);
1.13 eric 536: HTTrace("Robot started on %s\n",
1.12 frystyk 537: HTDateTimeStr(&local, YES));
538: mr->flags |= MR_TIME;
539:
1.1 frystyk 540: /* print version and exit */
541: } else if (!strcmp(argv[arg], "-version")) {
542: VersionInfo();
543: Cleanup(mr, 0);
544:
545: #ifdef WWWTRACE
546: /* trace flags */
547: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 548: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 549: #endif
550:
551: } else {
1.13 eric 552: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 553: }
1.17 frystyk 554: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 555: if (!keycnt) {
556: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.34 eric 557: startAnchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
558: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 559: keycnt = 1;
1.11 frystyk 560: HT_FREE(ref);
1.1 frystyk 561: } else { /* Check for successive keyword arguments */
562: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
563: if (keycnt++ <= 1)
1.5 frystyk 564: keywords = HTChunk_new(128);
1.1 frystyk 565: else
1.5 frystyk 566: HTChunk_putc(keywords, ' ');
567: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 568: HT_FREE(escaped);
1.1 frystyk 569: }
570: }
571: }
572:
573: #ifdef CATCH_SIG
574: SetSignal();
575: #endif
576:
577: if (!keycnt) {
1.13 eric 578: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 579: Cleanup(mr, -1);
580: }
581:
1.23 manoli 582: /* Testing that HTTrace is working */
583: HTTrace ("Welcome to the W3C mini Robot\n");
584:
1.1 frystyk 585: /* Rule file specified? */
586: if (mr->rules) {
587: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 588: if (!HTLoadRules(rules))
1.13 eric 589: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 590: HT_FREE(rules);
1.1 frystyk 591: }
592:
593: /* Output file specified? */
594: if (mr->outputfile) {
595: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 596: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 597: mr->output = OUTPUT;
598: }
599: }
600:
601: /* Log file specifed? */
602: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
603:
1.27 frystyk 604: /* Register our own someterminater filter */
1.32 frystyk 605: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.37 frystyk 606:
1.34 eric 607: /* Start the request */
608: finger = Finger_new(mr, startAnchor, METHOD_GET);
609: if (mr->flags & MR_PREEMPTIVE)
610: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 611:
612: if (keywords) /* Search */
1.34 eric 613: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 614: else
1.34 eric 615: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 616:
1.5 frystyk 617: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 618: if (status != YES) {
1.13 eric 619: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 620: Cleanup(mr, -1);
621: }
622:
623: /* Go into the event loop... */
1.34 eric 624: HTEventList_loop(finger->request);
1.1 frystyk 625:
626: /* Only gets here if event loop fails */
627: Cleanup(mr, 0);
628: return 0;
629: }
Webmaster