Annotation of libwww/Robot/src/HTRobot.c, revision 1.23
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.9 frystyk 18: #include "WWWRules.h"
19: #include "WWWApp.h"
1.17 frystyk 20: #include "WWWTrans.h"
1.10 frystyk 21: #include "WWWInit.h"
1.9 frystyk 22:
1.4 frystyk 23: #include "HText.h"
1.1 frystyk 24:
25: #include "HTRobot.h" /* Implemented here */
26:
/* Version string; may be supplied from outside, otherwise use a placeholder */
#if !defined(W3C_VERSION)
#define W3C_VERSION		"unspecified"
#endif

/* Name and version handed to HTLibInit() and shown by VersionInfo() */
#define APP_NAME		"W3CRobot"
#define APP_VERSION		W3C_VERSION

/* File names used when -o, -r or -l is given without a file argument */
#define DEFAULT_OUTPUT_FILE	"robot.out"
#define DEFAULT_RULE_FILE	"robot.conf"
#define DEFAULT_LOG_FILE	"robot.log"

/* Depth used when -link is given without a numeric argument */
#define DEFAULT_DEPTH		0

/* Diagnostics are printed when tracing is on or we run interactively */
#define SHOW_MSG		(WWWTRACE || HTAlert_interactive())

/* Socket timeout in seconds; can be changed with -timeout */
#define DEFAULT_TIMEOUT		10

/* On SVR4 we install the SIGPIPE handling done by SetSignal() */
#ifdef __svr4__
#define CATCH_SIG
#endif
46:
/*
**  Bit flags selecting the optional behaviour of the robot. They are OR'ed
**  into Robot.flags while the command line is parsed.
*/
typedef enum _MRFlags {
    MR_IMG		= 1 << 0,	/* -img: test inlined images */
    MR_LINK		= 1 << 1,	/* -link: load anchors */
    MR_PREEMPTIVE	= 1 << 2,	/* -single: preemptive requests */
    MR_TIME		= 1 << 3	/* -ss: output start and end time */
} MRFlags;
53:
54: typedef struct _Robot {
55: HTRequest * request;
1.7 frystyk 56: HTRequest * timeout; /* Until we get a server eventloop */
1.1 frystyk 57: HTParentAnchor * anchor;
1.2 frystyk 58: int depth; /* How deep is our tree */
59: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 60: HTList * htext; /* List of our HText Objects */
1.1 frystyk 61: struct timeval * tv; /* Timeout on socket */
62: char * cwd; /* Current dir URL */
63: HTList * converters;
64: char * rules;
65: char * logfile;
66: char * outputfile;
67: FILE * output;
68: MRFlags flags;
69: } Robot;
70:
/*
**  Load state recorded in a HyperDoc. HyperDoc_new() starts every document
**  out as L_INVALID.
*/
typedef enum _LoadState {
    L_INVALID	= -2,
    L_LOADING	= -1,
    L_SUCCESS	= 0,
    L_ERROR	= 1
} LoadState;
77:
78: /*
79: ** The HyperDoc object is bound to the anchor and contains information about
80: ** where we are in the search for recursive searches
81: */
82: typedef struct _HyperDoc {
83: HTParentAnchor * anchor;
84: LoadState state;
85: int depth;
86: } HyperDoc;
87:
88: /*
89: ** This is the HText object that is created every time we start parsing a
90: ** HTML object
91: */
1.4 frystyk 92: struct _HText {
1.1 frystyk 93: HTRequest * request;
1.4 frystyk 94: };
1.1 frystyk 95:
96: PUBLIC HText * HTMainText = NULL;
97: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
98: PUBLIC HTStyleSheet * styleSheet = NULL;
99:
100: /* ------------------------------------------------------------------------- */
101:
1.13 eric 102: /* Standard (non-error) Output
103: ** ---------------------------
104: */
105: PUBLIC int OutputData(const char * fmt, ...)
106: {
107: int ret;
108: va_list pArgs;
109: va_start(pArgs, fmt);
110: ret = vfprintf(stdout, fmt, pArgs);
111: va_end(pArgs);
112: return ret;
113: }
114:
115: /* ------------------------------------------------------------------------- */
116:
1.2 frystyk 117: /* Create a "HyperDoc" object
118: ** --------------------------
119: ** A HyperDoc object contains information about whether we have already
120: ** started checking the anchor and the depth in our search
121: */
122: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
123: {
124: HyperDoc * hd;
1.14 frystyk 125: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
126: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 127: hd->state = L_INVALID;
128: hd->depth = depth;
129:
130: /* Bind the HyperDoc object together with the Anchor Object */
131: hd->anchor = anchor;
132: HTAnchor_setDocument(anchor, (void *) hd);
133:
134: /* Add this HyperDoc object to our list */
135: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
136: HTList_addObject(mr->hyperdoc, (void *) hd);
137: return hd;
138: }
139:
140: /* Delete a "HyperDoc" object
141: ** --------------------------
142: */
143: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
144: {
145: if (hd) {
1.11 frystyk 146: HT_FREE (hd);
1.2 frystyk 147: return YES;
148: }
149: return NO;
150: }
151:
1.1 frystyk 152: /* Create a Command Line Object
153: ** ----------------------------
154: */
155: PRIVATE Robot * Robot_new (void)
156: {
157: Robot * me;
1.14 frystyk 158: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
159: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
160: HT_OUTOFMEM("Robot_new");
1.2 frystyk 161: me->hyperdoc = HTList_new();
1.4 frystyk 162: me->htext = HTList_new();
1.1 frystyk 163: me->tv->tv_sec = DEFAULT_TIMEOUT;
164: me->cwd = HTFindRelatedName();
165: me->output = OUTPUT;
166:
1.7 frystyk 167: /* We keep an extra timeout request object for the timeout_handler */
168: me->timeout = HTRequest_new();
169: HTRequest_setContext (me->timeout, me);
170:
1.1 frystyk 171: /* Bind the Robot object together with the Request Object */
172: me->request = HTRequest_new();
173: HTRequest_setContext (me->request, me);
174: return me;
175: }
176:
177: /* Delete a Command Line Object
178: ** ----------------------------
179: */
180: PRIVATE BOOL Robot_delete (Robot * me)
181: {
182: if (me) {
1.2 frystyk 183: if (me->hyperdoc) {
184: HTList * cur = me->hyperdoc;
185: HyperDoc * pres;
186: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
187: HyperDoc_delete(pres);
188: HTList_delete(me->hyperdoc);
189: }
1.4 frystyk 190: if (me->htext) {
191: HTList * cur = me->htext;
192: HText * pres;
193: while ((pres = (HText *) HTList_nextObject(cur)))
194: HText_free(pres);
195: HTList_delete(me->htext);
196: }
1.1 frystyk 197: if (me->logfile) HTLog_close();
198: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 199: if (me->flags & MR_TIME) {
200: time_t local = time(NULL);
1.13 eric 201: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 202: }
1.11 frystyk 203: HT_FREE(me->cwd);
204: HT_FREE(me->tv);
205: HT_FREE(me);
1.1 frystyk 206: return YES;
207: }
208: return NO;
209: }
210:
1.2 frystyk 211: /*
212: ** This function creates a new request object and initializes it
213: */
214: PRIVATE HTRequest * Thread_new (Robot * mr, HTMethod method)
215: {
216: HTRequest * newreq = HTRequest_new();
217: HTRequest_setContext (newreq, mr);
1.7 frystyk 218: if (mr->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(newreq, YES);
1.5 frystyk 219: HTRequest_addRqHd(newreq, HT_C_HOST);
1.2 frystyk 220: HTRequest_setMethod(newreq, method);
221: return newreq;
222: }
223:
224: PRIVATE BOOL Thread_delete (Robot * mr, HTRequest * request)
225: {
226: if (mr && request) {
227: HTRequest_delete(request);
228: return YES;
229: }
230: return NO;
231: }
232:
233: /*
234: ** Cleanup and make sure we close all connections including the persistent
235: ** ones
236: */
1.1 frystyk 237: PRIVATE void Cleanup (Robot * me, int status)
238: {
1.2 frystyk 239: HTNet_killAll();
1.1 frystyk 240: Robot_delete(me);
241: HTLibTerminate();
242: #ifdef VMS
243: exit(status ? status : 1);
244: #else
245: exit(status ? status : 0);
246: #endif
247: }
248:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	---------
**	Sets up the signal handling for the robot; an application with its own
**	handlers may not need it (lossage on SVR4).
**
**	On some systems (SYSV) SIGPIPE has to be dealt with when connecting to
**	a remote host that would normally answer `connection refused', so we
**	ask for the signal to be ignored and trace the outcome.
*/
PRIVATE void SetSignal (void)
{
    int failed = (signal(SIGPIPE, SIG_IGN) == SIG_ERR);

    if (PROT_TRACE) {
	if (failed)
	    HTTrace("HTSignal.... Can't catch SIGPIPE\n");
	else
	    HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    }
}
#endif /* CATCH_SIG */
268:
269: PRIVATE void VersionInfo (void)
270: {
1.13 eric 271: OutputData("\n\nW3C Reference Software\n\n");
272: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 273: APP_NAME, APP_VERSION);
1.13 eric 274: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
275: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 276: }
277:
278: /* terminate_handler
279: ** -----------------
1.2 frystyk 280: ** This function is registered to handle the result of the request.
281: ** If no more requests are pending then terminate program
1.1 frystyk 282: */
1.15 frystyk 283: PRIVATE int terminate_handler (HTRequest * request, void * param, int status)
1.1 frystyk 284: {
285: Robot * mr = (Robot *) HTRequest_context(request);
286: if (mr->logfile) HTLog_add(request, status);
1.2 frystyk 287: Thread_delete(mr, request);
1.3 frystyk 288: if (HTNet_isEmpty()) Cleanup(mr, 0);
1.1 frystyk 289: return HT_OK;
290: }
291:
292: /* timeout_handler
293: ** ---------------
294: ** This function is registered to handle timeout in select eventloop
1.7 frystyk 295: **
296: ** BUG: This doesn't work as we don't get the right request object
297: ** back from the event loop
1.1 frystyk 298: */
299: PRIVATE int timeout_handler (HTRequest * request)
300: {
1.2 frystyk 301: Robot * mr = (Robot *) HTRequest_context(request);
1.13 eric 302: if (SHOW_MSG) HTTrace("Robot....... Request timeout...\n");
1.7 frystyk 303: #if 0
1.1 frystyk 304: HTRequest_kill(request);
1.2 frystyk 305: Thread_delete(mr, request);
1.7 frystyk 306: #endif
307: Cleanup(mr, -1);
1.4 frystyk 308: return HT_OK;
1.1 frystyk 309: }
310:
1.8 frystyk 311: /* proxy_handler
312: ** ---------------
313: ** This function is registered to be called before a request is issued
314: ** We look for redirection for proxies and gateways
315: ** returns HT_LOADED We already have this
316: ** HT_ERROR We can't load this
317: ** HT_OK Success
318: */
1.15 frystyk 319: PRIVATE int proxy_handler (HTRequest * request, void * param, int status)
1.8 frystyk 320: {
321: HTParentAnchor *anchor = HTRequest_anchor(request);
322: char * addr = HTAnchor_address((HTAnchor *) anchor);
323: char * newaddr = NULL;
324: if ((newaddr = HTProxy_find(addr))) {
325: StrAllocCat(newaddr, addr);
1.19 frystyk 326: HTRequest_setFullURI(request, YES);
1.8 frystyk 327: HTAnchor_setPhysical(anchor, newaddr);
328: } else if ((newaddr = HTGateway_find(addr))) {
329: char * path = HTParse(addr,"",PARSE_HOST+PARSE_PATH+PARSE_PUNCTUATION);
330: /* Chop leading / off to make host into part of path */
331: char * gatewayed = HTParse(path+1, newaddr, PARSE_ALL);
1.19 frystyk 332: HTRequest_setFullURI(request, NO);
1.8 frystyk 333: HTAnchor_setPhysical(anchor, gatewayed);
1.11 frystyk 334: HT_FREE(path);
335: HT_FREE(gatewayed);
1.8 frystyk 336: } else
1.19 frystyk 337: HTRequest_setFullURI(request, NO);
1.11 frystyk 338: HT_FREE(newaddr);
339: HT_FREE(addr);
1.8 frystyk 340: return HT_OK;
341: }
342:
1.1 frystyk 343: /* ------------------------------------------------------------------------- */
344: /* HTEXT INTERFACE */
345: /* ------------------------------------------------------------------------- */
346:
347: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
348: HTStream * stream)
349: {
350: HText * me;
1.4 frystyk 351: Robot * mr = (Robot *) HTRequest_context(request);
1.14 frystyk 352: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
353: HT_OUTOFMEM("HText_new2");
1.4 frystyk 354:
355: /* Bind the HText object together with the Request Object */
1.1 frystyk 356: me->request = request;
1.4 frystyk 357:
358: /* Add this HyperDoc object to our list */
359: if (!mr->htext) mr->htext = HTList_new();
360: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 361: return me;
362: }
363:
1.4 frystyk 364: PUBLIC void HText_free (HText * me) {
1.11 frystyk 365: if (me) HT_FREE (me);
1.4 frystyk 366: }
367:
1.1 frystyk 368: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
369: {
370: if (text && anchor) {
1.2 frystyk 371: Robot * mr = (Robot *) HTRequest_context(text->request);
1.1 frystyk 372: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
373: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 374: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 375: HyperDoc * hd = HTAnchor_document(dest_parent);
376:
1.13 eric 377: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 378:
1.2 frystyk 379: /* Test whether we already have a hyperdoc for this document */
380: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 381: HTParentAnchor * parent = HTRequest_parent(text->request);
382: HyperDoc * last = HTAnchor_document(parent);
383: int depth = last ? last->depth+1 : 0;
1.2 frystyk 384: HTRequest * newreq = Thread_new(mr, METHOD_GET);
385: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 386: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
387: if (depth >= mr->depth) {
388: if (SHOW_MSG)
1.13 eric 389: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 390: HTRequest_setMethod(newreq, METHOD_HEAD);
391: HTRequest_setOutputFormat(newreq, WWW_MIME);
392: } else {
1.13 eric 393: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 394: }
395: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 396: if (SHOW_MSG) HTTrace("not tested!\n");
1.2 frystyk 397: Thread_delete(mr, newreq);
398: }
1.7 frystyk 399: } else {
1.18 frystyk 400: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 401: }
1.11 frystyk 402: HT_FREE(uri);
1.2 frystyk 403: }
404: }
405:
406: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 407: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 408: {
409: if (text && anchor) {
410: Robot * mr = (Robot *) HTRequest_context(text->request);
411: HTParentAnchor * dest = (HTParentAnchor *)
412: HTAnchor_followMainLink((HTAnchor *) anchor);
413: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 414:
1.2 frystyk 415: /* Test whether we already have a hyperdoc for this document */
416: if (mr->flags & MR_IMG && dest && !hd) {
417: HTParentAnchor * parent = HTRequest_parent(text->request);
418: HyperDoc * last = HTAnchor_document(parent);
419: int depth = last ? last->depth+1 : 0;
420: HTRequest * newreq = Thread_new(mr, METHOD_HEAD);
421: HyperDoc_new(mr, dest, depth);
422: if (SHOW_MSG) {
423: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 424: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 425: HT_FREE(uri);
1.2 frystyk 426: }
427: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
428: if (SHOW_MSG)
1.13 eric 429: HTTrace("Robot....... Image not tested!\n");
1.2 frystyk 430: Thread_delete(mr, newreq);
1.1 frystyk 431: }
432: }
433: }
434: }
435:
436: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 437: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 438: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
439: PUBLIC void HText_endAppend (HText * text) {}
440: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
441: PUBLIC void HText_beginAppend (HText * text) {}
442: PUBLIC void HText_appendParagraph (HText * text) {}
443:
444: /* ------------------------------------------------------------------------- */
445: /* MAIN PROGRAM */
446: /* ------------------------------------------------------------------------- */
447:
448: int main (int argc, char ** argv)
449: {
450: int status = 0;
451: int arg;
452: HTChunk * keywords = NULL; /* From command line */
453: int keycnt = 0;
1.12 frystyk 454: Robot * mr = NULL;
1.1 frystyk 455:
456: /* Starts Mac GUSI socket library */
457: #ifdef GUSI
458: GUSISetup(GUSIwithSIOUXSockets);
459: GUSISetup(GUSIwithInternetSockets);
460: #endif
461:
462: #ifdef __MWERKS__ /* STR */
463: InitGraf((Ptr) &qd.thePort);
464: InitFonts();
465: InitWindows();
466: InitMenus(); TEInit();
467: InitDialogs(nil);
468: InitCursor();
469: SIOUXSettings.asktosaveonclose = false;
470: argc=ccommand(&argv);
471: #endif
472:
473: /* Initiate W3C Reference Library */
474: HTLibInit(APP_NAME, APP_VERSION);
1.20 eric 475: HTMIMEInit();
1.1 frystyk 476:
1.12 frystyk 477: /* Build a new robot object */
478: mr = Robot_new();
479:
1.16 frystyk 480: /* Set up our event manager */
1.22 eric 481: HTEventrgInit();
1.16 frystyk 482:
1.17 frystyk 483: /* Register a transport */
484: HTTransportInit();
485:
1.1 frystyk 486: /* Initialize the protocol modules */
487: HTAccessInit();
488:
489: /* Initialize set of converters */
490: mr->converters = HTList_new();
491: HTConverterInit(mr->converters);
1.21 frystyk 492: HTMLInit(mr->converters);
1.1 frystyk 493: HTFormat_setConversion(mr->converters);
494:
495: /* Initialize bindings between file suffixes and media types */
496: HTFileInit();
497:
498: /* Get any proxy or gateway environment variables */
499: HTProxy_getEnvVar();
500:
501: /* Scan command Line for parameters */
502: for (arg=1; arg<argc; arg++) {
503: if (*argv[arg] == '-') {
504:
505: /* non-interactive */
1.17 frystyk 506: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 507: HTAlert_setInteractive(NO);
508:
509: /* log file */
510: } else if (!strcmp(argv[arg], "-l")) {
511: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
512: argv[++arg] : DEFAULT_LOG_FILE;
513:
514: /* rule file */
515: } else if (!strcmp(argv[arg], "-r")) {
516: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
517: argv[++arg] : DEFAULT_RULE_FILE;
518:
519: /* output filename */
520: } else if (!strcmp(argv[arg], "-o")) {
521: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
522: argv[++arg] : DEFAULT_OUTPUT_FILE;
523:
524: /* timeout -- Change the default request timeout */
525: } else if (!strcmp(argv[arg], "-timeout")) {
526: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
527: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
528: if (timeout > 0) mr->tv->tv_sec = timeout;
529:
1.7 frystyk 530: /* preemptive or non-preemptive access */
1.1 frystyk 531: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 532: HTRequest_setPreemptive(mr->request, YES);
533: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 534:
535: /* test inlined images */
536: } else if (!strcmp(argv[arg], "-img")) {
537: mr->flags |= MR_IMG;
538:
539: /* load anchors */
540: } else if (!strcmp(argv[arg], "-link")) {
541: mr->flags |= MR_LINK;
1.7 frystyk 542: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
543: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 544:
1.7 frystyk 545: /* preemptive or non-preemptive access */
1.2 frystyk 546: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 547: HTRequest_setPreemptive(mr->request, YES);
548: mr->flags |= MR_PREEMPTIVE;
1.1 frystyk 549:
1.12 frystyk 550: /* Output start and end time */
551: } else if (!strcmp(argv[arg], "-ss")) {
552: time_t local = time(NULL);
1.13 eric 553: HTTrace("Robot started on %s\n",
1.12 frystyk 554: HTDateTimeStr(&local, YES));
555: mr->flags |= MR_TIME;
556:
1.1 frystyk 557: /* print version and exit */
558: } else if (!strcmp(argv[arg], "-version")) {
559: VersionInfo();
560: Cleanup(mr, 0);
561:
562: #ifdef WWWTRACE
563: /* trace flags */
564: } else if (!strncmp(argv[arg], "-v", 2)) {
565: char *p = argv[arg]+2;
566: WWWTRACE = 0;
567: for(; *p; p++) {
568: switch (*p) {
569: case 'a': WWWTRACE |= SHOW_ANCHOR_TRACE; break;
570: case 'b': WWWTRACE |= SHOW_BIND_TRACE; break;
571: case 'c': WWWTRACE |= SHOW_CACHE_TRACE; break;
572: case 'g': WWWTRACE |= SHOW_SGML_TRACE; break;
573: case 'p': WWWTRACE |= SHOW_PROTOCOL_TRACE; break;
574: case 's': WWWTRACE |= SHOW_STREAM_TRACE; break;
575: case 't': WWWTRACE |= SHOW_THREAD_TRACE; break;
576: case 'u': WWWTRACE |= SHOW_URI_TRACE; break;
577: default:
578: if (SHOW_MSG)
1.13 eric 579: HTTrace("Bad parameter (%s) in -v option\n",
1.1 frystyk 580: argv[arg]);
581: }
582: }
583: if (!WWWTRACE) WWWTRACE = SHOW_ALL_TRACE;
584: #endif
585:
586: } else {
1.13 eric 587: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 588: }
1.17 frystyk 589: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 590: if (!keycnt) {
591: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
592: mr->anchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
1.7 frystyk 593: HyperDoc_new(mr, mr->anchor, 0);
1.1 frystyk 594: keycnt = 1;
1.11 frystyk 595: HT_FREE(ref);
1.1 frystyk 596: } else { /* Check for successive keyword arguments */
597: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
598: if (keycnt++ <= 1)
1.5 frystyk 599: keywords = HTChunk_new(128);
1.1 frystyk 600: else
1.5 frystyk 601: HTChunk_putc(keywords, ' ');
602: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 603: HT_FREE(escaped);
1.1 frystyk 604: }
605: }
606: }
607:
608: #ifdef CATCH_SIG
609: SetSignal();
610: #endif
611:
612: if (!keycnt) {
1.13 eric 613: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 614: Cleanup(mr, -1);
615: }
616:
1.23 ! manoli 617: /* Testing that HTTrace is working */
! 618: HTTrace ("Welcome to the W3C mini Robot\n");
! 619:
1.1 frystyk 620: /* Rule file specified? */
621: if (mr->rules) {
622: HTList * list = HTList_new();
623: HTRequest * rr = HTRequest_new();
624: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
625: HTParentAnchor * ra = (HTParentAnchor *) HTAnchor_findAddress(rules);
1.7 frystyk 626: HTRequest_setPreemptive(rr, YES);
1.1 frystyk 627: HTConversion_add(list, "application/x-www-rules", "*/*", HTRules,
628: 1.0, 0.0, 0.0);
629: HTRequest_setConversion(rr, list, YES);
1.8 frystyk 630: HTAlert_add(HTConfirm, HT_A_CONFIRM);
1.1 frystyk 631: if (HTLoadAnchor((HTAnchor *) ra, rr) != YES)
1.13 eric 632: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.1 frystyk 633: HTConversion_deleteAll(list);
634: HTRequest_delete(rr);
1.8 frystyk 635: HTAlert_delete(HTConfirm);
1.11 frystyk 636: HT_FREE(rules);
1.1 frystyk 637: }
638:
639: /* Output file specified? */
640: if (mr->outputfile) {
641: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 642: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 643: mr->output = OUTPUT;
644: }
645: }
646:
647: /* Log file specifed? */
648: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
649:
650: /* Register our User Prompts etc in the Alert Manager */
651: if (HTAlert_interactive()) {
652: HTAlert_add(HTError_print, HT_A_MESSAGE);
653: HTAlert_add(HTConfirm, HT_A_CONFIRM);
654: HTAlert_add(HTPrompt, HT_A_PROMPT);
655: HTAlert_add(HTPromptPassword, HT_A_SECRET);
656: HTAlert_add(HTPromptUsernameAndPassword, HT_A_USER_PW);
657: }
658:
659: /* Register a call back function for the Net Manager */
1.15 frystyk 660: HTNetCall_addBefore(proxy_handler, NULL, 0);
661: HTNetCall_addAfter(terminate_handler, NULL, HT_ALL);
1.1 frystyk 662:
663: /* Set timeout on sockets */
1.16 frystyk 664: HTEventrg_registerTimeout(mr->tv, mr->timeout, timeout_handler, NO);
1.1 frystyk 665:
666: /* Start the request */
667: if (keywords) /* Search */
1.5 frystyk 668: status = HTSearch(HTChunk_data(keywords), mr->anchor, mr->request);
1.1 frystyk 669: else
670: status = HTLoadAnchor((HTAnchor *) mr->anchor, mr->request);
671:
1.5 frystyk 672: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 673: if (status != YES) {
1.13 eric 674: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 675: Cleanup(mr, -1);
676: }
677:
678: /* Go into the event loop... */
1.16 frystyk 679: HTEventrg_loop(mr->request);
1.1 frystyk 680:
681: /* Only gets here if event loop fails */
682: Cleanup(mr, 0);
683: return 0;
684: }
Webmaster