Annotation of libwww/Robot/src/HTRobot.c, revision 1.25
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.9 frystyk 18: #include "WWWRules.h"
19: #include "WWWApp.h"
1.17 frystyk 20: #include "WWWTrans.h"
1.10 frystyk 21: #include "WWWInit.h"
1.9 frystyk 22:
1.4 frystyk 23: #include "HText.h"
1.1 frystyk 24:
25: #include "HTRobot.h" /* Implemented here */
26:
/* Version string; a build may predefine W3C_VERSION to override it */
#ifndef W3C_VERSION
#define W3C_VERSION		"unspecified"
#endif

/* Name and version passed to HTLibInit() and printed by VersionInfo() */
#define APP_NAME		"W3CRobot"
#define APP_VERSION		W3C_VERSION

/* File names used when -o, -r or -l is given without an argument */
#define DEFAULT_OUTPUT_FILE	"robot.out"
#define DEFAULT_RULE_FILE	"robot.conf"
#define DEFAULT_LOG_FILE	"robot.log"

/* Depth used when -link is given without an argument */
#define DEFAULT_DEPTH		0

/* True when WWWTRACE is set or the alert manager is interactive */
#define SHOW_MSG		(WWWTRACE || HTAlert_interactive())

/* Socket timeout in seconds; also the fallback for -timeout */
#define DEFAULT_TIMEOUT		10

/* SetSignal() is only compiled and called when CATCH_SIG is defined */
#ifdef __svr4__
#define CATCH_SIG
#endif
46:
/* Option bits that are OR-ed together in Robot.flags */
typedef enum _MRFlags {
    MR_IMG		= 1 << 0,	/* set by -img: check inlined images */
    MR_LINK		= 1 << 1,	/* set by -link: follow anchors */
    MR_PREEMPTIVE	= 1 << 2,	/* set by -single: preemptive requests */
    MR_TIME		= 1 << 3	/* set by -ss: trace start and end time */
} MRFlags;
53:
54: typedef struct _Robot {
55: HTRequest * request;
1.7 frystyk 56: HTRequest * timeout; /* Until we get a server eventloop */
1.1 frystyk 57: HTParentAnchor * anchor;
1.2 frystyk 58: int depth; /* How deep is our tree */
59: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 60: HTList * htext; /* List of our HText Objects */
1.1 frystyk 61: struct timeval * tv; /* Timeout on socket */
62: char * cwd; /* Current dir URL */
63: HTList * converters;
1.25 ! frystyk 64: HTList * encoders;
1.1 frystyk 65: char * rules;
66: char * logfile;
67: char * outputfile;
68: FILE * output;
69: MRFlags flags;
70: } Robot;
71:
/* Load status kept in each HyperDoc; a new HyperDoc starts as L_INVALID */
typedef enum _LoadState {
    L_INVALID	= -2,
    L_LOADING	= -1,
    L_SUCCESS	= 0,
    L_ERROR	= 1
} LoadState;
78:
79: /*
80: ** The HyperDoc object is bound to the anchor and contains information about
81: ** where we are in the search for recursive searches
82: */
83: typedef struct _HyperDoc {
84: HTParentAnchor * anchor;
85: LoadState state;
86: int depth;
87: } HyperDoc;
88:
89: /*
90: ** This is the HText object that is created every time we start parsing a
91: ** HTML object
92: */
1.4 frystyk 93: struct _HText {
1.1 frystyk 94: HTRequest * request;
1.4 frystyk 95: };
1.1 frystyk 96:
97: PUBLIC HText * HTMainText = NULL;
98: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
99: PUBLIC HTStyleSheet * styleSheet = NULL;
100:
101: /* ------------------------------------------------------------------------- */
102:
1.13 eric 103: /* Standard (non-error) Output
104: ** ---------------------------
105: */
106: PUBLIC int OutputData(const char * fmt, ...)
107: {
108: int ret;
109: va_list pArgs;
110: va_start(pArgs, fmt);
111: ret = vfprintf(stdout, fmt, pArgs);
112: va_end(pArgs);
113: return ret;
114: }
115:
116: /* ------------------------------------------------------------------------- */
117:
1.2 frystyk 118: /* Create a "HyperDoc" object
119: ** --------------------------
120: ** A HyperDoc object contains information about whether we have already
121: ** started checking the anchor and the depth in our search
122: */
123: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
124: {
125: HyperDoc * hd;
1.14 frystyk 126: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
127: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 128: hd->state = L_INVALID;
129: hd->depth = depth;
130:
131: /* Bind the HyperDoc object together with the Anchor Object */
132: hd->anchor = anchor;
133: HTAnchor_setDocument(anchor, (void *) hd);
134:
135: /* Add this HyperDoc object to our list */
136: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
137: HTList_addObject(mr->hyperdoc, (void *) hd);
138: return hd;
139: }
140:
141: /* Delete a "HyperDoc" object
142: ** --------------------------
143: */
144: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
145: {
146: if (hd) {
1.11 frystyk 147: HT_FREE (hd);
1.2 frystyk 148: return YES;
149: }
150: return NO;
151: }
152:
1.1 frystyk 153: /* Create a Command Line Object
154: ** ----------------------------
155: */
156: PRIVATE Robot * Robot_new (void)
157: {
158: Robot * me;
1.14 frystyk 159: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL ||
160: (me->tv = (struct timeval*) HT_CALLOC(1, sizeof(struct timeval))) == NULL)
161: HT_OUTOFMEM("Robot_new");
1.2 frystyk 162: me->hyperdoc = HTList_new();
1.4 frystyk 163: me->htext = HTList_new();
1.1 frystyk 164: me->tv->tv_sec = DEFAULT_TIMEOUT;
1.25 ! frystyk 165: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 166: me->output = OUTPUT;
167:
1.7 frystyk 168: /* We keep an extra timeout request object for the timeout_handler */
169: me->timeout = HTRequest_new();
170: HTRequest_setContext (me->timeout, me);
171:
1.1 frystyk 172: /* Bind the Robot object together with the Request Object */
173: me->request = HTRequest_new();
174: HTRequest_setContext (me->request, me);
175: return me;
176: }
177:
178: /* Delete a Command Line Object
179: ** ----------------------------
180: */
181: PRIVATE BOOL Robot_delete (Robot * me)
182: {
183: if (me) {
1.2 frystyk 184: if (me->hyperdoc) {
185: HTList * cur = me->hyperdoc;
186: HyperDoc * pres;
187: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
188: HyperDoc_delete(pres);
189: HTList_delete(me->hyperdoc);
190: }
1.4 frystyk 191: if (me->htext) {
192: HTList * cur = me->htext;
193: HText * pres;
194: while ((pres = (HText *) HTList_nextObject(cur)))
195: HText_free(pres);
196: HTList_delete(me->htext);
197: }
1.25 ! frystyk 198: HTConversion_deleteAll(me->converters);
! 199: HTCoding_deleteAll(me->encoders);
1.1 frystyk 200: if (me->logfile) HTLog_close();
201: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 202: if (me->flags & MR_TIME) {
203: time_t local = time(NULL);
1.13 eric 204: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 205: }
1.11 frystyk 206: HT_FREE(me->cwd);
207: HT_FREE(me->tv);
208: HT_FREE(me);
1.1 frystyk 209: return YES;
210: }
211: return NO;
212: }
213:
1.2 frystyk 214: /*
215: ** This function creates a new request object and initializes it
216: */
217: PRIVATE HTRequest * Thread_new (Robot * mr, HTMethod method)
218: {
219: HTRequest * newreq = HTRequest_new();
220: HTRequest_setContext (newreq, mr);
1.7 frystyk 221: if (mr->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(newreq, YES);
1.5 frystyk 222: HTRequest_addRqHd(newreq, HT_C_HOST);
1.2 frystyk 223: HTRequest_setMethod(newreq, method);
224: return newreq;
225: }
226:
227: PRIVATE BOOL Thread_delete (Robot * mr, HTRequest * request)
228: {
229: if (mr && request) {
230: HTRequest_delete(request);
231: return YES;
232: }
233: return NO;
234: }
235:
236: /*
237: ** Cleanup and make sure we close all connections including the persistent
238: ** ones
239: */
1.1 frystyk 240: PRIVATE void Cleanup (Robot * me, int status)
241: {
1.2 frystyk 242: HTNet_killAll();
1.1 frystyk 243: Robot_delete(me);
244: HTLibTerminate();
245: #ifdef VMS
246: exit(status ? status : 1);
247: #else
248: exit(status ? status : 0);
249: #endif
250: }
251:
#ifdef CATCH_SIG
#include <signal.h>
/*	SetSignal
**	---------
**	Asks the system to ignore SIGPIPE and traces whether that worked.
**	On some systems (SYSV) this is necessary when attempting to connect
**	to a remote host where you normally should get `connection refused'
**	back. An application that has its own handlers might not need to
**	call this (lossage on SVR4).
*/
PRIVATE void SetSignal (void)
{
    int failed = (signal(SIGPIPE, SIG_IGN) == SIG_ERR);

    if (PROT_TRACE) {
	if (failed)
	    HTTrace("HTSignal.... Can't catch SIGPIPE\n");
	else
	    HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    }
}
#endif /* CATCH_SIG */
271:
272: PRIVATE void VersionInfo (void)
273: {
1.13 eric 274: OutputData("\n\nW3C Reference Software\n\n");
275: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 276: APP_NAME, APP_VERSION);
1.13 eric 277: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
278: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 279: }
280:
281: /* terminate_handler
282: ** -----------------
1.2 frystyk 283: ** This function is registered to handle the result of the request.
284: ** If no more requests are pending then terminate program
1.1 frystyk 285: */
1.15 frystyk 286: PRIVATE int terminate_handler (HTRequest * request, void * param, int status)
1.1 frystyk 287: {
288: Robot * mr = (Robot *) HTRequest_context(request);
289: if (mr->logfile) HTLog_add(request, status);
1.2 frystyk 290: Thread_delete(mr, request);
1.3 frystyk 291: if (HTNet_isEmpty()) Cleanup(mr, 0);
1.1 frystyk 292: return HT_OK;
293: }
294:
295: /* timeout_handler
296: ** ---------------
297: ** This function is registered to handle timeout in select eventloop
1.7 frystyk 298: **
299: ** BUG: This doesn't work as we don't get the right request object
300: ** back from the event loop
1.1 frystyk 301: */
302: PRIVATE int timeout_handler (HTRequest * request)
303: {
1.2 frystyk 304: Robot * mr = (Robot *) HTRequest_context(request);
1.25 ! frystyk 305: if (SHOW_MSG) HTTrace("Robot....... We don't know how to handle timeout...\n");
1.7 frystyk 306: #if 0
1.1 frystyk 307: HTRequest_kill(request);
1.2 frystyk 308: Thread_delete(mr, request);
1.7 frystyk 309: #endif
1.4 frystyk 310: return HT_OK;
1.1 frystyk 311: }
312:
1.8 frystyk 313: /* proxy_handler
314: ** ---------------
315: ** This function is registered to be called before a request is issued
316: ** We look for redirection for proxies and gateways
317: ** returns HT_LOADED We already have this
318: ** HT_ERROR We can't load this
319: ** HT_OK Success
320: */
1.15 frystyk 321: PRIVATE int proxy_handler (HTRequest * request, void * param, int status)
1.8 frystyk 322: {
323: HTParentAnchor *anchor = HTRequest_anchor(request);
324: char * addr = HTAnchor_address((HTAnchor *) anchor);
325: char * newaddr = NULL;
326: if ((newaddr = HTProxy_find(addr))) {
327: StrAllocCat(newaddr, addr);
1.19 frystyk 328: HTRequest_setFullURI(request, YES);
1.8 frystyk 329: HTAnchor_setPhysical(anchor, newaddr);
330: } else if ((newaddr = HTGateway_find(addr))) {
331: char * path = HTParse(addr,"",PARSE_HOST+PARSE_PATH+PARSE_PUNCTUATION);
332: /* Chop leading / off to make host into part of path */
333: char * gatewayed = HTParse(path+1, newaddr, PARSE_ALL);
1.19 frystyk 334: HTRequest_setFullURI(request, NO);
1.8 frystyk 335: HTAnchor_setPhysical(anchor, gatewayed);
1.11 frystyk 336: HT_FREE(path);
337: HT_FREE(gatewayed);
1.8 frystyk 338: } else
1.19 frystyk 339: HTRequest_setFullURI(request, NO);
1.11 frystyk 340: HT_FREE(newaddr);
341: HT_FREE(addr);
1.8 frystyk 342: return HT_OK;
343: }
344:
1.1 frystyk 345: /* ------------------------------------------------------------------------- */
346: /* HTEXT INTERFACE */
347: /* ------------------------------------------------------------------------- */
348:
349: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
350: HTStream * stream)
351: {
352: HText * me;
1.4 frystyk 353: Robot * mr = (Robot *) HTRequest_context(request);
1.14 frystyk 354: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
355: HT_OUTOFMEM("HText_new2");
1.4 frystyk 356:
357: /* Bind the HText object together with the Request Object */
1.1 frystyk 358: me->request = request;
1.4 frystyk 359:
360: /* Add this HyperDoc object to our list */
361: if (!mr->htext) mr->htext = HTList_new();
362: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 363: return me;
364: }
365:
1.4 frystyk 366: PUBLIC void HText_free (HText * me) {
1.11 frystyk 367: if (me) HT_FREE (me);
1.4 frystyk 368: }
369:
1.1 frystyk 370: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
371: {
372: if (text && anchor) {
1.2 frystyk 373: Robot * mr = (Robot *) HTRequest_context(text->request);
1.1 frystyk 374: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
375: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 376: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 377: HyperDoc * hd = HTAnchor_document(dest_parent);
378:
1.13 eric 379: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL");
1.7 frystyk 380:
1.2 frystyk 381: /* Test whether we already have a hyperdoc for this document */
382: if (mr->flags & MR_LINK && dest_parent && !hd) {
1.1 frystyk 383: HTParentAnchor * parent = HTRequest_parent(text->request);
384: HyperDoc * last = HTAnchor_document(parent);
385: int depth = last ? last->depth+1 : 0;
1.2 frystyk 386: HTRequest * newreq = Thread_new(mr, METHOD_GET);
387: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 388: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
389: if (depth >= mr->depth) {
390: if (SHOW_MSG)
1.13 eric 391: HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 392: HTRequest_setMethod(newreq, METHOD_HEAD);
393: HTRequest_setOutputFormat(newreq, WWW_MIME);
394: } else {
1.13 eric 395: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 396: }
397: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 398: if (SHOW_MSG) HTTrace("not tested!\n");
1.2 frystyk 399: Thread_delete(mr, newreq);
400: }
1.7 frystyk 401: } else {
1.18 frystyk 402: if (SHOW_MSG) HTTrace("duplicate or max depth reached\n");
1.2 frystyk 403: }
1.11 frystyk 404: HT_FREE(uri);
1.2 frystyk 405: }
406: }
407:
408: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 409: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 410: {
411: if (text && anchor) {
412: Robot * mr = (Robot *) HTRequest_context(text->request);
413: HTParentAnchor * dest = (HTParentAnchor *)
414: HTAnchor_followMainLink((HTAnchor *) anchor);
415: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 416:
1.2 frystyk 417: /* Test whether we already have a hyperdoc for this document */
418: if (mr->flags & MR_IMG && dest && !hd) {
419: HTParentAnchor * parent = HTRequest_parent(text->request);
420: HyperDoc * last = HTAnchor_document(parent);
421: int depth = last ? last->depth+1 : 0;
422: HTRequest * newreq = Thread_new(mr, METHOD_HEAD);
423: HyperDoc_new(mr, dest, depth);
424: if (SHOW_MSG) {
425: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 426: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 427: HT_FREE(uri);
1.2 frystyk 428: }
429: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
430: if (SHOW_MSG)
1.13 eric 431: HTTrace("Robot....... Image not tested!\n");
1.2 frystyk 432: Thread_delete(mr, newreq);
1.1 frystyk 433: }
434: }
435: }
436: }
437:
438: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 439: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 440: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
441: PUBLIC void HText_endAppend (HText * text) {}
442: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
443: PUBLIC void HText_beginAppend (HText * text) {}
444: PUBLIC void HText_appendParagraph (HText * text) {}
445:
446: /* ------------------------------------------------------------------------- */
447: /* MAIN PROGRAM */
448: /* ------------------------------------------------------------------------- */
449:
450: int main (int argc, char ** argv)
451: {
452: int status = 0;
453: int arg;
454: HTChunk * keywords = NULL; /* From command line */
455: int keycnt = 0;
1.12 frystyk 456: Robot * mr = NULL;
1.1 frystyk 457:
458: /* Starts Mac GUSI socket library */
459: #ifdef GUSI
460: GUSISetup(GUSIwithSIOUXSockets);
461: GUSISetup(GUSIwithInternetSockets);
462: #endif
463:
464: #ifdef __MWERKS__ /* STR */
465: InitGraf((Ptr) &qd.thePort);
466: InitFonts();
467: InitWindows();
468: InitMenus(); TEInit();
469: InitDialogs(nil);
470: InitCursor();
471: SIOUXSettings.asktosaveonclose = false;
472: argc=ccommand(&argv);
473: #endif
474:
475: /* Initiate W3C Reference Library */
476: HTLibInit(APP_NAME, APP_VERSION);
1.20 eric 477: HTMIMEInit();
1.1 frystyk 478:
1.12 frystyk 479: /* Build a new robot object */
480: mr = Robot_new();
481:
1.16 frystyk 482: /* Set up our event manager */
1.22 eric 483: HTEventrgInit();
1.16 frystyk 484:
1.17 frystyk 485: /* Register a transport */
486: HTTransportInit();
487:
1.1 frystyk 488: /* Initialize the protocol modules */
1.25 ! frystyk 489: HTProtocolInit();
1.1 frystyk 490:
491: /* Initialize set of converters */
492: mr->converters = HTList_new();
493: HTConverterInit(mr->converters);
1.21 frystyk 494: HTMLInit(mr->converters);
1.1 frystyk 495: HTFormat_setConversion(mr->converters);
1.25 ! frystyk 496:
! 497: /* Set up encoders and decoders */
! 498: mr->encoders = HTList_new();
! 499: HTEncoderInit(mr->encoders);
! 500: HTFormat_setTransferCoding(mr->encoders);
1.1 frystyk 501:
502: /* Initialize bindings between file suffixes and media types */
503: HTFileInit();
504:
505: /* Get any proxy or gateway environment variables */
506: HTProxy_getEnvVar();
507:
508: /* Scan command Line for parameters */
509: for (arg=1; arg<argc; arg++) {
510: if (*argv[arg] == '-') {
511:
512: /* non-interactive */
1.17 frystyk 513: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 514: HTAlert_setInteractive(NO);
515:
516: /* log file */
517: } else if (!strcmp(argv[arg], "-l")) {
518: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
519: argv[++arg] : DEFAULT_LOG_FILE;
520:
521: /* rule file */
522: } else if (!strcmp(argv[arg], "-r")) {
523: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
524: argv[++arg] : DEFAULT_RULE_FILE;
525:
526: /* output filename */
527: } else if (!strcmp(argv[arg], "-o")) {
528: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
529: argv[++arg] : DEFAULT_OUTPUT_FILE;
530:
531: /* timeout -- Change the default request timeout */
532: } else if (!strcmp(argv[arg], "-timeout")) {
533: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
534: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
535: if (timeout > 0) mr->tv->tv_sec = timeout;
536:
1.7 frystyk 537: /* preemptive or non-preemptive access */
1.1 frystyk 538: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 539: HTRequest_setPreemptive(mr->request, YES);
540: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 541:
542: /* test inlined images */
543: } else if (!strcmp(argv[arg], "-img")) {
544: mr->flags |= MR_IMG;
545:
546: /* load anchors */
547: } else if (!strcmp(argv[arg], "-link")) {
548: mr->flags |= MR_LINK;
1.7 frystyk 549: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
550: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 551:
1.7 frystyk 552: /* preemptive or non-preemptive access */
1.2 frystyk 553: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 554: HTRequest_setPreemptive(mr->request, YES);
555: mr->flags |= MR_PREEMPTIVE;
1.1 frystyk 556:
1.12 frystyk 557: /* Output start and end time */
558: } else if (!strcmp(argv[arg], "-ss")) {
559: time_t local = time(NULL);
1.13 eric 560: HTTrace("Robot started on %s\n",
1.12 frystyk 561: HTDateTimeStr(&local, YES));
562: mr->flags |= MR_TIME;
563:
1.1 frystyk 564: /* print version and exit */
565: } else if (!strcmp(argv[arg], "-version")) {
566: VersionInfo();
567: Cleanup(mr, 0);
568:
569: #ifdef WWWTRACE
570: /* trace flags */
571: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 572: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 573: #endif
574:
575: } else {
1.13 eric 576: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 577: }
1.17 frystyk 578: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 579: if (!keycnt) {
580: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
581: mr->anchor = (HTParentAnchor *) HTAnchor_findAddress(ref);
1.7 frystyk 582: HyperDoc_new(mr, mr->anchor, 0);
1.1 frystyk 583: keycnt = 1;
1.11 frystyk 584: HT_FREE(ref);
1.1 frystyk 585: } else { /* Check for successive keyword arguments */
586: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
587: if (keycnt++ <= 1)
1.5 frystyk 588: keywords = HTChunk_new(128);
1.1 frystyk 589: else
1.5 frystyk 590: HTChunk_putc(keywords, ' ');
591: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 592: HT_FREE(escaped);
1.1 frystyk 593: }
594: }
595: }
596:
597: #ifdef CATCH_SIG
598: SetSignal();
599: #endif
600:
601: if (!keycnt) {
1.13 eric 602: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 603: Cleanup(mr, -1);
604: }
605:
1.23 manoli 606: /* Testing that HTTrace is working */
607: HTTrace ("Welcome to the W3C mini Robot\n");
608:
1.1 frystyk 609: /* Rule file specified? */
610: if (mr->rules) {
611: HTList * list = HTList_new();
612: HTRequest * rr = HTRequest_new();
613: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
614: HTParentAnchor * ra = (HTParentAnchor *) HTAnchor_findAddress(rules);
1.7 frystyk 615: HTRequest_setPreemptive(rr, YES);
1.24 frystyk 616: HTAlert_setInteractive(NO);
1.1 frystyk 617: HTConversion_add(list, "application/x-www-rules", "*/*", HTRules,
618: 1.0, 0.0, 0.0);
619: HTRequest_setConversion(rr, list, YES);
1.8 frystyk 620: HTAlert_add(HTConfirm, HT_A_CONFIRM);
1.1 frystyk 621: if (HTLoadAnchor((HTAnchor *) ra, rr) != YES)
1.13 eric 622: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.1 frystyk 623: HTConversion_deleteAll(list);
624: HTRequest_delete(rr);
1.8 frystyk 625: HTAlert_delete(HTConfirm);
1.11 frystyk 626: HT_FREE(rules);
1.1 frystyk 627: }
628:
629: /* Output file specified? */
630: if (mr->outputfile) {
631: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 632: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 633: mr->output = OUTPUT;
634: }
635: }
636:
637: /* Log file specifed? */
638: if (mr->logfile) HTLog_open(mr->logfile, YES, YES);
639:
640: /* Register our User Prompts etc in the Alert Manager */
641: if (HTAlert_interactive()) {
642: HTAlert_add(HTError_print, HT_A_MESSAGE);
643: HTAlert_add(HTConfirm, HT_A_CONFIRM);
644: HTAlert_add(HTPrompt, HT_A_PROMPT);
645: HTAlert_add(HTPromptPassword, HT_A_SECRET);
646: HTAlert_add(HTPromptUsernameAndPassword, HT_A_USER_PW);
647: }
648:
649: /* Register a call back function for the Net Manager */
1.15 frystyk 650: HTNetCall_addBefore(proxy_handler, NULL, 0);
651: HTNetCall_addAfter(terminate_handler, NULL, HT_ALL);
1.1 frystyk 652:
653: /* Set timeout on sockets */
1.16 frystyk 654: HTEventrg_registerTimeout(mr->tv, mr->timeout, timeout_handler, NO);
1.1 frystyk 655:
656: /* Start the request */
657: if (keywords) /* Search */
1.5 frystyk 658: status = HTSearch(HTChunk_data(keywords), mr->anchor, mr->request);
1.1 frystyk 659: else
660: status = HTLoadAnchor((HTAnchor *) mr->anchor, mr->request);
661:
1.5 frystyk 662: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 663: if (status != YES) {
1.13 eric 664: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 665: Cleanup(mr, -1);
666: }
667:
668: /* Go into the event loop... */
1.16 frystyk 669: HTEventrg_loop(mr->request);
1.1 frystyk 670:
671: /* Only gets here if event loop fails */
672: Cleanup(mr, 0);
673: return 0;
674: }
Webmaster