Annotation of libwww/Robot/src/HTRobot.c, revision 1.58
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
23: #include "HTRobot.h" /* Implemented here */
24:
1.58 ! frystyk 25: #ifdef HT_POSIX_REGEX
! 26: #include "rxposix.h"
! 27: #endif
! 28:
/*
**  Application identity. W3C_VERSION may be supplied by the build; if it
**  is not, fall back to a placeholder.
*/
#ifndef W3C_VERSION
#define W3C_VERSION		"Unspecified"
#endif

#define APP_NAME		"W3CRobot"
#define APP_VERSION		W3C_VERSION

/*
**  File names used when a command line option is given without a value
*/
#define DEFAULT_OUTPUT_FILE	"robot.out"		/* -o */
#define DEFAULT_RULE_FILE	"robot.conf"		/* -r */
#define DEFAULT_LOG_FILE	"log-clf.txt"		/* -l */
#define DEFAULT_HIT_FILE	"log-hit.txt"		/* -hit */
#define DEFAULT_REFERER_FILE	"log-referer.txt"	/* -ref */
#define DEFAULT_REJECT_FILE	"log-reject.txt"	/* -rej */
#define DEFAULT_NOTFOUND_FILE	"log-notfound.txt"	/* -404 */
#define DEFAULT_CONNEG_FILE	"log-conneg.txt"	/* -neg */
#define DEFAULT_FORMAT_FILE	"log-format.txt"	/* -for */
#define DEFAULT_MEMLOG		"robot.mem"		/* HTMemLog_open() */

/*
**  Other defaults
*/
#define DEFAULT_PREFIX		""	/* -prefix: empty means no prefix */
#define DEFAULT_DEPTH		0	/* -link / -depth */
#define DEFAULT_DELAY		50	/* -delay: write delay in ms */
#define DEFAULT_TIMEOUT		10000	/* -timeout: in milliseconds */

/*
**  Memory logging is compiled out by default as it may be expensive in
**  performance. Change the 0 to 1 to turn it on.
*/
#if 0
#define HT_MEMLOG
#endif

/*
**  True unless the robot runs in quiet mode. The macro expects a variable
**  `mr' of type `Robot *' to be in scope where it is used.
**  (An earlier definition was: WWWTRACE || HTAlert_interactive())
*/
#define SHOW_MSG		(!(mr->flags & MR_QUIET))

/*
**  Install our own signal handling on SVR4
*/
#ifdef __svr4__
#define CATCH_SIG
#endif
62:
/*
**  Option bits kept in Robot.flags. Each one is switched on by a command
**  line option handled in main().
*/
typedef enum _MRFlags {
    MR_IMG		= 1 << 0,	/* -img: test inlined images */
    MR_LINK		= 1 << 1,	/* -link, -depth: load anchors */
    MR_PREEMPTIVE	= 1 << 2,	/* -single: preemptive requests */
    MR_TIME		= 1 << 3,	/* -ss: output start and end time */
    MR_SAVE		= 1 << 4,	/* -saveimg: GET images, not HEAD */
    MR_QUIET		= 1 << 5,	/* -q: turns SHOW_MSG off */
    MR_VALIDATE		= 1 << 6,	/* -validate: cache validation */
    MR_END_VALIDATE	= 1 << 7,	/* -endvalidate: end-to-end validation */
    MR_KEEP_META	= 1 << 8	/* keep anchor headers after a request */
} MRFlags;
74:
75: typedef struct _Robot {
1.2 frystyk 76: int depth; /* How deep is our tree */
1.30 frystyk 77: int cnt; /* Count of requests */
1.2 frystyk 78: HTList * hyperdoc; /* List of our HyperDoc Objects */
1.4 frystyk 79: HTList * htext; /* List of our HText Objects */
1.34 eric 80: HTList * fingers;
1.40 frystyk 81: int timer;
1.1 frystyk 82: char * cwd; /* Current dir URL */
83: char * rules;
1.55 frystyk 84: char * prefix;
1.1 frystyk 85: char * logfile;
1.55 frystyk 86: HTLog * log;
1.57 frystyk 87: char * reffile;
88: HTLog * ref;
1.58 ! frystyk 89: char * rejectfile;
! 90: HTLog * reject;
! 91: char * notfoundfile;
! 92: HTLog * notfound;
! 93: char * connegfile;
! 94: HTLog * conneg;
1.1 frystyk 95: char * outputfile;
96: FILE * output;
1.55 frystyk 97: char * hitfile;
1.58 ! frystyk 98: char * mtfile;
1.1 frystyk 99: MRFlags flags;
1.55 frystyk 100:
101: long total_bytes; /* Total number of bytes processed */
102: long total_docs; /* Total number of documents processed */
1.56 frystyk 103: ms_t time; /* Time of run */
1.58 ! frystyk 104:
! 105: #ifdef HT_POSIX_REGEX
! 106: regex_t * include;
! 107: regex_t * exclude;
! 108: regex_t * check;
! 109: #endif
! 110:
1.1 frystyk 111: } Robot;
1.34 eric 112:
113: typedef struct _Finger {
114: Robot * robot;
115: HTRequest * request;
116: HTParentAnchor * dest;
117: } Finger;
118:
/*
**  Load state recorded in a HyperDoc object
*/
typedef enum _LoadState {
    L_INVALID	= -2,		/* Initial state set by HyperDoc_new() */
    L_LOADING	= -1,
    L_SUCCESS	= 0,
    L_ERROR	= 1
} LoadState;
125:
126: /*
127: ** The HyperDoc object is bound to the anchor and contains information about
128: ** where we are in the search for recursive searches
129: */
130: typedef struct _HyperDoc {
131: HTParentAnchor * anchor;
132: LoadState state;
133: int depth;
1.55 frystyk 134: int hits;
1.1 frystyk 135: } HyperDoc;
136:
137: /*
138: ** This is the HText object that is created every time we start parsing a
139: ** HTML object
140: */
1.4 frystyk 141: struct _HText {
1.1 frystyk 142: HTRequest * request;
1.4 frystyk 143: };
1.1 frystyk 144:
1.58 ! frystyk 145: /*
! 146: ** A structure for calculating metadata distributions
! 147: */
! 148: typedef struct _MetaDist {
! 149: HTAtom * name;
! 150: int hits;
! 151: } MetaDist;
! 152:
! 153: /*
! 154: ** Some sorting algorithms
! 155: */
! 156: PRIVATE HTComparer HitSort, FormatSort;
! 157:
1.1 frystyk 158: PUBLIC HText * HTMainText = NULL;
159: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
160: PUBLIC HTStyleSheet * styleSheet = NULL;
161:
162: /* ------------------------------------------------------------------------- */
163:
1.13 eric 164: /* Standard (non-error) Output
165: ** ---------------------------
166: */
167: PUBLIC int OutputData(const char * fmt, ...)
168: {
169: int ret;
170: va_list pArgs;
171: va_start(pArgs, fmt);
172: ret = vfprintf(stdout, fmt, pArgs);
173: va_end(pArgs);
174: return ret;
175: }
176:
177: /* ------------------------------------------------------------------------- */
178:
1.2 frystyk 179: /* Create a "HyperDoc" object
180: ** --------------------------
181: ** A HyperDoc object contains information about whether we have already
182: ** started checking the anchor and the depth in our search
183: */
184: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
185: {
186: HyperDoc * hd;
1.14 frystyk 187: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
188: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 189: hd->state = L_INVALID;
190: hd->depth = depth;
1.55 frystyk 191: hd->hits = 1;
1.2 frystyk 192:
193: /* Bind the HyperDoc object together with the Anchor Object */
194: hd->anchor = anchor;
195: HTAnchor_setDocument(anchor, (void *) hd);
196:
197: /* Add this HyperDoc object to our list */
198: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
199: HTList_addObject(mr->hyperdoc, (void *) hd);
200: return hd;
201: }
202:
203: /* Delete a "HyperDoc" object
204: ** --------------------------
205: */
206: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
207: {
208: if (hd) {
1.11 frystyk 209: HT_FREE (hd);
1.2 frystyk 210: return YES;
211: }
212: return NO;
213: }
214:
1.55 frystyk 215: /*
216: ** Sort the anchor array and log reference count
217: */
218: PRIVATE BOOL calculate_hits (Robot * mr, HTArray * array)
219: {
220: if (mr && array) {
221: HTLog * log = HTLog_open(mr->hitfile, YES, YES);
222: if (log) {
223: void ** data = NULL;
224: HTParentAnchor * anchor = NULL;
225: HTArray_sort(array, HitSort);
226: anchor = (HTParentAnchor *) HTArray_firstObject(array, data);
227: while (anchor) {
228: char * str = NULL;
229: char * uri = HTAnchor_address((HTAnchor *) anchor);
230: HyperDoc * hd = (HyperDoc *) HTAnchor_document(anchor);
231: if (uri && hd) {
232: if ((str = (char *) HT_MALLOC(strlen(uri) + 50)) == NULL)
233: HT_OUTOFMEM("calculate_hits");
1.58 ! frystyk 234: sprintf(str, "%8d %s", hd->hits, uri);
1.55 frystyk 235: HTLog_addLine(log, str);
236: HT_FREE(str);
237: }
238: HT_FREE(uri);
239: anchor = (HTParentAnchor *) HTArray_nextObject(array, data);
240: }
241: }
242: HTLog_close(log);
243: return YES;
244: }
245: return NO;
246: }
247:
248: PRIVATE int HitSort (const void * a, const void * b)
249: {
250: HyperDoc * aa = HTAnchor_document(*(HTParentAnchor **) a);
251: HyperDoc * bb = HTAnchor_document(*(HTParentAnchor **) b);
252: if (aa && bb) return (bb->hits - aa->hits);
253: return bb - aa;
254: }
255:
1.58 ! frystyk 256: /*
! 257: ** Calculate distributions for media types. The same mechanism
! 258: ** can be used for other characteristics with relatively
! 259: ** few outcomes.
! 260: */
! 261: PRIVATE HTList * mediatype_distribution (HTArray * array)
! 262: {
! 263: if (array) {
! 264: HTList * mt = HTList_new();
! 265: MetaDist * pres = NULL;
! 266: void ** data = NULL;
! 267: HTParentAnchor * anchor = NULL;
! 268: anchor = (HTParentAnchor *) HTArray_firstObject(array, data);
! 269: while (anchor) {
! 270: HTFormat format = HTAnchor_format(anchor);
! 271: if (format && format != WWW_UNKNOWN) {
! 272: HTList * cur = mt;
! 273:
! 274: /* If found then increase counter */
! 275: while ((pres = (MetaDist *) HTList_nextObject(cur))) {
! 276: if (pres->name == format) {
! 277: pres->hits++;
! 278: break;
! 279: }
! 280: }
! 281:
! 282: /* If not found then add new format to list */
! 283: if (!pres) {
! 284: if ((pres = (MetaDist *) HT_CALLOC(1, sizeof(MetaDist))) == NULL)
! 285: HT_OUTOFMEM("mediatype_distribution");
! 286: pres->name = format;
! 287: pres->hits = 1;
! 288: HTList_addObject(mt, pres);
! 289: HTList_insertionSort(mt, FormatSort);
! 290: }
! 291: }
! 292:
! 293: /* Find next anchor in array */
! 294: anchor = (HTParentAnchor *) HTArray_nextObject(array, data);
! 295: }
! 296: return mt;
! 297: }
! 298: return NULL;
! 299: }
! 300:
! 301: PRIVATE int FormatSort (const void * a, const void * b)
! 302: {
! 303: MetaDist * aa = (MetaDist *) a;
! 304: MetaDist * bb = (MetaDist *) b;
! 305: return strcmp(HTAtom_name(bb->name), HTAtom_name(aa->name));
! 306: }
! 307:
! 308: PRIVATE BOOL log_meta_distribution (const char * logfile, HTList * distribution)
! 309: {
! 310: if (logfile && distribution) {
! 311: HTLog * log = HTLog_open(logfile, YES, YES);
! 312: if (log) {
! 313: HTList * cur = distribution;
! 314: MetaDist * pres;
! 315: char str[64];
! 316: while ((pres = (MetaDist *) HTList_nextObject(cur))) {
! 317: if (pres->name) {
! 318: memset(str, '\0', 64*sizeof(char));
! 319: sprintf(str, "%8d ", pres->hits);
! 320: strncat(str, HTAtom_name(pres->name), 50);
! 321: HTLog_addLine(log, str);
! 322: }
! 323: }
! 324: HTLog_close(log);
! 325: }
! 326: }
! 327: return NO;
! 328: }
! 329:
! 330: PRIVATE BOOL delete_meta_distribution (HTList * distribution)
! 331: {
! 332: if (distribution) {
! 333: HTList * cur = distribution;
! 334: MetaDist * pres;
! 335: while ((pres = (MetaDist *) HTList_nextObject(cur)))
! 336: HT_FREE(pres);
! 337: HTList_delete(distribution);
! 338: return YES;
! 339: }
! 340: return NO;
! 341: }
! 342:
! 343:
1.55 frystyk 344: /* Statistics
345: ** ----------
346: ** Calculates a bunch of statistics for the anchors traversed
347: */
348: PRIVATE BOOL calculate_statistics (Robot * mr)
349: {
350: if (!mr) return NO;
351:
352: /* Calculate efficiency */
353: {
1.56 frystyk 354: ms_t t = HTGetTimeInMillis() - mr->time;
355: if (t > 0) {
356: double loadfactor = 1000 * (mr->total_bytes / t);
357: double secs = t / 1000.0;
1.55 frystyk 358: char bytes[50];
359: HTNumToStr(mr->total_bytes, bytes, 50);
1.56 frystyk 360: HTTrace("Downloaded %s bytes in %ld document bodies in %.2f seconds (%2.1f bytes/sec)\n",
361: bytes, mr->total_docs, secs, loadfactor);
1.55 frystyk 362: }
363: }
364:
365: /* Create an array of existing anchors */
366: if (mr->total_docs > 1) {
367: HTArray * array = HTAnchor_getArray(mr->total_docs);
368: if (array) {
369:
370: /* Sort after hit counts */
371: if (mr->hitfile) calculate_hits(mr, array);
372:
1.58 ! frystyk 373: /* Find mediatype distribution */
! 374: if (mr->mtfile) {
! 375: HTList * mtdist = mediatype_distribution(array);
! 376: if (mtdist) {
! 377: log_meta_distribution(mr->mtfile, mtdist);
! 378: delete_meta_distribution(mtdist);
! 379: }
! 380: }
1.55 frystyk 381:
382: /* Add as may other stats here as you like */
1.58 ! frystyk 383:
! 384:
! 385: /* Delete the array */
1.55 frystyk 386: HTArray_delete(array);
387: }
388: }
389: return YES;
390: }
391:
1.1 frystyk 392: /* Create a Command Line Object
393: ** ----------------------------
394: */
395: PRIVATE Robot * Robot_new (void)
396: {
397: Robot * me;
1.41 frystyk 398: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL)
1.14 frystyk 399: HT_OUTOFMEM("Robot_new");
1.2 frystyk 400: me->hyperdoc = HTList_new();
1.4 frystyk 401: me->htext = HTList_new();
1.40 frystyk 402: me->timer = DEFAULT_TIMEOUT;
1.25 frystyk 403: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 404: me->output = OUTPUT;
1.35 eric 405: me->cnt = 0;
1.34 eric 406: me->fingers = HTList_new();
1.1 frystyk 407: return me;
408: }
409:
410: /* Delete a Command Line Object
411: ** ----------------------------
412: */
413: PRIVATE BOOL Robot_delete (Robot * me)
414: {
415: if (me) {
1.34 eric 416: HTList_delete(me->fingers);
1.55 frystyk 417:
418: /* Calculate statistics */
419: calculate_statistics(me);
420:
421: if (me->hyperdoc) {
1.2 frystyk 422: HTList * cur = me->hyperdoc;
423: HyperDoc * pres;
424: while ((pres = (HyperDoc *) HTList_nextObject(cur)))
425: HyperDoc_delete(pres);
426: HTList_delete(me->hyperdoc);
427: }
1.4 frystyk 428: if (me->htext) {
429: HTList * cur = me->htext;
430: HText * pres;
431: while ((pres = (HText *) HTList_nextObject(cur)))
432: HText_free(pres);
433: HTList_delete(me->htext);
434: }
1.55 frystyk 435: if (me->log) HTLog_close(me->log);
1.57 frystyk 436: if (me->ref) HTLog_close(me->ref);
1.58 ! frystyk 437: if (me->reject) HTLog_close(me->reject);
! 438: if (me->notfound) HTLog_close(me->notfound);
! 439: if (me->conneg) HTLog_close(me->conneg);
1.1 frystyk 440: if (me->output && me->output != STDOUT) fclose(me->output);
1.12 frystyk 441: if (me->flags & MR_TIME) {
442: time_t local = time(NULL);
1.13 eric 443: HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
1.12 frystyk 444: }
1.55 frystyk 445:
1.58 ! frystyk 446: #ifdef HT_POSIX_REGEX
! 447: if (me->include) {
! 448: regfree(me->include);
! 449: HT_FREE(me->include);
! 450: }
! 451: if (me->exclude) {
! 452: regfree(me->exclude);
! 453: HT_FREE(me->exclude);
! 454: }
! 455: if (me->check) {
! 456: regfree(me->check);
! 457: HT_FREE(me->check);
! 458: }
! 459: #endif
! 460:
1.11 frystyk 461: HT_FREE(me->cwd);
1.55 frystyk 462: HT_FREE(me->prefix);
1.11 frystyk 463: HT_FREE(me);
1.1 frystyk 464: return YES;
465: }
466: return NO;
467: }
468:
1.2 frystyk 469: /*
1.34 eric 470: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 471: */
1.34 eric 472: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 473: {
1.34 eric 474: Finger * me;
475: HTRequest * request = HTRequest_new();
476: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
477: HT_OUTOFMEM("Finger_new");
478: me->robot = robot;
479: me->request = request;
480: me->dest = dest;
481: HTList_addObject(robot->fingers, (void *)me);
482:
1.48 frystyk 483: /* Set the context for this request */
1.34 eric 484: HTRequest_setContext (request, me);
1.48 frystyk 485:
486: /* Check the various flags to customize the request */
487: if (robot->flags & MR_PREEMPTIVE)
488: HTRequest_setPreemptive(request, YES);
489: if (robot->flags & MR_VALIDATE)
490: HTRequest_setReloadMode(request, HT_CACHE_VALIDATE);
491: if (robot->flags & MR_END_VALIDATE)
492: HTRequest_setReloadMode(request, HT_CACHE_END_VALIDATE);
493:
494: /* We wanna make sure that we are sending a Host header (default) */
1.34 eric 495: HTRequest_addRqHd(request, HT_C_HOST);
1.48 frystyk 496:
497: /* Set the method for this request */
1.34 eric 498: HTRequest_setMethod(request, method);
499: robot->cnt++;
500: return me;
1.2 frystyk 501: }
502:
1.34 eric 503: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 504: {
1.34 eric 505: HTList_removeObject(me->robot->fingers, (void *)me);
506: me->robot->cnt--;
1.37 frystyk 507:
508: /*
509: ** If we are down at one request then flush the output buffer
510: */
511: if (me->request) {
512: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 513: HTRequest_delete(me->request);
1.37 frystyk 514: }
515:
516: /*
517: ** Delete the request and free myself
518: */
1.34 eric 519: HT_FREE(me);
520: return YES;
1.2 frystyk 521: }
522:
523: /*
524: ** Cleanup and make sure we close all connections including the persistent
525: ** ones
526: */
1.1 frystyk 527: PRIVATE void Cleanup (Robot * me, int status)
528: {
529: Robot_delete(me);
1.29 eric 530: HTProfile_delete();
1.50 frystyk 531: #ifdef HT_MEMLOG
1.39 eric 532: HTMemLog_close();
1.47 frystyk 533: #endif
534:
1.1 frystyk 535: #ifdef VMS
536: exit(status ? status : 1);
537: #else
538: exit(status ? status : 0);
539: #endif
540: }
541:
542: #ifdef CATCH_SIG
543: #include <signal.h>
544: /* SetSignal
545: ** This function sets up signal handlers. This might not be necessary to
546: ** call if the application has its own handlers (lossage on SVR4)
547: */
548: PRIVATE void SetSignal (void)
549: {
550: /* On some systems (SYSV) it is necessary to catch the SIGPIPE signal
551: ** when attemting to connect to a remote host where you normally should
552: ** get `connection refused' back
553: */
554: if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
1.13 eric 555: if (PROT_TRACE) HTTrace("HTSignal.... Can't catch SIGPIPE\n");
1.1 frystyk 556: } else {
1.13 eric 557: if (PROT_TRACE) HTTrace("HTSignal.... Ignoring SIGPIPE\n");
1.1 frystyk 558: }
1.47 frystyk 559:
1.50 frystyk 560: #ifdef HT_MEMLOG
1.44 eric 561: HTMemLog_flush();
1.47 frystyk 562: #endif
563:
1.1 frystyk 564: }
565: #endif /* CATCH_SIG */
566:
1.58 ! frystyk 567: #ifdef HT_POSIX_REGEX
! 568: PRIVATE char * get_regerror (int errcode, regex_t * compiled)
! 569: {
! 570: size_t length = regerror (errcode, compiled, NULL, 0);
! 571: char * str = NULL;
! 572: if ((str = (char *) HT_MALLOC(length+1)) == NULL)
! 573: HT_OUTOFMEM("get_regerror");
! 574: (void) regerror (errcode, compiled, str, length);
! 575: return str;
! 576: }
! 577:
! 578: PRIVATE regex_t * get_regtype (Robot * mr, const char * regex_str)
! 579: {
! 580: regex_t * regex = NULL;
! 581: if (regex_str && *regex_str) {
! 582: int status;
! 583: if ((regex = (regex_t *) HT_CALLOC(1, sizeof(regex_t))) == NULL)
! 584: HT_OUTOFMEM("get_regtype");
! 585: if ((status = regcomp(regex, regex_str, REG_EXTENDED))) {
! 586: char * err_msg = get_regerror(status, regex);
! 587: HTTrace("Regular expression error: %s\n", err_msg);
! 588: HT_FREE(err_msg);
! 589: Cleanup(mr, -1);
! 590: }
! 591: }
! 592: return regex;
! 593: }
! 594: #endif
! 595:
1.1 frystyk 596: PRIVATE void VersionInfo (void)
597: {
1.13 eric 598: OutputData("\n\nW3C Reference Software\n\n");
599: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 600: APP_NAME, APP_VERSION);
1.13 eric 601: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
602: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 603: }
604:
605: /* terminate_handler
606: ** -----------------
1.2 frystyk 607: ** This function is registered to handle the result of the request.
608: ** If no more requests are pending then terminate program
1.1 frystyk 609: */
1.32 frystyk 610: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
611: void * param, int status)
1.1 frystyk 612: {
1.34 eric 613: Finger * finger = (Finger *) HTRequest_context(request);
1.46 eric 614: Robot * mr = finger->robot;
1.34 eric 615: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
1.55 frystyk 616:
1.58 ! frystyk 617: /* Check if negotiated resource and whether we should log that*/
! 618: if (mr->conneg) {
! 619: HTAssocList * cur = HTResponse_variant(response);
! 620: if (cur) {
! 621: BOOL first = YES;
! 622: HTChunk * buffer = HTChunk_new(128);
! 623: char * uri = HTAnchor_address((HTAnchor *) finger->dest);
! 624: HTAssoc * pres;
! 625: while ((pres = (HTAssoc *) HTAssocList_nextObject(cur))) {
! 626: char * value = HTAssoc_value(pres);
! 627: if (first) {
! 628: HTChunk_puts(buffer, "(");
! 629: first = NO;
! 630: } else
! 631: HTChunk_puts(buffer, ", ");
! 632:
! 633: /* Output the name */
! 634: HTChunk_puts(buffer, HTAssoc_name(pres));
! 635:
! 636: /* Only output the value if not empty string */
! 637: if (*value) {
! 638: HTChunk_puts(buffer, "=");
! 639: HTChunk_puts(buffer, value);
! 640: }
! 641: }
! 642: if (!first) HTChunk_puts(buffer, ")\t");
! 643: HTChunk_puts(buffer, uri);
! 644: HTLog_addLine(mr->conneg, HTChunk_toCString(buffer));
! 645: HTChunk_delete(buffer);
! 646: HT_FREE(uri);
! 647: }
! 648: }
! 649:
1.55 frystyk 650: /* Count the amount of body data that we have read */
651: if (status == HT_LOADED && HTRequest_method(request) == METHOD_GET) {
1.56 frystyk 652: int length = HTAnchor_length(HTRequest_anchor(request));
653: if (length > 0) mr->total_bytes += length;
1.55 frystyk 654: }
655:
656: /* Count the number of documents that we have processed */
657: mr->total_docs++;
658:
1.58 ! frystyk 659: /* Cleanup the anchor so that we don't drown in metainformation */
! 660: if (!(mr->flags & MR_KEEP_META))
! 661: HTAnchor_clearHeader(HTRequest_anchor(request));
! 662:
1.55 frystyk 663: /* Delete this thread */
1.34 eric 664: Finger_delete(finger);
1.55 frystyk 665:
666: /* Should we stop? */
1.46 eric 667: if (mr->cnt <= 0) {
1.34 eric 668: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.46 eric 669: Cleanup(mr, 0); /* No way back from here */
1.30 frystyk 670: }
1.46 eric 671: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", mr->cnt, mr->cnt == 1 ? "" : "s");
1.1 frystyk 672: return HT_OK;
673: }
674:
675: /* ------------------------------------------------------------------------- */
676: /* HTEXT INTERFACE */
677: /* ------------------------------------------------------------------------- */
678:
679: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
680: HTStream * stream)
681: {
682: HText * me;
1.34 eric 683: Finger * finger = (Finger *) HTRequest_context(request);
684: Robot * mr = finger->robot;
1.14 frystyk 685: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
686: HT_OUTOFMEM("HText_new2");
1.4 frystyk 687:
688: /* Bind the HText object together with the Request Object */
1.1 frystyk 689: me->request = request;
1.4 frystyk 690:
691: /* Add this HyperDoc object to our list */
692: if (!mr->htext) mr->htext = HTList_new();
693: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 694: return me;
695: }
696:
1.4 frystyk 697: PUBLIC void HText_free (HText * me) {
1.11 frystyk 698: if (me) HT_FREE (me);
1.4 frystyk 699: }
700:
1.1 frystyk 701: PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
702: {
703: if (text && anchor) {
1.34 eric 704: Finger * finger = (Finger *) HTRequest_context(text->request);
705: Robot * mr = finger->robot;
1.1 frystyk 706: HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
707: HTParentAnchor * dest_parent = HTAnchor_parent(dest);
1.7 frystyk 708: char * uri = HTAnchor_address((HTAnchor *) dest_parent);
1.1 frystyk 709: HyperDoc * hd = HTAnchor_document(dest_parent);
1.58 ! frystyk 710: BOOL match = YES;
! 711: BOOL check = NO;
1.1 frystyk 712:
1.55 frystyk 713: if (!uri) return;
714: if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL\n");
715:
716: if (hd) {
717: if (SHOW_MSG) HTTrace("Already checked\n");
718: hd->hits++;
1.58 ! frystyk 719: HT_FREE(uri);
! 720: return;
! 721: }
! 722:
! 723: /* Check for prefix match */
! 724: if (mr->prefix) match = HTStrMatch(mr->prefix, uri) ? YES : NO;
! 725:
! 726: #ifdef HT_POSIX_REGEX
! 727: /* Check for any regular expression */
! 728: if (match && mr->include) {
! 729: match = regexec(mr->include, uri, 0, NULL, 0) ? NO : YES;
! 730: }
! 731: if (match && mr->exclude) {
! 732: match = regexec(mr->exclude, uri, 0, NULL, 0) ? YES : NO;
! 733: }
! 734: if (match && mr->check) {
! 735: check = regexec(mr->check, uri, 0, NULL, 0) ? NO : YES;
! 736: }
! 737: #endif
! 738:
! 739: /* Test whether we already have a hyperdoc for this document */
! 740: if (mr->flags & MR_LINK && match && dest_parent) {
1.1 frystyk 741: HTParentAnchor * parent = HTRequest_parent(text->request);
742: HyperDoc * last = HTAnchor_document(parent);
743: int depth = last ? last->depth+1 : 0;
1.34 eric 744: Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
745: HTRequest * newreq = newfinger->request;
1.2 frystyk 746: HyperDoc_new(mr, dest_parent, depth);
1.7 frystyk 747: HTRequest_setParent(newreq, HTRequest_anchor(text->request));
1.58 ! frystyk 748: if (check || depth >= mr->depth) {
! 749: if (SHOW_MSG) HTTrace("loading at depth %d using HEAD\n", depth);
1.7 frystyk 750: HTRequest_setMethod(newreq, METHOD_HEAD);
1.30 frystyk 751: HTRequest_setOutputFormat(newreq, WWW_DEBUG);
1.7 frystyk 752: } else {
1.13 eric 753: if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
1.2 frystyk 754: }
755: if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
1.13 eric 756: if (SHOW_MSG) HTTrace("not tested!\n");
1.34 eric 757: Finger_delete(newfinger);
1.2 frystyk 758: }
1.7 frystyk 759: } else {
1.55 frystyk 760: if (SHOW_MSG) HTTrace("does not fulfill constraints\n");
1.58 ! frystyk 761: if (mr->reject) HTLog_addLine(mr->reject, uri);
1.2 frystyk 762: }
1.11 frystyk 763: HT_FREE(uri);
1.2 frystyk 764: }
765: }
766:
767: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 768: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 769: {
770: if (text && anchor) {
1.34 eric 771: Finger * finger = (Finger *) HTRequest_context(text->request);
772: Robot * mr = finger->robot;
1.2 frystyk 773: HTParentAnchor * dest = (HTParentAnchor *)
774: HTAnchor_followMainLink((HTAnchor *) anchor);
775: HyperDoc * hd = HTAnchor_document(dest);
1.1 frystyk 776:
1.2 frystyk 777: /* Test whether we already have a hyperdoc for this document */
778: if (mr->flags & MR_IMG && dest && !hd) {
779: HTParentAnchor * parent = HTRequest_parent(text->request);
780: HyperDoc * last = HTAnchor_document(parent);
781: int depth = last ? last->depth+1 : 0;
1.45 frystyk 782: Finger * newfinger = Finger_new(mr, dest,
783: mr->flags & MR_SAVE ?
784: METHOD_GET : METHOD_HEAD);
1.34 eric 785: HTRequest * newreq = newfinger->request;
1.2 frystyk 786: HyperDoc_new(mr, dest, depth);
787: if (SHOW_MSG) {
788: char * uri = HTAnchor_address((HTAnchor *) dest);
1.13 eric 789: HTTrace("Robot....... Checking Image `%s\'\n", uri);
1.11 frystyk 790: HT_FREE(uri);
1.2 frystyk 791: }
792: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
793: if (SHOW_MSG)
1.13 eric 794: HTTrace("Robot....... Image not tested!\n");
1.34 eric 795: Finger_delete(newfinger);
1.1 frystyk 796: }
797: }
798: }
799: }
800:
801: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 802: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 803: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
804: PUBLIC void HText_endAppend (HText * text) {}
805: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
806: PUBLIC void HText_beginAppend (HText * text) {}
807: PUBLIC void HText_appendParagraph (HText * text) {}
808:
1.48 frystyk 809: PRIVATE int RobotTrace (const char * fmt, va_list pArgs)
810: {
811: return (vfprintf(stderr, fmt, pArgs));
812: }
813:
1.1 frystyk 814: /* ------------------------------------------------------------------------- */
815: /* MAIN PROGRAM */
816: /* ------------------------------------------------------------------------- */
817:
818: int main (int argc, char ** argv)
819: {
1.48 frystyk 820: int status = 0;
1.1 frystyk 821: int arg;
1.48 frystyk 822: BOOL cache = NO; /* Use persistent cache */
823: BOOL flush = NO; /* flush the persistent cache */
1.54 frystyk 824: char * cache_root = NULL;
1.1 frystyk 825: HTChunk * keywords = NULL; /* From command line */
826: int keycnt = 0;
1.12 frystyk 827: Robot * mr = NULL;
1.43 frystyk 828: Finger * finger = NULL;
829: HTParentAnchor * startAnchor = NULL;
1.1 frystyk 830:
831: /* Starts Mac GUSI socket library */
832: #ifdef GUSI
833: GUSISetup(GUSIwithSIOUXSockets);
834: GUSISetup(GUSIwithInternetSockets);
835: #endif
836:
837: #ifdef __MWERKS__ /* STR */
838: InitGraf((Ptr) &qd.thePort);
839: InitFonts();
840: InitWindows();
841: InitMenus(); TEInit();
842: InitDialogs(nil);
843: InitCursor();
844: SIOUXSettings.asktosaveonclose = false;
845: argc=ccommand(&argv);
1.50 frystyk 846: #endif /* __MWERKS__ */
1.1 frystyk 847:
1.50 frystyk 848: #ifdef HT_MEMLOG
1.51 frystyk 849: HTMemLog_open(DEFAULT_MEMLOG, 8192, YES);
1.47 frystyk 850: #endif
1.46 eric 851:
1.27 frystyk 852: /* Initiate W3C Reference Library with a robot profile */
853: HTProfile_newRobot(APP_NAME, APP_VERSION);
1.48 frystyk 854: HTTrace_setCallback(RobotTrace);
1.27 frystyk 855:
856: /* Add the default HTML parser to the set of converters */
857: {
858: HTList * converters = HTFormat_conversion();
859: HTMLInit(converters);
860: }
1.1 frystyk 861:
1.12 frystyk 862: /* Build a new robot object */
863: mr = Robot_new();
864:
1.1 frystyk 865: /* Scan command Line for parameters */
866: for (arg=1; arg<argc; arg++) {
867: if (*argv[arg] == '-') {
868:
869: /* non-interactive */
1.17 frystyk 870: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 871: HTAlert_setInteractive(NO);
872:
1.55 frystyk 873: /* log file */
1.1 frystyk 874: } else if (!strcmp(argv[arg], "-l")) {
875: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
876: argv[++arg] : DEFAULT_LOG_FILE;
877:
1.55 frystyk 878: /* hit file */
879: } else if (!strcmp(argv[arg], "-hit")) {
880: mr->hitfile = (arg+1 < argc && *argv[arg+1] != '-') ?
881: argv[++arg] : DEFAULT_HIT_FILE;
882:
1.57 frystyk 883: /* referer file */
1.58 ! frystyk 884: } else if (!strncmp(argv[arg], "-ref", 4)) {
1.57 frystyk 885: mr->reffile = (arg+1 < argc && *argv[arg+1] != '-') ?
886: argv[++arg] : DEFAULT_REFERER_FILE;
887:
1.58 ! frystyk 888: /* Not found error log file */
! 889: } else if (!strncmp(argv[arg], "-404", 4)) {
! 890: mr->notfoundfile = (arg+1 < argc && *argv[arg+1] != '-') ?
! 891: argv[++arg] : DEFAULT_NOTFOUND_FILE;
! 892:
! 893: /* reject log file */
! 894: } else if (!strncmp(argv[arg], "-rej", 4)) {
! 895: mr->rejectfile = (arg+1 < argc && *argv[arg+1] != '-') ?
! 896: argv[++arg] : DEFAULT_REJECT_FILE;
! 897:
! 898: /* negoatiated resource log file */
! 899: } else if (!strncmp(argv[arg], "-neg", 4)) {
! 900: mr->connegfile = (arg+1 < argc && *argv[arg+1] != '-') ?
! 901: argv[++arg] : DEFAULT_CONNEG_FILE;
! 902:
! 903: /* mediatype distribution log file */
! 904: } else if (!strncmp(argv[arg], "-for", 4)) {
! 905: mr->mtfile = (arg+1 < argc && *argv[arg+1] != '-') ?
! 906: argv[++arg] : DEFAULT_FORMAT_FILE;
! 907: mr->flags |= MR_KEEP_META;
! 908:
1.55 frystyk 909: /* rule file */
1.1 frystyk 910: } else if (!strcmp(argv[arg], "-r")) {
911: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
912: argv[++arg] : DEFAULT_RULE_FILE;
913:
914: /* output filename */
915: } else if (!strcmp(argv[arg], "-o")) {
916: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
917: argv[++arg] : DEFAULT_OUTPUT_FILE;
918:
1.55 frystyk 919: /* URI prefix */
920: } else if (!strcmp(argv[arg], "-prefix")) {
921: char * prefix = NULL;
922: prefix = (arg+1 < argc && *argv[arg+1] != '-') ?
923: argv[++arg] : DEFAULT_PREFIX;
924: if (*prefix) {
925: StrAllocCopy(mr->prefix, prefix);
926: StrAllocCat(mr->prefix, "*");
927: }
928:
1.1 frystyk 929: /* timeout -- Change the default request timeout */
930: } else if (!strcmp(argv[arg], "-timeout")) {
931: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
932: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
1.40 frystyk 933: if (timeout > 0) mr->timer = timeout;
1.1 frystyk 934:
1.54 frystyk 935: /* Force no pipelined requests */
936: } else if (!strcmp(argv[arg], "-nopipe")) {
937: HTTP_setConnectionMode(HTTP_NO_PIPELINING);
938:
1.48 frystyk 939: /* Start the persistent cache */
940: } else if (!strcmp(argv[arg], "-cache")) {
941: cache = YES;
942:
1.54 frystyk 943: /* Determine the cache root */
944: } else if (!strcmp(argv[arg], "-cacheroot")) {
945: cache_root = (arg+1 < argc && *argv[arg+1] != '-') ?
946: argv[++arg] : NULL;
1.51 frystyk 947:
1.52 frystyk 948: /* Stream write flush delay in ms */
949: } else if (!strcmp(argv[arg], "-delay")) {
950: int delay = (arg+1 < argc && *argv[arg+1] != '-') ?
951: atoi(argv[++arg]) : DEFAULT_DELAY;
952: HTHost_setDefaultWriteDelay(delay);
953:
1.48 frystyk 954: /* Persistent cache flush */
955: } else if (!strcmp(argv[arg], "-flush")) {
956: flush = YES;
957:
958: /* Do a cache validation */
959: } else if (!strcmp(argv[arg], "-validate")) {
960: mr->flags |= MR_VALIDATE;
961:
962: /* Do an end-to-end cache-validation */
963: } else if (!strcmp(argv[arg], "-endvalidate")) {
964: mr->flags |= MR_END_VALIDATE;
965:
1.7 frystyk 966: /* preemptive or non-preemptive access */
1.1 frystyk 967: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 968: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 969:
970: /* test inlined images */
971: } else if (!strcmp(argv[arg], "-img")) {
972: mr->flags |= MR_IMG;
1.45 frystyk 973:
974: /* load inlined images */
975: } else if (!strcmp(argv[arg], "-saveimg")) {
976: mr->flags |= (MR_IMG | MR_SAVE);
1.2 frystyk 977:
978: /* load anchors */
1.58 ! frystyk 979: } else if (!strcmp(argv[arg], "-link") || !strcmp(argv[arg], "-depth")) {
1.2 frystyk 980: mr->flags |= MR_LINK;
1.7 frystyk 981: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
982: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 983:
1.12 frystyk 984: /* Output start and end time */
985: } else if (!strcmp(argv[arg], "-ss")) {
986: time_t local = time(NULL);
1.13 eric 987: HTTrace("Robot started on %s\n",
1.12 frystyk 988: HTDateTimeStr(&local, YES));
989: mr->flags |= MR_TIME;
990:
1.1 frystyk 991: /* print version and exit */
992: } else if (!strcmp(argv[arg], "-version")) {
993: VersionInfo();
994: Cleanup(mr, 0);
1.46 eric 995:
996: /* run in quiet mode */
997: } else if (!strcmp(argv[arg], "-q")) {
998: mr->flags |= MR_QUIET;
1.1 frystyk 999:
1000: #ifdef WWWTRACE
1001: /* trace flags */
1002: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 1003: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 1004: #endif
1005:
1.58 ! frystyk 1006: #ifdef HT_POSIX_REGEX
! 1007:
! 1008: /* If we can link against a POSIX regex library */
! 1009: } else if (!strncmp(argv[arg], "-inc", 4)) {
! 1010: if (arg+1 < argc && *argv[arg+1] != '-') {
! 1011: mr->include = get_regtype(mr, argv[++arg]);
! 1012: }
! 1013: } else if (!strncmp(argv[arg], "-exc", 4)) {
! 1014: if (arg+1 < argc && *argv[arg+1] != '-') {
! 1015: mr->exclude = get_regtype(mr, argv[++arg]);
! 1016: }
! 1017: } else if (!strncmp(argv[arg], "-check", 6)) {
! 1018: if (arg+1 < argc && *argv[arg+1] != '-') {
! 1019: mr->check = get_regtype(mr, argv[++arg]);
! 1020: }
! 1021: #endif
! 1022:
1.1 frystyk 1023: } else {
1.13 eric 1024: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 1025: }
1.17 frystyk 1026: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 1027: if (!keycnt) {
1028: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.56 frystyk 1029: startAnchor = HTAnchor_parent(HTAnchor_findAddress(ref));
1.34 eric 1030: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 1031: keycnt = 1;
1.11 frystyk 1032: HT_FREE(ref);
1.1 frystyk 1033: } else { /* Check for successive keyword arguments */
1034: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
1035: if (keycnt++ <= 1)
1.5 frystyk 1036: keywords = HTChunk_new(128);
1.1 frystyk 1037: else
1.5 frystyk 1038: HTChunk_putc(keywords, ' ');
1039: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 1040: HT_FREE(escaped);
1.1 frystyk 1041: }
1042: }
1043: }
1044:
1045: #ifdef CATCH_SIG
1046: SetSignal();
1047: #endif
1048:
1049: if (!keycnt) {
1.13 eric 1050: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 1051: Cleanup(mr, -1);
1052: }
1053:
1.23 manoli 1054: /* Testing that HTTrace is working */
1.47 frystyk 1055: if (SHOW_MSG) HTTrace ("Welcome to the W3C mini Robot\n");
1.23 manoli 1056:
1.1 frystyk 1057: /* Rule file specified? */
1058: if (mr->rules) {
1059: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 1060: if (!HTLoadRules(rules))
1.13 eric 1061: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 1062: HT_FREE(rules);
1.1 frystyk 1063: }
1064:
1065: /* Output file specified? */
1066: if (mr->outputfile) {
1067: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 1068: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 1069: mr->output = OUTPUT;
1070: }
1071: }
1072:
1.48 frystyk 1073: /* Should we use persistent cache? */
1074: if (cache) {
1.54 frystyk 1075: HTCacheInit(cache_root, 20);
1.49 frystyk 1076: HTNet_addBefore(HTCacheFilter, "http://*", NULL, HT_FILTER_MIDDLE);
1077: HTNet_addAfter(HTCacheUpdateFilter, "http://*", NULL,
1078: HT_NOT_MODIFIED, HT_FILTER_MIDDLE);
1.48 frystyk 1079:
1080: /* Should we start by flushing? */
1081: if (flush) HTCache_flushAll();
1082: }
1083:
1.58 ! frystyk 1084: /* CLF Log file specified? */
1.55 frystyk 1085: if (mr->logfile) {
1086: mr->log = HTLog_open(mr->logfile, YES, YES);
1087: if (mr->log) HTNet_addAfter(HTLogFilter, NULL, mr->log, HT_ALL, HT_FILTER_LATE);
1.57 frystyk 1088: }
1089:
1.58 ! frystyk 1090: /* Referer Log file specified? */
1.57 frystyk 1091: if (mr->reffile) {
1092: mr->ref = HTLog_open(mr->reffile, YES, YES);
1093: if (mr->ref)
1094: HTNet_addAfter(HTRefererFilter, NULL, mr->ref, HT_ALL, HT_FILTER_LATE);
1.55 frystyk 1095: }
1.1 frystyk 1096:
1.58 ! frystyk 1097: /* Not found error log specified? */
! 1098: if (mr->notfoundfile) {
! 1099: mr->notfound = HTLog_open(mr->notfoundfile, YES, YES);
! 1100: if (mr->notfound)
! 1101: HTNet_addAfter(HTRefererFilter, NULL, mr->notfound, -404, HT_FILTER_LATE);
! 1102: }
! 1103:
! 1104: /* Negotiated resource log specified? */
! 1105: if (mr->connegfile) mr->conneg = HTLog_open(mr->connegfile, YES, YES);
! 1106:
! 1107: /* Reject Log file specified? */
! 1108: if (mr->rejectfile) mr->reject = HTLog_open(mr->rejectfile, YES, YES);
! 1109:
! 1110: /* Register our own terminate filter */
1.32 frystyk 1111: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.40 frystyk 1112:
1113: /* Setting event timeout */
1114: HTHost_setEventTimeout(mr->timer);
1.55 frystyk 1115:
1.56 frystyk 1116: mr->time = HTGetTimeInMillis();
1.37 frystyk 1117:
1.34 eric 1118: /* Start the request */
1119: finger = Finger_new(mr, startAnchor, METHOD_GET);
1.43 frystyk 1120:
1121: /*
1122: ** Make sure that the first request is flushed immediately and not
1123: ** buffered in the output buffer
1124: */
1125: HTRequest_setFlush(finger->request, YES);
1126:
1127: /*
1.48 frystyk 1128: ** Check whether we should do some kind of cache validation on
1129: ** the load
1130: */
1131: if (mr->flags & MR_VALIDATE)
1132: HTRequest_setReloadMode(finger->request, HT_CACHE_VALIDATE);
1133: if (mr->flags & MR_END_VALIDATE)
1134: HTRequest_setReloadMode(finger->request, HT_CACHE_END_VALIDATE);
1135:
1136: /*
1.43 frystyk 1137: ** Now do the load
1138: */
1.34 eric 1139: if (mr->flags & MR_PREEMPTIVE)
1140: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 1141:
1142: if (keywords) /* Search */
1.34 eric 1143: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 1144: else
1.34 eric 1145: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 1146:
1.5 frystyk 1147: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 1148: if (status != YES) {
1.13 eric 1149: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 1150: Cleanup(mr, -1);
1151: }
1152:
1153: /* Go into the event loop... */
1.34 eric 1154: HTEventList_loop(finger->request);
1.1 frystyk 1155:
1156: /* Only gets here if event loop fails */
1157: Cleanup(mr, 0);
1158: return 0;
1159: }
Webmaster