Annotation of libwww/Robot/src/HTRobot.c, revision 1.59
1.1 frystyk 1: /* HTRobot.c
2: ** W3C MINI ROBOT
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This program illustrates how to traverse links using the Anchor object
8: **
9: ** Authors:
10: ** HFN Henrik Frystyk Nielsen, (frystyk@w3.org)
11: **
12: ** History:
13: ** Dec 04 95 First version
14: */
15:
16: #include "WWWLib.h" /* Global Library Include file */
17: #include "WWWApp.h" /* Application stuff */
1.17 frystyk 18: #include "WWWTrans.h"
1.10 frystyk 19: #include "WWWInit.h"
1.9 frystyk 20:
1.4 frystyk 21: #include "HText.h"
1.1 frystyk 22:
23: #include "HTRobot.h" /* Implemented here */
24:
1.58 frystyk 25: #ifdef HT_POSIX_REGEX
26: #include "rxposix.h"
27: #endif
28:
1.14 frystyk 29: #ifndef W3C_VERSION
1.33 eric 30: #define W3C_VERSION "Unspecified"
1.1 frystyk 31: #endif
32:
33: #define APP_NAME "W3CRobot"
1.14 frystyk 34: #define APP_VERSION W3C_VERSION
1.1 frystyk 35:
36: #define DEFAULT_OUTPUT_FILE "robot.out"
37: #define DEFAULT_RULE_FILE "robot.conf"
1.58 frystyk 38: #define DEFAULT_LOG_FILE "log-clf.txt"
39: #define DEFAULT_HIT_FILE "log-hit.txt"
40: #define DEFAULT_REFERER_FILE "log-referer.txt"
41: #define DEFAULT_REJECT_FILE "log-reject.txt"
42: #define DEFAULT_NOTFOUND_FILE "log-notfound.txt"
43: #define DEFAULT_CONNEG_FILE "log-conneg.txt"
44: #define DEFAULT_FORMAT_FILE "log-format.txt"
1.51 frystyk 45: #define DEFAULT_MEMLOG "robot.mem"
1.55 frystyk 46: #define DEFAULT_PREFIX ""
1.59 ! frystyk 47: #define DEFAULT_IMG_PREFIX ""
1.7 frystyk 48: #define DEFAULT_DEPTH 0
1.53 frystyk 49: #define DEFAULT_DELAY 50 /* Write delay in ms */
1.1 frystyk 50:
1.51 frystyk 51: #if 0
1.53 frystyk 52: #define HT_MEMLOG /* May be expensive in performance! */
1.51 frystyk 53: #endif
54:
1.46 eric 55: /* #define SHOW_MSG (WWWTRACE || HTAlert_interactive()) */
56: #define SHOW_MSG (!(mr->flags & MR_QUIET))
1.1 frystyk 57:
1.40 frystyk 58: #define DEFAULT_TIMEOUT 10000 /* timeout in millis */
1.1 frystyk 59:
60: #if defined(__svr4__)
61: #define CATCH_SIG
62: #endif
63:
/*
**  Bit flags controlling the behaviour of the robot. They are OR'ed
**  together in Robot.flags.
*/
typedef enum _MRFlags {
    MR_IMG = 0x1,               /* Check inlined images (HText_appendImage) */
    MR_LINK = 0x2,              /* Follow hyperlinks (HText_beginAnchor) */
    MR_PREEMPTIVE = 0x4,        /* Issue requests as preemptive requests */
    MR_TIME = 0x8,              /* Trace the termination time on shutdown */
    MR_SAVE = 0x10,             /* Use GET instead of HEAD for inlined images */
    MR_QUIET = 0x20,            /* Suppress progress messages (see SHOW_MSG) */
    MR_VALIDATE = 0x40,         /* Requests use HT_CACHE_VALIDATE */
    MR_END_VALIDATE = 0x80,     /* Requests use HT_CACHE_END_VALIDATE */
    MR_KEEP_META = 0x100        /* Don't clear anchor headers after a request */
} MRFlags;
75:
/*
**  The Robot object holds the complete state of one robot run: limits,
**  bookkeeping lists, log files and the counters used for statistics.
*/
typedef struct _Robot {
    int depth; /* How deep is our tree */
    int cnt; /* Count of requests */
    HTList * hyperdoc; /* List of our HyperDoc Objects */
    HTList * htext; /* List of our HText Objects */
    HTList * fingers; /* List of our Finger Objects */

    int timer; /* Request timeout in ms (DEFAULT_TIMEOUT) */
    char * cwd; /* Current dir URL */
    char * rules; /* Name of the rule file */
    char * prefix; /* URI pattern that links must match */
    char * img_prefix; /* URI pattern that inlined images must match */

    char * logfile; /* Name of the log file */
    HTLog * log; /* Closed in Robot_delete */
    char * reffile; /* Name of the referer log file */
    HTLog * ref; /* Closed in Robot_delete */
    char * rejectfile; /* Name of the reject log file */
    HTLog * reject; /* URIs that did not fulfill the constraints */
    char * notfoundfile; /* Name of the "not found" log file */
    HTLog * notfound; /* Closed in Robot_delete */
    char * connegfile; /* Name of the negotiated resource log file */
    HTLog * conneg; /* Variants of negotiated resources */
    char * outputfile; /* Name of the output file */
    FILE * output; /* Output stream */
    char * hitfile; /* Name of the hit count log file */
    char * mtfile; /* Name of the media type distribution log file */

    MRFlags flags; /* OR'ed MRFlags values */

    long get_bytes; /* Total number of bytes processed using GET*/
    long get_docs; /* Total number of documents using GET */

    long head_bytes; /* Total number of bytes processed using HEAD */
    long head_docs; /* Total number of documents using HEAD*/

    long other_docs; /* Total number of documents using other methods */

    ms_t time; /* Time of run */

#ifdef HT_POSIX_REGEX
    regex_t * include; /* Only URIs matching this are followed */
    regex_t * exclude; /* URIs matching this are not followed */
    regex_t * check; /* URIs matching this are checked using HEAD */
#endif

} Robot;
1.34 eric 123:
/*
**  A Finger ties one outstanding request to the robot that issued it and
**  to the anchor being loaded. It is installed as the request context.
*/
typedef struct _Finger {
    Robot * robot; /* The robot owning this finger */
    HTRequest * request; /* The request created for this finger */
    HTParentAnchor * dest; /* The anchor that the request is loading */
} Finger;
129:
/*
**  Load state of a HyperDoc. A new HyperDoc starts out as L_INVALID.
*/
typedef enum _LoadState {
    L_INVALID = -2,
    L_LOADING = -1,
    L_SUCCESS = 0,
    L_ERROR
} LoadState;
136:
/*
** The HyperDoc object is bound to the anchor and contains information about
** where we are in the search for recursive searches
*/
typedef struct _HyperDoc {
    HTParentAnchor * anchor; /* The anchor this object is bound to */
    LoadState state; /* Starts out as L_INVALID */
    int depth; /* Depth in the search tree */
    int hits; /* Starts at 1, incremented each time the link is seen again */
} HyperDoc;
147:
/*
** This is the HText object that is created every time we start parsing a
** HTML object. The robot only needs to remember the request that is being
** parsed.
*/
struct _HText {
    HTRequest * request; /* The request being parsed */
};
1.1 frystyk 155:
/*
** A structure for calculating metadata distributions: one entry per
** distinct value together with the number of times it was seen
*/
typedef struct _MetaDist {
    HTAtom * name; /* The value being counted (e.g. a media type) */
    int hits; /* Number of anchors having this value */
} MetaDist;
163:
164: /*
165: ** Some sorting algorithms
166: */
167: PRIVATE HTComparer HitSort, FormatSort;
168:
1.1 frystyk 169: PUBLIC HText * HTMainText = NULL;
170: PUBLIC HTParentAnchor * HTMainAnchor = NULL;
171: PUBLIC HTStyleSheet * styleSheet = NULL;
172:
173: /* ------------------------------------------------------------------------- */
174:
1.13 eric 175: /* Standard (non-error) Output
176: ** ---------------------------
177: */
178: PUBLIC int OutputData(const char * fmt, ...)
179: {
180: int ret;
181: va_list pArgs;
182: va_start(pArgs, fmt);
183: ret = vfprintf(stdout, fmt, pArgs);
184: va_end(pArgs);
185: return ret;
186: }
187:
188: /* ------------------------------------------------------------------------- */
189:
1.2 frystyk 190: /* Create a "HyperDoc" object
191: ** --------------------------
192: ** A HyperDoc object contains information about whether we have already
193: ** started checking the anchor and the depth in our search
194: */
195: PRIVATE HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth)
196: {
197: HyperDoc * hd;
1.14 frystyk 198: if ((hd = (HyperDoc *) HT_CALLOC(1, sizeof(HyperDoc))) == NULL)
199: HT_OUTOFMEM("HyperDoc_new");
1.2 frystyk 200: hd->state = L_INVALID;
201: hd->depth = depth;
1.55 frystyk 202: hd->hits = 1;
1.2 frystyk 203:
204: /* Bind the HyperDoc object together with the Anchor Object */
205: hd->anchor = anchor;
206: HTAnchor_setDocument(anchor, (void *) hd);
207:
208: /* Add this HyperDoc object to our list */
209: if (!mr->hyperdoc) mr->hyperdoc = HTList_new();
210: HTList_addObject(mr->hyperdoc, (void *) hd);
211: return hd;
212: }
213:
214: /* Delete a "HyperDoc" object
215: ** --------------------------
216: */
217: PRIVATE BOOL HyperDoc_delete (HyperDoc * hd)
218: {
219: if (hd) {
1.11 frystyk 220: HT_FREE (hd);
1.2 frystyk 221: return YES;
222: }
223: return NO;
224: }
225:
1.55 frystyk 226: /*
227: ** Sort the anchor array and log reference count
228: */
229: PRIVATE BOOL calculate_hits (Robot * mr, HTArray * array)
230: {
231: if (mr && array) {
232: HTLog * log = HTLog_open(mr->hitfile, YES, YES);
233: if (log) {
234: void ** data = NULL;
235: HTParentAnchor * anchor = NULL;
236: HTArray_sort(array, HitSort);
237: anchor = (HTParentAnchor *) HTArray_firstObject(array, data);
238: while (anchor) {
239: char * str = NULL;
240: char * uri = HTAnchor_address((HTAnchor *) anchor);
241: HyperDoc * hd = (HyperDoc *) HTAnchor_document(anchor);
242: if (uri && hd) {
243: if ((str = (char *) HT_MALLOC(strlen(uri) + 50)) == NULL)
244: HT_OUTOFMEM("calculate_hits");
1.58 frystyk 245: sprintf(str, "%8d %s", hd->hits, uri);
1.55 frystyk 246: HTLog_addLine(log, str);
247: HT_FREE(str);
248: }
249: HT_FREE(uri);
250: anchor = (HTParentAnchor *) HTArray_nextObject(array, data);
251: }
252: }
253: HTLog_close(log);
254: return YES;
255: }
256: return NO;
257: }
258:
259: PRIVATE int HitSort (const void * a, const void * b)
260: {
261: HyperDoc * aa = HTAnchor_document(*(HTParentAnchor **) a);
262: HyperDoc * bb = HTAnchor_document(*(HTParentAnchor **) b);
263: if (aa && bb) return (bb->hits - aa->hits);
264: return bb - aa;
265: }
266:
1.58 frystyk 267: /*
268: ** Calculate distributions for media types. The same mechanism
269: ** can be used for other characteristics with relatively
270: ** few outcomes.
271: */
272: PRIVATE HTList * mediatype_distribution (HTArray * array)
273: {
274: if (array) {
275: HTList * mt = HTList_new();
276: MetaDist * pres = NULL;
277: void ** data = NULL;
278: HTParentAnchor * anchor = NULL;
279: anchor = (HTParentAnchor *) HTArray_firstObject(array, data);
280: while (anchor) {
281: HTFormat format = HTAnchor_format(anchor);
282: if (format && format != WWW_UNKNOWN) {
283: HTList * cur = mt;
284:
285: /* If found then increase counter */
286: while ((pres = (MetaDist *) HTList_nextObject(cur))) {
287: if (pres->name == format) {
288: pres->hits++;
289: break;
290: }
291: }
292:
293: /* If not found then add new format to list */
294: if (!pres) {
295: if ((pres = (MetaDist *) HT_CALLOC(1, sizeof(MetaDist))) == NULL)
296: HT_OUTOFMEM("mediatype_distribution");
297: pres->name = format;
298: pres->hits = 1;
299: HTList_addObject(mt, pres);
300: HTList_insertionSort(mt, FormatSort);
301: }
302: }
303:
304: /* Find next anchor in array */
305: anchor = (HTParentAnchor *) HTArray_nextObject(array, data);
306: }
307: return mt;
308: }
309: return NULL;
310: }
311:
312: PRIVATE int FormatSort (const void * a, const void * b)
313: {
314: MetaDist * aa = (MetaDist *) a;
315: MetaDist * bb = (MetaDist *) b;
316: return strcmp(HTAtom_name(bb->name), HTAtom_name(aa->name));
317: }
318:
319: PRIVATE BOOL log_meta_distribution (const char * logfile, HTList * distribution)
320: {
321: if (logfile && distribution) {
322: HTLog * log = HTLog_open(logfile, YES, YES);
323: if (log) {
324: HTList * cur = distribution;
325: MetaDist * pres;
326: char str[64];
327: while ((pres = (MetaDist *) HTList_nextObject(cur))) {
328: if (pres->name) {
329: memset(str, '\0', 64*sizeof(char));
330: sprintf(str, "%8d ", pres->hits);
331: strncat(str, HTAtom_name(pres->name), 50);
332: HTLog_addLine(log, str);
333: }
334: }
335: HTLog_close(log);
336: }
337: }
338: return NO;
339: }
340:
341: PRIVATE BOOL delete_meta_distribution (HTList * distribution)
342: {
343: if (distribution) {
344: HTList * cur = distribution;
345: MetaDist * pres;
346: while ((pres = (MetaDist *) HTList_nextObject(cur)))
347: HT_FREE(pres);
348: HTList_delete(distribution);
349: return YES;
350: }
351: return NO;
352: }
353:
354:
1.55 frystyk 355: /* Statistics
356: ** ----------
357: ** Calculates a bunch of statistics for the anchors traversed
358: */
359: PRIVATE BOOL calculate_statistics (Robot * mr)
360: {
1.59 ! frystyk 361: long total_docs = mr->get_docs + mr->head_docs + mr->other_docs;
1.55 frystyk 362: if (!mr) return NO;
363:
364: /* Calculate efficiency */
1.59 ! frystyk 365: if (mr->time > 0) {
1.56 frystyk 366: ms_t t = HTGetTimeInMillis() - mr->time;
367: if (t > 0) {
1.59 ! frystyk 368: double loadfactor = 1000 * (mr->get_bytes / t);
1.56 frystyk 369: double secs = t / 1000.0;
1.55 frystyk 370: char bytes[50];
1.59 ! frystyk 371: HTTrace("Accessed %ld documents in %.2f seconds\n",
! 372: total_docs, secs);
! 373:
! 374: HTNumToStr(mr->get_bytes, bytes, 50);
! 375: HTTrace("Did a GET on %ld document(s) and downloaded %s bytes of document bodies (%2.1f bytes/sec)\n",
! 376: mr->get_docs, bytes, loadfactor);
! 377:
! 378: HTNumToStr(mr->head_bytes, bytes, 50);
! 379: HTTrace("Did a HEAD on %ld document(s) with a total of %s bytes\n",
! 380: mr->head_docs, bytes);
1.55 frystyk 381: }
382: }
383:
384: /* Create an array of existing anchors */
1.59 ! frystyk 385: if (total_docs > 1) {
! 386: HTArray * array = HTAnchor_getArray(total_docs);
1.55 frystyk 387: if (array) {
388:
389: /* Sort after hit counts */
390: if (mr->hitfile) calculate_hits(mr, array);
391:
1.58 frystyk 392: /* Find mediatype distribution */
393: if (mr->mtfile) {
394: HTList * mtdist = mediatype_distribution(array);
395: if (mtdist) {
396: log_meta_distribution(mr->mtfile, mtdist);
397: delete_meta_distribution(mtdist);
398: }
399: }
1.55 frystyk 400:
401: /* Add as may other stats here as you like */
1.58 frystyk 402:
403:
404: /* Delete the array */
1.55 frystyk 405: HTArray_delete(array);
406: }
407: }
408: return YES;
409: }
410:
1.1 frystyk 411: /* Create a Command Line Object
412: ** ----------------------------
413: */
414: PRIVATE Robot * Robot_new (void)
415: {
416: Robot * me;
1.41 frystyk 417: if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL)
1.14 frystyk 418: HT_OUTOFMEM("Robot_new");
1.2 frystyk 419: me->hyperdoc = HTList_new();
1.4 frystyk 420: me->htext = HTList_new();
1.40 frystyk 421: me->timer = DEFAULT_TIMEOUT;
1.25 frystyk 422: me->cwd = HTGetCurrentDirectoryURL();
1.1 frystyk 423: me->output = OUTPUT;
1.35 eric 424: me->cnt = 0;
1.34 eric 425: me->fingers = HTList_new();
1.1 frystyk 426: return me;
427: }
428:
/*  Delete a Robot Object
**  ---------------------
**  Logs the statistics for the run and then releases everything that the
**  robot owns: the HyperDoc and HText objects, the open logs, the output
**  file, the compiled regular expressions and the allocated strings.
**
**  Returns YES if the robot was deleted, NO if `me' was NULL.
*/
PRIVATE BOOL Robot_delete (Robot * me)
{
    if (me) {

        /* Only the list is deleted - Fingers are freed by Finger_delete */
        HTList_delete(me->fingers);

        /*
        ** Calculate statistics. This must happen before the HyperDoc
        ** objects are freed below as the hit counts are read from them.
        */
        calculate_statistics(me);

        if (me->hyperdoc) {
            HTList * cur = me->hyperdoc;
            HyperDoc * pres;
            while ((pres = (HyperDoc *) HTList_nextObject(cur)))
                HyperDoc_delete(pres);
            HTList_delete(me->hyperdoc);
        }
        if (me->htext) {
            HTList * cur = me->htext;
            HText * pres;
            while ((pres = (HText *) HTList_nextObject(cur)))
                HText_free(pres);
            HTList_delete(me->htext);
        }

        /* Close whatever logs were opened */
        if (me->log) HTLog_close(me->log);
        if (me->ref) HTLog_close(me->ref);
        if (me->reject) HTLog_close(me->reject);
        if (me->notfound) HTLog_close(me->notfound);
        if (me->conneg) HTLog_close(me->conneg);

        /* Close the output unless it is the standard output */
        if (me->output && me->output != STDOUT) fclose(me->output);
        if (me->flags & MR_TIME) {
            time_t local = time(NULL);
            HTTrace("Robot terminated %s\n",HTDateTimeStr(&local,YES));
        }

#ifdef HT_POSIX_REGEX
        /* Release the compiled expressions and then their containers */
        if (me->include) {
            regfree(me->include);
            HT_FREE(me->include);
        }
        if (me->exclude) {
            regfree(me->exclude);
            HT_FREE(me->exclude);
        }
        if (me->check) {
            regfree(me->check);
            HT_FREE(me->check);
        }
#endif

        HT_FREE(me->cwd);
        HT_FREE(me->prefix);
        HT_FREE(me->img_prefix);
        HT_FREE(me);
        return YES;
    }
    return NO;
}
488:
1.2 frystyk 489: /*
1.34 eric 490: ** This function creates a new finger object and initializes it with a new request
1.2 frystyk 491: */
1.34 eric 492: PRIVATE Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method)
1.2 frystyk 493: {
1.34 eric 494: Finger * me;
495: HTRequest * request = HTRequest_new();
496: if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)
497: HT_OUTOFMEM("Finger_new");
498: me->robot = robot;
499: me->request = request;
500: me->dest = dest;
501: HTList_addObject(robot->fingers, (void *)me);
502:
1.48 frystyk 503: /* Set the context for this request */
1.34 eric 504: HTRequest_setContext (request, me);
1.48 frystyk 505:
506: /* Check the various flags to customize the request */
507: if (robot->flags & MR_PREEMPTIVE)
508: HTRequest_setPreemptive(request, YES);
509: if (robot->flags & MR_VALIDATE)
510: HTRequest_setReloadMode(request, HT_CACHE_VALIDATE);
511: if (robot->flags & MR_END_VALIDATE)
512: HTRequest_setReloadMode(request, HT_CACHE_END_VALIDATE);
513:
514: /* We wanna make sure that we are sending a Host header (default) */
1.34 eric 515: HTRequest_addRqHd(request, HT_C_HOST);
1.48 frystyk 516:
517: /* Set the method for this request */
1.34 eric 518: HTRequest_setMethod(request, method);
519: robot->cnt++;
520: return me;
1.2 frystyk 521: }
522:
1.34 eric 523: PRIVATE int Finger_delete (Finger * me)
1.2 frystyk 524: {
1.34 eric 525: HTList_removeObject(me->robot->fingers, (void *)me);
526: me->robot->cnt--;
1.37 frystyk 527:
528: /*
529: ** If we are down at one request then flush the output buffer
530: */
531: if (me->request) {
532: if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);
1.34 eric 533: HTRequest_delete(me->request);
1.37 frystyk 534: }
535:
536: /*
537: ** Delete the request and free myself
538: */
1.34 eric 539: HT_FREE(me);
540: return YES;
1.2 frystyk 541: }
542:
543: /*
544: ** Cleanup and make sure we close all connections including the persistent
545: ** ones
546: */
1.1 frystyk 547: PRIVATE void Cleanup (Robot * me, int status)
548: {
549: Robot_delete(me);
1.29 eric 550: HTProfile_delete();
1.50 frystyk 551: #ifdef HT_MEMLOG
1.39 eric 552: HTMemLog_close();
1.47 frystyk 553: #endif
554:
1.1 frystyk 555: #ifdef VMS
556: exit(status ? status : 1);
557: #else
558: exit(status ? status : 0);
559: #endif
560: }
561:
#ifdef CATCH_SIG
#include <signal.h>
/*  SetSignal
**  This function sets up signal handlers. This might not be necessary to
**  call if the application has its own handlers (lossage on SVR4)
*/
PRIVATE void SetSignal (void)
{
    /*
    ** On some systems (SYSV) SIGPIPE must be ignored when attempting to
    ** connect to a remote host, where you would normally expect to get
    ** `connection refused' back
    */
    BOOL failed = (signal(SIGPIPE, SIG_IGN) == SIG_ERR);

    if (PROT_TRACE) {
        if (failed)
            HTTrace("HTSignal.... Can't catch SIGPIPE\n");
        else
            HTTrace("HTSignal.... Ignoring SIGPIPE\n");
    }

#ifdef HT_MEMLOG
    HTMemLog_flush();
#endif

}
#endif /* CATCH_SIG */
586:
1.58 frystyk 587: #ifdef HT_POSIX_REGEX
588: PRIVATE char * get_regerror (int errcode, regex_t * compiled)
589: {
590: size_t length = regerror (errcode, compiled, NULL, 0);
591: char * str = NULL;
592: if ((str = (char *) HT_MALLOC(length+1)) == NULL)
593: HT_OUTOFMEM("get_regerror");
594: (void) regerror (errcode, compiled, str, length);
595: return str;
596: }
597:
598: PRIVATE regex_t * get_regtype (Robot * mr, const char * regex_str)
599: {
600: regex_t * regex = NULL;
601: if (regex_str && *regex_str) {
602: int status;
603: if ((regex = (regex_t *) HT_CALLOC(1, sizeof(regex_t))) == NULL)
604: HT_OUTOFMEM("get_regtype");
605: if ((status = regcomp(regex, regex_str, REG_EXTENDED))) {
606: char * err_msg = get_regerror(status, regex);
607: HTTrace("Regular expression error: %s\n", err_msg);
608: HT_FREE(err_msg);
609: Cleanup(mr, -1);
610: }
611: }
612: return regex;
613: }
614: #endif
615:
1.1 frystyk 616: PRIVATE void VersionInfo (void)
617: {
1.13 eric 618: OutputData("\n\nW3C Reference Software\n\n");
619: OutputData("\tW3C Mini Robot (%s) version %s.\n",
1.1 frystyk 620: APP_NAME, APP_VERSION);
1.13 eric 621: OutputData("\tW3C Reference Library version %s.\n\n",HTLib_version());
622: OutputData("Please send feedback to <libwww@w3.org>\n");
1.1 frystyk 623: }
624:
625: /* terminate_handler
626: ** -----------------
1.2 frystyk 627: ** This function is registered to handle the result of the request.
628: ** If no more requests are pending then terminate program
1.1 frystyk 629: */
1.32 frystyk 630: PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
631: void * param, int status)
1.1 frystyk 632: {
1.34 eric 633: Finger * finger = (Finger *) HTRequest_context(request);
1.46 eric 634: Robot * mr = finger->robot;
1.34 eric 635: if (SHOW_MSG) HTTrace("Robot....... done with %s\n", HTAnchor_physical(finger->dest));
1.55 frystyk 636:
1.58 frystyk 637: /* Check if negotiated resource and whether we should log that*/
638: if (mr->conneg) {
639: HTAssocList * cur = HTResponse_variant(response);
640: if (cur) {
641: BOOL first = YES;
642: HTChunk * buffer = HTChunk_new(128);
643: char * uri = HTAnchor_address((HTAnchor *) finger->dest);
644: HTAssoc * pres;
645: while ((pres = (HTAssoc *) HTAssocList_nextObject(cur))) {
646: char * value = HTAssoc_value(pres);
647: if (first) {
648: HTChunk_puts(buffer, "(");
649: first = NO;
650: } else
651: HTChunk_puts(buffer, ", ");
652:
653: /* Output the name */
654: HTChunk_puts(buffer, HTAssoc_name(pres));
655:
656: /* Only output the value if not empty string */
657: if (*value) {
658: HTChunk_puts(buffer, "=");
659: HTChunk_puts(buffer, value);
660: }
661: }
662: if (!first) HTChunk_puts(buffer, ")\t");
663: HTChunk_puts(buffer, uri);
664: HTLog_addLine(mr->conneg, HTChunk_toCString(buffer));
665: HTChunk_delete(buffer);
666: HT_FREE(uri);
667: }
668: }
669:
1.55 frystyk 670: /* Count the amount of body data that we have read */
1.59 ! frystyk 671: if (HTRequest_method(request) == METHOD_GET) {
! 672: int length = HTAnchor_length(HTRequest_anchor(request));
! 673: if (length > 0) mr->get_bytes += length;
! 674: mr->get_docs++;
! 675: } else if (HTRequest_method(request) == METHOD_HEAD) {
1.56 frystyk 676: int length = HTAnchor_length(HTRequest_anchor(request));
1.59 ! frystyk 677: if (length > 0) mr->head_bytes += length;
! 678: mr->head_docs++;
! 679: } else {
! 680: mr->other_docs++;
1.55 frystyk 681: }
682:
1.58 frystyk 683: /* Cleanup the anchor so that we don't drown in metainformation */
684: if (!(mr->flags & MR_KEEP_META))
685: HTAnchor_clearHeader(HTRequest_anchor(request));
686:
1.55 frystyk 687: /* Delete this thread */
1.34 eric 688: Finger_delete(finger);
1.55 frystyk 689:
690: /* Should we stop? */
1.46 eric 691: if (mr->cnt <= 0) {
1.34 eric 692: if (SHOW_MSG) HTTrace(" Everything is finished...\n");
1.46 eric 693: Cleanup(mr, 0); /* No way back from here */
1.30 frystyk 694: }
1.46 eric 695: if (SHOW_MSG) HTTrace(" %d outstanding request%s\n", mr->cnt, mr->cnt == 1 ? "" : "s");
1.1 frystyk 696: return HT_OK;
697: }
698:
699: /* ------------------------------------------------------------------------- */
700: /* HTEXT INTERFACE */
701: /* ------------------------------------------------------------------------- */
702:
703: PUBLIC HText * HText_new2 (HTRequest * request, HTParentAnchor * anchor,
704: HTStream * stream)
705: {
706: HText * me;
1.34 eric 707: Finger * finger = (Finger *) HTRequest_context(request);
708: Robot * mr = finger->robot;
1.14 frystyk 709: if ((me = (HText *) HT_CALLOC(1, sizeof(HText))) == NULL)
710: HT_OUTOFMEM("HText_new2");
1.4 frystyk 711:
712: /* Bind the HText object together with the Request Object */
1.1 frystyk 713: me->request = request;
1.4 frystyk 714:
715: /* Add this HyperDoc object to our list */
716: if (!mr->htext) mr->htext = HTList_new();
717: HTList_addObject(mr->htext, (void *) me);
1.1 frystyk 718: return me;
719: }
720:
1.4 frystyk 721: PUBLIC void HText_free (HText * me) {
1.11 frystyk 722: if (me) HT_FREE (me);
1.4 frystyk 723: }
724:
/*
**  Called for every anchor found while parsing a document. If the link
**  destination has been seen before then its hit count is incremented.
**  Otherwise the destination URI is checked against the prefix and the
**  regular expressions and, if it passes and links are to be followed
**  (MR_LINK), a new request is issued for it. Rejected URIs are written
**  to the reject log if one is open.
*/
PUBLIC void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
{
    if (text && anchor) {
        Finger * finger = (Finger *) HTRequest_context(text->request);
        Robot * mr = finger->robot;
        HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
        HTParentAnchor * dest_parent = HTAnchor_parent(dest);
        char * uri = HTAnchor_address((HTAnchor *) dest_parent);
        HyperDoc * hd = HTAnchor_document(dest_parent);
        BOOL match = YES;       /* Does the URI fulfill the constraints? */
        BOOL check = NO;        /* Should the URI only be checked using HEAD? */

        /* Without an address there is nothing to load */
        if (!uri) return;
        if (SHOW_MSG) HTTrace("Robot....... Found `%s\' - ", uri ? uri : "NULL\n");

        /* A HyperDoc bound to the anchor means we have been here before */
        if (hd) {
            if (SHOW_MSG) HTTrace("Already checked\n");
            hd->hits++;
            HT_FREE(uri);
            return;
        }

        /* Check for prefix match */
        if (mr->prefix) match = HTStrMatch(mr->prefix, uri) ? YES : NO;

#ifdef HT_POSIX_REGEX
        /* Check for any regular expression (regexec returns 0 on a match) */
        if (match && mr->include) {
            match = regexec(mr->include, uri, 0, NULL, 0) ? NO : YES;
        }
        if (match && mr->exclude) {
            match = regexec(mr->exclude, uri, 0, NULL, 0) ? YES : NO;
        }
        if (match && mr->check) {
            check = regexec(mr->check, uri, 0, NULL, 0) ? NO : YES;
        }
#endif

        /* Load the document if we follow links and it passed the tests */
        if (mr->flags & MR_LINK && match && dest_parent) {
            HTParentAnchor * parent = HTRequest_parent(text->request);
            HyperDoc * last = HTAnchor_document(parent);

            /* One level below the document holding the link, if known */
            int depth = last ? last->depth+1 : 0;
            Finger * newfinger = Finger_new(mr, dest_parent, METHOD_GET);
            HTRequest * newreq = newfinger->request;
            HyperDoc_new(mr, dest_parent, depth);
            HTRequest_setParent(newreq, HTRequest_anchor(text->request));

            /* At the depth limit, or on a "check" match, only use HEAD */
            if (check || depth >= mr->depth) {
                if (SHOW_MSG) HTTrace("loading at depth %d using HEAD\n", depth);
                HTRequest_setMethod(newreq, METHOD_HEAD);
                HTRequest_setOutputFormat(newreq, WWW_DEBUG);
            } else {
                if (SHOW_MSG) HTTrace("loading at depth %d\n", depth);
            }

            /* If the load could not be started then drop the finger again */
            if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) {
                if (SHOW_MSG) HTTrace("not tested!\n");
                Finger_delete(newfinger);
            }
        } else {
            if (SHOW_MSG) HTTrace("does not fulfill constraints\n");
            if (mr->reject) HTLog_addLine(mr->reject, uri);
        }
        HT_FREE(uri);
    }
}
790:
791: PUBLIC void HText_appendImage (HText * text, HTChildAnchor * anchor,
1.14 frystyk 792: const char *alt, const char * align, BOOL isMap)
1.2 frystyk 793: {
794: if (text && anchor) {
1.34 eric 795: Finger * finger = (Finger *) HTRequest_context(text->request);
796: Robot * mr = finger->robot;
1.59 ! frystyk 797: if (mr->flags & MR_IMG) {
! 798: HTParentAnchor * dest = (HTParentAnchor *)
! 799: HTAnchor_followMainLink((HTAnchor *) anchor);
! 800: char * uri = HTAnchor_address((HTAnchor *) dest);
! 801: HyperDoc * hd = HTAnchor_document(dest);
! 802: BOOL match = YES;
! 803:
! 804: if (hd) {
! 805: if (SHOW_MSG) HTTrace("Already checked\n");
! 806: hd->hits++;
1.11 frystyk 807: HT_FREE(uri);
1.59 ! frystyk 808: return;
1.2 frystyk 809: }
1.59 ! frystyk 810:
! 811: /* Check for prefix match */
! 812: if (mr->img_prefix) match = HTStrMatch(mr->img_prefix, uri) ? YES : NO;
! 813:
! 814: /* Test whether we already have a hyperdoc for this document */
! 815: if (match && dest) {
! 816: HTParentAnchor * parent = HTRequest_parent(text->request);
! 817: HyperDoc * last = HTAnchor_document(parent);
! 818: int depth = last ? last->depth+1 : 0;
! 819: Finger * newfinger = Finger_new(mr, dest,
! 820: mr->flags & MR_SAVE ?
! 821: METHOD_GET : METHOD_HEAD);
! 822: HTRequest * newreq = newfinger->request;
! 823: HyperDoc_new(mr, dest, depth);
! 824: if (SHOW_MSG) HTTrace("Robot....... Checking Image `%s\'\n", uri);
! 825: if (HTLoadAnchor((HTAnchor *) dest, newreq) != YES) {
! 826: if (SHOW_MSG) HTTrace("Robot....... Image not tested!\n");
! 827: Finger_delete(newfinger);
! 828: }
! 829: } else {
! 830: if (SHOW_MSG) HTTrace("does not fulfill constraints\n");
! 831: if (mr->reject) HTLog_addLine(mr->reject, uri);
1.1 frystyk 832: }
1.59 ! frystyk 833: HT_FREE(uri);
1.1 frystyk 834: }
835: }
836: }
837:
838: PUBLIC void HText_endAnchor (HText * text) {}
1.14 frystyk 839: PUBLIC void HText_appendText (HText * text, const char * str) {}
1.1 frystyk 840: PUBLIC void HText_appendCharacter (HText * text, char ch) {}
841: PUBLIC void HText_endAppend (HText * text) {}
842: PUBLIC void HText_setStyle (HText * text, HTStyle * style) {}
843: PUBLIC void HText_beginAppend (HText * text) {}
844: PUBLIC void HText_appendParagraph (HText * text) {}
845:
1.48 frystyk 846: PRIVATE int RobotTrace (const char * fmt, va_list pArgs)
847: {
848: return (vfprintf(stderr, fmt, pArgs));
849: }
850:
1.1 frystyk 851: /* ------------------------------------------------------------------------- */
852: /* MAIN PROGRAM */
853: /* ------------------------------------------------------------------------- */
854:
855: int main (int argc, char ** argv)
856: {
1.48 frystyk 857: int status = 0;
1.1 frystyk 858: int arg;
1.48 frystyk 859: BOOL cache = NO; /* Use persistent cache */
860: BOOL flush = NO; /* flush the persistent cache */
1.54 frystyk 861: char * cache_root = NULL;
1.1 frystyk 862: HTChunk * keywords = NULL; /* From command line */
863: int keycnt = 0;
1.12 frystyk 864: Robot * mr = NULL;
1.43 frystyk 865: Finger * finger = NULL;
866: HTParentAnchor * startAnchor = NULL;
1.1 frystyk 867:
868: /* Starts Mac GUSI socket library */
869: #ifdef GUSI
870: GUSISetup(GUSIwithSIOUXSockets);
871: GUSISetup(GUSIwithInternetSockets);
872: #endif
873:
874: #ifdef __MWERKS__ /* STR */
875: InitGraf((Ptr) &qd.thePort);
876: InitFonts();
877: InitWindows();
878: InitMenus(); TEInit();
879: InitDialogs(nil);
880: InitCursor();
881: SIOUXSettings.asktosaveonclose = false;
882: argc=ccommand(&argv);
1.50 frystyk 883: #endif /* __MWERKS__ */
1.1 frystyk 884:
1.50 frystyk 885: #ifdef HT_MEMLOG
1.51 frystyk 886: HTMemLog_open(DEFAULT_MEMLOG, 8192, YES);
1.47 frystyk 887: #endif
1.46 eric 888:
1.27 frystyk 889: /* Initiate W3C Reference Library with a robot profile */
890: HTProfile_newRobot(APP_NAME, APP_VERSION);
1.48 frystyk 891: HTTrace_setCallback(RobotTrace);
1.27 frystyk 892:
893: /* Add the default HTML parser to the set of converters */
894: {
895: HTList * converters = HTFormat_conversion();
896: HTMLInit(converters);
897: }
1.1 frystyk 898:
1.12 frystyk 899: /* Build a new robot object */
900: mr = Robot_new();
901:
1.1 frystyk 902: /* Scan command Line for parameters */
903: for (arg=1; arg<argc; arg++) {
904: if (*argv[arg] == '-') {
905:
906: /* non-interactive */
1.17 frystyk 907: if (!strcmp(argv[arg], "-n")) {
1.1 frystyk 908: HTAlert_setInteractive(NO);
909:
1.55 frystyk 910: /* log file */
1.1 frystyk 911: } else if (!strcmp(argv[arg], "-l")) {
912: mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?
913: argv[++arg] : DEFAULT_LOG_FILE;
914:
1.55 frystyk 915: /* hit file */
916: } else if (!strcmp(argv[arg], "-hit")) {
917: mr->hitfile = (arg+1 < argc && *argv[arg+1] != '-') ?
918: argv[++arg] : DEFAULT_HIT_FILE;
919:
1.57 frystyk 920: /* referer file */
1.58 frystyk 921: } else if (!strncmp(argv[arg], "-ref", 4)) {
1.57 frystyk 922: mr->reffile = (arg+1 < argc && *argv[arg+1] != '-') ?
923: argv[++arg] : DEFAULT_REFERER_FILE;
924:
1.58 frystyk 925: /* Not found error log file */
926: } else if (!strncmp(argv[arg], "-404", 4)) {
927: mr->notfoundfile = (arg+1 < argc && *argv[arg+1] != '-') ?
928: argv[++arg] : DEFAULT_NOTFOUND_FILE;
929:
930: /* reject log file */
931: } else if (!strncmp(argv[arg], "-rej", 4)) {
932: mr->rejectfile = (arg+1 < argc && *argv[arg+1] != '-') ?
933: argv[++arg] : DEFAULT_REJECT_FILE;
934:
935: /* negoatiated resource log file */
936: } else if (!strncmp(argv[arg], "-neg", 4)) {
937: mr->connegfile = (arg+1 < argc && *argv[arg+1] != '-') ?
938: argv[++arg] : DEFAULT_CONNEG_FILE;
939:
940: /* mediatype distribution log file */
941: } else if (!strncmp(argv[arg], "-for", 4)) {
942: mr->mtfile = (arg+1 < argc && *argv[arg+1] != '-') ?
943: argv[++arg] : DEFAULT_FORMAT_FILE;
944: mr->flags |= MR_KEEP_META;
945:
1.55 frystyk 946: /* rule file */
1.1 frystyk 947: } else if (!strcmp(argv[arg], "-r")) {
948: mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?
949: argv[++arg] : DEFAULT_RULE_FILE;
950:
951: /* output filename */
952: } else if (!strcmp(argv[arg], "-o")) {
953: mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?
954: argv[++arg] : DEFAULT_OUTPUT_FILE;
955:
1.55 frystyk 956: /* URI prefix */
957: } else if (!strcmp(argv[arg], "-prefix")) {
958: char * prefix = NULL;
959: prefix = (arg+1 < argc && *argv[arg+1] != '-') ?
960: argv[++arg] : DEFAULT_PREFIX;
961: if (*prefix) {
962: StrAllocCopy(mr->prefix, prefix);
963: StrAllocCat(mr->prefix, "*");
964: }
965:
1.1 frystyk 966: /* timeout -- Change the default request timeout */
967: } else if (!strcmp(argv[arg], "-timeout")) {
968: int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?
969: atoi(argv[++arg]) : DEFAULT_TIMEOUT;
1.40 frystyk 970: if (timeout > 0) mr->timer = timeout;
1.1 frystyk 971:
1.54 frystyk 972: /* Force no pipelined requests */
973: } else if (!strcmp(argv[arg], "-nopipe")) {
974: HTTP_setConnectionMode(HTTP_NO_PIPELINING);
975:
1.48 frystyk 976: /* Start the persistent cache */
977: } else if (!strcmp(argv[arg], "-cache")) {
978: cache = YES;
979:
1.54 frystyk 980: /* Determine the cache root */
981: } else if (!strcmp(argv[arg], "-cacheroot")) {
982: cache_root = (arg+1 < argc && *argv[arg+1] != '-') ?
983: argv[++arg] : NULL;
1.51 frystyk 984:
1.52 frystyk 985: /* Stream write flush delay in ms */
986: } else if (!strcmp(argv[arg], "-delay")) {
987: int delay = (arg+1 < argc && *argv[arg+1] != '-') ?
988: atoi(argv[++arg]) : DEFAULT_DELAY;
989: HTHost_setDefaultWriteDelay(delay);
990:
1.48 frystyk 991: /* Persistent cache flush */
992: } else if (!strcmp(argv[arg], "-flush")) {
993: flush = YES;
994:
995: /* Do a cache validation */
996: } else if (!strcmp(argv[arg], "-validate")) {
997: mr->flags |= MR_VALIDATE;
998:
999: /* Do an end-to-end cache-validation */
1000: } else if (!strcmp(argv[arg], "-endvalidate")) {
1001: mr->flags |= MR_END_VALIDATE;
1002:
1.7 frystyk 1003: /* preemptive or non-preemptive access */
1.1 frystyk 1004: } else if (!strcmp(argv[arg], "-single")) {
1.7 frystyk 1005: mr->flags |= MR_PREEMPTIVE;
1.2 frystyk 1006:
1007: /* test inlined images */
1008: } else if (!strcmp(argv[arg], "-img")) {
1009: mr->flags |= MR_IMG;
1.45 frystyk 1010:
1011: /* load inlined images */
1012: } else if (!strcmp(argv[arg], "-saveimg")) {
1013: mr->flags |= (MR_IMG | MR_SAVE);
1.59 ! frystyk 1014:
! 1015: /* URI prefix for inlined images */
! 1016: } else if (!strcmp(argv[arg], "-imgprefix")) {
! 1017: char * prefix = NULL;
! 1018: prefix = (arg+1 < argc && *argv[arg+1] != '-') ?
! 1019: argv[++arg] : DEFAULT_IMG_PREFIX;
! 1020: if (*prefix) {
! 1021: StrAllocCopy(mr->img_prefix, prefix);
! 1022: StrAllocCat(mr->img_prefix, "*");
! 1023: }
1.2 frystyk 1024:
1025: /* load anchors */
1.58 frystyk 1026: } else if (!strcmp(argv[arg], "-link") || !strcmp(argv[arg], "-depth")) {
1.2 frystyk 1027: mr->flags |= MR_LINK;
1.7 frystyk 1028: mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?
1029: atoi(argv[++arg]) : DEFAULT_DEPTH;
1.2 frystyk 1030:
1.12 frystyk 1031: /* Output start and end time */
1032: } else if (!strcmp(argv[arg], "-ss")) {
1033: time_t local = time(NULL);
1.13 eric 1034: HTTrace("Robot started on %s\n",
1.12 frystyk 1035: HTDateTimeStr(&local, YES));
1036: mr->flags |= MR_TIME;
1037:
1.1 frystyk 1038: /* print version and exit */
1039: } else if (!strcmp(argv[arg], "-version")) {
1040: VersionInfo();
1041: Cleanup(mr, 0);
1.46 eric 1042:
1043: /* run in quiet mode */
1044: } else if (!strcmp(argv[arg], "-q")) {
1045: mr->flags |= MR_QUIET;
1.1 frystyk 1046:
1047: #ifdef WWWTRACE
1048: /* trace flags */
1049: } else if (!strncmp(argv[arg], "-v", 2)) {
1.24 frystyk 1050: HTSetTraceMessageMask(argv[arg]+2);
1.1 frystyk 1051: #endif
1052:
1.58 frystyk 1053: #ifdef HT_POSIX_REGEX
1054:
1055: /* If we can link against a POSIX regex library */
1056: } else if (!strncmp(argv[arg], "-inc", 4)) {
1057: if (arg+1 < argc && *argv[arg+1] != '-') {
1058: mr->include = get_regtype(mr, argv[++arg]);
1059: }
1060: } else if (!strncmp(argv[arg], "-exc", 4)) {
1061: if (arg+1 < argc && *argv[arg+1] != '-') {
1062: mr->exclude = get_regtype(mr, argv[++arg]);
1063: }
1064: } else if (!strncmp(argv[arg], "-check", 6)) {
1065: if (arg+1 < argc && *argv[arg+1] != '-') {
1066: mr->check = get_regtype(mr, argv[++arg]);
1067: }
1068: #endif
1069:
1.1 frystyk 1070: } else {
1.13 eric 1071: if (SHOW_MSG) HTTrace("Bad Argument (%s)\n", argv[arg]);
1.1 frystyk 1072: }
1.17 frystyk 1073: } else { /* If no leading `-' then check for URL or keywords */
1.1 frystyk 1074: if (!keycnt) {
1075: char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL);
1.56 frystyk 1076: startAnchor = HTAnchor_parent(HTAnchor_findAddress(ref));
1.34 eric 1077: HyperDoc_new(mr, startAnchor, 0);
1.1 frystyk 1078: keycnt = 1;
1.11 frystyk 1079: HT_FREE(ref);
1.1 frystyk 1080: } else { /* Check for successive keyword arguments */
1081: char *escaped = HTEscape(argv[arg], URL_XALPHAS);
1082: if (keycnt++ <= 1)
1.5 frystyk 1083: keywords = HTChunk_new(128);
1.1 frystyk 1084: else
1.5 frystyk 1085: HTChunk_putc(keywords, ' ');
1086: HTChunk_puts(keywords, HTStrip(escaped));
1.11 frystyk 1087: HT_FREE(escaped);
1.1 frystyk 1088: }
1089: }
1090: }
1091:
1092: #ifdef CATCH_SIG
1093: SetSignal();
1094: #endif
1095:
1096: if (!keycnt) {
1.13 eric 1097: if (SHOW_MSG) HTTrace("Please specify URL to check.\n");
1.1 frystyk 1098: Cleanup(mr, -1);
1099: }
1100:
1.23 manoli 1101: /* Testing that HTTrace is working */
1.47 frystyk 1102: if (SHOW_MSG) HTTrace ("Welcome to the W3C mini Robot\n");
1.23 manoli 1103:
1.1 frystyk 1104: /* Rule file specified? */
1105: if (mr->rules) {
1106: char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);
1.27 frystyk 1107: if (!HTLoadRules(rules))
1.13 eric 1108: if (SHOW_MSG) HTTrace("Can't access rules\n");
1.11 frystyk 1109: HT_FREE(rules);
1.1 frystyk 1110: }
1111:
1112: /* Output file specified? */
1113: if (mr->outputfile) {
1114: if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {
1.13 eric 1115: if (SHOW_MSG) HTTrace("Can't open `%s'\n", mr->outputfile);
1.1 frystyk 1116: mr->output = OUTPUT;
1117: }
1118: }
1119:
1.48 frystyk 1120: /* Should we use persistent cache? */
1121: if (cache) {
1.54 frystyk 1122: HTCacheInit(cache_root, 20);
1.49 frystyk 1123: HTNet_addBefore(HTCacheFilter, "http://*", NULL, HT_FILTER_MIDDLE);
1124: HTNet_addAfter(HTCacheUpdateFilter, "http://*", NULL,
1125: HT_NOT_MODIFIED, HT_FILTER_MIDDLE);
1.48 frystyk 1126:
1127: /* Should we start by flushing? */
1128: if (flush) HTCache_flushAll();
1129: }
1130:
1.58 frystyk 1131: /* CLF Log file specified? */
1.55 frystyk 1132: if (mr->logfile) {
1133: mr->log = HTLog_open(mr->logfile, YES, YES);
1134: if (mr->log) HTNet_addAfter(HTLogFilter, NULL, mr->log, HT_ALL, HT_FILTER_LATE);
1.57 frystyk 1135: }
1136:
1.58 frystyk 1137: /* Referer Log file specified? */
1.57 frystyk 1138: if (mr->reffile) {
1139: mr->ref = HTLog_open(mr->reffile, YES, YES);
1140: if (mr->ref)
1141: HTNet_addAfter(HTRefererFilter, NULL, mr->ref, HT_ALL, HT_FILTER_LATE);
1.55 frystyk 1142: }
1.1 frystyk 1143:
1.58 frystyk 1144: /* Not found error log specified? */
1145: if (mr->notfoundfile) {
1146: mr->notfound = HTLog_open(mr->notfoundfile, YES, YES);
1147: if (mr->notfound)
1148: HTNet_addAfter(HTRefererFilter, NULL, mr->notfound, -404, HT_FILTER_LATE);
1149: }
1150:
1151: /* Negotiated resource log specified? */
1152: if (mr->connegfile) mr->conneg = HTLog_open(mr->connegfile, YES, YES);
1153:
1154: /* Reject Log file specified? */
1155: if (mr->rejectfile) mr->reject = HTLog_open(mr->rejectfile, YES, YES);
1156:
1157: /* Register our own terminate filter */
1.32 frystyk 1158: HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
1.40 frystyk 1159:
1160: /* Setting event timeout */
1161: HTHost_setEventTimeout(mr->timer);
1.55 frystyk 1162:
1.56 frystyk 1163: mr->time = HTGetTimeInMillis();
1.37 frystyk 1164:
1.34 eric 1165: /* Start the request */
1166: finger = Finger_new(mr, startAnchor, METHOD_GET);
1.43 frystyk 1167:
1168: /*
1169: ** Make sure that the first request is flushed immediately and not
1170: ** buffered in the output buffer
1171: */
1172: HTRequest_setFlush(finger->request, YES);
1173:
1174: /*
1.48 frystyk 1175: ** Check whether we should do some kind of cache validation on
1176: ** the load
1177: */
1178: if (mr->flags & MR_VALIDATE)
1179: HTRequest_setReloadMode(finger->request, HT_CACHE_VALIDATE);
1180: if (mr->flags & MR_END_VALIDATE)
1181: HTRequest_setReloadMode(finger->request, HT_CACHE_END_VALIDATE);
1182:
1183: /*
1.43 frystyk 1184: ** Now do the load
1185: */
1.34 eric 1186: if (mr->flags & MR_PREEMPTIVE)
1187: HTRequest_setPreemptive(finger->request, YES);
1.1 frystyk 1188:
1189: if (keywords) /* Search */
1.34 eric 1190: status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);
1.1 frystyk 1191: else
1.34 eric 1192: status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);
1.1 frystyk 1193:
1.5 frystyk 1194: if (keywords) HTChunk_delete(keywords);
1.1 frystyk 1195: if (status != YES) {
1.13 eric 1196: if (SHOW_MSG) HTTrace("Can't access resource\n");
1.1 frystyk 1197: Cleanup(mr, -1);
1198: }
1199:
1200: /* Go into the event loop... */
1.34 eric 1201: HTEventList_loop(finger->request);
1.1 frystyk 1202:
1203: /* Only gets here if event loop fails */
1204: Cleanup(mr, 0);
1205: return 0;
1206: }
Webmaster