version 1.6, 1995/12/18 22:03:36
|
version 1.7, 1996/01/24 20:12:46
|
Line 32
|
Line 32
|
#define DEFAULT_OUTPUT_FILE "robot.out" |
#define DEFAULT_OUTPUT_FILE "robot.out" |
#define DEFAULT_RULE_FILE "robot.conf" |
#define DEFAULT_RULE_FILE "robot.conf" |
#define DEFAULT_LOG_FILE "robot.log" |
#define DEFAULT_LOG_FILE "robot.log" |
|
#define DEFAULT_DEPTH 0 |
|
|
#define SHOW_MSG (WWWTRACE || HTAlert_interactive()) |
#define SHOW_MSG (WWWTRACE || HTAlert_interactive()) |
|
|
#define DEFAULT_TIMEOUT 60 /* timeout in seconds */ |
#define DEFAULT_TIMEOUT 10 /* timeout in seconds */ |
|
|
#if defined(__svr4__) |
#if defined(__svr4__) |
#define CATCH_SIG |
#define CATCH_SIG |
Line 44
|
Line 45
|
typedef enum _MRFlags { |
typedef enum _MRFlags { |
MR_IMG = 0x1, |
MR_IMG = 0x1, |
MR_LINK = 0x2, |
MR_LINK = 0x2, |
MR_PREEMTIVE= 0x4 |
MR_PREEMPTIVE= 0x4 |
} MRFlags; |
} MRFlags; |
|
|
typedef struct _Robot { |
typedef struct _Robot { |
HTRequest * request; |
HTRequest * request; |
|
HTRequest * timeout; /* Until we get a server eventloop */ |
HTParentAnchor * anchor; |
HTParentAnchor * anchor; |
int depth; /* How deep is our tree */ |
int depth; /* How deep is our tree */ |
HTList * hyperdoc; /* List of our HyperDoc Objects */ |
HTList * hyperdoc; /* List of our HyperDoc Objects */ |
Line 114 PRIVATE HyperDoc * HyperDoc_new (Robot *
|
Line 116 PRIVATE HyperDoc * HyperDoc_new (Robot *
|
/* Add this HyperDoc object to our list */ |
/* Add this HyperDoc object to our list */ |
if (!mr->hyperdoc) mr->hyperdoc = HTList_new(); |
if (!mr->hyperdoc) mr->hyperdoc = HTList_new(); |
HTList_addObject(mr->hyperdoc, (void *) hd); |
HTList_addObject(mr->hyperdoc, (void *) hd); |
|
|
if (SHOW_MSG) |
|
TTYPrint(TDEST, "HyperDoc.... %p bound to anchor %p with depth %d\n", |
|
hd, anchor, depth); |
|
return hd; |
return hd; |
} |
} |
|
|
Line 148 PRIVATE Robot * Robot_new (void)
|
Line 146 PRIVATE Robot * Robot_new (void)
|
me->cwd = HTFindRelatedName(); |
me->cwd = HTFindRelatedName(); |
me->output = OUTPUT; |
me->output = OUTPUT; |
|
|
|
/* We keep an extra timeout request object for the timeout_handler */ |
|
me->timeout = HTRequest_new(); |
|
HTRequest_setContext (me->timeout, me); |
|
|
/* Bind the Robot object together with the Request Object */ |
/* Bind the Robot object together with the Request Object */ |
me->request = HTRequest_new(); |
me->request = HTRequest_new(); |
HTRequest_setContext (me->request, me); |
HTRequest_setContext (me->request, me); |
Line 191 PRIVATE HTRequest * Thread_new (Robot *
|
Line 193 PRIVATE HTRequest * Thread_new (Robot *
|
{ |
{ |
HTRequest * newreq = HTRequest_new(); |
HTRequest * newreq = HTRequest_new(); |
HTRequest_setContext (newreq, mr); |
HTRequest_setContext (newreq, mr); |
if (mr->flags & MR_PREEMTIVE) HTRequest_setPreemtive(newreq, YES); |
if (mr->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(newreq, YES); |
HTRequest_addRqHd(newreq, HT_C_HOST); |
HTRequest_addRqHd(newreq, HT_C_HOST); |
HTRequest_setMethod(newreq, method); |
HTRequest_setMethod(newreq, method); |
return newreq; |
return newreq; |
Line 268 PRIVATE int terminate_handler (HTRequest
|
Line 270 PRIVATE int terminate_handler (HTRequest
|
/* timeout_handler |
/* timeout_handler |
** --------------- |
** --------------- |
** This function is registered to handle timeout in select eventloop |
** This function is registered to handle timeout in select eventloop |
|
** |
|
** BUG: This doesn't work as we don't get the right request object |
|
** back from the event loop |
*/ |
*/ |
PRIVATE int timeout_handler (HTRequest * request) |
PRIVATE int timeout_handler (HTRequest * request) |
{ |
{ |
Robot * mr = (Robot *) HTRequest_context(request); |
Robot * mr = (Robot *) HTRequest_context(request); |
if (SHOW_MSG) TTYPrint(TDEST, "Robot....... Request timeout...\n"); |
if (SHOW_MSG) TTYPrint(TDEST, "Robot....... Request timeout...\n"); |
|
#if 0 |
HTRequest_kill(request); |
HTRequest_kill(request); |
Thread_delete(mr, request); |
Thread_delete(mr, request); |
if (HTNet_isEmpty()) Cleanup(mr, -1); |
#endif |
|
Cleanup(mr, -1); |
return HT_OK; |
return HT_OK; |
} |
} |
|
|
Line 310 PUBLIC void HText_beginAnchor (HText * t
|
Line 317 PUBLIC void HText_beginAnchor (HText * t
|
Robot * mr = (Robot *) HTRequest_context(text->request); |
Robot * mr = (Robot *) HTRequest_context(text->request); |
HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor); |
HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor); |
HTParentAnchor * dest_parent = HTAnchor_parent(dest); |
HTParentAnchor * dest_parent = HTAnchor_parent(dest); |
|
char * uri = HTAnchor_address((HTAnchor *) dest_parent); |
HyperDoc * hd = HTAnchor_document(dest_parent); |
HyperDoc * hd = HTAnchor_document(dest_parent); |
|
|
|
if (SHOW_MSG) TTYPrint(TDEST, "Robot....... Found `%s\' - ", uri ? uri : "NULL"); |
|
|
/* Test whether we already have a hyperdoc for this document */ |
/* Test whether we already have a hyperdoc for this document */ |
if (mr->flags & MR_LINK && dest_parent && !hd) { |
if (mr->flags & MR_LINK && dest_parent && !hd) { |
HTParentAnchor * parent = HTRequest_parent(text->request); |
HTParentAnchor * parent = HTRequest_parent(text->request); |
Line 319 PUBLIC void HText_beginAnchor (HText * t
|
Line 329 PUBLIC void HText_beginAnchor (HText * t
|
int depth = last ? last->depth+1 : 0; |
int depth = last ? last->depth+1 : 0; |
HTRequest * newreq = Thread_new(mr, METHOD_GET); |
HTRequest * newreq = Thread_new(mr, METHOD_GET); |
HyperDoc_new(mr, dest_parent, depth); |
HyperDoc_new(mr, dest_parent, depth); |
if (SHOW_MSG) { |
HTRequest_setParent(newreq, HTRequest_anchor(text->request)); |
char * uri = HTAnchor_address((HTAnchor *) dest_parent); |
if (depth >= mr->depth) { |
TTYPrint(TDEST, "Robot....... Loading `%s\'\n", uri); |
if (SHOW_MSG) |
free(uri); |
TTYPrint(TDEST, "loading at depth %d using HEAD\n", depth); |
|
HTRequest_setMethod(newreq, METHOD_HEAD); |
|
HTRequest_setOutputFormat(newreq, WWW_MIME); |
|
} else { |
|
if (SHOW_MSG) TTYPrint(TDEST, "loading at depth %d\n", depth); |
} |
} |
if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) { |
if (HTLoadAnchor((HTAnchor *) dest_parent, newreq) != YES) { |
if (SHOW_MSG) TTYPrint(TDEST, "Robot...... URI Not tested!\n"); |
if (SHOW_MSG) TTYPrint(TDEST, "not tested!\n"); |
Thread_delete(mr, newreq); |
Thread_delete(mr, newreq); |
} |
} |
|
} else { |
|
if (SHOW_MSG) TTYPrint(TDEST, "duplicate\n"); |
} |
} |
|
FREE(uri); |
} |
} |
} |
} |
|
|
Line 450 int main (int argc, char ** argv)
|
Line 467 int main (int argc, char ** argv)
|
atoi(argv[++arg]) : DEFAULT_TIMEOUT; |
atoi(argv[++arg]) : DEFAULT_TIMEOUT; |
if (timeout > 0) mr->tv->tv_sec = timeout; |
if (timeout > 0) mr->tv->tv_sec = timeout; |
|
|
/* preemtive or non-preemtive access */ |
/* preemptive or non-preemptive access */ |
} else if (!strcmp(argv[arg], "-single")) { |
} else if (!strcmp(argv[arg], "-single")) { |
HTRequest_setPreemtive(mr->request, YES); |
HTRequest_setPreemptive(mr->request, YES); |
mr->flags |= MR_PREEMTIVE; |
mr->flags |= MR_PREEMPTIVE; |
|
|
/* test inlined images */ |
/* test inlined images */ |
} else if (!strcmp(argv[arg], "-img")) { |
} else if (!strcmp(argv[arg], "-img")) { |
Line 462 int main (int argc, char ** argv)
|
Line 479 int main (int argc, char ** argv)
|
/* load anchors */ |
/* load anchors */ |
} else if (!strcmp(argv[arg], "-link")) { |
} else if (!strcmp(argv[arg], "-link")) { |
mr->flags |= MR_LINK; |
mr->flags |= MR_LINK; |
|
mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ? |
|
atoi(argv[++arg]) : DEFAULT_DEPTH; |
|
|
/* preemtive or non-preemtive access */ |
/* preemptive or non-preemptive access */ |
} else if (!strcmp(argv[arg], "-single")) { |
} else if (!strcmp(argv[arg], "-single")) { |
HTRequest_setPreemtive(mr->request, YES); |
HTRequest_setPreemptive(mr->request, YES); |
mr->flags |= MR_PREEMTIVE; |
mr->flags |= MR_PREEMPTIVE; |
|
|
/* print version and exit */ |
/* print version and exit */ |
} else if (!strcmp(argv[arg], "-version")) { |
} else if (!strcmp(argv[arg], "-version")) { |
Line 504 int main (int argc, char ** argv)
|
Line 523 int main (int argc, char ** argv)
|
if (!keycnt) { |
if (!keycnt) { |
char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL); |
char * ref = HTParse(argv[arg], mr->cwd, PARSE_ALL); |
mr->anchor = (HTParentAnchor *) HTAnchor_findAddress(ref); |
mr->anchor = (HTParentAnchor *) HTAnchor_findAddress(ref); |
|
HyperDoc_new(mr, mr->anchor, 0); |
keycnt = 1; |
keycnt = 1; |
FREE(ref); |
FREE(ref); |
} else { /* Check for successive keyword arguments */ |
} else { /* Check for successive keyword arguments */ |
Line 533 int main (int argc, char ** argv)
|
Line 553 int main (int argc, char ** argv)
|
HTRequest * rr = HTRequest_new(); |
HTRequest * rr = HTRequest_new(); |
char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL); |
char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL); |
HTParentAnchor * ra = (HTParentAnchor *) HTAnchor_findAddress(rules); |
HTParentAnchor * ra = (HTParentAnchor *) HTAnchor_findAddress(rules); |
HTRequest_setPreemtive(rr, YES); |
HTRequest_setPreemptive(rr, YES); |
HTConversion_add(list, "application/x-www-rules", "*/*", HTRules, |
HTConversion_add(list, "application/x-www-rules", "*/*", HTRules, |
1.0, 0.0, 0.0); |
1.0, 0.0, 0.0); |
HTRequest_setConversion(rr, list, YES); |
HTRequest_setConversion(rr, list, YES); |
Line 571 int main (int argc, char ** argv)
|
Line 591 int main (int argc, char ** argv)
|
HTNetCall_addAfter(terminate_handler, HT_ALL); |
HTNetCall_addAfter(terminate_handler, HT_ALL); |
|
|
/* Set timeout on sockets */ |
/* Set timeout on sockets */ |
HTEvent_registerTimeout(mr->tv, mr->request, timeout_handler, NO); |
HTEvent_registerTimeout(mr->tv, mr->timeout, timeout_handler, NO); |
|
|
/* Start the request */ |
/* Start the request */ |
if (keywords) /* Search */ |
if (keywords) /* Search */ |