Annotation of libwww/Library/src/HTFilter.c, revision 2.11
2.1 frystyk 1: /*
2: ** BEFORE AND AFTER FILTERS
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.11 ! frystyk 6: ** @(#) $Id: HTFilter.c,v 2.10 1996/08/21 00:07:10 frystyk Exp $
2.1 frystyk 7: **
8: ** This module implements a set of default filters that can be registered
9: ** as BEFORE and AFTER filters to the Net manager
10: ** Authors
11: ** HFN Henrik Frystyk, frystyk@w.org
12: ** History
13: ** Jul 4, 96 Written
14: */
15:
16: /* Library include files */
17: #include "WWWLib.h"
18: #include "WWWCache.h"
19: #include "WWWHTTP.h"
20: #include "HTLog.h"
21: #include "HTAccess.h"
2.10 frystyk 22: #include "HTProxy.h"
23: #include "HTRules.h"
2.1 frystyk 24: #include "HTFilter.h" /* Implemented here */
25:
26: /* ------------------------------------------------------------------------- */
27:
28: /*
29: ** Proxy and Gateway BEFORE filter
30: ** -------------------------------
31: ** Checks for registerd proxy servers or gateways and sees whether this
32: ** request should be redirected to a proxy or a gateway. Proxies have
33: ** higher priority than gateways so we look for them first!
34: ** For HTTP/1.0 and HTTP/1.1 we may only send a full URL (including the
35: ** host portion) to proxy servers. Therefore, we tell the Library whether
36: ** to use the full URL or the traditional HTTP one without the host part.
37: */
38: PUBLIC int HTProxyFilter (HTRequest * request, void * param, int status)
39: {
40: HTParentAnchor * anchor = HTRequest_anchor(request);
2.2 frystyk 41: char * addr = HTAnchor_physical(anchor);
2.1 frystyk 42: char * physical = NULL;
43: if ((physical = HTProxy_find(addr))) {
2.6 frystyk 44: HTRequest_setFullURI(request, YES); /* For now */
2.5 frystyk 45: HTRequest_setProxy(request, physical);
2.8 frystyk 46: HT_FREE(physical);
2.6 frystyk 47: #if 0
48: /* Don't paste the URLs together anymore */
2.1 frystyk 49: StrAllocCat(physical, addr);
2.5 frystyk 50: HTAnchor_setPhysical(anchor, physical);
2.6 frystyk 51: #endif
2.1 frystyk 52: } else if ((physical = HTGateway_find(addr))) {
53: /*
54: ** A gateway URL is crated by chopping off any leading "/" to make the
55: ** host into part of path
56: */
57: char * path =
58: HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
59: char * gatewayed = HTParse(path+1, physical, PARSE_ALL);
60: HTAnchor_setPhysical(anchor, gatewayed);
61: HT_FREE(path);
62: HT_FREE(gatewayed);
63: HTRequest_setFullURI(request, NO);
2.6 frystyk 64: HTRequest_deleteProxy(request);
2.1 frystyk 65: } else {
2.6 frystyk 66: HTRequest_setFullURI(request, NO); /* For now */
67: HTRequest_deleteProxy(request);
2.1 frystyk 68: }
69: return HT_OK;
70: }
71:
72: /*
73: ** Rule Translation BEFORE Filter
74: ** ------------------------------
75: ** If we have a set of rules loaded (see the Rule manager) then check
76: ** before each request whether how that should be translated. The trick
77: ** is that a parent anchor has a "address" which is the part from the URL
78: ** we used when we created the anchor. However, it also have a "physical
79: ** address" which is the place we are actually going to look for the
2.2 frystyk 80: ** resource. Hence this filter translates the physical address
81: ** (if any translations are found)
2.1 frystyk 82: */
83: PUBLIC int HTRuleFilter (HTRequest * request, void * param, int status)
84: {
85: HTList * list = HTRule_global();
86: HTParentAnchor * anchor = HTRequest_anchor(request);
2.2 frystyk 87: char * addr = HTAnchor_physical(anchor);
2.1 frystyk 88: char * physical = HTRule_translate(list, addr, NO);
89: if (!physical) {
90: HTRequest_addError(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
91: NULL, 0, "HTRuleFilter");
92: return HT_ERROR;
93: }
94: HTAnchor_setPhysical(anchor, physical);
95: HT_FREE(physical);
96: return HT_OK;
97: }
98:
99: /*
100: ** Cache Validation BEFORE Filter
101: ** ------------------------------
102: ** Check the cache mode to see if we can use an already loaded version
103: ** of this document. If so and our copy is valid then we don't have
104: ** to go out and get it unless we are forced to
2.3 frystyk 105: ** We only check the cache in caseof a GET request. Otherwise, we go
106: ** directly to the source.
2.1 frystyk 107: */
108: PUBLIC int HTCacheFilter (HTRequest * request, void * param, int status)
109: {
110: HTParentAnchor * anchor = HTRequest_anchor(request);
111: HTReload mode = HTRequest_reloadMode(request);
2.3 frystyk 112: HTMethod method = HTRequest_method(request);
113:
114: /*
115: ** Check the method of the request
116: */
117: if (method != METHOD_GET) {
118: if (CACHE_TRACE) HTTrace("Cachefilter. We only check GET methods\n");
119: return HT_OK;
120: }
121:
2.1 frystyk 122: /*
123: ** If the mode if "Force Reload" then don't even bother to check the
124: ** cache - we flush everything we know about this document
125: */
2.11 ! frystyk 126: if (mode == HT_CACHE_FLUSH) {
2.1 frystyk 127: /*
128: ** Add the appropriate request headers. We use both the "pragma"
129: ** and the "cache-control" headers in order to be
130: ** backwards compatible with HTP/1.0
131: */
132: HTRequest_addGnHd(request, HT_G_PRAGMA_NO_CACHE);
2.11 ! frystyk 133: HTRequest_addCacheControl(request, "no-cache", "");
2.1 frystyk 134:
135: /*
136: ** We also flush the information in the anchor
137: */
138: HTAnchor_clearHeader(anchor);
139: return HT_OK;
140: }
141:
142: /*
143: ** Check the persistent cache manager. If we have a cache hit then
144: ** continue to see if the reload mode requires us to do a validation check.
145: ** This filter assumes that we can get the cached version through one of
146: ** our protocol modules (for example the file module)
147: */
148: {
149: char * addr = HTAnchor_address((HTAnchor *) anchor);
150: char * cache = HTCache_getReference(addr);
151: if (cache) {
2.11 ! frystyk 152: if (mode != HT_CACHE_VALIDATE) {
2.1 frystyk 153: HTAnchor_setPhysical(anchor, cache);
154: HTAnchor_setCacheHit(anchor, YES);
155: } else {
2.11 ! frystyk 156: /*
! 157: ** If we were asked to validate the memory version then
! 158: ** use the etag or the last modified for cache validation
! 159: */
! 160: HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
! 161: }
! 162: HT_FREE(cache);
! 163: }
! 164: HT_FREE(addr);
! 165: }
! 166: return HT_OK;
! 167: }
! 168:
! 169: /*
! 170: ** Check the Memory Cache (History list)
! 171: ** -------------------------------------
! 172: ** Check if document is already loaded. The user can define whether
! 173: ** the history list should follow normal expiration or work as a
! 174: ** traditional history list where expired documents are not updated.
! 175: ** We don't check for anything but existence proof of a document
! 176: ** associated with the anchor as the definition is left to the application
! 177: */
! 178: PUBLIC int HTMemoryCacheFilter (HTRequest * request, void * param, int status)
! 179: {
! 180: HTReload validation = HTRequest_reloadMode(request);
! 181: HTParentAnchor * anchor = HTRequest_anchor(request);
! 182: void * document = HTAnchor_document(anchor);
2.1 frystyk 183:
2.11 ! frystyk 184: /*
! 185: ** If we are asked to flush the persistent cache then there is no reason
! 186: ** to do anything here - we're flushing it anyway. Also if no document
! 187: ** then just exit from this filter.
! 188: */
! 189: if (!document || validation > HT_CACHE_FLUSH_MEM) {
! 190: if (CACHE_TRACE) HTTrace("Mem Cache... No fresh document...\n");
! 191: return HT_OK;
! 192: }
2.1 frystyk 193:
2.11 ! frystyk 194: /*
! 195: ** If we have a document object associated with this anchor then we also
! 196: ** have the object in the history list. Depending on what the user asked,
! 197: ** we can add a cache validator
! 198: */
! 199: if (document) {
! 200: HTExpiresMode expires = HTCache_expiresMode();
! 201: if (validation != HT_CACHE_FLUSH_MEM) {
! 202: if (CACHE_TRACE)
! 203: HTTrace("Mem Cache... Document already in memory\n");
! 204: if (expires != HT_EXPIRES_IGNORE) {
! 205:
! 206: /*
! 207: ** Ask the cache manager if this object has expired. Also
! 208: ** check if we should care about expiration or not.
! 209: */
! 210: if (!HTCache_isValid(anchor)) {
! 211: if (expires == HT_EXPIRES_NOTIFY) {
! 212:
! 213: /*
! 214: ** See if we have a function registered for outputting errors.
! 215: ** If so then call it and present the message to the user
! 216: */
! 217: HTAlertCallback * cbf = HTAlert_find(HT_A_MESSAGE);
! 218: if (cbf)
! 219: (*cbf)(request, HT_A_MESSAGE, HTERR_CACHE_EXPIRED,
! 220: NULL, HTRequest_error(request), NULL);
! 221: } else {
! 222: if (CACHE_TRACE) HTTrace("Mem Cache... Expired - autoreload\n");
! 223: HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
! 224: return HT_OK; /* Must go get it */
! 225: }
! 226: }
2.1 frystyk 227: }
2.11 ! frystyk 228: return HT_LOADED; /* Got it! */
! 229: } else {
! 230:
! 231: /*
! 232: ** If we were asked to validate the memory version then
! 233: ** use either the etag or the last modified for cache validation
! 234: */
! 235: HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
2.1 frystyk 236: }
237: }
238: return HT_OK;
239: }
240:
241: /*
242: ** Error and Information AFTER filter
243: ** ----------------------------------
244: ** It checks the status code from a request and generates an
245: ** error/information message if required.
246: */
247: PUBLIC int HTInfoFilter (HTRequest * request, void * param, int status)
248: {
249: HTParentAnchor * anchor = HTRequest_anchor(request);
250: char * uri = HTAnchor_address((HTAnchor*) anchor);
251: switch (status) {
252: case HT_RETRY:
253: if (PROT_TRACE)
254: HTTrace("Load End.... NOT AVAILABLE, RETRY AT %ld\n",
255: HTRequest_retryTime(request));
256: break;
257:
258: case HT_ERROR:
259: {
260: /*
261: ** See if we have a function registered for outputting errors.
262: ** If so then call it and present the message to the user
263: */
264: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
265: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
266: HTRequest_error(request), NULL);
267: if (PROT_TRACE)
268: HTTrace("Load End.... ERROR: Can't access `%s\'\n",
269: uri ? uri : "<UNKNOWN>");
2.3 frystyk 270: break;
2.7 frystyk 271: }
272:
273: case HT_NO_DATA:
274: {
275: /*
276: ** The document was empty
277: */
278: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
279: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
280: HTRequest_error(request), NULL);
281: if (PROT_TRACE)
282: HTTrace("Load End.... EMPTY: No content `%s\'\n",
283: uri ? uri : "<UNKNOWN>");
284: break;
285: }
2.3 frystyk 286:
287: case HT_LOADED:
288: {
289: /*
290: ** Even though we have received a loaded status the thing we have
291: ** loaded successfully may in fact be an error message. We therefore
292: ** look at the error stack to see what to do.
293: */
294: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
295: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
296: HTRequest_error(request), NULL);
297: if (PROT_TRACE) HTTrace("Load End.... OK: `%s\'\n", uri);
2.1 frystyk 298: break;
299: }
300:
301: default:
302: if (PROT_TRACE)
303: HTTrace("Load End.... Request ended with code %d\n", status);
304: break;
305: }
306:
307: HT_FREE(uri);
308: return HT_OK;
309: }
310:
311: /*
312: ** Redirection AFTER filter
313: ** ------------------------
314: ** The redirection handler only handles redirections
315: ** on the GET or HEAD method (or any other safe method)
316: */
317: PUBLIC int HTRedirectFilter (HTRequest * request, void * param, int status)
318: {
319: HTMethod method = HTRequest_method(request);
320: HTAnchor * new_anchor = HTRequest_redirection(request);
2.7 frystyk 321: if (!new_anchor) {
322: if (PROT_TRACE) HTTrace("Redirection. No destination\n");
323: return HT_OK;
324: }
325:
2.1 frystyk 326: /*
327: ** Only do redirect on GET and HEAD
328: */
2.7 frystyk 329: if (!HTMethod_isSafe(method)) {
2.4 frystyk 330: HTAlertCallback * prompt = HTAlert_find(HT_A_CONFIRM);
331: if (prompt) {
332: if ((*prompt)(request, HT_A_CONFIRM, HT_MSG_REDIRECTION,
333: NULL, NULL, NULL) != YES)
334: return HT_ERROR;
335: }
2.1 frystyk 336: }
337:
338: /*
339: ** Start new request with the redirect anchor found in the headers.
340: ** Note that we reuse the same request object which means that we must
341: ** keep this around until the redirected request has terminated. It also
342: ** allows us in an easy way to keep track of the number of redirections
343: ** so that we can detect endless loops.
344: */
2.4 frystyk 345: if (HTRequest_doRetry(request)) {
2.1 frystyk 346: HTLoadAnchor(new_anchor, request);
2.9 frystyk 347: } else {
348: HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
349: NULL, 0, "HTRedirectFilter");
350: }
351:
352: /*
353: ** By returning HT_ERROR we make sure that this is the last handler to be
354: ** called. We do this as we don't want any other filter to delete the
355: ** request object now when we have just started a new one ourselves
356: */
357: return HT_ERROR;
358: }
359:
360: /*
361: ** Retry through Proxy Filter
362: ** --------------------------
363: ** This filter handles a 305 Use Proxy response and retries the request
364: ** through the proxy
365: */
366: PUBLIC int HTUseProxyFilter (HTRequest * request, void * param, int status)
367: {
368: HTAnchor * proxy_anchor = HTRequest_redirection(request);
369: if (!proxy_anchor) {
370: if (PROT_TRACE) HTTrace("Use Proxy... No proxy location\n");
371: return HT_OK;
372: }
373:
374: /*
375: ** Add the proxy to the list. Assume HTTP access method only!
376: */
377: {
378: char * addr = HTAnchor_address(proxy_anchor);
379: HTProxy_add("http", addr);
380: HT_FREE(addr);
381: }
382:
383: /*
384: ** Start new request through the proxy if we haven't reached the max
385: ** number of redirections for this request
386: */
387: if (HTRequest_doRetry(request)) {
388: HTLoadAnchor(proxy_anchor, request);
2.1 frystyk 389: } else {
390: HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
391: NULL, 0, "HTRedirectFilter");
392: }
393:
394: /*
395: ** By returning HT_ERROR we make sure that this is the last handler to be
396: ** called. We do this as we don't want any other filter to delete the
397: ** request object now when we have just started a new one ourselves
398: */
399: return HT_ERROR;
400: }
401:
402: /*
403: ** Client side authentication BEFORE filter
404: ** ----------------------------------------
405: ** The filter generates the credentials required to access a document
406: ** Getting the credentials may involve asking the user
407: */
408: PUBLIC int HTCredentialsFilter (HTRequest * request, void * param, int status)
409: {
410: /*
411: ** Ask the authentication module to call the right credentials generator
412: ** that understands this scheme
413: */
414: if (HTAA_beforeFilter(request, param, status) == HT_OK) {
415: if (PROT_TRACE) HTTrace("Credentials. verified\n");
416: return HT_OK;
417: } else {
418: HTRequest_addError(request, ERR_FATAL, NO, HTERR_UNAUTHORIZED,
419: NULL, 0, "HTCredentialsFilter");
420: return HT_ERROR;
421: }
422: }
423:
424: /*
425: ** Client side authentication AFTER filter
426: ** ---------------------------------------
427: ** The client side authentication filter uses the
428: ** user dialog messages registered in the HTAlert module.
429: ** By default these are the ones used by the line mode browser but you can
430: ** just register something else.
431: */
432: PUBLIC int HTAuthFilter (HTRequest * request, void * param, int status)
433: {
434: /*
435: ** Ask the authentication module to call the right challenge parser
436: ** that understands this scheme
437: */
438: if (HTAA_afterFilter(request, param, status) == HT_OK) {
439:
440: /*
441: ** Start request with new credentials. As with the redirection filter
442: ** we reuse the same request object which means that we must
443: ** keep this around until the redirected request has terminated
444: */
445: HTLoad(request, NO);
446:
447: /*
448: ** We return HT_ERROR to make sure that this is the last handler to be
449: ** called. We do this as we don't want any other filter to delete the
450: ** request object now when we have just started a new one ourselves
451: */
452: return HT_ERROR;
453: }
454: return HT_OK;
455: }
456:
457: /*
458: ** Request Loggin AFTER filter
459: ** ---------------------------
460: ** Default Logging filter using the log manager provided by HTLog.c
461: */
462: PUBLIC int HTLogFilter (HTRequest * request, void * param, int status)
463: {
464: if (request) {
465: if (HTLog_isOpen()) HTLog_add(request, status);
466: return HT_OK;
467: }
468: return HT_ERROR;
469: }
Webmaster