Annotation of libwww/Library/src/HTFilter.c, revision 2.12
2.1 frystyk 1: /*
2: ** BEFORE AND AFTER FILTERS
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.12 ! frystyk 6: ** @(#) $Id: HTFilter.c,v 2.11 1996/08/24 18:09:54 frystyk Exp $
2.1 frystyk 7: **
8: ** This module implements a set of default filters that can be registered
9: ** as BEFORE and AFTER filters to the Net manager
10: ** Authors
11: ** HFN Henrik Frystyk, frystyk@w.org
12: ** History
13: ** Jul 4, 96 Written
14: */
15:
16: /* Library include files */
17: #include "WWWLib.h"
18: #include "WWWCache.h"
19: #include "WWWHTTP.h"
20: #include "HTLog.h"
21: #include "HTAccess.h"
2.10 frystyk 22: #include "HTProxy.h"
23: #include "HTRules.h"
2.1 frystyk 24: #include "HTFilter.h" /* Implemented here */
25:
26: /* ------------------------------------------------------------------------- */
27:
28: /*
29: ** Proxy and Gateway BEFORE filter
30: ** -------------------------------
31: ** Checks for registerd proxy servers or gateways and sees whether this
32: ** request should be redirected to a proxy or a gateway. Proxies have
33: ** higher priority than gateways so we look for them first!
34: ** For HTTP/1.0 and HTTP/1.1 we may only send a full URL (including the
35: ** host portion) to proxy servers. Therefore, we tell the Library whether
36: ** to use the full URL or the traditional HTTP one without the host part.
37: */
38: PUBLIC int HTProxyFilter (HTRequest * request, void * param, int status)
39: {
40: HTParentAnchor * anchor = HTRequest_anchor(request);
2.2 frystyk 41: char * addr = HTAnchor_physical(anchor);
2.1 frystyk 42: char * physical = NULL;
43: if ((physical = HTProxy_find(addr))) {
2.6 frystyk 44: HTRequest_setFullURI(request, YES); /* For now */
2.5 frystyk 45: HTRequest_setProxy(request, physical);
2.8 frystyk 46: HT_FREE(physical);
2.6 frystyk 47: #if 0
48: /* Don't paste the URLs together anymore */
2.1 frystyk 49: StrAllocCat(physical, addr);
2.5 frystyk 50: HTAnchor_setPhysical(anchor, physical);
2.6 frystyk 51: #endif
2.1 frystyk 52: } else if ((physical = HTGateway_find(addr))) {
53: /*
54: ** A gateway URL is crated by chopping off any leading "/" to make the
55: ** host into part of path
56: */
57: char * path =
58: HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
59: char * gatewayed = HTParse(path+1, physical, PARSE_ALL);
60: HTAnchor_setPhysical(anchor, gatewayed);
61: HT_FREE(path);
62: HT_FREE(gatewayed);
63: HTRequest_setFullURI(request, NO);
2.6 frystyk 64: HTRequest_deleteProxy(request);
2.1 frystyk 65: } else {
2.6 frystyk 66: HTRequest_setFullURI(request, NO); /* For now */
67: HTRequest_deleteProxy(request);
2.1 frystyk 68: }
69: return HT_OK;
70: }
71:
72: /*
73: ** Rule Translation BEFORE Filter
74: ** ------------------------------
75: ** If we have a set of rules loaded (see the Rule manager) then check
76: ** before each request whether how that should be translated. The trick
77: ** is that a parent anchor has a "address" which is the part from the URL
78: ** we used when we created the anchor. However, it also have a "physical
79: ** address" which is the place we are actually going to look for the
2.2 frystyk 80: ** resource. Hence this filter translates the physical address
81: ** (if any translations are found)
2.1 frystyk 82: */
83: PUBLIC int HTRuleFilter (HTRequest * request, void * param, int status)
84: {
85: HTList * list = HTRule_global();
86: HTParentAnchor * anchor = HTRequest_anchor(request);
2.2 frystyk 87: char * addr = HTAnchor_physical(anchor);
2.1 frystyk 88: char * physical = HTRule_translate(list, addr, NO);
89: if (!physical) {
90: HTRequest_addError(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
91: NULL, 0, "HTRuleFilter");
92: return HT_ERROR;
93: }
94: HTAnchor_setPhysical(anchor, physical);
95: HT_FREE(physical);
96: return HT_OK;
97: }
98:
99: /*
100: ** Cache Validation BEFORE Filter
101: ** ------------------------------
102: ** Check the cache mode to see if we can use an already loaded version
103: ** of this document. If so and our copy is valid then we don't have
104: ** to go out and get it unless we are forced to
2.3 frystyk 105: ** We only check the cache in caseof a GET request. Otherwise, we go
106: ** directly to the source.
2.1 frystyk 107: */
108: PUBLIC int HTCacheFilter (HTRequest * request, void * param, int status)
109: {
110: HTParentAnchor * anchor = HTRequest_anchor(request);
2.12 ! frystyk 111: HTCache * cache = NULL;
2.1 frystyk 112: HTReload mode = HTRequest_reloadMode(request);
2.3 frystyk 113: HTMethod method = HTRequest_method(request);
2.12 ! frystyk 114: HTDisconnectedMode disconnect = HTCacheMode_disconnected();
! 115: BOOL validate = NO;
2.3 frystyk 116:
117: /*
2.12 ! frystyk 118: ** If the cache is disabled all together then it won't help looking, huh?
2.3 frystyk 119: */
2.12 ! frystyk 120: if (!HTCacheMode_enabled()) return HT_OK;
! 121: if (CACHE_TRACE) HTTrace("Cachefilter. Checking persistent cache\n");
2.3 frystyk 122:
2.1 frystyk 123: /*
2.12 ! frystyk 124: ** Now check the cache...
2.1 frystyk 125: */
2.12 ! frystyk 126: if (method != METHOD_GET) {
! 127: if (CACHE_TRACE) HTTrace("Cachefilter. We only check GET methods\n");
! 128: } else if (mode == HT_CACHE_FLUSH) {
! 129: /*
! 130: ** If the mode if "Force Reload" then don't even bother to check the
! 131: ** cache - we flush everything we know abut this document anyway.
2.1 frystyk 132: ** Add the appropriate request headers. We use both the "pragma"
133: ** and the "cache-control" headers in order to be
2.12 ! frystyk 134: ** backwards compatible with HTTP/1.0
2.1 frystyk 135: */
2.12 ! frystyk 136: validate = YES;
2.1 frystyk 137: HTRequest_addGnHd(request, HT_G_PRAGMA_NO_CACHE);
2.11 frystyk 138: HTRequest_addCacheControl(request, "no-cache", "");
2.1 frystyk 139:
140: /*
141: ** We also flush the information in the anchor
142: */
143: HTAnchor_clearHeader(anchor);
144:
2.12 ! frystyk 145: } else {
! 146: /*
! 147: ** Check the persistent cache manager. If we have a cache hit then
! 148: ** continue to see if the reload mode requires us to do a validation
! 149: ** check. This filter assumes that we can get the cached version
! 150: ** through one of our protocol modules (for example the file module)
! 151: */
! 152: cache = HTCache_find(anchor);
2.1 frystyk 153: if (cache) {
2.12 ! frystyk 154: mode = HTMAX(mode, HTCache_isFresh(cache, request));
! 155:
! 156: /*
! 157: ** Now check the mode and add the right headers for the validation
! 158: ** If we are to validate a cache entry then we get a lock
! 159: ** on it so that not other requests can steal it.
! 160: */
! 161: if (mode == HT_CACHE_END_VALIDATE) {
! 162: /*
! 163: ** If we were asked to end-to-end validate the cached object
! 164: ** then use a max-age=0 cache control directive
! 165: */
! 166: validate = YES;
! 167: HTCache_getLock(cache, request);
! 168: HTRequest_addCacheControl(request, "max-age", "0");
! 169: } else if (mode == HT_CACHE_VALIDATE) {
2.11 frystyk 170: /*
2.12 ! frystyk 171: ** If we were asked to validate the cached object then
2.11 frystyk 172: ** use the etag or the last modified for cache validation
173: */
2.12 ! frystyk 174: validate = YES;
! 175: HTCache_getLock(cache, request);
2.11 frystyk 176: HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
2.12 ! frystyk 177: } else {
! 178: /*
! 179: ** The entity does not require any validation at all. We
! 180: ** can just go ahead and get it from the cache
! 181: */
! 182: char * name = HTCache_name(cache);
! 183: HTAnchor_setPhysical(anchor, name);
! 184: HTCache_addHit(cache);
! 185: HT_FREE(name);
2.11 frystyk 186: }
187: }
2.12 ! frystyk 188: }
! 189:
! 190: /*
! 191: ** If we are in disconnected mode and we are to validate an entry
! 192: ** then check whether what mode of disconnected mode we're in. If
! 193: ** we are to use our own cache then return a "504 Gateway Timeout"
! 194: */
! 195: if ((!cache || validate) && disconnect != HT_DISCONNECT_NONE) {
! 196: if (disconnect == HT_DISCONNECT_EXTERNAL)
! 197: HTRequest_addCacheControl(request, "only-if-cached", "");
! 198: else {
! 199: HTRequest_addError(request, ERR_FATAL, NO,
! 200: HTERR_GATE_TIMEOUT, "Disconnected Cache Mode",
! 201: 0, "HTCacheFilter");
! 202: return HT_ERROR;
! 203: }
2.11 frystyk 204: }
205: return HT_OK;
206: }
207:
208: /*
2.12 ! frystyk 209: ** Cache Update AFTER filter
! 210: ** -------------------------
! 211: ** On our way out we catch the metainformation and stores it in
! 212: ** our persistent store. If we have a cache validation (a 304
! 213: ** response then we use the new metainformation and merges it with
! 214: ** the existing information already captured in the cache.
! 215: */
! 216: PUBLIC int HTCacheUpdateFilter (HTRequest * request, void * param, int status)
! 217: {
! 218: HTParentAnchor * anchor = HTRequest_anchor(request);
! 219: HTCache * cache = HTCache_find(anchor);
! 220:
! 221: /*
! 222: ** If this request resulted in a "304 Not Modified" response then
! 223: ** we merge the new metainformation with the old.
! 224: */
! 225: if (CACHE_TRACE) HTTrace("Cache....... Merging metainformation\n");
! 226:
! 227: /*
! 228: ** It may in fact be that the information in the 304 response
! 229: ** told us that we can't cache the entity anymore. If this is the
! 230: ** case then flush it now. Otherwise prepare for a cache read
! 231: */
! 232: if (HTAnchor_cachable(anchor) == NO) {
! 233: HTCache_remove(cache);
! 234: } else {
! 235: HTCache_update(cache, request);
! 236: HTRequest_setReloadMode(request, HT_CACHE_OK);
! 237: }
! 238:
! 239: /*
! 240: ** Start request directly from the cache. As with the redirection filter
! 241: ** we reuse the same request object which means that we must
! 242: ** keep this around until the cache load request has terminated
! 243: ** In the case of a
! 244: */
! 245: {
! 246: HTLoad(request, NO);
! 247: return HT_ERROR;
! 248: }
! 249: }
! 250:
! 251: /*
2.11 frystyk 252: ** Check the Memory Cache (History list)
253: ** -------------------------------------
254: ** Check if document is already loaded. The user can define whether
255: ** the history list should follow normal expiration or work as a
256: ** traditional history list where expired documents are not updated.
257: ** We don't check for anything but existence proof of a document
258: ** associated with the anchor as the definition is left to the application
259: */
260: PUBLIC int HTMemoryCacheFilter (HTRequest * request, void * param, int status)
261: {
262: HTReload validation = HTRequest_reloadMode(request);
263: HTParentAnchor * anchor = HTRequest_anchor(request);
264: void * document = HTAnchor_document(anchor);
2.1 frystyk 265:
2.11 frystyk 266: /*
267: ** If we are asked to flush the persistent cache then there is no reason
268: ** to do anything here - we're flushing it anyway. Also if no document
269: ** then just exit from this filter.
270: */
271: if (!document || validation > HT_CACHE_FLUSH_MEM) {
272: if (CACHE_TRACE) HTTrace("Mem Cache... No fresh document...\n");
273: return HT_OK;
274: }
2.1 frystyk 275:
2.11 frystyk 276: /*
277: ** If we have a document object associated with this anchor then we also
278: ** have the object in the history list. Depending on what the user asked,
279: ** we can add a cache validator
280: */
281: if (document) {
2.12 ! frystyk 282: HTExpiresMode expires = HTCacheMode_expires();
2.11 frystyk 283: if (validation != HT_CACHE_FLUSH_MEM) {
284: if (CACHE_TRACE)
285: HTTrace("Mem Cache... Document already in memory\n");
286: if (expires != HT_EXPIRES_IGNORE) {
287:
288: /*
289: ** Ask the cache manager if this object has expired. Also
290: ** check if we should care about expiration or not.
291: */
2.12 ! frystyk 292: #if 0
2.11 frystyk 293: if (!HTCache_isValid(anchor)) {
2.12 ! frystyk 294: #else
! 295: if (1) {
! 296: #endif
2.11 frystyk 297: if (expires == HT_EXPIRES_NOTIFY) {
298:
299: /*
300: ** See if we have a function registered for outputting errors.
301: ** If so then call it and present the message to the user
302: */
303: HTAlertCallback * cbf = HTAlert_find(HT_A_MESSAGE);
304: if (cbf)
305: (*cbf)(request, HT_A_MESSAGE, HTERR_CACHE_EXPIRED,
306: NULL, HTRequest_error(request), NULL);
307: } else {
308: if (CACHE_TRACE) HTTrace("Mem Cache... Expired - autoreload\n");
309: HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
310: return HT_OK; /* Must go get it */
311: }
312: }
2.1 frystyk 313: }
2.11 frystyk 314: return HT_LOADED; /* Got it! */
315: } else {
316:
317: /*
318: ** If we were asked to validate the memory version then
319: ** use either the etag or the last modified for cache validation
320: */
321: HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
2.1 frystyk 322: }
323: }
324: return HT_OK;
325: }
326:
327: /*
328: ** Error and Information AFTER filter
329: ** ----------------------------------
330: ** It checks the status code from a request and generates an
331: ** error/information message if required.
332: */
333: PUBLIC int HTInfoFilter (HTRequest * request, void * param, int status)
334: {
335: HTParentAnchor * anchor = HTRequest_anchor(request);
336: char * uri = HTAnchor_address((HTAnchor*) anchor);
337: switch (status) {
338: case HT_RETRY:
339: if (PROT_TRACE)
340: HTTrace("Load End.... NOT AVAILABLE, RETRY AT %ld\n",
341: HTRequest_retryTime(request));
342: break;
343:
344: case HT_ERROR:
345: {
346: /*
347: ** See if we have a function registered for outputting errors.
348: ** If so then call it and present the message to the user
349: */
350: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
351: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
352: HTRequest_error(request), NULL);
353: if (PROT_TRACE)
354: HTTrace("Load End.... ERROR: Can't access `%s\'\n",
355: uri ? uri : "<UNKNOWN>");
2.3 frystyk 356: break;
2.7 frystyk 357: }
358:
359: case HT_NO_DATA:
360: {
361: /*
362: ** The document was empty
363: */
364: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
365: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
366: HTRequest_error(request), NULL);
367: if (PROT_TRACE)
368: HTTrace("Load End.... EMPTY: No content `%s\'\n",
369: uri ? uri : "<UNKNOWN>");
370: break;
371: }
2.3 frystyk 372:
373: case HT_LOADED:
374: {
375: /*
376: ** Even though we have received a loaded status the thing we have
377: ** loaded successfully may in fact be an error message. We therefore
378: ** look at the error stack to see what to do.
379: */
380: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
381: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
382: HTRequest_error(request), NULL);
383: if (PROT_TRACE) HTTrace("Load End.... OK: `%s\'\n", uri);
2.1 frystyk 384: break;
385: }
386:
387: default:
388: if (PROT_TRACE)
389: HTTrace("Load End.... Request ended with code %d\n", status);
390: break;
391: }
392:
393: HT_FREE(uri);
394: return HT_OK;
395: }
396:
397: /*
398: ** Redirection AFTER filter
399: ** ------------------------
400: ** The redirection handler only handles redirections
401: ** on the GET or HEAD method (or any other safe method)
402: */
403: PUBLIC int HTRedirectFilter (HTRequest * request, void * param, int status)
404: {
405: HTMethod method = HTRequest_method(request);
406: HTAnchor * new_anchor = HTRequest_redirection(request);
2.7 frystyk 407: if (!new_anchor) {
408: if (PROT_TRACE) HTTrace("Redirection. No destination\n");
409: return HT_OK;
410: }
411:
2.1 frystyk 412: /*
413: ** Only do redirect on GET and HEAD
414: */
2.7 frystyk 415: if (!HTMethod_isSafe(method)) {
2.4 frystyk 416: HTAlertCallback * prompt = HTAlert_find(HT_A_CONFIRM);
417: if (prompt) {
418: if ((*prompt)(request, HT_A_CONFIRM, HT_MSG_REDIRECTION,
419: NULL, NULL, NULL) != YES)
420: return HT_ERROR;
421: }
2.1 frystyk 422: }
423:
424: /*
425: ** Start new request with the redirect anchor found in the headers.
426: ** Note that we reuse the same request object which means that we must
427: ** keep this around until the redirected request has terminated. It also
428: ** allows us in an easy way to keep track of the number of redirections
429: ** so that we can detect endless loops.
430: */
2.4 frystyk 431: if (HTRequest_doRetry(request)) {
2.1 frystyk 432: HTLoadAnchor(new_anchor, request);
2.9 frystyk 433: } else {
434: HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
435: NULL, 0, "HTRedirectFilter");
436: }
437:
438: /*
439: ** By returning HT_ERROR we make sure that this is the last handler to be
440: ** called. We do this as we don't want any other filter to delete the
441: ** request object now when we have just started a new one ourselves
442: */
443: return HT_ERROR;
444: }
445:
446: /*
447: ** Retry through Proxy Filter
448: ** --------------------------
449: ** This filter handles a 305 Use Proxy response and retries the request
450: ** through the proxy
451: */
452: PUBLIC int HTUseProxyFilter (HTRequest * request, void * param, int status)
453: {
454: HTAnchor * proxy_anchor = HTRequest_redirection(request);
455: if (!proxy_anchor) {
456: if (PROT_TRACE) HTTrace("Use Proxy... No proxy location\n");
457: return HT_OK;
458: }
459:
460: /*
461: ** Add the proxy to the list. Assume HTTP access method only!
462: */
463: {
464: char * addr = HTAnchor_address(proxy_anchor);
465: HTProxy_add("http", addr);
466: HT_FREE(addr);
467: }
468:
469: /*
470: ** Start new request through the proxy if we haven't reached the max
471: ** number of redirections for this request
472: */
473: if (HTRequest_doRetry(request)) {
474: HTLoadAnchor(proxy_anchor, request);
2.1 frystyk 475: } else {
476: HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
477: NULL, 0, "HTRedirectFilter");
478: }
479:
480: /*
481: ** By returning HT_ERROR we make sure that this is the last handler to be
482: ** called. We do this as we don't want any other filter to delete the
483: ** request object now when we have just started a new one ourselves
484: */
485: return HT_ERROR;
486: }
487:
488: /*
489: ** Client side authentication BEFORE filter
490: ** ----------------------------------------
491: ** The filter generates the credentials required to access a document
492: ** Getting the credentials may involve asking the user
493: */
494: PUBLIC int HTCredentialsFilter (HTRequest * request, void * param, int status)
495: {
496: /*
497: ** Ask the authentication module to call the right credentials generator
498: ** that understands this scheme
499: */
500: if (HTAA_beforeFilter(request, param, status) == HT_OK) {
501: if (PROT_TRACE) HTTrace("Credentials. verified\n");
502: return HT_OK;
503: } else {
504: HTRequest_addError(request, ERR_FATAL, NO, HTERR_UNAUTHORIZED,
505: NULL, 0, "HTCredentialsFilter");
506: return HT_ERROR;
507: }
508: }
509:
510: /*
511: ** Client side authentication AFTER filter
512: ** ---------------------------------------
513: ** The client side authentication filter uses the
514: ** user dialog messages registered in the HTAlert module.
515: ** By default these are the ones used by the line mode browser but you can
516: ** just register something else.
517: */
518: PUBLIC int HTAuthFilter (HTRequest * request, void * param, int status)
519: {
520: /*
521: ** Ask the authentication module to call the right challenge parser
522: ** that understands this scheme
523: */
524: if (HTAA_afterFilter(request, param, status) == HT_OK) {
525:
526: /*
527: ** Start request with new credentials. As with the redirection filter
528: ** we reuse the same request object which means that we must
529: ** keep this around until the redirected request has terminated
530: */
531: HTLoad(request, NO);
532:
533: /*
534: ** We return HT_ERROR to make sure that this is the last handler to be
535: ** called. We do this as we don't want any other filter to delete the
536: ** request object now when we have just started a new one ourselves
537: */
538: return HT_ERROR;
539: }
540: return HT_OK;
541: }
542:
543: /*
544: ** Request Loggin AFTER filter
545: ** ---------------------------
546: ** Default Logging filter using the log manager provided by HTLog.c
547: */
548: PUBLIC int HTLogFilter (HTRequest * request, void * param, int status)
549: {
550: if (request) {
551: if (HTLog_isOpen()) HTLog_add(request, status);
552: return HT_OK;
553: }
554: return HT_ERROR;
555: }
Webmaster