Annotation of libwww/Library/src/HTFilter.c, revision 2.10
2.1 frystyk 1: /*
2: ** BEFORE AND AFTER FILTERS
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.10 ! frystyk 6: ** @(#) $Id: HTFilter.c,v 2.9 1996/08/20 04:53:37 frystyk Exp $
2.1 frystyk 7: **
8: ** This module implements a set of default filters that can be registered
9: ** as BEFORE and AFTER filters to the Net manager
10: ** Authors
11: ** HFN Henrik Frystyk, frystyk@w.org
12: ** History
13: ** Jul 4, 96 Written
14: */
15:
16: /* Library include files */
17: #include "WWWLib.h"
18: #include "WWWCache.h"
19: #include "WWWHTTP.h"
20: #include "HTLog.h"
21: #include "HTAccess.h"
2.10 ! frystyk 22: #include "HTProxy.h"
! 23: #include "HTRules.h"
2.1 frystyk 24: #include "HTFilter.h" /* Implemented here */
25:
26: /* ------------------------------------------------------------------------- */
27:
28: /*
29: ** Proxy and Gateway BEFORE filter
30: ** -------------------------------
31: ** Checks for registered proxy servers or gateways and sees whether this
32: ** request should be redirected to a proxy or a gateway. Proxies have
33: ** higher priority than gateways so we look for them first!
34: ** For HTTP/1.0 and HTTP/1.1 we may only send a full URL (including the
35: ** host portion) to proxy servers. Therefore, we tell the Library whether
36: ** to use the full URL or the traditional HTTP one without the host part.
37: */
38: PUBLIC int HTProxyFilter (HTRequest * request, void * param, int status)
39: {
40: HTParentAnchor * anchor = HTRequest_anchor(request);
2.2 frystyk 41: char * addr = HTAnchor_physical(anchor);
2.1 frystyk 42: char * physical = NULL;
43: if ((physical = HTProxy_find(addr))) {
2.6 frystyk 44: HTRequest_setFullURI(request, YES); /* For now */
2.5 frystyk 45: HTRequest_setProxy(request, physical);
2.8 frystyk 46: HT_FREE(physical);
2.6 frystyk 47: #if 0
48: /* Don't paste the URLs together anymore */
2.1 frystyk 49: StrAllocCat(physical, addr);
2.5 frystyk 50: HTAnchor_setPhysical(anchor, physical);
2.6 frystyk 51: #endif
2.1 frystyk 52: } else if ((physical = HTGateway_find(addr))) {
53: /*
54: ** A gateway URL is crated by chopping off any leading "/" to make the
55: ** host into part of path
56: */
57: char * path =
58: HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
59: char * gatewayed = HTParse(path+1, physical, PARSE_ALL);
60: HTAnchor_setPhysical(anchor, gatewayed);
61: HT_FREE(path);
62: HT_FREE(gatewayed);
63: HTRequest_setFullURI(request, NO);
2.6 frystyk 64: HTRequest_deleteProxy(request);
2.1 frystyk 65: } else {
2.6 frystyk 66: HTRequest_setFullURI(request, NO); /* For now */
67: HTRequest_deleteProxy(request);
2.1 frystyk 68: }
69: return HT_OK;
70: }
71:
72: /*
73: ** Rule Translation BEFORE Filter
74: ** ------------------------------
75: ** If we have a set of rules loaded (see the Rule manager) then check
76: ** before each request how it should be translated. The trick
77: ** is that a parent anchor has an "address" which is the part of the URL
78: ** we used when we created the anchor. However, it also has a "physical
79: ** address" which is the place we are actually going to look for the
2.2 frystyk 80: ** resource. Hence this filter translates the physical address
81: ** (if any translations are found)
2.1 frystyk 82: */
83: PUBLIC int HTRuleFilter (HTRequest * request, void * param, int status)
84: {
85: HTList * list = HTRule_global();
86: HTParentAnchor * anchor = HTRequest_anchor(request);
2.2 frystyk 87: char * addr = HTAnchor_physical(anchor);
2.1 frystyk 88: char * physical = HTRule_translate(list, addr, NO);
89: if (!physical) {
90: HTRequest_addError(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
91: NULL, 0, "HTRuleFilter");
92: return HT_ERROR;
93: }
94: HTAnchor_setPhysical(anchor, physical);
95: HT_FREE(physical);
96: return HT_OK;
97: }
98:
99: /*
100: ** Cache Validation BEFORE Filter
101: ** ------------------------------
102: ** Check the cache mode to see if we can use an already loaded version
103: ** of this document. If so and our copy is valid then we don't have
104: ** to go out and get it unless we are forced to
2.3 frystyk 105: ** We only check the cache in case of a GET request. Otherwise, we go
106: ** directly to the source.
2.1 frystyk 107: */
108: PUBLIC int HTCacheFilter (HTRequest * request, void * param, int status)
109: {
110: HTParentAnchor * anchor = HTRequest_anchor(request);
111: HTReload mode = HTRequest_reloadMode(request);
2.3 frystyk 112: HTMethod method = HTRequest_method(request);
113:
114: /*
115: ** Check the method of the request
116: */
117: if (method != METHOD_GET) {
118: if (CACHE_TRACE) HTTrace("Cachefilter. We only check GET methods\n");
119: return HT_OK;
120: }
121:
2.1 frystyk 122: /*
123: ** If the mode if "Force Reload" then don't even bother to check the
124: ** cache - we flush everything we know about this document
125: */
126: if (mode == HT_FORCE_RELOAD) {
127: /*
128: ** Add the appropriate request headers. We use both the "pragma"
129: ** and the "cache-control" headers in order to be
130: ** backwards compatible with HTP/1.0
131: */
132: HTRequest_addGnHd(request, HT_G_PRAGMA_NO_CACHE);
133:
134: /* @@@ CACHE CONTROL @@@ */
135:
136: /*
137: ** We also flush the information in the anchor
138: */
139: HTAnchor_clearHeader(anchor);
140: return HT_OK;
141: }
142:
143: /*
144: ** Check the application provided memory cache. This is equivalent to a
145: ** history list and does not follow the same cache mechanisms as the
146: ** persistent cache
147: */
148: if (HTMemoryCache_check(request) == HT_LOADED)
149: return HT_LOADED;
150:
151: /*
152: ** Check the persistent cache manager. If we have a cache hit then
153: ** continue to see if the reload mode requires us to do a validation check.
154: ** This filter assumes that we can get the cached version through one of
155: ** our protocol modules (for example the file module)
156: */
157: {
158: char * addr = HTAnchor_address((HTAnchor *) anchor);
159: char * cache = HTCache_getReference(addr);
160: if (cache) {
161: if (mode != HT_CACHE_REFRESH) {
162: HTAnchor_setPhysical(anchor, cache);
163: HTAnchor_setCacheHit(anchor, YES);
164: } else {
165:
166: /* @@@ Do cache validation @@@ */
167:
168: }
169: }
170: HT_FREE(addr);
171: }
172: return HT_OK;
173: }
174:
175: /*
176: ** Error and Information AFTER filter
177: ** ----------------------------------
178: ** It checks the status code from a request and generates an
179: ** error/information message if required.
180: */
181: PUBLIC int HTInfoFilter (HTRequest * request, void * param, int status)
182: {
183: HTParentAnchor * anchor = HTRequest_anchor(request);
184: char * uri = HTAnchor_address((HTAnchor*) anchor);
185: switch (status) {
186: case HT_RETRY:
187: if (PROT_TRACE)
188: HTTrace("Load End.... NOT AVAILABLE, RETRY AT %ld\n",
189: HTRequest_retryTime(request));
190: break;
191:
192: case HT_ERROR:
193: {
194: /*
195: ** See if we have a function registered for outputting errors.
196: ** If so then call it and present the message to the user
197: */
198: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
199: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
200: HTRequest_error(request), NULL);
201: if (PROT_TRACE)
202: HTTrace("Load End.... ERROR: Can't access `%s\'\n",
203: uri ? uri : "<UNKNOWN>");
2.3 frystyk 204: break;
2.7 frystyk 205: }
206:
207: case HT_NO_DATA:
208: {
209: /*
210: ** The document was empty
211: */
212: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
213: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
214: HTRequest_error(request), NULL);
215: if (PROT_TRACE)
216: HTTrace("Load End.... EMPTY: No content `%s\'\n",
217: uri ? uri : "<UNKNOWN>");
218: break;
219: }
2.3 frystyk 220:
221: case HT_LOADED:
222: {
223: /*
224: ** Even though we have received a loaded status the thing we have
225: ** loaded successfully may in fact be an error message. We therefore
226: ** look at the error stack to see what to do.
227: */
228: HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
229: if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
230: HTRequest_error(request), NULL);
231: if (PROT_TRACE) HTTrace("Load End.... OK: `%s\'\n", uri);
2.1 frystyk 232: break;
233: }
234:
235: default:
236: if (PROT_TRACE)
237: HTTrace("Load End.... Request ended with code %d\n", status);
238: break;
239: }
240:
241: HT_FREE(uri);
242: return HT_OK;
243: }
244:
245: /*
246: ** Redirection AFTER filter
247: ** ------------------------
248: ** The redirection handler only handles redirections
249: ** on the GET or HEAD method (or any other safe method)
250: */
251: PUBLIC int HTRedirectFilter (HTRequest * request, void * param, int status)
252: {
253: HTMethod method = HTRequest_method(request);
254: HTAnchor * new_anchor = HTRequest_redirection(request);
2.7 frystyk 255: if (!new_anchor) {
256: if (PROT_TRACE) HTTrace("Redirection. No destination\n");
257: return HT_OK;
258: }
259:
2.1 frystyk 260: /*
261: ** Only do redirect on GET and HEAD
262: */
2.7 frystyk 263: if (!HTMethod_isSafe(method)) {
2.4 frystyk 264: HTAlertCallback * prompt = HTAlert_find(HT_A_CONFIRM);
265: if (prompt) {
266: if ((*prompt)(request, HT_A_CONFIRM, HT_MSG_REDIRECTION,
267: NULL, NULL, NULL) != YES)
268: return HT_ERROR;
269: }
2.1 frystyk 270: }
271:
272: /*
273: ** Start new request with the redirect anchor found in the headers.
274: ** Note that we reuse the same request object which means that we must
275: ** keep this around until the redirected request has terminated. It also
276: ** allows us in an easy way to keep track of the number of redirections
277: ** so that we can detect endless loops.
278: */
2.4 frystyk 279: if (HTRequest_doRetry(request)) {
2.1 frystyk 280: HTLoadAnchor(new_anchor, request);
2.9 frystyk 281: } else {
282: HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
283: NULL, 0, "HTRedirectFilter");
284: }
285:
286: /*
287: ** By returning HT_ERROR we make sure that this is the last handler to be
288: ** called. We do this as we don't want any other filter to delete the
289: ** request object now when we have just started a new one ourselves
290: */
291: return HT_ERROR;
292: }
293:
294: /*
295: ** Retry through Proxy Filter
296: ** --------------------------
297: ** This filter handles a 305 Use Proxy response and retries the request
298: ** through the proxy
299: */
300: PUBLIC int HTUseProxyFilter (HTRequest * request, void * param, int status)
301: {
302: HTAnchor * proxy_anchor = HTRequest_redirection(request);
303: if (!proxy_anchor) {
304: if (PROT_TRACE) HTTrace("Use Proxy... No proxy location\n");
305: return HT_OK;
306: }
307:
308: /*
309: ** Add the proxy to the list. Assume HTTP access method only!
310: */
311: {
312: char * addr = HTAnchor_address(proxy_anchor);
313: HTProxy_add("http", addr);
314: HT_FREE(addr);
315: }
316:
317: /*
318: ** Start new request through the proxy if we haven't reached the max
319: ** number of redirections for this request
320: */
321: if (HTRequest_doRetry(request)) {
322: HTLoadAnchor(proxy_anchor, request);
2.1 frystyk 323: } else {
324: HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
325: NULL, 0, "HTRedirectFilter");
326: }
327:
328: /*
329: ** By returning HT_ERROR we make sure that this is the last handler to be
330: ** called. We do this as we don't want any other filter to delete the
331: ** request object now when we have just started a new one ourselves
332: */
333: return HT_ERROR;
334: }
335:
336: /*
337: ** Client side authentication BEFORE filter
338: ** ----------------------------------------
339: ** The filter generates the credentials required to access a document
340: ** Getting the credentials may involve asking the user
341: */
342: PUBLIC int HTCredentialsFilter (HTRequest * request, void * param, int status)
343: {
344: /*
345: ** Ask the authentication module to call the right credentials generator
346: ** that understands this scheme
347: */
348: if (HTAA_beforeFilter(request, param, status) == HT_OK) {
349: if (PROT_TRACE) HTTrace("Credentials. verified\n");
350: return HT_OK;
351: } else {
352: HTRequest_addError(request, ERR_FATAL, NO, HTERR_UNAUTHORIZED,
353: NULL, 0, "HTCredentialsFilter");
354: return HT_ERROR;
355: }
356: }
357:
358: /*
359: ** Client side authentication AFTER filter
360: ** ---------------------------------------
361: ** The client side authentication filter uses the
362: ** user dialog messages registered in the HTAlert module.
363: ** By default these are the ones used by the line mode browser but you can
364: ** just register something else.
365: */
366: PUBLIC int HTAuthFilter (HTRequest * request, void * param, int status)
367: {
368: /*
369: ** Ask the authentication module to call the right challenge parser
370: ** that understands this scheme
371: */
372: if (HTAA_afterFilter(request, param, status) == HT_OK) {
373:
374: /*
375: ** Start request with new credentials. As with the redirection filter
376: ** we reuse the same request object which means that we must
377: ** keep this around until the redirected request has terminated
378: */
379: HTLoad(request, NO);
380:
381: /*
382: ** We return HT_ERROR to make sure that this is the last handler to be
383: ** called. We do this as we don't want any other filter to delete the
384: ** request object now when we have just started a new one ourselves
385: */
386: return HT_ERROR;
387: }
388: return HT_OK;
389: }
390:
391: /*
392: ** Request Logging AFTER filter
393: ** ----------------------------
394: ** Default Logging filter using the log manager provided by HTLog.c
395: */
396: PUBLIC int HTLogFilter (HTRequest * request, void * param, int status)
397: {
398: if (request) {
399: if (HTLog_isOpen()) HTLog_add(request, status);
400: return HT_OK;
401: }
402: return HT_ERROR;
403: }
Webmaster