Annotation of libwww/Library/src/HTFilter.c, revision 2.11

2.1       frystyk     1: /*
                      2: **     BEFORE AND AFTER FILTERS
                      3: **
                      4: **     (c) COPYRIGHT MIT 1995.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.11    ! frystyk     6: **     @(#) $Id: HTFilter.c,v 2.10 1996/08/21 00:07:10 frystyk Exp $
2.1       frystyk     7: **
                      8: **     This module implements a set of default filters that can be registered
                      9: **     as BEFORE and AFTER filters to the Net manager
                     10: ** Authors
                     11: **     HFN     Henrik Frystyk, frystyk@w.org
                     12: ** History
                     13: **     Jul 4, 96       Written
                     14: */
                     15: 
                     16: /* Library include files */
                     17: #include "WWWLib.h"
                     18: #include "WWWCache.h"
                     19: #include "WWWHTTP.h"
                     20: #include "HTLog.h"
                     21: #include "HTAccess.h"
2.10      frystyk    22: #include "HTProxy.h"
                     23: #include "HTRules.h"
2.1       frystyk    24: #include "HTFilter.h"                                   /* Implemented here */
                     25: 
                     26: /* ------------------------------------------------------------------------- */
                     27: 
                     28: /*
                     29: **     Proxy and Gateway BEFORE filter
                     30: **     -------------------------------
                     31: **     Checks for registered proxy servers or gateways and sees whether this
                     32: **     request should be redirected to a proxy or a gateway. Proxies have
                     33: **     higher priority than gateways so we look for them first!
                     34: **     For HTTP/1.0 and HTTP/1.1 we may only send a full URL (including the
                     35: **     host portion) to proxy servers. Therefore, we tell the Library whether
                     36: **     to use the full URL or the one without the host part. Returns HT_OK.
                     37: */
                     38: PUBLIC int HTProxyFilter (HTRequest * request, void * param, int status)
                     39: {
                     40:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    41:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    42:     char * physical = NULL;
                     43:     if ((physical = HTProxy_find(addr))) {
2.6       frystyk    44:        HTRequest_setFullURI(request, YES);                       /* For now */
2.5       frystyk    45:        HTRequest_setProxy(request, physical);
2.8       frystyk    46:        HT_FREE(physical);
2.6       frystyk    47: #if 0
                     48:        /* Don't paste the URLs together anymore */
2.1       frystyk    49:        StrAllocCat(physical, addr);
2.5       frystyk    50:        HTAnchor_setPhysical(anchor, physical); 
2.6       frystyk    51: #endif
2.1       frystyk    52:     } else if ((physical = HTGateway_find(addr))) {
                     53:        /* 
                     54:        ** A gateway URL is created by chopping off any leading "/" to make the
                     55:        ** host into part of path
                     56:        */
                     57:        char * path =
                     58:            HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                     59:        char * gatewayed = HTParse(path+1, physical, PARSE_ALL);
                     60:        HTAnchor_setPhysical(anchor, gatewayed);
                     61:        HT_FREE(path); HT_FREE(gatewayed);
                     62:        HT_FREE(physical);     /* Was never released; the proxy branch frees it too */
                     63:        HTRequest_setFullURI(request, NO);
2.6       frystyk    64:        HTRequest_deleteProxy(request);
2.1       frystyk    65:     } else {
2.6       frystyk    66:        HTRequest_setFullURI(request, NO);                        /* For now */
                     67:        HTRequest_deleteProxy(request);
2.1       frystyk    68:     }
                     69:     return HT_OK;
                     70: }
                     71: 
                     72: /*
                     73: **     Rule Translation BEFORE Filter
                     74: **     ------------------------------
                     75: **     If we have a set of rules loaded (see the Rule manager) then check
                     76: **     before each request how the address should be translated. The trick
                     77: **     is that a parent anchor has an "address" which is the part from the URL
                     78: **     we used when we created the anchor. However, it also has a "physical
                     79: **     address" which is the place we are actually going to look for the
2.2       frystyk    80: **     resource. Hence this filter translates the physical address
                     81: **     (if any translations are found). A NULL translation yields HT_ERROR.
2.1       frystyk    82: */
                     83: PUBLIC int HTRuleFilter (HTRequest * request, void * param, int status)
                     84: {
                     85:     HTList * list = HTRule_global();
                     86:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    87:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    88:     char * physical = HTRule_translate(list, addr, NO);
                     89:     if (!physical) {                  /* No translation: refuse the request */
                     90:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                     91:                           NULL, 0, "HTRuleFilter");
                     92:        return HT_ERROR;
                     93:     }
                     94:     HTAnchor_setPhysical(anchor, physical);
                     95:     HT_FREE(physical);                /* The translated string is released here */
                     96:     return HT_OK;
                     97: }
                     98: 
                     99: /*
                    100: **     Cache Validation BEFORE Filter
                    101: **     ------------------------------
                    102: **     Check the cache mode to see if we can use an already loaded version
                    103: **     of this document. If so and our copy is valid then we don't have
                    104: **     to go out and get it unless we are forced to
2.3       frystyk   105: **     We only check the cache in case of a GET request. Otherwise, we go
                    106: **     directly to the source. This filter always returns HT_OK.
2.1       frystyk   107: */
                    108: PUBLIC int HTCacheFilter (HTRequest * request, void * param, int status)
                    109: {
                    110:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    111:     HTReload mode = HTRequest_reloadMode(request);
2.3       frystyk   112:     HTMethod method = HTRequest_method(request);
                    113: 
                    114:     /*
                    115:     ** Check the method of the request
                    116:     */
                    117:     if (method != METHOD_GET) {
                    118:        if (CACHE_TRACE) HTTrace("Cachefilter. We only check GET methods\n");
                    119:        return HT_OK;
                    120:     }
                    121: 
2.1       frystyk   122:     /*
                    123:     ** If the mode is "Force Reload" then don't even bother to check the
                    124:     ** cache - we flush everything we know about this document
                    125:     */
2.11    ! frystyk   126:     if (mode == HT_CACHE_FLUSH) {
2.1       frystyk   127:        /*
                    128:        ** Add the appropriate request headers. We use both the "pragma"
                    129:        ** and the "cache-control" headers in order to be
                    130:        ** backwards compatible with HTTP/1.0
                    131:        */
                    132:        HTRequest_addGnHd(request, HT_G_PRAGMA_NO_CACHE);
2.11    ! frystyk   133:        HTRequest_addCacheControl(request, "no-cache", "");
2.1       frystyk   134: 
                    135:        /*
                    136:        ** We also flush the information in the anchor
                    137:        */
                    138:        HTAnchor_clearHeader(anchor);
                    139:        return HT_OK;
                    140:     }
                    141: 
                    142:     /*
                    143:     ** Check the persistent cache manager. If we have a cache hit then
                    144:     ** continue to see if the reload mode requires us to do a validation check.
                    145:     ** This filter assumes that we can get the cached version through one of
                    146:     ** our protocol modules (for example the file module)
                    147:     */
                    148:     {
                    149:        char * addr = HTAnchor_address((HTAnchor *) anchor);
                    150:        char * cache = HTCache_getReference(addr);
                    151:        if (cache) {
2.11    ! frystyk   152:            if (mode != HT_CACHE_VALIDATE) {
2.1       frystyk   153:                HTAnchor_setPhysical(anchor, cache);    /* Load from the cache */
                    154:                HTAnchor_setCacheHit(anchor, YES);
                    155:            } else {
2.11    ! frystyk   156:                /*
        !           157:                **  If we were asked to validate the cached version then
        !           158:                **  use the etag or the last modified for cache validation
        !           159:                */
        !           160:                HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
        !           161:            }
        !           162:            HT_FREE(cache);
        !           163:        }
        !           164:        HT_FREE(addr);
        !           165:     }
        !           166:     return HT_OK;
        !           167: }
        !           168: 
        !           169: /*
        !           170: **     Check the Memory Cache (History list) 
        !           171: **     -------------------------------------
        !           172: **     Check if document is already loaded. The user can define whether
        !           173: **     the history list should follow normal expiration or work as a
        !           174: **     traditional history list where expired documents are not updated.
        !           175: **     We only check that a document is associated with the anchor (what a
        !           176: **     document is, is up to the application). Returns HT_LOADED or HT_OK.
        !           177: */
        !           178: PUBLIC int HTMemoryCacheFilter (HTRequest * request, void * param, int status)
        !           179: {
        !           180:     HTReload validation = HTRequest_reloadMode(request);
        !           181:     HTParentAnchor * anchor = HTRequest_anchor(request);
        !           182:     void * document = HTAnchor_document(anchor);
2.1       frystyk   183: 
2.11    ! frystyk   184:     /*
        !           185:     **  If the reload mode is above HT_CACHE_FLUSH_MEM (for example a flush of
        !           186:     **  the persistent cache) there is no reason to do anything here - we're
        !           187:     **  flushing it anyway. Also if no document then just exit from this filter.
        !           188:     */
        !           189:     if (!document || validation > HT_CACHE_FLUSH_MEM) {
        !           190:        if (CACHE_TRACE) HTTrace("Mem Cache... No fresh document...\n");
        !           191:        return HT_OK;
        !           192:     }
2.1       frystyk   193: 
2.11    ! frystyk   194:     /*
        !           195:     **  If we have a document object associated with this anchor then we also
        !           196:     **  have the object in the history list. Depending on what the user asked,
        !           197:     **  we can add a cache validator
        !           198:     */
        !           199:     if (document) {         /* Always true here: a NULL document returned above */
        !           200:        HTExpiresMode expires = HTCache_expiresMode();
        !           201:        if (validation != HT_CACHE_FLUSH_MEM) {
        !           202:            if (CACHE_TRACE)
        !           203:                HTTrace("Mem Cache... Document already in memory\n");
        !           204:            if (expires != HT_EXPIRES_IGNORE) {
        !           205: 
        !           206:                /*
        !           207:                **  Ask the cache manager if this object has expired. Also
        !           208:                **  check if we should care about expiration or not.
        !           209:                */
        !           210:                if (!HTCache_isValid(anchor)) {
        !           211:                    if (expires == HT_EXPIRES_NOTIFY) { /* Notify, then still use it */
        !           212: 
        !           213:                        /*
        !           214:                        ** See if we have a function registered for outputting errors.
        !           215:                        ** If so then call it and present the message to the user
        !           216:                        */
        !           217:                        HTAlertCallback * cbf = HTAlert_find(HT_A_MESSAGE);
        !           218:                        if (cbf)
        !           219:                            (*cbf)(request, HT_A_MESSAGE, HTERR_CACHE_EXPIRED,
        !           220:                                   NULL, HTRequest_error(request), NULL);
        !           221:                    } else {
        !           222:                        if (CACHE_TRACE) HTTrace("Mem Cache... Expired - autoreload\n");
        !           223:                        HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
        !           224:                        return HT_OK;           /* Must go get it */
        !           225:                    }
        !           226:                }
2.1       frystyk   227:            }
2.11    ! frystyk   228:            return HT_LOADED;                   /* Got it! */
        !           229:        } else {
        !           230: 
        !           231:            /*
        !           232:            **  Reload mode is HT_CACHE_FLUSH_MEM: do not use the memory version,
        !           233:            **  use either the etag or the last modified for cache validation
        !           234:            */
        !           235:            HTRequest_addRqHd(request, HT_C_IF_NONE_MATCH | HT_C_IMS);
2.1       frystyk   236:        }
                    237:     }
                    238:     return HT_OK;
                    239: }
                    240: 
                    241: /*
                    242: **     Error and Information AFTER filter
                    243: **     ----------------------------------
                    244: **     It checks the status code from a request and generates an 
                    245: **     error/information message if required. Always returns HT_OK.
                    246: */
                    247: PUBLIC int HTInfoFilter (HTRequest * request, void * param, int status)
                    248: {
                    249:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    250:     char * uri = HTAnchor_address((HTAnchor*) anchor);    /* Freed below */
                    251:     switch (status) {
                    252:     case HT_RETRY:
                    253:        if (PROT_TRACE)
                    254:            HTTrace("Load End.... NOT AVAILABLE, RETRY AT %ld\n",
                    255:                    HTRequest_retryTime(request));
                    256:        break;
                    257: 
                    258:     case HT_ERROR:
                    259:     {
                    260:        /*
                    261:        ** See if we have a function registered for outputting errors.
                    262:        ** If so then call it and present the message to the user
                    263:        */
                    264:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    265:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    266:                        HTRequest_error(request), NULL);
                    267:        if (PROT_TRACE)
                    268:            HTTrace("Load End.... ERROR: Can't access `%s\'\n",
                    269:                    uri ? uri : "<UNKNOWN>");
2.3       frystyk   270:        break;
2.7       frystyk   271:     }    
                    272: 
                    273:     case HT_NO_DATA:
                    274:     {
                    275:        /*
                    276:        ** The document was empty
                    277:        */
                    278:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    279:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    280:                        HTRequest_error(request), NULL);
                    281:        if (PROT_TRACE)
                    282:            HTTrace("Load End.... EMPTY: No content `%s\'\n",
                    283:                    uri ? uri : "<UNKNOWN>");
                    284:        break;
                    285:     }    
2.3       frystyk   286: 
                    287:     case HT_LOADED:
                    288:     {
                    289:        /*
                    290:        ** Even though we have received a loaded status the thing we have
                    291:        ** loaded successfully may in fact be an error message. We therefore
                    292:        ** look at the error stack to see what to do. As in the other cases,
                    293:        ** the trace below must not hand a NULL uri to the %s conversion.
                    294:        */
                    294:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    295:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    296:                        HTRequest_error(request), NULL);
                    297:        if (PROT_TRACE) HTTrace("Load End.... OK: `%s\'\n", uri ? uri : "<UNKNOWN>");
2.1       frystyk   298:        break;
                    299:     }
                    300: 
                    301:     default:
                    302:        if (PROT_TRACE)
                    303:            HTTrace("Load End.... Request ended with code %d\n", status);
                    304:        break;
                    305:     }
                    306: 
                    307:     HT_FREE(uri);
                    308:     return HT_OK;
                    309: }
                    310: 
                    311: /*
                    312: **     Redirection AFTER filter
                    313: **     ------------------------
                    314: **     Redirections on GET, HEAD (or any other safe method) are followed;
                    315: **     for other methods the user is first asked to confirm, if possible
                    316: */
                    317: PUBLIC int HTRedirectFilter (HTRequest * request, void * param, int status)
                    318: {
                    319:     HTMethod method = HTRequest_method(request); 
                    320:     HTAnchor * new_anchor = HTRequest_redirection(request); 
2.7       frystyk   321:     if (!new_anchor) {
                    322:        if (PROT_TRACE) HTTrace("Redirection. No destination\n");
                    323:        return HT_OK;
                    324:     }
                    325: 
2.1       frystyk   326:     /*
                    327:     ** Unsafe methods need a confirmation (skipped if no prompt is registered)
                    328:     */
2.7       frystyk   329:     if (!HTMethod_isSafe(method)) { 
2.4       frystyk   330:        HTAlertCallback * prompt = HTAlert_find(HT_A_CONFIRM);
                    331:        if (prompt) {
                    332:            if ((*prompt)(request, HT_A_CONFIRM, HT_MSG_REDIRECTION,
                    333:                          NULL, NULL, NULL) != YES)
                    334:                return HT_ERROR;                /* Not confirmed: give up */
                    335:        }
2.1       frystyk   336:     } 
                    337:  
                    338:     /*
                    339:     **  Start new request with the redirect anchor found in the headers.
                    340:     ** Note that we reuse the same request object which means that we must
                    341:     **  keep this around until the redirected request has terminated. It also
                    342:     **  allows us in an easy way to keep track of the number of redirections
                    343:     ** so that we can detect endless loops.
                    344:     */ 
2.4       frystyk   345:     if (HTRequest_doRetry(request)) { 
2.1       frystyk   346:        HTLoadAnchor(new_anchor, request);
2.9       frystyk   347:     } else {
                    348:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
                    349:                           NULL, 0, "HTRedirectFilter");
                    350:     }
                    351: 
                    352:     /*
                    353:     **  By returning HT_ERROR we make sure that this is the last handler to be
                    354:     **  called. We do this as we don't want any other filter to delete the 
                    355:     **  request object now when we have just started a new one ourselves
                    356:     */
                    357:     return HT_ERROR;
                    358: } 
                    359: 
                    360: /*
                    361: **     Retry through Proxy Filter
                    362: **     --------------------------
                    363: **     This filter handles a 305 Use Proxy response and retries the request
                    364: **     through the proxy. Returns HT_OK if no proxy location, else HT_ERROR
                    365: */
                    366: PUBLIC int HTUseProxyFilter (HTRequest * request, void * param, int status)
                    367: {
                    368:     HTAnchor * proxy_anchor = HTRequest_redirection(request); 
                    369:     if (!proxy_anchor) {
                    370:        if (PROT_TRACE) HTTrace("Use Proxy... No proxy location\n");
                    371:        return HT_OK;
                    372:     }
                    373: 
                    374:     /*
                    375:     **  Add the proxy to the list. Assume HTTP access method only!
                    376:     */
                    377:     {
                    378:        char * addr = HTAnchor_address(proxy_anchor);
                    379:        HTProxy_add("http", addr);
                    380:        HT_FREE(addr);
                    381:     } 
                    382:  
                    383:     /*
                    384:     **  Start new request through the proxy if we haven't reached the max
                    385:     **  number of redirections for this request
                    386:     */ 
                    387:     if (HTRequest_doRetry(request)) { 
                    388:        HTLoadAnchor(proxy_anchor, request);
2.1       frystyk   389:     } else {
                    390:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
                    391:                           NULL, 0, "HTUseProxyFilter");   /* Report our own name */
                    392:     }
                    393: 
                    394:     /*
                    395:     **  By returning HT_ERROR we make sure that this is the last handler to be
                    396:     **  called. We do this as we don't want any other filter to delete the 
                    397:     **  request object now when we have just started a new one ourselves
                    398:     */
                    399:     return HT_ERROR;
                    400: } 
                    401: 
                    402: /*
                    403: **     Client side authentication BEFORE filter
                    404: **     ----------------------------------------
                    405: **     The filter generates the credentials required to access a document.
                    406: **     Getting them may involve asking the user. Returns HT_OK or HT_ERROR.
                    407: */
                    408: PUBLIC int HTCredentialsFilter (HTRequest * request, void * param, int status)
                    409: {
                    410:     /*
                    411:     ** Ask the authentication module to call the right credentials generator
                    412:     ** that understands this scheme
                    413:     */
                    414:     if (HTAA_beforeFilter(request, param, status) == HT_OK) {
                    415:        if (PROT_TRACE) HTTrace("Credentials. verified\n");
                    416:        return HT_OK;
                    417:     } else {          /* Anything but HT_OK is reported as unauthorized */
                    418:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_UNAUTHORIZED,
                    419:                           NULL, 0, "HTCredentialsFilter");
                    420:        return HT_ERROR;
                    421:     }
                    422: }
                    423: 
                    424: /*
                    425: **     Client side authentication AFTER filter
                    426: **     ---------------------------------------
                    427: **     The client side authentication filter uses the 
                    428: **     user dialog messages registered in the HTAlert module.
                    429: **     By default these are the ones used by the line mode browser but you can
                    430: **     just register something else. Returns HT_ERROR if the request is retried.
                    431: */
                    432: PUBLIC int HTAuthFilter (HTRequest * request, void * param, int status)
                    433: {
                    434:     /*
                    435:     ** Ask the authentication module to call the right challenge parser
                    436:     ** that understands this scheme
                    437:     */
                    438:     if (HTAA_afterFilter(request, param, status) == HT_OK) {
                    439: 
                    440:        /*
                    441:        ** Start request with new credentials. As with the redirection filter
                    442:        ** we reuse the same request object which means that we must
                    443:        ** keep this around until the new request has terminated
                    444:        */
                    445:        HTLoad(request, NO);
                    446: 
                    447:        /*
                    448:        **  We return HT_ERROR to make sure that this is the last handler to be
                    449:        **  called. We do this as we don't want any other filter to delete the 
                    450:        **  request object now when we have just started a new one ourselves
                    451:        */
                    452:        return HT_ERROR;
                    453:     }
                    454:     return HT_OK;             /* Nothing was restarted; let other filters run */
                    455: }
                    456: 
                    457: /*
                    458: **     Request Logging AFTER filter
                    459: **     ----------------------------
                    460: **     Default Logging filter using the log manager provided by HTLog.c
                    461: */
                    462: PUBLIC int HTLogFilter (HTRequest * request, void * param, int status)
                    463: {
                    464:     if (request) {
                    465:        if (HTLog_isOpen()) HTLog_add(request, status);  /* Only if log is open */
                    466:        return HT_OK;
                    467:     }
                    468:     return HT_ERROR;          /* No request object to log */
                    469: }

Webmaster