Annotation of libwww/Library/src/HTFilter.c, revision 2.36

2.1       frystyk     1: /*
                      2: **     BEFORE AND AFTER FILTERS
                      3: **
                      4: **     (c) COPYRIGHT MIT 1995.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.36    ! frystyk     6: **     @(#) $Id: HTFilter.c,v 2.35 1999/02/22 22:10:11 frystyk Exp $
2.1       frystyk     7: **
                      8: **     This module implements a set of default filters that can be registered
                      9: **     as BEFORE and AFTER filters to the Net manager
                     10: ** Authors
                     11: **     HFN     Henrik Frystyk, frystyk@w3.org
                     12: ** History
                     13: **     Jul 4, 96       Written
                     14: */
                     15: 
                     16: /* Library include files */
                     17: #include "WWWLib.h"
                     18: #include "WWWCache.h"
                     19: #include "WWWHTTP.h"
                     20: #include "HTLog.h"
                     21: #include "HTAccess.h"
2.10      frystyk    22: #include "HTProxy.h"
                     23: #include "HTRules.h"
2.1       frystyk    24: #include "HTFilter.h"                                   /* Implemented here */
                     25: 
                     26: /* ------------------------------------------------------------------------- */
                     27: 
                     28: /*
                     29: **     Proxy and Gateway BEFORE filter
                     30: **     -------------------------------
                     31: **     Checks the anchor's physical address against the registered proxy
                     32: **     servers and then the registered gateways (proxies have higher priority
                     33: **     so we look for them first). "param" and "mode" are not used and the
                     34: **     filter always returns HT_OK.
                     35: **     For HTTP/1.0 and HTTP/1.1 we may only send a full URL (including the
                     36: **     host portion) to proxy servers, so the full-URI flag is set for them only.
                     37: */
2.15      frystyk    38: PUBLIC int HTProxyFilter (HTRequest * request, void * param, int mode)
2.1       frystyk    39: {
                     40:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    41:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    42:     char * physical = NULL;
                     43:     if ((physical = HTProxy_find(addr))) {
2.6       frystyk    44:        HTRequest_setFullURI(request, YES);                       /* For now */
2.5       frystyk    45:        HTRequest_setProxy(request, physical);
2.8       frystyk    46:        HT_FREE(physical);
2.6       frystyk    47: #if 0
                     48:        /* Don't paste the URLs together anymore (disabled code kept for reference) */
2.1       frystyk    49:        StrAllocCat(physical, addr);
2.5       frystyk    50:        HTAnchor_setPhysical(anchor, physical); 
2.6       frystyk    51: #endif
2.1       frystyk    52:     } else if ((physical = HTGateway_find(addr))) {
                     53:        /* 
                     54:        ** A gateway URL is created by chopping off any leading "/" to make the
                     55:        ** host into part of the path, then resolving it relative to the gateway
                     56:        */
                     57:        char * path =
                     58:            HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
2.26      frystyk    59:        char * gatewayed = HTParse((*path=='/') ? path+1 : path, physical, PARSE_ALL);
2.1       frystyk    60:        HTAnchor_setPhysical(anchor, gatewayed);
                     61:        HT_FREE(path);
                     62:        HT_FREE(gatewayed);
                     63:        HTRequest_setFullURI(request, NO);
2.6       frystyk    64:        HTRequest_deleteProxy(request);
2.1       frystyk    65:     } else {
2.6       frystyk    66:        HTRequest_setFullURI(request, NO);                        /* For now */
                     67:        HTRequest_deleteProxy(request);
2.1       frystyk    68:     }
                     69:     return HT_OK;
                     70: }
                     71: 
                     72: /*
                     73: **     Rule Translation BEFORE Filter
                     74: **     ------------------------------
                     75: **     If we have a set of rules loaded (see the Rule manager) then check
                     76: **     before each request how it should be translated. The trick
                     77: **     is that a parent anchor has an "address" which is the part from the URL
                     78: **     we used when we created the anchor. However, it also has a "physical
                     79: **     address" which is the place we are actually going to look for the
2.2       frystyk    80: **     resource. Hence this filter translates the physical address. When no
                     81: **     translation comes back it adds HTERR_FORBIDDEN and returns HT_ERROR.
2.1       frystyk    82: */
2.15      frystyk    83: PUBLIC int HTRuleFilter (HTRequest * request, void * param, int mode)
2.1       frystyk    84: {
                     85:     HTList * list = HTRule_global();
                     86:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    87:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    88:     char * physical = HTRule_translate(list, addr, NO);
                     89:     if (!physical) {
                     90:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                     91:                           NULL, 0, "HTRuleFilter");
                     92:        return HT_ERROR;
                     93:     }
                     94:     HTAnchor_setPhysical(anchor, physical);
                     95:     HT_FREE(physical);
                     96:     return HT_OK;
                     97: }
                     98: 
                     99: /*
2.16      frystyk   100: **     A small BEFORE filter that just finds a cache entry unconditionally
                    101: **     and loads the entry. All freshness and any other constraints are
                    102: **     ignored. Returns HT_ERROR once HTLoad() has been called, else HT_OK.
                    103: */
                    104: PUBLIC int HTCacheLoadFilter (HTRequest * request, void * param, int mode)
                    105: {
                    106:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    107:     HTCache * cache = HTCache_find(anchor);
                    108:     if (cache) {
                    109:        char * name = HTCache_name(cache);
                    110:        HTAnchor_setPhysical(anchor, name);
                    111:        HTCache_addHit(cache);
                    112:        HT_FREE(name);
                    113: 
                    114:        /*
                    115:        **  Start request directly from the cache. As with the redirection
                    116:        **  filter we reuse the same request object which means that we must
                    117:        **  keep this around until the cache load request has terminated
                    118:        */
                    119:        {
                    120:            HTLoad(request, NO);
                    121:            return HT_ERROR;
                    122:        }
                    123:     }
                    124:     return HT_OK;
                    125: }
                    126: 
                    127: /*
2.15      frystyk   128: **     Check the Memory Cache (History list) BEFORE filter
                    129: **     ---------------------------------------------------
2.11      frystyk   130: **     Check if document is already loaded. The user can define whether
                    131: **     the history list should follow normal expiration or work as a
                    132: **     traditional history list where expired documents are not updated.
                    133: **     We only check that a document object is associated with the anchor:
                    134: **     returns HT_LOADED if so (GET, reload mode below flush), else HT_OK
                    135: */
2.15      frystyk   136: PUBLIC int HTMemoryCacheFilter (HTRequest * request, void * param, int mode)
2.11      frystyk   137: {
                    138:     HTReload validation = HTRequest_reloadMode(request);
                    139:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    140:     void * document = HTAnchor_document(anchor);
2.1       frystyk   141: 
2.11      frystyk   142:     /*
2.14      frystyk   143:     **  We only check the memory cache if it's a GET method
                    144:     */
                    145:     if (HTRequest_method(request) != METHOD_GET) {
2.35      frystyk   146:        HTTRACE(CACHE_TRACE, "Mem Cache... We only check GET methods\n");
2.14      frystyk   147:        return HT_OK;
                    148:     }
                    149: 
                    150:     /*
2.11      frystyk   151:     **  If we are asked to flush the persistent cache then there is no reason
                    152:     **  to do anything here - we're flushing it anyway. Also if no document
                    153:     **  then just exit from this filter.
                    154:     */
                    155:     if (!document || validation > HT_CACHE_FLUSH_MEM) {
2.35      frystyk   156:        HTTRACE(CACHE_TRACE, "Mem Cache... No fresh document...\n");
2.11      frystyk   157:        return HT_OK;
                    158:     }
2.1       frystyk   159: 
2.11      frystyk   160:     /*
                    161:     **  If we have a document object associated with this anchor then we also
                    162:     **  have the object in the history list. Unless the reload mode is exactly
                    163:     **  HT_CACHE_FLUSH_MEM we report the document as already loaded
                    164:     */
2.14      frystyk   165:     if (document && validation != HT_CACHE_FLUSH_MEM) {
2.35      frystyk   166:        HTTRACE(CACHE_TRACE, "Mem Cache... Document already in memory\n");
2.14      frystyk   167:        return HT_LOADED;
2.15      frystyk   168:     }
2.17      frystyk   169:     return HT_OK;
2.15      frystyk   170: }
                    171: 
                    172: /*
2.1       frystyk   173: **     Error and Information AFTER filter
                    174: **     ----------------------------------
                    175: **     For every status but HT_LOADED the result of HTRequest_error() is passed
                    176: **     to the HT_A_MESSAGE alert callback (if any). Always returns HT_OK.
                    177: */
2.15      frystyk   178: PUBLIC int HTInfoFilter (HTRequest * request, HTResponse * response,
                    179:                         void * param, int status)
2.1       frystyk   180: {
                    181:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    182:     char * uri = HTAnchor_address((HTAnchor*) anchor);  /* freed below */
                    183:     switch (status) {
2.28      frystyk   184:     case HT_RETRY: {
                    185:         HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    186:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    187:                        HTRequest_error(request), NULL);
2.35      frystyk   188:        HTTRACE(PROT_TRACE, "Load End.... NOT AVAILABLE, RETRY AT %ld\n" _ 
2.15      frystyk   189:                    HTResponse_retryTime(response));
2.28      frystyk   190:         }
                    191:         break;
2.1       frystyk   192: 
2.7       frystyk   193:     case HT_NO_DATA:
                    194:     {
                    195:        /*
                    196:        ** The document was empty
                    197:        */
                    198:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    199:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    200:                        HTRequest_error(request), NULL);
2.35      frystyk   201:        HTTRACE(PROT_TRACE, "Load End.... EMPTY: No content `%s\'\n" _ 
2.7       frystyk   202:                    uri ? uri : "<UNKNOWN>");
                    203:        break;
                    204:     }    
2.3       frystyk   205:     /* Guard the trace below against a NULL uri, as the HT_NO_DATA case does */
                    206:     case HT_LOADED:
2.35      frystyk   207:        HTTRACE(PROT_TRACE, "Load End.... OK: `%s\'\n" _ uri ? uri : "<UNKNOWN>");
2.16      frystyk   208:        break;
                    209: 
                    210:     default:
2.3       frystyk   211:     {
                    212:        /*
2.16      frystyk   213:        ** See if we have a function registered for outputting errors.
                    214:        ** If so then call it and present the message to the user
2.3       frystyk   215:        */
                    216:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    217:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    218:                        HTRequest_error(request), NULL);
2.35      frystyk   219:        HTTRACE(PROT_TRACE, "Load End.... Request ended with code %d\n" _ status);
2.1       frystyk   220:        break;
                    221:     }
2.16      frystyk   222:     }
2.1       frystyk   223:     HT_FREE(uri);
                    224:     return HT_OK;
                    225: }
                    226: 
                    227: /*
                    228: **     Redirection AFTER filter
                    229: **     ------------------------
                    230: **     Redirects automatically on the GET or HEAD method (or any other safe
                    231: **     method). Returns HT_OK if nothing was redirected, HT_ERROR otherwise.
                    232: */
2.15      frystyk   233: PUBLIC int HTRedirectFilter (HTRequest * request, HTResponse * response,
                    234:                             void * param, int status)
2.1       frystyk   235: {
                    236:     HTMethod method = HTRequest_method(request); 
2.15      frystyk   237:     HTAnchor * new_anchor = HTResponse_redirection(response); 
2.7       frystyk   238:     if (!new_anchor) {
2.35      frystyk   239:        HTTRACE(PROT_TRACE, "Redirection. No destination\n");
2.7       frystyk   240:        return HT_OK;
                    241:     }
                    242: 
2.1       frystyk   243:     /*
2.21      frystyk   244:     ** Only do automatic redirect on GET and HEAD. Ask for all
2.27      frystyk   245:     ** other methods.
2.1       frystyk   246:     */
2.21      frystyk   247:     if (!HTMethod_isSafe(method)) {
                    248: 
                    249:        /*
                    250:        ** If we got a 303 See Other then change the method to GET. Otherwise
                    251:        ** ask the user. NOTE(review): with no confirm callback we just continue
                    252:        */
                    253:        if (status == HT_SEE_OTHER) {
2.35      frystyk   254:            HTTRACE(PROT_TRACE, "Redirection. Changing method from %s to GET\n" _ 
2.21      frystyk   255:                        HTMethod_name(method));
                    256:            HTRequest_setMethod(request, METHOD_GET);
                    257:        } else {
                    258:            HTAlertCallback * prompt = HTAlert_find(HT_A_CONFIRM);
                    259:            if (prompt) {
                    260:                if ((*prompt)(request, HT_A_CONFIRM, HT_MSG_REDIRECTION,
                    261:                              NULL, NULL, NULL) != YES)
2.27      frystyk   262:                    return HT_OK;
2.21      frystyk   263:            }
2.4       frystyk   264:        }
2.1       frystyk   265:     } 
2.36    ! frystyk   266: 
        !           267:     /* Register the redirection as a link relationship (with the original method) */
        !           268:     {
        !           269:        HTLinkType ltype = status==HT_PERM_REDIRECT ? HT_LR_PERM_REDIRECT :
        !           270:            (status==HT_TEMP_REDIRECT || status==HT_FOUND) ? HT_LR_TEMP_REDIRECT :
        !           271:            status==HT_SEE_OTHER ? HT_LR_SEE_OTHER : NULL;
        !           272:        if (ltype) {
        !           273:            HTLink_add((HTAnchor *) HTRequest_anchor(request), new_anchor, 
        !           274:                       ltype, method);
        !           275:        }
        !           276:     }
        !           277: 
2.34      frystyk   278:     /* Delete any auth credentials as they get regenerated */
                    279:     HTRequest_deleteCredentialsAll(request);
                    280: 
2.1       frystyk   281:     /*
                    282:     **  Start new request with the redirect anchor found in the headers.
                    283:     **  Note that we reuse the same request object which means that we must
                    284:     **  keep this around until the redirected request has terminated. It also
                    285:     **  allows us in an easy way to keep track of the number of redirections
                    286:     **  so that we can detect endless loops.
                    287:     */ 
2.4       frystyk   288:     if (HTRequest_doRetry(request)) { 
2.1       frystyk   289:        HTLoadAnchor(new_anchor, request);
2.9       frystyk   290:     } else {
                    291:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
                    292:                           NULL, 0, "HTRedirectFilter");
                    293:     }
                    294: 
                    295:     /*
                    296:     **  By returning HT_ERROR we make sure that this is the last handler to be
                    297:     **  called. We do this as we don't want any other filter to delete the 
                    298:     **  request object now when we have just started a new one ourselves
                    299:     */
                    300:     return HT_ERROR;
                    301: } 
                    302: 
                    303: /*
2.15      frystyk   304: **     Retry through Proxy AFTER Filter
                    305: **     --------------------------------
2.9       frystyk   306: **     Handles a 305 Use Proxy response: once the user confirms, the proxy is
                    307: **     added for "http" and the request retried (HT_ERROR); otherwise HT_OK
                    308: */
2.15      frystyk   309: PUBLIC int HTUseProxyFilter (HTRequest * request, HTResponse * response,
                    310:                             void * param, int status)
2.9       frystyk   311: {
2.20      frystyk   312:     HTAlertCallback * cbf = HTAlert_find(HT_A_CONFIRM);
2.15      frystyk   313:     HTAnchor * proxy_anchor = HTResponse_redirection(response); 
2.9       frystyk   314:     if (!proxy_anchor) {
2.35      frystyk   315:        HTTRACE(PROT_TRACE, "Use Proxy... No proxy location\n");
2.9       frystyk   316:        return HT_OK;
                    317:     }
                    318: 
                    319:     /*
                    320:     **  Add the proxy to the list. Assume HTTP access method only!
2.20      frystyk   321:     **  Because evil servers may redirect the client to an untrusted
                    322:     **  proxy, we can only accept redirects for this particular
                    323:     **  server. Also, we do not know whether this is for HTTP or all
                    324:     **  other requests as well. No confirm callback counts as a refusal.
2.9       frystyk   325:     */
2.20      frystyk   326:     if ((cbf && (*cbf)(request, HT_A_CONFIRM, HT_MSG_PROXY, NULL,NULL,NULL))) {
2.9       frystyk   327:        char * addr = HTAnchor_address(proxy_anchor);
                    328:        HTProxy_add("http", addr);
                    329:        HT_FREE(addr);
                    330:  
2.20      frystyk   331:        /*
                    332:        **  Start new request through the proxy if we haven't reached the max
                    333:        **  number of redirections for this request
                    334:        */ 
                    335:        if (HTRequest_doRetry(request)) { 
                    336:            HTLoadAnchor(proxy_anchor, request);
                    337:        } else {
                    338:            HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
                    339:                               NULL, 0, "HTUseProxyFilter");
                    340:        }
                    341: 
                    342:        /*
                    343:        **  By returning HT_ERROR we make sure that this is the last handler to be
                    344:        **  called. We do this as we don't want any other filter to delete the 
                    345:        **  request object now when we have just started a new one ourselves
                    346:        */
                    347:        return HT_ERROR;
                    348: 
2.1       frystyk   349:     } else {
2.20      frystyk   350:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_NO_AUTO_PROXY,
                    351:                           NULL, 0, "HTUseProxyFilter");
                    352:        return HT_OK;
2.1       frystyk   353:     }
                    354: } 
                    355: 
                    356: /*
                    357: **     Client side authentication BEFORE filter
                    358: **     ----------------------------------------
                    359: **     The filter generates the credentials required to access a document
                    360: **     (may involve asking the user). Returns HT_OK, or HT_ERROR on failure
                    361: */
2.15      frystyk   362: PUBLIC int HTCredentialsFilter (HTRequest * request, void * param, int mode)
2.1       frystyk   363: {
                    364:     /*
                    365:     ** Ask the authentication module to call the right credentials generator
                    366:     ** that understands this scheme; anything but HT_OK is reported as
                    367:     ** HTERR_UNAUTHORIZED */
2.15      frystyk   368:     if (HTAA_beforeFilter(request, param, mode) == HT_OK) {
2.35      frystyk   369:        HTTRACE(PROT_TRACE, "Credentials. verified\n");
2.1       frystyk   370:        return HT_OK;
                    371:     } else {
                    372:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_UNAUTHORIZED,
                    373:                           NULL, 0, "HTCredentialsFilter");
                    374:        return HT_ERROR;
                    375:     }
                    376: }
                    377: 
                    378: /*
                    379: **     Client side authentication AFTER filter
                    380: **     ---------------------------------------
                    381: **     The client side authentication filter uses the user dialog messages
                    382: **     registered in the HTAlert module. By default these are the ones used by
                    383: **     the line mode browser but you can just register something else.
                    384: **     Returns HT_ERROR after restarting the request, HT_OK otherwise.
                    385: */
2.15      frystyk   386: PUBLIC int HTAuthFilter (HTRequest * request, HTResponse * response,
                    387:                         void * param, int status)
2.1       frystyk   388: {
                    389:     /*
                    390:     ** Ask the authentication module to call the right challenge parser
                    391:     ** that understands this scheme
                    392:     */
2.15      frystyk   393:     if (HTAA_afterFilter(request, response, param, status) == HT_OK) {
2.1       frystyk   394: 
                    395:        /*
                    396:        ** Start request with new credentials. As with the redirection filter
                    397:        ** we reuse the same request object which means that we must
                    398:        ** keep this around until the redirected request has terminated
                    399:        */
                    400:        HTLoad(request, NO);
                    401: 
                    402:        /*
                    403:        **  We return HT_ERROR to make sure that this is the last handler to be
                    404:        **  called. We do this as we don't want any other filter to delete the 
                    405:        **  request object now when we have just started a new one ourselves
                    406:        */
                    407:        return HT_ERROR;
                    408:     }
                    409:     return HT_OK;
2.32      kahan     410: }
                    411: 
                    412: /*
                    413: **     Client side authentication info AFTER filter
                    414: **     --------------------------------------------
                    415: */
                    416: PUBLIC int HTAuthInfoFilter (HTRequest * request, HTResponse * response,
                    417:                             void * param, int status)
                    418: {
                    419:     /*
                    420:     ** Without a challenge in the response there is nothing to do (HT_OK).
                    421:     ** Otherwise ask the authentication module to call the right info parser
                    422:     */
                    423:     if (! HTResponse_challenge (response))
                    424:       return HT_OK;
                    425:     else if (HTAA_updateFilter(request, response, param, status) == HT_OK) 
                    426:       return HT_OK;
                    427:     else
                    428:       return HT_ERROR;
2.1       frystyk   429: }
                    430: 
                    431: /*
2.24      frystyk   432: **     Request Logging AFTER filter
                    433: **     ----------------------------
2.1       frystyk   434: **     Calls HTLog_addCLF() (see HTLog.c) on the HTLog passed as "param"
                    435: */
2.15      frystyk   436: PUBLIC int HTLogFilter (HTRequest * request, HTResponse * response,
                    437:                        void * param, int status)
2.1       frystyk   438: {
                    439:     if (request) {
2.23      frystyk   440:        HTLog * log = (HTLog *) param;
                    441:        if (log) HTLog_addCLF(log, request, status);     /* no log: nothing to do */
2.24      frystyk   442:        return HT_OK;
                    443:     }
                    444:     return HT_ERROR;                                    /* no request given */
                    445: }
                    446: 
                    447: /*
                    448: **     Request Referer AFTER filter
                    449: **     ----------------------------
                    450: **     Calls HTLog_addReferer() (see HTLog.c) on the HTLog passed as "param"
                    451: */
                    452: PUBLIC int HTRefererFilter (HTRequest * request, HTResponse * response,
                    453:                            void * param, int status)
                    454: {
                    455:     if (request) {
                    456:        HTLog * log = (HTLog *) param;
                    457:        if (log) HTLog_addReferer(log, request, status); /* no log: nothing to do */
2.1       frystyk   458:        return HT_OK;
                    459:     }
                    460:     return HT_ERROR;                                    /* no request given */
                    461: }

Webmaster