Annotation of libwww/Library/src/HTFilter.c, revision 2.37

2.1       frystyk     1: /*
                      2: **     BEFORE AND AFTER FILTERS
                      3: **
                      4: **     (c) COPYRIGHT MIT 1995.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.37    ! frystyk     6: **     @(#) $Id: HTFilter.c,v 2.36 1999/03/08 16:53:40 frystyk Exp $
2.1       frystyk     7: **
                      8: **     This module implements a set of default filters that can be registered
                      9: **     as BEFORE and AFTER filters to the Net manager
                     10: ** Authors
                     11: **     HFN     Henrik Frystyk, frystyk@w.org
                     12: ** History
                     13: **     Jul 4, 96       Written
                     14: */
                     15: 
                     16: /* Library include files */
                     17: #include "WWWLib.h"
                     18: #include "WWWCache.h"
                     19: #include "WWWHTTP.h"
                     20: #include "HTLog.h"
                     21: #include "HTAccess.h"
2.10      frystyk    22: #include "HTProxy.h"
                     23: #include "HTRules.h"
2.1       frystyk    24: #include "HTFilter.h"                                   /* Implemented here */
                     25: 
                     26: /* ------------------------------------------------------------------------- */
                     27: 
                     28: /*
                     29: **     Proxy and Gateway BEFORE filter
                     30: **     -------------------------------
                     31: **     Checks for registerd proxy servers or gateways and sees whether this
                     32: **     request should be redirected to a proxy or a gateway. Proxies have
                     33: **     higher priority than gateways so we look for them first!
                     34: **     For HTTP/1.0 and HTTP/1.1 we may only send a full URL (including the
                     35: **     host portion) to proxy servers. Therefore, we tell the Library whether
                     36: **     to use the full URL or the traditional HTTP one without the host part.
                     37: */
2.15      frystyk    38: PUBLIC int HTProxyFilter (HTRequest * request, void * param, int mode)
2.1       frystyk    39: {
                     40:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    41:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    42:     char * physical = NULL;
                     43:     if ((physical = HTProxy_find(addr))) {
2.6       frystyk    44:        HTRequest_setFullURI(request, YES);                       /* For now */
2.5       frystyk    45:        HTRequest_setProxy(request, physical);
2.8       frystyk    46:        HT_FREE(physical);
2.6       frystyk    47: #if 0
                     48:        /* Don't paste the URLs together anymore */
2.1       frystyk    49:        StrAllocCat(physical, addr);
2.5       frystyk    50:        HTAnchor_setPhysical(anchor, physical); 
2.6       frystyk    51: #endif
2.1       frystyk    52:     } else if ((physical = HTGateway_find(addr))) {
                     53:        /* 
                     54:        ** A gateway URL is crated by chopping off any leading "/" to make the
                     55:        ** host into part of path
                     56:        */
                     57:        char * path =
                     58:            HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
2.26      frystyk    59:        char * gatewayed = HTParse((*path=='/') ? path+1 : path, physical, PARSE_ALL);
2.1       frystyk    60:        HTAnchor_setPhysical(anchor, gatewayed);
                     61:        HT_FREE(path);
                     62:        HT_FREE(gatewayed);
                     63:        HTRequest_setFullURI(request, NO);
2.6       frystyk    64:        HTRequest_deleteProxy(request);
2.1       frystyk    65:     } else {
2.6       frystyk    66:        HTRequest_setFullURI(request, NO);                        /* For now */
                     67:        HTRequest_deleteProxy(request);
2.1       frystyk    68:     }
                     69:     return HT_OK;
                     70: }
                     71: 
                     72: /*
                     73: **     Rule Translation BEFORE Filter
                     74: **     ------------------------------
                     75: **     If we have a set of rules loaded (see the Rule manager) then check
                     76: **     before each request whether how that should be translated. The trick
                     77: **     is that a parent anchor has a "address" which is the part from the URL
                     78: **     we used when we created the anchor. However, it also have a "physical
                     79: **     address" which is the place we are actually going to look for the
2.2       frystyk    80: **     resource. Hence this filter translates the physical address
                     81: **     (if any translations are found)
2.1       frystyk    82: */
2.15      frystyk    83: PUBLIC int HTRuleFilter (HTRequest * request, void * param, int mode)
2.1       frystyk    84: {
                     85:     HTList * list = HTRule_global();
                     86:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    87:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    88:     char * physical = HTRule_translate(list, addr, NO);
                     89:     if (!physical) {
                     90:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                     91:                           NULL, 0, "HTRuleFilter");
                     92:        return HT_ERROR;
                     93:     }
                     94:     HTAnchor_setPhysical(anchor, physical);
                     95:     HT_FREE(physical);
                     96:     return HT_OK;
                     97: }
                     98: 
                     99: /*
2.16      frystyk   100: **     A small BEFORE filter that just finds a cache entry unconditionally
                    101: **     and loads the entry. All freshness and any other constraints are 
                    102: **     ignored.
                    103: */
                    104: PUBLIC int HTCacheLoadFilter (HTRequest * request, void * param, int mode)
                    105: {
                    106:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    107:     HTCache * cache = HTCache_find(anchor);
                    108:     if (cache) {
                    109:        char * name = HTCache_name(cache);
                    110:        HTAnchor_setPhysical(anchor, name);
                    111:        HTCache_addHit(cache);
                    112:        HT_FREE(name);
                    113: 
                    114:        /*
                    115:        **  Start request directly from the cache. As with the redirection
                    116:        **  filter we reuse the same request object which means that we must
                    117:        **  keep this around until the cache load request has terminated
                    118:        */
                    119:        {
                    120:            HTLoad(request, NO);
                    121:            return HT_ERROR;
                    122:        }
                    123:     }
                    124:     return HT_OK;
                    125: }
                    126: 
                    127: /*
2.15      frystyk   128: **     Check the Memory Cache (History list) BEFORE filter
                    129: **     ---------------------------------------------------
2.11      frystyk   130: **     Check if document is already loaded. The user can define whether
                    131: **     the history list should follow normal expiration or work as a
                    132: **     traditional history list where expired documents are not updated.
                    133: **     We don't check for anything but existence proof of a document
                    134: **     associated with the anchor as the definition is left to the application
                    135: */
2.15      frystyk   136: PUBLIC int HTMemoryCacheFilter (HTRequest * request, void * param, int mode)
2.11      frystyk   137: {
                    138:     HTReload validation = HTRequest_reloadMode(request);
                    139:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    140:     void * document = HTAnchor_document(anchor);
2.1       frystyk   141: 
2.11      frystyk   142:     /*
2.14      frystyk   143:     **  We only check the memory cache if it's a GET method
                    144:     */
                    145:     if (HTRequest_method(request) != METHOD_GET) {
2.35      frystyk   146:        HTTRACE(CACHE_TRACE, "Mem Cache... We only check GET methods\n");
2.14      frystyk   147:        return HT_OK;
                    148:     }
                    149: 
                    150:     /*
2.11      frystyk   151:     **  If we are asked to flush the persistent cache then there is no reason
                    152:     **  to do anything here - we're flushing it anyway. Also if no document
                    153:     **  then just exit from this filter.
                    154:     */
                    155:     if (!document || validation > HT_CACHE_FLUSH_MEM) {
2.35      frystyk   156:        HTTRACE(CACHE_TRACE, "Mem Cache... No fresh document...\n");
2.11      frystyk   157:        return HT_OK;
                    158:     }
2.1       frystyk   159: 
2.11      frystyk   160:     /*
                    161:     **  If we have a document object associated with this anchor then we also
                    162:     **  have the object in the history list. Depending on what the user asked,
                    163:     **  we can add a cache validator
                    164:     */
2.14      frystyk   165:     if (document && validation != HT_CACHE_FLUSH_MEM) {
2.35      frystyk   166:        HTTRACE(CACHE_TRACE, "Mem Cache... Document already in memory\n");
2.14      frystyk   167:        return HT_LOADED;
2.15      frystyk   168:     }
2.17      frystyk   169:     return HT_OK;
2.15      frystyk   170: }
                    171: 
                    172: /*
2.1       frystyk   173: **     Error and Information AFTER filter
                    174: **     ----------------------------------
                    175: **     It checks the status code from a request and generates an 
                    176: **     error/information message if required.
                    177: */
2.15      frystyk   178: PUBLIC int HTInfoFilter (HTRequest * request, HTResponse * response,
                    179:                         void * param, int status)
2.1       frystyk   180: {
                    181:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    182:     char * uri = HTAnchor_address((HTAnchor*) anchor);
                    183:     switch (status) {
2.28      frystyk   184:     case HT_RETRY: {
                    185:         HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    186:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    187:                        HTRequest_error(request), NULL);
2.35      frystyk   188:        HTTRACE(PROT_TRACE, "Load End.... NOT AVAILABLE, RETRY AT %ld\n" _ 
2.15      frystyk   189:                    HTResponse_retryTime(response));
2.28      frystyk   190:         }
                    191:         break;
2.1       frystyk   192: 
2.7       frystyk   193:     case HT_NO_DATA:
                    194:     {
                    195:        /*
                    196:        ** The document was empty
                    197:        */
                    198:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    199:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    200:                        HTRequest_error(request), NULL);
2.35      frystyk   201:        HTTRACE(PROT_TRACE, "Load End.... EMPTY: No content `%s\'\n" _ 
2.7       frystyk   202:                    uri ? uri : "<UNKNOWN>");
                    203:        break;
                    204:     }    
2.3       frystyk   205: 
                    206:     case HT_LOADED:
2.35      frystyk   207:        HTTRACE(PROT_TRACE, "Load End.... OK: `%s\'\n" _ uri);
2.16      frystyk   208:        break;
                    209: 
                    210:     default:
2.3       frystyk   211:     {
                    212:        /*
2.16      frystyk   213:        ** See if we have a function registered for outputting errors.
                    214:        ** If so then call it and present the message to the user
2.3       frystyk   215:        */
                    216:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    217:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    218:                        HTRequest_error(request), NULL);
2.35      frystyk   219:        HTTRACE(PROT_TRACE, "Load End.... Request ended with code %d\n" _ status);
2.1       frystyk   220:        break;
                    221:     }
2.16      frystyk   222:     }
2.1       frystyk   223:     HT_FREE(uri);
                    224:     return HT_OK;
                    225: }
                    226: 
                    227: /*
                    228: **     Redirection AFTER filter
                    229: **     ------------------------
                    230: **     The redirection handler only handles redirections
                    231: **     on the GET or HEAD method (or any other safe method)
                    232: */
2.15      frystyk   233: PUBLIC int HTRedirectFilter (HTRequest * request, HTResponse * response,
                    234:                             void * param, int status)
2.1       frystyk   235: {
                    236:     HTMethod method = HTRequest_method(request); 
2.15      frystyk   237:     HTAnchor * new_anchor = HTResponse_redirection(response); 
2.37    ! frystyk   238: 
        !           239:     /* Check for destination */
2.7       frystyk   240:     if (!new_anchor) {
2.35      frystyk   241:        HTTRACE(PROT_TRACE, "Redirection. No destination\n");
2.7       frystyk   242:        return HT_OK;
                    243:     }
                    244: 
2.1       frystyk   245:     /*
2.21      frystyk   246:     ** Only do automatic redirect on GET and HEAD. Ask for all
2.27      frystyk   247:     ** other methods.
2.1       frystyk   248:     */
2.21      frystyk   249:     if (!HTMethod_isSafe(method)) {
                    250: 
                    251:        /*
                    252:        ** If we got a 303 See Other then change the method to GET.
                    253:        ** Otherwise ask the user whether we should continue.
                    254:        */
                    255:        if (status == HT_SEE_OTHER) {
2.35      frystyk   256:            HTTRACE(PROT_TRACE, "Redirection. Changing method from %s to GET\n" _ 
2.21      frystyk   257:                        HTMethod_name(method));
                    258:            HTRequest_setMethod(request, METHOD_GET);
                    259:        } else {
                    260:            HTAlertCallback * prompt = HTAlert_find(HT_A_CONFIRM);
                    261:            if (prompt) {
                    262:                if ((*prompt)(request, HT_A_CONFIRM, HT_MSG_REDIRECTION,
                    263:                              NULL, NULL, NULL) != YES)
2.27      frystyk   264:                    return HT_OK;
2.21      frystyk   265:            }
2.4       frystyk   266:        }
2.1       frystyk   267:     } 
2.36      frystyk   268: 
                    269:     /* Register the redirection as a link relationship */
                    270:     {
                    271:        HTLinkType ltype = status==HT_PERM_REDIRECT ? HT_LR_PERM_REDIRECT :
                    272:            (status==HT_TEMP_REDIRECT || status==HT_FOUND) ? HT_LR_TEMP_REDIRECT :
                    273:            status==HT_SEE_OTHER ? HT_LR_SEE_OTHER : NULL;
                    274:        if (ltype) {
                    275:            HTLink_add((HTAnchor *) HTRequest_anchor(request), new_anchor, 
                    276:                       ltype, method);
                    277:        }
                    278:     }
                    279: 
2.34      frystyk   280:     /* Delete any auth credendials as they get regenerated */
                    281:     HTRequest_deleteCredentialsAll(request);
                    282: 
2.1       frystyk   283:     /*
                    284:     **  Start new request with the redirect anchor found in the headers.
                    285:     ** Note that we reuse the same request object which means that we must
                    286:     **  keep this around until the redirected request has terminated. It also
                    287:     **  allows us in an easy way to keep track of the number of redirections
                    288:     ** so that we can detect endless loops.
                    289:     */ 
2.4       frystyk   290:     if (HTRequest_doRetry(request)) { 
2.1       frystyk   291:        HTLoadAnchor(new_anchor, request);
2.9       frystyk   292:     } else {
                    293:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
                    294:                           NULL, 0, "HTRedirectFilter");
2.37    ! frystyk   295:        return HT_OK;           /* Wanna fall through */
2.9       frystyk   296:     }
                    297: 
                    298:     /*
                    299:     **  By returning HT_ERROR we make sure that this is the last handler to be
                    300:     **  called. We do this as we don't want any other filter to delete the 
                    301:     **  request object now when we have just started a new one ourselves
                    302:     */
                    303:     return HT_ERROR;
                    304: } 
                    305: 
                    306: /*
2.15      frystyk   307: **     Retry through Proxy AFTER Filter
                    308: **     --------------------------------
2.9       frystyk   309: **     This filter handles a 305 Use Proxy response and retries the request
                    310: **     through the proxy
                    311: */
2.15      frystyk   312: PUBLIC int HTUseProxyFilter (HTRequest * request, HTResponse * response,
                    313:                             void * param, int status)
2.9       frystyk   314: {
2.20      frystyk   315:     HTAlertCallback * cbf = HTAlert_find(HT_A_CONFIRM);
2.15      frystyk   316:     HTAnchor * proxy_anchor = HTResponse_redirection(response); 
2.9       frystyk   317:     if (!proxy_anchor) {
2.35      frystyk   318:        HTTRACE(PROT_TRACE, "Use Proxy... No proxy location\n");
2.9       frystyk   319:        return HT_OK;
                    320:     }
                    321: 
                    322:     /*
                    323:     **  Add the proxy to the list. Assume HTTP access method only!
2.20      frystyk   324:     **  Because evil servers may rediret the client to an untrusted
                    325:     **  proxy, we can only accept redirects for this particular
                    326:     **  server. Also, we do not know whether this is for HTTP or all
                    327:     **  other requests as well
2.9       frystyk   328:     */
2.20      frystyk   329:     if ((cbf && (*cbf)(request, HT_A_CONFIRM, HT_MSG_PROXY, NULL,NULL,NULL))) {
2.9       frystyk   330:        char * addr = HTAnchor_address(proxy_anchor);
                    331:        HTProxy_add("http", addr);
                    332:        HT_FREE(addr);
                    333:  
2.20      frystyk   334:        /*
                    335:        **  Start new request through the proxy if we haven't reached the max
                    336:        **  number of redirections for this request
                    337:        */ 
                    338:        if (HTRequest_doRetry(request)) { 
                    339:            HTLoadAnchor(proxy_anchor, request);
                    340:        } else {
                    341:            HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
                    342:                               NULL, 0, "HTRedirectFilter");
                    343:        }
                    344: 
                    345:        /*
                    346:        **  By returning HT_ERROR we make sure that this is the last handler to be
                    347:        **  called. We do this as we don't want any other filter to delete the 
                    348:        **  request object now when we have just started a new one ourselves
                    349:        */
                    350:        return HT_ERROR;
                    351: 
2.1       frystyk   352:     } else {
2.20      frystyk   353:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_NO_AUTO_PROXY,
                    354:                           NULL, 0, "HTUseProxyFilter");
                    355:        return HT_OK;
2.1       frystyk   356:     }
                    357: } 
                    358: 
                    359: /*
                    360: **     Client side authentication BEFORE filter
                    361: **     ----------------------------------------
                    362: **     The filter generates the credentials required to access a document
                    363: **     Getting the credentials may involve asking the user
                    364: */
2.15      frystyk   365: PUBLIC int HTCredentialsFilter (HTRequest * request, void * param, int mode)
2.1       frystyk   366: {
                    367:     /*
                    368:     ** Ask the authentication module to call the right credentials generator
                    369:     ** that understands this scheme
                    370:     */
2.15      frystyk   371:     if (HTAA_beforeFilter(request, param, mode) == HT_OK) {
2.35      frystyk   372:        HTTRACE(PROT_TRACE, "Credentials. verified\n");
2.1       frystyk   373:        return HT_OK;
                    374:     } else {
                    375:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_UNAUTHORIZED,
                    376:                           NULL, 0, "HTCredentialsFilter");
                    377:        return HT_ERROR;
                    378:     }
                    379: }
                    380: 
                    381: /*
                    382: **     Client side authentication AFTER filter
                    383: **     ---------------------------------------
                    384: **     The client side authentication filter uses the 
                    385: **     user dialog messages registered in the HTAlert module.
                    386: **     By default these are the ones used by the line mode browser but you can
                    387: **     just register something else.
                    388: */
2.15      frystyk   389: PUBLIC int HTAuthFilter (HTRequest * request, HTResponse * response,
                    390:                         void * param, int status)
2.1       frystyk   391: {
                    392:     /*
                    393:     ** Ask the authentication module to call the right challenge parser
                    394:     ** that understands this scheme
                    395:     */
2.15      frystyk   396:     if (HTAA_afterFilter(request, response, param, status) == HT_OK) {
2.1       frystyk   397: 
                    398:        /*
                    399:        ** Start request with new credentials. As with the redirection filter
                    400:        ** we reuse the same request object which means that we must
                    401:        ** keep this around until the redirected request has terminated
                    402:        */
                    403:        HTLoad(request, NO);
                    404: 
                    405:        /*
                    406:        **  We return HT_ERROR to make sure that this is the last handler to be
                    407:        **  called. We do this as we don't want any other filter to delete the 
                    408:        **  request object now when we have just started a new one ourselves
                    409:        */
                    410:        return HT_ERROR;
                    411:     }
                    412:     return HT_OK;
2.32      kahan     413: }
                    414: 
                    415: /*
                    416: **     Client side authentication info AFTER filter
                    417: **     ---------------------------------------
                    418: */
                    419: PUBLIC int HTAuthInfoFilter (HTRequest * request, HTResponse * response,
                    420:                             void * param, int status)
                    421: {
                    422:     /*
                    423:     ** Ask the authentication module to call the right authentication info
                    424:     ** parser
                    425:     */
                    426:     if (! HTResponse_challenge (response))
                    427:       return HT_OK;
                    428:     else if (HTAA_updateFilter(request, response, param, status) == HT_OK) 
                    429:       return HT_OK;
                    430:     else
                    431:       return HT_ERROR;
2.1       frystyk   432: }
                    433: 
                    434: /*
2.24      frystyk   435: **     Request Logging AFTER filter
                    436: **     ----------------------------
2.1       frystyk   437: **     Default Logging filter using the log manager provided by HTLog.c
                    438: */
2.15      frystyk   439: PUBLIC int HTLogFilter (HTRequest * request, HTResponse * response,
                    440:                        void * param, int status)
2.1       frystyk   441: {
                    442:     if (request) {
2.23      frystyk   443:        HTLog * log = (HTLog *) param;
                    444:        if (log) HTLog_addCLF(log, request, status);
2.24      frystyk   445:        return HT_OK;
                    446:     }
                    447:     return HT_ERROR;
                    448: }
                    449: 
                    450: /*
                    451: **     Request Referer AFTER filter
                    452: **     ----------------------------
                    453: **     Default Referer Log filter using the log manager provided by HTLog.c
                    454: */
                    455: PUBLIC int HTRefererFilter (HTRequest * request, HTResponse * response,
                    456:                            void * param, int status)
                    457: {
                    458:     if (request) {
                    459:        HTLog * log = (HTLog *) param;
                    460:        if (log) HTLog_addReferer(log, request, status);
2.1       frystyk   461:        return HT_OK;
                    462:     }
                    463:     return HT_ERROR;
                    464: }

Webmaster