Annotation of libwww/Library/src/HTFilter.c, revision 2.9

2.1       frystyk     1: /*
                      2: **     BEFORE AND AFTER FILTERS
                      3: **
                      4: **     (c) COPYRIGHT MIT 1995.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.9     ! frystyk     6: **     @(#) $Id: HTFilter.c,v 2.8 1996/08/13 02:17:15 frystyk Exp $
2.1       frystyk     7: **
                      8: **     This module implements a set of default filters that can be registered
                      9: **     as BEFORE and AFTER filters to the Net manager
                     10: ** Authors
                     11: **     HFN     Henrik Frystyk, frystyk@w.org
                     12: ** History
                     13: **     Jul 4, 96       Written
                     14: */
                     15: 
                     16: /* Library include files */
                     17: #include "WWWLib.h"
                     18: #include "WWWCache.h"
                     19: #include "WWWRules.h"
                     20: #include "WWWHTTP.h"
                     21: #include "HTLog.h"
                     22: #include "HTAccess.h"
                     23: #include "HTFilter.h"                                   /* Implemented here */
                     24: 
                     25: /* ------------------------------------------------------------------------- */
                     26: 
                     27: /*
                     28: **     Proxy and Gateway BEFORE filter
                     29: **     -------------------------------
                     30: **     Checks for registerd proxy servers or gateways and sees whether this
                     31: **     request should be redirected to a proxy or a gateway. Proxies have
                     32: **     higher priority than gateways so we look for them first!
                     33: **     For HTTP/1.0 and HTTP/1.1 we may only send a full URL (including the
                     34: **     host portion) to proxy servers. Therefore, we tell the Library whether
                     35: **     to use the full URL or the traditional HTTP one without the host part.
                     36: */
                     37: PUBLIC int HTProxyFilter (HTRequest * request, void * param, int status)
                     38: {
                     39:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    40:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    41:     char * physical = NULL;
                     42:     if ((physical = HTProxy_find(addr))) {
2.6       frystyk    43:        HTRequest_setFullURI(request, YES);                       /* For now */
2.5       frystyk    44:        HTRequest_setProxy(request, physical);
2.8       frystyk    45:        HT_FREE(physical);
2.6       frystyk    46: #if 0
                     47:        /* Don't paste the URLs together anymore */
2.1       frystyk    48:        StrAllocCat(physical, addr);
2.5       frystyk    49:        HTAnchor_setPhysical(anchor, physical); 
2.6       frystyk    50: #endif
2.1       frystyk    51:     } else if ((physical = HTGateway_find(addr))) {
                     52:        /* 
                     53:        ** A gateway URL is crated by chopping off any leading "/" to make the
                     54:        ** host into part of path
                     55:        */
                     56:        char * path =
                     57:            HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                     58:        char * gatewayed = HTParse(path+1, physical, PARSE_ALL);
                     59:        HTAnchor_setPhysical(anchor, gatewayed);
                     60:        HT_FREE(path);
                     61:        HT_FREE(gatewayed);
                     62:        HTRequest_setFullURI(request, NO);
2.6       frystyk    63:        HTRequest_deleteProxy(request);
2.1       frystyk    64:     } else {
2.6       frystyk    65:        HTRequest_setFullURI(request, NO);                        /* For now */
                     66:        HTRequest_deleteProxy(request);
2.1       frystyk    67:     }
                     68:     return HT_OK;
                     69: }
                     70: 
                     71: /*
                     72: **     Rule Translation BEFORE Filter
                     73: **     ------------------------------
                     74: **     If we have a set of rules loaded (see the Rule manager) then check
                     75: **     before each request whether how that should be translated. The trick
                     76: **     is that a parent anchor has a "address" which is the part from the URL
                     77: **     we used when we created the anchor. However, it also have a "physical
                     78: **     address" which is the place we are actually going to look for the
2.2       frystyk    79: **     resource. Hence this filter translates the physical address
                     80: **     (if any translations are found)
2.1       frystyk    81: */
                     82: PUBLIC int HTRuleFilter (HTRequest * request, void * param, int status)
                     83: {
                     84:     HTList * list = HTRule_global();
                     85:     HTParentAnchor * anchor = HTRequest_anchor(request);
2.2       frystyk    86:     char * addr = HTAnchor_physical(anchor);
2.1       frystyk    87:     char * physical = HTRule_translate(list, addr, NO);
                     88:     if (!physical) {
                     89:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                     90:                           NULL, 0, "HTRuleFilter");
                     91:        return HT_ERROR;
                     92:     }
                     93:     HTAnchor_setPhysical(anchor, physical);
                     94:     HT_FREE(physical);
                     95:     return HT_OK;
                     96: }
                     97: 
                     98: /*
                     99: **     Cache Validation BEFORE Filter
                    100: **     ------------------------------
                    101: **     Check the cache mode to see if we can use an already loaded version
                    102: **     of this document. If so and our copy is valid then we don't have
                    103: **     to go out and get it unless we are forced to
2.3       frystyk   104: **     We only check the cache in caseof a GET request. Otherwise, we go
                    105: **     directly to the source.
2.1       frystyk   106: */
                    107: PUBLIC int HTCacheFilter (HTRequest * request, void * param, int status)
                    108: {
                    109:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    110:     HTReload mode = HTRequest_reloadMode(request);
2.3       frystyk   111:     HTMethod method = HTRequest_method(request);
                    112: 
                    113:     /*
                    114:     ** Check the method of the request
                    115:     */
                    116:     if (method != METHOD_GET) {
                    117:        if (CACHE_TRACE) HTTrace("Cachefilter. We only check GET methods\n");
                    118:        return HT_OK;
                    119:     }
                    120: 
2.1       frystyk   121:     /*
                    122:     ** If the mode if "Force Reload" then don't even bother to check the
                    123:     ** cache - we flush everything we know about this document
                    124:     */
                    125:     if (mode == HT_FORCE_RELOAD) {
                    126:        /*
                    127:        ** Add the appropriate request headers. We use both the "pragma"
                    128:        ** and the "cache-control" headers in order to be
                    129:        ** backwards compatible with HTP/1.0
                    130:        */
                    131:        HTRequest_addGnHd(request, HT_G_PRAGMA_NO_CACHE);
                    132: 
                    133:        /* @@@ CACHE CONTROL @@@ */
                    134: 
                    135:        /*
                    136:        ** We also flush the information in the anchor
                    137:        */
                    138:        HTAnchor_clearHeader(anchor);
                    139:        return HT_OK;
                    140:     }
                    141: 
                    142:     /*
                    143:     ** Check the application provided memory cache. This is equivalent to a
                    144:     ** history list and does not follow the same cache mechanisms as the 
                    145:     ** persistent cache
                    146:     */
                    147:     if (HTMemoryCache_check(request) == HT_LOADED)
                    148:        return HT_LOADED;
                    149:     
                    150:     /*
                    151:     ** Check the persistent cache manager. If we have a cache hit then
                    152:     ** continue to see if the reload mode requires us to do a validation check.
                    153:     ** This filter assumes that we can get the cached version through one of
                    154:     ** our protocol modules (for example the file module)
                    155:     */
                    156:     {
                    157:        char * addr = HTAnchor_address((HTAnchor *) anchor);
                    158:        char * cache = HTCache_getReference(addr);
                    159:        if (cache) {
                    160:            if (mode != HT_CACHE_REFRESH) {
                    161:                HTAnchor_setPhysical(anchor, cache);
                    162:                HTAnchor_setCacheHit(anchor, YES);
                    163:            } else {
                    164: 
                    165:                /* @@@ Do cache validation @@@ */
                    166: 
                    167:            }
                    168:        }
                    169:        HT_FREE(addr);
                    170:     }
                    171:     return HT_OK;
                    172: }
                    173: 
                    174: /*
                    175: **     Error and Information AFTER filter
                    176: **     ----------------------------------
                    177: **     It checks the status code from a request and generates an 
                    178: **     error/information message if required.
                    179: */
                    180: PUBLIC int HTInfoFilter (HTRequest * request, void * param, int status)
                    181: {
                    182:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    183:     char * uri = HTAnchor_address((HTAnchor*) anchor);
                    184:     switch (status) {
                    185:     case HT_RETRY:
                    186:        if (PROT_TRACE)
                    187:            HTTrace("Load End.... NOT AVAILABLE, RETRY AT %ld\n",
                    188:                    HTRequest_retryTime(request));
                    189:        break;
                    190: 
                    191:     case HT_ERROR:
                    192:     {
                    193:        /*
                    194:        ** See if we have a function registered for outputting errors.
                    195:        ** If so then call it and present the message to the user
                    196:        */
                    197:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    198:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    199:                        HTRequest_error(request), NULL);
                    200:        if (PROT_TRACE)
                    201:            HTTrace("Load End.... ERROR: Can't access `%s\'\n",
                    202:                    uri ? uri : "<UNKNOWN>");
2.3       frystyk   203:        break;
2.7       frystyk   204:     }    
                    205: 
                    206:     case HT_NO_DATA:
                    207:     {
                    208:        /*
                    209:        ** The document was empty
                    210:        */
                    211:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    212:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    213:                        HTRequest_error(request), NULL);
                    214:        if (PROT_TRACE)
                    215:            HTTrace("Load End.... EMPTY: No content `%s\'\n",
                    216:                    uri ? uri : "<UNKNOWN>");
                    217:        break;
                    218:     }    
2.3       frystyk   219: 
                    220:     case HT_LOADED:
                    221:     {
                    222:        /*
                    223:        ** Even though we have received a loaded status the thing we have
                    224:        ** loaded successfully may in fact be an error message. We therefore
                    225:        ** look at the error stack to see what to do.
                    226:        */
                    227:        HTAlertCallback *cbf = HTAlert_find(HT_A_MESSAGE);
                    228:        if (cbf) (*cbf)(request, HT_A_MESSAGE, HT_MSG_NULL, NULL,
                    229:                        HTRequest_error(request), NULL);
                    230:        if (PROT_TRACE) HTTrace("Load End.... OK: `%s\'\n", uri);
2.1       frystyk   231:        break;
                    232:     }
                    233: 
                    234:     default:
                    235:        if (PROT_TRACE)
                    236:            HTTrace("Load End.... Request ended with code %d\n", status);
                    237:        break;
                    238:     }
                    239: 
                    240:     HT_FREE(uri);
                    241:     return HT_OK;
                    242: }
                    243: 
                    244: /*
                    245: **     Redirection AFTER filter
                    246: **     ------------------------
                    247: **     The redirection handler only handles redirections
                    248: **     on the GET or HEAD method (or any other safe method)
                    249: */
                    250: PUBLIC int HTRedirectFilter (HTRequest * request, void * param, int status)
                    251: {
                    252:     HTMethod method = HTRequest_method(request); 
                    253:     HTAnchor * new_anchor = HTRequest_redirection(request); 
2.7       frystyk   254:     if (!new_anchor) {
                    255:        if (PROT_TRACE) HTTrace("Redirection. No destination\n");
                    256:        return HT_OK;
                    257:     }
                    258: 
2.1       frystyk   259:     /*
                    260:     ** Only do redirect on GET and HEAD
                    261:     */
2.7       frystyk   262:     if (!HTMethod_isSafe(method)) { 
2.4       frystyk   263:        HTAlertCallback * prompt = HTAlert_find(HT_A_CONFIRM);
                    264:        if (prompt) {
                    265:            if ((*prompt)(request, HT_A_CONFIRM, HT_MSG_REDIRECTION,
                    266:                          NULL, NULL, NULL) != YES)
                    267:                return HT_ERROR;
                    268:        }
2.1       frystyk   269:     } 
                    270:  
                    271:     /*
                    272:     **  Start new request with the redirect anchor found in the headers.
                    273:     ** Note that we reuse the same request object which means that we must
                    274:     **  keep this around until the redirected request has terminated. It also
                    275:     **  allows us in an easy way to keep track of the number of redirections
                    276:     ** so that we can detect endless loops.
                    277:     */ 
2.4       frystyk   278:     if (HTRequest_doRetry(request)) { 
2.1       frystyk   279:        HTLoadAnchor(new_anchor, request);
2.9     ! frystyk   280:     } else {
        !           281:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
        !           282:                           NULL, 0, "HTRedirectFilter");
        !           283:     }
        !           284: 
        !           285:     /*
        !           286:     **  By returning HT_ERROR we make sure that this is the last handler to be
        !           287:     **  called. We do this as we don't want any other filter to delete the 
        !           288:     **  request object now when we have just started a new one ourselves
        !           289:     */
        !           290:     return HT_ERROR;
        !           291: } 
        !           292: 
        !           293: /*
        !           294: **     Retry through Proxy Filter
        !           295: **     --------------------------
        !           296: **     This filter handles a 305 Use Proxy response and retries the request
        !           297: **     through the proxy
        !           298: */
        !           299: PUBLIC int HTUseProxyFilter (HTRequest * request, void * param, int status)
        !           300: {
        !           301:     HTAnchor * proxy_anchor = HTRequest_redirection(request); 
        !           302:     if (!proxy_anchor) {
        !           303:        if (PROT_TRACE) HTTrace("Use Proxy... No proxy location\n");
        !           304:        return HT_OK;
        !           305:     }
        !           306: 
        !           307:     /*
        !           308:     **  Add the proxy to the list. Assume HTTP access method only!
        !           309:     */
        !           310:     {
        !           311:        char * addr = HTAnchor_address(proxy_anchor);
        !           312:        HTProxy_add("http", addr);
        !           313:        HT_FREE(addr);
        !           314:     } 
        !           315:  
        !           316:     /*
        !           317:     **  Start new request through the proxy if we haven't reached the max
        !           318:     **  number of redirections for this request
        !           319:     */ 
        !           320:     if (HTRequest_doRetry(request)) { 
        !           321:        HTLoadAnchor(proxy_anchor, request);
2.1       frystyk   322:     } else {
                    323:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_MAX_REDIRECT,
                    324:                           NULL, 0, "HTRedirectFilter");
                    325:     }
                    326: 
                    327:     /*
                    328:     **  By returning HT_ERROR we make sure that this is the last handler to be
                    329:     **  called. We do this as we don't want any other filter to delete the 
                    330:     **  request object now when we have just started a new one ourselves
                    331:     */
                    332:     return HT_ERROR;
                    333: } 
                    334: 
                    335: /*
                    336: **     Client side authentication BEFORE filter
                    337: **     ----------------------------------------
                    338: **     The filter generates the credentials required to access a document
                    339: **     Getting the credentials may involve asking the user
                    340: */
                    341: PUBLIC int HTCredentialsFilter (HTRequest * request, void * param, int status)
                    342: {
                    343:     /*
                    344:     ** Ask the authentication module to call the right credentials generator
                    345:     ** that understands this scheme
                    346:     */
                    347:     if (HTAA_beforeFilter(request, param, status) == HT_OK) {
                    348:        if (PROT_TRACE) HTTrace("Credentials. verified\n");
                    349:        return HT_OK;
                    350:     } else {
                    351:        HTRequest_addError(request, ERR_FATAL, NO, HTERR_UNAUTHORIZED,
                    352:                           NULL, 0, "HTCredentialsFilter");
                    353:        return HT_ERROR;
                    354:     }
                    355: }
                    356: 
                    357: /*
                    358: **     Client side authentication AFTER filter
                    359: **     ---------------------------------------
                    360: **     The client side authentication filter uses the 
                    361: **     user dialog messages registered in the HTAlert module.
                    362: **     By default these are the ones used by the line mode browser but you can
                    363: **     just register something else.
                    364: */
                    365: PUBLIC int HTAuthFilter (HTRequest * request, void * param, int status)
                    366: {
                    367:     /*
                    368:     ** Ask the authentication module to call the right challenge parser
                    369:     ** that understands this scheme
                    370:     */
                    371:     if (HTAA_afterFilter(request, param, status) == HT_OK) {
                    372: 
                    373:        /*
                    374:        ** Start request with new credentials. As with the redirection filter
                    375:        ** we reuse the same request object which means that we must
                    376:        ** keep this around until the redirected request has terminated
                    377:        */
                    378:        HTLoad(request, NO);
                    379: 
                    380:        /*
                    381:        **  We return HT_ERROR to make sure that this is the last handler to be
                    382:        **  called. We do this as we don't want any other filter to delete the 
                    383:        **  request object now when we have just started a new one ourselves
                    384:        */
                    385:        return HT_ERROR;
                    386:     }
                    387:     return HT_OK;
                    388: }
                    389: 
                    390: /*
                    391: **     Request Loggin AFTER filter
                    392: **     ---------------------------
                    393: **     Default Logging filter using the log manager provided by HTLog.c
                    394: */
                    395: PUBLIC int HTLogFilter (HTRequest * request, void * param, int status)
                    396: {
                    397:     if (request) {
                    398:        if (HTLog_isOpen()) HTLog_add(request, status);
                    399:        return HT_OK;
                    400:     }
                    401:     return HT_ERROR;
                    402: }

Webmaster