Annotation of libwww/Library/src/HTProxy.c, revision 2.18
2.1 frystyk 1: /* HTProxy.c
2: ** GATEWAY AND PROXY MANAGER
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.18 ! frystyk 6: ** @(#) $Id: HTProxy.c,v 2.17 1998/07/22 19:23:54 frystyk Exp $
2.1 frystyk 7: **
8: ** Replaces the old env variables for gateways and proxies. However for
9: ** backward compatibility there is a function that reads the env variables
10: ** at start up. Note that there is a difference between a proxy and a
11: ** gateway!
12: **
13: ** Authors
14: ** HF Henrik Frystyk, frystyk@w3.org
15: ** History
16: ** 4 Jun 95 Written on a rainy day
17: */
18:
19: #if !defined(HT_DIRECT_WAIS) && !defined(HT_DEFAULT_WAIS_GATEWAY)
20: #define HT_DEFAULT_WAIS_GATEWAY "http://www.w3.org:8001/"
21: #endif
22:
23: /* Library include files */
2.15 frystyk 24: #include "wwwsys.h"
2.11 frystyk 25: #include "WWWUtil.h"
26: #include "WWWCore.h"
27: #include "WWWHTTP.h"
2.12 frystyk 28: #include "WWWApp.h"
2.1 frystyk 29: #include "HTProxy.h" /* Implemented here */
30:
31: /* Variables and typedefs local to this module */
32:
/*
**  One registered proxy or gateway. `access' holds the text the entry was
**  registered under: an access scheme, or the source of a regular
**  expression when the entry was added through the regex interface.
*/
typedef struct _HTProxy {
    char *      access;         /* Access scheme (or regex source text) */
    char *      url;            /* URL of Gateway or Proxy */
#ifdef HT_POSIX_REGEX
    regex_t *   regex;          /* Compiled regex, NULL if not a regex entry */
#endif
} HTProxy;
40:
/*
**  One "do not proxy" entry: a host or domain name, optionally narrowed
**  to a single access scheme and port.
*/
typedef struct _HTHostlist {
    char *      access;         /* Access scheme, NULL when none was given */
    char *      host;           /* Host or domain name */
    unsigned    port;           /* Port number as registered */
#ifdef HT_POSIX_REGEX
    regex_t *   regex;          /* Compiled regex, NULL if not a regex entry */
#endif
} HTHostList;
49:
50: PRIVATE HTList * proxies = NULL; /* List of proxy servers */
51: PRIVATE HTList * gateways = NULL; /* List of gateways */
52: PRIVATE HTList * noproxy = NULL; /* Don't proxy on these hosts and domains */
53:
54: #if 0
55: PRIVATE HTList * onlyproxy = NULL; /* Proxy only on these hosts and domains */
56: #endif
57:
58: /* ------------------------------------------------------------------------- */
59:
2.17 frystyk 60: #ifdef HT_POSIX_REGEX
61: PRIVATE char * get_regex_error (int errcode, regex_t * compiled)
62: {
63: size_t length = regerror (errcode, compiled, NULL, 0);
64: char * str = NULL;
65: if ((str = (char *) HT_MALLOC(length+1)) == NULL)
66: HT_OUTOFMEM("get_regex_error");
67: (void) regerror (errcode, compiled, str, length);
68: return str;
69: }
70:
71: PRIVATE regex_t * get_regex_t (const char * regex_str, int cflags)
72: {
73: regex_t * regex = NULL;
74: if (regex_str && *regex_str) {
75: int status;
76: if ((regex = (regex_t *) HT_CALLOC(1, sizeof(regex_t))) == NULL)
77: HT_OUTOFMEM("get_regex_t");
78: if ((status = regcomp(regex, regex_str, cflags))) {
79: char * err_msg = get_regex_error(status, regex);
2.18 ! frystyk 80: HTTRACE(PROT_TRACE, "HTProxy..... Regular expression error: %s\n" _ err_msg);
2.17 frystyk 81: HT_FREE(err_msg);
82: HT_FREE(regex);
83: }
84: }
85: return regex;
86: }
87: #endif
88:
2.4 frystyk 89: /*
2.1 frystyk 90: ** Existing entries are replaced with new ones
91: */
2.17 frystyk 92: PRIVATE BOOL add_object (HTList * list, const char * access, const char * url,
93: BOOL regex, int regex_flags)
2.1 frystyk 94: {
95: HTProxy *me;
96: if (!list || !access || !url || !*url)
97: return NO;
2.7 frystyk 98: if ((me = (HTProxy *) HT_CALLOC(1, sizeof(HTProxy))) == NULL)
99: HT_OUTOFMEM("add_object");
2.1 frystyk 100: StrAllocCopy(me->access, access); /* Access method */
2.17 frystyk 101:
102: #ifdef HT_POSIX_REGEX
103: /*
104: ** If we support regular expressions then compile one up for
105: ** this regular expression. Otherwise use is as a normal
106: ** access scheme.
107: */
108: if (regex) {
109: me->regex = get_regex_t(access,
110: regex_flags < 0 ?
111: W3C_DEFAULT_REGEX_FLAGS : regex_flags);
112: } else
113: #endif
2.1 frystyk 114: {
115: char *ptr = me->access;
116: while ((*ptr = TOLOWER(*ptr))) ptr++;
117: }
2.17 frystyk 118:
2.1 frystyk 119: me->url = HTParse(url, "", PARSE_ACCESS+PARSE_HOST+PARSE_PUNCTUATION);
120: if (*(me->url+strlen(me->url)-1) != '/')
121: StrAllocCat(me->url, "/");
122: me->url = HTSimplify(&me->url);
123:
124: /* See if we already have this one */
125: {
126: HTList *cur = list;
127: HTProxy *pres;
128: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
129: if (!strcmp(pres->access, me->access))
130: break; /* We already have it */
131: }
132: if (pres) {
2.18 ! frystyk 133: HTTRACE(PROT_TRACE, "HTProxy..... replacing for `%s\' access %s\n" _
! 134: me->url _ me->access);
2.7 frystyk 135: HT_FREE(pres->access);
136: HT_FREE(pres->url);
2.17 frystyk 137: #ifdef HT_POSIX_REGEX
138: if (pres->regex) regfree(pres->regex);
139: #endif
2.1 frystyk 140: HTList_removeObject(list, (void *) pres);
2.7 frystyk 141: HT_FREE(pres);
2.1 frystyk 142: }
2.18 ! frystyk 143: HTTRACE(PROT_TRACE, "HTProxy..... adding for `%s\' access %s\n" _
! 144: me->url _ me->access);
2.1 frystyk 145: HTList_addObject(list, (void *) me);
146: }
147: return YES;
148: }
149:
2.4 frystyk 150: PRIVATE BOOL remove_allObjects (HTList * list)
2.1 frystyk 151: {
152: if (list) {
153: HTList *cur = list;
154: HTProxy *pres;
155: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
2.7 frystyk 156: HT_FREE(pres->access);
157: HT_FREE(pres->url);
2.17 frystyk 158: #ifdef HT_POSIX_REGEX
159: if (pres->regex) regfree(pres->regex);
160: #endif
2.7 frystyk 161: HT_FREE(pres);
2.1 frystyk 162: }
163: return YES;
164: }
165: return NO;
166: }
167:
2.4 frystyk 168: /* Add an entry to a list of host names
169: ** ------------------------------------
170: ** Existing entries are replaced with new ones
171: */
2.9 frystyk 172: PRIVATE BOOL add_hostname (HTList * list, const char * host,
2.17 frystyk 173: const char * access, unsigned port,
174: BOOL regex, int regex_flags)
2.4 frystyk 175: {
176: HTHostList *me;
177: if (!list || !host || !*host)
178: return NO;
2.7 frystyk 179: if ((me = (HTHostList *) HT_CALLOC(1, sizeof(HTHostList))) == NULL)
180: HT_OUTOFMEM("add_hostname");
2.17 frystyk 181: #ifdef HT_POSIX_REGEX
182: if (regex)
183: me->regex = get_regex_t(host,
184: regex_flags < 0 ?
185: W3C_DEFAULT_REGEX_FLAGS : regex_flags);
186: #endif
187:
2.4 frystyk 188: if (access) {
189: char *ptr;
190: StrAllocCopy(me->access, access); /* Access method */
191: ptr = me->access;
192: while ((*ptr = TOLOWER(*ptr))) ptr++;
193: }
194: StrAllocCopy(me->host, host); /* Host name */
195: {
196: char *ptr = me->host;
197: while ((*ptr = TOLOWER(*ptr))) ptr++;
198: }
199: me->port = port; /* Port number */
2.18 ! frystyk 200: HTTRACE(PROT_TRACE, "HTHostList.. adding `%s\' to list\n" _ me->host);
2.4 frystyk 201: HTList_addObject(list, (void *) me);
202: return YES;
203: }
2.1 frystyk 204:
2.4 frystyk 205: PRIVATE BOOL remove_AllHostnames (HTList * list)
206: {
207: if (list) {
208: HTList *cur = list;
209: HTHostList *pres;
210: while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
2.7 frystyk 211: HT_FREE(pres->access);
212: HT_FREE(pres->host);
2.17 frystyk 213: #ifdef HT_POSIX_REGEX
214: if (pres->regex) regfree(pres->regex);
215: #endif
2.7 frystyk 216: HT_FREE(pres);
2.4 frystyk 217: }
218: return YES;
219: }
220: return NO;
221: }
222:
223: /* HTProxy_add
224: ** -----------
2.1 frystyk 225: ** Registers a proxy as the server to contact for a specific
226: ** access method. `proxy' should be a fully valid name, like
227: ** "http://proxy.w3.org:8001" but domain name is not required.
228: ** If an entry exists for this access then delete it and use the
229: ** ne one. Returns YES if OK, else NO
230: */
2.9 frystyk 231: PUBLIC BOOL HTProxy_add (const char * access, const char * proxy)
2.1 frystyk 232: {
2.11 frystyk 233: /*
234: ** If this is the first time here then also add a before filter to handle
2.12 frystyk 235: ** proxy authentication and the normal AA after filter as well.
236: ** These filters will be removed if we remove all proxies again.
2.11 frystyk 237: */
238: if (!proxies) {
239: proxies = HTList_new();
2.13 frystyk 240: HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
241: HT_FILTER_MIDDLE);
242: HTNet_addAfter(HTAuthFilter, NULL, NULL,
243: HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
2.14 frystyk 244: HTNet_addAfter(HTAuthFilter, NULL, NULL,
245: HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
2.11 frystyk 246: }
2.17 frystyk 247: return add_object(proxies, access, proxy, NO, -1);
248: }
249:
250: /* HTProxy_addRegex
251: ** ----------------
252: ** Registers a proxy as the server to contact for any URL matching the
253: ** regular expression. `proxy' should be a fully valid name, like
254: ** "http://proxy.w3.org:8001".
255: ** If an entry exists for this access then delete it and use the
256: ** new one. Returns YES if OK, else NO
257: */
258: PUBLIC BOOL HTProxy_addRegex (const char * regex,
259: const char * proxy,
260: int regex_flags)
261: {
262: /*
263: ** If this is the first time here then also add a before filter to handle
264: ** proxy authentication and the normal AA after filter as well.
265: ** These filters will be removed if we remove all proxies again.
266: */
267: if (!proxies) {
268: proxies = HTList_new();
269: HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
270: HT_FILTER_MIDDLE);
271: HTNet_addAfter(HTAuthFilter, NULL, NULL,
272: HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
273: HTNet_addAfter(HTAuthFilter, NULL, NULL,
274: HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
275: }
276: #ifdef HT_POSIX_REGEX
277: return add_object(proxies, regex, proxy, YES, regex_flags);
278: #else
279: return add_object(proxies, regex, proxy, NO, -1);
280: #endif
2.1 frystyk 281: }
282:
2.4 frystyk 283: /*
2.1 frystyk 284: ** Removes all registered proxies
285: */
2.4 frystyk 286: PUBLIC BOOL HTProxy_deleteAll (void)
2.1 frystyk 287: {
2.4 frystyk 288: if (remove_allObjects(proxies)) {
2.1 frystyk 289: HTList_delete(proxies);
2.11 frystyk 290:
291: /*
292: ** If we have no more proxies then there is no reason for checking
2.12 frystyk 293: ** proxy authentication. We therefore unregister the filters for
294: ** handling proxy authentication
2.11 frystyk 295: */
2.13 frystyk 296: HTNet_deleteBefore(HTAA_proxyBeforeFilter);
2.16 frystyk 297: HTNet_deleteAfter(HTAuthFilter);
2.11 frystyk 298:
2.1 frystyk 299: proxies = NULL;
300: return YES;
301: }
302: return NO;
303: }
304:
2.4 frystyk 305: /* HTGateway_add
306: ** -------------
2.1 frystyk 307: ** Registers a gateway as the server to contact for a specific
308: ** access method. `gateway' should be a fully valid name, like
309: ** "http://gateway.w3.org:8001" but domain name is not required.
310: ** If an entry exists for this access then delete it and use the
311: ** ne one. Returns YES if OK, else NO
312: */
2.9 frystyk 313: PUBLIC BOOL HTGateway_add (const char * access, const char * gate)
2.1 frystyk 314: {
315: if (!gateways)
316: gateways = HTList_new();
2.17 frystyk 317: return add_object(gateways, access, gate, NO, -1);
2.1 frystyk 318: }
319:
2.4 frystyk 320: /*
2.1 frystyk 321: ** Removes all registered gateways
322: */
2.4 frystyk 323: PUBLIC BOOL HTGateway_deleteAll (void)
2.1 frystyk 324: {
2.4 frystyk 325: if (remove_allObjects(gateways)) {
2.1 frystyk 326: HTList_delete(gateways);
327: gateways = NULL;
328: return YES;
329: }
330: return NO;
331: }
332:
2.4 frystyk 333: /* HTNoProxy_add
334: ** -------------
2.1 frystyk 335: ** Registers a host name or a domain as a place where no proxy should
336: ** be contacted - for example a very fast link. If `port' is '0' then
337: ** it applies to all ports and if `access' is NULL then it applies to
338: ** to all access methods.
339: **
340: ** Examples: w3.org
341: ** www.close.com
342: */
2.9 frystyk 343: PUBLIC BOOL HTNoProxy_add (const char * host, const char * access,
2.4 frystyk 344: unsigned port)
2.1 frystyk 345: {
346: if (!noproxy)
347: noproxy = HTList_new();
2.17 frystyk 348: return add_hostname(noproxy, host, access, port, NO, -1);
349: }
350:
351: /* HTNoProxy_addRegex
352: ** ------------------
353: ** Registers a regular expression where URIs matching this expression
354: ** should go directly and not via a proxy.
355: **
356: */
357: PUBLIC BOOL HTNoProxy_addRegex (const char * regex, int regex_flags)
358: {
359: if (!noproxy)
360: noproxy = HTList_new();
361: #ifdef HT_POSIX_REGEX
362: return add_hostname(noproxy, regex, NULL, 0, YES, regex_flags);
363: #else
364: return add_hostname(noproxy, regex, NULL, 0, NO, -1);
365: #endif
2.1 frystyk 366: }
367:
2.4 frystyk 368: /* HTNoProxy_deleteAll
369: ** -------------------
2.1 frystyk 370: ** Removes all registered no_proxy directives
371: */
2.4 frystyk 372: PUBLIC BOOL HTNoProxy_deleteAll (void)
2.1 frystyk 373: {
2.4 frystyk 374: if (remove_AllHostnames(noproxy)) {
2.1 frystyk 375: HTList_delete(noproxy);
376: noproxy = NULL;
377: return YES;
378: }
379: return NO;
380: }
381:
2.4 frystyk 382: /* HTProxy_find
383: ** ------------
2.1 frystyk 384: ** This function evaluates the lists of registered proxies and if
385: ** one is found for the actual access method and it is not registered
386: ** in the `noproxy' list, then a URL containing the host to be contacted
387: ** is returned to the caller. This string must be freed be the caller.
388: **
389: ** Returns: proxy If OK (must be freed by caller)
390: ** NULL If no proxy is found or error
391: */
2.9 frystyk 392: PUBLIC char * HTProxy_find (const char * url)
2.1 frystyk 393: {
394: char * access;
395: char * proxy = NULL;
396: if (!url || !proxies)
397: return NULL;
398: access = HTParse(url, "", PARSE_ACCESS);
399:
400: /* First check if the host (if any) is registered in the noproxy list */
401: if (noproxy) {
402: char *host = HTParse(url, "", PARSE_HOST);
403: char *ptr;
404: unsigned port=0;
405: if ((ptr = strchr(host, ':')) != NULL) {
406: *ptr++ = '\0'; /* Chop off port */
407: if (*ptr) port = (unsigned) atoi(ptr);
408: }
409: if (*host) { /* If we have a host name */
410: HTList *cur = noproxy;
411: HTHostList *pres;
412: while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
2.17 frystyk 413: #ifdef HT_POSIX_REGEX
414: if (pres->regex) {
415: BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
416: if (match) {
2.18 ! frystyk 417: HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
2.17 frystyk 418: HT_FREE(access);
419: return NULL;
420: }
421: } else
422: #endif
2.1 frystyk 423: if (!pres->access ||
424: (pres->access && !strcmp(pres->access, access))) {
425: if (pres->port == port) {
426: char *np = pres->host+strlen(pres->host);
427: char *hp = host+strlen(host);
428: while (np>=pres->host && hp>=host && (*np--==*hp--));
429: if (np==pres->host-1 && (hp==host-1 || *hp=='.')) {
2.18 ! frystyk 430: HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
2.7 frystyk 431: HT_FREE(access);
2.1 frystyk 432: return NULL;
433: }
434: }
435: }
436: }
437: }
2.7 frystyk 438: HT_FREE(host);
2.1 frystyk 439: }
440:
441: /* Now check if we have a proxy registered for this access method */
442: {
443: HTList *cur = proxies;
444: HTProxy *pres;
445: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
2.17 frystyk 446: #ifdef HT_POSIX_REGEX
447: if (pres->regex) {
448: BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
449: if (match) {
450: StrAllocCopy(proxy, pres->url);
2.18 ! frystyk 451: HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
2.17 frystyk 452: break;
453: }
454: } else
455: #endif
2.1 frystyk 456: if (!strcmp(pres->access, access)) {
457: StrAllocCopy(proxy, pres->url);
2.18 ! frystyk 458: HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
2.1 frystyk 459: break;
460: }
461: }
462: }
2.7 frystyk 463: HT_FREE(access);
2.1 frystyk 464: return proxy;
2.9 frystyk 465: }
2.1 frystyk 466:
467:
2.4 frystyk 468: /* HTGateway_find
469: ** --------------
2.1 frystyk 470: ** This function evaluates the lists of registered gateways and if
471: ** one is found for the actual access method then it is returned
472: **
473: ** Returns: gateway If OK (must be freed by caller)
474: ** NULL If no gateway is found or error
475: */
2.9 frystyk 476: PUBLIC char * HTGateway_find (const char * url)
2.1 frystyk 477: {
478: char * access;
479: char * gateway = NULL;
480: if (!url || !gateways)
481: return NULL;
482: access = HTParse(url, "", PARSE_ACCESS);
483:
484: /* Check if we have a gateway registered for this access method */
485: {
486: HTList *cur = gateways;
487: HTProxy *pres;
488: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
489: if (!strcmp(pres->access, access)) {
490: StrAllocCopy(gateway, pres->url);
2.18 ! frystyk 491: HTTRACE(PROT_TRACE, "GetGateway.. Found: `%s\'\n" _ pres->url);
2.1 frystyk 492: break;
493: }
494: }
495: }
2.7 frystyk 496: HT_FREE(access);
2.1 frystyk 497: return gateway;
498: }
499:
500:
501: /*
502: ** This function maintains backwards compatibility with the old
503: ** environment variables and searches for the most common values:
504: ** http, ftp, news, wais, and gopher
505: */
2.4 frystyk 506: PUBLIC void HTProxy_getEnvVar (void)
2.1 frystyk 507: {
508: char buf[80];
2.9 frystyk 509: static const char *accesslist[] = {
2.1 frystyk 510: "http",
511: "ftp",
512: "news",
513: "wais",
514: "gopher",
515: NULL
516: };
2.9 frystyk 517: const char **access = accesslist;
2.18 ! frystyk 518: HTTRACE(PROT_TRACE, "Proxy....... Looking for environment variables\n");
2.1 frystyk 519: while (*access) {
2.11 frystyk 520: BOOL found = NO;
2.1 frystyk 521: char *gateway=NULL;
522: char *proxy=NULL;
523:
2.11 frystyk 524: /* Search for proxy gateways */
525: if (found == NO) {
526: strcpy(buf, *access);
527: strcat(buf, "_proxy");
528: if ((proxy = (char *) getenv(buf)) && *proxy) {
529: HTProxy_add(*access, proxy);
530: found = YES;
531: }
532:
533: /* Try the same with upper case */
534: if (found == NO) {
535: char * up = buf;
536: while ((*up = TOUPPER(*up))) up++;
537: if ((proxy = (char *) getenv(buf)) && *proxy) {
538: HTProxy_add(*access, proxy);
539: found = YES;
540: }
541: }
542: }
543:
544: /* As a last resort, search for gateway servers */
545: if (found == NO) {
546: strcpy(buf, "WWW_");
547: strcat(buf, *access);
548: strcat(buf, "_GATEWAY");
549: if ((gateway = (char *) getenv(buf)) && *gateway) {
550: HTGateway_add(*access, gateway);
551: found = YES;
552: }
553: }
2.1 frystyk 554: ++access;
555: }
556:
557: /* Search for `noproxy' directive */
558: {
559: char *noproxy = getenv("no_proxy");
560: if (noproxy && *noproxy) {
561: char *str = NULL;
562: char *strptr;
563: char *name;
564: StrAllocCopy(str, noproxy); /* Get copy we can mutilate */
565: strptr = str;
566: while ((name = HTNextField(&strptr)) != NULL) {
567: char *portstr = strchr(name, ':');
568: unsigned port=0;
569: if (portstr) {
570: *portstr++ = '\0';
571: if (*portstr) port = (unsigned) atoi(portstr);
572: }
573:
574: /* Register it for all access methods */
2.4 frystyk 575: HTNoProxy_add(name, NULL, port);
2.1 frystyk 576: }
2.7 frystyk 577: HT_FREE(str);
2.1 frystyk 578: }
579: }
580: }
581:
Webmaster