Annotation of libwww/Library/src/HTProxy.c, revision 2.20
2.1 frystyk 1: /* HTProxy.c
2: ** GATEWAY AND PROXY MANAGER
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.20 ! frystyk 6: ** @(#) $Id: HTProxy.c,v 2.19 1999/05/18 20:44:40 frystyk Exp $
2.1 frystyk 7: **
8: ** Replaces the old env variables for gateways and proxies. However for
9: ** backward compatibility there is a function that reads the env variables
10: ** at start up. Note that there is a difference between a proxy and a
11: ** gateway!
12: **
13: ** Authors
14: ** HF Henrik Frystyk, frystyk@w3.org
15: ** History
16: ** 4 Jun 95 Written on a rainy day
17: */
18:
19: #if !defined(HT_DIRECT_WAIS) && !defined(HT_DEFAULT_WAIS_GATEWAY)
20: #define HT_DEFAULT_WAIS_GATEWAY "http://www.w3.org:8001/"
21: #endif
22:
23: /* Library include files */
2.15 frystyk 24: #include "wwwsys.h"
2.11 frystyk 25: #include "WWWUtil.h"
26: #include "WWWCore.h"
27: #include "WWWHTTP.h"
2.12 frystyk 28: #include "WWWApp.h"
2.1 frystyk 29: #include "HTProxy.h" /* Implemented here */
30:
31: /* Variables and typedefs local to this module */
32:
33: typedef struct _HTProxy { /* One registered proxy or gateway */
34: char * access; /* Access scheme (lower-cased), or the regex source text for regex entries */
35: char * url; /* URL of the gateway or proxy, stored with a trailing '/' */
2.17 frystyk 36: #ifdef HT_POSIX_REGEX
37: regex_t * regex; /* Compiled regex, or NULL for a plain access-scheme entry */
38: #endif
2.1 frystyk 39: } HTProxy;
40:
41: typedef struct _HTHostlist { /* One `no proxy' directive */
42: char * access; /* Lower-cased access scheme, or NULL to apply to every scheme */
2.17 frystyk 43: char * host; /* Lower-cased host or domain name (regex source text for regex entries) */
2.1 frystyk 44: unsigned port; /* Port the directive applies to; 0 means any port */
2.17 frystyk 45: #ifdef HT_POSIX_REGEX
46: regex_t * regex; /* Compiled regex matched against the whole URL, or NULL */
47: #endif
2.1 frystyk 48: } HTHostList;
49:
50: PRIVATE HTList * proxies = NULL; /* HTProxy entries; created on the first HTProxy_add/HTProxy_addRegex */
51: PRIVATE HTList * gateways = NULL; /* HTProxy entries describing gateways; created on the first HTGateway_add */
52: PRIVATE HTList * noproxy = NULL; /* HTHostList entries: hosts and domains that must not be proxied */
53:
54: #if 0
55: PRIVATE HTList * onlyproxy = NULL; /* Proxy only on these hosts and domains (disabled, not compiled) */
56: #endif
57:
58: /* ------------------------------------------------------------------------- */
59:
2.17 frystyk 60: #ifdef HT_POSIX_REGEX
61: PRIVATE char * get_regex_error (int errcode, regex_t * compiled)
62: {
63: size_t length = regerror (errcode, compiled, NULL, 0);
64: char * str = NULL;
65: if ((str = (char *) HT_MALLOC(length+1)) == NULL)
66: HT_OUTOFMEM("get_regex_error");
67: (void) regerror (errcode, compiled, str, length);
68: return str;
69: }
70:
71: PRIVATE regex_t * get_regex_t (const char * regex_str, int cflags)
72: {
73: regex_t * regex = NULL;
74: if (regex_str && *regex_str) {
75: int status;
76: if ((regex = (regex_t *) HT_CALLOC(1, sizeof(regex_t))) == NULL)
77: HT_OUTOFMEM("get_regex_t");
78: if ((status = regcomp(regex, regex_str, cflags))) {
79: char * err_msg = get_regex_error(status, regex);
2.18 frystyk 80: HTTRACE(PROT_TRACE, "HTProxy..... Regular expression error: %s\n" _ err_msg);
2.17 frystyk 81: HT_FREE(err_msg);
82: HT_FREE(regex);
83: }
84: }
85: return regex;
86: }
87: #endif
88:
2.4 frystyk 89: /*
2.1 frystyk 90: ** Existing entries are replaced with new ones
91: */
2.17 frystyk 92: PRIVATE BOOL add_object (HTList * list, const char * access, const char * url,
93: BOOL regex, int regex_flags)
2.1 frystyk 94: {
95: HTProxy *me;
96: if (!list || !access || !url || !*url)
97: return NO;
2.7 frystyk 98: if ((me = (HTProxy *) HT_CALLOC(1, sizeof(HTProxy))) == NULL)
99: HT_OUTOFMEM("add_object");
2.1 frystyk 100: StrAllocCopy(me->access, access); /* Access method */
2.17 frystyk 101:
102: #ifdef HT_POSIX_REGEX
103: /*
104: ** If we support regular expressions then compile one up for
105: ** this regular expression. Otherwise use is as a normal
106: ** access scheme.
107: */
108: if (regex) {
109: me->regex = get_regex_t(access,
110: regex_flags < 0 ?
111: W3C_DEFAULT_REGEX_FLAGS : regex_flags);
112: } else
113: #endif
2.1 frystyk 114: {
115: char *ptr = me->access;
116: while ((*ptr = TOLOWER(*ptr))) ptr++;
117: }
2.17 frystyk 118:
2.1 frystyk 119: me->url = HTParse(url, "", PARSE_ACCESS+PARSE_HOST+PARSE_PUNCTUATION);
120: if (*(me->url+strlen(me->url)-1) != '/')
121: StrAllocCat(me->url, "/");
122: me->url = HTSimplify(&me->url);
123:
124: /* See if we already have this one */
125: {
126: HTList *cur = list;
127: HTProxy *pres;
128: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
129: if (!strcmp(pres->access, me->access))
130: break; /* We already have it */
131: }
132: if (pres) {
2.18 frystyk 133: HTTRACE(PROT_TRACE, "HTProxy..... replacing for `%s\' access %s\n" _
134: me->url _ me->access);
2.7 frystyk 135: HT_FREE(pres->access);
136: HT_FREE(pres->url);
2.17 frystyk 137: #ifdef HT_POSIX_REGEX
138: if (pres->regex) regfree(pres->regex);
139: #endif
2.1 frystyk 140: HTList_removeObject(list, (void *) pres);
2.7 frystyk 141: HT_FREE(pres);
2.1 frystyk 142: }
2.18 frystyk 143: HTTRACE(PROT_TRACE, "HTProxy..... adding for `%s\' access %s\n" _
144: me->url _ me->access);
2.1 frystyk 145: HTList_addObject(list, (void *) me);
146: }
147: return YES;
148: }
149:
2.4 frystyk 150: PRIVATE BOOL remove_allObjects (HTList * list)
2.1 frystyk 151: {
152: if (list) {
153: HTList *cur = list;
154: HTProxy *pres;
155: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
2.7 frystyk 156: HT_FREE(pres->access);
157: HT_FREE(pres->url);
2.17 frystyk 158: #ifdef HT_POSIX_REGEX
159: if (pres->regex) regfree(pres->regex);
160: #endif
2.7 frystyk 161: HT_FREE(pres);
2.1 frystyk 162: }
163: return YES;
164: }
165: return NO;
166: }
167:
2.4 frystyk 168: /* Add an entry to a list of host names
169: ** ------------------------------------
170: ** Entries are always appended; an existing entry is never replaced
171: */
2.9 frystyk 172: PRIVATE BOOL add_hostname (HTList * list, const char * host,
2.17 frystyk 173: const char * access, unsigned port,
174: BOOL regex, int regex_flags)
2.4 frystyk 175: {
176: HTHostList *me;
177: if (!list || !host || !*host)
178: return NO;
2.7 frystyk 179: if ((me = (HTHostList *) HT_CALLOC(1, sizeof(HTHostList))) == NULL)
180: HT_OUTOFMEM("add_hostname");
2.17 frystyk 181: #ifdef HT_POSIX_REGEX
182: if (regex)
183: me->regex = get_regex_t(host,
184: regex_flags < 0 ?
185: W3C_DEFAULT_REGEX_FLAGS : regex_flags);
186: #endif
187:
2.4 frystyk 188: if (access) {
189: char *ptr;
190: StrAllocCopy(me->access, access); /* Access method */
191: ptr = me->access;
192: while ((*ptr = TOLOWER(*ptr))) ptr++;
193: }
194: StrAllocCopy(me->host, host); /* Host name */
195: {
196: char *ptr = me->host;
197: while ((*ptr = TOLOWER(*ptr))) ptr++;
198: }
199: me->port = port; /* Port number */
2.18 frystyk 200: HTTRACE(PROT_TRACE, "HTHostList.. adding `%s\' to list\n" _ me->host);
2.4 frystyk 201: HTList_addObject(list, (void *) me);
202: return YES;
203: }
2.1 frystyk 204:
2.4 frystyk 205: PRIVATE BOOL remove_AllHostnames (HTList * list)
206: {
207: if (list) {
208: HTList *cur = list;
209: HTHostList *pres;
210: while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
2.7 frystyk 211: HT_FREE(pres->access);
212: HT_FREE(pres->host);
2.17 frystyk 213: #ifdef HT_POSIX_REGEX
214: if (pres->regex) regfree(pres->regex);
215: #endif
2.7 frystyk 216: HT_FREE(pres);
2.4 frystyk 217: }
218: return YES;
219: }
220: return NO;
221: }
222:
223: /* HTProxy_add
224: ** -----------
2.1 frystyk 225: ** Registers a proxy as the server to contact for a specific
226: ** access method. `proxy' should be a fully valid name, like
227: ** "http://proxy.w3.org:8001" but domain name is not required.
228: ** If an entry exists for this access then delete it and use the
229: ** new one. Returns YES if OK, else NO
230: */
2.9 frystyk 231: PUBLIC BOOL HTProxy_add (const char * access, const char * proxy)
2.1 frystyk 232: {
2.11 frystyk 233: /*
234: ** If this is the first time here then also add a before filter to handle
2.12 frystyk 235: ** proxy authentication and the normal AA after filter as well.
236: ** These filters will be removed if we remove all proxies again.
2.11 frystyk 237: */
238: if (!proxies) {
239: proxies = HTList_new();
2.13 frystyk 240: HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
241: HT_FILTER_MIDDLE);
242: HTNet_addAfter(HTAuthFilter, NULL, NULL,
243: HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
2.14 frystyk 244: HTNet_addAfter(HTAuthFilter, NULL, NULL,
245: HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
2.11 frystyk 246: }
2.17 frystyk 247: return add_object(proxies, access, proxy, NO, -1);
248: }
249:
250: /* HTProxy_addRegex
251: ** ----------------
252: ** Registers a proxy as the server to contact for any URL matching the
253: ** regular expression. `proxy' should be a fully valid name, like
254: ** "http://proxy.w3.org:8001".
255: ** If an entry exists for this access then delete it and use the
256: ** new one. Returns YES if OK, else NO
257: */
258: PUBLIC BOOL HTProxy_addRegex (const char * regex,
259: const char * proxy,
260: int regex_flags)
261: {
262: /*
263: ** If this is the first time here then also add a before filter to handle
264: ** proxy authentication and the normal AA after filter as well.
265: ** These filters will be removed if we remove all proxies again.
266: */
267: if (!proxies) {
268: proxies = HTList_new();
269: HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
270: HT_FILTER_MIDDLE);
271: HTNet_addAfter(HTAuthFilter, NULL, NULL,
272: HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
273: HTNet_addAfter(HTAuthFilter, NULL, NULL,
274: HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
275: }
276: #ifdef HT_POSIX_REGEX
277: return add_object(proxies, regex, proxy, YES, regex_flags);
278: #else
279: return add_object(proxies, regex, proxy, NO, -1);
280: #endif
2.1 frystyk 281: }
282:
2.4 frystyk 283: /*
2.1 frystyk 284: ** Removes all registered proxies
285: */
2.4 frystyk 286: PUBLIC BOOL HTProxy_deleteAll (void)
2.1 frystyk 287: {
2.4 frystyk 288: if (remove_allObjects(proxies)) {
2.1 frystyk 289: HTList_delete(proxies);
2.11 frystyk 290:
291: /*
292: ** If we have no more proxies then there is no reason for checking
2.12 frystyk 293: ** proxy authentication. We therefore unregister the filters for
294: ** handling proxy authentication
2.11 frystyk 295: */
2.13 frystyk 296: HTNet_deleteBefore(HTAA_proxyBeforeFilter);
2.19 frystyk 297: HTNet_deleteAfterStatus(HT_NO_PROXY_ACCESS);
298: HTNet_deleteAfterStatus(HT_PROXY_REAUTH);
2.11 frystyk 299:
2.1 frystyk 300: proxies = NULL;
301: return YES;
302: }
303: return NO;
304: }
305:
2.4 frystyk 306: /* HTGateway_add
307: ** -------------
2.1 frystyk 308: ** Registers a gateway as the server to contact for a specific
309: ** access method. `gateway' should be a fully valid name, like
310: ** "http://gateway.w3.org:8001" but domain name is not required.
311: ** If an entry exists for this access then delete it and use the
312: ** new one. Returns YES if OK, else NO
313: */
2.9 frystyk 314: PUBLIC BOOL HTGateway_add (const char * access, const char * gate)
2.1 frystyk 315: {
316: if (!gateways)
317: gateways = HTList_new();
2.17 frystyk 318: return add_object(gateways, access, gate, NO, -1);
2.1 frystyk 319: }
320:
2.4 frystyk 321: /*
2.1 frystyk 322: ** Removes all registered gateways
323: */
2.4 frystyk 324: PUBLIC BOOL HTGateway_deleteAll (void)
2.1 frystyk 325: {
2.4 frystyk 326: if (remove_allObjects(gateways)) {
2.1 frystyk 327: HTList_delete(gateways);
328: gateways = NULL;
329: return YES;
330: }
331: return NO;
332: }
333:
2.4 frystyk 334: /* HTNoProxy_add
335: ** -------------
2.1 frystyk 336: ** Registers a host name or a domain as a place where no proxy should
337: ** be contacted - for example a very fast link. If `port' is '0' then
338: ** it applies to all ports and if `access' is NULL then it applies to
339: ** all access methods.
340: **
341: ** Examples: w3.org
342: ** www.close.com
343: */
2.9 frystyk 344: PUBLIC BOOL HTNoProxy_add (const char * host, const char * access,
2.4 frystyk 345: unsigned port)
2.1 frystyk 346: {
347: if (!noproxy)
348: noproxy = HTList_new();
2.17 frystyk 349: return add_hostname(noproxy, host, access, port, NO, -1);
350: }
351:
352: /* HTNoProxy_addRegex
353: ** ------------------
354: ** Registers a regular expression where URIs matching this expression
355: ** should go directly and not via a proxy.
356: **
357: */
358: PUBLIC BOOL HTNoProxy_addRegex (const char * regex, int regex_flags)
359: {
360: if (!noproxy)
361: noproxy = HTList_new();
362: #ifdef HT_POSIX_REGEX
363: return add_hostname(noproxy, regex, NULL, 0, YES, regex_flags);
364: #else
365: return add_hostname(noproxy, regex, NULL, 0, NO, -1);
366: #endif
2.1 frystyk 367: }
368:
2.4 frystyk 369: /* HTNoProxy_deleteAll
370: ** -------------------
2.1 frystyk 371: ** Removes all registered no_proxy directives
372: */
2.4 frystyk 373: PUBLIC BOOL HTNoProxy_deleteAll (void)
2.1 frystyk 374: {
2.4 frystyk 375: if (remove_AllHostnames(noproxy)) {
2.1 frystyk 376: HTList_delete(noproxy);
377: noproxy = NULL;
378: return YES;
379: }
380: return NO;
381: }
382:
2.4 frystyk 383: /* HTProxy_find
384: ** ------------
2.1 frystyk 385: ** This function evaluates the lists of registered proxies and if
386: ** one is found for the actual access method and it is not registered
387: ** in the `noproxy' list, then a URL containing the host to be contacted
388: ** is returned to the caller. This string must be freed by the caller.
389: **
390: ** Returns: proxy If OK (must be freed by caller)
391: ** NULL If no proxy is found or error
392: */
2.9 frystyk 393: PUBLIC char * HTProxy_find (const char * url)
2.1 frystyk 394: {
395: char * access;
396: char * proxy = NULL;
397: if (!url || !proxies)
398: return NULL;
399: access = HTParse(url, "", PARSE_ACCESS);
400:
401: /* First check if the host (if any) is registered in the noproxy list */
402: if (noproxy) {
403: char *host = HTParse(url, "", PARSE_HOST);
404: char *ptr;
405: unsigned port=0;
406: if ((ptr = strchr(host, ':')) != NULL) {
407: *ptr++ = '\0'; /* Chop off port */
408: if (*ptr) port = (unsigned) atoi(ptr);
409: }
410: if (*host) { /* If we have a host name */
411: HTList *cur = noproxy;
412: HTHostList *pres;
413: while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
2.17 frystyk 414: #ifdef HT_POSIX_REGEX
415: if (pres->regex) {
416: BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
417: if (match) {
2.18 frystyk 418: HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
2.17 frystyk 419: HT_FREE(access);
420: return NULL;
421: }
422: } else
423: #endif
2.1 frystyk 424: if (!pres->access ||
425: (pres->access && !strcmp(pres->access, access))) {
2.20 ! frystyk 426: if ((pres->port == 0) || (pres->port == port)) {
2.1 frystyk 427: char *np = pres->host+strlen(pres->host);
428: char *hp = host+strlen(host);
429: while (np>=pres->host && hp>=host && (*np--==*hp--));
430: if (np==pres->host-1 && (hp==host-1 || *hp=='.')) {
2.18 frystyk 431: HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
2.7 frystyk 432: HT_FREE(access);
2.1 frystyk 433: return NULL;
434: }
435: }
436: }
437: }
438: }
2.7 frystyk 439: HT_FREE(host);
2.1 frystyk 440: }
441:
442: /* Now check if we have a proxy registered for this access method */
443: {
444: HTList *cur = proxies;
445: HTProxy *pres;
446: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
2.17 frystyk 447: #ifdef HT_POSIX_REGEX
448: if (pres->regex) {
449: BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
450: if (match) {
451: StrAllocCopy(proxy, pres->url);
2.18 frystyk 452: HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
2.17 frystyk 453: break;
454: }
455: } else
456: #endif
2.1 frystyk 457: if (!strcmp(pres->access, access)) {
458: StrAllocCopy(proxy, pres->url);
2.18 frystyk 459: HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
2.1 frystyk 460: break;
461: }
462: }
463: }
2.7 frystyk 464: HT_FREE(access);
2.1 frystyk 465: return proxy;
2.9 frystyk 466: }
2.1 frystyk 467:
468:
2.4 frystyk 469: /* HTGateway_find
470: ** --------------
2.1 frystyk 471: ** This function evaluates the lists of registered gateways and if
472: ** one is found for the actual access method then it is returned
473: **
474: ** Returns: gateway If OK (must be freed by caller)
475: ** NULL If no gateway is found or error
476: */
2.9 frystyk 477: PUBLIC char * HTGateway_find (const char * url)
2.1 frystyk 478: {
479: char * access;
480: char * gateway = NULL;
481: if (!url || !gateways)
482: return NULL;
483: access = HTParse(url, "", PARSE_ACCESS);
484:
485: /* Check if we have a gateway registered for this access method */
486: {
487: HTList *cur = gateways;
488: HTProxy *pres;
489: while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
490: if (!strcmp(pres->access, access)) {
491: StrAllocCopy(gateway, pres->url);
2.18 frystyk 492: HTTRACE(PROT_TRACE, "GetGateway.. Found: `%s\'\n" _ pres->url);
2.1 frystyk 493: break;
494: }
495: }
496: }
2.7 frystyk 497: HT_FREE(access);
2.1 frystyk 498: return gateway;
499: }
500:
501:
502: /*
503: ** This function maintains backwards compatibility with the old
504: ** environment variables and searches for the most common values:
505: ** http, ftp, news, wais, and gopher
506: */
2.4 frystyk 507: PUBLIC void HTProxy_getEnvVar (void)
2.1 frystyk 508: {
509: char buf[80];
2.9 frystyk 510: static const char *accesslist[] = {
2.1 frystyk 511: "http",
512: "ftp",
513: "news",
514: "wais",
515: "gopher",
516: NULL
517: };
2.9 frystyk 518: const char **access = accesslist;
2.18 frystyk 519: HTTRACE(PROT_TRACE, "Proxy....... Looking for environment variables\n");
2.1 frystyk 520: while (*access) {
2.11 frystyk 521: BOOL found = NO;
2.1 frystyk 522: char *gateway=NULL;
523: char *proxy=NULL;
524:
2.11 frystyk 525: /* Search for proxy gateways */
526: if (found == NO) {
527: strcpy(buf, *access);
528: strcat(buf, "_proxy");
529: if ((proxy = (char *) getenv(buf)) && *proxy) {
530: HTProxy_add(*access, proxy);
531: found = YES;
532: }
533:
534: /* Try the same with upper case */
535: if (found == NO) {
536: char * up = buf;
537: while ((*up = TOUPPER(*up))) up++;
538: if ((proxy = (char *) getenv(buf)) && *proxy) {
539: HTProxy_add(*access, proxy);
540: found = YES;
541: }
542: }
543: }
544:
545: /* As a last resort, search for gateway servers */
546: if (found == NO) {
547: strcpy(buf, "WWW_");
548: strcat(buf, *access);
549: strcat(buf, "_GATEWAY");
550: if ((gateway = (char *) getenv(buf)) && *gateway) {
551: HTGateway_add(*access, gateway);
552: found = YES;
553: }
554: }
2.1 frystyk 555: ++access;
556: }
557:
558: /* Search for `noproxy' directive */
559: {
560: char *noproxy = getenv("no_proxy");
561: if (noproxy && *noproxy) {
562: char *str = NULL;
563: char *strptr;
564: char *name;
565: StrAllocCopy(str, noproxy); /* Get copy we can mutilate */
566: strptr = str;
567: while ((name = HTNextField(&strptr)) != NULL) {
568: char *portstr = strchr(name, ':');
569: unsigned port=0;
570: if (portstr) {
571: *portstr++ = '\0';
572: if (*portstr) port = (unsigned) atoi(portstr);
573: }
574:
575: /* Register it for all access methods */
2.4 frystyk 576: HTNoProxy_add(name, NULL, port);
2.1 frystyk 577: }
2.7 frystyk 578: HT_FREE(str);
2.1 frystyk 579: }
580: }
581: }
582:
Webmaster