Annotation of libwww/Library/src/HTCache.c, revision 2.9
2.1 frystyk 1: /* HTCache.c
2: ** CACHE WRITER
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This module manages the cache
8: **
9: ** History:
10: ** HFN: spawned from HTFwrite
11: ** HWL: converted the caching scheme to be hierarchical by taking
12: ** AL code from Daemon
13: **
14: */
15:
16: /* Library include files */
17: #include "tcp.h"
18: #include "HTUtils.h"
19: #include "HTString.h"
20: #include "HTFormat.h"
21: #include "HTFWrite.h"
22: #include "HTBind.h"
23: #include "HTList.h"
2.7 frystyk 24: #include "HTReqMan.h"
2.1 frystyk 25: #include "HTParse.h"
26: #include "HTCache.h" /* Implemented here */
27:
28: /*
29: ** The cache limit is the number of files which are kept. Yes, I know,
30: ** the amount of disk space would be more relevant. So this may change.
31: ** The default is CACHE_LIMIT below (5 files); the value in force is held
32: ** in the file-private variable HTCacheLimit.
33: */
34: #define CACHE_LIMIT 5 /* Number of files */
35:
36: #define CACHE_INFO ".cache_info" /* Reserved name: a URL ending in it is not cached */
37: #define INDEX_FILE ".cache_dirindex" /* Reserved name; also receives a file's data when that file must become a directory */
38: #define WELCOME_FILE ".cache_welcome" /* Reserved name; appended when the URL path ends in '/' */
39: #define TMP_SUFFIX ".cache_tmp" /* Reserved suffix; temporary name during the file-to-directory move */
40: #define LOCK_SUFFIX ".cache_lock" /* Reserved suffix; only tested for in reserved_name() in the visible code */
41:
2.7 frystyk 42: /* This is the default cache directory, used by HTCache_setRoot(NULL): */
43: #ifndef HT_CACHE_ROOT
44: #define HT_CACHE_ROOT "/tmp"
45: #endif
46:
47: typedef struct _HTCache { /* One record per cache file written by HTCacheWriter() */
2.1 frystyk 48: HTFormat format; /* May have many formats per anchor; set from the writer's input_format */
49: char * filename; /* Heap-allocated path of the cache file; freed together with the record */
50: time_t start_time; /* time() taken when the writer stream was created */
51: time_t load_delay; /* time() at HTCache_free() minus start_time; 0 until then */
52: int reference_count; /* Not referenced in the visible part of this file */
2.7 frystyk 53: } HTCache;
2.1 frystyk 54:
55: struct _HTStream { /* Private state of one cache writer stream */
56: CONST HTStreamClass * isa; /* Method table; HTCacheWriter() sets it to &HTCacheClass */
57: FILE * fp; /* Cache file, opened with mode "wb" */
2.7 frystyk 58: HTCache * cache; /* Record for this file; the same pointer is stored in HTCacheList */
2.1 frystyk 59: HTRequest * request; /* Request passed to HTCacheWriter() */
60: };
61:
62: PRIVATE BOOL HTCacheEnable = NO; /* Set by HTCache_enable(), cleared by HTCache_disable() */
63: PRIVATE char * HTCacheRoot = NULL; /* Destination for cache; HTCache_setRoot() makes it end in '/' */
2.7 frystyk 64: PRIVATE HTList * HTCacheList = NULL; /* List of cached elements (HTCache records) */
2.1 frystyk 65: PRIVATE int HTCacheLimit = CACHE_LIMIT; /* Entry count at which limit_cache() starts evicting */
66:
2.7 frystyk 67: PRIVATE HTExpiresMode HTExpMode = HT_EXPIRES_IGNORE; /* See HTCache_setExpiresMode() */
68: PRIVATE char *HTExpNotify = NULL; /* Caller-owned notify text; never copied or freed here */
69:
70: PRIVATE HTMemoryCacheHandler *HTMemoryCache = NULL; /* Memory cache handler */
2.2 frystyk 71:
2.1 frystyk 72: /* ------------------------------------------------------------------------- */
2.2 frystyk 73: /* GARBAGE COLLECTOR */
2.1 frystyk 74: /* ------------------------------------------------------------------------- */
75:
76: /*
77: ** Removes cache item from disk and corresponding object from list in memory
78: */
2.7 frystyk 79: PRIVATE void HTCache_remove ARGS1(HTCache *, item)
2.1 frystyk 80: {
2.7 frystyk 81: if (HTCacheList && item) {
2.1 frystyk 82: if (CACHE_TRACE)
83: fprintf(TDEST, "Cache....... Removing %s\n", item->filename);
2.7 frystyk 84: HTList_removeObject(HTCacheList, item);
2.1 frystyk 85: REMOVE(item->filename);
86:
87: /* HWL 22/9/94: Clean up hierachical file structure */
88: {
89: char * p;
90: while ((p = strrchr(item->filename,'/')) && (p != NULL)){
91: item->filename[p-item->filename] = 0;
92: if (strcmp(item->filename, HTCacheRoot) != 0) {
93: if (CACHE_TRACE)
94: fprintf(TDEST, "rmdir....... %s\n", item->filename);
95: RMDIR(item->filename); /* fails if directory isn't empty */
96: }
97: }
98: }
99: free(item->filename);
100: free(item);
101: }
102: }
103:
104:
105: /*
106: ** Remove a file from the cache to prevent too many files from being cached
107: */
108: PRIVATE void limit_cache ARGS1(HTList * , list)
109: {
110: HTList * cur = list;
2.7 frystyk 111: HTCache * item;
2.1 frystyk 112: time_t best_delay = 0; /* time_t in principle can be any arith type */
2.7 frystyk 113: HTCache* best_item = NULL;
2.1 frystyk 114:
115: if (HTList_count(list) < HTCacheLimit) return; /* Limit not reached */
116:
2.7 frystyk 117: while (NULL != (item = (HTCache*)HTList_nextObject(cur))) {
2.1 frystyk 118: if (best_delay == 0 || item->load_delay < best_delay) {
119: best_delay = item->load_delay;
120: best_item = item;
121: }
122: }
123: if (best_item) HTCache_remove(best_item);
124: }
125:
126: /*
127: ** Check that the name we're about to generate doesn't
128: ** clash with anything used by the caching system.
129: */
130: PRIVATE BOOL reserved_name ARGS1(char *, url)
131: {
132: char * name = strrchr(url, '/');
133: char * suff = NULL;
134:
135: if (name) name++;
136: else name = url;
137:
138: if (!strcmp(name, CACHE_INFO) ||
139: !strcmp(name, INDEX_FILE) ||
140: !strcmp(name, WELCOME_FILE))
141: return YES;
142:
143: suff = strrchr(name, TMP_SUFFIX[0]);
144: if (suff && !strcmp(suff, TMP_SUFFIX))
145: return YES;
146:
147: suff = strrchr(name, LOCK_SUFFIX[0]);
148: if (suff && !strcmp(suff, LOCK_SUFFIX))
149: return YES;
150:
151: return NO;
152: }
153:
154: /*
2.2 frystyk 155: ** Removes all cache entries in memory
156: */
157: PUBLIC void HTCache_clearMem NOARGS
158: {
2.7 frystyk 159: HTList *cur=HTCacheList;
160: HTCache *pres;
2.2 frystyk 161: if (cur) {
2.7 frystyk 162: while ((pres = (HTCache *) HTList_nextObject(cur))) {
2.2 frystyk 163: FREE(pres->filename);
164: free(pres);
165: }
2.7 frystyk 166: HTList_delete(HTCacheList);
167: HTCacheList = NULL;
2.2 frystyk 168: }
169: }
170:
171: /*
172: ** Removes all cache entries in memory and on disk
173: */
174: PUBLIC void HTCache_deleteAll NOARGS
175: {
2.7 frystyk 176: HTList *cur=HTCacheList;
177: HTCache * pres;
2.2 frystyk 178: if (cur) {
2.7 frystyk 179: while ((pres = (HTCache *) HTList_lastObject(cur)))
2.2 frystyk 180: HTCache_remove(pres);
2.7 frystyk 181: HTList_delete(HTCacheList);
182: HTCacheList = NULL;
2.2 frystyk 183: }
184: }
185:
186: /* ------------------------------------------------------------------------- */
187: /* NAMING SCHEME */
188: /* ------------------------------------------------------------------------- */
189:
190: /*
2.1 frystyk 191: ** Map url to cache file name.
192: */
193: PRIVATE char * cache_file_name ARGS1(char *, url)
194: {
195: char * access = NULL;
196: char * host = NULL;
197: char * path = NULL;
198: char * cfn = NULL;
199: BOOL welcome = NO;
200: BOOL res = NO;
201:
202: if (!url || strchr(url, '?') || (res = reserved_name(url)) ||
203: !(access = HTParse(url, "", PARSE_ACCESS)) ||
204: (0 != strcmp(access, "http") &&
205: 0 != strcmp(access, "ftp") &&
206: 0 != strcmp(access, "gopher"))) {
207:
208: if (access) free(access);
209:
210: if (res && CACHE_TRACE)
211: fprintf(TDEST,
212: "Cache....... Clash with reserved name (\"%s\")\n",url);
213:
214: return NULL;
215: }
216:
217: host = HTParse(url, "", PARSE_HOST);
218: path = HTParse(url, "", PARSE_PATH | PARSE_PUNCTUATION);
219: if (path && path[strlen(path)-1] == '/')
220: welcome = YES;
221:
222: cfn = (char*)malloc(strlen(HTCacheRoot) +
223: strlen(access) +
224: (host ? strlen(host) : 0) +
225: (path ? strlen(path) : 0) +
226: (welcome ? strlen(WELCOME_FILE) : 0) + 3);
227: if (!cfn) outofmem(__FILE__, "cache_file_name");
228:
229: /* Removed extra slash - HF May2,95 */
230: sprintf(cfn, "%s%s/%s%s%s", HTCacheRoot, access, host, path,
231: (welcome ? WELCOME_FILE : ""));
232:
233: FREE(access); FREE(host); FREE(path);
234:
235: /*
236: ** This checks that the last component is not too long.
237: ** It could check all the components, but the last one
238: ** is most important because it could later blow up the
239: ** whole gc when reading cache info files.
240: ** Operating system handles other cases.
241: ** 64 = 42 + 22 and 22 = 42 - 20 :-)
242: ** In other words I just picked some number, it doesn't
243: ** really matter that much.
244: */
245: {
246: char * last = strrchr(cfn, '/');
247: if (!last) last = cfn;
248: if ((int)strlen(last) > 64) {
249: if (CACHE_TRACE)
250: fprintf(TDEST, "Too long.... cache file name \"%s\"\n", cfn);
251: free(cfn);
252: cfn = NULL;
253: }
254: }
255: return cfn;
256: }
257:
258:
259: /*
260: ** Create directory path for cache file
261: **
262: ** On exit:
263: ** return YES
264: ** if directories created -- after that caller
265: ** can rely on fopen(cfn,"w") succeeding.
266: **
267: */
268: PRIVATE BOOL create_cache_place ARGS1(char *, cfn)
269: {
270: struct stat stat_info;
271: char * cur = NULL;
272: BOOL create = NO;
273:
274: if (!cfn || (int)strlen(cfn) <= (int)strlen(HTCacheRoot) + 1)
275: return NO;
276:
277: cur = cfn + strlen(HTCacheRoot) + 1;
278:
279: while ((cur = strchr(cur, '/'))) {
280: *cur = 0;
2.6 frystyk 281: if (create || HT_STAT(cfn, &stat_info) == -1) {
2.1 frystyk 282: create = YES; /* To avoid doing stat()s in vain */
283: if (CACHE_TRACE)
284: fprintf(TDEST,"Cache....... creating cache dir \"%s\"\n",cfn);
285: if (MKDIR(cfn, 0777) < 0) {
286: if (CACHE_TRACE)
287: fprintf(TDEST,"Cache....... can't create dir `%s\'\n",cfn);
288: return NO;
289: }
290: } else {
291: if (S_ISREG(stat_info.st_mode)) {
292: int len = strlen(cfn);
293: char * tmp1 = (char*)malloc(len + strlen(TMP_SUFFIX) + 1);
294: char * tmp2 = (char*)malloc(len + strlen(INDEX_FILE) + 2);
295: /* time_t t1,t2,t3,t4,t5; */
296:
297:
298: sprintf(tmp1, "%s%s", cfn, TMP_SUFFIX);
299: sprintf(tmp2, "%s/%s", cfn, INDEX_FILE);
300:
301: if (CACHE_TRACE) {
302: fprintf(TDEST,"Cache....... moving \"%s\" to \"%s\"\n",
303: cfn,tmp1);
304: fprintf(TDEST,"and......... creating dir \"%s\"\n",
305: cfn);
306: fprintf(TDEST,"and......... moving \"%s\" to \"%s\"\n",
307: tmp1,tmp2);
308: }
309: rename(cfn,tmp1);
310: (void) MKDIR(cfn, 0777);
311: rename(tmp1,tmp2);
312: free(tmp1);
313: free(tmp2);
314: }
315: else {
316: if (CACHE_TRACE)
317: fprintf(TDEST,"Cache....... dir \"%s\" already exists\n",
318: cfn);
319: }
320: }
321: *cur = '/';
322: cur++;
323: }
324: return YES;
325: }
326:
327:
328: /* Create a cache path
329: ** -------------------
330: ** Find a full path name for the cache file and create the path if it
331: ** does not already exist. Returns name or NULL
332: ** HWL 22/9/94
333: ** HWL added support for hierachical structure
334: */
335: PRIVATE char *HTCache_getName ARGS1(char *, url)
336: {
337: char *filename = cache_file_name(url);
338: if (!filename)
339: return NULL;
340: if (create_cache_place(filename))
341: return(filename);
342: return NULL;
343: }
344:
345: /*
346: ** Make a WWW name from a cache name and returns it if OK, else NULL.
347: ** The string returned must be freed by the caller.
348: ** We keep this function private as we might change the naming scheme for
349: ** cache files. Right now it follows the file hierarchi.
350: */
351: PRIVATE char *HTCache_wwwName ARGS1 (char *, name)
352: {
353: char * result = NULL;
354: if (name && *name) {
355: StrAllocCopy(result, "file:"); /* We get an absolute file name */
356: #ifdef VMS
357: /* convert directory name to Unix-style syntax */
2.4 frystyk 358: {
359: char * disk = strchr (name, ':');
360: char * dir = strchr (name, '[');
361: if (disk) {
362: *disk = '\0';
363: StrAllocCat(result, "/"); /* needs delimiter */
364: StrAllocCat(result, name);
365: }
366: if (dir) {
367: char *p;
368: *dir = '/'; /* Convert leading '[' */
369: for (p = dir ; *p != ']'; ++p)
370: if (*p == '.') *p = '/';
371: *p = '\0'; /* Cut on final ']' */
372: StrAllocCat(result, dir);
373: }
2.1 frystyk 374: }
375: #else /* not VMS */
376: #ifdef WIN32
2.4 frystyk 377: {
378: char * p = name; /* a colon */
379: while( *p != 0 ) {
380: if (*p == '\\') /* change to one true slash */
381: *p = '/' ;
382: p++;
383: }
384: StrAllocCat(result, name);
2.1 frystyk 385: }
386: #else /* not WIN32 */
387: StrAllocCat (result, name);
388: #endif /* not WIN32 */
389: #endif /* not VMS */
390: }
391: return result;
392: }
393:
2.2 frystyk 394: /* ------------------------------------------------------------------------- */
395: /* CACHE PARAMETERS */
396: /* ------------------------------------------------------------------------- */
2.1 frystyk 397:
398: /* Enable Cache
399: ** ------------
400: ** If `cache_root' is NULL then reuse old value or use HT_CACHE_ROOT.
401: ** An empty string will make '/' as cache root
402: */
403: PUBLIC BOOL HTCache_enable ARGS1(CONST char *, cache_root)
404: {
405: if (cache_root)
406: HTCache_setRoot(cache_root);
407: HTCacheEnable = YES;
408: return YES;
409: }
410:
411:
412: /* Disable Cache
413: ** ------------
414: ** Turns off the cache. Note that the cache can be disabled and enabled
415: ** at any time. The cache root is kept and can be reused during the
416: ** execution.
417: */
418: PUBLIC BOOL HTCache_disable NOARGS
419: {
420: HTCacheEnable = NO;
421: return YES;
422: }
423:
424: /* Is Cache Enabled
425: ** ----------------
426: ** Returns YES or NO. Also makes sure that we have a root value
427: ** (even though it might be invalid)
428: */
429: PUBLIC BOOL HTCache_isEnabled NOARGS
430: {
431: if (!HTSecure && HTCacheEnable) {
432: if (!HTCacheRoot)
433: HTCache_setRoot(NULL);
434: return YES;
435: }
436: return NO;
437: }
438:
439:
440: /* Set Cache Root
441: ** --------------
442: ** If `cache_root' is NULL then the current value (might be a define)
443: ** Should we check if the cache_root is actually OK? I think not!
444: */
445: PUBLIC BOOL HTCache_setRoot ARGS1(CONST char *, cache_root)
446: {
447: StrAllocCopy(HTCacheRoot, cache_root ? cache_root : HT_CACHE_ROOT);
448: if (*(HTCacheRoot+strlen(HTCacheRoot)-1) != '/')
449: StrAllocCat(HTCacheRoot, "/");
450: if (CACHE_TRACE)
451: fprintf(TDEST, "Cache Root.. Root set to `%s\'\n", HTCacheRoot);
452: return YES;
453: }
454:
455:
456: /* Get Cache Root
457: ** --------------
458: */
459: PUBLIC CONST char * HTCache_getRoot NOARGS
460: {
461: return HTCacheRoot;
462: }
463:
464: /* Free Cache Root
465: ** --------------
466: ** For clean up memory
467: */
468: PUBLIC void HTCache_freeRoot NOARGS
469: {
470: FREE(HTCacheRoot);
471: }
472:
473: /* ------------------------------------------------------------------------- */
2.7 frystyk 474: /* MEMORY CACHE */
475: /* ------------------------------------------------------------------------- */
476:
477: /*
478: ** Register a Memory Cache Handler. This function is introduced in order to
479: ** avoid having references to HText module outside HTML.
480: */
481: PUBLIC BOOL HTMemoryCache_register (HTMemoryCacheHandler * cbf)
482: {
483: return (HTMemoryCache = cbf) ? YES : NO;
484: }
485:
486: PUBLIC BOOL HTMemoryCache_unRegister (void)
487: {
488: HTMemoryCache = NULL;
489: return YES;
490: }
491:
492: PUBLIC int HTMemoryCache_check (HTRequest * request)
493: {
2.8 frystyk 494: return HTMemoryCache ? HTMemoryCache(request,HTExpMode,HTExpNotify) : 0;
2.7 frystyk 495: }
496:
497: /*
498: ** Set the mode for how we handle Expires header from the local history
499: ** list. The following modes are available:
500: **
501: ** HT_EXPIRES_IGNORE : No update in the history list
502: ** HT_EXPIRES_NOTIFY : The user is notified but no reload
503: ** HT_EXPIRES_AUTO : Automatic reload
504: **
505: ** The notify only makes sense when HT_EXPIRES_NOTIFY. NULL is valid.
506: */
507: PUBLIC void HTCache_setExpiresMode ARGS2(HTExpiresMode, mode, char *, notify)
508: {
509: HTExpMode = mode;
510: HTExpNotify = notify;
511: }
512:
513: PUBLIC HTExpiresMode HTCache_expiresMode ARGS1(char **, notify)
514: {
515: *notify = HTExpNotify ? HTExpNotify : "This version has expired!";
516: return HTExpMode;
517: }
518:
519: /* ------------------------------------------------------------------------- */
2.2 frystyk 520: /* CACHE MANAGER */
521: /* ------------------------------------------------------------------------- */
522:
523: /*
524: ** Verifies if a cache object exists for this URL and if so returns a URL
525: ** for the cached object. It does not verify whether the object is valid or
526: ** not, for example it might have expired.
527: **
528: ** Returns: file name If OK (must be freed by caller)
529: ** NULL If no cache object found
530: */
531: PUBLIC char * HTCache_getReference ARGS1(char *, url)
532: {
533: if (url && HTCache_isEnabled()) {
534: char *fnam = cache_file_name(url);
535: if (fnam) {
536: FILE *fp = fopen(fnam, "r");
537: if (fp) {
538: char *url = HTCache_wwwName(fnam);
539: fclose(fp);
540: if (CACHE_TRACE)
541: fprintf(TDEST, "Cache....... Object found `%s\'\n", url);
542: free(fnam);
543: return url;
544: } else
545: free(fnam);
546: }
547: }
548: return NULL;
549: }
550:
551: /*
552: ** This function checks whether a document has expired or not.
553: ** The check is based on the metainformation passed in the anchor object
554: ** The function returns YES or NO.
555: */
556: PUBLIC BOOL HTCache_isValid ARGS1(HTParentAnchor *, anchor)
557: {
558: time_t cur = time(NULL);
559: time_t expires = HTAnchor_expires(anchor);
560: return (expires>0 && cur>0 && expires<cur) ? NO : YES;
561: }
562:
563: /* ------------------------------------------------------------------------- */
2.1 frystyk 564: /* CACHE WRITER STREAM */
565: /* ------------------------------------------------------------------------- */
566:
567: PRIVATE int HTCache_flush ARGS1(HTStream *, me)
568: {
569: return (fflush(me->fp) == EOF) ? HT_ERROR : HT_OK;
570: }
571:
572: PRIVATE int HTCache_putBlock ARGS3(HTStream *, me, CONST char*, s, int, l)
573: {
574: int status = (fwrite(s, 1, l, me->fp) != l) ? HT_ERROR : HT_OK;
575: if (l > 1 && status == HT_OK)
576: (void) HTCache_flush(me);
577: return status;
578: }
579:
580: PRIVATE int HTCache_putChar ARGS2(HTStream *, me, char, c)
581: {
582: return HTCache_putBlock(me, &c, 1);
583: }
584:
585: PRIVATE int HTCache_putString ARGS2(HTStream *, me, CONST char*, s)
586: {
587: return HTCache_putBlock(me, s, (int) strlen(s));
588: }
589:
590: PRIVATE int HTCache_free ARGS1(HTStream *, me)
591: {
2.7 frystyk 592: me->cache->load_delay = time(NULL) - me->cache->start_time;
2.1 frystyk 593: fclose(me->fp);
594: free(me);
595: return HT_OK;
596: }
597:
598: PRIVATE int HTCache_abort ARGS2(HTStream *, me, HTError, e)
599: {
600: if (CACHE_TRACE)
601: fprintf(TDEST, "Cache....... ABORTING\n");
602: if (me->fp)
603: fclose(me->fp);
604: if (me->cache)
605: HTCache_remove(me->cache);
606: free(me);
607: return HT_ERROR;
608: }
609:
610: PRIVATE CONST HTStreamClass HTCacheClass = /* Method table that HTCacheWriter() installs as me->isa */
611: {
612: "Cache", /* Presumably the class name; the HTStreamClass layout is declared outside this file */
613: HTCache_flush, /* fflush() on the cache file */
614: HTCache_free, /* Normal end: records load_delay, closes the file, frees the stream */
615: HTCache_abort, /* Abnormal end: closes the file, removes the cache entry, frees the stream */
616: HTCache_putChar, /* One character, via HTCache_putBlock() */
617: HTCache_putString, /* NUL-terminated string, via HTCache_putBlock() */
618: HTCache_putBlock /* Counted block, written with fwrite() */
619: };
620:
621:
622: /* Cache Writer
623: ** ------------------
624: **
625: */
626: PUBLIC HTStream* HTCacheWriter ARGS5(
627: HTRequest *, request,
628: void *, param,
629: HTFormat, input_format,
630: HTFormat, output_format,
631: HTStream *, output_stream)
632:
633: {
634: char *fnam;
635: HTStream *me;
636: if (HTSecure) {
637: if (CACHE_TRACE)
638: fprintf(TDEST, "Cache....... No caching in secure mode.\n");
639: return HTBlackHole();
640: }
641:
642: /* Get a file name and open file */
643: if ((fnam = HTCache_getName(HTAnchor_physical(request->anchor))) == NULL)
644: return HTBlackHole();
645:
646: /* Set up the stream */
647: if ((me = (HTStream *) calloc(sizeof(*me), 1)) == NULL)
648: outofmem(__FILE__, "Cache");
649: me->isa = &HTCacheClass;
650: me->request = request;
2.5 frystyk 651: if ((me->fp = fopen(fnam, "wb")) == NULL) {
2.1 frystyk 652: if (CACHE_TRACE)
653: fprintf(TDEST, "Cache....... Can't open %s for writing\n", fnam);
654: free(fnam);
655: return HTBlackHole();
656: } else
657: if (CACHE_TRACE)
658: fprintf(TDEST, "Cache....... Creating file %s\n", fnam);
659:
660: /* Set up a cache record */
2.7 frystyk 661: if ((me->cache = (HTCache *) calloc(sizeof(*me->cache), 1)) == NULL)
2.1 frystyk 662: outofmem(__FILE__, "Cache");
663: me->cache->filename = fnam;
664: me->cache->start_time = time(NULL);
665: me->cache->format = input_format;
666:
667: /* Keep a global list of all cache items */
2.7 frystyk 668: if (!HTCacheList) HTCacheList = HTList_new();
669: HTList_addObject(HTCacheList, me->cache);
670: limit_cache(HTCacheList); /* Limit number (not size) of files */
2.1 frystyk 671: return me;
672: }
Webmaster