Annotation of libwww/Library/src/HTCache.c, revision 2.3
2.1 frystyk 1: /* HTCache.c
2: ** CACHE WRITER
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
6: **
7: ** This module manages the cache
8: **
9: ** History:
10: ** HFN: spawned from HTFwrite
11: ** HWL: converted the caching scheme to be hierarchical by taking
12: ** AL code from Daemon
13: **
14: */
15:
16: /* Library include files */
17: #include "tcp.h"
18: #include "HTUtils.h"
19: #include "HTString.h"
20: #include "HTFormat.h"
21: #include "HTFWrite.h"
22: #include "HTBind.h"
23: #include "HTList.h"
24: #include "HTParse.h"
25: #include "HTCache.h" /* Implemented here */
26:
27: /*
28: ** The cache limit is the number of files which are kept. Yes, I know,
29: ** the amount of disk space would be more relevant. So this may change.
30: ** Currently it is preset to 5 (CACHE_LIMIT) but may be changed by the application by
31: ** writing into this variable.
32: */
33: #define CACHE_LIMIT 5 /* Number of files */
34:
35: #define CACHE_INFO ".cache_info" /* Reserved file name: URLs ending in it are not cached */
36: #define INDEX_FILE ".cache_dirindex" /* Reserved; a cached file that must become a directory is moved to <dir>/INDEX_FILE */
37: #define WELCOME_FILE ".cache_welcome" /* Reserved; appended to the cache name of URLs whose path ends in '/' */
38: #define TMP_SUFFIX ".cache_tmp" /* Reserved suffix; used for the temporary name while a file is turned into a directory */
39: #define LOCK_SUFFIX ".cache_lock" /* Reserved suffix; only checked by reserved_name() in this file */
40:
41: typedef struct _HTCacheItem { /* One entry of the HTCache list: a document cached on disk */
42: HTFormat format; /* May have many formats per anchor */
43: char * filename; /* Path of the cache file; freed together with the item */
44: time_t start_time; /* time() when the cache writer stream was created */
45: time_t load_delay; /* time() when the stream was freed minus start_time; limit_cache() compares it */
46: int reference_count; /* Not read or written by any code visible in this file */
47: } HTCacheItem;
48:
49: struct _HTStream { /* State of one cache writer stream, allocated by HTCacheWriter() */
50: CONST HTStreamClass * isa; /* Method table; HTCacheWriter() sets it to &HTCacheClass */
51: FILE * fp; /* Cache file opened for writing */
52: HTCacheItem * cache; /* Record describing this file; also stored in the HTCache list */
53: HTRequest * request; /* Request the stream was created for */
54: };
55:
56: PRIVATE BOOL HTCacheEnable = NO; /* Toggled by HTCache_enable() / HTCache_disable() */
57: PRIVATE char * HTCacheRoot = NULL; /* Destination for cache */
58: PRIVATE HTList * HTCache = NULL; /* List of cached elements */
59: PRIVATE int HTCacheLimit = CACHE_LIMIT; /* limit_cache() evicts once the list holds at least this many items */
60:
2.2 frystyk 61:
2.1 frystyk 62: /* ------------------------------------------------------------------------- */
2.2 frystyk 63: /* GARBAGE COLLECTOR */
2.1 frystyk 64: /* ------------------------------------------------------------------------- */
65:
66: /*
67: ** Removes cache item from disk and corresponding object from list in memory
68: */
69: PRIVATE void HTCache_remove ARGS1(HTCacheItem *, item)
70: {
71: if (HTCache && item) {
72: if (CACHE_TRACE)
73: fprintf(TDEST, "Cache....... Removing %s\n", item->filename);
74: HTList_removeObject(HTCache, item);
75: REMOVE(item->filename);
76:
77: /* HWL 22/9/94: Clean up hierachical file structure */
78: {
79: char * p;
80: while ((p = strrchr(item->filename,'/')) && (p != NULL)){
81: item->filename[p-item->filename] = 0;
82: if (strcmp(item->filename, HTCacheRoot) != 0) {
83: if (CACHE_TRACE)
84: fprintf(TDEST, "rmdir....... %s\n", item->filename);
85: RMDIR(item->filename); /* fails if directory isn't empty */
86: }
87: }
88: }
89: free(item->filename);
90: free(item);
91: }
92: }
93:
94:
95: /*
96: ** Remove a file from the cache to prevent too many files from being cached
97: */
98: PRIVATE void limit_cache ARGS1(HTList * , list)
99: {
100: HTList * cur = list;
101: HTCacheItem * item;
102: time_t best_delay = 0; /* time_t in principle can be any arith type */
103: HTCacheItem* best_item = NULL;
104:
105: if (HTList_count(list) < HTCacheLimit) return; /* Limit not reached */
106:
107: while (NULL != (item = (HTCacheItem*)HTList_nextObject(cur))) {
108: if (best_delay == 0 || item->load_delay < best_delay) {
109: best_delay = item->load_delay;
110: best_item = item;
111: }
112: }
113: if (best_item) HTCache_remove(best_item);
114: }
115:
116: /*
117: ** Check that the name we're about to generate doesn't
118: ** clash with anything used by the caching system.
119: */
120: PRIVATE BOOL reserved_name ARGS1(char *, url)
121: {
122: char * name = strrchr(url, '/');
123: char * suff = NULL;
124:
125: if (name) name++;
126: else name = url;
127:
128: if (!strcmp(name, CACHE_INFO) ||
129: !strcmp(name, INDEX_FILE) ||
130: !strcmp(name, WELCOME_FILE))
131: return YES;
132:
133: suff = strrchr(name, TMP_SUFFIX[0]);
134: if (suff && !strcmp(suff, TMP_SUFFIX))
135: return YES;
136:
137: suff = strrchr(name, LOCK_SUFFIX[0]);
138: if (suff && !strcmp(suff, LOCK_SUFFIX))
139: return YES;
140:
141: return NO;
142: }
143:
144: /*
2.2 frystyk 145: ** Removes all cache entries in memory
146: */
147: PUBLIC void HTCache_clearMem NOARGS
148: {
149: HTList *cur=HTCache;
150: HTCacheItem *pres;
151: if (cur) {
152: while ((pres = (HTCacheItem *) HTList_nextObject(cur))) {
153: FREE(pres->filename);
154: free(pres);
155: }
156: HTList_delete(HTCache);
157: HTCache = NULL;
158: }
159: }
160:
161: /*
162: ** Removes all cache entries in memory and on disk
163: */
164: PUBLIC void HTCache_deleteAll NOARGS
165: {
166: HTList *cur=HTCache;
167: HTCacheItem * pres;
168: if (cur) {
169: while ((pres = (HTCacheItem *) HTList_lastObject(cur)))
170: HTCache_remove(pres);
171: HTList_delete(HTCache);
172: HTCache = NULL;
173: }
174: }
175:
176: /* ------------------------------------------------------------------------- */
177: /* NAMING SCHEME */
178: /* ------------------------------------------------------------------------- */
179:
180: /*
2.1 frystyk 181: ** Map url to cache file name.
182: */
183: PRIVATE char * cache_file_name ARGS1(char *, url)
184: {
185: char * access = NULL;
186: char * host = NULL;
187: char * path = NULL;
188: char * cfn = NULL;
189: BOOL welcome = NO;
190: BOOL res = NO;
191:
192: if (!url || strchr(url, '?') || (res = reserved_name(url)) ||
193: !(access = HTParse(url, "", PARSE_ACCESS)) ||
194: (0 != strcmp(access, "http") &&
195: 0 != strcmp(access, "ftp") &&
196: 0 != strcmp(access, "gopher"))) {
197:
198: if (access) free(access);
199:
200: if (res && CACHE_TRACE)
201: fprintf(TDEST,
202: "Cache....... Clash with reserved name (\"%s\")\n",url);
203:
204: return NULL;
205: }
206:
207: host = HTParse(url, "", PARSE_HOST);
208: path = HTParse(url, "", PARSE_PATH | PARSE_PUNCTUATION);
209: if (path && path[strlen(path)-1] == '/')
210: welcome = YES;
211:
212: cfn = (char*)malloc(strlen(HTCacheRoot) +
213: strlen(access) +
214: (host ? strlen(host) : 0) +
215: (path ? strlen(path) : 0) +
216: (welcome ? strlen(WELCOME_FILE) : 0) + 3);
217: if (!cfn) outofmem(__FILE__, "cache_file_name");
218:
219: /* Removed extra slash - HF May2,95 */
220: sprintf(cfn, "%s%s/%s%s%s", HTCacheRoot, access, host, path,
221: (welcome ? WELCOME_FILE : ""));
222:
223: FREE(access); FREE(host); FREE(path);
224:
225: /*
226: ** This checks that the last component is not too long.
227: ** It could check all the components, but the last one
228: ** is most important because it could later blow up the
229: ** whole gc when reading cache info files.
230: ** Operating system handles other cases.
231: ** 64 = 42 + 22 and 22 = 42 - 20 :-)
232: ** In other words I just picked some number, it doesn't
233: ** really matter that much.
234: */
235: {
236: char * last = strrchr(cfn, '/');
237: if (!last) last = cfn;
238: if ((int)strlen(last) > 64) {
239: if (CACHE_TRACE)
240: fprintf(TDEST, "Too long.... cache file name \"%s\"\n", cfn);
241: free(cfn);
242: cfn = NULL;
243: }
244: }
245: return cfn;
246: }
247:
248:
249: /*
250: ** Create directory path for cache file
251: **
252: ** On exit:
253: ** return YES
254: ** if directories created -- after that caller
255: ** can rely on fopen(cfn,"w") succeeding.
256: **
257: */
258: PRIVATE BOOL create_cache_place ARGS1(char *, cfn)
259: {
260: struct stat stat_info;
261: char * cur = NULL;
262: BOOL create = NO;
263:
264: if (!cfn || (int)strlen(cfn) <= (int)strlen(HTCacheRoot) + 1)
265: return NO;
266:
267: cur = cfn + strlen(HTCacheRoot) + 1;
268:
269: while ((cur = strchr(cur, '/'))) {
270: *cur = 0;
2.3 ! frystyk 271: if (create || STAT(cfn, &stat_info) == -1) {
2.1 frystyk 272: create = YES; /* To avoid doing stat()s in vain */
273: if (CACHE_TRACE)
274: fprintf(TDEST,"Cache....... creating cache dir \"%s\"\n",cfn);
275: if (MKDIR(cfn, 0777) < 0) {
276: if (CACHE_TRACE)
277: fprintf(TDEST,"Cache....... can't create dir `%s\'\n",cfn);
278: return NO;
279: }
280: } else {
281: if (S_ISREG(stat_info.st_mode)) {
282: int len = strlen(cfn);
283: char * tmp1 = (char*)malloc(len + strlen(TMP_SUFFIX) + 1);
284: char * tmp2 = (char*)malloc(len + strlen(INDEX_FILE) + 2);
285: /* time_t t1,t2,t3,t4,t5; */
286:
287:
288: sprintf(tmp1, "%s%s", cfn, TMP_SUFFIX);
289: sprintf(tmp2, "%s/%s", cfn, INDEX_FILE);
290:
291: if (CACHE_TRACE) {
292: fprintf(TDEST,"Cache....... moving \"%s\" to \"%s\"\n",
293: cfn,tmp1);
294: fprintf(TDEST,"and......... creating dir \"%s\"\n",
295: cfn);
296: fprintf(TDEST,"and......... moving \"%s\" to \"%s\"\n",
297: tmp1,tmp2);
298: }
299: rename(cfn,tmp1);
300: (void) MKDIR(cfn, 0777);
301: rename(tmp1,tmp2);
302: free(tmp1);
303: free(tmp2);
304: }
305: else {
306: if (CACHE_TRACE)
307: fprintf(TDEST,"Cache....... dir \"%s\" already exists\n",
308: cfn);
309: }
310: }
311: *cur = '/';
312: cur++;
313: }
314: return YES;
315: }
316:
317:
318: /* Create a cache path
319: ** -------------------
320: ** Find a full path name for the cache file and create the path if it
321: ** does not already exist. Returns name or NULL
322: ** HWL 22/9/94
323: ** HWL added support for hierachical structure
324: */
325: PRIVATE char *HTCache_getName ARGS1(char *, url)
326: {
327: char *filename = cache_file_name(url);
328: if (!filename)
329: return NULL;
330: if (create_cache_place(filename))
331: return(filename);
332: return NULL;
333: }
334:
335: /*
336: ** Make a WWW name from a cache name and returns it if OK, else NULL.
337: ** The string returned must be freed by the caller.
338: ** We keep this function private as we might change the naming scheme for
339: ** cache files. Right now it follows the file hierarchi.
340: */
341: PRIVATE char *HTCache_wwwName ARGS1 (char *, name)
342: {
343: char * result = NULL;
344: if (name && *name) {
345: StrAllocCopy(result, "file:"); /* We get an absolute file name */
346: #ifdef VMS
347: /* convert directory name to Unix-style syntax */
348: char * disk = strchr (name, ':');
349: char * dir = strchr (name, '[');
350: if (disk) {
351: *disk = '\0';
352: StrAllocCat(result, "/"); /* needs delimiter */
353: StrAllocCat(result, name);
354: }
355: if (dir) {
356: char *p;
357: *dir = '/'; /* Convert leading '[' */
358: for (p = dir ; *p != ']'; ++p)
359: if (*p == '.') *p = '/';
360: *p = '\0'; /* Cut on final ']' */
361: StrAllocCat(result, dir);
362: }
363: #else /* not VMS */
364: #ifdef WIN32
365: char * p = name; /* a colon */
366: StrAllocCat(result, "/");
367: while( *p != 0 ) {
368: if (*p == '\\') /* change to one true slash */
369: *p = '/' ;
370: p++;
371: }
372: StrAllocCat(result, name);
373: #else /* not WIN32 */
374: StrAllocCat (result, name);
375: #endif /* not WIN32 */
376: #endif /* not VMS */
377: }
378: return result;
379: }
380:
2.2 frystyk 381: /* ------------------------------------------------------------------------- */
382: /* CACHE PARAMETERS */
383: /* ------------------------------------------------------------------------- */
2.1 frystyk 384:
385: /* Enable Cache
386: ** ------------
387: ** If `cache_root' is NULL then reuse old value or use HT_CACHE_ROOT.
388: ** An empty string will make '/' as cache root
389: */
390: PUBLIC BOOL HTCache_enable ARGS1(CONST char *, cache_root)
391: {
392: if (cache_root)
393: HTCache_setRoot(cache_root);
394: HTCacheEnable = YES;
395: return YES;
396: }
397:
398:
399: /* Disable Cache
400: ** ------------
401: ** Turns off the cache. Note that the cache can be disabled and enabled
402: ** at any time. The cache root is kept and can be reused during the
403: ** execution.
404: */
405: PUBLIC BOOL HTCache_disable NOARGS
406: {
407: HTCacheEnable = NO;
408: return YES;
409: }
410:
411: /* Is Cache Enabled
412: ** ----------------
413: ** Returns YES or NO. Also makes sure that we have a root value
414: ** (even though it might be invalid)
415: */
416: PUBLIC BOOL HTCache_isEnabled NOARGS
417: {
418: if (!HTSecure && HTCacheEnable) {
419: if (!HTCacheRoot)
420: HTCache_setRoot(NULL);
421: return YES;
422: }
423: return NO;
424: }
425:
426:
427: /* Set Cache Root
428: ** --------------
429: ** If `cache_root' is NULL then the current value (might be a define)
430: ** Should we check if the cache_root is actually OK? I think not!
431: */
432: PUBLIC BOOL HTCache_setRoot ARGS1(CONST char *, cache_root)
433: {
434: StrAllocCopy(HTCacheRoot, cache_root ? cache_root : HT_CACHE_ROOT);
435: if (*(HTCacheRoot+strlen(HTCacheRoot)-1) != '/')
436: StrAllocCat(HTCacheRoot, "/");
437: if (CACHE_TRACE)
438: fprintf(TDEST, "Cache Root.. Root set to `%s\'\n", HTCacheRoot);
439: return YES;
440: }
441:
442:
443: /* Get Cache Root
444: ** --------------
445: */
446: PUBLIC CONST char * HTCache_getRoot NOARGS
447: {
448: return HTCacheRoot;
449: }
450:
451: /* Free Cache Root
452: ** --------------
453: ** For clean up memory
454: */
455: PUBLIC void HTCache_freeRoot NOARGS
456: {
457: FREE(HTCacheRoot);
458: }
459:
460: /* ------------------------------------------------------------------------- */
2.2 frystyk 461: /* CACHE MANAGER */
462: /* ------------------------------------------------------------------------- */
463:
464: /*
465: ** Verifies if a cache object exists for this URL and if so returns a URL
466: ** for the cached object. It does not verify whether the object is valid or
467: ** not, for example it might have expired.
468: **
469: ** Returns: file name If OK (must be freed by caller)
470: ** NULL If no cache object found
471: */
472: PUBLIC char * HTCache_getReference ARGS1(char *, url)
473: {
474: if (url && HTCache_isEnabled()) {
475: char *fnam = cache_file_name(url);
476: if (fnam) {
477: FILE *fp = fopen(fnam, "r");
478: if (fp) {
479: char *url = HTCache_wwwName(fnam);
480: fclose(fp);
481: if (CACHE_TRACE)
482: fprintf(TDEST, "Cache....... Object found `%s\'\n", url);
483: free(fnam);
484: return url;
485: } else
486: free(fnam);
487: }
488: }
489: return NULL;
490: }
491:
492: /*
493: ** This function checks whether a document has expired or not.
494: ** The check is based on the metainformation passed in the anchor object
495: ** The function returns YES or NO.
496: */
497: PUBLIC BOOL HTCache_isValid ARGS1(HTParentAnchor *, anchor)
498: {
499: time_t cur = time(NULL);
500: time_t expires = HTAnchor_expires(anchor);
501: return (expires>0 && cur>0 && expires<cur) ? NO : YES;
502: }
503:
504: /* ------------------------------------------------------------------------- */
2.1 frystyk 505: /* CACHE WRITER STREAM */
506: /* ------------------------------------------------------------------------- */
507:
508: PRIVATE int HTCache_flush ARGS1(HTStream *, me)
509: {
510: return (fflush(me->fp) == EOF) ? HT_ERROR : HT_OK;
511: }
512:
513: PRIVATE int HTCache_putBlock ARGS3(HTStream *, me, CONST char*, s, int, l)
514: {
515: int status = (fwrite(s, 1, l, me->fp) != l) ? HT_ERROR : HT_OK;
516: if (l > 1 && status == HT_OK)
517: (void) HTCache_flush(me);
518: return status;
519: }
520:
521: PRIVATE int HTCache_putChar ARGS2(HTStream *, me, char, c)
522: {
523: return HTCache_putBlock(me, &c, 1);
524: }
525:
526: PRIVATE int HTCache_putString ARGS2(HTStream *, me, CONST char*, s)
527: {
528: return HTCache_putBlock(me, s, (int) strlen(s));
529: }
530:
531: PRIVATE int HTCache_free ARGS1(HTStream *, me)
532: {
533: me->cache->load_delay = time(NULL)-me->cache->start_time;
534: fclose(me->fp);
535: free(me);
536: return HT_OK;
537: }
538:
539: PRIVATE int HTCache_abort ARGS2(HTStream *, me, HTError, e)
540: {
541: if (CACHE_TRACE)
542: fprintf(TDEST, "Cache....... ABORTING\n");
543: if (me->fp)
544: fclose(me->fp);
545: if (me->cache)
546: HTCache_remove(me->cache);
547: free(me);
548: return HT_ERROR;
549: }
550:
551: PRIVATE CONST HTStreamClass HTCacheClass = /* Method table of the cache writer stream; installed by HTCacheWriter() */
552: {
553: "Cache", /* Name of the stream class */
554: HTCache_flush, /* Flushes the cache file */
555: HTCache_free, /* Normal end: records the load delay, closes the file */
556: HTCache_abort, /* Error end: closes the file and removes the cache entry */
557: HTCache_putChar, /* Writes a single character */
558: HTCache_putString, /* Writes a NUL-terminated string */
559: HTCache_putBlock /* Writes a block of bytes */
560: };
561:
562:
563: /* Cache Writer
564: ** ------------------
565: **
566: */
567: PUBLIC HTStream* HTCacheWriter ARGS5(
568: HTRequest *, request,
569: void *, param,
570: HTFormat, input_format,
571: HTFormat, output_format,
572: HTStream *, output_stream)
573:
574: {
575: char *fnam;
576: HTStream *me;
577: if (HTSecure) {
578: if (CACHE_TRACE)
579: fprintf(TDEST, "Cache....... No caching in secure mode.\n");
580: return HTBlackHole();
581: }
582:
583: /* Get a file name and open file */
584: if ((fnam = HTCache_getName(HTAnchor_physical(request->anchor))) == NULL)
585: return HTBlackHole();
586:
587: /* Set up the stream */
588: if ((me = (HTStream *) calloc(sizeof(*me), 1)) == NULL)
589: outofmem(__FILE__, "Cache");
590: me->isa = &HTCacheClass;
591: me->request = request;
592: if ((me->fp = fopen(fnam, "w")) == NULL) {
593: if (CACHE_TRACE)
594: fprintf(TDEST, "Cache....... Can't open %s for writing\n", fnam);
595: free(fnam);
596: return HTBlackHole();
597: } else
598: if (CACHE_TRACE)
599: fprintf(TDEST, "Cache....... Creating file %s\n", fnam);
600:
601: /* Set up a cache record */
602: if ((me->cache = (HTCacheItem *) calloc(sizeof(*me->cache), 1)) == NULL)
603: outofmem(__FILE__, "Cache");
604: me->cache->filename = fnam;
605: me->cache->start_time = time(NULL);
606: me->cache->format = input_format;
607:
608: /* Keep a global list of all cache items */
609: if (!HTCache) HTCache = HTList_new();
610: HTList_addObject(HTCache, me->cache);
611: limit_cache(HTCache); /* Limit number (not size) of files */
612: return me;
613: }
Webmaster