Amaya/amaya/AHTURLTools.c - annotate

Return to AHTURLTools.c CVS log
Up to [Public] / Amaya / amaya
Annotation of Amaya/amaya/AHTURLTools.c, revision 1.3

1.3     ! cvs         1: /* 
        !             2: ** -- Copyright (c) 1996-1997 Inria/CNRS  All rights reserved. -- 
        !             3: */
        !             4: 
        !             5: #include "amaya.h"
        !             6: 
        !             7: #include "dialog.h"
        !             8: #include "content.h"
        !             9: #include "view.h"
        !            10: #include "interface.h"
        !            11: #include "message.h"
        !            12: #include "conststr.h"
        !            13: #include "AHTMemConv.h"
        !            14: #include "init.h"
        !            15: #include "AHTURLTools.h" /** defined here **/
        !            16: 
        !            17: /*+--------------------------------------------------------------------+ */
        !            18: /*| IsHTMLName                                                         | */
        !            19: /*+--------------------------------------------------------------------+ */
        !            20: 
        !            21: #ifdef __STDC__
        !            22: boolean             IsHTMLName (char *path)
        !            23: #else  /* __STDC__ */
        !            24: boolean             IsHTMLName (path)
        !            25: char               *path;
        !            26: 
        !            27: #endif /* __STDC__ */
        !            28: {
        !            29:   char                temppath[MAX_LENGTH];
        !            30:   char                suffix[MAX_LENGTH];
        !            31:   char                nsuffix[MAX_LENGTH];
        !            32:   int                 i;
        !            33: 
        !            34:   if (!path)
        !            35:     return FALSE;
        !            36: 
        !            37:   strcpy (temppath, path);
        !            38:   ExtractSuffix (temppath, suffix);
        !            39: 
        !            40:   /* Normalize the suffix */
        !            41:   i = 0;
        !            42:   while (suffix[i] != EOS)
        !            43:     nsuffix[i] = TOLOWER (suffix[i++]);
        !            44:   nsuffix[i] = EOS;
        !            45:   if ((strcmp (nsuffix, "html")) &&
        !            46:       (strcmp (nsuffix, "htm")) &&
        !            47:       (strcmp (nsuffix, "shtml")))
        !            48:     return FALSE;
        !            49:   return TRUE;
        !            50: }
        !            51: 
        !            52: /*+--------------------------------------------------------------------+ */
        !            53: /*| IsImageName                                                        | */
        !            54: /*+--------------------------------------------------------------------+ */
        !            55: 
        !            56: #ifdef __STDC__
        !            57: boolean             IsImageName (char *path)
        !            58: #else  /* __STDC__ */
        !            59: boolean             IsImageName (path)
        !            60: char               *path;
        !            61: 
        !            62: #endif /* __STDC__ */
        !            63: {
        !            64:   char                temppath[MAX_LENGTH];
        !            65:   char                suffix[MAX_LENGTH];
        !            66:   char                nsuffix[MAX_LENGTH];
        !            67:   int                 i;
        !            68: 
        !            69:   if (!path)
        !            70:     return FALSE;
        !            71: 
        !            72:   strcpy (temppath, path);
        !            73:   ExtractSuffix (temppath, suffix);
        !            74: 
        !            75:   /* Normalize the suffix */
        !            76:   i = 0;
        !            77:   while (suffix[i] != EOS)
        !            78:     nsuffix[i] = TOLOWER (suffix[i++]);
        !            79:   nsuffix[i] = EOS;
        !            80:   if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
        !            81:       (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
        !            82:       (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
        !            83:     return FALSE;
        !            84:   return TRUE;
        !            85: }
        !            86: 
        !            87: /*+--------------------------------------------------------------------+ */
        !            88: /*| IsTextName                                                         | */
        !            89: /*+--------------------------------------------------------------------+ */
        !            90: 
        !            91: #ifdef __STDC__
        !            92: boolean             IsTextName (char *path)
        !            93: #else  /* __STDC__ */
        !            94: boolean             IsTextName (path)
        !            95: char               *path;
        !            96: 
        !            97: #endif /* __STDC__ */
        !            98: {
        !            99:   char                temppath[MAX_LENGTH];
        !           100:   char                suffix[MAX_LENGTH];
        !           101:   char                nsuffix[MAX_LENGTH];
        !           102:   int                 i;
        !           103: 
        !           104:   if (!path)
        !           105:     return FALSE;
        !           106: 
        !           107:   strcpy (temppath, path);
        !           108:   ExtractSuffix (temppath, suffix);
        !           109: 
        !           110:   /* Normalize the suffix */
        !           111:   i = 0;
        !           112:   while (suffix[i] != EOS)
        !           113:     {
        !           114:       nsuffix[i] = TOLOWER (suffix[i]);
        !           115:       i++;
        !           116:     }
        !           117:   nsuffix[i] = EOS;
        !           118: 
        !           119:   if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
        !           120:       (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
        !           121:       (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
        !           122:       (strcmp (nsuffix, "Z")) && (strcmp (nsuffix, "gz")) &&
        !           123:       (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) &&
        !           124:       (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) &&
        !           125:       (strcmp (nsuffix, "au")))
        !           126:     return TRUE;
        !           127:   return FALSE;
        !           128: }
        !           129: 
        !           130: /*+--------------------------------------------------------------------+ */
        !           131: /*| IsHTTPPath                                                         | */
        !           132: /*+--------------------------------------------------------------------+ */
        !           133: 
        !           134: #ifdef __STDC__
        !           135: boolean             IsHTTPPath (char *path)
        !           136: #else  /* __STDC__ */
        !           137: boolean             IsHTTPPath (path)
        !           138: char               *path;
        !           139: 
        !           140: #endif /* __STDC__ */
        !           141: {
        !           142:   if (!path)
        !           143:     return FALSE;
        !           144: 
        !           145:   if (strncmp (path, "http:", 5) != 0)
        !           146:     return FALSE;
        !           147:   return TRUE;
        !           148: }
        !           149: 
        !           150: /*+--------------------------------------------------------------------+ */
        !           151: /*| IsWithParameters                                                   | */
        !           152: /*+--------------------------------------------------------------------+ */
        !           153: 
        !           154: #ifdef __STDC__
        !           155: boolean             IsWithParameters (char *path)
        !           156: #else  /* __STDC__ */
        !           157: boolean             IsWithParameters (path)
        !           158: char               *path;
        !           159: 
        !           160: #endif /* __STDC__ */
        !           161: {
        !           162:   int                 i;
        !           163: 
        !           164:   if ((!path) || (path[0] == EOS))
        !           165:     return FALSE;
        !           166: 
        !           167:   i = strlen (path) - 1;
        !           168:   while (i > 0 && path[i--] != '?')
        !           169:     if (i < 0)
        !           170:       return FALSE;
        !           171: 
        !           172:   /* There is a parameter */
        !           173:   return TRUE;
        !           174: }
        !           175: 
        !           176: /*+--------------------------------------------------------------------+ */
        !           177: /*| IsW3Path                                                           | */
        !           178: /*+--------------------------------------------------------------------+ */
        !           179: 
        !           180: #ifdef __STDC__
        !           181: boolean             IsW3Path (char *path)
        !           182: #else  /* __STDC__ */
        !           183: boolean             IsW3Path (path)
        !           184: char               *path;
        !           185: 
        !           186: #endif /* __STDC__ */
        !           187: {
        !           188:   if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
        !           189:       (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
        !           190:       (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
        !           191:       (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
        !           192:     return FALSE;
        !           193:   return TRUE;
        !           194: }
        !           195: 
        !           196: /*+--------------------------------------------------------------------+ */
        !           197: /*| IsValidProtocol                                                    | */
        !           198: /*+--------------------------------------------------------------------+ */
        !           199: 
        !           200: #ifdef __STDC__
        !           201: boolean             IsValidProtocol (char *path)
        !           202: #else  /* __STDC__ */
        !           203: boolean             IsValidProtocol (path)
        !           204: char               *path;
        !           205: 
        !           206: #endif /* __STDC__ */
        !           207: {
        !           208:   if (!strncmp (path, "http:", 5)
        !           209:       /***|| !strncmp (path, "ftp:", 4)
        !           210:       || !strncmp (path, "news:", 5)***/)
        !           211:     return (YES);
        !           212:   else
        !           213:     return (NO);
        !           214: }
        !           215: 
        !           216: /*+--------------------------------------------------------------------+ */
        !           217: /*| IsValidNormalizeURL  says which URL's may be normalized            | */
        !           218: /*+--------------------------------------------------------------------+ */
        !           219: 
        !           220: #ifdef __STDC__
        !           221: boolean             IsValidNormalizeURL (char *path)
        !           222: #else  /* __STDC__ */
        !           223: boolean             IsValidNormalizeURL (path)
        !           224: char               *path;
        !           225: 
        !           226: #endif /* __STDC__ */
        !           227: {
        !           228:  if (strchr(path,':') && !strncmp (path, "http:", 5))
        !           229:    return (YES);
        !           230:  else
        !           231:    return (NO);
        !           232: }
        !           233: 
        !           234: 
        !           235: /*+--------------------------------------------------------------------+ */
        !           236: /*| NormalizeURL provides the new complete and normalized URL or file  | */
        !           237: /*|            name path and the name of the document.                 | */
        !           238: /*|            orgName is the original requested name.                 | */
        !           239: /*|            doc identifies the document which provides the original | */
        !           240: /*|            name.                                                   | */
        !           241: /*|            newName is the resulting URL of file name.              | */
        !           242: /*|            docName is the resulting document name.                 | */
        !           243: /*+--------------------------------------------------------------------+ */
        !           244: 
        !           245: #ifdef __STDC__
        !           246: void                NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
        !           247: #else  /* __STDC__ */
        !           248: void                NormalizeURL (orgName, doc, newName, docName)
        !           249: char               *orgName;
        !           250: Document            doc;
        !           251: char               *newName;
        !           252: char               *docName;
        !           253: 
        !           254: #endif /* __STDC__ */
        !           255: {
        !           256:   char                basename[MAX_LENGTH];
        !           257:   char                tempname[MAX_LENGTH];
        !           258:   int                 i;
        !           259:   char               *ptr;
        !           260:   char               *basename_ptr;
        !           261:   int                 basename_flag;
        !           262:   Element             el;
        !           263:   ElementType         elType;
        !           264:   AttributeType       attrType;
        !           265:   Attribute           attrHREF;
        !           266:   int                 length;
        !           267: 
        !           268:   /* Fix up orgName, by erasing leading and trailing white space */
        !           269:   if (!newName || !docName)
        !           270:     return;
        !           271:   ptr = orgName;
        !           272:   while (*ptr == ' ' && *ptr++ != EOS) ;
        !           273:   strcpy (tempname, ptr);
        !           274:   ptr = strchr (tempname, ' ');
        !           275:   if (ptr)
        !           276:     *ptr = EOS;
        !           277: 
        !           278:   /* 
        !           279:   ** the following block to take into account the BASE element.
        !           280:   ** This is not very optimized, as this procedure is repeated for
        !           281:   ** each element which is retrieved. A better way would be to
        !           282:   ** move this higher up in the function call hierarchy.
        !           283:   */
        !           284:   if (IsValidNormalizeURL (tempname) && doc)
        !           285:     {
        !           286:       length = MAX_LENGTH;
        !           287:       /* get the root element    */
        !           288:       el = TtaGetMainRoot (doc);
        !           289: 
        !           290:       /* search the BASE element */
        !           291:       elType.ElSSchema = TtaGetDocumentSSchema (doc);
        !           292:       elType.ElTypeNum = HTML_EL_BASE;
        !           293:       el = TtaSearchTypedElement (elType, SearchInTree, el);
        !           294:       if (el)
        !           295:        {
        !           296:          /* 
        !           297:          ** The document has a BASE element 
        !           298:          ** Get the HREF attribute of the BASE Element 
        !           299:          */
        !           300:          attrType.AttrSSchema = elType.ElSSchema;
        !           301:          attrType.AttrTypeNum = HTML_ATTR_HREF_;
        !           302:          attrHREF = TtaGetAttribute (el, attrType);
        !           303:          if (attrHREF)
        !           304:            {
        !           305:              /* 
        !           306:              ** Use the base path of the document 
        !           307:              ** To do: verify length of the buffer
        !           308:              ** length > TtaGetTextAttributeLength (attrHREF) + strlen (orgName) 
        !           309:              */
        !           310:              TtaGiveTextAttributeValue (attrHREF, basename, &length);
        !           311: 
        !           312:              /* 
        !           313:              ** base and orgName have to be separated by a DIR_SEP 
        !           314:              */
        !           315:              if (basename[strlen (basename) - 1] != DIR_SEP && tempname[0] != DIR_SEP)
        !           316:                strcat (basename, DIR_STR);
        !           317:            }
        !           318:        }
        !           319:       else
        !           320:        basename[0] = EOS;
        !           321:     }
        !           322:   else
        !           323:     basename[0] = EOS;
        !           324: 
        !           325:   if (basename[0] == EOS)
        !           326:     {
        !           327:       /* 
        !           328:       ** There is no BASE element in that document.
        !           329:       ** A temporary fix as TtaExtractName does not tolerate a name
        !           330:       ** ending in /. Here, we reinsert the slash, in order to
        !           331:       ** parse the name in the following two lines. A bit
        !           332:       ** redundant and has to be reviewed.  
        !           333:       */
        !           334:       if (DocumentURLs[(int) doc])
        !           335:        {
        !           336:          basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL);
        !           337:          basename_flag = TRUE;
        !           338:        }
        !           339:       else
        !           340:        {
        !           341:          basename_ptr = "";
        !           342:          basename_flag = FALSE;
        !           343:        }
        !           344:     }
        !           345:   else
        !           346:     {
        !           347:       basename_ptr = HTParse (basename, "", PARSE_ALL);
        !           348:       basename_flag = TRUE;
        !           349:     }                          /* if-else tempname */
        !           350: 
        !           351:   ptr = HTParse (tempname, basename_ptr, PARSE_ALL);
        !           352:   if (basename_flag)
        !           353:     HT_FREE (basename_ptr);
        !           354:   if (ptr)
        !           355:     {
        !           356:       ptr = HTSimplify (&ptr);
        !           357:       strcpy (newName, ptr);
        !           358:       HT_FREE (ptr);
        !           359:     }
        !           360:   else
        !           361:     newName[0] = EOS;
        !           362: 
        !           363:   i = strlen (newName) - 1;
        !           364:   if (i > 0)
        !           365:     {
        !           366:       /* 
        !           367:       ** A temporary fix for an interfacing problem:
        !           368:       ** TtaExtractName does not tolerate url's finished on DIR_SEP
        !           369:       */
        !           370:       ptr = strrchr (newName, DIR_SEP);
        !           371:       if (ptr)
        !           372:        ptr++;
        !           373:       if (ptr && *ptr != EOS)
        !           374:        strcpy (docName, ptr);
        !           375:       else                     
        !           376:        /*
        !           377:        ** The docname was not comprised inside the URL, so let's 
        !           378:        ** assign a "noname.html" name :)
        !           379:        */
        !           380:        strcpy (docName, "noname.html");
        !           381: 
        !           382:       /* 
        !           383:       ** A temporary fix for an interfacing problem:
        !           384:       ** TtaExtractName does not tolerate url's finished on DIR_SEP
        !           385:       */
        !           386:       if (newName[i] == DIR_SEP)
        !           387:        newName[i] = EOS;
        !           388:     }
        !           389: }
        !           390: 
        !           391: /*+--------------------------------------------------------------------+ */
        !           392: /*| IsSameHost                                                         | */
        !           393: /*+--------------------------------------------------------------------+ */
        !           394: 
        !           395: #ifdef __STDC__
        !           396: boolean             IsSameHost (char *url1, char *url2)
        !           397: #else  /* __STDC__ */
        !           398: boolean             IsSameHost (url1, url2)
        !           399: char               *path;
        !           400: 
        !           401: #endif /* __STDC__ */
        !           402: {
        !           403:  char *basename_ptr1, *basename_ptr2;
        !           404:  boolean result;
        !           405: 
        !           406:  basename_ptr1 = HTParse(url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
        !           407:  basename_ptr2 = HTParse(url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
        !           408: 
        !           409:  if (strcmp (basename_ptr1, basename_ptr2))
        !           410:    result = NO;
        !           411:  else
        !           412:    result = YES;
        !           413: 
        !           414:  HT_FREE(basename_ptr1);
        !           415:  HT_FREE(basename_ptr2);
        !           416: 
        !           417:  return(result);
        !           418: }
        !           419: 
        !           420: 
        !           421: /*+--------------------------------------------------------------------+ */
        !           422: /*| AHTMakeRelativeURL                                                | */
        !           423: /*+--------------------------------------------------------------------+ */
        !           424: 
        !           425: #ifdef __STDC__
        !           426: char *AHTMakeRelativeName(char *url, char *base_url)
        !           427: #else  /* __STDC__ */
        !           428: char *AHTMakeRelativeName(url, base_url)
        !           429: char url;
        !           430: char base_url;
        !           431: #endif /* __STDC__ */
        !           432: {
        !           433:  char *base_ptr, *url_ptr;
        !           434:  char *result;
        !           435: 
        !           436:  /* verify if we are in the same host */
        !           437: 
        !           438:  base_ptr = HTParse(base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
        !           439:  url_ptr = HTParse(url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
        !           440:  
        !           441:  if (!strcmp (base_ptr, url_ptr)) {
        !           442:    HT_FREE(base_ptr);
        !           443:    HT_FREE(url_ptr);
        !           444: 
        !           445:    /* Normalize the URLs */
        !           446: 
        !           447:    base_ptr = HTParse(base_url, "", PARSE_ALL);
        !           448:    url_ptr = HTParse(url, "", PARSE_ALL);
        !           449: 
        !           450:    /* Use libwww to make relative name */
        !           451: 
        !           452:    result = HTRelative(url_ptr, base_ptr);
        !           453:    HT_FREE(base_ptr);
        !           454:    HT_FREE(url_ptr);
        !           455:  }
        !           456:  else
        !           457:    result = (char *) NULL;
        !           458: 
        !           459:  return(result);
        !           460: }
        !           461: 
        !           462: 
        !           463: 
        !           464: 
        !           465: 
        !           466: 
        !           467:
Webmaster