Amaya/amaya/AHTURLTools.c - annotate

Return to AHTURLTools.c CVS log
Up to [Public] / Amaya / amaya
Annotation of Amaya/amaya/AHTURLTools.c, revision 1.1.1.1

1.1       cvs         1: /* 
                      2: ** -- Copyright (c) 1996-1997 Inria/CNRS  All rights reserved. -- 
                      3: */
                      4: 
                      5: #include "amaya.h"
                      6: 
                      7: #include "dialog.h"
                      8: #include "content.h"
                      9: #include "view.h"
                     10: #include "interface.h"
                     11: #include "message.h"
                     12: #include "conststr.h"
                     13: #include "AHTMemConv.h"
                     14: #include "init.h"
                     15: #include "AHTURLTools.h" /** defined here **/
                     16: 
                     17: /*+--------------------------------------------------------------------+ */
                     18: /*| IsHTMLName                                                         | */
                     19: /*+--------------------------------------------------------------------+ */
                     20: 
                     21: #ifdef __STDC__
                     22: boolean             IsHTMLName (char *path)
                     23: #else  /* __STDC__ */
                     24: boolean             IsHTMLName (path)
                     25: char               *path;
                     26: 
                     27: #endif /* __STDC__ */
                     28: {
                     29:   char                temppath[MAX_LENGTH];
                     30:   char                suffix[MAX_LENGTH];
                     31:   char                nsuffix[MAX_LENGTH];
                     32:   int                 i;
                     33: 
                     34:   if (!path)
                     35:     return FALSE;
                     36: 
                     37:   strcpy (temppath, path);
                     38:   ExtractSuffix (temppath, suffix);
                     39: 
                     40:   /* Normalize the suffix */
                     41:   i = 0;
                     42:   while (suffix[i] != EOS)
                     43:     nsuffix[i] = TOLOWER (suffix[i++]);
                     44:   nsuffix[i] = EOS;
                     45:   if ((strcmp (nsuffix, "html")) &&
                     46:       (strcmp (nsuffix, "htm")) &&
                     47:       (strcmp (nsuffix, "shtml")))
                     48:     return FALSE;
                     49:   return TRUE;
                     50: }
                     51: 
                     52: /*+--------------------------------------------------------------------+ */
                     53: /*| IsImageName                                                        | */
                     54: /*+--------------------------------------------------------------------+ */
                     55: 
                     56: #ifdef __STDC__
                     57: boolean             IsImageName (char *path)
                     58: #else  /* __STDC__ */
                     59: boolean             IsImageName (path)
                     60: char               *path;
                     61: 
                     62: #endif /* __STDC__ */
                     63: {
                     64:   char                temppath[MAX_LENGTH];
                     65:   char                suffix[MAX_LENGTH];
                     66:   char                nsuffix[MAX_LENGTH];
                     67:   int                 i;
                     68: 
                     69:   if (!path)
                     70:     return FALSE;
                     71: 
                     72:   strcpy (temppath, path);
                     73:   ExtractSuffix (temppath, suffix);
                     74: 
                     75:   /* Normalize the suffix */
                     76:   i = 0;
                     77:   while (suffix[i] != EOS)
                     78:     nsuffix[i] = TOLOWER (suffix[i++]);
                     79:   nsuffix[i] = EOS;
                     80:   if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
                     81:       (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
                     82:       (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
                     83:     return FALSE;
                     84:   return TRUE;
                     85: }
                     86: 
                     87: /*+--------------------------------------------------------------------+ */
                     88: /*| IsTextName                                                         | */
                     89: /*+--------------------------------------------------------------------+ */
                     90: 
                     91: #ifdef __STDC__
                     92: boolean             IsTextName (char *path)
                     93: #else  /* __STDC__ */
                     94: boolean             IsTextName (path)
                     95: char               *path;
                     96: 
                     97: #endif /* __STDC__ */
                     98: {
                     99:   char                temppath[MAX_LENGTH];
                    100:   char                suffix[MAX_LENGTH];
                    101:   char                nsuffix[MAX_LENGTH];
                    102:   int                 i;
                    103: 
                    104:   if (!path)
                    105:     return FALSE;
                    106: 
                    107:   strcpy (temppath, path);
                    108:   ExtractSuffix (temppath, suffix);
                    109: 
                    110:   /* Normalize the suffix */
                    111:   i = 0;
                    112:   while (suffix[i] != EOS)
                    113:     {
                    114:       nsuffix[i] = TOLOWER (suffix[i]);
                    115:       i++;
                    116:     }
                    117:   nsuffix[i] = EOS;
                    118: 
                    119:   if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
                    120:       (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
                    121:       (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
                    122:       (strcmp (nsuffix, "Z")) && (strcmp (nsuffix, "gz")) &&
                    123:       (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) &&
                    124:       (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) &&
                    125:       (strcmp (nsuffix, "au")))
                    126:     return TRUE;
                    127:   return FALSE;
                    128: }
                    129: 
                    130: /*+--------------------------------------------------------------------+ */
                    131: /*| IsHTTPPath                                                         | */
                    132: /*+--------------------------------------------------------------------+ */
                    133: 
                    134: #ifdef __STDC__
                    135: boolean             IsHTTPPath (char *path)
                    136: #else  /* __STDC__ */
                    137: boolean             IsHTTPPath (path)
                    138: char               *path;
                    139: 
                    140: #endif /* __STDC__ */
                    141: {
                    142:   if (!path)
                    143:     return FALSE;
                    144: 
                    145:   if (strncmp (path, "http:", 5) != 0)
                    146:     return FALSE;
                    147:   return TRUE;
                    148: }
                    149: 
                    150: /*+--------------------------------------------------------------------+ */
                    151: /*| IsWithParameters                                                   | */
                    152: /*+--------------------------------------------------------------------+ */
                    153: 
                    154: #ifdef __STDC__
                    155: boolean             IsWithParameters (char *path)
                    156: #else  /* __STDC__ */
                    157: boolean             IsWithParameters (path)
                    158: char               *path;
                    159: 
                    160: #endif /* __STDC__ */
                    161: {
                    162:   int                 i;
                    163: 
                    164:   if ((!path) || (path[0] == EOS))
                    165:     return FALSE;
                    166: 
                    167:   i = strlen (path) - 1;
                    168:   while (i > 0 && path[i--] != '?')
                    169:     if (i < 0)
                    170:       return FALSE;
                    171: 
                    172:   /* There is a parameter */
                    173:   return TRUE;
                    174: }
                    175: 
                    176: /*+--------------------------------------------------------------------+ */
                    177: /*| IsW3Path                                                           | */
                    178: /*+--------------------------------------------------------------------+ */
                    179: 
                    180: #ifdef __STDC__
                    181: boolean             IsW3Path (char *path)
                    182: #else  /* __STDC__ */
                    183: boolean             IsW3Path (path)
                    184: char               *path;
                    185: 
                    186: #endif /* __STDC__ */
                    187: {
                    188:   if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
                    189:       (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
                    190:       (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
                    191:       (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
                    192:     return FALSE;
                    193:   return TRUE;
                    194: }
                    195: 
                    196: /*+--------------------------------------------------------------------+ */
                    197: /*| IsValidProtocol                                                    | */
                    198: /*+--------------------------------------------------------------------+ */
                    199: 
                    200: #ifdef __STDC__
                    201: boolean             IsValidProtocol (char *path)
                    202: #else  /* __STDC__ */
                    203: boolean             IsValidProtocol (path)
                    204: char               *path;
                    205: 
                    206: #endif /* __STDC__ */
                    207: {
                    208:   if (!strncmp (path, "http:", 5)
                    209:       /***|| !strncmp (path, "ftp:", 4)
                    210:       || !strncmp (path, "news:", 5)***/)
                    211:     return (YES);
                    212:   else
                    213:     return (NO);
                    214: }
                    215: 
                    216: /*+--------------------------------------------------------------------+ */
                    217: /*| IsValidNormalizeURL  says which URL's may be normalized            | */
                    218: /*+--------------------------------------------------------------------+ */
                    219: 
                    220: #ifdef __STDC__
                    221: boolean             IsValidNormalizeURL (char *path)
                    222: #else  /* __STDC__ */
                    223: boolean             IsValidNormalizeURL (path)
                    224: char               *path;
                    225: 
                    226: #endif /* __STDC__ */
                    227: {
                    228:  if (strchr(path,':') && !strncmp (path, "http:", 5))
                    229:    return (YES);
                    230:  else
                    231:    return (NO);
                    232: }
                    233: 
                    234: 
                    235: /*+--------------------------------------------------------------------+ */
                    236: /*| NormalizeURL provides the new complete and normalized URL or file  | */
                    237: /*|            name path and the name of the document.                 | */
                    238: /*|            orgName is the original requested name.                 | */
                    239: /*|            doc identifies the document which provides the original | */
                    240: /*|            name.                                                   | */
                    241: /*|            newName is the resulting URL of file name.              | */
                    242: /*|            docName is the resulting document name.                 | */
                    243: /*+--------------------------------------------------------------------+ */
                    244: 
                    245: #ifdef __STDC__
                    246: void                NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
                    247: #else  /* __STDC__ */
                    248: void                NormalizeURL (orgName, doc, newName, docName)
                    249: char               *orgName;
                    250: Document            doc;
                    251: char               *newName;
                    252: char               *docName;
                    253: 
                    254: #endif /* __STDC__ */
                    255: {
                    256:   char                basename[MAX_LENGTH];
                    257:   char                tempname[MAX_LENGTH];
                    258:   int                 i;
                    259:   char               *ptr;
                    260:   char               *basename_ptr;
                    261:   int                 basename_flag;
                    262:   Element             el;
                    263:   ElementType         elType;
                    264:   AttributeType       attrType;
                    265:   Attribute           attrHREF;
                    266:   int                 length;
                    267: 
                    268:   /* Fix up orgName, by erasing leading and trailing white space */
                    269:   if (!newName || !docName)
                    270:     return;
                    271:   ptr = orgName;
                    272:   while (*ptr == ' ' && *ptr++ != EOS) ;
                    273:   strcpy (tempname, ptr);
                    274:   ptr = strchr (tempname, ' ');
                    275:   if (ptr)
                    276:     *ptr = EOS;
                    277: 
                    278:   /* 
                    279:   ** the following block to take into account the BASE element.
                    280:   ** This is not very optimized, as this procedure is repeated for
                    281:   ** each element which is retrieved. A better way would be to
                    282:   ** move this higher up in the function call hierarchy.
                    283:   */
                    284:   if (IsValidNormalizeURL (tempname) && doc)
                    285:     {
                    286:       length = MAX_LENGTH;
                    287:       /* get the root element    */
                    288:       el = TtaGetMainRoot (doc);
                    289: 
                    290:       /* search the BASE element */
                    291:       elType.ElSSchema = TtaGetDocumentSSchema (doc);
                    292:       elType.ElTypeNum = HTML_EL_BASE;
                    293:       el = TtaSearchTypedElement (elType, SearchInTree, el);
                    294:       if (el)
                    295:        {
                    296:          /* 
                    297:          ** The document has a BASE element 
                    298:          ** Get the HREF attribute of the BASE Element 
                    299:          */
                    300:          attrType.AttrSSchema = elType.ElSSchema;
                    301:          attrType.AttrTypeNum = HTML_ATTR_HREF_;
                    302:          attrHREF = TtaGetAttribute (el, attrType);
                    303:          if (attrHREF)
                    304:            {
                    305:              /* 
                    306:              ** Use the base path of the document 
                    307:              ** To do: verify length of the buffer
                    308:              ** length > TtaGetTextAttributeLength (attrHREF) + strlen (orgName) 
                    309:              */
                    310:              TtaGiveTextAttributeValue (attrHREF, basename, &length);
                    311: 
                    312:              /* 
                    313:              ** base and orgName have to be separated by a DIR_SEP 
                    314:              */
                    315:              if (basename[strlen (basename) - 1] != DIR_SEP && tempname[0] != DIR_SEP)
                    316:                strcat (basename, DIR_STR);
                    317:            }
                    318:        }
                    319:       else
                    320:        basename[0] = EOS;
                    321:     }
                    322:   else
                    323:     basename[0] = EOS;
                    324: 
                    325:   if (basename[0] == EOS)
                    326:     {
                    327:       /* 
                    328:       ** There is no BASE element in that document.
                    329:       ** A temporary fix as TtaExtractName does not tolerate a name
                    330:       ** ending in /. Here, we reinsert the slash, in order to
                    331:       ** parse the name in the following two lines. A bit
                    332:       ** redundant and has to be reviewed.  
                    333:       */
                    334:       if (DocumentURLs[(int) doc])
                    335:        {
                    336:          basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL);
                    337:          basename_flag = TRUE;
                    338:        }
                    339:       else
                    340:        {
                    341:          basename_ptr = "";
                    342:          basename_flag = FALSE;
                    343:        }
                    344:     }
                    345:   else
                    346:     {
                    347:       basename_ptr = HTParse (basename, "", PARSE_ALL);
                    348:       basename_flag = TRUE;
                    349:     }                          /* if-else tempname */
                    350: 
                    351:   ptr = HTParse (tempname, basename_ptr, PARSE_ALL);
                    352:   if (basename_flag)
                    353:     HT_FREE (basename_ptr);
                    354:   if (ptr)
                    355:     {
                    356:       ptr = HTSimplify (&ptr);
                    357:       strcpy (newName, ptr);
                    358:       HT_FREE (ptr);
                    359:     }
                    360:   else
                    361:     newName[0] = EOS;
                    362: 
                    363:   i = strlen (newName) - 1;
                    364:   if (i > 0)
                    365:     {
                    366:       /* 
                    367:       ** A temporary fix for an interfacing problem:
                    368:       ** TtaExtractName does not tolerate url's finished on DIR_SEP
                    369:       */
                    370:       ptr = strrchr (newName, DIR_SEP);
                    371:       if (ptr)
                    372:        ptr++;
                    373:       if (ptr && *ptr != EOS)
                    374:        strcpy (docName, ptr);
                    375:       else                     
                    376:        /*
                    377:        ** The docname was not comprised inside the URL, so let's 
                    378:        ** assign a "noname.html" name :)
                    379:        */
                    380:        strcpy (docName, "noname.html");
                    381: 
                    382:       /* 
                    383:       ** A temporary fix for an interfacing problem:
                    384:       ** TtaExtractName does not tolerate url's finished on DIR_SEP
                    385:       */
                    386:       if (newName[i] == DIR_SEP)
                    387:        newName[i] = EOS;
                    388:     }
                    389: }
                    390: 
                    391: /*+--------------------------------------------------------------------+ */
                    392: /*| IsSameHost                                                         | */
                    393: /*+--------------------------------------------------------------------+ */
                    394: 
                    395: #ifdef __STDC__
                    396: boolean             IsSameHost (char *url1, char *url2)
                    397: #else  /* __STDC__ */
                    398: boolean             IsSameHost (url1, url2)
                    399: char               *path;
                    400: 
                    401: #endif /* __STDC__ */
                    402: {
                    403:  char *basename_ptr1, *basename_ptr2;
                    404:  boolean result;
                    405: 
                    406:  basename_ptr1 = HTParse(url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
                    407:  basename_ptr2 = HTParse(url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
                    408: 
                    409:  if (strcmp (basename_ptr1, basename_ptr2))
                    410:    result = NO;
                    411:  else
                    412:    result = YES;
                    413: 
                    414:  HT_FREE(basename_ptr1);
                    415:  HT_FREE(basename_ptr2);
                    416: 
                    417:  return(result);
                    418: }
                    419: 
                    420: 
                    421: /*+--------------------------------------------------------------------+ */
                    422: /*| AHTMakeRelativeURL                                                | */
                    423: /*+--------------------------------------------------------------------+ */
                    424: 
                    425: #ifdef __STDC__
                    426: char *AHTMakeRelativeName(char *url, char *base_url)
                    427: #else  /* __STDC__ */
                    428: char *AHTMakeRelativeName(url, base_url)
                    429: char url;
                    430: char base_url;
                    431: #endif /* __STDC__ */
                    432: {
                    433:  char *base_ptr, *url_ptr;
                    434:  char *result;
                    435: 
                    436:  /* verify if we are in the same host */
                    437: 
                    438:  base_ptr = HTParse(base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
                    439:  url_ptr = HTParse(url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
                    440:  
                    441:  if (!strcmp (base_ptr, url_ptr)) {
                    442:    HT_FREE(base_ptr);
                    443:    HT_FREE(url_ptr);
                    444: 
                    445:    /* Normalize the URLs */
                    446: 
                    447:    base_ptr = HTParse(base_url, "", PARSE_ALL);
                    448:    url_ptr = HTParse(url, "", PARSE_ALL);
                    449: 
                    450:    /* Use libwww to make relative name */
                    451: 
                    452:    result = HTRelative(url_ptr, base_ptr);
                    453:    HT_FREE(base_ptr);
                    454:    HT_FREE(url_ptr);
                    455:  }
                    456:  else
                    457:    result = (char *) NULL;
                    458: 
                    459:  return(result);
                    460: }
                    461: 
                    462: 
                    463: 
                    464: 
                    465: 
                    466: 
                    467:
Webmaster