Amaya/amaya/AHTURLTools.c - annotate

Return to AHTURLTools.c CVS log
Up to [Public] / Amaya / amaya
Annotation of Amaya/amaya/AHTURLTools.c, revision 1.29

1.7       cvs         1: /*
                      2:  *
                      3:  *  (c) COPYRIGHT MIT and INRIA, 1996.
                      4:  *  Please first read the full copyright statement in file COPYRIGHT.
                      5:  *
                      6:  */
1.9       cvs         7: 
1.10      cvs         8: /*
                      9:  * AHTURLTools.c: contains all the functions for testing, manipulating,
1.25      cvs        10:  * and normalizing URLs. It also contains a local copy of the libWWW
                     11:  * URL parsing functions.
1.10      cvs        12:  *
                     13:  * Authors: J. Kahan, I. Vatton
                     14:  *
                     15:  */
1.7       cvs        16:  
1.15      cvs        17: #define THOT_EXPORT extern
1.3       cvs        18: #include "amaya.h"
                     19: 
1.8       cvs        20: #include "init_f.h"
                     21: #include "AHTURLTools_f.h"
                     22: 
1.24      cvs        23: #define MAX_PRINT_URL_LENGTH 50
/* The pieces of a parsed URI, each held as a char pointer. */
typedef struct _HTURI
{
   char               *access;	/* Now known as "scheme" */
   char               *host;
   char               *absolute;
   char               *relative;
   char               *fragment;
}
HTURI;
1.24      cvs        31: 
1.28      cvs        32: 
                     33: /*----------------------------------------------------------------------
                     34:   ConvertToLowerCase
                     35:   Converts a string to lowercase.
                     36:   ----------------------------------------------------------------------*/
1.22      cvs        37: #ifdef __STDC__
1.28      cvs        38: static void         ConvertToLowerCase (char *string)
                     39: #else  /* __STDC__ */
                     40: static void         ConvertToLowerCase (string)
                     41: char                *string;
                     42: 
                     43: #endif /* __STDC__ */
                     44: {
                     45:  int i;
                     46: 
                     47:  if (!string)
                     48:    return;
                     49: 
                     50:  for (i = 0; string[i] != EOS; i++)
                     51:    string[i] = tolower (string[i]);
                     52: }
1.22      cvs        53: 
1.8       cvs        54: /*----------------------------------------------------------------------
1.11      cvs        55:   ExplodeURL 
1.8       cvs        56:   ----------------------------------------------------------------------*/
                     57: #ifdef __STDC__
                     58: void                ExplodeURL (char *url, char **proto, char **host, char **dir, char **file)
                     59: #else
                     60: void                ExplodeURL (url, proto, host, dir, file)
                     61: char               *url;
                     62: char              **proto;
                     63: char              **host;
                     64: char              **dir;
                     65: char              **file;
                     66: 
                     67: #endif
                     68: {
1.9       cvs        69:    char               *curr, *temp;
1.8       cvs        70: 
                     71:    if ((url == NULL) || (proto == NULL) || (host == NULL) ||
                     72:        (dir == NULL) || (file == NULL))
                     73:       return;
                     74: 
                     75:    /* initialize every pointer */
                     76:    *proto = *host = *dir = *file = NULL;
                     77: 
                     78:    /* skip any leading space */
                     79:    while ((*url == SPACE) || (*url == TAB))
                     80:       url++;
1.9       cvs        81:    curr = url;
                     82:    if (*curr == 0)
1.8       cvs        83:       goto finished;
                     84: 
                     85:    /* go to the end of the URL */
1.9       cvs        86:    while ((*curr != 0) && (*curr != SPACE) && (*curr != '\b') &&
                     87:          (*curr != '\r') && (*curr != EOL))
                     88:       curr++;
1.8       cvs        89: 
                     90:    /* mark the end of the chain */
1.9       cvs        91:    *curr = EOS;
                     92:    curr--;
                     93:    if (curr <= url)
1.8       cvs        94:       goto finished;
                     95: 
                     96:    /* search the next DIR_SEP indicating the beginning of the file name */
                     97:    do
1.11      cvs        98:      curr--;
1.9       cvs        99:    while ((curr >= url) && (*curr != DIR_SEP));
1.11      cvs       100: 
1.9       cvs       101:    if (curr < url)
1.8       cvs       102:       goto finished;
1.9       cvs       103:    *file = curr + 1;
1.8       cvs       104: 
                    105:    /* mark the end of the dir */
1.9       cvs       106:    *curr = EOS;
                    107:    curr--;
                    108:    if (curr < url)
1.8       cvs       109:       goto finished;
                    110: 
1.29    ! cvs       111:    /* search for the DIR_STR indicating the host name start */
1.9       cvs       112:    while ((curr > url) && ((*curr != DIR_SEP) || (*(curr + 1) != DIR_SEP)))
                    113:       curr--;
1.8       cvs       114: 
                    115:    /* if we found it, separate the host name from the directory */
1.9       cvs       116:    if ((*curr == DIR_SEP) && (*(curr + 1) == DIR_SEP))
1.8       cvs       117:      {
1.9       cvs       118:        *host = temp = curr + 2;
1.8       cvs       119:        while ((*temp != 0) && (*temp != DIR_SEP))
                    120:           temp++;
                    121:        if (*temp == DIR_SEP)
                    122:          {
                    123:             *temp = EOS;
                    124:             *dir = temp + 1;
                    125:          }
                    126:      }
                    127:    else
1.11      cvs       128:      *dir = curr;
                    129: 
1.9       cvs       130:    if (curr <= url)
1.8       cvs       131:       goto finished;
                    132: 
                    133:    /* mark the end of the proto */
1.9       cvs       134:    *curr = EOS;
                    135:    curr--;
                    136:    if (curr < url)
1.8       cvs       137:       goto finished;
                    138: 
1.29    ! cvs       139:    if (*curr == PATH_SEP)
1.8       cvs       140:      {
1.9       cvs       141:        *curr = EOS;
                    142:        curr--;
1.8       cvs       143:      }
                    144:    else
                    145:       goto finished;
1.11      cvs       146: 
1.9       cvs       147:    if (curr < url)
1.8       cvs       148:       goto finished;
1.9       cvs       149:    while ((curr > url) && (isalpha (*curr)))
                    150:       curr--;
                    151:    *proto = curr;
1.8       cvs       152: 
                    153:  finished:;
                    154: 
                    155: #ifdef AMAYA_DEBUG
                    156:    fprintf (stderr, "ExplodeURL(%s)\n\t", url);
                    157:    if (*proto)
                    158:       fprintf (stderr, "proto : %s, ", *proto);
                    159:    if (*host)
                    160:       fprintf (stderr, "host : %s, ", *host);
                    161:    if (*dir)
                    162:       fprintf (stderr, "dir : %s, ", *dir);
                    163:    if (*file)
                    164:       fprintf (stderr, "file : %s ", *file);
                    165:    fprintf (stderr, "\n");
                    166: #endif
                    167: 
                    168: }
1.3       cvs       169: 
1.4       cvs       170: /*----------------------------------------------------------------------
1.9       cvs       171:   IsHTMLName                                                         
                    172:   returns TRUE if path points to an HTML resource.
1.4       cvs       173:   ----------------------------------------------------------------------*/
1.3       cvs       174: #ifdef __STDC__
                    175: boolean             IsHTMLName (char *path)
                    176: #else  /* __STDC__ */
                    177: boolean             IsHTMLName (path)
                    178: char               *path;
                    179: #endif /* __STDC__ */
                    180: {
1.5       cvs       181:    char                temppath[MAX_LENGTH];
                    182:    char                suffix[MAX_LENGTH];
                    183:    char                nsuffix[MAX_LENGTH];
                    184:    int                 i;
                    185: 
                    186:    if (!path)
1.13      cvs       187:      return (FALSE);
1.5       cvs       188: 
                    189:    strcpy (temppath, path);
                    190:    ExtractSuffix (temppath, suffix);
                    191: 
                    192:    /* Normalize the suffix */
                    193:    i = 0;
                    194:    while (suffix[i] != EOS)
1.13      cvs       195:      {
1.25      cvs       196:        nsuffix[i] = tolower (suffix[i]);
1.13      cvs       197:        i++;
                    198:      }
1.5       cvs       199:    nsuffix[i] = EOS;
                    200:    if ((strcmp (nsuffix, "html")) &&
                    201:        (strcmp (nsuffix, "htm")) &&
                    202:        (strcmp (nsuffix, "shtml")))
1.13      cvs       203:      return (FALSE);
1.22      cvs       204:    else if (!strcmp (nsuffix, "gz"))
1.13      cvs       205:      {
                    206:        /* take in account compressed files */
                    207:        ExtractSuffix (temppath, suffix);       
                    208:        /* Normalize the suffix */
                    209:        i = 0;
                    210:        while (suffix[i] != EOS)
                    211:         {
1.25      cvs       212:           nsuffix[i] = tolower (suffix[i]);
1.13      cvs       213:           i++;
                    214:         }
                    215:        nsuffix[i] = EOS;
                    216:        if ((strcmp (nsuffix, "html")) &&
                    217:           (strcmp (nsuffix, "htm")) &&
                    218:           (strcmp (nsuffix, "shtml")))
                    219:         return (FALSE);
                    220:        else
                    221:         return (TRUE);
                    222:      }
                    223:    else
                    224:      return (TRUE);
1.3       cvs       225: }
                    226: 
1.4       cvs       227: /*----------------------------------------------------------------------
1.9       cvs       228:   IsImageName                                
                    229:   returns TRUE if path points to an image resource.
1.4       cvs       230:   ----------------------------------------------------------------------*/
1.3       cvs       231: #ifdef __STDC__
                    232: boolean             IsImageName (char *path)
                    233: #else  /* __STDC__ */
                    234: boolean             IsImageName (path)
                    235: char               *path;
                    236: #endif /* __STDC__ */
                    237: {
1.5       cvs       238:    char                temppath[MAX_LENGTH];
                    239:    char                suffix[MAX_LENGTH];
                    240:    char                nsuffix[MAX_LENGTH];
                    241:    int                 i;
                    242: 
                    243:    if (!path)
1.13      cvs       244:       return (FALSE);
1.5       cvs       245: 
                    246:    strcpy (temppath, path);
                    247:    ExtractSuffix (temppath, suffix);
                    248: 
                    249:    /* Normalize the suffix */
                    250:    i = 0;
                    251:    while (suffix[i] != EOS)
1.13      cvs       252:      {
1.25      cvs       253:        nsuffix[i] = tolower (suffix[i]);
1.13      cvs       254:        i++;
                    255:      }
1.5       cvs       256:    nsuffix[i] = EOS;
                    257:    if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
                    258:        (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
                    259:        (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
1.13      cvs       260:       return (FALSE);
                    261:    return (TRUE);
1.3       cvs       262: }
                    263: 
1.4       cvs       264: /*----------------------------------------------------------------------
1.9       cvs       265:   IsTextName                                                         
1.4       cvs       266:   ----------------------------------------------------------------------*/
1.3       cvs       267: #ifdef __STDC__
                    268: boolean             IsTextName (char *path)
                    269: #else  /* __STDC__ */
                    270: boolean             IsTextName (path)
                    271: char               *path;
                    272: 
                    273: #endif /* __STDC__ */
                    274: {
1.5       cvs       275:    char                temppath[MAX_LENGTH];
                    276:    char                suffix[MAX_LENGTH];
                    277:    char                nsuffix[MAX_LENGTH];
                    278:    int                 i;
                    279: 
                    280:    if (!path)
1.13      cvs       281:      return (FALSE);
1.5       cvs       282: 
                    283:    strcpy (temppath, path);
                    284:    ExtractSuffix (temppath, suffix);
                    285: 
                    286:    /* Normalize the suffix */
                    287:    i = 0;
                    288:    while (suffix[i] != EOS)
                    289:      {
1.25      cvs       290:        nsuffix[i] = tolower (suffix[i]);
1.5       cvs       291:        i++;
                    292:      }
                    293:    nsuffix[i] = EOS;
                    294: 
                    295:    if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
                    296:        (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
                    297:        (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
1.22      cvs       298:        (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "tar")) &&
                    299:        (strcmp (nsuffix, "xpg")) && (strcmp (nsuffix, "xpd")) &&
                    300:        (strcmp (nsuffix, "ps"))  && (strcmp (nsuffix, "au")))
1.13      cvs       301:       return (TRUE);
1.22      cvs       302:    else if (!strcmp (nsuffix, "gz"))
1.13      cvs       303:      {
                    304:        /* take in account compressed files */
                    305:        ExtractSuffix (temppath, suffix);       
                    306:        /* Normalize the suffix */
                    307:        i = 0;
                    308:        while (suffix[i] != EOS)
                    309:         {
1.25      cvs       310:           nsuffix[i] = tolower (suffix[i]);
1.13      cvs       311:           i++;
                    312:         }
                    313:        nsuffix[i] = EOS;
                    314:        if ((!strcmp (nsuffix, "html")) ||
                    315:           (!strcmp (nsuffix, "htm")) ||
                    316:           (!strcmp (nsuffix, "shtml")))
                    317:         return (TRUE);
                    318:        else
                    319:         return (FALSE);
                    320:      }
                    321:    else
                    322:      return (FALSE);
1.3       cvs       323: }
                    324: 
1.4       cvs       325: /*----------------------------------------------------------------------
1.9       cvs       326:   IsHTTPPath                                     
                    327:   returns TRUE if path is in fact an http URL.
1.4       cvs       328:   ----------------------------------------------------------------------*/
1.3       cvs       329: #ifdef __STDC__
                    330: boolean             IsHTTPPath (char *path)
                    331: #else  /* __STDC__ */
                    332: boolean             IsHTTPPath (path)
                    333: char               *path;
                    334: #endif /* __STDC__ */
                    335: {
1.5       cvs       336:    if (!path)
                    337:       return FALSE;
1.3       cvs       338: 
1.5       cvs       339:    if (strncmp (path, "http:", 5) != 0)
                    340:       return FALSE;
                    341:    return TRUE;
1.3       cvs       342: }
                    343: 
1.4       cvs       344: /*----------------------------------------------------------------------
1.9       cvs       345:   IsWithParameters                           
                    346:   returns TRUE if url has a concatenated query string.
1.4       cvs       347:   ----------------------------------------------------------------------*/
1.3       cvs       348: #ifdef __STDC__
1.9       cvs       349: boolean             IsWithParameters (char *url)
1.3       cvs       350: #else  /* __STDC__ */
1.9       cvs       351: boolean             IsWithParameters (url)
                    352: char               *url;
1.3       cvs       353: #endif /* __STDC__ */
                    354: {
1.5       cvs       355:    int                 i;
1.3       cvs       356: 
1.9       cvs       357:    if ((!url) || (url[0] == EOS))
1.5       cvs       358:       return FALSE;
1.3       cvs       359: 
1.9       cvs       360:    i = strlen (url) - 1;
                    361:    while (i > 0 && url[i--] != '?')
1.5       cvs       362:       if (i < 0)
                    363:         return FALSE;
1.3       cvs       364: 
1.5       cvs       365:    /* There is a parameter */
                    366:    return TRUE;
1.3       cvs       367: }
                    368: 
1.4       cvs       369: /*----------------------------------------------------------------------
1.9       cvs       370:   IsW3Path                                           
                    371:   returns TRUE if path is in fact a URL.
1.4       cvs       372:   ----------------------------------------------------------------------*/
1.3       cvs       373: #ifdef __STDC__
                    374: boolean             IsW3Path (char *path)
                    375: #else  /* __STDC__ */
                    376: boolean             IsW3Path (path)
                    377: char               *path;
                    378: #endif /* __STDC__ */
                    379: {
1.5       cvs       380:    if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
                    381:        (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
                    382:        (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
                    383:        (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
                    384:       return FALSE;
                    385:    return TRUE;
1.3       cvs       386: }
                    387: 
1.4       cvs       388: /*----------------------------------------------------------------------
1.9       cvs       389:   IsValidProtocol                                                    
                    390:   returns true if the url protocol is supported by Amaya.
1.4       cvs       391:   ----------------------------------------------------------------------*/
1.3       cvs       392: #ifdef __STDC__
1.9       cvs       393: boolean             IsValidProtocol (char *url)
1.3       cvs       394: #else  /* __STDC__ */
1.9       cvs       395: boolean             IsValidProtocol (url)
                    396: char               *url;
1.3       cvs       397: #endif /* __STDC__ */
                    398: {
1.26      cvs       399:    if (!strncmp (url, "http:", 5))
1.22      cvs       400:        /* experimental */
1.26      cvs       401:       /***  || !strncmp (url, "ftp:", 4)) ***/
1.24      cvs       402:      /*** || !strncmp (path, "news:", 5)***/ 
1.8       cvs       403:       return (TRUE);
1.5       cvs       404:    else
1.8       cvs       405:       return (FALSE);
1.3       cvs       406: }
                    407: 
1.4       cvs       408: /*----------------------------------------------------------------------
1.9       cvs       409:    NormalizeURL
                    410:    normalizes orgName according to a base associated with doc, and
                    411:    following the standard URL format rules.
                    412:    The function returns the new complete and normalized URL 
1.12      cvs       413:    or file name path (newName) and the name of the document (docName).        
1.9       cvs       414:    N.B. If the function can't find out what's the docName, it assigns
                    415:    the name "noname.html".
1.4       cvs       416:   ----------------------------------------------------------------------*/
1.3       cvs       417: #ifdef __STDC__
                    418: void                NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
                    419: #else  /* __STDC__ */
                    420: void                NormalizeURL (orgName, doc, newName, docName)
                    421: char               *orgName;
                    422: Document            doc;
                    423: char               *newName;
                    424: char               *docName;
                    425: #endif /* __STDC__ */
                    426: {
1.5       cvs       427:    char                basename[MAX_LENGTH];
1.18      cvs       428:    char                tempOrgName[MAX_LENGTH];
1.5       cvs       429:    char               *ptr;
                    430:    Element             el;
                    431:    ElementType         elType;
                    432:    AttributeType       attrType;
1.18      cvs       433:    Attribute           attrHREF = NULL;
1.5       cvs       434:    int                 length;
                    435: 
                    436:    if (!newName || !docName)
                    437:       return;
1.18      cvs       438: 
                    439:    /*
                    440:    ** First Step: Clean orgName
                    441:    ** Make sure we have a complete orgName, without any leading or trailing
                    442:    ** white spaces, or trailinbg new lines
                    443:    */
                    444: 
1.5       cvs       445:    ptr = orgName;
1.18      cvs       446:    /* skip leading white space and new line characters */
1.19      cvs       447:    while ((*ptr == ' ' || *ptr == EOL) && *ptr++ != EOS);
1.18      cvs       448:    strcpy (tempOrgName, ptr);
                    449:    /* clean trailing white space */
                    450:    ptr = strchr (tempOrgName, ' ');
                    451:    if (ptr)
                    452:       *ptr = EOS;
                    453:    /* clean trailing new lines */
1.19      cvs       454:    ptr = strchr (tempOrgName, EOL);
1.5       cvs       455:    if (ptr)
                    456:       *ptr = EOS;
                    457: 
1.18      cvs       458:    /*
                    459:    ** Second Step: make orgName a complete URL
                    460:    ** If the URL does not include a protocol, then
                    461:    ** try to calculate one using the doc's base element 
                    462:    ** (if it exists),
                    463:    */
1.21      cvs       464:    if (tempOrgName[0] == EOS)
                    465:      {
                    466:        newName[0] = EOS;
                    467:        return;
                    468:      }
                    469:    else if (IsW3Path (tempOrgName))
                    470:      {
                    471:        /* the name is complete, go to the Sixth Step */
                    472:        strcpy (newName, tempOrgName);
                    473:        /* verify if the URL has the form "protocol://server:port" */
1.25      cvs       474:        ptr = AmayaParseUrl (newName, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST |
                    475:                      AMAYA_PARSE_PUNCTUATION);
1.21      cvs       476:        if (ptr && !strcmp (ptr, newName))
                    477:         {
1.29    ! cvs       478:           /* it has this form, we complete it by adding a DIR_STR  */
        !           479:           strcat (newName, DIR_STR);
1.21      cvs       480:         }
                    481:        if (ptr)
1.25      cvs       482:         TtaFreeMemory (ptr);
1.21      cvs       483:      }
                    484:    else if ( doc == 0)
1.19      cvs       485:      /* the name is complete, go to the Sixth Step */
1.18      cvs       486:      strcpy (newName, tempOrgName);
1.5       cvs       487:    else
                    488:      {
1.18      cvs       489:        /* take into account the BASE element. */
1.22      cvs       490:        length = MAX_LENGTH -1;
1.18      cvs       491:        /* get the root element    */
                    492:        el = TtaGetMainRoot (doc);
                    493:           
                    494:        /* search the BASE element */
                    495:        elType.ElSSchema = TtaGetDocumentSSchema (doc);
                    496:        elType.ElTypeNum = HTML_EL_BASE;
                    497:        el = TtaSearchTypedElement (elType, SearchInTree, el);
                    498:        if (el)
1.17      cvs       499:         {
1.18      cvs       500:           /* 
                    501:           ** The document has a BASE element 
                    502:           ** Get the HREF attribute of the BASE Element 
                    503:           */
                    504:           attrType.AttrSSchema = elType.ElSSchema;
                    505:           attrType.AttrTypeNum = HTML_ATTR_HREF_;
                    506:           attrHREF = TtaGetAttribute (el, attrType);
                    507:           if (attrHREF)
1.14      cvs       508:             {
1.18      cvs       509:               /* Use the base path of the document */
                    510:               TtaGiveTextAttributeValue (attrHREF, basename, &length);
                    511:               /* base and orgName have to be separated by a DIR_SEP */
1.20      cvs       512:               length--;
1.28      cvs       513:               if (basename[0] != EOS && basename[length] != DIR_SEP) 
1.18      cvs       514:                 /* verify if the base has the form "protocol://server:port" */
1.14      cvs       515:                 {
1.25      cvs       516:                   ptr = AmayaParseUrl (basename, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST |
                    517:                                                AMAYA_PARSE_PUNCTUATION);
1.18      cvs       518:                   if (ptr && !strcmp (ptr, basename))
1.14      cvs       519:                     {
1.29    ! cvs       520:                     /* it has this form, we complete it by adding a DIR_STR  */
        !           521:                     strcat (basename, DIR_STR);
1.18      cvs       522:                     length++;
1.14      cvs       523:                     }
1.18      cvs       524:                   if (ptr)
1.25      cvs       525:                     TtaFreeMemory (ptr);
1.14      cvs       526:                 }
1.19      cvs       527:               /* Third Step: prepare the base
                    528:               ** Removing anything after the
                    529:               ** last DIR_SEP char. If no such char is found, then search for
                    530:               ** the first ":" char, hoping that what's before that is a
                    531:               ** protocol. If found, end the string there. If neither
                    532:               ** char is found, then discard the whole base element.
                    533:               */
                    534: 
                    535:               /* search for the last DIR_SEP char */
1.18      cvs       536:               while (length >= 0  && basename[length] != DIR_SEP)
1.19      cvs       537:                 length--;
                    538:               if (length >= 0)
                    539:                 /* found the last DIR_SEP char, end the string there */
                    540:                 basename[length + 1] = EOS;               
                    541:               else
1.29    ! cvs       542:                 /* search for the first PATH_STR char */
1.19      cvs       543:                 {
1.29    ! cvs       544:                   for (length = 0; basename[length] != PATH_SEP && 
1.20      cvs       545:                          basename[length] != EOS; length++);
1.29    ! cvs       546:                   if (basename[length] == PATH_SEP)
1.19      cvs       547:                     /* found, so end the string there */
                    548:                     basename[length + 1] = EOS;
                    549:                   else
                    550:                     /* not found, discard the base */
                    551:                     basename[0] = EOS;
                    552:                 }
1.14      cvs       553:             }
                    554:           else
                    555:             basename[0] = EOS;
1.18      cvs       556:         }
1.22      cvs       557:      
1.18      cvs       558:        /*
1.19      cvs       559:        ** Fourth Step: 
1.18      cvs       560:        ** If there's no base element, and if we're following
1.19      cvs       561:        ** a link, use the URL of the current document as a base.
1.18      cvs       562:        */
                    563: 
                    564:        if (!attrHREF)
                    565:         {
                    566:           if (DocumentURLs[(int) doc])
1.14      cvs       567:             {
1.18      cvs       568:               strcpy (basename, DocumentURLs[(int) doc]);
                    569:               /* base and orgName have to be separated by a DIR_SEP */
                    570:               length = strlen (basename) - 1;
1.19      cvs       571:               /* search for the last DIR_SEP char */
1.18      cvs       572:               while (length >= 0  && basename[length] != DIR_SEP)
1.19      cvs       573:                 length--;
                    574:               if (length >= 0)
                    575:                 /* found the last DIR_SEP char, end the string there */
                    576:                 basename[length + 1] = EOS;               
                    577:               else
1.29    ! cvs       578:                 /* search for the first PATH_STR char */
1.19      cvs       579:                 {
1.29    ! cvs       580:                   for (length = 0; basename[length] != PATH_SEP && 
1.19      cvs       581:                          basename[length] != EOS; length ++);
1.29    ! cvs       582:                   if (basename[length] == PATH_SEP)
1.19      cvs       583:                     /* found, so end the string there */
                    584:                     basename[length + 1] = EOS;
                    585:                   else
                    586:                     /* not found, discard the base */
                    587:                     basename[0] = EOS;
                    588:                 }
1.14      cvs       589:             }
                    590:           else
1.19      cvs       591:               basename[0] = EOS;
1.14      cvs       592:         }
1.22      cvs       593:      
1.18      cvs       594:        /*
1.19      cvs       595:        ** Fifth Step, calculate the absolute URL, using the base
1.18      cvs       596:        */
                    597: 
1.25      cvs       598:        ptr = AmayaParseUrl (tempOrgName, basename, AMAYA_PARSE_ALL);
1.16      cvs       599: 
1.14      cvs       600:        if (ptr)
                    601:         {
1.29    ! cvs       602:           SimplifyUrl (&ptr);
1.14      cvs       603:           strcpy (newName, ptr);
1.25      cvs       604:           TtaFreeMemory (ptr);
1.14      cvs       605:         }
                    606:        else
1.18      cvs       607:           newName[0] = EOS;
1.5       cvs       608:      }
                    609: 
1.18      cvs       610:    /*
1.19      cvs       611:    ** Sixth and last Step:
1.18      cvs       612:    ** Prepare the docname that will refer to this resource in the
1.19      cvs       613:    ** .amaya directory. If the new URL finishes on DIR_SEP, then use
1.18      cvs       614:    ** noname.html as a default resource name
                    615:    */
1.19      cvs       616: 
                    617:    if (newName[0] != EOS)
1.5       cvs       618:      {
1.19      cvs       619:        length = strlen (newName) - 1;
1.18      cvs       620:        if (newName[length] == DIR_SEP)
                    621:         {
                    622:           /* docname was not comprised inside the URL, so let's */
                    623:           /* assign the default ressource name */
                    624:           strcpy (docName, "noname.html");
                    625:           /* remove DIR_SEP at the end of complete path */
1.23      cvs       626:           /* newName[length] = EOS; */
1.18      cvs       627:         }
1.14      cvs       628:        else
1.18      cvs       629:         {
                    630:           /* docname is comprised inside the URL */
                    631:           while (length >= 0  && newName[length] != DIR_SEP)
                    632:             length--;
                    633:           if (length < 0)
                    634:             strcpy (docName, newName);
                    635:           else
                    636:             strcpy (docName, &newName[length+1]);
                    637:         }
1.19      cvs       638: 
1.5       cvs       639:      }
1.18      cvs       640:    else
                    641:      docName[0] = EOS;
                    642: } 
1.3       cvs       643: 
1.4       cvs       644: /*----------------------------------------------------------------------
1.9       cvs       645:   IsSameHost                                                         
1.4       cvs       646:   ----------------------------------------------------------------------*/
1.3       cvs       647: #ifdef __STDC__
                    648: boolean             IsSameHost (char *url1, char *url2)
                    649: #else  /* __STDC__ */
                    650: boolean             IsSameHost (url1, url2)
                    651: char               *path;
                    652: #endif /* __STDC__ */
                    653: {
1.5       cvs       654:    char               *basename_ptr1, *basename_ptr2;
                    655:    boolean             result;
1.3       cvs       656: 
1.25      cvs       657:    basename_ptr1 = AmayaParseUrl (url1, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
                    658:    basename_ptr2 = AmayaParseUrl (url2, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
1.3       cvs       659: 
1.5       cvs       660:    if (strcmp (basename_ptr1, basename_ptr2))
1.8       cvs       661:       result = FALSE;
1.5       cvs       662:    else
1.8       cvs       663:       result = TRUE;
1.3       cvs       664: 
1.25      cvs       665:    TtaFreeMemory (basename_ptr1);
                    666:    TtaFreeMemory (basename_ptr2);
1.5       cvs       667:    return (result);
1.3       cvs       668: }
                    669: 
                    670: 
1.4       cvs       671: /*----------------------------------------------------------------------
1.22      cvs       672:   HasKnownFileSuffix
                    673:   returns TRUE if path points to a file ending with a suffix.
                    674:   ----------------------------------------------------------------------*/
                    675: #ifdef __STDC__
                    676: boolean             HasKnownFileSuffix (char *path)
                    677: #else  /* __STDC__ */
                    678: boolean             HasKnownFileSuffix (path)
                    679: char               *path;
                    680: #endif /* __STDC__ */
                    681: {
1.29    ! cvs       682:    char            *root;
        !           683:    char             temppath[MAX_LENGTH];
        !           684:    char             suffix[MAX_LENGTH];
1.22      cvs       685: 
1.24      cvs       686:    if (!path || path[0] == EOS || path[strlen(path)] == DIR_SEP)
1.22      cvs       687:      return (FALSE);
                    688: 
1.29    ! cvs       689:    root = AmayaParseUrl(path, "", AMAYA_PARSE_PATH | AMAYA_PARSE_PUNCTUATION);
1.22      cvs       690: 
                    691:    if (root) 
                    692:      {
                    693:        strcpy (temppath, root);
1.25      cvs       694:        TtaFreeMemory (root);
1.22      cvs       695:        /* Get the suffix */
                    696:        ExtractSuffix (temppath, suffix); 
                    697: 
                    698:        if( suffix[0] == EOS)
                    699:         /* no suffix */
                    700:         return (FALSE);
                    701: 
                    702:        /* Normalize the suffix */
                    703:        ConvertToLowerCase (suffix);
                    704: 
1.23      cvs       705:        if (!strcmp (suffix, "gz"))
1.22      cvs       706:         /* skip the compressed suffix */
                    707:         {
                    708:         ExtractSuffix (temppath, suffix);
                    709:         if(suffix[0] == EOS)
                    710:           /* no suffix */
                    711:           return (FALSE);
                    712:          /* Normalize the suffix */
                    713:          ConvertToLowerCase (suffix);
                    714:         }
                    715: 
                    716:        if ((strcmp (suffix, "gif")) && (strcmp (suffix, "xbm")) &&
                    717:           (strcmp (suffix, "xpm")) && (strcmp (suffix, "jpg")) &&
                    718:           (strcmp (suffix, "pdf")) && (strcmp (suffix, "png")) &&
                    719:           (strcmp (suffix, "tgz")) && (strcmp (suffix, "xpg")) &&
                    720:           (strcmp (suffix, "xpd")) && (strcmp (suffix, "ps")) &&
                    721:           (strcmp (suffix, "au"))  && (strcmp (suffix, "html")) &&
                    722:           (strcmp (suffix, "htm")) && (strcmp (suffix, "shtml")) &&
                    723:           (strcmp (suffix, "txt")) && (strcmp (suffix, "css")) &&
                    724:           (strcmp (suffix, "eps")))
                    725:         return (FALSE);
                    726:        else
                    727:         return (TRUE);
                    728:      }
                    729:    else
                    730:      return (FALSE);
                    731: }
                    732: 
                    733: 
                    734: /*----------------------------------------------------------------------
1.24      cvs       735:   ChopURL
                    736:   Gives back a URL no longer than MAX_PRINT_URL_LENGTH chars (outputURL). 
                    737:   If inputURL is  bigger than that size, outputURL receives
                    738:   MAX_PRINT_URL_LENGTH / 2 chars from the beginning of inputURL, "...", 
                    739:   and MAX_PRINT_URL_LENGTH / 2 chars from the end of inputURL.
                    740:   If inputURL is not longer than MAX_PRINT_URL_LENGTH chars, it gets
                    741:   copied into outputURL. 
                    742:   N.B.: outputURL must point to a memory block of MAX_PRINT_URL_LENGTH
                    743:   chars.
                    744:   ----------------------------------------------------------------------*/
                    745: #ifdef __STDC__
                    746: void ChopURL (char *outputURL, char *inputURL)
                    747: #else
                    748: void ChopURL (outputURL, inputURL)
                    749: char *outputURL;
                    750: char *inputURL;
                    751: #endif
1.22      cvs       752: 
1.24      cvs       753: {
                    754:   int len;
1.9       cvs       755: 
1.24      cvs       756:   len = strlen (inputURL);
                    757:   if (len <= MAX_PRINT_URL_LENGTH) 
1.29    ! cvs       758:     strcpy (outputURL, inputURL);
1.24      cvs       759:   else
                    760:     /* make a truncated urlName on the status window */
                    761:     {
                    762:       strncpy (outputURL, inputURL, MAX_PRINT_URL_LENGTH / 2);
                    763:       outputURL [MAX_PRINT_URL_LENGTH / 2] = EOS;
                    764:       strcat (outputURL, "...");
                    765:       strcat (outputURL, &(inputURL[len - MAX_PRINT_URL_LENGTH / 2 ]));
                    766:     }
1.25      cvs       767: }
                    768: 
                    769: 
                    770: /*----------------------------------------------------------------------
                    771:    scan
                    772:        Scan a filename for its consituents
                    773:        -----------------------------------
                    774:   
                    775:    On entry,
                    776:        name    points to a document name which may be incomplete.
                    777:    On exit,
                    778:         absolute or relative may be nonzero (but not both).
                    779:        host, fragment and access may be nonzero if they were specified.
                    780:        Any which are nonzero point to zero terminated strings.
                    781:   ----------------------------------------------------------------------*/
                    782: #ifdef __STDC__
                    783: static void scan (char * name, HTURI * parts)
                    784: #else  /* __STDC__ */
                    785: static void scan (name, parts)
                    786: char                *name;
                    787: HTURI               *parts;
                    788: 
                    789: #endif /* __STDC__ */
                    790: {
1.28      cvs       791:   char * p;
                    792:   char * after_access = name;
1.25      cvs       793: 
1.28      cvs       794:   memset(parts, '\0', sizeof(HTURI));
                    795:   /* Look for fragment identifier */
                    796:   if ((p = strrchr(name, '#')) != NULL)
                    797:     {
                    798:       *p++ = '\0';
                    799:       parts->fragment = p;
1.25      cvs       800:     }
                    801:     
1.28      cvs       802:   for (p=name; *p; p++)
                    803:     {
                    804:       if (*p==DIR_SEP || *p=='#' || *p=='?')
                    805:        break;
1.29    ! cvs       806:       if (*p==PATH_SEP)
1.28      cvs       807:        {
                    808:          *p = 0;
                    809:          parts->access = after_access; /* Scheme has been specified */
                    810: 
                    811:          /* The combination of gcc, the "-O" flag and the HP platform is
                    812:             unhealthy. The following three lines is a quick & dirty fix, but is
                    813:             not recommended. Rather, turn off "-O". */
                    814: 
                    815:          /*            after_access = p;*/
                    816:          /*            while (*after_access == 0)*/
                    817:          /*                after_access++;*/
                    818:          after_access = p+1;
                    819:          if (0==strcasecmp("URL", parts->access))
                    820:            /* Ignore IETF's URL: pre-prefix */
                    821:            parts->access = NULL;
                    822:          else
1.25      cvs       823:            break;
                    824:        }
                    825:     }
                    826:     
                    827:     p = after_access;
1.28      cvs       828:     if (*p==DIR_SEP)
                    829:       {
                    830:        if (p[1]==DIR_SEP)
                    831:          {
1.25      cvs       832:            parts->host = p+2;          /* host has been specified      */
1.28      cvs       833:            *p = 0;                     /* Terminate access             */
                    834:            /* look for end of host name if any */
                    835:            p = strchr(parts->host,DIR_SEP);
                    836:            if (p)
                    837:              {
1.25      cvs       838:                *p=0;                   /* Terminate host */
                    839:                parts->absolute = p+1;          /* Root has been found */
1.28      cvs       840:              }
                    841:          }
                    842:        else
                    843:          /* Root found but no host */
                    844:          parts->absolute = p+1;
                    845:       }
                    846:     else
                    847:       {
1.25      cvs       848:         parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
1.28      cvs       849:       }
1.25      cvs       850: }
                    851: 
                    852: 
                    853: /*----------------------------------------------------------------------
1.28      cvs       854:   AmayaParseUrl: parse a Name relative to another name
                    855: 
                    856:   This returns those parts of a name which are given (and requested)
                    857:   substituting bits from the related name where necessary.
1.25      cvs       858:   
1.28      cvs       859:   On entry,
1.25      cvs       860:        aName           A filename given
                    861:         relatedName     A name relative to which aName is to be parsed. Give
                    862:                         it an empty string if aName is absolute.
                    863:         wanted          A mask for the bits which are wanted.
                    864:   
1.28      cvs       865:   On exit,
1.25      cvs       866:        returns         A pointer to a malloc'd string which MUST BE FREED
                    867:   ----------------------------------------------------------------------*/
                    868: #ifdef __STDC__
1.28      cvs       869: char          *AmayaParseUrl (char *aName, char *relatedName, int wanted)
1.25      cvs       870: #else  /* __STDC__ */
1.28      cvs       871: char          *AmayaParseUrl (aName, relatedName, wanted)
                    872: char          *aName;
                    873: char          *relatedName;
                    874: int            wanted;
1.25      cvs       875: 
                    876: #endif /* __STDC__ */
                    877: {
1.29    ! cvs       878:   char      *return_value;
        !           879:   char       result[MAX_LENGTH];
        !           880:   char       name[MAX_LENGTH];
        !           881:   char       rel[MAX_LENGTH];
        !           882:   char      *p, *access;
        !           883:   HTURI      given, related;
        !           884:   int        len;
1.25      cvs       885:     
1.29    ! cvs       886:   /* Make working copies of input strings to cut up: */
        !           887:   return_value = NULL;
        !           888:   result[0] = 0;               /* Clear string  */
        !           889:   strcpy (name, aName);
        !           890:   if (relatedName != NULL)  
        !           891:     strcpy (rel, relatedName);
        !           892:   else
        !           893:     relatedName[0] = EOS;
        !           894:   
        !           895:   scan (name, &given);
        !           896:   scan (rel,  &related); 
        !           897:   access = given.access ? given.access : related.access;
        !           898:   if (wanted & AMAYA_PARSE_ACCESS)
        !           899:     if (access)
        !           900:       {
        !           901:        strcat (result, access);
        !           902:        if(wanted & AMAYA_PARSE_PUNCTUATION)
        !           903:          strcat (result, PATH_STR);
        !           904:       }
        !           905:   
        !           906:   if (given.access && related.access)
        !           907:     /* If different, inherit nothing. */
        !           908:     if (strcmp (given.access, related.access) != 0)
        !           909:       {
        !           910:        related.host = 0;
        !           911:        related.absolute = 0;
        !           912:        related.relative = 0;
        !           913:        related.fragment = 0;
        !           914:       }
        !           915:   
        !           916:   if (wanted & AMAYA_PARSE_HOST)
        !           917:     if(given.host || related.host)
        !           918:       {
        !           919:        if(wanted & AMAYA_PARSE_PUNCTUATION)
        !           920:          strcat (result, "//");
        !           921:        strcat (result, given.host ? given.host : related.host);
        !           922:       }
        !           923:   
        !           924:   if (given.host && related.host)
        !           925:     /* If different hosts, inherit no path. */
        !           926:     if (strcmp(given.host, related.host) != 0)
        !           927:       {
        !           928:        related.absolute = 0;
        !           929:        related.relative = 0;
        !           930:        related.fragment = 0;
        !           931:       }
        !           932:   
        !           933:   if (wanted & AMAYA_PARSE_PATH)
        !           934:     {
        !           935:       if (given.absolute)
        !           936:        {
        !           937:          /* All is given */
        !           938:          if (wanted & AMAYA_PARSE_PUNCTUATION)
        !           939:            strcat (result, DIR_STR);
        !           940:          strcat (result, given.absolute);
1.25      cvs       941:        }
1.29    ! cvs       942:       else if (related.absolute)
        !           943:        {
        !           944:          /* Adopt path not name */
        !           945:          strcat (result, DIR_STR);
        !           946:          strcat (result, related.absolute);
        !           947:          if (given.relative)
        !           948:            {
        !           949:              /* Search part? */
        !           950:              p = strchr (result, '?');
        !           951:              if (!p)
        !           952:                p=result+strlen(result)-1;
        !           953:              for (; *p!=DIR_SEP; p--); /* last / */
        !           954:              /* Remove filename */
        !           955:              p[1]=0;
        !           956:              /* Add given one */
        !           957:              strcat (result, given.relative);
        !           958:              /*SimplifyUrl (&result);*/
1.25      cvs       959:            }
                    960:        }
1.29    ! cvs       961:       else if (given.relative)
        !           962:        /* what we've got */
        !           963:        strcat (result, given.relative);
        !           964:       else if (related.relative)
        !           965:        strcat (result, related.relative);
        !           966:       else
        !           967:        /* No inheritance */
        !           968:        strcat (result, DIR_STR);
1.25      cvs       969:     }
1.29    ! cvs       970:   
        !           971:   if (wanted & AMAYA_PARSE_ANCHOR)
        !           972:     if (given.fragment || related.fragment)
        !           973:       {
        !           974:        if (given.absolute && given.fragment)
        !           975:          {
        !           976:            /*Fixes for relURLs...*/
        !           977:            if (wanted & AMAYA_PARSE_PUNCTUATION)
        !           978:              strcat (result, "#");
        !           979:            strcat (result, given.fragment); 
        !           980:          }
        !           981:        else if (!(given.absolute) && !(given.fragment))
        !           982:          strcat (result, "");
        !           983:        else
        !           984:          {
        !           985:            if (wanted & AMAYA_PARSE_PUNCTUATION)
        !           986:              strcat (result, "#");
        !           987:            strcat (result, given.fragment ? given.fragment : related.fragment); 
        !           988:          }
        !           989:       }
        !           990:   len = strlen (result);
        !           991:   if ((return_value = (char *) TtaGetMemory (len + 1)) != NULL)
        !           992:     strcpy (return_value, result);
        !           993:   return (return_value);               /* exactly the right length */
1.25      cvs       994: }
                    995: 
                    996: /*----------------------------------------------------------------------
                    997:      HTCanon
                    998:        Canonicalizes the URL in the following manner starting from the host
                    999:        pointer:
                   1000:   
                   1001:        1) The host name is converted to lowercase
                   1002:        2) Chop off port if `:80' (http), `:70' (gopher), or `:21' (ftp)
                   1003:   
                   1004:        Return: OK      The position of the current path part of the URL
                   1005:                        which might be the old one or a new one.
                   1006:   
                   1007:   ----------------------------------------------------------------------*/
                   1008: #ifdef __STDC__
1.28      cvs      1009: static char *HTCanon (char ** filename, char * host)
1.25      cvs      1010: #else  /* __STDC__ */
1.28      cvs      1011: static char *HTCanon (filename, host)
                   1012: char       **filename;
                   1013: char        *host;
1.25      cvs      1014: #endif /* __STDC__ */
                   1015: {
                   1016:     char *newname = NULL;
                   1017:     char *port;
                   1018:     char *strptr;
                   1019:     char *path;
                   1020:     char *access = host-3;
                   1021: 
1.28      cvs      1022:     while (access>*filename && *(access-1)!=DIR_SEP)       /* Find access method */
1.25      cvs      1023:        access--;
1.28      cvs      1024:     if ((path = strchr(host, DIR_SEP)) == NULL)                        /* Find path */
1.25      cvs      1025:        path = host + strlen(host);
                   1026:     if ((strptr = strchr(host, '@')) != NULL && strptr<path)      /* UserId */
                   1027:        host = strptr;
1.29    ! cvs      1028:     if ((port = strchr(host, PATH_SEP)) != NULL && port>path)      /* Port number */
1.25      cvs      1029:        port = NULL;
                   1030: 
                   1031:     strptr = host;                                 /* Convert to lower-case */
                   1032:     while (strptr<path) {
                   1033:        *strptr = tolower(*strptr);
                   1034:        strptr++;
                   1035:     }
                   1036:     
                   1037:     /* Does the URL contain a full domain name? This also works for a
                   1038:        numerical host name. The domain name is already made lower-case
                   1039:        and without a trailing dot. */
                   1040:     {
                   1041:        char *dot = port ? port : path;
                   1042:        if (dot > *filename && *--dot=='.') {
                   1043:            char *orig=dot, *dest=dot+1;
                   1044:            while((*orig++ = *dest++));
                   1045:            if (port) port--;
                   1046:            path--;
                   1047:        }
                   1048:     }
                   1049:     /* Chop off port if `:', `:80' (http), `:70' (gopher), or `:21' (ftp) */
                   1050:     if (port) {
1.28      cvs      1051:        if (!*(port+1) || *(port+1)==DIR_SEP) {
1.25      cvs      1052:            if (!newname) {
                   1053:                char *orig=port, *dest=port+1;
                   1054:                while((*orig++ = *dest++));
                   1055:            }
                   1056:        } else if ((!strncmp(access, "http", 4) &&
1.28      cvs      1057:             (*(port+1)=='8'&&*(port+2)=='0'&&(*(port+3)==DIR_SEP||!*(port+3)))) ||
1.25      cvs      1058:            (!strncmp(access, "gopher", 6) &&
1.28      cvs      1059:             (*(port+1)=='7'&&*(port+2)=='0'&&(*(port+3)==DIR_SEP||!*(port+3)))) ||
1.25      cvs      1060:            (!strncmp(access, "ftp", 3) &&
1.28      cvs      1061:             (*(port+1)=='2'&&*(port+2)=='1'&&(*(port+3)==DIR_SEP||!*(port+3))))) {
1.25      cvs      1062:            if (!newname) {
                   1063:                char *orig=port, *dest=port+3;
                   1064:                while((*orig++ = *dest++));
1.28      cvs      1065:                /* Update path position, Henry Minsky */
                   1066:                path -= 3;
1.25      cvs      1067:            }
                   1068:        } else if (newname)
                   1069:            strncat(newname, port, (int) (path-port));
                   1070:     }
                   1071: 
                   1072:     if (newname) {
                   1073:        char *newpath = newname+strlen(newname);
                   1074:        strcat(newname, path);
                   1075:        path = newpath;
1.28      cvs      1076:        /* Free old copy */
                   1077:        TtaFreeMemory(*filename);
1.25      cvs      1078:        *filename = newname;
                   1079:     }
                   1080:     return path;
                   1081: }
                   1082: 
                   1083: 
                   1084: /*----------------------------------------------------------------------
1.29    ! cvs      1085:   SimplifyUrl: simplify a URI
1.28      cvs      1086:   A URI is allowed to contain the sequence xxx/../ which may be
1.29    ! cvs      1087:   replaced by "" , and the sequence "/./" which may be replaced by DIR_STR.
1.28      cvs      1088:   Simplification helps us recognize duplicate URIs. 
1.25      cvs      1089:   
1.28      cvs      1090:   Thus,        /etc/junk/../fred       becomes /etc/fred
                   1091:                 /etc/junk/./fred       becomes /etc/junk/fred
1.25      cvs      1092:   
1.28      cvs      1093:   but we should NOT change
                   1094:                 http://fred.xxx.edu/../..
1.25      cvs      1095:   
                   1096:        or      ../../albert.html
                   1097:   
1.28      cvs      1098:   In order to avoid empty URLs the following URLs become:
1.25      cvs      1099:   
                   1100:                /fred/..                becomes /fred/..
                   1101:                /fred/././..            becomes /fred/..
                   1102:                /fred/.././junk/.././   becomes /fred/..
                   1103:   
1.28      cvs      1104:   If more than one set of `://' is found (several proxies in cascade) then
                   1105:   only the part after the last `://' is simplified.
1.25      cvs      1106:   
1.28      cvs      1107:   Returns: A string which might be the old one or a new one.
1.25      cvs      1108:   ----------------------------------------------------------------------*/
                   1109: #ifdef __STDC__
1.29    ! cvs      1110: void         SimplifyUrl (char ** url)
1.25      cvs      1111: #else  /* __STDC__ */
1.29    ! cvs      1112: void         SimplifyUrl (url)
1.28      cvs      1113: char        **url;
1.25      cvs      1114: #endif /* __STDC__ */
                   1115: {
1.28      cvs      1116:   char *path, *p;
                   1117:   char *newptr, *access;
                   1118:   char *orig, *dest, *end;
                   1119: 
                   1120:   if (!url || !*url)
                   1121:     return;
                   1122: 
                   1123:   /* Find any scheme name */
                   1124:   if ((path = strstr(*url, "://")) != NULL)
                   1125:     {             /* Find host name */
                   1126:       access = *url;
                   1127:       while (access<path && (*access=tolower(*access)))
                   1128:        access++;
                   1129:       path += 3;
                   1130:       while ((newptr = strstr(path, "://")) != NULL)
                   1131:         /* For proxies */
                   1132:        path = newptr+3;
                   1133:       /* We have a host name */
                   1134:       path = HTCanon(url, path);
1.25      cvs      1135:     }
1.28      cvs      1136:   else if ((path = strstr(*url, ":/")) != NULL)
                   1137:     path += 2;
                   1138:   else
                   1139:     path = *url;
1.25      cvs      1140: 
1.28      cvs      1141:   if (*path == DIR_SEP && *(path+1)==DIR_SEP)
                   1142:     /* Some URLs start //<foo> */
                   1143:     path += 1;
                   1144:   else if (!strncmp(path, "news:", 5))
                   1145:     {
                   1146:       newptr = strchr(path+5, '@');
                   1147:       if (!newptr)
                   1148:        newptr = path + 5;
                   1149:       while (*newptr)
                   1150:        {
                   1151:          /* Make group or host lower case */
                   1152:          *newptr = tolower (*newptr);
                   1153:          newptr++;
1.25      cvs      1154:        }
1.28      cvs      1155:       /* Doesn't need to do any more */
                   1156:       return;
1.25      cvs      1157:     }
1.28      cvs      1158: 
                   1159:   if ((p = path))
                   1160:     {
                   1161:       if (!((end = strchr (path, ';')) || (end = strchr (path, '?')) ||
                   1162:            (end = strchr (path, '#'))))
                   1163:        end = path + strlen (path);
                   1164:       
                   1165:       /* Parse string second time to simplify */
                   1166:       p = path;
                   1167:       while (p < end)
                   1168:        {
                   1169:          if (*p==DIR_SEP)
                   1170:            {
                   1171:              if (p > *url && *(p+1) == '.' && (*(p+2) == DIR_SEP || !*(p+2)))
                   1172:                {
                   1173:                  orig = p + 1;
                   1174:                  dest = (*(p+2)!=DIR_SEP) ? p+2 : p+3;
                   1175:                  while ((*orig++ = *dest++)); /* Remove a slash and a dot */
                   1176:                  end = orig - 1;
                   1177:                }
                   1178:              else if (*(p+1)=='.' && *(p+2)=='.' && (*(p+3)==DIR_SEP || !*(p+3)))
                   1179:                {
                   1180:                  newptr = p;
                   1181:                  while (newptr>path && *--newptr!=DIR_SEP); /* prev slash */
                   1182:                  if (strncmp(newptr, "/../", 4))
                   1183:                    {
                   1184:                      orig = newptr + 1;
                   1185:                      dest = (*(p+3)!=DIR_SEP) ? p+3 : p+4;
                   1186:                      while ((*orig++ = *dest++)); /* Remove /xxx/.. */
                   1187:                      end = orig-1;
                   1188:                      /* Start again with prev slash */
                   1189:                      p = newptr;
1.25      cvs      1190:                    }
1.28      cvs      1191:                  else
1.25      cvs      1192:                    p++;
1.28      cvs      1193:                }
                   1194:              else if (*(p+1) == DIR_SEP)
                   1195:                {
                   1196:                  while (*(p+1) == DIR_SEP)
                   1197:                    {
                   1198:                      orig = p;
                   1199:                      dest = p + 1;
                   1200:                      while ((*orig++ = *dest++));  /* Remove multiple /'s */
                   1201:                      end = orig-1;
                   1202:                    }
                   1203:                }
                   1204:              else
1.25      cvs      1205:                p++;
1.28      cvs      1206:            }
                   1207:          else
                   1208:            p++;
1.25      cvs      1209:        }
                   1210:     }
1.28      cvs      1211:   return;
                   1212: }
                   1213: 
                   1214: 
                   1215: /*----------------------------------------------------------------------
                   1216:    NormalizeFile normalizes  local names.                             
                   1217:    Return TRUE if target and src differ.                           
                   1218:   ----------------------------------------------------------------------*/
                   1219: #ifdef __STDC__
                   1220: boolean             NormalizeFile (char *src, char *target)
                   1221: #else
                   1222: boolean             NormalizeFile (src, target)
                   1223: char               *src;
                   1224: char               *target;
                   1225: 
                   1226: #endif
                   1227: {
                   1228:    char               *s;
                   1229:    boolean             change;
                   1230: 
                   1231:    change = FALSE;
                   1232:    if (src[0] == '~')
                   1233:      {
                   1234:        /* replace ~ */
                   1235:        s = (char *) TtaGetEnvString ("HOME");
                   1236:        strcpy (target, s);
                   1237:        strcat (target, &src[1]);
                   1238:        change = TRUE;
                   1239:      }
                   1240:    else if (strncmp (src, "file:", 5) == 0)
                   1241:      {
                   1242:        /* remove the prefix file: */
                   1243:        if (src[5] == EOS)
                   1244:           strcpy (target, DIR_STR);
                   1245:        else if (src[0] == '~')
                   1246:          {
                   1247:            /* replace ~ */
                   1248:            s = (char *) TtaGetEnvString ("HOME");
                   1249:            strcpy (target, s);
                   1250:            strcat (target, &src[5]);
                   1251:          }
                   1252:        else
                   1253:           strcpy (target, &src[5]);
                   1254:        change = TRUE;
                   1255:      }
                   1256:    else
                   1257:       strcpy (target, src);
                   1258: 
                   1259:    /* remove /../ and /./ */
1.29    ! cvs      1260:    SimplifyUrl (&target);
1.28      cvs      1261:    return (change);
1.25      cvs      1262: }
                   1263: 
1.28      cvs      1264: 
1.25      cvs      1265: /*----------------------------------------------------------------------
1.29    ! cvs      1266:   MakeRelativeUrl: make relative name
1.25      cvs      1267:   
1.28      cvs      1268:   This function creates and returns a string which gives an expression of
                   1269:   one address as related to another. Where there is no relation, an absolute
                   1270:   address is retured.
1.25      cvs      1271:   
1.28      cvs      1272:   On entry,
1.25      cvs      1273:        Both names must be absolute, fully qualified names of nodes
                   1274:        (no fragment bits)
                   1275:   
1.28      cvs      1276:   On exit,
1.25      cvs      1277:        The return result points to a newly allocated name which, if
                   1278:        parsed by AmayaParseUrl relative to relatedName, will yield aName.
                   1279:        The caller is responsible for freeing the resulting name later.
                   1280:   ----------------------------------------------------------------------*/
                   1281: #ifdef __STDC__
1.29    ! cvs      1282: char            *MakeRelativeUrl (char *aName, char *relatedName)
1.25      cvs      1283: #else  /* __STDC__ */
1.29    ! cvs      1284: char            *MakeRelativeUrl (aName, relatedName)
1.28      cvs      1285: char            *aName;
                   1286: char            *relatedName;
1.25      cvs      1287: #endif  /* __STDC__ */
                   1288: {
1.29    ! cvs      1289:   char      *return_value;
        !          1290:   char       result[MAX_LENGTH];
        !          1291:   char          *p;
        !          1292:   char          *q = relatedName;
        !          1293:   char          *after_access;
        !          1294:   char          *last_slash = NULL;
        !          1295:   int            slashes, levels, len;
        !          1296: 
        !          1297:   if (aName == NULL || relatedName == NULL)
        !          1298:     return (NULL);
        !          1299: 
        !          1300:   slashes = 0;
        !          1301:   after_access = NULL;
        !          1302:   p = aName;
        !          1303:   q = relatedName;
        !          1304:   for (; *p && (*p == *q); p++, q++)
1.27      cvs      1305:     {
                   1306:       /* Find extent of match */
1.29    ! cvs      1307:       if (*p == PATH_SEP)
        !          1308:        after_access = p + 1;
1.28      cvs      1309:       if (*p == DIR_SEP)
1.27      cvs      1310:        {
1.29    ! cvs      1311:          /* memorize the last slash position and count them */
1.27      cvs      1312:          last_slash = p;
                   1313:          slashes++;
1.25      cvs      1314:        }
                   1315:     }
                   1316:     
                   1317:     /* q, p point to the first non-matching character or zero */
1.29    ! cvs      1318:     if ((slashes < 2 && after_access == NULL)
        !          1319:        || (slashes < 3 && after_access != NULL))
1.27      cvs      1320:       {
1.29    ! cvs      1321:        /* Local files or remote files whitout common path */
        !          1322:        /* exactly the right length */
        !          1323:        len = strlen (aName);
        !          1324:        if ((return_value = (char *) TtaGetMemory (len + 1)) != NULL)
        !          1325:          strcpy (return_value, aName);
1.27      cvs      1326:       }
                   1327:     else
                   1328:       {
                   1329:        /* Some path in common */
1.29    ! cvs      1330:        if (slashes == 3 && strncmp (aName, "http:", 5) != 0)
        !          1331:          /* just the same server */
        !          1332:          strcpy (result, last_slash);
        !          1333:        else
        !          1334:          {
        !          1335:            levels= 0; 
        !          1336:            for (; *q && (*q != '#'); q++)
        !          1337:              if (*q == DIR_SEP)
        !          1338:                levels++;
        !          1339:            
        !          1340:            result[0] = 0;
        !          1341:            for (;levels; levels--)
        !          1342:              strcat (result, "../");
        !          1343:            strcat (result, last_slash+1);
        !          1344:          } 
        !          1345: 
        !          1346:        /* exactly the right length */
        !          1347:        len = strlen (result);
        !          1348:        if ((return_value = (char *) TtaGetMemory (len + 1)) != NULL)
        !          1349:          strcpy (return_value, result);
1.25      cvs      1350:     }
1.29    ! cvs      1351:   return (return_value);
1.24      cvs      1352: }
Webmaster