File:
[Public] /
Amaya /
amaya /
AHTURLTools.c
Revision
1.22:
download - view:
text,
annotated -
select for diffs
Wed Feb 5 22:06:44 1997 UTC (27 years, 4 months ago) by
cvs
Branches:
MAIN
CVS tags:
HEAD
JK: Added Content Negotiation:
Accept: headers for GET
Content-Type: headers for PUT
Content-Type guessing either from the picture driver or from the file extension. The default value is text/html
/*
*
* (c) COPYRIGHT MIT and INRIA, 1996.
* Please first read the full copyright statement in file COPYRIGHT.
*
*/
/*
* AHTURLTools.c: contains all the functions for testing, manipulating,
* and normalizing URLs.
*
* Authors: J. Kahan, I. Vatton
*
*/
/* Amaya includes */
#define THOT_EXPORT extern
#include "amaya.h"
#include "init_f.h"
#include "AHTURLTools_f.h"
/* Private functions */
#ifdef __STDC__
static void ConvertToLowerCase (char *string);
#else
static void ConvertToLowerCase (/*char *string*/);
#endif
/*----------------------------------------------------------------------
ExplodeURL
----------------------------------------------------------------------*/
#ifdef __STDC__
void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file)
#else
void ExplodeURL (url, proto, host, dir, file)
char *url;
char **proto;
char **host;
char **dir;
char **file;
#endif
{
char *curr, *temp;
if ((url == NULL) || (proto == NULL) || (host == NULL) ||
(dir == NULL) || (file == NULL))
return;
/* initialize every pointer */
*proto = *host = *dir = *file = NULL;
/* skip any leading space */
while ((*url == SPACE) || (*url == TAB))
url++;
curr = url;
if (*curr == 0)
goto finished;
/* go to the end of the URL */
while ((*curr != 0) && (*curr != SPACE) && (*curr != '\b') &&
(*curr != '\r') && (*curr != EOL))
curr++;
/* mark the end of the chain */
*curr = EOS;
curr--;
if (curr <= url)
goto finished;
/* search the next DIR_SEP indicating the beginning of the file name */
do
curr--;
while ((curr >= url) && (*curr != DIR_SEP));
if (curr < url)
goto finished;
*file = curr + 1;
/* mark the end of the dir */
*curr = EOS;
curr--;
if (curr < url)
goto finished;
/* search for the "/" indicating the host name start */
while ((curr > url) && ((*curr != DIR_SEP) || (*(curr + 1) != DIR_SEP)))
curr--;
/* if we found it, separate the host name from the directory */
if ((*curr == DIR_SEP) && (*(curr + 1) == DIR_SEP))
{
*host = temp = curr + 2;
while ((*temp != 0) && (*temp != DIR_SEP))
temp++;
if (*temp == DIR_SEP)
{
*temp = EOS;
*dir = temp + 1;
}
}
else
*dir = curr;
if (curr <= url)
goto finished;
/* mark the end of the proto */
*curr = EOS;
curr--;
if (curr < url)
goto finished;
if (*curr == ':')
{
*curr = EOS;
curr--;
}
else
goto finished;
if (curr < url)
goto finished;
while ((curr > url) && (isalpha (*curr)))
curr--;
*proto = curr;
finished:;
#ifdef AMAYA_DEBUG
fprintf (stderr, "ExplodeURL(%s)\n\t", url);
if (*proto)
fprintf (stderr, "proto : %s, ", *proto);
if (*host)
fprintf (stderr, "host : %s, ", *host);
if (*dir)
fprintf (stderr, "dir : %s, ", *dir);
if (*file)
fprintf (stderr, "file : %s ", *file);
fprintf (stderr, "\n");
#endif
}
/*----------------------------------------------------------------------
IsHTMLName
returns TRUE if path points to an HTML resource.
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsHTMLName (char *path)
#else /* __STDC__ */
boolean IsHTMLName (path)
char *path;
#endif /* __STDC__ */
{
char temppath[MAX_LENGTH];
char suffix[MAX_LENGTH];
char nsuffix[MAX_LENGTH];
int i;
if (!path)
return (FALSE);
strcpy (temppath, path);
ExtractSuffix (temppath, suffix);
/* Normalize the suffix */
i = 0;
while (suffix[i] != EOS)
{
nsuffix[i] = TOLOWER (suffix[i]);
i++;
}
nsuffix[i] = EOS;
if ((strcmp (nsuffix, "html")) &&
(strcmp (nsuffix, "htm")) &&
(strcmp (nsuffix, "shtml")))
return (FALSE);
else if (!strcmp (nsuffix, "gz"))
{
/* take in account compressed files */
ExtractSuffix (temppath, suffix);
/* Normalize the suffix */
i = 0;
while (suffix[i] != EOS)
{
nsuffix[i] = TOLOWER (suffix[i]);
i++;
}
nsuffix[i] = EOS;
if ((strcmp (nsuffix, "html")) &&
(strcmp (nsuffix, "htm")) &&
(strcmp (nsuffix, "shtml")))
return (FALSE);
else
return (TRUE);
}
else
return (TRUE);
}
/*----------------------------------------------------------------------
IsImageName
returns TRUE if path points to an image resource.
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsImageName (char *path)
#else /* __STDC__ */
boolean IsImageName (path)
char *path;
#endif /* __STDC__ */
{
char temppath[MAX_LENGTH];
char suffix[MAX_LENGTH];
char nsuffix[MAX_LENGTH];
int i;
if (!path)
return (FALSE);
strcpy (temppath, path);
ExtractSuffix (temppath, suffix);
/* Normalize the suffix */
i = 0;
while (suffix[i] != EOS)
{
nsuffix[i] = TOLOWER (suffix[i]);
i++;
}
nsuffix[i] = EOS;
if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
(strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
(strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
return (FALSE);
return (TRUE);
}
/*----------------------------------------------------------------------
IsTextName
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsTextName (char *path)
#else /* __STDC__ */
boolean IsTextName (path)
char *path;
#endif /* __STDC__ */
{
char temppath[MAX_LENGTH];
char suffix[MAX_LENGTH];
char nsuffix[MAX_LENGTH];
int i;
if (!path)
return (FALSE);
strcpy (temppath, path);
ExtractSuffix (temppath, suffix);
/* Normalize the suffix */
i = 0;
while (suffix[i] != EOS)
{
nsuffix[i] = TOLOWER (suffix[i]);
i++;
}
nsuffix[i] = EOS;
if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
(strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
(strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
(strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "tar")) &&
(strcmp (nsuffix, "xpg")) && (strcmp (nsuffix, "xpd")) &&
(strcmp (nsuffix, "ps")) && (strcmp (nsuffix, "au")))
return (TRUE);
else if (!strcmp (nsuffix, "gz"))
{
/* take in account compressed files */
ExtractSuffix (temppath, suffix);
/* Normalize the suffix */
i = 0;
while (suffix[i] != EOS)
{
nsuffix[i] = TOLOWER (suffix[i]);
i++;
}
nsuffix[i] = EOS;
if ((!strcmp (nsuffix, "html")) ||
(!strcmp (nsuffix, "htm")) ||
(!strcmp (nsuffix, "shtml")))
return (TRUE);
else
return (FALSE);
}
else
return (FALSE);
}
/*----------------------------------------------------------------------
IsHTTPPath
returns TRUE if path is in fact an http URL.
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsHTTPPath (char *path)
#else /* __STDC__ */
boolean IsHTTPPath (path)
char *path;
#endif /* __STDC__ */
{
if (!path)
return FALSE;
if (strncmp (path, "http:", 5) != 0)
return FALSE;
return TRUE;
}
/*----------------------------------------------------------------------
IsWithParameters
returns TRUE if url has a concatenated query string.
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsWithParameters (char *url)
#else /* __STDC__ */
boolean IsWithParameters (url)
char *url;
#endif /* __STDC__ */
{
int i;
if ((!url) || (url[0] == EOS))
return FALSE;
i = strlen (url) - 1;
while (i > 0 && url[i--] != '?')
if (i < 0)
return FALSE;
/* There is a parameter */
return TRUE;
}
/*----------------------------------------------------------------------
IsW3Path
returns TRUE if path is in fact a URL.
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsW3Path (char *path)
#else /* __STDC__ */
boolean IsW3Path (path)
char *path;
#endif /* __STDC__ */
{
if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
(strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
(strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
(strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
return FALSE;
return TRUE;
}
/*----------------------------------------------------------------------
IsValidProtocol
returns true if the url protocol is supported by Amaya.
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsValidProtocol (char *url)
#else /* __STDC__ */
boolean IsValidProtocol (url)
char *url;
#endif /* __STDC__ */
{
if (!strncmp (url, "http:", 5))
/* experimental */
/***|| !strncmp (path, "ftp:", 4)
|| !strncmp (path, "news:", 5)***/
return (TRUE);
else
return (FALSE);
}
/*----------------------------------------------------------------------
NormalizeURL
normalizes orgName according to a base associated with doc, and
following the standard URL format rules.
The function returns the new complete and normalized URL
or file name path (newName) and the name of the document (docName).
N.B. If the function can't find out what's the docName, it assigns
the name "noname.html".
----------------------------------------------------------------------*/
#ifdef __STDC__
void NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
#else /* __STDC__ */
void NormalizeURL (orgName, doc, newName, docName)
char *orgName;
Document doc;
char *newName;
char *docName;
#endif /* __STDC__ */
{
char basename[MAX_LENGTH];
char tempOrgName[MAX_LENGTH];
char *ptr;
Element el;
ElementType elType;
AttributeType attrType;
Attribute attrHREF = NULL;
int length;
if (!newName || !docName)
return;
/*
** First Step: Clean orgName
** Make sure we have a complete orgName, without any leading or trailing
** white spaces, or trailinbg new lines
*/
ptr = orgName;
/* skip leading white space and new line characters */
while ((*ptr == ' ' || *ptr == EOL) && *ptr++ != EOS);
strcpy (tempOrgName, ptr);
/* clean trailing white space */
ptr = strchr (tempOrgName, ' ');
if (ptr)
*ptr = EOS;
/* clean trailing new lines */
ptr = strchr (tempOrgName, EOL);
if (ptr)
*ptr = EOS;
/*
** Second Step: make orgName a complete URL
** If the URL does not include a protocol, then
** try to calculate one using the doc's base element
** (if it exists),
*/
if (tempOrgName[0] == EOS)
{
newName[0] = EOS;
return;
}
else if (IsW3Path (tempOrgName))
{
/* the name is complete, go to the Sixth Step */
strcpy (newName, tempOrgName);
/* verify if the URL has the form "protocol://server:port" */
ptr = HTParse (newName, "", PARSE_ACCESS | PARSE_HOST |
PARSE_PUNCTUATION);
if (ptr && !strcmp (ptr, newName))
{
/* it has this form, we complete it by adding a "/" */
strcat (newName, "/");
}
if (ptr)
HT_FREE (ptr);
}
else if ( doc == 0)
/* the name is complete, go to the Sixth Step */
strcpy (newName, tempOrgName);
else
{
/* take into account the BASE element. */
length = MAX_LENGTH -1;
/* get the root element */
el = TtaGetMainRoot (doc);
/* search the BASE element */
elType.ElSSchema = TtaGetDocumentSSchema (doc);
elType.ElTypeNum = HTML_EL_BASE;
el = TtaSearchTypedElement (elType, SearchInTree, el);
if (el)
{
/*
** The document has a BASE element
** Get the HREF attribute of the BASE Element
*/
attrType.AttrSSchema = elType.ElSSchema;
attrType.AttrTypeNum = HTML_ATTR_HREF_;
attrHREF = TtaGetAttribute (el, attrType);
if (attrHREF)
{
/* Use the base path of the document */
TtaGiveTextAttributeValue (attrHREF, basename, &length);
/* base and orgName have to be separated by a DIR_SEP */
length--;
if (basename[0] != EOS && basename[length] != '/')
/* verify if the base has the form "protocol://server:port" */
{
ptr = HTParse (basename, "", PARSE_ACCESS | PARSE_HOST |
PARSE_PUNCTUATION);
if (ptr && !strcmp (ptr, basename))
{
/* it has this form, we complete it by adding a "/" */
strcat (basename, "/");
length++;
}
if (ptr)
HT_FREE (ptr);
}
/* Third Step: prepare the base
** Removing anything after the
** last DIR_SEP char. If no such char is found, then search for
** the first ":" char, hoping that what's before that is a
** protocol. If found, end the string there. If neither
** char is found, then discard the whole base element.
*/
/* search for the last DIR_SEP char */
while (length >= 0 && basename[length] != DIR_SEP)
length--;
if (length >= 0)
/* found the last DIR_SEP char, end the string there */
basename[length + 1] = EOS;
else
/* search for the first ":" char */
{
for (length = 0; basename[length] != ':' &&
basename[length] != EOS; length++);
if (basename[length] == ':')
/* found, so end the string there */
basename[length + 1] = EOS;
else
/* not found, discard the base */
basename[0] = EOS;
}
}
else
basename[0] = EOS;
}
/*
** Fourth Step:
** If there's no base element, and if we're following
** a link, use the URL of the current document as a base.
*/
if (!attrHREF)
{
if (DocumentURLs[(int) doc])
{
strcpy (basename, DocumentURLs[(int) doc]);
/* base and orgName have to be separated by a DIR_SEP */
length = strlen (basename) - 1;
/* search for the last DIR_SEP char */
while (length >= 0 && basename[length] != DIR_SEP)
length--;
if (length >= 0)
/* found the last DIR_SEP char, end the string there */
basename[length + 1] = EOS;
else
/* search for the first ":" char */
{
for (length = 0; basename[length] != ':' &&
basename[length] != EOS; length ++);
if (basename[length] == ':')
/* found, so end the string there */
basename[length + 1] = EOS;
else
/* not found, discard the base */
basename[0] = EOS;
}
}
else
basename[0] = EOS;
}
/*
** Fifth Step, calculate the absolute URL, using the base
*/
ptr = HTParse (tempOrgName, basename, PARSE_ALL);
if (ptr)
{
ptr = HTSimplify (&ptr);
strcpy (newName, ptr);
HT_FREE (ptr);
}
else
newName[0] = EOS;
}
/*
** Sixth and last Step:
** Prepare the docname that will refer to this ressource in the
** .amaya directory. If the new URL finishes on DIR_SEP, then use
** noname.html as a default ressource name
*/
if (newName[0] != EOS)
{
length = strlen (newName) - 1;
if (newName[length] == DIR_SEP)
{
/* docname was not comprised inside the URL, so let's */
/* assign the default ressource name */
strcpy (docName, "noname.html");
/* remove DIR_SEP at the end of complete path */
newName[length] = EOS;
}
else
{
/* docname is comprised inside the URL */
while (length >= 0 && newName[length] != DIR_SEP)
length--;
if (length < 0)
strcpy (docName, newName);
else
strcpy (docName, &newName[length+1]);
}
}
else
docName[0] = EOS;
}
/*----------------------------------------------------------------------
IsSameHost
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean IsSameHost (char *url1, char *url2)
#else /* __STDC__ */
boolean IsSameHost (url1, url2)
char *path;
#endif /* __STDC__ */
{
char *basename_ptr1, *basename_ptr2;
boolean result;
basename_ptr1 = HTParse (url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
basename_ptr2 = HTParse (url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
if (strcmp (basename_ptr1, basename_ptr2))
result = FALSE;
else
result = TRUE;
HT_FREE (basename_ptr1);
HT_FREE (basename_ptr2);
return (result);
}
/*----------------------------------------------------------------------
AHTMakeRelativeURL
converts url into a relative url to base_url.
If succesful, returns the new URL, otherwise, it returns NULL.
The caller has to free the new URL.
----------------------------------------------------------------------*/
#ifdef __STDC__
char *AHTMakeRelativeName (char *url, char *base_url)
#else /* __STDC__ */
char *AHTMakeRelativeName (url, base_url)
char url;
char base_url;
#endif /* __STDC__ */
{
char *base_ptr, *url_ptr;
char *result;
/* verify if we are in the same host */
base_ptr = HTParse (base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
url_ptr = HTParse (url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
if (!strcmp (base_ptr, url_ptr))
{
HT_FREE (base_ptr);
HT_FREE (url_ptr);
/* Normalize the URLs */
base_ptr = HTParse (base_url, "", PARSE_ALL);
url_ptr = HTParse (url, "", PARSE_ALL);
/* Use libwww to make relative name */
result = HTRelative (url_ptr, base_ptr);
HT_FREE (base_ptr);
HT_FREE (url_ptr);
}
else
result = (char *) NULL;
return (result);
}
/*----------------------------------------------------------------------
HasKnownFileSuffix
returns TRUE if path points to a file ending with a suffix.
----------------------------------------------------------------------*/
#ifdef __STDC__
boolean HasKnownFileSuffix (char *path)
#else /* __STDC__ */
boolean HasKnownFileSuffix (path)
char *path;
#endif /* __STDC__ */
{
char *root;
char temppath[MAX_LENGTH];
char suffix[MAX_LENGTH];
if (!path || path[0] == EOS)
return (FALSE);
root = HTParse(path, (char *) NULL, PARSE_PATH | PARSE_PUNCTUATION);
if (root)
{
strcpy (temppath, root);
HT_FREE (root);
/* Get the suffix */
ExtractSuffix (temppath, suffix);
if( suffix[0] == EOS)
/* no suffix */
return (FALSE);
/* Normalize the suffix */
ConvertToLowerCase (suffix);
if ((!strcmp (suffix, "gz")) ||
(!strcmp (suffix, "z")))
/* skip the compressed suffix */
{
ExtractSuffix (temppath, suffix);
if(suffix[0] == EOS)
/* no suffix */
return (FALSE);
/* Normalize the suffix */
ConvertToLowerCase (suffix);
}
if ((strcmp (suffix, "gif")) && (strcmp (suffix, "xbm")) &&
(strcmp (suffix, "xpm")) && (strcmp (suffix, "jpg")) &&
(strcmp (suffix, "pdf")) && (strcmp (suffix, "png")) &&
(strcmp (suffix, "tgz")) && (strcmp (suffix, "xpg")) &&
(strcmp (suffix, "xpd")) && (strcmp (suffix, "ps")) &&
(strcmp (suffix, "au")) && (strcmp (suffix, "html")) &&
(strcmp (suffix, "htm")) && (strcmp (suffix, "shtml")) &&
(strcmp (suffix, "txt")) && (strcmp (suffix, "css")) &&
(strcmp (suffix, "eps")))
return (FALSE);
else
return (TRUE);
}
else
return (FALSE);
}
/*----------------------------------------------------------------------
ConvertToLowerCase
Converts a string to lowercase.
----------------------------------------------------------------------*/
#ifdef __STDC__
static void ConvertToLowerCase (char *string)
#else /* __STDC__ */
static void ConvertToLowerCase (string)
char *string;
#endif /* __STDC__ */
{
int i;
if (!string)
return;
for (i = 0; string[i] != EOS; i++)
string[i] = TOLOWER (string[i]);
}
Webmaster