File:  [Public] / libwww / Library / src / HTAccess.html
Revision 2.43.2.1: download - view: text, annotated - select for diffs
Fri Jan 6 22:23:24 1995 UTC (29 years, 5 months ago) by frystyk
Branches: without-configure
Diff to: branchpoint 2.43: preferred, colored
Mainly adjustments in HTTP.c

<HTML>
<HEAD>
<TITLE>HTAccess:  Access manager  for libwww</TITLE>
<NEXTID N="z11">
</HEAD>
<BODY>

<H1>Access Manager</H1>

<PRE>
/*
**	(c) COPYRIGHT CERN 1994.
**	Please first read the full copyright statement in the file COPYRIGH.
*/
</PRE>

This module keeps a list of valid protocol (naming scheme) specifiers
with associated access code.  It allows documents to be loaded given
various combinations of parameters.  New access protocols may be
registered at any time.<P>

<B>Note:</B> HTRequest defined and request parameter added to almost
all calls 18 Nov 1993.<P>

This module is implemented by <A HREF="HTAccess.c">HTAccess.c</A>, and
it is a part of the <A NAME="z10"
HREF="http://info.cern.ch/hypertext/WWW/Library/User/Guide/Guide.html">Library
of Common Code</A>. <P>

The module contains a lot of stuff but the main topics are:

<UL>
<LI><A HREF="#Library">Initializing and Terminating the Library</A>
<LI><A HREF="#Methods">Management of access methods</A>
<LI><A HREF="#Addresses">A lot of hard coded addresses</A>
<LI><A HREF="#z1">The HTRequest structure</A>
<LI><A HREF="#z100">Management of the HTRequest structure</A>
<LI><A HREF="#LoadDoc">Functions for loading a document</A>
<LI><A HREF="#ClientHelp">Help functions for clients to get started</A>
<LI><A HREF="#PostDoc">Functions for posting a document</A>
<LI><A HREF="#ProtReg">Access Method Registration</A>
</UL>


<PRE>
#ifndef HTACCESS_H
#define HTACCESS_H
#include "HTList.h"
#include "HTChunk.h"
</PRE>

<B>Short Names</B>
<PRE>
#ifdef SHORT_NAMES
#define HTClientHost 		HTClHost
#define HTSearchAbsolute	HTSeAbso
#define HTOutputStream		HTOuStre
#define HTOutputFormat		HTOuForm
#endif
</PRE>

<H2>Flags which may be set to control this module</H2>

<PRE>
extern char * HTSaveLocallyDir;		/* Dir home for "save locally" files */
extern char * HTCacheDir;		/* Cache dir, default NULL: no cache */
extern char * HTClientHost;		/* Name or number of telnetting host */
extern FILE * HTlogfile;		/* File to output one-liners to */
extern BOOL HTSecure;			/* Disable security holes? */
extern char * HTImServer;		/* If I'm cern_httpd */
extern BOOL HTImProxy;			/* If I'm cern_httpd as a proxy */
extern BOOL HTForceReload;		/* Force reload from cache or net */
</PRE>

<A NAME="Library"><H2>Initializing and Terminating the Library</H2></A>

<IMG SRC="http://info.cern.ch/hypertext/WWW/Icons/32x32/warning.gif">
These two functions initialize memory and settings for the Library and
clean up memory kept by the Library when about to exit the
application. It is highly recommended that they are used!

<PRE>
extern BOOL HTLibInit NOPARAMS;
extern BOOL HTLibTerminate NOPARAMS;
</PRE>

<A NAME="Methods"><H2>Method Management</H2></A>

These are the valid methods, see <A
HREF="http://info.cern.ch/hypertext/WWW/Protocols/HTTP/Methods.html">HTTP
Methods</A>

<PRE>
/*
** Access methods known to the library. METHOD_INVALID is zero, the
** real methods are numbered consecutively from 1, and MAX_METHODS is
** one more than the last real method.
*/
typedef enum {
	METHOD_INVALID		= 0,	/* Not a valid method */
	METHOD_GET		= 1,
	METHOD_HEAD		= 2,
	METHOD_POST		= 3,
	METHOD_PUT		= 4,
	METHOD_DELETE		= 5,
	METHOD_CHECKOUT		= 6,
	METHOD_CHECKIN		= 7,
	METHOD_SHOWMETHOD	= 8,
	METHOD_LINK		= 9,
	METHOD_UNLINK		= 10,
	MAX_METHODS		= 11	/* Sentinel, not a method itself */
} HTMethod;
</PRE>

<H3>Get Method Enumeration</H3>

Gives the enumeration value of the method as a function of the (char *) name.

<PRE>
extern HTMethod HTMethod_enum PARAMS((char * name));
</PRE>

<H3>Get Method String</H3>

The reverse of <I>HTMethod_enum()</I>

<PRE>
extern char * HTMethod_name PARAMS((HTMethod method));
</PRE>

<H3>Valid Methods</H3>

This function searches the list of valid methods for a given anchor, see
<A HREF="HTAnchor.html">HTAnchor module</A>. If the method is found it returns
YES else NO.

<PRE>
extern BOOL HTMethod_inList PARAMS((HTMethod	method,
				    HTList *	list));
</PRE>

<HR>
<EM>This section might be moved to the Access Authentication Module</EM>

<H4>Match Template Against Filename</H4>
<PRE>
/* extern						HTAA_templateMatch()
**		STRING COMPARISON FUNCTION FOR FILE NAMES
**		   WITH ONE WILDCARD * IN THE TEMPLATE
** NOTE:
**	This is essentially the same code as in HTRules.c, but it
**	cannot be used because it is embedded in between other code.
**	(In fact, HTRules.c should use this routine, but then this
**	 routine would have to be more sophisticated... why is life
**	 sometimes so hard...)
**
** ON ENTRY:
**	tmplate		is a template string to match the file name
**			against, may contain a single wildcard
**			character * which matches zero or more
**			arbitrary characters.
**	filename	is the filename (or pathname) to be matched
**			against the template.
**
** ON EXIT:
**	returns		YES, if filename matches the template.
**			NO, otherwise.
*/
extern BOOL HTAA_templateMatch PARAMS((CONST char * tmplate, 
				       CONST char * filename));
</PRE>

<HR>

The following have to be defined
in advance of the other include files
because of circular references.
<PRE>
typedef struct _HTRequest HTRequest;
typedef struct _HTNetInfo HTNetInfo;

/*
** Callback to a protocol module
*/
typedef int (*HTLoadCallBack)	PARAMS((HTRequest *	req));

#include "HTAnchor.h"
#include <A
NAME="z3" HREF="HTFormat.html">"HTFormat.h"</A>
#include "HTAAUtil.h"		/* HTAAScheme, HTAAFailReason */
#include "HTAABrow.h"		/* HTAASetup */
</PRE>

<A NAME="Addresses"><H2>Default WWW Addresses</H2></A>

These control the home page selection. To mess with these for normal browses
is asking for user confusion.
<PRE>
#define LOGICAL_DEFAULT "WWW_HOME"	      /* Defined to be the home page */

#ifndef PERSONAL_DEFAULT
#define PERSONAL_DEFAULT "WWW/default.html"		/* in home directory */
#endif

#ifndef LOCAL_DEFAULT_FILE
#define LOCAL_DEFAULT_FILE "/usr/local/lib/WWW/default.html"
#endif

/* If one telnets to an access point it will look in this file for home page */
#ifndef REMOTE_POINTER
#define REMOTE_POINTER  "/etc/www-remote.url"		    /* can't be file */
#endif

/* and if that fails it will use this. */
#ifndef REMOTE_ADDRESS
#define REMOTE_ADDRESS  "http://info.cern.ch/remote.html"   /* can't be file */
#endif

/* If run from telnet daemon and no -l specified, use this file: */
#ifndef DEFAULT_LOGFILE
#define DEFAULT_LOGFILE	"/usr/adm/www-log/www-log"
#endif

/* If the home page isn't found, use this file: */
#ifndef LAST_RESORT
#define LAST_RESORT	"http://info.cern.ch/default.html"
#endif

/* This is the default cache directory: */
#ifndef CACHE_HOME_DIR
#define CACHE_HOME_DIR		"/tmp/"
#endif

/* The default directory for "save locally" and "save and execute" files: */
#ifndef SAVE_LOCALLY_HOME_DIR
#define SAVE_LOCALLY_HOME_DIR	"/tmp/"
#endif
</PRE>

<H2><A NAME="HTNetInfo">Protocol Specific Information</A></H2>

This structure contains information about socket number, input buffer
for reading from the network etc. The structure is used throughout
the protocol modules and is the reference point for introducing multi
threaded execution into the library, see specifications on <A
HREF="http://info.cern.ch/hypertext/WWW/Library/User/Features/multithread.html">Multiple
Threads</A>.

<PRE>
/*
** Network state used by the protocol modules: the socket, its input
** buffer, the output stream and a link back to the owning request.
*/
struct _HTNetInfo {
    int			sockfd;				/* Socket descriptor */
    SockA 		sock_addr;		/* SockA is defined in tcp.h */
    HTInputSocket *	isoc;				     /* Input buffer */
    HTStream *		target;			            /* Output stream */
    HTChunk *		transmit;			  /* Line to be sent */
    int 		addressCount;	     /* Attempts if multi-homed host */
    time_t		connecttime;		 /* Used on multihomed hosts */
    struct _HTRequest *	request;	   /* Link back to request structure */
};
</PRE>

<EM><B>Note:</B> The <CODE>addressCount</CODE> variable is used to count the number
of attempts to connect to a multi-homed host so we know when to stop
trying new IP-addresses.</EM>

<H2><A
NAME="z1">The Request structure</A></H2>When a request is handled, all kinds
of things about it need to be passed
along.  These are all put into a
HTRequest structure. <P>

<B>Note 1:</B> There is also a <A NAME="z4" HREF="HTFormat.html#z17">global list of converters</A> <P>
<B>Note 2:</B> If you reuse the request structure for more than one
request then make sure that the request is re-initialized, so that no `old'
data is reused, see <A HREF="#z100">functions to manipulate HTRequest
Structure</A>. The library handles its own internal information from request
to request but the information set by the caller is untouched. <P>

The elements of the request structure are as follows.

<PRE>
struct _HTRequest {
</PRE>

<H3>Set by the caller of HTaccess:</H3>

<PRE>
    <A HREF="#Methods">HTMethod</A>	method;
</PRE>

An enum used to specify the HTTP <A NAME="z7"
HREF="../../Protocols/HTTP/Methods.html">method</A> used. The default
value is <CODE>GET</CODE>

<PRE>
    HTList * 	conversions;
</PRE>

NULL, or a list of conversions which the format manager can do in
order to fulfill the request.  It typically points to a list set up at
initialisation time, for example by <A HREF="HTInit.html">HTInit().</A>

<PRE>
    HTList *	encodings;
</PRE>

The list of encodings acceptable in the output stream.

<PRE>
    HTList *	languages;
</PRE>

The list of (human) language values acceptable in the response. The default
is all languages.

<PRE>
    BOOL (*<A NAME="z9"> callback</A> ) PARAMS((struct _HTRequest* request,
						void *param));
</PRE>

A function to be called back in the event that a file has been saved
to disk by HTSaveAndCallBack for example.

<PRE>
    void *	context;
</PRE>

An arbitrary pointer passed to HTAccess and passed back as a parameter
to the <A NAME="z10" HREF="#z9">callback</A> .

<PRE>
    HTStream*	output_stream; 
</PRE>

The output stream to be used to put data down to as they come in from the
network. The default value is <CODE>NULL</CODE> which means that the stream
goes to the user (display).

<PRE>
    HTStream*	output_flush; 
</PRE>

All object bodies sent from the server with status codes different
from <CODE>200 OK</CODE> will be put down this stream. This can be
used as a debug window etc. If the value is NULL (default) then the
stream used is <A HREF="HTFormat.html#BlackHole">HTBlackHole</A>.

<PRE>
    HTAtom * 	output_format;
</PRE>

The output format of the stream. This can be used to get unconverted
data etc. from the library. If <CODE>NULL</CODE>, then WWW_PRESENT is
default value.

<PRE>
    BOOL BlockingIO;
</PRE>

This flag can be set to override if a protocol module is registered as
using non-blocking IO.

<PRE>
    HTParentAnchor *parentAnchor;
</PRE>

If this parameter is set then a `Referer: &lt;parent address&gt;' is
generated in the request to the server, see <A
HREF="http://info.cern.ch/hypertext/WWW/Protocols/HTTP/HTRQ_Headers.html#z14">
Referer field in a HTTP Request</A>

<H3>Set by HTAccess</H3>

None of the bits below may be looked at by a client application except
in the callback routine, when the anchor may be picked out.

<PRE>
    HTParentAnchor*	anchor;
</PRE>

The anchor for the object in question.  Set immediately by HTAccess.
Used by the protocol and parsing modules.  Valid throughout the access.

<PRE>
    HTChildAnchor *	childAnchor;	/* For element within the object  */
</PRE>

The anchor for the sub object if any.  The object builder should
ensure that this is selected, highlighted, etc when the object is
loaded.

<PRE>
    void *		using_cache;
</PRE>

Pointer to cache element if cache hit

<H3>Error Diagnostics</H3>
<PRE>
	BOOL		error_block;	/* YES if stream has been used	  */
	HTList *	error_stack;	/* List of errors		  */

</PRE>
<H3>Library Side</H3>
<PRE>
	HTNetInfo *	net_info;	/* Information about socket etc. */
	int		redirections;	/* Number of redirections */
</PRE>

<H3>Temporary until we get a good MIME parser</H3>
<PRE>
	char *		redirect;		       /* Location or URI */
	char *		WWWAAScheme;	       /* WWW-Authenticate scheme */
	char *		WWWAARealm;	        /* WWW-Authenticate realm */
	char *		WWWprotection;	       /* WWW-Protection-Template */
</PRE>

<H3>Server Side</H3>
<PRE>
	HTAtom *	content_type;	/* Content-Type:		  */
	HTAtom *	content_language;/* Language			  */
	HTAtom *	content_encoding;/* Encoding			  */
	int		content_length;	/* Content-Length:		  */
	HTInputSocket *	isoc;		/* InputSocket object for reading */
	char *		authorization;	/* Authorization: field		  */
	HTAAScheme	scheme;		/* Authentication scheme used	  */
</PRE>
<H3>Client Side</H3>
<PRE>
	HTList *	valid_schemes;	/* Valid auth.schemes		  */
	HTAssocList **	scheme_specifics;/* Scheme-specific parameters	  */
	char *		authenticate;	/* WWW-authenticate: field */
	char *		prot_template;	/* WWW-Protection-Template: field */
	HTAASetup *	setup;		/* Doc protection info		  */
	HTAARealm *	realm;		/* Password realm		  */
	char *		dialog_msg;	/* Authentication prompt (client) */
};

</PRE>

<H2><A NAME="z100">Functions to Manipulate a HTRequest Structure</A></H2>

Just to make things easier especially for clients, here are some functions to
manipulate the request structure:

<H3>Create blank request</H3>This request has defaults in -- in
most cases it will need some information
added before being passed to HTAccess,
but it will work as is for a simple
request.
<PRE>
extern HTRequest * HTRequest_new NOPARAMS;
</PRE>

<H3>Delete request structure</H3>Frees also conversion list hanging
from req->conversions.
<PRE>
extern void HTRequest_delete PARAMS((HTRequest * req));
</PRE>

<H3>Clear a request structure</H3>
Clears a request structure so that it can be reused. The only thing that differs from using free/new is that the list of conversions is kept.
<PRE>
extern void HTRequest_clear PARAMS((HTRequest * req));
</PRE>

<A NAME="LoadDoc"><H2>Functions for Loading a Document</H2></A>

There are several different ways of loading a document. However, the
major difference between them is whether the document is referenced by

<UL>
<LI><A HREF="#Relative">Relative URI</A>
<LI><A HREF="#Absolute">Absolute URI</A>
<LI><A HREF="#Anchor">Anchor element</A> or
<LI>Contains keywords for <A HREF="#RelSearch">searching a relative URI</A>
<LI>Contains keywords for <A HREF="#AbsSearch">searching an absolute URI</A>
</UL>

<B>NOTE:</B> From release 3.0 of the Library, the return codes from
the loading functions are no longer <CODE>BOOL</CODE>, that is
<CODE>YES</CODE> or <CODE>NO</CODE>. Instead they have been replaced
with the following set of return codes defined in the <A
HREF="HTUtils.html#ReturnCodes">Utility module</A>:

<DL>
<DT>HT_WOULD_BLOCK
<DD>An I/O operation would block

<DT>HT_ERROR
<DD>An error has occurred

<DT>HT_LOADED
<DD>Success

<DT>HT_NO_DATA
<DD>Success, but no document loaded. This might be the situation when a 
telnet session is started etc.
</DL>

However, a general rule about the return codes is that <B>ERRORS</B>
have a <EM>negative</EM> value whereas <B>SUCCESS</B> has a
<EM>positive</EM> value. <P>

There are also some functions to help the client getting started with
<A HREF="#ClientHelp">the first URI</A>.

<A NAME="Relative"><H3>Load a document from relative name</H3></A>

<PRE>
extern int HTLoadRelative	PARAMS((CONST char * 	relative_name,
					HTParentAnchor*	here,
					HTRequest *	request));
</PRE>

<A NAME="Absolute"></A><H3>Load a document from absolute name</H3>

<PRE>
extern int HTLoadAbsolute	PARAMS((CONST char *	addr,
					HTRequest *	request));
</PRE>

<H3>Load a document from absolute name to a stream</H3>

<PRE>
extern int HTLoadToStream	PARAMS((CONST char * 	addr,
					BOOL 		filter,
					HTRequest *	request));
</PRE>

<A NAME="Anchor"><H3>Load if necessary, and select an anchor</H3></A>

The anchor parameter may be a child anchor. The anchor in the request
is set to the parent anchor. The recursive function keeps the error
stack in the request structure so that no information is lost having
more than one call. See also <A HREF="#BindAnchor">HTBindAnchor()</A>.

<PRE>
extern int HTLoadAnchor		PARAMS((HTAnchor  * 	a,
					HTRequest *	request));
extern int HTLoadAnchorRecursive PARAMS((HTAnchor *	a,
					HTRequest *	request));
</PRE>

<H3>Load a Document</H3>

These are two internal routines for loading a document which has an
address AND a matching anchor.  (The public routines are called with
one OR the other.)  This is recursively called from file load module
to try ftp (though this will be obsolete in the next major
release).<P>

If <CODE>keep_error_stack</CODE> is YES then the error (or info) stack
is not cleared from the previous call.

<PRE>
extern int HTLoad		PARAMS((HTRequest * request,
					BOOL keep_error_stack));
</PRE>

<PRE>
extern BOOL HTLoadTerminate	PARAMS((HTRequest * request, int status));
</PRE>

<A NAME="RelSearch"><H3>Search Using Relative URI</H3></A>

Performs a search on word given by the user. Adds the search words to
the end of the current address and attempts to open the new address.

<PRE>
extern int HTSearch		PARAMS((CONST char *	keywords,
					HTParentAnchor*	here,
					HTRequest *	request));
</PRE>

<A NAME="AbsSearch"><H3>Search using Absolute URI</H3></A>

Performs a keyword search on word given by the user. Adds the keyword
to the end of the current address and attempts to open the new
address.

<PRE>
extern int HTSearchAbsolute	PARAMS((CONST char *	keywords,
					CONST char * 	indexname,
					HTRequest *	request));
</PRE>


<A NAME="ClientHelp"><H2>Help Function for Clients to get started</H2></A>

These functions help the client to load the first document. They are
not mandatory to use - but they make life easier!

<A NAME="BindAnchor"><H3>Bind an anchor to a request structure without
loading</H3></A>

<PRE>
extern BOOL HTBindAnchor PARAMS((HTAnchor *anchor, HTRequest *request));
</PRE>

<A NAME="HomePage"><H3>Generate the Anchor for the Home Page</H3></A>

As it involves file access, this should only be done once when the
program first runs. This is a default algorithm using the
<CODE>WWW_HOME</CODE> environment variable.

<PRE>
extern HTParentAnchor * HTHomeAnchor NOPARAMS;
</PRE>

<H3>Find Related Name</H3>

Creates a local file URI that can be used as a relative name when
calling HTParse() to expand a relative file name to an absolute
one. <P>

The code for this routine originates from the Linemode browser and was
moved here by howcome@dxcern.cern.ch in order for all clients to take
advantage. <P>

<PRE>
extern char *  HTFindRelatedName NOPARAMS;
</PRE>

<A NAME="PostDoc"><H2>Functions for Posting a Document</H2></A>

This is not quite finished yet but more to come!

<H3>Get a Save Stream</H3>

<H4>On Entry,</H4>
<DL>
<DT>request->anchor
<DD> is a valid anchor which
has previously been loaded
</DL>

<H4>On exit,</H4>
<DL>
<DT>returns
<DD> 0 if error else a stream
to save the object to.
</DL>

<PRE>
extern HTStream * HTSaveStream PARAMS((HTRequest * request));
</PRE>


<A NAME="ProtReg"><H2>Access Method Registration</H2></A>

An access method is defined by an HTProtocol structure which points to
the routines for performing the various logical operations on an
object: in HTTP terms, GET, PUT, and POST. The access methods
supported in the Library are initiated automatically using the private
function <CODE>HTAccessInit()</CODE> <B>unless</B>
<CODE>NO_INIT</CODE> is defined. <P>

Each of these routines takes as a parameter a <A NAME="z2"
HREF="#z1">request structure</A> containing details of the request.
When the protocol class routine is called, the anchor element in the
request is already valid (made valid by HTAccess).

<PRE>
/* Socket I/O mode a protocol module is registered with. */
typedef enum _HTSocBlock {
    SOC_BLOCK		= 0,		/* Blocking I/O */
    SOC_NON_BLOCK	= 1		/* Non-blocking I/O */
} HTSocBlock;

/*
** Describes one access method: its name, its socket I/O mode and the
** routines implementing its operations. Registered with
** HTRegisterProtocol().
*/
typedef struct _HTProtocol {
    char *	name;		/* NOTE(review): presumably the scheme name - confirm */
    HTSocBlock	block;		/* SOC_BLOCK or SOC_NON_BLOCK */
    /* Load routine; returns an int status (presumably GET in HTTP terms) */
    int		(*load)		PARAMS((HTRequest *	request));
    /* Returns a stream for saving (presumably PUT in HTTP terms) */
    HTStream*	(*saveStream)	PARAMS((HTRequest *	request));
    /* Returns a stream for posting to the postTo anchor (presumably POST) */
    HTStream* 	(*postStream)	PARAMS((HTRequest *	request,
					HTParentAnchor* postTo));
} HTProtocol;

extern BOOL HTRegisterProtocol PARAMS((HTProtocol * protocol));
extern void HTDisposeProtocols NOPARAMS;
</PRE>

<H3>Uses Protocol Blocking IO</H3>

A small function to make life easier. Returns <CODE>YES</CODE> or
<CODE>NO</CODE>. If the Library is run in NON-INTERACTIVE MODE then
the function always returns <CODE>YES</CODE>.

<PRE>
extern BOOL HTProtocolBlocking	PARAMS((HTRequest *	request));
</PRE>

end

<PRE>
#endif /* HTACCESS_H */
</PRE>
end of HTAccess
</BODY>
</HTML>

Webmaster