Annotation of libwww/Library/src/HTAccess.html, revision 2.51

2.9       timbl       1: <HTML>
                      2: <HEAD>
2.45      frystyk     3: <TITLE>Access manager  for libwww</TITLE>
2.19      timbl       4: <NEXTID N="z11">
2.9       timbl       5: </HEAD>
2.5       timbl       6: <BODY>
2.39      frystyk     7: 
2.33      frystyk     8: <H1>Access Manager</H1>
2.39      frystyk     9: 
2.41      frystyk    10: <PRE>
                     11: /*
2.50      frystyk    12: **     (c) COPYRIGHT MIT 1995.
2.41      frystyk    13: **     Please first read the full copyright statement in the file COPYRIGH.
                     14: */
                     15: </PRE>
                     16: 
2.39      frystyk    17: This module keeps a list of valid protocol (naming scheme) specifiers
                     18: with associated access code.  It allows documents to be loaded given
                     19: various combinations of parameters.  New access protocols may be
                     20: registered at any time.<P>
                     21: 
                     22: This module is implemented by <A HREF="HTAccess.c">HTAccess.c</A>, and
                     23: it is a part of the <A NAME="z10"
2.46      frystyk    24: HREF="http://www.w3.org/hypertext/WWW/Library/User/Guide/Guide.html">Library
2.39      frystyk    25: of Common Code</A>. <P>
                     26: 
                     27: The module contains a lot of stuff but the main topics are:
                     28: 
                     29: <UL>
2.41      frystyk    30: <LI><A HREF="#Library">Initializing and Terminating the Library</A>
2.46      frystyk    31: <LI><A HREF="#log">Logging of requests</A>
2.39      frystyk    32: <LI><A HREF="#Methods">Management of access methods</A>
                     33: <LI><A HREF="#Addresses">A lot of hard coded addresses</A>
2.51    ! frystyk    34: <LI><A HREF="#socket">Buffering for the Network</A>
        !            35: <LI><A HREF="#HTNetInfo">Protocol Specific Information</A>
2.39      frystyk    36: <LI><A HREF="#z1">The HTRequest structure</A>
                     37: <LI><A HREF="#z100">Management of the HTRequest structure</A>
                     38: <LI><A HREF="#LoadDoc">Functions for loading a document</A>
                     39: <LI><A HREF="#ClientHelp">Help functions for clients to get started</A>
                     40: <LI><A HREF="#PostDoc">Functions for posting a document</A>
                     41: <LI><A HREF="#ProtReg">Access Method Registration</A>
                     42: </UL>
                     43: 
2.33      frystyk    44: 
                     45: <PRE>
                     46: #ifndef HTACCESS_H
1.1       timbl      47: #define HTACCESS_H
2.44      roeber     48: 
2.51    ! frystyk    49: #include <A HREF="HTList.h">"HTList.h"</A>
        !            50: #include <A HREF="HTChunk.h">"HTChunk.h"</A>
        !            51: #include <A HREF="HTAnchor.html">"HTAnchor.h"</A>
        !            52: #include <A HREF="HTStream.h">"HTStream.h"</A>
2.35      frystyk    53: </PRE>
1.1       timbl      54: 
2.46      frystyk    55: <H2>Global Flags</H2>
                     56: 
                     57: Flags and variables which may be set to control the Library
                     58: 
                     59: <H3>Redirections</H3>
                     60: 
                     61: The maximum number of redirections is by default 10. This prevents
                     62: the library from going into an infinite loop which is kind of nice :-)
                     63: 
2.35      frystyk    64: <PRE>
2.46      frystyk    65: extern int HTMaxRedirections;
2.33      frystyk    66: </PRE>
1.1       timbl      67: 
2.46      frystyk    68: <H3>Other Flags</H3>
2.36      frystyk    69: 
                     70: <PRE>
                     71: extern char * HTClientHost;            /* Name or number of telnetting host */
                     72: extern BOOL HTSecure;                  /* Disable security holes? */
2.46      frystyk    73: 
2.36      frystyk    74: extern char * HTImServer;              /* If I'm cern_httpd */
                     75: extern BOOL HTImProxy;                 /* If I'm cern_httpd as a proxy */
                     76: </PRE>
                     77: 
2.41      frystyk    78: <A NAME="Library"><H2>Initializing and Terminating the Library</H2></A>
                     79: 
2.46      frystyk    80: <IMG SRC="http://www.w3.org/hypertext/WWW/Icons/32x32/warning.gif">
2.41      frystyk    81: These two functions initialize memory and settings for the Library and
                     82: cleans up memory kept by the Library when about to exit the
                     83: application. It is highly recommended that they are used!
                     84: 
                     85: <PRE>
                     86: extern BOOL HTLibInit NOPARAMS;
                     87: extern BOOL HTLibTerminate NOPARAMS;
                     88: </PRE>
                     89: 
2.39      frystyk    90: <A NAME="Methods"><H2>Method Management</H2></A>
2.33      frystyk    91: 
2.41      frystyk    92: These are the valid methods, see <A
2.46      frystyk    93: HREF="http://www.w3.org/hypertext/WWW/Protocols/HTTP/Methods.html">HTTP
                     94: Methods</A>. <P>
                     95: 
                     96: <B>NOTE:</B> the anchor list of allowed methods is now a bitflag, not
                     97: a list.
2.33      frystyk    98: 
                     99: <PRE>
2.16      luotonen  100: typedef enum {                  /* One distinct bit per method, so values can be OR-ed into a mask */
2.46      frystyk   101:        METHOD_INVALID  = 0x0,   /* No bit set */
                    102:        METHOD_GET      = 0x1,
                    103:        METHOD_HEAD     = 0x2,    
                    104:        METHOD_POST     = 0x4,    
                    105:        METHOD_PUT      = 0x8,    
                    106:        METHOD_DELETE   = 0x10,
                    107:        METHOD_LINK     = 0x20,
                    108:        METHOD_UNLINK   = 0x40
2.16      luotonen  109: } HTMethod;
2.33      frystyk   110: </PRE>
                    111: 
                    112: <H3>Get Method Enumeration</H3>
2.16      luotonen  113: 
2.33      frystyk   114: Gives the enumeration value of the method as a function of the (char *) name.
2.16      luotonen  115: 
2.33      frystyk   116: <PRE>
2.46      frystyk   117: extern HTMethod HTMethod_enum PARAMS((CONST char * name));
2.16      luotonen  118: </PRE>
                    119: 
2.33      frystyk   120: <H3>Get Method String</H3>
2.16      luotonen  121: 
2.33      frystyk   122: The reverse of <I>HTMethod_enum()</I>
2.16      luotonen  123: 
2.33      frystyk   124: <PRE>
2.46      frystyk   125: extern CONST char * HTMethod_name PARAMS((HTMethod method));
2.16      luotonen  126: </PRE>
2.33      frystyk   127: 
2.35      frystyk   128: <HR>
                    129: <EM>This section might be moved to the Access Authentication Module</EM>
                    130: 
2.36      frystyk   131: <H4>Match Template Against Filename</H4>
2.16      luotonen  132: <PRE>
2.40      frystyk   133: /* extern                                              HTAA_templateMatch()
2.16      luotonen  134: **             STRING COMPARISON FUNCTION FOR FILE NAMES
                    135: **                WITH ONE WILDCARD * IN THE TEMPLATE
                    136: ** NOTE:
                    137: **     This is essentially the same code as in HTRules.c, but it
                    138: **     cannot be used because it is embedded in between other code.
                    139: **     (In fact, HTRules.c should use this routine, but then this
                    140: **      routine would have to be more sophisticated... why is life
                    141: **      sometimes so hard...)
                    142: **
                    143: ** ON ENTRY:
2.37      frystyk   144: **     tmplate         is a template string to match the file name
2.16      luotonen  145: **                     against, may contain a single wildcard
                    146: **                     character * which matches zero or more
                    147: **                     arbitrary characters.
                    148: **     filename        is the filename (or pathname) to be matched
                    149: **                     against the template.
                    150: **
                    151: ** ON EXIT:
                    152: **     returns         YES, if filename matches the template.
                    153: **                     NO, otherwise.
                    154: */
2.40      frystyk   155: extern BOOL HTAA_templateMatch PARAMS((CONST char * tmplate, 
2.16      luotonen  156:                                       CONST char * filename));
2.35      frystyk   157: </PRE>
                    158: <HR>
2.16      luotonen  159: 
2.46      frystyk   160: The following have to be defined in advance of the other include files
2.10      timbl     161: because of circular references.
2.46      frystyk   162: 
2.33      frystyk   163: <PRE>
                    164: typedef struct _HTRequest HTRequest;
2.39      frystyk   165: typedef struct _HTNetInfo HTNetInfo;
2.10      timbl     166: 
2.51    ! frystyk   167: #include "HTAAUtil.h"                         /* HTAAScheme, HTAAFailReason */
        !           168: #include "HTAABrow.h"                                          /* HTAASetup */
2.14      luotonen  169: /*
2.39      frystyk   170: ** Callback to a protocol module
2.14      luotonen  171: */
2.39      frystyk   172: typedef int (*HTLoadCallBack)  PARAMS((HTRequest *     req));
2.33      frystyk   173: </PRE>
2.10      timbl     174: 
2.46      frystyk   175: <A NAME="HeaderMask"><H2>General HTTP Header Mask</H2></A>
                    176: 
2.51    ! frystyk   177: There are a few header fields which have general applicability for
        !           178: both request and response messages, but which do not apply to the
        !           179: communication parties or the entity being transferred. This mask
        !           180: enables and disables these headers. If the bit is not turned on they
2.46      frystyk   181: are not sent. Extra headers can be generated when initializing the <A
2.51    ! frystyk   182: HREF="#ExtraHeaders">ExtraHeaders field</A>. All headers are optional
        !           183: and the default value is <EM>NO GENERAL HEADERS</EM>
2.46      frystyk   184: 
                    185: <PRE>
2.51    ! frystyk   186: typedef enum _GenHeaderEnum {   /* Bit mask: a general header is sent only if its bit is set */
2.46      frystyk   187:     HT_DATE            = 0x1,
2.51    ! frystyk   188:     HT_FORWARDED       = 0x2,
        !           189:     HT_MESSAGE_ID      = 0x4,
        !           190:     HT_MIME            = 0x8
        !           191: } GenHeaderEnum;
        !           192: 
        !           193: #define <A NAME="DEF_HEAD">DEFAULT_GENERAL_HEADERS</A> 0
        !           194: </PRE>
        !           195: 
        !           196: <H2>Request Headers</H2>
2.46      frystyk   197: 
2.51    ! frystyk   198: The request header fields allow the client to pass additional
        !           199: information about the request (and about the client itself) to the
        !           200: server. All headers are optional but the default value is all request
        !           201: headers if present <EM>except</EM> <CODE>From</CODE> and
        !           202: <CODE>Pragma</CODE>.
        !           203: 
        !           204: <PRE>
        !           205: typedef enum _ReqHeaderEnum {   /* Bit mask selecting which request headers are sent */
        !           206:     HT_ACCEPT_TYPE     = 0x1,
        !           207:     HT_ACCEPT_CHAR     = 0x2,
        !           208:     HT_ACCEPT_ENC      = 0x4,
        !           209:     HT_ACCEPT_LAN      = 0x8,
2.46      frystyk   210:     HT_FROM            = 0x10,  /* Not part of DEFAULT_REQUEST_HEADERS */
                    211:     HT_PRAGMA          = 0x20,  /* Not part of DEFAULT_REQUEST_HEADERS */
                    212:     HT_REFERER         = 0x40,
                    213:     HT_USER_AGENT      = 0x80
2.51    ! frystyk   214: } ReqHeaderEnum;
2.46      frystyk   215: 
2.51    ! frystyk   216: #define <A NAME="DEF_REQ">DEFAULT_REQUEST_HEADERS</A> \
        !           217: (HT_ACCEPT_TYPE|HT_ACCEPT_CHAR|HT_ACCEPT_ENC|HT_ACCEPT_LAN|HT_REFERER|HT_USER_AGENT) /* All request headers except From and Pragma; parenthesised so the mask is safe inside larger expressions */
2.46      frystyk   218: </PRE>
                    219: 
                    220: <H2>Entity Header Mask</H2>
                    221: 
                    222: The entity headers contain information about the object sent in the
                    223: HTTP transaction. See the <A HREF="HTAnchor.html">Anchor module</A>,
                    224: for the storage of entity headers. This flag defines which headers are
                    225: to be sent in a request together with an entity body (the <B>O</B>
2.51    ! frystyk   226: stands for <EM>object</EM>). All headers are optional but the default
        !           227: value is <EM>ALL ENTITY HEADERS IF PRESENT</EM>
2.46      frystyk   228: 
                    229: <PRE>
                    230: typedef enum _EntityHeaderEnum {   /* Bit mask selecting which entity headers are sent with an entity body */
                    231:     HT_ALLOW           = 0x1,
                    232:     HT_CONTENT_ENCODING        = 0x2,
                    233:     HT_CONTENT_LANGUAGE        = 0x4,
                    234:     HT_CONTENT_LENGTH  = 0x8,
                    235:     HT_CTE             = 0x10,                 /* Content-Transfer-Encoding */
                    236:     HT_CONTENT_TYPE    = 0x20,
                    237:     HT_DERIVED_FROM    = 0x40,
                    238:     HT_EXPIRES         = 0x80,
                    239:     HT_LAST_MODIFIED   = 0x200,        /* NOTE(review): bit 0x100 is unused - confirm the gap is intentional */
                    240:     HT_LINK            = 0x400,
                    241:     HT_TITLE           = 0x800,
                    242:     HT_URI             = 0x1000,
                    243:     HT_VERSION         = 0x2000
                    244: } EntityHeaderEnum;
                    245: 
2.51    ! frystyk   246: #define <A NAME="DEF_ENTITY">DEFAULT_ENTITY_HEADERS</A> 0xFFFF
2.46      frystyk   247: </PRE>
                    248: 
2.39      frystyk   249: <A NAME="Addresses"><H2>Default WWW Addresses</H2></A>
2.10      timbl     250: 
2.33      frystyk   251: These control the home page selection. To mess with these for normal browsers
2.6       timbl     252: is asking for user confusion.
2.33      frystyk   253: <PRE>
                    254: #define LOGICAL_DEFAULT "WWW_HOME"           /* Defined to be the home page */
1.1       timbl     255: 
2.6       timbl     256: #ifndef PERSONAL_DEFAULT
2.33      frystyk   257: #define PERSONAL_DEFAULT "WWW/default.html"            /* in home directory */
2.6       timbl     258: #endif
2.33      frystyk   259: 
2.46      frystyk   260: /* If the home page isn't found, use this file: */
                    261: #ifndef LAST_RESORT
                    262: #define LAST_RESORT    "http://www.w3.org/"
2.6       timbl     263: #endif
2.33      frystyk   264: 
                    265: /* If one telnets to an access point it will look in this file for home page */
2.7       timbl     266: #ifndef REMOTE_POINTER
2.33      frystyk   267: #define REMOTE_POINTER  "/etc/www-remote.url"              /* can't be file */
2.7       timbl     268: #endif
2.33      frystyk   269: 
2.7       timbl     270: /* and if that fails it will use this. */
2.6       timbl     271: #ifndef REMOTE_ADDRESS
2.46      frystyk   272: #define REMOTE_ADDRESS  "http://www.w3.org/remote.html"   /* can't be file */
1.1       timbl     273: #endif
                    274: 
2.46      frystyk   275: /* Default log file name */
1.1       timbl     276: #ifndef DEFAULT_LOGFILE
2.46      frystyk   277: #define DEFAULT_LOGFILE                "www-log"
1.1       timbl     278: #endif
                    279: 
2.46      frystyk   280: #ifndef LOCAL_DEFAULT_FILE
                    281: #define LOCAL_DEFAULT_FILE "/usr/local/lib/WWW/default.html"
1.1       timbl     282: #endif
                    283: 
2.33      frystyk   284: /* This is the default cache directory: */
2.46      frystyk   285: #ifndef HT_CACHE_ROOT
                    286: #define HT_CACHE_ROOT          "/tmp"
2.23      frystyk   287: #endif
                    288: 
2.33      frystyk   289: /* The default directory for "save locally" and "save and execute" files: */
2.46      frystyk   290: #ifndef HT_TMP_ROOT
                    291: #define HT_TMP_ROOT            "/tmp"
2.23      frystyk   292: #endif
2.33      frystyk   293: </PRE>
2.10      timbl     294: 
2.51    ! frystyk   295: <A NAME="socket"><H2>Buffering for the network</H2></A>
        !           296: 
        !           297: This structure provides buffering for READ (and future WRITE) to the
        !           298: network. It is used by all the protocol modules. The size of the
        !           299: buffer, <CODE>INPUT_BUFFER_SIZE</CODE>, is a compromise between speed
        !           300: and memory.
        !           301: 
        !           302: <PRE>
        !           303: #define INPUT_BUFFER_SIZE 8192
        !           304: 
        !           305: typedef struct _socket_buffer {         /* Read buffer shared by the protocol modules */
        !           306:        char    input_buffer[INPUT_BUFFER_SIZE];        /* Storage for data read from the network */
        !           307:        char *  input_pointer;  /* NOTE(review): presumably the next unread byte in input_buffer - confirm */
        !           308:        char *  input_limit;    /* NOTE(review): presumably the end of valid data in input_buffer - confirm */
        !           309:        SOCKFD  input_file_number;      /* NOTE(review): presumably the socket being read - confirm */
        !           310: } HTInputSocket;
        !           311: </PRE>
        !           312: 
2.34      frystyk   313: <H2><A NAME="HTNetInfo">Protocol Specific Information</A></H2>
                    314: 
2.45      frystyk   315: This structure contains information about socket number, input buffer
                    316: for reading from the network etc. The structure is used throughout
                    317: the protocol modules and is the reference point for introducing multi
                    318: threaded execution into the library, see specifications on <A
2.46      frystyk   319: HREF="http://www.w3.org/hypertext/WWW/Library/User/Features/multithread.html">Multiple
2.45      frystyk   320: Threads</A>.
2.34      frystyk   321: 
                    322: <PRE>
2.46      frystyk   323: typedef enum _SocAction {              /* Type of the HTNetInfo "action" member */
                    324:     SOC_INVALID = -1,
                    325:     SOC_WRITE = 0,                             /* By default ready to write */
                    326:     SOC_READ,
                    327:     SOC_INTERRUPT
                    328: } SocAction;
                    329: 
2.39      frystyk   330: struct _HTNetInfo {
2.45      frystyk   331:     SOCKFD             sockfd;                         /* Socket descriptor */
2.39      frystyk   332:     SockA              sock_addr;              /* SockA is defined in tcp.h */
2.36      frystyk   333:     HTInputSocket *    isoc;                                /* Input buffer */
2.46      frystyk   334:     SocAction          action;                 /* Result of the select call */
                    335:     HTStream *         target;                             /* Target stream */
2.36      frystyk   336:     int                addressCount;        /* Attempts if multi-homed host */
2.39      frystyk   337:     time_t             connecttime;             /* Used on multihomed hosts */
2.36      frystyk   338:     struct _HTRequest *        request;           /* Link back to request structure */
2.39      frystyk   339: };
2.34      frystyk   340: </PRE>
                    341: 
2.36      frystyk   342: <EM><B>Note:</B> The <CODE>addressCount</CODE> variable is used to count the number
                    343: of attempts to connect to a multi-homed host so we know when to stop
                    344: trying new IP-addresses.</EM>
                    345: 
2.46      frystyk   346: <H2><A NAME="z1">The Request structure</A></H2>
                    347: 
                    348: When a request is handled, all kinds of things about it need to be
                    349: passed along.  These are all put into a HTRequest structure. This is
                    350: the most essential structure in the library. It contains two main
                    351: categories of information regarding a request:
                    352: 
                    353: <UL>
                    354: <LI>Application dependent information
                    355: <LI>Library dependent information
                    356: </UL>
                    357: 
                    358: Applications using the Library should <EM>never</EM> use the internal
                    359: library dependent information. It's only because we don't have real
                    360: classes that we can't hide it. <P>
                    361: 
                    362: <B>Note:</B> If you reuse the request structure for more than one
                    363: request then make sure that the request is re-initialized, so that no
                    364: `old' data is reused, see <A HREF="#z100">functions to manipulate
                    365: HTRequest Structure</A>. The library handles its own internal
                    366: information from request to request but the information set by the
                    367: caller is untouched. <P>
2.31      frystyk   368: 
2.46      frystyk   369: The elements of the request structure are as follows:
2.39      frystyk   370: 
                    371: <PRE>
                    372: struct _HTRequest {
                    373: </PRE>
2.19      timbl     374: 
2.46      frystyk   375: <H3>Application Dependent - Set by the caller of HTAccess</H3>
2.19      timbl     376: 
2.39      frystyk   377: <PRE>
                    378:     <A HREF="#Methods">HTMethod</A>    method;
                    379: </PRE>
                    380: 
                    381: An enum used to specify the HTTP <A NAME="z7"
2.46      frystyk   382: HREF="../../Protocols/HTTP/Methods.html">method</A> used for the
                    383: actual request. The default value is <A
                    384: HREF="#Methods"><CODE>GET</CODE></A>.
                    385: 
                    386: <H4>HTTP Header Information</H4>
2.39      frystyk   387: 
                    388: <PRE>
                    389:     HTList *   conversions;
                    390: </PRE>
                    391: 
2.46      frystyk   392: NULL, or a <EM>local</EM> list of specific conversions which the
                    393: format manager can do in order to fulfill the request.  It typically
                    394: points to a list set up on initialisation time for example by <A
                    395: HREF="HTInit.html">HTInit()</A>. There is also a <A
                    396: HREF="HTFormat.html#z17"><EM>global</EM></A> list of conversions which
                    397: contains a generic set of possible conversions.
2.39      frystyk   398: 
                    399: <PRE>
                    400:     HTList *   encodings;
                    401: </PRE>
                    402: 
                    403: The list of encodings acceptable in the output stream.
                    404: 
                    405: <PRE>
                    406:     HTList *   languages;
                    407: </PRE>
                    408: 
                    409: The list of (human) language values acceptable in the response. The default
                    410: is all languages.
                    411: 
                    412: <PRE>
2.51    ! frystyk   413:     HTList *   charsets;
        !           414: </PRE>
        !           415: 
        !           416: The list of charsets accepted by the application
        !           417: 
        !           418: <PRE>
        !           419:     GenHeaderEnum      GenMask;
        !           420:     ReqHeaderEnum      RequestMask;
2.46      frystyk   421:     EntityHeaderEnum   EntityMask;
                    422: </PRE>
                    423: 
2.51    ! frystyk   424: These bitmask variables define which headers to include in an HTTP
        !           425: request (or any other MIME-like protocol). See <A
        !           426: HREF="#HeaderMask">header masks</A> for more information on default
        !           427: values.
2.46      frystyk   428: 
                    429: <PRE>
                    430:     HTParentAnchor *parentAnchor;
                    431: </PRE>
                    432: 
                    433: If this parameter is set then a `Referer: &lt;parent address&gt;' can
                    434: be generated in the request to the server, see <A
                    435: HREF="http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRQ_Headers.html#z14">
                    436: Referer field in a HTTP Request</A>
                    437: 
                    438: <PRE>
                    439:    <A NAME="ExtraHeaders">char * ExtraHeaders;</A>
2.39      frystyk   440: </PRE>
                    441: 
2.46      frystyk   442: Extra header information can be sent along with a request using this
                    443: variable. The text is sent as is so it must be preformatted with
                    444: &lt;CRLF&gt; line terminators.
                    445: 
                    446: <H4>Streams From Network to Application</H4>
2.39      frystyk   447: 
                    448: <PRE>
2.46      frystyk   449:     HTStream * output_stream; 
2.39      frystyk   450: </PRE>
                    451: 
2.46      frystyk   452: The output stream is to be used to put data down to as they come in
                    453: <B>from</B> the network and back to the application. The default value
                    454: is <CODE>NULL</CODE> which means that the stream goes to the user
                    455: (display).
2.39      frystyk   456: 
                    457: <PRE>
2.46      frystyk   458:     HTAtom *   output_format;
2.39      frystyk   459: </PRE>
                    460: 
2.46      frystyk   461: The desired format of the output stream. This can be used to get
                    462: unconverted data etc. from the library. If <CODE>NULL</CODE>, then <A
                    463: HREF="HTFormat.html#FormatTypes">WWW_PRESENT</A> is default value.
2.45      frystyk   464: 
                    465: <PRE>
2.46      frystyk   466:     HTStream*  error_stream;
2.45      frystyk   467: </PRE>
                    468: 
                    469: All object bodies sent from the server with status codes different
                    470: from <CODE>200 OK</CODE> will be put down this stream. This can be
                    471: used as a debug window etc. If the value is NULL (default) then the
                    472: stream used is <A HREF="HTFormat.html#BlackHole">HTBlackHole</A>.
2.39      frystyk   473: 
2.47      frystyk   474: <PRE>
                    475:     HTAtom *   error_format;
                    476: </PRE>
                    477: 
                    478: The desired format of the error stream. This can be used to get
                    479: unconverted data etc. from the library. The default value is
                    480: <CODE>WWW_HTML</CODE> as a character based only has one WWW_PRESENT.
                    481: 
2.46      frystyk   482: <H4>Streams From Application to Network</H4>
                    483: 
2.39      frystyk   484: <PRE>
2.46      frystyk   485:     HTStream * input_stream; 
                    486: </PRE>
                    487: 
                    488: The input stream is to be used by the <CODE>PostCallBack</CODE>
                    489: function to put data out on the network. The user should not
                    490: initialize this field.
                    491: 
                    492: <PRE>
                    493:     HTAtom *   input_format;
                    494: </PRE>
                    495: 
                    496: The desired format of the input stream. This can be used to upload
                    497: converted data to a remote server. If <CODE>NULL</CODE>, then <A
                    498: HREF="HTFormat.html#FormatTypes">WWW_SOURCE</A> is default value.
                    499: 
                    500: <PRE>
                    501:     int (*PostCallBack)                PARAMS((struct _HTRequest *     request,
                    502:                                        HTStream *              target));
2.39      frystyk   503: </PRE>
                    504: 
2.46      frystyk   505: The call back function which is called when the current request is
                    506: ready for sending (posting) the data object. The request is the
                    507: current request so that the application knows which post we are
                    508: handling. The function must have the same return values as the other
                    509: <A HREF="#LoadDoc">Load functions</A>.
                    510: 
                    511: <H4>Other Flags</H4>
2.39      frystyk   512: 
                    513: <PRE>
2.43      frystyk   514:     BOOL BlockingIO;
2.46      frystyk   515:     BOOL ForceReload;
2.51    ! frystyk   516:     BOOL ContentNegotiation;
2.43      frystyk   517: </PRE>
                    518: 
2.51    ! frystyk   519: <CODE>BlockingIO</CODE> can be set to override if a protocol module is
        !           520: registered as using non-blocking IO, <CODE>ForceReload</CODE> will
        !           521: cancel any cached element, and <CODE>ContentNegotiation</CODE> will
        !           522: force content negotiation when looking for a local file. This is the
        !           523: default!
2.43      frystyk   524: 
                    525: <PRE>
2.46      frystyk   526:     BOOL (*<A NAME="z9"> callback</A> ) PARAMS((struct _HTRequest* request,
                    527:                                                void *param));
                    528: </PRE>
                    529: 
                    530: A function to be called back in the event that a file has been saved
                    531: to disk by HTSaveAndCallBack for example.
                    532: 
                    533: <PRE>
                    534:     void *     context;
2.39      frystyk   535: </PRE>
                    536: 
2.46      frystyk   537: An arbitrary pointer passed to HTAccess and passed back as a parameter
                    538: to the <A NAME="z10" HREF="#z9">callback</A>.
2.39      frystyk   539: 
2.46      frystyk   540: <H3>Library Dependent - Set by Library</H3>
2.39      frystyk   541: 
2.46      frystyk   542: None of the bits below may be looked at by a WWW application. The
                    543: Library handles the cleanup by itself.
2.39      frystyk   544: 
                    545: <PRE>
                    546:     HTParentAnchor*    anchor;
                    547: </PRE>
                    548: 
                    549: The anchor for the object in question.  Set immediately by HTAccess.
                    550: Used by the protocol and parsing modules.  Valid throughout the access.
                    551: 
                    552: <PRE>
                    553:     HTChildAnchor *    childAnchor;    /* For element within the object  */
                    554: </PRE>
                    555: 
                    556: The anchor for the sub object if any.  The object builder should
2.46      frystyk   557: ensure that it is selected, highlighted, etc when the object is
2.39      frystyk   558: loaded.
                    559: 
                    560: <PRE>
2.46      frystyk   561:     struct _HTRequest *        CopyRequest;
                    562: </PRE>
                    563: 
                    564: We need to know if we have a remote request sending data along with
                    565: this request.
                    566: 
                    567: <PRE>
                    568:     void *     using_cache;
                    569:     BOOL       using_proxy;
2.39      frystyk   570: </PRE>
                    571: 
2.46      frystyk   572: Pointer to the cache element on a cache hit, and a flag telling whether a proxy is used
2.19      timbl     573: 
2.25      luotonen  574: <PRE>
2.46      frystyk   575:     BOOL       error_block;            /* YES if stream has been used    */
                    576:     HTList *   error_stack;            /* List of errors                 */
                    577: </PRE>
2.25      luotonen  578: 
2.46      frystyk   579: These two fields are used by the error reporting system to keep a
                    580: stack of messages.
                    581: 
                    582: <PRE>
                    583:     HTNetInfo *        net_info;               /* Information about socket etc. */
                    584:     int                redirections;           /* Number of redirections */
2.47      frystyk   585:     time_t     retry_after;            /* Absolut time for a retry */
2.34      frystyk   586: </PRE>
2.46      frystyk   587: 
                    588: Protocol specific information, socket number etc.
                    589: 
2.34      frystyk   590: <PRE>
2.46      frystyk   591:     char *     redirect;               /* Location or URI */
                    592:     char *     WWWAAScheme;            /* WWW-Authenticate scheme */
                    593:     char *     WWWAARealm;             /* WWW-Authenticate realm */
                    594:     char *     WWWprotection;          /* WWW-Protection-Template */
2.25      luotonen  595: </PRE>
2.39      frystyk   596: 
2.46      frystyk   597: Information taken from the MIME header specifically oriented towards
                    598: the request (not the object itself)
                    599: 
2.39      frystyk   600: <PRE>
2.46      frystyk   601:     char *     authorization;          /* Authorization: field           */
                    602:     HTAAScheme scheme;                 /* Authentication scheme used     */
                    603:     HTInputSocket *    isoc;           /* InputSocket object for reading */
2.48      frystyk   604: #if 0
2.46      frystyk   605:     HTAtom *   content_type;           /* Content-Type:                  */
                    606:     HTAtom *   content_language;       /* Language                       */
                    607:     HTAtom *   content_encoding;       /* Encoding                       */
                    608:     int                content_length;         /* Content-Length:                */
2.48      frystyk   609: #endif
2.39      frystyk   610: </PRE>
                    611: 
2.46      frystyk   612: These header fields are only used by the server and will be removed at some
                    613: point.
                    614: 
2.21      luotonen  615: <PRE>
2.46      frystyk   616:     HTList *   valid_schemes;          /* Valid auth.schemes             */
                    617:     HTAssocList **     scheme_specifics;/* Scheme-specific parameters    */
                    618:     char *     authenticate;           /* WWW-authenticate: field */
                    619:     char *     prot_template;          /* WWW-Protection-Template: field */
                    620:     HTAASetup *        setup;                  /* Doc protection info            */
                    621:     HTAARealm *        realm;                  /* Password realm                 */
                    622:     char *     dialog_msg;             /* Authentication prompt (client) */
2.19      timbl     623: </PRE>
2.46      frystyk   624: 
                    625: These fields are used by the HTTP access authentication used by a
                    626: client application.
                    627: 
2.49      frystyk   628: <H3>Windows Specific Information</H3>
                    629: 
                    630: <PRE>
                    631: #ifdef _WINDOWS 
                    632:        HWND            hwnd;           /* Windows handle for MSWindows   */
                    633:        unsigned long   winMsg;         /* msg number of Windows eloop    */
                    634: #endif /* _WINDOWS */
                    635: </PRE>
                    636: 
2.19      timbl     637: <PRE>
2.10      timbl     638: };
2.31      frystyk   639: </PRE>
                    640: 
                    641: <H2><A NAME="z100">Functions to Manipulate a HTRequest Structure</A></H2>
                    642: 
                    643: Just to make things easier especially for clients, here are some functions to
                    644: manipulate the request structure:
                    645: 
                    646: <H3>Create blank request</H3>This request has defaults in -- in
2.9       timbl     647: most cases it will need some information
                    648: added before being passed to HTAccess,
                    649: but it will work as is for a simple
                    650: request.
2.14      luotonen  651: <PRE>
2.40      frystyk   652: extern HTRequest * HTRequest_new NOPARAMS;
2.31      frystyk   653: </PRE>
2.14      luotonen  654: 
2.31      frystyk   655: <H3>Delete request structure</H3>Frees also conversion list hanging
2.19      timbl     656: from req->conversions.
2.14      luotonen  657: <PRE>
2.40      frystyk   658: extern void HTRequest_delete PARAMS((HTRequest * req));
2.31      frystyk   659: </PRE>
1.1       timbl     660: 
2.31      frystyk   661: <H3>Clear a request structure</H3>
2.46      frystyk   662: 
                    663: Clears a request structure so that it can be reused. The only thing
                    664: that differs from using free/new is that the list of conversions is
                    665: kept. <P>
                    666: 
                    667: <B>NOTE:</B> It is <B>NOT</B> recommended to reuse a request structure!!!
                    668: 
2.31      frystyk   669: <PRE>
                    670: extern void HTRequest_clear PARAMS((HTRequest * req));
                    671: </PRE>
2.9       timbl     672: 
2.39      frystyk   673: <A NAME="LoadDoc"><H2>Functions for Loading a Document</H2></A>
                    674: 
                    675: There are several different ways of loading a document. However, the
                    676: major difference between them is whether the document is referenced by
                    677: 
                    678: <UL>
                    679: <LI><A HREF="#Relative">Relative URI</A>
                    680: <LI><A HREF="#Absolute">Absolute URI</A>
                    681: <LI><A HREF="#Anchor">Anchor element</A> or
                    682: <LI>Contains keywords for <A HREF="#RelSearch">searching a relative URI</A>
                    683: <LI>Contains keywords for <A HREF="#AbsSearch">searching an absolute URI</A>
                    684: </UL>
                    685: 
                    686: <B>NOTE:</B> From release 3.0 of the Library, the return codes from
                    687: the loading functions are no longer <CODE>BOOL</CODE>, that is
                    688: <CODE>YES</CODE> or <CODE>NO</CODE>. Instead they have been replaced
                    689: with the following set of return codes defined in the <A
                    690: HREF="HTUtils.html#ReturnCodes">Utility module</A>:
                    691: 
2.5       timbl     692: <DL>
2.39      frystyk   693: <DT>HT_WOULD_BLOCK
                    694: <DD>An I/O operation would block
                    695: 
                    696: <DT>HT_ERROR
                    697: <DD>Error has occurred
                    698: 
                    699: <DT>HT_LOADED
                    700: <DD>Success
                    701: 
                    702: <DT>HT_NO_DATA
                    703: <DD>Success, but no document loaded. This might be the situation when a 
                    704: telnet session is started etc.
2.47      frystyk   705: 
                    706: <DT>HT_RETRY
                    707: <DD>The remote server is down but will serve documents from the
                    708: calendar time indicated in HTRequest-&gt;retry_after.
                    709: 
2.5       timbl     710: </DL>
                    711: 
2.39      frystyk   712: However, a general rule about the return codes is that <B>ERRORS</B>
                    713: have a <EM>negative</EM> value whereas <B>SUCCESS</B> has a
                    714: <EM>positive</EM> value. <P>
1.1       timbl     715: 
2.39      frystyk   716: There are also some functions to help the client getting started with
                    717: <A HREF="#ClientHelp">the first URI</A>.
1.1       timbl     718: 
2.46      frystyk   719: <A NAME="Relative"><H3>Load a document from relative URL</H3></A>
1.1       timbl     720: 
2.39      frystyk   721: <PRE>
                    722: extern int HTLoadRelative      PARAMS((CONST char *    relative_name,
                    723:                                        HTParentAnchor* here,
                    724:                                        HTRequest *     request));
2.5       timbl     725: </PRE>
2.39      frystyk   726: 
2.46      frystyk   727: <A NAME="Absolute"></A><H3>Load a document from absolute URL</H3>
1.1       timbl     728: 
2.5       timbl     729: <PRE>
2.39      frystyk   730: extern int HTLoadAbsolute      PARAMS((CONST char *    addr,
                    731:                                        HTRequest *     request));
2.5       timbl     732: </PRE>
2.39      frystyk   733: 
                    734: <H3>Load a document from absolute name to a stream</H3>
                    735: 
2.5       timbl     736: <PRE>
2.39      frystyk   737: extern int HTLoadToStream      PARAMS((CONST char *    addr,
                    738:                                        BOOL            filter,
                    739:                                        HTRequest *     request));
2.5       timbl     740: </PRE>
1.1       timbl     741: 
2.46      frystyk   742: <A NAME="Anchor"><H3>Load a document from anchor</H3></A>
1.1       timbl     743: 
2.39      frystyk   744: The anchor parameter may be a child anchor. The anchor in the request
                    745: is set to the parent anchor. The recursive function keeps the error
                    746: stack in the request structure so that no information is lost having
                    747: more than one call. See also <A HREF="#BindAnchor">HTBindAnchor()</A>.
1.1       timbl     748: 
2.39      frystyk   749: <PRE>
2.41      frystyk   750: extern int HTLoadAnchor                PARAMS((HTAnchor  *     a,
                    751:                                        HTRequest *     request));
2.39      frystyk   752: extern int HTLoadAnchorRecursive PARAMS((HTAnchor *    a,
                    753:                                        HTRequest *     request));
2.5       timbl     754: </PRE>
                    755: 
2.39      frystyk   756: <H3>Load a Document</H3>
                    757: 
2.46      frystyk   758: These are two internal routines for loading a document which has an
2.41      frystyk   759: address AND a matching anchor.  (The public routines are called with
                    760: one OR the other.)  This is recursively called from file load module
                    761: to try ftp (though this will be obsolete in the next major
                    762: release).<P>
1.1       timbl     763: 
2.39      frystyk   764: If <CODE>keep_error_stack</CODE> is YES then the error (or info) stack
                    765: is not cleared from the previous call.
1.1       timbl     766: 
2.39      frystyk   767: <PRE>
2.41      frystyk   768: extern int HTLoad              PARAMS((HTRequest * request,
                    769:                                        BOOL keep_error_stack));
                    770: </PRE>
                    771: 
                    772: <PRE>
                    773: extern BOOL HTLoadTerminate    PARAMS((HTRequest * request, int status));
2.5       timbl     774: </PRE>
                    775: 
2.46      frystyk   776: <A NAME="RelSearch"><H3>Search Using Relative URL</H3></A>
2.39      frystyk   777: 
                    778: Performs a search on word given by the user. Adds the search words to
                    779: the end of the current address and attempts to open the new address.
                    780: 
2.5       timbl     781: <PRE>
2.39      frystyk   782: extern int HTSearch            PARAMS((CONST char *    keywords,
                    783:                                        HTParentAnchor* here,
                    784:                                        HTRequest *     request));
2.5       timbl     785: </PRE>
2.39      frystyk   786: 
2.46      frystyk   787: <A NAME="AbsSearch"><H3>Search using Absolute URL</H3></A>
2.39      frystyk   788: 
                    789: Performs a keyword search on word given by the user. Adds the keyword
                    790: to the end of the current address and attempts to open the new
                    791: address.
2.5       timbl     792: 
2.33      frystyk   793: <PRE>
2.40      frystyk   794: extern int HTSearchAbsolute    PARAMS((CONST char *    keywords,
2.39      frystyk   795:                                        CONST char *    indexname,
                    796:                                        HTRequest *     request));
2.5       timbl     797: </PRE>
2.24      luotonen  798: 
2.39      frystyk   799: 
                    800: <A NAME="ClientHelp"><H2>Help Function for Clients to get started</H2></A>
                    801: 
                    802: These functions help the client to load the first document. They are
                    803: not mandatory to use - but they make life easier!
                    804: 
                    805: <A NAME="BindAnchor"><H3>Bind an anchor to a request structure without
                    806: loading</H3></A>
2.24      luotonen  807: 
                    808: <PRE>
2.39      frystyk   809: extern BOOL HTBindAnchor PARAMS((HTAnchor *anchor, HTRequest *request));
2.24      luotonen  810: </PRE>
                    811: 
2.39      frystyk   812: <A NAME="HomePage"><H3>Generate the Anchor for the Home Page</H3></A>
2.24      luotonen  813: 
2.39      frystyk   814: As it involves file access, this should only be done once when the
                    815: program first runs. This is a default algorithm using the
                    816: <CODE>WWW_HOME</CODE> environment variable.
2.20      frystyk   817: 
                    818: <PRE>
2.40      frystyk   819: extern HTParentAnchor * HTHomeAnchor NOPARAMS;
2.39      frystyk   820: </PRE>
                    821: 
                    822: <H3>Find Related Name</H3>
                    823: 
                    824: Creates a local file URI that can be used as a relative name when
                    825: calling HTParse() to expand a relative file name to an absolute
                    826: one. <P>
                    827: 
2.46      frystyk   828: The code for this routine originates from the Line Mode Browser and
                    829: was moved here by <EM>howcome@dxcern.cern.ch</EM> in order for all
                    830: clients to take advantage.<P>
2.39      frystyk   831: 
2.20      frystyk   832: <PRE>
2.39      frystyk   833: extern char *  HTFindRelatedName NOPARAMS;
                    834: </PRE>
                    835: 
                    836: <A NAME="PostDoc"><H2>Functions for Posting a Document</H2></A>
                    837: 
2.46      frystyk   838: <B>NOTE:</B> The Posting functions are used to send a data object
                    839: along with the request. The functions have the same set of return
                    840: codes as for the <A HREF="#LoadDoc">Load Functions</A>.
2.20      frystyk   841: 
2.39      frystyk   842: <H3>Get a Save Stream</H3>
2.20      frystyk   843: 
2.39      frystyk   844: <H4>On Entry,</H4>
2.5       timbl     845: <DL>
2.9       timbl     846: <DT>request->anchor
                    847: <DD> is a valid anchor which
                    848: has previously been loaded
2.5       timbl     849: </DL>
                    850: 
2.39      frystyk   851: <H4>On exit,</H4>
2.5       timbl     852: <DL>
                    853: <DT>returns
2.6       timbl     854: <DD> 0 if error else a stream
                    855: to save the object to.
2.5       timbl     856: </DL>
                    857: 
                    858: <PRE>
2.46      frystyk   859: extern HTStream * HTSaveStream PARAMS((HTRequest * request));
                    860: </PRE>
                    861: 
                    862: <H3>Copy an Anchor</H3>
                    863: 
                    864: Fetch the URL (possibly local file URL) and send it using either
                    865: <B>PUT</B> or <B>POST</B> directly to the remote destination using
                    866: HTTP, that is remote copy of object <EM>O</EM> from <EM>A</EM> to
                    867: <EM>B</EM> where <EM>A</EM> might be the host of the application. The
                    868: caller can decide the exact method used and which HTTP header fields
                    869: to transmit by setting the user fields in the destination request
                    870: structure.
                    871: 
                    872: <PRE>
                    873: extern int HTCopyAnchor                PARAMS((HTAnchor *      src_anchor,
                    874:                                        HTRequest *     src_req,
                    875:                                        HTParentAnchor *dest_anchor,
                    876:                                        HTRequest *     dest_req));
                    877: </PRE>
                    878: 
                    879: 
                    880: <H3>Upload an Anchor</H3>
                    881: 
                    882: Send the contents (in hyperdoc) of the source anchor using either
                    883: <B>PUT</B> or <B>POST</B> to the remote destination using HTTP. The
                    884: caller can decide the exact method used and which HTTP header fields
                    885: to transmit by setting the user fields in the request structure.
                    886: <EM>Format conversion</EM> can be made on the fly by setting the <A
                    887: HREF="#input_format">input_format field</A> in the destination request
                    888: structure. If the content-length is unknown (-1) then a <A
                    889: HREF="HTConLen.html">content-length counter</A> is automatically put
                    890: into the stream pipe.
                    891: 
                    892: 
                    893: <PRE>
                    894: extern int HTUploadAnchor      PARAMS((HTAnchor *      src_anchor,
                    895:                                        HTParentAnchor *dest_anchor,
                    896:                                        HTRequest *     dest_req));
2.39      frystyk   897: </PRE>
2.5       timbl     898: 
1.1       timbl     899: 
2.39      frystyk   900: <A NAME="ProtReg"><H2>Access Method Registration</H2></A>
1.1       timbl     901: 
2.39      frystyk   902: An access method is defined by an HTProtocol structure which points to
                    903: the routines for performing the various logical operations on an
                    904: object: in HTTP terms, GET, PUT, and POST. The access methods
                    905: supported in the Library are initiated automatically using the private
                    906: function <CODE>HTAccessInit()</CODE> <B>if not</B> defined
2.46      frystyk   907: <CODE>HT_NO_INIT</CODE> <P>
2.39      frystyk   908: 
                    909: Each of these routines takes as a parameter a <A NAME="z2"
                    910: HREF="#z1">request structure</A> containing details of the request.
                    911: When the protocol class routine is called, the anchor element in the
                    912: request is already valid (made valid by HTAccess).
                    913: 
                    914: <PRE>
                    915: typedef enum _HTSocBlock {	/* I/O mode tag; stored in the `block' field of HTProtocol */
                    916:     SOC_BLOCK,	/* presumably: the protocol uses blocking I/O -- confirm in HTAccess.c */
2.42      frystyk   917:     SOC_NON_BLOCK	/* presumably: the protocol uses non-blocking I/O -- confirm in HTAccess.c */
2.39      frystyk   918: } HTSocBlock;
                    919: 
                    920: typedef struct _HTProtocol {	/* Describes one access method; passed to HTRegisterProtocol() */
                    921:     char *     name;	/* presumably the naming-scheme (protocol) name -- confirm */
                    922:     HTSocBlock block;  	/* SOC_BLOCK or SOC_NON_BLOCK */
                    923:     int                (*load)         PARAMS((HTRequest *     request));	/* Load routine (presumably GET); takes the request structure */
                    924:     HTStream*  (*saveStream)   PARAMS((HTRequest *     request));	/* Returns a stream (presumably for PUT) -- confirm */
                    925:     HTStream*  (*postStream)   PARAMS((HTRequest *     request,
                    926:                                        HTParentAnchor* postTo));	/* Returns a stream (presumably for POST to `postTo') -- confirm */
                    927: } HTProtocol;
1.1       timbl     928: 
2.40      frystyk   929: extern BOOL HTRegisterProtocol PARAMS((HTProtocol * protocol));
2.42      frystyk   930: extern void HTDisposeProtocols NOPARAMS;
2.5       timbl     931: </PRE>
1.1       timbl     932: 
2.39      frystyk   933: <H3>Uses Protocol Blocking IO</H3>
1.1       timbl     934: 
2.39      frystyk   935: A small function to make life easier. Returns <CODE>YES</CODE> or
2.42      frystyk   936: <CODE>NO</CODE>. If the Library is run in NON-INTERACTIVE MODE then
                    937: the function always returns YES.
2.38      howcome   938: 
                    939: <PRE>
2.40      frystyk   940: extern BOOL HTProtocolBlocking PARAMS((HTRequest *     request));
2.38      howcome   941: </PRE>
                    942: 
2.39      frystyk   943: end
2.25      luotonen  944: 
                    945: <PRE>
1.1       timbl     946: #endif /* HTACCESS_H */
2.25      luotonen  947: </PRE>
                    948: end of HTAccess
                    949: </BODY>
2.9       timbl     950: </HTML>

Webmaster