Annotation of libwww/Library/src/HTTP.c, revision 1.2

1.1       timbl       1: /*     HyperText Transfer Protocol     - Client implementation         HTTP.c
                      2: **     ==========================
1.2     ! timbl       3: **
        !             4: ** Bugs:
        !             5: **     Not implemented:
        !             6: **             Forward
        !             7: **             Redirection
        !             8: **             Error handling
1.1       timbl       9: */
                     10: 
                     11: /*     Module parameters:
                     12: **     -----------------
                     13: **
                     14: **  These may be undefined and redefined by syspec.h
                     15: */
1.2     ! timbl      16: 
        !            17: /* Implements:
        !            18: */
        !            19: #include "HTTP.h"
        !            20: 
        !            21: #define HTTP_VERSION   "HTTP/1.0"      /* Appended to the GET line when extensions are on */
        !            22: #define HTTP2                          /* Version is greater than 0.9: enables the #ifdef HTTP2 code */
        !            23: 
        !            24: #define INIT_LINE_SIZE         1024    /* Start with line buffer this big */
        !            25: #define LINE_EXTEND_THRESH     256     /* Double the line buffer when fewer bytes than this are free */
        !            26: #define VERSION_LENGTH                 20      /* for returned protocol version; keep in step with the "%20s" given to sscanf */
        !            27: 
        !            28: /* Uses:
        !            29: */
1.1       timbl      30: #include "HTParse.h"
                     31: #include "HTUtils.h"
                     32: #include "tcp.h"
                     33: #include "HTTCP.h"
                     34: #include "HTFormat.h"
1.2     ! timbl      35: #include <ctype.h>
        !            36: #include "HTAlert.h"
        !            37: #include "HTMIME.h"
1.1       timbl      38: 
                     39: 
1.2     ! timbl      40: struct _HTStream {
        !            41:        HTStreamClass * isa;            /* all we need to know: stream methods are called through isa */
        !            42: };
        !            43: 
        !            44: 
1.1       timbl      45: /*             Load Document from HTTP Server                  HTLoadHTTP()
                     46: **             ==============================
                     47: **
                     48: **     Given a hypertext address, this routine loads a document: it
                         **     connects to the host named in the address, sends a GET request
                         **     and hands the reply on through HTStreamStack() or HTParseSocket().
                     49: **
                     50: **
                     51: ** On entry,
                     52: **     arg         is the hypertext reference of the article to be loaded.
                     53: **     anAnchor    is passed on to HTStreamStack() and HTParseSocket().
                         **     format_out  is passed on to the same two routines.
                         **     sink        is passed on to the same two routines.
                         **     (The former gate parameter is commented out; a local of that
                         **     name is fixed at 0, so the gateway branches are never taken.)
                     54: **
                     55: ** On exit,
                     56: **     returns HT_LOADED   after the reply has been handed on; the socket
                         **                         has then been closed by this routine.
                     57: **             -3 or -2    if arg is null or empty respectively.
                         **             otherwise   the value from HTParseInet(), HTInetStatus(),
                         **                         NETREAD() or HTLoadError() on the failing step.
                     58: **
                     59: **     NOTE(review): the connect, send, read and HTLoadError() failure
                     60: **     paths return without calling NETCLOSE() on the socket.
                     61: **
                     62: */
1.2     ! timbl      63: PUBLIC int HTLoadHTTP ARGS4 (
        !            64:        CONST char *,           arg,
        !            65: /*     CONST char *,           gate, */
        !            66:        HTParentAnchor *,       anAnchor,
        !            67:        HTFormat,               format_out,
        !            68:        HTStream*,              sink)
1.1       timbl      69: {
                     70:     int s;                             /* Socket number for returned data */
                     71:     char *command;                     /* The whole command */
                     72:     int status;                                /* tcp return */
1.2     ! timbl      73:     CONST char* gate = 0;              /* gateway feature disabled: always 0 here */
        !            74:     HTFormat format = WWW_HTML;                /* format given to HTParseSocket() in the fallback below */   
1.1       timbl      75:     SockA soc_address;                 /* Binary network address */
                     76:     SockA * sin = &soc_address;
1.2     ! timbl      77:     BOOL had_header = NO;              /* Have we had at least one header? (not referenced again below) */
        !            78:     char * line_buffer = NULL;         /* Reply buffer; released at the done label */
        !            79:     BOOL extensions = YES;             /* Assume good HTTP server */
1.1       timbl      80:     if (!arg) return -3;               /* Bad if no name specified     */
                     81:     if (!*arg) return -2;              /* Bad if name had zero length  */
                     82: 
                     83: /*  Set up defaults:
                     84: */
                     85: #ifdef DECNET
1.2     ! timbl      86:     sin->sdn_family = AF_DECnet;           /* Family = DECnet, host order */
        !            87:     sin->sdn_objnum = DNP_OBJ;          /* Default: http object number */
1.1       timbl      88: #else  /* Internet */
1.2     ! timbl      89:     sin->sin_family = AF_INET;     /* Family = internet, host order */
        !            90:     sin->sin_port = htons(TCP_PORT);    /* Default: http port    */
1.1       timbl      91: #endif
                     92: 
                     93:     if (TRACE) {
                     94:         if (gate) fprintf(stderr,
                     95:                "HTTPAccess: Using gateway %s for %s\n", gate, arg);
                     96:         else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
                     97:     }
                     98:     
                     99: /* Get node name and optional port number:
                    100: */
                    101:     {
                    102:        char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
                    103:        int status = HTParseInet(sin, p1);  /* TBL 920622; this status shadows the outer one */
                    104:         free(p1);
                    105:        if (status) return status;   /* No such host for example */
                    106:     }
                    107:     
1.2     ! timbl     108: retry:                                 /* Re-entered by goto with extensions == NO (old-server kludge below) */
1.1       timbl     109:    
                    110: /*     Now, let's get a socket set up from the server for the sgml data:
                    111: */      
                    112: #ifdef DECNET
                    113:     s = socket(AF_DECnet, SOCK_STREAM, 0);
                    114: #else
                    115:     s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
                    116: #endif
                         /* NOTE(review): the value returned by socket() is handed to connect()
                         ** without being checked first. */
                    117:     status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
                    118:     if (status < 0) {
                    119: #ifndef DECNET
                    120:        /* This code is temporary backward-compatibility. It should
                    121:           go away when no server runs on port 2784 alone */
                    122:        if (sin->sin_port == htons(TCP_PORT)) {  /* Try the old one */
                         /* NOTE(review): this trace uses printf (stdout); every other trace
                         ** in this routine uses fprintf(stderr, ...). */
                    123:          if (TRACE) printf (
                    124:            "HTTP: Port %d doesn't answer (errno = %d). Trying good old port %d...\n",
                    125:            TCP_PORT, errno, OLD_TCP_PORT);
                    126:          sin->sin_port = htons(OLD_TCP_PORT);
                    127:          /* First close current socket and open a clean one */
                    128:          status = NETCLOSE (s);
                    129:          s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
                    130:          status = connect(s, (struct sockaddr*)&soc_address,
                    131:                           sizeof(soc_address));
                    132:        }
                    133:        if (status < 0)
                    134: #endif
                    135:          {
                    136:            if (TRACE) fprintf(stderr, 
                    137:              "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
                    138:            /* free(command);   BUG OUT TBL 921121 */
                         /* NOTE(review): socket s is not closed on this return path. */
                    139:            return HTInetStatus("connect");
                    140:          }
                    141:       }
                    142:     
                    143:     if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
                    144: 
                    145: /*     Ask that node for the document,
                    146: **     omitting the host name & anchor if not gatewayed.
                         **     Buffer size: 4 for "GET ", 2 for CRLF, and 31 spare bytes which
                         **     have to cover the optional " " HTTP_VERSION suffix and the NUL.
                         **     Everything added later goes through StrAllocCat().
                    147: */        
                    148:     if (gate) {
1.2     ! timbl     149:         command = malloc(4 + strlen(arg)+ 2 + 31);
1.1       timbl     150:         if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
                    151:         strcpy(command, "GET ");
                    152:        strcat(command, arg);
                    153:     } else { /* not gatewayed */
                    154:        char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2     ! timbl     155:         command = malloc(4 + strlen(p1)+ 2 + 31);
1.1       timbl     156:         if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
                    157:         strcpy(command, "GET ");
                    158:        strcat(command, p1);
                    159:        free(p1);
                    160:     }
1.2     ! timbl     161: #ifdef HTTP2
        !           162:     if (extensions) {                  /* Third word of the request line; dropped on retry */
        !           163:         strcat(command, " ");
        !           164:         strcat(command, HTTP_VERSION);
        !           165:     }
        !           166: #endif
        !           167:     strcat(command, "\r\n");   /* Include CR for telnet compat. */
1.1       timbl     168:            
                    169: 
1.2     ! timbl     170: #ifdef HTTP2
        !           171:     if (extensions) {
        !           172: 
        !           173:        int n;
        !           174:        int i;
        !           175:         HTAtom * present = WWW_PRESENT;
        !           176:        char line[256];    /*@@@@ NOTE(review): fixed size, and the sprintf calls below do not bound their output */
        !           177: 
        !           178:        if (!HTPresentations) HTFormatInit();
        !           179:        n = HTList_count(HTPresentations);
        !           180: 
                         /* One Accept: line for each presentation whose rep_out is
                         ** WWW_PRESENT; the q= value is only written when quality != 1.0. */
        !           181:        for(i=0; i<n; i++) {
        !           182:            HTPresentation * pres = HTList_objectAt(HTPresentations, i);
        !           183:            if (pres->rep_out == present) {
        !           184:              if (pres->quality != 1.0) {
        !           185:                  sprintf(line, "Accept: %s q=%.3f\r\n",
        !           186:                         HTAtom_name(pres->rep), pres->quality);
        !           187:              } else {
        !           188:                  sprintf(line, "Accept: %s\r\n",
        !           189:                         HTAtom_name(pres->rep));
        !           190:              }
        !           191:              StrAllocCat(command, line);
        !           192: 
        !           193:            }
        !           194:        }
        !           195:     }
1.1       timbl     196:     
1.2     ! timbl     197:     StrAllocCat(command, "\r\n");      /* BLANK LINE means "end"; appended even when extensions is NO */
        !           198:    
        !           199: #endif
        !           200: 
1.1       timbl     201: #ifdef NOT_ASCII
                    202:     {
                    203:        char * p;
                    204:        for(p = command; *p; p++) {
                    205:            *p = TOASCII(*p);
                    206:        }
                    207:     }
                    208: #endif
                    209: 
1.2     ! timbl     210:     if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
1.1       timbl     211:     status = NETWRITE(s, command, (int)strlen(command));
                    212:     free(command);
                    213:     if (status<0) {
                    214:        if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
                         /* NOTE(review): socket s is not closed on this return path. */
                    215:            return HTInetStatus("send");
                    216:     }
                    217: 
1.2     ! timbl     218: 
        !           219: /*     Now load the data:      HTTP2 response parse
                         **
                         **     Reads until the first LF, then tries to parse that line as
                         **     "version status".  If that works the rest goes to a www/mime
                         **     stream; otherwise control drops to HTParseSocket() below.
        !           220: */
        !           221: #ifdef HTTP2
        !           222:     {
        !           223:     
        !           224:     /* Get numeric status etc */
        !           225: 
        !           226:        int status;                             /* shadows the outer status */
        !           227:        int length = 0;                         /* bytes held in line_buffer so far */
        !           228:        char * eol = 0;
        !           229:        BOOL end_of_file = NO;
        !           230:        HTFormat format = WWW_PLAINTEXT;        /* default; shadows the outer format, not referenced again in this block */
        !           231:        HTAtom * encoding = HTAtom_for("7bit"); /* not referenced again in this block */
        !           232:        int buffer_length = INIT_LINE_SIZE;     /* Why not? */
        !           233:        
        !           234:        line_buffer = (char *) malloc(buffer_length * sizeof(char));
        !           235:        if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
        !           236:        
        !           237:        for(;;) {
        !           238: 
        !           239:            int fields;
        !           240:            char server_version [VERSION_LENGTH+1];
        !           241:            int server_status;
        !           242:            
        !           243:           /* Extend line buffer if necessary for those crazy WAIS URLs ;-)
                         ** The buffer is doubled once fewer than LINE_EXTEND_THRESH bytes
                         ** remain free. */
        !           244:           
        !           245:            if (buffer_length - length < LINE_EXTEND_THRESH) {
        !           246:                buffer_length = buffer_length + buffer_length;
        !           247:                line_buffer = (char *) realloc(
        !           248:                        line_buffer, buffer_length * sizeof(char));
        !           249:                if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
        !           250:            }
                         /* One byte is held back so the terminating 0 written after the
                         ** read always fits. */
        !           251:            status = NETREAD(s, line_buffer + length,
        !           252:                                buffer_length - length -1);
        !           253:            if (status < 0) {
        !           254:                HTAlert("Unexpected network read error on response");
                         /* NOTE(review): returns without freeing line_buffer or closing s. */
        !           255:                return status;
        !           256:            }
        !           257:            if (status == 0) {
                         /* NOTE(review): leaves the loop for the HTParseSocket() fallback;
                         ** bytes already held in line_buffer are not handed on. */
        !           258:                end_of_file = YES;
        !           259:                break;
        !           260:            }
        !           261:            line_buffer[length+status] = 0;
        !           262: #ifdef NOT_ASCII
        !           263:            {
        !           264:                char * p;
        !           265:                for(p = line_buffer+length; *p; p++) {
        !           266:                    *p = FROMASCII(*p);
        !           267:                }
        !           268:            }
        !           269: #endif
                         /* Only the newly read bytes are searched for LF.
                         ** NOTE(review): if the LF is the very first byte of line_buffer,
                         ** eol-1 points before the start of the buffer. */
        !           270:            eol = strchr(line_buffer + length, '\n');
        !           271:             if (eol && *(eol-1) == '\r') *(eol-1) = ' '; 
        !           272: 
        !           273:            length = length + status;
        !           274:                    
        !           275:            if (!eol && !end_of_file) continue;         /* No LF yet: read more (end_of_file is always NO here) */         
        !           276:            
        !           277:            *eol = 0;           /* Terminate the line */
        !           278: 
        !           279: 
        !           280: /*      We now have a terminated unfolded line.
        !           281: */
        !           282: 
        !           283:            if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
        !           284: 
        !           285: /* Kludge to work with old buggy servers.  They can't handle the third word
        !           286: ** so we try again without it.
                         ** line_buffer is freed here and allocated afresh after the retry.
        !           287: */
        !           288:            if (extensions &&
        !           289:                 0==strcmp(line_buffer,         /* Old buggy server? */
        !           290:                   "Document address invalid or access not authorised")) {
        !           291:                extensions = NO;
        !           292:                if (line_buffer) free(line_buffer);
        !           293:                if (TRACE) fprintf(stderr,
        !           294:                    "HTTP: close socket %d to retry with HTTP0\n", s);
        !           295:                NETCLOSE(s);
        !           296:                goto retry;             /* @@@@@@@@@@ */
        !           297:            }
        !           298: 
                         /* The field width 20 has to stay equal to VERSION_LENGTH. */
        !           299:            fields = sscanf(line_buffer, "%20s%d",
        !           300:                server_version,
        !           301:                &server_status);
        !           302: 
        !           303:            if (fields < 2) break;      /* Not a status line: HTTP0 fallback. NOTE(review): buffered bytes are not handed on */
        !           304:            
        !           305:            switch (server_status / 100) {
        !           306:            
        !           307:            case 3:             /* Various forms of redirection */
        !           308:            case 4:             /* "I think I goofed" */
        !           309:            case 5:             /* I think you goofed */
        !           310:            default:            /* bad number */
        !           311:                
        !           312:                HTAlert("Bad status reply from server");
        !           313:                /* Fall through @@@@@@@@@@@@@@@@@@@@@ */
        !           314:                
        !           315:            case 2:             /* Good: Got MIME object */
        !           316:                {
        !           317:                    HTStream * mime = HTStreamStack(HTAtom_for("www/mime"),
        !           318:                        format_out, sink, anAnchor);
        !           319:                        
        !           320:                    if (!mime) {
        !           321:                        if (line_buffer) free(line_buffer);
                         /* NOTE(review): socket s is not closed on this return path. */
        !           322:                        return HTLoadError(sink, 403,
        !           323:                                "MIME: Can't convert this format");
        !           324:                    }
        !           325:                    mime->isa->put_string(mime, eol+1); /* Rest of buffer, after the status line */
        !           326:                    HTCopyNoCR(s, mime);                /* Rest of doc */
        !           327:                    mime->isa->end_document(mime);
        !           328:                    mime->isa->free(mime);
        !           329:                    goto done;
        !           330:                }
        !           331:                break;          /* not reached: the block above returns or jumps to done */
        !           332: 
        !           333:            }
        !           334:            
        !           335:            break;              /* Get out of for loop; NOTE(review): appears unreachable, every switch path returns or jumps to done */
        !           336:            
        !           337:        } /* Loop over lines */
        !           338:     }          /* Scope of HTTP2 handling block */
        !           339: 
        !           340: /* Now, we can assume that we did NOT have a MIME header so behave as for HTTP0
                         ** (reached by the break on end of file or on an unparsable first line)
        !           341: */
        !           342:     {
        !           343:        HTParseSocket(format, format_out,
        !           344:                 (HTParentAnchor *) anAnchor, s, sink);
        !           345:     }
        !           346: #else
        !           347:     HTParseSocket(format, format_out,
        !           348:                 (HTParentAnchor *) anAnchor, s, sink);
        !           349: #endif
        !           350: 
        !           351: /*     Clean up: release the reply buffer and close the socket
1.1       timbl     352: */
1.2     ! timbl     353: done:
        !           354:     if (line_buffer) free(line_buffer);
1.1       timbl     355:     
                    356:     if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
                    357:     status = NETCLOSE(s);              /* result is stored but not examined */
                    358: 
                    359:     return HT_LOADED;                  /* Good return */
                    360: }
                    361: 
                    362: /*     Protocol descriptor
                         **
                         **     Pairs the name "http" with HTLoadHTTP; the third member is 0.
                         **     NOTE(review): the member meanings come from the HTProtocol
                         **     declaration, which is not visible in this file -- confirm there.
                    363: */
                    364: 
1.2     ! timbl     365: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };

Webmaster