Annotation of libwww/Library/src/HTMIME.c, revision 2.18
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 ! frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
! 17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.18 ! frystyk 25: #include "HTChunk.h"
2.17 frystyk 26: #include "HTFWrite.h"
2.14 frystyk 27: #include "HTMIME.h" /* Implemented here */
2.1 timbl 28:
29: /* MIME Object
30: ** -----------
31: */
32: typedef enum _MIME_state {
2.14 frystyk 33: BEGINNING_OF_LINE,
2.18 ! frystyk 34: CHECK, /* check against check_pointer */
! 35: UNKNOWN, /* Unknown header */
! 36: JUNK_LINE, /* Ignore rest of header */
! 37:
! 38: CONTENT, /* Intermediate states */
! 39: FIRSTLETTER_D,
! 40: FIRSTLETTER_L,
! 41: CONTENTLETTER_L,
! 42: CONTENTLETTER_T,
! 43:
! 44: ALLOW, /* Headers supported */
! 45: AUTHENTICATE,
! 46: CONTENT_ENCODING,
! 47: CONTENT_LANGUAGE,
! 48: CONTENT_LENGTH,
2.14 frystyk 49: CONTENT_TRANSFER_ENCODING,
50: CONTENT_TYPE,
2.18 ! frystyk 51: DATE,
! 52: DERIVED_FROM,
! 53: EXPIRES,
! 54: LAST_MODIFIED,
! 55: LINK,
2.14 frystyk 56: LOCATION,
2.18 ! frystyk 57: PUBLIC_METHODS,
! 58: RETRY_AFTER,
! 59: TITLE,
! 60: URI_HEADER,
! 61: VERSION
2.1 timbl 62: } MIME_state;
63:
64: struct _HTStream {
2.18 ! frystyk 65: CONST HTStreamClass * isa;
! 66: HTRequest * request;
! 67: HTStream * target;
! 68: HTFormat target_format;
! 69: HTChunk * buffer;
! 70: HTSocketEOL EOLstate;
! 71: BOOL transparent;
2.1 timbl 72: };
73:
2.18 ! frystyk 74: /* ------------------------------------------------------------------------- */
2.1 timbl 75:
2.18 ! frystyk 76: /*
2.1 timbl 77: ** This is a FSM parser which is tolerant as it can be of all
78: ** syntax errors. It ignores field names it does not understand,
79: ** and resynchronises on line beginnings.
80: */
2.18 ! frystyk 81: PRIVATE void parseheader ARGS3(HTStream *, me, HTRequest *, request,
! 82: HTParentAnchor *, anchor)
! 83: {
! 84: MIME_state state = BEGINNING_OF_LINE;
! 85: MIME_state ok_state; /* got this state if match */
! 86: char *ptr = me->buffer->data-1; /* We dont change the data in length */
! 87: char *stop = ptr+me->buffer->size; /* When to stop */
! 88: char *header = ptr; /* For diagnostics */
! 89: CONST char * check_pointer; /* checking input */
! 90: char *value;
! 91: me->transparent = YES; /* Pump rest of data right through */
! 92: if (!ptr) /* No header to parse */
! 93: return;
! 94: while (ptr < stop) {
! 95: switch (state) {
! 96: case BEGINNING_OF_LINE:
! 97: header = ++ptr;
! 98: switch (TOLOWER(*ptr)) {
! 99: case 'a':
! 100: check_pointer = "llow";
! 101: ok_state = ALLOW;
! 102: state = CHECK;
! 103: break;
! 104:
! 105: case 'c':
! 106: check_pointer = "ontent-";
! 107: ok_state = CONTENT;
! 108: state = CHECK;
! 109: break;
! 110:
! 111: case 'd':
! 112: state = FIRSTLETTER_D;
! 113: break;
! 114:
! 115: case 'e':
! 116: check_pointer = "xpires";
! 117: ok_state = EXPIRES;
! 118: state = CHECK;
! 119: break;
! 120:
! 121: case 'l':
! 122: state = FIRSTLETTER_L;
! 123: break;
! 124:
! 125: case 'm':
! 126: check_pointer = "ime-version";
! 127: ok_state = JUNK_LINE; /* We don't use this but recognize it */
! 128: state = CHECK;
! 129: break;
! 130:
! 131: case 'p':
! 132: break;
! 133:
! 134: case 'r':
! 135: check_pointer = "etry-after";
! 136: ok_state = RETRY_AFTER;
! 137: state = CHECK;
! 138: break;
! 139:
! 140: case 's':
! 141: check_pointer = "erver";
! 142: ok_state = JUNK_LINE; /* We don't use this but recognize it */
! 143: state = CHECK;
! 144: break;
2.1 timbl 145:
2.18 ! frystyk 146: case 't':
! 147: check_pointer = "itle";
! 148: ok_state = TITLE;
! 149: state = CHECK;
! 150: break;
! 151:
! 152: case 'u':
! 153: check_pointer = "ri";
! 154: ok_state = URI_HEADER;
! 155: state = CHECK;
! 156: break;
! 157:
! 158: case 'v':
! 159: check_pointer = "ersion";
! 160: ok_state = VERSION;
! 161: state = CHECK;
! 162: break;
! 163:
! 164: case 'w':
! 165: check_pointer = "ww-authenticate";
! 166: ok_state = AUTHENTICATE;
! 167: state = CHECK;
! 168: break;
2.1 timbl 169:
2.18 ! frystyk 170: default:
! 171: state = UNKNOWN;
! 172: break;
! 173: }
! 174: ptr++;
2.1 timbl 175: break;
176:
2.18 ! frystyk 177: case FIRSTLETTER_D:
! 178: switch (TOLOWER(*ptr)) {
! 179: case 'a':
! 180: check_pointer = "te";
! 181: ok_state = DATE;
! 182: state = CHECK;
! 183: break;
! 184:
! 185: case 'e':
! 186: check_pointer = "rived-from";
! 187: ok_state = DERIVED_FROM;
! 188: state = CHECK;
! 189: break;
! 190:
! 191: default:
! 192: state = UNKNOWN;
! 193: break;
! 194: }
! 195: ptr++;
! 196: break;
! 197:
! 198: case FIRSTLETTER_L:
! 199: switch (TOLOWER(*ptr)) {
! 200: case 'a':
! 201: check_pointer = "st-modified";
! 202: ok_state = LAST_MODIFIED;
! 203: state = CHECK;
! 204: break;
! 205:
! 206: case 'i':
! 207: check_pointer = "nk";
! 208: ok_state = LINK;
! 209: state = CHECK;
! 210: break;
! 211:
! 212: case 'o':
! 213: check_pointer = "cation";
! 214: ok_state = LOCATION;
! 215: state = CHECK;
! 216: break;
! 217:
! 218: default:
! 219: state = UNKNOWN;
! 220: break;
! 221: }
! 222: ptr++;
! 223: break;
! 224:
! 225: case CONTENT:
! 226: switch (TOLOWER(*ptr)) {
! 227: case 'e':
! 228: check_pointer = "ncoding";
! 229: ok_state = CONTENT_ENCODING;
! 230: state = CHECK;
! 231: break;
! 232:
! 233: case 'l':
! 234: state = CONTENTLETTER_L;
! 235: break;
! 236:
! 237: case 't':
! 238: state = CONTENTLETTER_T;
! 239: break;
! 240:
! 241: default:
! 242: state = UNKNOWN;
! 243: break;
! 244: }
! 245: ptr++;
2.1 timbl 246: break;
2.14 frystyk 247:
2.18 ! frystyk 248: case CONTENTLETTER_L:
! 249: switch (TOLOWER(*ptr)) {
! 250: case 'a':
! 251: check_pointer = "nguage";
! 252: ok_state = CONTENT_LANGUAGE;
! 253: state = CHECK;
! 254: break;
! 255:
! 256: case 'e':
! 257: check_pointer = "ngth";
! 258: ok_state = CONTENT_LENGTH;
! 259: state = CHECK;
! 260: break;
! 261:
! 262: default:
! 263: state = UNKNOWN;
! 264: break;
! 265: }
! 266: ptr++;
2.14 frystyk 267: break;
268:
2.18 ! frystyk 269: case CONTENTLETTER_T:
! 270: switch (TOLOWER(*ptr)) {
! 271: case 'r':
! 272: check_pointer = "ansfer-encoding";
! 273: ok_state = CONTENT_TRANSFER_ENCODING;
! 274: state = CHECK;
! 275: break;
! 276:
! 277: case 'y':
! 278: check_pointer = "pe";
! 279: ok_state = CONTENT_TYPE;
! 280: state = CHECK;
! 281: break;
! 282:
! 283: default:
! 284: state = UNKNOWN;
! 285: break;
! 286: }
! 287: ptr++;
2.14 frystyk 288: break;
289:
2.18 ! frystyk 290: case CHECK: /* Check against string */
! 291: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
! 292: if (!*--check_pointer) {
! 293: state = ok_state;
! 294: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
! 295: ptr++;
! 296: } else
! 297: state = UNKNOWN;
2.14 frystyk 298: break;
299:
2.18 ! frystyk 300: case ALLOW:
2.1 timbl 301: {
2.18 ! frystyk 302: while ((value = HTNextField(&ptr)) != NULL) {
! 303: char *lc = value;
! 304: HTMethod new_method;
! 305: while ((*lc = TOUPPER(*lc))) lc++;;
! 306: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
! 307: anchor->methods += new_method;
2.2 timbl 308: }
2.1 timbl 309: }
2.18 ! frystyk 310: if (STREAM_TRACE)
! 311: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
! 312: anchor->methods);
! 313: state = JUNK_LINE;
2.1 timbl 314: break;
2.18 ! frystyk 315:
! 316: case AUTHENTICATE:
! 317: if ((value = HTNextField(&ptr)) != NULL) {
! 318: StrAllocCopy(request->WWWAAScheme, value);
! 319: if ((value = HTNextField(&ptr)) != NULL) {
! 320: StrAllocCopy(request->WWWAARealm, value);
2.14 frystyk 321: }
2.1 timbl 322: }
2.18 ! frystyk 323: state = JUNK_LINE;
! 324: break;
! 325:
! 326: case CONTENT_ENCODING:
! 327: if ((value = HTNextField(&ptr)) != NULL) {
! 328: char *lc = value;
! 329: while ((*lc = TOLOWER(*lc))) lc++;;
! 330: anchor->content_encoding = HTAtom_for(value);
! 331: }
! 332: state = JUNK_LINE;
! 333: break;
! 334:
! 335: case CONTENT_LANGUAGE:
! 336: state = UNKNOWN;
! 337: break;
! 338:
! 339: case CONTENT_LENGTH:
! 340: if ((value = HTNextField(&ptr)) != NULL)
! 341: anchor->content_length = atol(value);
! 342: state = JUNK_LINE;
! 343: break;
! 344:
! 345: case CONTENT_TRANSFER_ENCODING:
! 346: if ((value = HTNextField(&ptr)) != NULL) {
! 347: char *lc = value;
! 348: while ((*lc = TOLOWER(*lc))) lc++;;
! 349: anchor->cte = HTAtom_for(value);
! 350: }
! 351: state = JUNK_LINE;
! 352: break;
! 353:
! 354: case CONTENT_TYPE:
! 355: if ((value = HTNextField(&ptr)) != NULL) {
! 356: char *lc = value;
! 357: while ((*lc = TOLOWER(*lc))) lc++;
! 358: anchor->content_type = HTAtom_for(value);
2.1 timbl 359: }
2.18 ! frystyk 360: state = JUNK_LINE; /* Skip charset :-( */
! 361: break;
! 362:
! 363: case DATE:
! 364: anchor->date = HTParseTime(ptr);
! 365: state = JUNK_LINE;
! 366: break;
! 367:
! 368: case DERIVED_FROM:
! 369: if ((value = HTNextField(&ptr)) != NULL)
! 370: StrAllocCopy(anchor->derived_from, value);
! 371: state = JUNK_LINE;
! 372: break;
! 373:
! 374: case EXPIRES:
! 375: anchor->expires = HTParseTime(ptr);
! 376: state = JUNK_LINE;
! 377: break;
! 378:
! 379: case LAST_MODIFIED:
! 380: anchor->last_modified = HTParseTime(ptr);
! 381: state = JUNK_LINE;
! 382: break;
! 383:
! 384: case LINK:
! 385: state = UNKNOWN;
! 386: break;
! 387:
! 388: case LOCATION:
! 389: if ((value = HTNextField(&ptr)) != NULL)
! 390: StrAllocCopy(request->redirect, value);
! 391: state = JUNK_LINE;
! 392: break;
! 393:
! 394: case PUBLIC_METHODS:
! 395: state = UNKNOWN;
! 396: break;
! 397:
! 398: case RETRY_AFTER:
! 399: state = UNKNOWN;
! 400: break;
! 401:
! 402: case TITLE: /* Can't reuse buffer as HTML version might differ */
! 403: if ((value = HTNextField(&ptr)) != NULL)
! 404: StrAllocCopy(anchor->title, value);
! 405: state = JUNK_LINE;
! 406: break;
! 407:
! 408: case URI_HEADER:
! 409: state = LOCATION; /* @@@ Need extended parsing */
! 410: break;
! 411:
! 412: case VERSION:
! 413: if ((value = HTNextField(&ptr)) != NULL)
! 414: StrAllocCopy(anchor->version, value);
! 415: state = JUNK_LINE;
! 416: break;
! 417:
! 418: case UNKNOWN:
! 419: if (STREAM_TRACE)
! 420: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
! 421: HTAnchor_addExtra(anchor, header);
! 422:
! 423: /* Fall through */
! 424:
! 425: case JUNK_LINE:
! 426: while (*ptr) ptr++;
! 427: state = BEGINNING_OF_LINE;
! 428: break;
2.1 timbl 429: }
2.18 ! frystyk 430: }
! 431:
! 432: if (STREAM_TRACE)
! 433: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
! 434: HTAtom_name(anchor->content_type),
! 435: HTAtom_name(me->target_format));
! 436: if ((me->target = HTStreamStack(anchor->content_type,
! 437: me->target_format, me->target,
! 438: me->request, YES)) == NULL) {
! 439: if (STREAM_TRACE)
! 440: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
! 441: me->target = HTBlackHole();
! 442: }
! 443: anchor->header = me->buffer; /* Gets freed when anchor is freed */
! 444: anchor->header_parsed = YES;
2.1 timbl 445: }
446:
447:
2.18 ! frystyk 448: /*
! 449: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
! 450: ** Folding is either of CF LWS, LF LWS, CRLF LWS
! 451: */
! 452: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
! 453: {
! 454: while (!me->transparent && l-- > 0) {
! 455: if (me->EOLstate == EOL_FCR) {
! 456: if (*b == CR) /* End of header */
! 457: parseheader(me, me->request, me->request->anchor);
! 458: else if (*b == LF) /* CRLF */
! 459: me->EOLstate = EOL_FLF;
! 460: else if (WHITE(*b)) { /* Folding: CR SP */
! 461: me->EOLstate = EOL_BEGIN;
! 462: HTChunkPutc(me->buffer, ' ');
! 463: } else { /* New line */
! 464: me->EOLstate = EOL_BEGIN;
! 465: HTChunkPutc(me->buffer, '\0');
! 466: HTChunkPutc(me->buffer, *b);
! 467: }
! 468: } else if (me->EOLstate == EOL_FLF) {
! 469: if (*b == CR) /* LF CR or CR LF CR */
! 470: me->EOLstate = EOL_SCR;
! 471: else if (*b == LF) /* End of header */
! 472: parseheader(me, me->request, me->request->anchor);
! 473: else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
! 474: me->EOLstate = EOL_BEGIN;
! 475: HTChunkPutc(me->buffer, ' ');
! 476: } else { /* New line */
! 477: me->EOLstate = EOL_BEGIN;
! 478: HTChunkPutc(me->buffer, '\0');
! 479: HTChunkPutc(me->buffer, *b);
! 480: }
! 481: } else if (me->EOLstate == EOL_SCR) {
! 482: if (*b==CR || *b==LF) /* End of header */
! 483: parseheader(me, me->request, me->request->anchor);
! 484: else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
! 485: me->EOLstate = EOL_BEGIN;
! 486: HTChunkPutc(me->buffer, ' ');
! 487: } else { /* New line */
! 488: me->EOLstate = EOL_BEGIN;
! 489: HTChunkPutc(me->buffer, '\0');
! 490: HTChunkPutc(me->buffer, *b);
! 491: }
! 492: } else if (*b == CR) {
! 493: me->EOLstate = EOL_FCR;
! 494: } else if (*b == LF) {
! 495: me->EOLstate = EOL_FLF; /* Line found */
! 496: } else
! 497: HTChunkPutc(me->buffer, *b);
! 498: b++;
! 499: }
! 500: if (l > 0) /* Anything left? */
! 501: return (*me->target->isa->put_block)(me->target, b, l);
! 502: return HT_OK;
! 503: }
! 504:
! 505:
! 506: /* Character handling
! 507: ** ------------------
! 508: */
! 509: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, CONST char, c)
! 510: {
! 511: return HTMIME_put_block(me, &c, 1);
! 512: }
! 513:
2.1 timbl 514:
515: /* String handling
516: ** ---------------
517: */
2.18 ! frystyk 518: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 519: {
2.18 ! frystyk 520: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 521: }
522:
523:
2.18 ! frystyk 524: /* Flush an stream object
! 525: ** ---------------------
2.1 timbl 526: */
2.18 ! frystyk 527: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 528: {
2.18 ! frystyk 529: return (*me->target->isa->flush)(me->target);
2.1 timbl 530: }
531:
2.18 ! frystyk 532: /* Free a stream object
! 533: ** --------------------
2.1 timbl 534: */
2.14 frystyk 535: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 536: {
2.18 ! frystyk 537: int status = HT_OK;
! 538: if (me->target)
! 539: status = (*me->target->isa->_free)(me->target);
2.1 timbl 540: free(me);
2.18 ! frystyk 541: return status;
2.1 timbl 542: }
543:
544: /* End writing
545: */
2.14 frystyk 546: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 547: {
2.18 ! frystyk 548: int status = HT_ERROR;
! 549: if (me->target)
! 550: status = (*me->target->isa->abort)(me->target, e);
2.6 timbl 551: free(me);
2.18 ! frystyk 552: return status;
2.1 timbl 553: }
554:
555:
556:
557: /* Structured Object Class
558: ** -----------------------
559: */
2.6 timbl 560: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 561: {
562: "MIMEParser",
2.18 ! frystyk 563: HTMIME_flush,
2.1 timbl 564: HTMIME_free,
2.6 timbl 565: HTMIME_abort,
566: HTMIME_put_character,
567: HTMIME_put_string,
2.18 ! frystyk 568: HTMIME_put_block
2.1 timbl 569: };
570:
571:
572: /* Subclass-specific Methods
573: ** -------------------------
574: */
2.7 timbl 575: PUBLIC HTStream* HTMIMEConvert ARGS5(
576: HTRequest *, request,
577: void *, param,
578: HTFormat, input_format,
579: HTFormat, output_format,
580: HTStream *, output_stream)
2.1 timbl 581: {
582: HTStream* me;
2.18 ! frystyk 583: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
! 584: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 585: me->isa = &HTMIME;
2.18 ! frystyk 586: me->request = request;
! 587: me->target = output_stream;
! 588: me->target_format = output_format;
! 589: me->buffer = HTChunkCreate(512);
! 590: me->EOLstate = EOL_BEGIN;
2.1 timbl 591: return me;
592: }
Webmaster