Annotation of libwww/Library/src/HTMIME.c, revision 2.17.2.1
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.17.2.1! frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
! 17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.17.2.1! frystyk 25: #include "HTChunk.h"
2.17 frystyk 26: #include "HTFWrite.h"
2.14 frystyk 27: #include "HTMIME.h" /* Implemented here */
2.1 timbl 28:
29: /* MIME Object
30: ** -----------
31: */
32: typedef enum _MIME_state {
2.14 frystyk 33: BEGINNING_OF_LINE,
2.17.2.1! frystyk 34: CHECK, /* check against check_pointer */
! 35: UNKNOWN, /* Unknown header */
! 36: JUNK_LINE, /* Ignore rest of header */
! 37:
! 38: CONTENT, /* Intermediate states */
! 39: FIRSTLETTER_D,
! 40: FIRSTLETTER_L,
! 41: CONTENTLETTER_L,
! 42: CONTENTLETTER_T,
! 43:
! 44: ALLOW, /* Headers supported */
! 45: AUTHENTICATE,
! 46: CONTENT_ENCODING,
! 47: CONTENT_LANGUAGE,
! 48: CONTENT_LENGTH,
2.14 frystyk 49: CONTENT_TRANSFER_ENCODING,
50: CONTENT_TYPE,
2.17.2.1! frystyk 51: DATE,
! 52: DERIVED_FROM,
! 53: EXPIRES,
! 54: LAST_MODIFIED,
! 55: LINK,
2.14 frystyk 56: LOCATION,
2.17.2.1! frystyk 57: PUBLIC_METHODS,
! 58: RETRY_AFTER,
! 59: TITLE,
! 60: URI_HEADER,
! 61: VERSION
2.1 timbl 62: } MIME_state;
63:
64: struct _HTStream {
2.17.2.1! frystyk 65: CONST HTStreamClass * isa;
! 66: HTRequest * request;
! 67: HTStream * target;
! 68: HTFormat target_format;
! 69: HTChunk * buffer;
! 70: HTSocketEOL EOLstate;
! 71: BOOL transparent;
2.1 timbl 72: };
73:
2.17.2.1! frystyk 74: /* ------------------------------------------------------------------------- */
2.1 timbl 75:
2.17.2.1! frystyk 76: /*
2.1 timbl 77: ** This is a FSM parser which is tolerant as it can be of all
78: ** syntax errors. It ignores field names it does not understand,
79: ** and resynchronises on line beginnings.
80: */
2.17.2.1! frystyk 81: PRIVATE void parseheader ARGS3(HTStream *, me, HTRequest *, request,
! 82: HTParentAnchor *, anchor)
2.1 timbl 83: {
2.17.2.1! frystyk 84: MIME_state state = BEGINNING_OF_LINE;
! 85: MIME_state ok_state; /* got this state if match */
! 86: char *ptr = me->buffer->data-1; /* We dont change the data in length */
! 87: char *stop = ptr+me->buffer->size; /* When to stop */
! 88: char *header = ptr; /* For diagnostics */
! 89: CONST char * check_pointer; /* checking input */
! 90: char *value;
! 91: me->transparent = YES; /* Pump rest of data right through */
! 92: if (!ptr) /* No header to parse */
! 93: return;
! 94: while (ptr < stop) {
! 95: switch (state) {
! 96: case BEGINNING_OF_LINE:
! 97: header = ++ptr;
! 98: switch (TOLOWER(*ptr)) {
! 99: case 'a':
! 100: check_pointer = "llow";
! 101: ok_state = ALLOW;
! 102: state = CHECK;
! 103: break;
2.1 timbl 104:
2.17.2.1! frystyk 105: case 'c':
! 106: check_pointer = "ontent-";
! 107: ok_state = CONTENT;
! 108: state = CHECK;
! 109: break;
! 110:
! 111: case 'd':
! 112: state = FIRSTLETTER_D;
! 113: break;
! 114:
! 115: case 'e':
! 116: check_pointer = "xpires";
! 117: ok_state = EXPIRES;
! 118: state = CHECK;
! 119: break;
! 120:
! 121: case 'l':
! 122: state = FIRSTLETTER_L;
! 123: break;
! 124:
! 125: case 'm':
! 126: check_pointer = "ime-version";
! 127: ok_state = JUNK_LINE; /* We don't use this but recognize it */
! 128: state = CHECK;
! 129: break;
! 130:
! 131: case 'p':
! 132: break;
! 133:
! 134: case 'r':
! 135: check_pointer = "etry-after";
! 136: ok_state = RETRY_AFTER;
! 137: state = CHECK;
! 138: break;
! 139:
! 140: case 's':
! 141: check_pointer = "erver";
! 142: ok_state = JUNK_LINE; /* We don't use this but recognize it */
! 143: state = CHECK;
! 144: break;
! 145:
! 146: case 't':
! 147: check_pointer = "itle";
! 148: ok_state = TITLE;
! 149: state = CHECK;
! 150: break;
! 151:
! 152: case 'u':
! 153: check_pointer = "ri";
! 154: ok_state = URI_HEADER;
! 155: state = CHECK;
! 156: break;
! 157:
! 158: case 'v':
! 159: check_pointer = "ersion";
! 160: ok_state = VERSION;
! 161: state = CHECK;
! 162: break;
! 163:
! 164: case 'w':
! 165: check_pointer = "ww-authenticate";
! 166: ok_state = AUTHENTICATE;
! 167: state = CHECK;
! 168: break;
! 169:
! 170: default:
! 171: state = UNKNOWN;
! 172: break;
! 173: }
! 174: ptr++;
2.1 timbl 175: break;
176:
2.17.2.1! frystyk 177: case FIRSTLETTER_D:
! 178: switch (TOLOWER(*ptr)) {
! 179: case 'a':
! 180: check_pointer = "te";
! 181: ok_state = DATE;
! 182: state = CHECK;
! 183: break;
! 184:
! 185: case 'e':
! 186: check_pointer = "rived-from";
! 187: ok_state = DERIVED_FROM;
! 188: state = CHECK;
! 189: break;
! 190:
! 191: default:
! 192: state = UNKNOWN;
! 193: break;
2.1 timbl 194: }
2.17.2.1! frystyk 195: ptr++;
2.1 timbl 196: break;
2.17.2.1! frystyk 197:
! 198: case FIRSTLETTER_L:
! 199: switch (TOLOWER(*ptr)) {
! 200: case 'a':
! 201: check_pointer = "st-modified";
! 202: ok_state = LAST_MODIFIED;
! 203: state = CHECK;
2.14 frystyk 204: break;
2.17.2.1! frystyk 205:
! 206: case 'i':
! 207: check_pointer = "nk";
! 208: ok_state = LINK;
! 209: state = CHECK;
! 210: break;
! 211:
! 212: case 'o':
! 213: check_pointer = "cation";
! 214: ok_state = LOCATION;
! 215: state = CHECK;
! 216: break;
! 217:
! 218: default:
! 219: state = UNKNOWN;
2.14 frystyk 220: break;
2.1 timbl 221: }
2.17.2.1! frystyk 222: ptr++;
! 223: break;
! 224:
! 225: case CONTENT:
! 226: switch (TOLOWER(*ptr)) {
! 227: case 'e':
! 228: check_pointer = "ncoding";
! 229: ok_state = CONTENT_ENCODING;
! 230: state = CHECK;
! 231: break;
! 232:
! 233: case 'l':
! 234: state = CONTENTLETTER_L;
! 235: break;
! 236:
! 237: case 't':
! 238: state = CONTENTLETTER_T;
! 239: break;
! 240:
! 241: default:
! 242: state = UNKNOWN;
2.1 timbl 243: break;
244: }
2.17.2.1! frystyk 245: ptr++;
! 246: break;
! 247:
! 248: case CONTENTLETTER_L:
! 249: switch (TOLOWER(*ptr)) {
! 250: case 'a':
! 251: check_pointer = "nguage";
! 252: ok_state = CONTENT_LANGUAGE;
! 253: state = CHECK;
! 254: break;
! 255:
! 256: case 'e':
! 257: check_pointer = "ngth";
! 258: ok_state = CONTENT_LENGTH;
! 259: state = CHECK;
! 260: break;
! 261:
! 262: default:
! 263: state = UNKNOWN;
! 264: break;
! 265: }
! 266: ptr++;
! 267: break;
! 268:
! 269: case CONTENTLETTER_T:
! 270: switch (TOLOWER(*ptr)) {
! 271: case 'r':
! 272: check_pointer = "ansfer-encoding";
! 273: ok_state = CONTENT_TRANSFER_ENCODING;
! 274: state = CHECK;
! 275: break;
! 276:
! 277: case 'y':
! 278: check_pointer = "pe";
! 279: ok_state = CONTENT_TYPE;
! 280: state = CHECK;
! 281: break;
! 282:
! 283: default:
! 284: state = UNKNOWN;
! 285: break;
! 286: }
! 287: ptr++;
! 288: break;
! 289:
! 290: case CHECK: /* Check against string */
! 291: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
! 292: if (!*--check_pointer) {
! 293: state = ok_state;
! 294: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
! 295: ptr++;
! 296: } else
! 297: state = UNKNOWN;
! 298: break;
! 299:
! 300: case ALLOW:
! 301: while ((value = HTNextField(&ptr)) != NULL) {
! 302: char *lc = value;
! 303: HTMethod new_method;
! 304: while ((*lc = TOUPPER(*lc))) lc++;
! 305: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
! 306: anchor->methods += new_method;
! 307: }
! 308: if (STREAM_TRACE)
! 309: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
! 310: anchor->methods);
! 311: state = JUNK_LINE;
! 312: break;
! 313:
! 314: case AUTHENTICATE:
! 315: if ((value = HTNextField(&ptr)) != NULL) {
! 316: StrAllocCopy(request->WWWAAScheme, value);
! 317:
! 318: /* The parsing is done in HTSSUtils.c for the moment */
! 319: if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
! 320: }
! 321: state = JUNK_LINE;
! 322: break;
! 323:
! 324: case CONTENT_ENCODING:
! 325: if ((value = HTNextField(&ptr)) != NULL) {
! 326: char *lc = value;
! 327: while ((*lc = TOLOWER(*lc))) lc++;
! 328: anchor->content_encoding = HTAtom_for(value);
! 329: }
! 330: state = JUNK_LINE;
! 331: break;
! 332:
! 333: case CONTENT_LANGUAGE:
! 334: state = UNKNOWN; /* @@@@@@@@@@@ */
! 335: break;
! 336:
! 337: case CONTENT_LENGTH:
! 338: if ((value = HTNextField(&ptr)) != NULL)
! 339: anchor->content_length = atol(value);
! 340: state = JUNK_LINE;
! 341: break;
! 342:
! 343: case CONTENT_TRANSFER_ENCODING:
! 344: if ((value = HTNextField(&ptr)) != NULL) {
! 345: char *lc = value;
! 346: while ((*lc = TOLOWER(*lc))) lc++;
! 347: anchor->cte = HTAtom_for(value);
! 348: }
! 349: state = JUNK_LINE;
! 350: break;
! 351:
! 352: case CONTENT_TYPE:
! 353: if ((value = HTNextField(&ptr)) != NULL) {
! 354: char *lc = value;
! 355: while ((*lc = TOLOWER(*lc))) lc++;
! 356: anchor->content_type = HTAtom_for(value);
! 357: while ((value = HTNextField(&ptr)) != NULL) { /* Charset */
! 358: if (!strcasecomp(value, "charset")) {
! 359: if ((value = HTNextField(&ptr)) != NULL) {
! 360: lc = value;
! 361: while ((*lc = TOLOWER(*lc))) lc++;
! 362: anchor->charset = HTAtom_for(value);
! 363: }
! 364: } else if (!strcasecomp(value, "level")) { /* Level */
! 365: if ((value = HTNextField(&ptr)) != NULL) {
! 366: lc = value;
! 367: while ((*lc = TOLOWER(*lc))) lc++;
! 368: anchor->level = HTAtom_for(value);
! 369: }
! 370: }
! 371: }
! 372: }
! 373: state = JUNK_LINE;
! 374: break;
! 375:
! 376: case DATE:
! 377: anchor->date = HTParseTime(ptr);
! 378: state = JUNK_LINE;
! 379: break;
! 380:
! 381: case DERIVED_FROM:
! 382: if ((value = HTNextField(&ptr)) != NULL)
! 383: StrAllocCopy(anchor->derived_from, value);
! 384: state = JUNK_LINE;
! 385: break;
! 386:
! 387: case EXPIRES:
! 388: anchor->expires = HTParseTime(ptr);
! 389: state = JUNK_LINE;
! 390: break;
! 391:
! 392: case LAST_MODIFIED:
! 393: anchor->last_modified = HTParseTime(ptr);
! 394: state = JUNK_LINE;
! 395: break;
! 396:
! 397: case LINK:
! 398: state = UNKNOWN; /* @@@@@@@@@@@ */
! 399: break;
! 400:
! 401: case LOCATION:
! 402: if ((value = HTNextField(&ptr)) != NULL)
! 403: StrAllocCopy(request->redirect, value);
! 404: state = JUNK_LINE;
! 405: break;
! 406:
! 407: case PUBLIC_METHODS:
! 408: state = UNKNOWN; /* @@@@@@@@@@@ */
! 409: break;
! 410:
! 411: case RETRY_AFTER:
! 412: request->retry_after = HTParseTime(ptr);
! 413: state = JUNK_LINE;
! 414: break;
! 415:
! 416: case TITLE: /* Can't reuse buffer as HTML version might differ */
! 417: if ((value = HTNextField(&ptr)) != NULL)
! 418: StrAllocCopy(anchor->title, value);
! 419: state = JUNK_LINE;
! 420: break;
! 421:
! 422: case URI_HEADER:
! 423: state = LOCATION; /* @@@ Need extended parsing */
! 424: break;
! 425:
! 426: case VERSION:
! 427: if ((value = HTNextField(&ptr)) != NULL)
! 428: StrAllocCopy(anchor->version, value);
! 429: state = JUNK_LINE;
! 430: break;
! 431:
! 432: case UNKNOWN:
! 433: if (STREAM_TRACE)
! 434: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
! 435: HTAnchor_addExtra(anchor, header);
! 436:
! 437: /* Fall through */
! 438:
! 439: case JUNK_LINE:
! 440: while (*ptr) ptr++;
! 441: state = BEGINNING_OF_LINE;
! 442: break;
2.1 timbl 443: }
2.17.2.1! frystyk 444: }
! 445:
! 446: if (STREAM_TRACE)
! 447: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
! 448: HTAtom_name(anchor->content_type),
! 449: HTAtom_name(me->target_format));
! 450: if ((me->target = HTStreamStack(anchor->content_type,
! 451: me->target_format, me->target,
! 452: me->request, YES)) == NULL) {
! 453: if (STREAM_TRACE)
! 454: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
! 455: me->target = HTBlackHole();
! 456: }
! 457: anchor->header_parsed = YES;
2.1 timbl 458: }
459:
460:
2.17.2.1! frystyk 461: /*
! 462: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
! 463: ** Folding is either of CR LWS, LF LWS, CRLF LWS
! 464: */
! 465: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
! 466: {
! 467: while (!me->transparent && l-- > 0) {
! 468: if (me->EOLstate == EOL_FCR) {
! 469: if (*b == CR) /* End of header */
! 470: parseheader(me, me->request, me->request->anchor);
! 471: else if (*b == LF) /* CRLF */
! 472: me->EOLstate = EOL_FLF;
! 473: else if (WHITE(*b)) { /* Folding: CR SP */
! 474: me->EOLstate = EOL_BEGIN;
! 475: HTChunkPutc(me->buffer, ' ');
! 476: } else { /* New line */
! 477: me->EOLstate = EOL_BEGIN;
! 478: HTChunkPutc(me->buffer, '\0');
! 479: HTChunkPutc(me->buffer, *b);
! 480: }
! 481: } else if (me->EOLstate == EOL_FLF) {
! 482: if (*b == CR) /* LF CR or CR LF CR */
! 483: me->EOLstate = EOL_SCR;
! 484: else if (*b == LF) /* End of header */
! 485: parseheader(me, me->request, me->request->anchor);
! 486: else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
! 487: me->EOLstate = EOL_BEGIN;
! 488: HTChunkPutc(me->buffer, ' ');
! 489: } else { /* New line */
! 490: me->EOLstate = EOL_BEGIN;
! 491: HTChunkPutc(me->buffer, '\0');
! 492: HTChunkPutc(me->buffer, *b);
! 493: }
! 494: } else if (me->EOLstate == EOL_SCR) {
! 495: if (*b==CR || *b==LF) /* End of header */
! 496: parseheader(me, me->request, me->request->anchor);
! 497: else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
! 498: me->EOLstate = EOL_BEGIN;
! 499: HTChunkPutc(me->buffer, ' ');
! 500: } else { /* New line */
! 501: me->EOLstate = EOL_BEGIN;
! 502: HTChunkPutc(me->buffer, '\0');
! 503: HTChunkPutc(me->buffer, *b);
! 504: }
! 505: } else if (*b == CR) {
! 506: me->EOLstate = EOL_FCR;
! 507: } else if (*b == LF) {
! 508: me->EOLstate = EOL_FLF; /* Line found */
! 509: } else
! 510: HTChunkPutc(me->buffer, *b);
! 511: b++;
! 512: }
! 513: if (l > 0) /* Anything left? */
! 514: return (*me->target->isa->put_block)(me->target, b, l);
! 515: return HT_OK;
! 516: }
! 517:
! 518:
! 519: /* Character handling
! 520: ** ------------------
! 521: */
! 522: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, CONST char, c)
! 523: {
! 524: return HTMIME_put_block(me, &c, 1);
! 525: }
! 526:
2.1 timbl 527:
528: /* String handling
529: ** ---------------
530: */
2.17.2.1! frystyk 531: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 532: {
2.17.2.1! frystyk 533: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 534: }
535:
536:
2.17.2.1! frystyk 537: /* Flush an stream object
! 538: ** ---------------------
2.1 timbl 539: */
2.17.2.1! frystyk 540: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 541: {
2.17.2.1! frystyk 542: return (*me->target->isa->flush)(me->target);
2.1 timbl 543: }
544:
2.17.2.1! frystyk 545: /* Free a stream object
! 546: ** --------------------
2.1 timbl 547: */
2.14 frystyk 548: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 549: {
2.17.2.1! frystyk 550: int status = HT_OK;
! 551: if (me->target)
! 552: status = (*me->target->isa->_free)(me->target);
! 553: HTChunkFree(me->buffer);
2.1 timbl 554: free(me);
2.17.2.1! frystyk 555: return status;
2.1 timbl 556: }
557:
558: /* End writing
559: */
2.14 frystyk 560: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 561: {
2.17.2.1! frystyk 562: int status = HT_ERROR;
! 563: if (me->target)
! 564: status = (*me->target->isa->abort)(me->target, e);
2.6 timbl 565: free(me);
2.17.2.1! frystyk 566: return status;
2.1 timbl 567: }
568:
569:
570:
571: /* Structured Object Class
572: ** -----------------------
573: */
2.6 timbl 574: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 575: {
576: "MIMEParser",
2.17.2.1! frystyk 577: HTMIME_flush,
2.1 timbl 578: HTMIME_free,
2.6 timbl 579: HTMIME_abort,
580: HTMIME_put_character,
581: HTMIME_put_string,
2.17.2.1! frystyk 582: HTMIME_put_block
2.1 timbl 583: };
584:
585:
586: /* Subclass-specific Methods
587: ** -------------------------
588: */
2.7 timbl 589: PUBLIC HTStream* HTMIMEConvert ARGS5(
590: HTRequest *, request,
591: void *, param,
592: HTFormat, input_format,
593: HTFormat, output_format,
594: HTStream *, output_stream)
2.1 timbl 595: {
596: HTStream* me;
2.17.2.1! frystyk 597: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
! 598: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 599: me->isa = &HTMIME;
2.17.2.1! frystyk 600: me->request = request;
! 601: me->target = output_stream;
! 602: me->target_format = output_format;
! 603: me->buffer = HTChunkCreate(512);
! 604: me->EOLstate = EOL_BEGIN;
2.6 timbl 605: return me;
606: }
Webmaster