Annotation of libwww/Library/src/HTMIME.c, revision 2.1
2.1 ! timbl 1: /* MIME Message Parse HTMIME.c
! 2: ** ==================
! 3: **
! 4: ** This is RFC 1341-specific code.
! 5: ** The input stream pushed into this parser is assumed to be
! 6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
! 7: ** (It is easy to change this except for the body part where
! 8: ** conversion can be slow.)
! 9: **
! 10: ** History:
! 11: ** Feb 92 Written Tim Berners-Lee, CERN
! 12: **
! 13: */
! 14: #include "HTMIME.h" /* Implemented here */
! 15: #include "HTAlert.h"
! 16:
! 17:
! 18: /* MIME Object
! 19: ** -----------
! 20: */
! 21:
/*
**	States of the header parser.  The enumerator order is kept exactly
**	as it was, so the numeric values are unchanged.
*/
typedef enum _MIME_state {
    BEGINNING_OF_LINE,		/* At the first character of a header line */
    CONTENT_T,			/* "Content-t" matched; next char picks the field */
    CONTENT_TRANSFER_ENCODING,	/* "Content-transfer-encoding:" matched */
    CONTENT_TYPE,		/* "Content-type:" matched */
    SKIP_GET_VALUE,		/* Skip white space, then collect the value */
    GET_VALUE,			/* Collect the value up to white space */
    JUNK_LINE,			/* Discard the remainder of this (folded) line */
    NEWLINE,			/* LF just seen; the line may be continued */
    CHECK,			/* Comparing input against check_pointer */
    TRANSPARENT			/* Header done: pass data straight to target */
} MIME_state;
! 34:
! 35: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
! 36: struct _HTStream {
! 37: CONST HTStreamClass * isa;
! 38:
! 39: MIME_state state; /* current state */
! 40: MIME_state if_ok; /* got this state if match */
! 41: MIME_state field; /* remember which field */
! 42: MIME_state fold_state; /* state on a fold */
! 43: CONST char * check_pointer; /* checking input */
! 44:
! 45: char * value_pointer; /* storing values */
! 46: char value[VALUE_SIZE];
! 47:
! 48: HTParentAnchor * anchor; /* Given on creation */
! 49: HTStream * sink; /* Given on creation */
! 50:
! 51: char * boundary; /* For multipart */
! 52:
! 53: HTFormat encoding; /* Content-Transfer-Encoding */
! 54: HTFormat format; /* Content-Type */
! 55: HTStream * target; /* While writing out */
! 56: HTStreamClass targetClass;
! 57:
! 58: HTAtom * targetRep; /* Converting into? */
! 59: };
! 60:
! 61:
! 62: /*_________________________________________________________________________
! 63: **
! 64: ** A C T I O N R O U T I N E S
! 65: */
! 66:
! 67: /* Character handling
! 68: ** ------------------
! 69: **
! 70: ** This is a FSM parser which is tolerant as it can be of all
! 71: ** syntax errors. It ignores field names it does not understand,
! 72: ** and resynchronises on line beginnings.
! 73: */
! 74:
! 75: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
! 76: {
! 77: switch(me->state) {
! 78:
! 79: case TRANSPARENT:
! 80: (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
! 81: return;
! 82:
! 83: case NEWLINE:
! 84: if (c != '\n' && WHITE(c)) { /* Folded line */
! 85: me->state = me->fold_state; /* pop state before newline */
! 86: break;
! 87: }
! 88:
! 89: /* else Falls through */
! 90:
! 91: case BEGINNING_OF_LINE:
! 92: switch(c) {
! 93: case 'c':
! 94: case 'C':
! 95: me->check_pointer = "ontent-t";
! 96: me->if_ok = CONTENT_T;
! 97: me->state = CHECK;
! 98: break;
! 99: case '\n': /* Blank line: End of Header! */
! 100: {
! 101: if (TRACE) fprintf(stderr,
! 102: "HTMIME: MIME content type is %s, converting to\n",
! 103: HTAtom_name(me->format),HTAtom_name(me->targetRep));
! 104: me->target = HTStreamStack(me->format, me->targetRep,
! 105: me->sink , me->anchor);
! 106: me->targetClass = *me->target->isa;
! 107: /* Check for encoding and select state from there @@ */
! 108:
! 109: me->state = TRANSPARENT; /* From now on push straight through */
! 110: }
! 111: break;
! 112:
! 113: default:
! 114: goto bad_field_name;
! 115: break;
! 116:
! 117: } /* switch on character */
! 118: break;
! 119:
! 120: case CHECK: /* Check against string */
! 121: if (TOLOWER(c) == *(me->check_pointer)++) {
! 122: if (!*me->check_pointer) me->state = me->if_ok;
! 123: } else { /* Error */
! 124: if (TRACE) fprintf(stderr,
! 125: "HTMIME: Bad character `%c' found where `%s' expected",
! 126: c, me->check_pointer - 1);
! 127: goto bad_field_name;
! 128: }
! 129: break;
! 130:
! 131: case CONTENT_T:
! 132: switch(c) {
! 133: case 'r':
! 134: case 'R':
! 135: me->check_pointer = "ansfer-encoding:";
! 136: me->if_ok = CONTENT_TRANSFER_ENCODING;
! 137: me->state = CHECK;
! 138: break;
! 139:
! 140: case 'y':
! 141: case 'Y':
! 142: me->check_pointer = "pe:";
! 143: me->if_ok = CONTENT_TYPE;
! 144: me->state = CHECK;
! 145: break;
! 146:
! 147: default:
! 148: goto bad_field_name;
! 149:
! 150: } /* switch on character */
! 151: break;
! 152:
! 153: case CONTENT_TYPE:
! 154: case CONTENT_TRANSFER_ENCODING:
! 155: me->field = me->state; /* remember it */
! 156: me->state = SKIP_GET_VALUE;
! 157: /* Fall through! */
! 158: case SKIP_GET_VALUE:
! 159: if (c == '\n') {
! 160: me->fold_state = me->state;
! 161: me->state = NEWLINE;
! 162: break;
! 163: }
! 164: if (WHITE(c)) break; /* Skip white space */
! 165:
! 166: me->value_pointer = me->value;
! 167: me->state = GET_VALUE;
! 168: /* Fall through to store first character */
! 169:
! 170: case GET_VALUE:
! 171: if (WHITE(c)) { /* End of field */
! 172: *me->value_pointer = 0;
! 173: switch (me->field) {
! 174: case CONTENT_TYPE:
! 175: me->format = HTAtom_for(me->value);
! 176: break;
! 177: case CONTENT_TRANSFER_ENCODING:
! 178: me->encoding = HTAtom_for(me->value);
! 179: break;
! 180: default: /* Should never get here */
! 181: break;
! 182: }
! 183: } else {
! 184: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
! 185: *me->value_pointer++ = c;
! 186: break;
! 187: } else {
! 188: goto value_too_long;
! 189: }
! 190: }
! 191: /* Fall through */
! 192:
! 193: case JUNK_LINE:
! 194: if (c == '\n') {
! 195: me->state = NEWLINE;
! 196: me->fold_state = me->state;
! 197: }
! 198: break;
! 199:
! 200:
! 201: } /* switch on state*/
! 202:
! 203: return;
! 204:
! 205: value_too_long:
! 206: if (TRACE) fprintf(stderr,
! 207: "HTMIME: *** Syntax error. (string too long)\n");
! 208:
! 209: bad_field_name: /* Ignore it */
! 210: me->state = JUNK_LINE;
! 211: return;
! 212:
! 213: }
! 214:
! 215:
! 216:
! 217: /* String handling
! 218: ** ---------------
! 219: **
! 220: ** Strings must be smaller than this buffer size.
! 221: */
! 222: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
! 223: {
! 224: CONST char * p;
! 225: if (me->state == TRANSPARENT) /* Optimisation */
! 226: (*me->targetClass.put_string)(me->target,s);
! 227: else
! 228: for (p=s; *p; p++) HTMIME_put_character(me, *p);
! 229: }
! 230:
! 231:
! 232: /* Buffer write. Buffers can (and should!) be big.
! 233: ** ------------
! 234: */
! 235: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
! 236: {
! 237: CONST char * p;
! 238: if (me->state == TRANSPARENT) /* Optimisation */
! 239: (*me->targetClass.write)(me->target, s, l);
! 240: else
! 241: for (p=s; p < s+l; p++) HTMIME_put_character(me, *p);
! 242: }
! 243:
! 244:
! 245:
! 246:
! 247: /* Free an HTML object
! 248: ** -------------------
! 249: **
! 250: */
! 251: PRIVATE void HTMIME_free ARGS1(HTStream *, me)
! 252: {
! 253: if (me->target) (*me->targetClass.free)(me->target);
! 254: free(me);
! 255: }
! 256:
! 257: /* End writing
! 258: */
! 259:
! 260: PRIVATE void HTMIME_end_document ARGS1(HTStream *, me)
! 261: {
! 262: (*me->targetClass.end_document)(me->target);
! 263: }
! 264:
! 265:
! 266:
! 267: /* Structured Object Class
! 268: ** -----------------------
! 269: */
! 270: PUBLIC CONST HTStreamClass HTMIME =
! 271: {
! 272: "MIMEParser",
! 273: HTMIME_free,
! 274: HTMIME_end_document,
! 275: HTMIME_put_character, HTMIME_put_string,
! 276: HTMIME_write
! 277: };
! 278:
! 279:
! 280: /* Subclass-specific Methods
! 281: ** -------------------------
! 282: */
! 283:
! 284: PUBLIC HTStream* HTMIMEConvert ARGS3(
! 285: HTPresentation *, pres,
! 286: HTParentAnchor *, anchor,
! 287: HTStream *, sink)
! 288: {
! 289: HTStream* me;
! 290:
! 291: me = malloc(sizeof(*me));
! 292: if (me == NULL) outofmem(__FILE__, "HTML_new");
! 293: me->isa = &HTMIME;
! 294:
! 295: me->sink = sink;
! 296: me->anchor = anchor;
! 297: me->target = NULL;
! 298: me->state = BEGINNING_OF_LINE;
! 299: me->format = WWW_PLAINTEXT;
! 300: me->targetRep = pres->rep_out;
! 301: me->boundary = 0; /* Not set yet */
! 302: return me;
! 303: }
! 304:
! 305:
Webmaster