Annotation of libwww/Library/src/SGML.c, revision 1.42
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.42 ! frystyk 6: ** @(#) $Id: SGML.c,v 1.41 1996/07/02 22:55:21 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This module implements an HTStream object. To parse an
1.1 timbl 9: ** SGML file, create this object which is a parser. The object
1.2 timbl 10: ** is (currently) created by being passed a DTD structure,
11: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **
1.19 duns 13: ** 6 Feb 93 Binary searches used. Interface modified.
14: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.42 ! frystyk 15: ** Nov 1996 msa Strip down the parser to minimal HTML tokenizer,
! 16: ** Stop allocating space for the attribute values,
! 17: ** use pointers to the string chunk instead.
1.1 timbl 18: */
19:
1.25 frystyk 20: /* Library include files */
1.38 frystyk 21: #include "sysdep.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26:
1.2 timbl 27: #define INVALID (-1)
28:
1.1 timbl 29: /* The State (context) of the parser
30: **
1.2 timbl 31: ** This is passed with each call to make the parser reentrant
1.1 timbl 32: **
33: */
/*
** Tokenizer states.  The enumerators are listed in their original order
** so that their numeric values are unchanged.
*/
typedef enum _sgml_state
{
    S_text,		/* passing character data through */
    S_literal,		/* '<' seen in literal content: matching the end tag */
    S_tag,		/* collecting an element name after '<' */
    S_tag_gap,		/* inside a tag: expecting attribute or '>' */
    S_attr,		/* collecting an attribute name */
    S_attr_gap,		/* after attribute name: expecting attribute, '=' or '>' */
    S_equals,		/* after "attr =": expecting the value */
    S_value,		/* collecting an unquoted attribute value */
    S_after_open,	/* the single character following a start tag */
    S_nl,		/* newline seen in text */
    S_nl_tago,		/* newline followed by a tag opener */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#" */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* text up to the next ESC */
#endif
    S_squoted,		/* inside a single quoted attribute value */
    S_dquoted,		/* inside a double quoted attribute value */
    S_end,		/* collecting an element name after "</" */
    S_entity,		/* collecting an entity name */
    S_junk_tag,		/* skipping everything up to '>' */
    S_md,		/* inside a "<!" declaration */
    S_md_sqs,		/* single quoted string inside a declaration */
    S_md_dqs,		/* double quoted string inside a declaration */
    S_com_1,		/* one '-' seen inside a declaration */
    S_com,		/* within a comment */
    S_com_2		/* '-' seen within a comment */
} sgml_state;
1.21 frystyk 46:
47:
1.2 timbl 48: /* Internal Context Data Structure
49: ** -------------------------------
50: */
1.42 ! frystyk 51: struct _HTStream
! 52: {
! 53: const HTStreamClass *isa; /* inherited from HTStream */
! 54: const SGML_dtd *dtd;
! 55: HTStructuredClass *actions; /* target class */
! 56: HTStructured *target; /* target object */
1.2 timbl 57:
1.42 ! frystyk 58: HTTag *current_tag;
! 59: int current_attribute_number;
! 60: SGMLContent contents; /* current content mode */
! 61: HTChunk *string;
! 62: int token; /* ptr into string buffer */
! 63: sgml_state state;
! 64: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
! 65: int value[MAX_ATTRIBUTES]; /* Offset pointers to the string */
! 66: };
1.2 timbl 67:
68:
/*
** Output helpers.  Both expect a variable named `context' (the parser
** stream) to be in scope where they are used.
*/
#define PUTC(ch) \
	((*context->actions->put_character)(context->target, (ch)))
#define PUTB(b,l) \
	((*context->actions->put_block)(context->target, (b), (l)))

/*
** Trace helpers.  `f' must be a string literal: it is pasted onto the
** common "SGML Parser. " prefix.  Nothing is evaluated unless
** SGML_TRACE is true.
*/
#define TRACE1(f,a) \
	do { if (SGML_TRACE) HTTrace("SGML Parser. " f, (a)); } while (0)
#define TRACE2(f,a,b) \
	do { if (SGML_TRACE) HTTrace("SGML Parser. " f, (a), (b)); } while (0)
1.1 timbl 76:
1.17 timbl 77: /* Find Attribute Number
78: ** ---------------------
79: */
1.40 frystyk 80: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 ! frystyk 81: {
! 82: attr* attributes = tag->attributes;
1.17 timbl 83:
1.42 ! frystyk 84: int high, low, i, diff; /* Binary search for attribute name */
! 85: for(low=0, high=tag->number_of_attributes;
! 86: high > low ;
! 87: diff < 0 ? (low = i+1) : (high = i) )
! 88: {
! 89: i = (low + (high-low)/2);
! 90: diff = strcasecomp(attributes[i].name, s);
! 91: if (diff==0)
! 92: return i; /* success: found it */
! 93: }
! 94: return -1;
! 95: }
1.17 timbl 96:
1.1 timbl 97:
98: /* Handle Attribute
99: ** ----------------
100: */
1.38 frystyk 101: /* PUBLIC const char * SGML_default = ""; ?? */
1.1 timbl 102:
1.38 frystyk 103: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 ! frystyk 104: {
! 105: HTTag * tag = context->current_tag;
1.2 timbl 106:
1.42 ! frystyk 107: /* Note: if tag==NULL, we are skipping unknown tag... */
! 108: if (tag)
! 109: {
! 110: int i = SGMLFindAttribute(tag, s);
! 111: if (i >= 0)
! 112: {
! 113: context->current_attribute_number = i;
! 114: context->present[i] = YES;
! 115: return;
! 116: }
! 117: TRACE2("Unknown attribute %s for tag %s\n",
! 118: s, context->current_tag->name);
! 119: }
! 120: context->current_attribute_number = INVALID; /* Invalid */
! 121: }
1.2 timbl 122:
1.1 timbl 123:
124: /* Handle attribute value
125: ** ----------------------
126: */
1.42 ! frystyk 127: PRIVATE void handle_attribute_value (HTStream * context)
! 128: {
! 129: /* Deal with attributes only if tag is known,
! 130: ignore silently otherwise */
! 131:
! 132: if (context->current_tag)
! 133: {
! 134: if (context->current_attribute_number != INVALID)
! 135: context->value[context->current_attribute_number] =
! 136: context->token;
! 137: else
! 138: TRACE1("Attribute value %s ignored\n",
! 139: context->string->data + context->token);
! 140:
! 141: }
! 142: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
144:
145: /* Handle entity
146: ** -------------
147: **
148: ** On entry,
149: ** context->string contains the entity name, zero terminated
150: */
1.42 ! frystyk 151: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 152: {
1.42 ! frystyk 153: const char ** entities = context->dtd->entity_names;
! 154: const char *s = context->string->data;
1.1 timbl 155:
1.42 ! frystyk 156: int high, low, i, diff;
! 157: for(low=0, high = context->dtd->number_of_entities;
! 158: high > low ;
! 159: diff < 0 ? (low = i+1) : (high = i))
! 160: {
! 161: i = (low + (high-low)/2);
! 162: diff = strcmp(entities[i], s); /* Case sensitive! */
! 163: if (diff==0)
! 164: { /* success: found it */
! 165: (*context->actions->put_entity)(context->target, i);
! 166: return;
! 167: }
! 168: }
! 169: /* If entity string not found, display as text */
! 170: TRACE1("Unknown entity %s\n", s);
! 171: PUTC('&');
! 172: {
! 173: const char *p;
! 174: for (p=s; *p; p++)
! 175: PUTC(*p);
! 176: }
1.35 frystyk 177: }
1.2 timbl 178:
1.1 timbl 179: /* End element
1.2 timbl 180: ** -----------
1.1 timbl 181: */
1.42 ! frystyk 182: PRIVATE void end_element (HTStream * context, HTTag *tag)
! 183: {
! 184: TRACE1("End </%s>\n", tag->name);
! 185: (*context->actions->end_element)
! 186: (context->target, tag - context->dtd->tags);
1.1 timbl 187: }
188:
1.17 timbl 189: /* Start an element
190: ** ----------------
1.1 timbl 191: */
1.31 frystyk 192: PRIVATE void start_element (HTStream * context)
1.42 ! frystyk 193: {
! 194: int i;
! 195: char *value[MAX_ATTRIBUTES];
! 196: HTTag *tag = context->current_tag;
! 197:
! 198: TRACE1("Start <%s>\n", tag->name);
! 199: context->contents = tag->contents;
! 200:
! 201: /*
! 202: ** Build the actual pointers to the value strings stored in the
! 203: ** chunk buffer. (Must use offsets while collecting the values,
! 204: ** because the string chunk may get resized during the collection
! 205: ** and potentially relocated).
! 206: */
! 207: for (i = 0; i < MAX_ATTRIBUTES; ++i)
! 208: value[i] = context->value[i] < 0 ? NULL :
! 209: context->string->data + context->value[i];
! 210: (*context->actions->start_element)
! 211: (context->target,
! 212: tag - context->dtd->tags,
! 213: context->present,
! 214: (const char**)value); /* coerce type for think c */
1.1 timbl 215: }
216:
217:
1.2 timbl 218: /* Find Tag in DTD tag list
219: ** ------------------------
1.1 timbl 220: **
221: ** On entry,
1.2 timbl 222: ** dtd points to dtd structure including valid tag list
223: ** string points to name of tag in question
1.1 timbl 224: **
1.2 timbl 225: ** On exit,
226: ** returns:
1.7 timbl 227: ** NULL tag not found
228: ** else address of tag structure in dtd
1.2 timbl 229: */
1.40 frystyk 230: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 ! frystyk 231: {
! 232: int high, low, i, diff;
! 233: for(low=0, high=dtd->number_of_tags;
! 234: high > low ;
! 235: diff < 0 ? (low = i+1) : (high = i))
! 236: { /* Binary serach */
! 237: i = (low + (high-low)/2);
! 238: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
! 239: if (diff==0)
! 240: /* success: found it */
! 241: return &dtd->tags[i];
! 242: }
! 243: return NULL;
1.2 timbl 244: }
245:
246: /*________________________________________________________________________
247: ** Public Methods
1.1 timbl 248: */
249:
1.2 timbl 250:
251: /* Could check that we are back to bottom of stack! @@ */
1.40 frystyk 252: PRIVATE int SGML_flush (HTStream * context)
1.42 ! frystyk 253: {
! 254: return (*context->actions->flush)(context->target);
1.26 frystyk 255: }
1.1 timbl 256:
1.40 frystyk 257: PRIVATE int SGML_free (HTStream * context)
1.42 ! frystyk 258: {
! 259: int status;
1.15 frystyk 260:
1.42 ! frystyk 261: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
! 262: return status;
! 263: HTChunk_delete(context->string);
! 264: HT_FREE(context);
! 265: return HT_OK;
1.15 frystyk 266: }
1.1 timbl 267:
1.40 frystyk 268: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 ! frystyk 269: {
! 270: (*context->actions->abort)(context->target, e);
! 271: HTChunk_delete(context->string);
! 272: HT_FREE(context);
! 273: return HT_ERROR;
1.15 frystyk 274: }
1.1 timbl 275:
1.41 frystyk 276: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 ! frystyk 277: {
! 278: const SGML_dtd *dtd = context->dtd;
! 279: HTChunk *string = context->string;
! 280: const char *text = b;
! 281: int count = 0;
1.18 timbl 282:
1.42 ! frystyk 283: while (l-- > 0)
! 284: {
! 285: char c = *b++;
! 286: switch(context->state)
! 287: {
! 288: got_element_open:
! 289: /*
! 290: ** The label is jumped when the '>' of a the element
! 291: ** start tag has been detected. This DOES NOT FALL TO
! 292: ** THE CODE S_after_open, only processes the tag and
! 293: ** sets the state (c should still contain the
! 294: ** terminating character of the tag ('>'))
! 295: */
! 296: if (context->current_tag && context->current_tag->name)
! 297: start_element(context);
! 298: context->state = S_after_open;
! 299: break;
1.18 timbl 300:
1.42 ! frystyk 301: case S_after_open:
! 302: /*
! 303: ** State S_after_open is entered only for single
! 304: ** character after the element opening tag to test
! 305: ** against newline. Strip one trainling newline only
! 306: ** after opening nonempty element. - SGML: Ugh!
! 307: */
! 308: text = b;
! 309: count = 0;
! 310: if (c == '\n' && (context->contents != SGML_EMPTY))
! 311: {
! 312: context->state = S_text;
! 313: break;
! 314: }
! 315: --text;
! 316: goto S_text;
! 317:
! 318: S_text:
! 319: context->state = S_text;
! 320: case S_text:
1.13 timbl 321: #ifdef ISO_2022_JP
1.42 ! frystyk 322: if (c == '\033')
! 323: {
! 324: context->state = S_esc;
! 325: ++count;
! 326: break;
! 327: }
1.13 timbl 328: #endif /* ISO_2022_JP */
1.42 ! frystyk 329: if (c == '&')
! 330: {
! 331: if (count > 0)
! 332: PUTB(text, count);
! 333: count = 0;
! 334: string->size = 0;
! 335: context->state = S_ero;
! 336: }
! 337: else if (c == '<')
! 338: {
! 339: if (count > 0)
! 340: PUTB(text, count);
! 341: count = 0;
! 342: string->size = 0;
! 343: /* should scrap LITERAL, and use CDATA and
! 344: RCDATA -- msa */
! 345: context->state =
! 346: (context->contents == SGML_LITERAL) ?
! 347: S_literal : S_tag;
! 348: }
! 349: else if (c == '\n')
! 350: /* Newline - ignore if before end tag! */
! 351: context->state = S_nl;
! 352: else
! 353: ++count;
! 354: break;
1.13 timbl 355:
1.42 ! frystyk 356: case S_nl:
! 357: if (c == '<')
! 358: {
! 359: if (count > 0)
! 360: PUTB(text, count);
! 361: count = 0;
! 362: string->size = 0;
! 363: context->state =
! 364: (context->contents == SGML_LITERAL) ?
! 365: S_literal : S_nl_tago;
! 366: }
! 367: else
! 368: {
! 369: ++count;
! 370: goto S_text;
! 371: }
! 372: break;
1.18 timbl 373:
1.42 ! frystyk 374: case S_nl_tago: /* Had newline and tag opener */
! 375: if (c != '/')
! 376: PUTC('\n'); /* Only ignore newline before </ */
! 377: context->state = S_tag;
! 378: goto handle_S_tag;
1.18 timbl 379:
1.13 timbl 380: #ifdef ISO_2022_JP
1.42 ! frystyk 381: case S_esc:
! 382: if (c=='$')
! 383: context->state = S_dollar;
! 384: else if (c=='(')
! 385: context->state = S_paren;
! 386: else
! 387: context->state = S_text;
! 388: ++count;
! 389: break;
! 390:
! 391: case S_dollar:
! 392: if (c=='@' || c=='B')
! 393: context->state = S_nonascii_text;
! 394: else
! 395: context->state = S_text;
! 396: ++count;
! 397: break;
! 398:
! 399: case S_paren:
! 400: if (c=='B' || c=='J')
! 401: context->state = S_text;
! 402: else
! 403: context->state = S_text;
! 404: ++count;
! 405: break;
! 406:
! 407: case S_nonascii_text:
! 408: if (c == '\033')
! 409: context->state = S_esc;
! 410: ++count;
! 411: break;
1.13 timbl 412: #endif /* ISO_2022_JP */
1.1 timbl 413:
1.42 ! frystyk 414: /* In literal mode, waits only for specific end tag!
! 415: ** Only foir compatibility with old servers.
! 416: */
! 417: case S_literal:
! 418: HTChunk_putc(string, c);
! 419: if ( TOUPPER(c) !=
! 420: ((string->size == 1) ? '/'
! 421: : context->current_tag->name[string->size-2]))
! 422: {
1.1 timbl 423:
1.42 ! frystyk 424: /* If complete match, end literal */
! 425: if ((c == '>') &&
! 426: (!context->current_tag->name[string->size-2]))
! 427: {
! 428: end_element
! 429: (context,context->current_tag);
! 430: /*
! 431: ...setting SGML_MIXED below is a
! 432: bit of kludge, but a good guess that
! 433: currently works, anything other than
! 434: SGML_LITERAL would work... -- msa */
! 435: context->contents = SGML_MIXED;
! 436: }
! 437: else
! 438: {
! 439: /* If Mismatch: recover string. */
! 440: PUTC( '<');
! 441: PUTB(string->data, string->size);
! 442: }
! 443: context->state = S_text;
! 444: text = b;
! 445: count = 0;
! 446: }
! 447: break;
1.1 timbl 448:
1.42 ! frystyk 449: /*
! 450: ** Character reference or Entity
! 451: */
! 452: case S_ero:
! 453: if (c == '#')
! 454: {
! 455: /* &# is Char Ref Open */
! 456: context->state = S_cro;
! 457: break;
! 458: }
! 459: context->state = S_entity;
1.1 timbl 460:
1.42 ! frystyk 461: /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 462:
1.42 ! frystyk 463: /*
! 464: ** Handle Entities
! 465: */
! 466: case S_entity:
! 467: if (isalnum(c))
! 468: HTChunk_putc(string, c);
! 469: else
! 470: {
! 471: HTChunk_terminate(string);
! 472: handle_entity(context);
! 473: text = b;
! 474: count = 0;
! 475: if (c != ';')
! 476: {
! 477: --text;
! 478: goto S_text;
! 479: }
! 480: context->state = S_text;
! 481: }
! 482: break;
1.2 timbl 483:
1.42 ! frystyk 484: /* Character reference
! 485: */
! 486: case S_cro:
! 487: if (isalnum(c))
! 488: /* accumulate a character NUMBER */
! 489: HTChunk_putc(string, c);
! 490: else
! 491: {
! 492: int value;
! 493: HTChunk_terminate(string);
! 494: if (sscanf(string->data, "%d", &value)==1)
! 495: PUTC((char)value);
! 496: else
! 497: {
! 498: PUTB("&#", 2);
! 499: PUTB(string->data, string->size-1);
! 500: }
! 501: text = b;
! 502: count = 0;
! 503: if (c != ';')
! 504: {
! 505: --text;
! 506: goto S_text;
! 507: }
! 508: context->state = S_text;
! 509: }
! 510: break;
1.1 timbl 511:
1.42 ! frystyk 512: case S_tag: /* new tag */
! 513: handle_S_tag:
! 514: if (isalnum(c))
! 515: HTChunk_putc(string, c);
! 516: else
! 517: { /* End of tag name */
! 518: int i;
1.1 timbl 519:
1.42 ! frystyk 520: if (c == '/')
! 521: {
! 522: if (string->size != 0)
! 523: TRACE1("`<%s/' found!\n",
! 524: string->data);
! 525: context->state = S_end;
! 526: break;
! 527: }
! 528: else if (c == '!')
! 529: {
! 530: if (string->size != 0)
! 531: TRACE1(" `<%s!' found!\n",
! 532: string->data);
! 533: context->state = S_md;
! 534: break;
! 535: }
! 536: HTChunk_terminate(string);
! 537: context->current_tag = SGMLFindTag(dtd, string->data);
! 538: if (context->current_tag == NULL)
! 539: TRACE1("*** Unknown element %s\n",
! 540: string->data);
! 541: else for (i=0;
! 542: i < context->current_tag->number_of_attributes; i++)
! 543: {
! 544: context->present[i] = NO;
! 545: context->value[i] = -1;
! 546: }
! 547: context->token = string->size = 0;
! 548: context->current_attribute_number = INVALID;
! 549: goto S_tag_gap;
! 550: }
! 551: break;
! 552:
! 553: S_tag_gap:
! 554: context->state = S_tag_gap;
! 555: case S_tag_gap: /* Expecting attribute or > */
! 556: if (WHITE(c))
! 557: break; /* Gap between attributes */
! 558:
! 559: if (c == '>')
! 560: goto got_element_open;
! 561: else
! 562: goto S_attr;
! 563:
! 564: S_attr:
! 565: /*
! 566: ** Start collecting the attribute name and collect
! 567: ** it in S_attr.
! 568: */
! 569: context->state = S_attr;
! 570: string->size = context->token;
! 571: case S_attr:
! 572: if (WHITE(c) || c == '>' || c == '=')
! 573: goto got_attribute_name;
! 574: else
! 575: HTChunk_putc(string, c);
! 576: break;
! 577:
! 578: got_attribute_name:
! 579: /*
! 580: ** This label is entered when attribute name has been
! 581: ** collected. Process it and enter S_attr_gap for
! 582: ** potential value or start of the next attribute.
! 583: */
! 584: HTChunk_terminate(string) ;
! 585: handle_attribute_name
! 586: (context, string->data + context->token);
! 587: string->size = context->token;
! 588: context->state = S_attr_gap;
! 589: case S_attr_gap: /* Expecting attribute or = or > */
! 590: if (WHITE(c))
! 591: break; /* Gap after attribute */
! 592:
! 593: if (c == '>')
! 594: goto got_element_open;
! 595: else if (c == '=')
! 596: context->state = S_equals;
! 597: else
! 598: goto S_attr; /* Get next attribute */
! 599: break;
! 600:
! 601: case S_equals: /* After attr = */
! 602: if (WHITE(c))
! 603: break; /* Before attribute value */
! 604:
! 605: if (c == '>')
! 606: { /* End of tag */
! 607: TRACE1("found = but no value\n", NULL);
! 608: goto got_element_open;
! 609: }
! 610: else if (c == '\'')
! 611: context->state = S_squoted;
! 612: else if (c == '"')
! 613: context->state = S_dquoted;
! 614: else
! 615: goto S_value;
! 616: break;
! 617:
! 618: S_value:
! 619: context->state = S_value;
! 620: string->size = context->token;
! 621: case S_value:
! 622: if (WHITE(c) || c == '>')
! 623: {
! 624: HTChunk_terminate(string);
! 625: handle_attribute_value(context);
! 626: context->token = string->size;
! 627: goto S_tag_gap;
! 628: }
! 629: else
! 630: HTChunk_putc(string, c);
! 631: break;
1.1 timbl 632:
1.42 ! frystyk 633: case S_squoted: /* Quoted attribute value */
! 634: if (c == '\'')
! 635: {
! 636: HTChunk_terminate(string);
! 637: handle_attribute_value(context);
! 638: context->token = string->size;
! 639: context->state = S_tag_gap;
! 640: }
! 641: else if (c && c != '\n' && c != '\r')
! 642: HTChunk_putc(string, c);
! 643: break;
1.1 timbl 644:
1.42 ! frystyk 645: case S_dquoted: /* Quoted attribute value */
! 646: if (c == '"')
! 647: {
! 648: HTChunk_terminate(string);
! 649: handle_attribute_value(context);
! 650: context->token = string->size;
! 651: context->state = S_tag_gap;
! 652: }
! 653: else if (c && c != '\n' && c != '\r')
! 654: HTChunk_putc(string, c);
! 655: break;
1.2 timbl 656:
1.42 ! frystyk 657: case S_end: /* </ */
! 658: if (isalnum(c))
! 659: HTChunk_putc(string, c);
! 660: else
! 661: { /* End of end tag name */
! 662: HTTag *t;
! 663:
! 664: HTChunk_terminate(string);
! 665: if (*string->data)
! 666: t = SGMLFindTag(dtd, string->data);
! 667: else
! 668: /* Empty end tag */
! 669: /* Original code popped here one
! 670: from the stack. If this feature
! 671: is required, I have to put the
! 672: stack back... -- msa */
! 673: t = NULL;
! 674: if (!t)
! 675: TRACE1("Unknown end tag </%s>\n",
! 676: string->data);
! 677: else
! 678: {
! 679: context->current_tag = NULL;
! 680: end_element(context, t);
! 681: }
! 682: string->size = 0;
! 683: context->current_attribute_number = INVALID;
! 684: if (c != '>')
! 685: {
! 686: if (!WHITE(c))
! 687: TRACE2("`</%s%c' found!\n",
! 688: string->data, c);
! 689: context->state = S_junk_tag;
! 690: }
! 691: else
! 692: {
! 693: text = b;
! 694: count = 0;
! 695: context->state = S_text;
! 696: }
! 697: }
! 698: break;
! 699:
! 700: S_junk_tag:
! 701: context->state = S_junk_tag;
! 702: case S_junk_tag:
! 703: if (c == '>')
! 704: {
! 705: text = b;
! 706: count = 0;
! 707: context->state = S_text;
! 708: }
! 709: break;
! 710:
! 711: /*
! 712: ** Scanning (actually skipping) declarations
! 713: */
! 714: case S_md:
! 715: if (c == '-')
! 716: context->state = S_com_1;
! 717: else if (c == '"')
! 718: context->state = S_md_dqs;
! 719: else if (c == '\'')
! 720: context->state = S_md_sqs;
! 721: else if (c == '>')
! 722: {
! 723: text = b;
! 724: count = 0;
! 725: context->state = S_text;
! 726: }
! 727: break;
! 728:
! 729: case S_md_dqs: /* Skip double quoted string */
! 730: if (c == '"')
! 731: context->state = S_md;
! 732: break;
! 733:
! 734: case S_md_sqs: /* Skip single quoted string */
! 735: if (c == '\'')
! 736: context->state = S_md;
! 737: break;
! 738:
! 739: case S_com_1: /* Starting a comment? */
! 740: context->state = (c == '-') ? S_com : S_md;
! 741: break;
! 742:
! 743: case S_com: /* ..within comment */
! 744: if (c == '-')
! 745: context->state = S_com_2;
! 746: break;
! 747:
! 748: case S_com_2: /* Ending a comment ? */
! 749: context->state = (c == '-') ? S_md : S_com;
! 750: break;
! 751: }
1.7 timbl 752: }
1.42 ! frystyk 753: if (count > 0)
! 754: PUTB(text, count);
! 755: return HT_OK;
! 756: }
1.1 timbl 757:
1.2 timbl 758:
1.40 frystyk 759: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 ! frystyk 760: {
! 761: return SGML_write(context, s, (int) strlen(s));
! 762: }
1.2 timbl 763:
764:
1.41 frystyk 765: PRIVATE int SGML_character (HTStream * context, char c)
1.42 ! frystyk 766: {
! 767: return SGML_write(context, &c, 1);
! 768: }
1.2 timbl 769:
770: /*_______________________________________________________________________
771: */
772:
773: /* Structured Object Class
774: ** -----------------------
775: */
1.38 frystyk 776: PRIVATE const HTStreamClass SGMLParser =
1.42 ! frystyk 777: {
! 778: "SGMLParser",
! 779: SGML_flush,
! 780: SGML_free,
! 781: SGML_abort,
! 782: SGML_character,
! 783: SGML_string,
! 784: SGML_write,
! 785: };
1.2 timbl 786:
787: /* Create SGML Engine
788: ** ------------------
789: **
790: ** On entry,
791: ** dtd represents the DTD, along with
792: ** actions is the sink for the data as a set of routines.
793: **
794: */
1.42 ! frystyk 795: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
! 796: {
! 797: int i;
! 798: HTStream* context;
! 799: if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
! 800: HT_OUTOFMEM("SGML_begin");
1.2 timbl 801:
1.42 ! frystyk 802: context->isa = &SGMLParser;
! 803: context->string = HTChunk_new(128); /* Grow by this much */
! 804: context->dtd = dtd;
! 805: context->target = target;
! 806: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
! 807: /* Ugh: no OO */
! 808: context->state = S_text;
! 809: for(i=0; i<MAX_ATTRIBUTES; i++)
! 810: context->value[i] = 0;
! 811: return context;
! 812: }
Webmaster