Annotation of libwww/Library/src/SGML.c, revision 1.40
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.40 ! frystyk 6: ** @(#) $Id: SGML.c,v 1.39 1996/04/12 17:49:57 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This module implements an HTStream object. To parse an
1.1 timbl 9: ** SGML file, create this object which is a parser. The object
1.2 timbl 10: ** is (currently) created by being passed a DTD structure,
11: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **
1.19 duns 13: ** 6 Feb 93 Binary searches used. Interface modified.
14: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 15: */
16:
1.25 frystyk 17: /* Library include files */
1.38 frystyk 18: #include "sysdep.h"
1.1 timbl 19: #include "HTUtils.h"
1.25 frystyk 20: #include "HTString.h"
1.1 timbl 21: #include "HTChunk.h"
1.20 frystyk 22: #include "SGML.h"
1.1 timbl 23:
1.2 timbl 24: #define INVALID (-1)
25:
1.1 timbl 26: /* The State (context) of the parser
27: **
1.2 timbl 28: ** This is passed with each call to make the parser reentrant
1.1 timbl 29: **
30: */
31:
1.16 frystyk 32:
1.2 timbl 33:
34:
35: /* Element Stack
36: ** -------------
37: ** This allows us to return down the stack reselcting styles.
38: ** As we return, attribute values will be garbage in general.
39: */
40: typedef struct _HTElement HTElement;
41: struct _HTElement {
42: HTElement * next; /* Previously nested element or 0 */
43: HTTag* tag; /* The tag at this level */
44: };
45:
46:
/*	Parser states
**	-------------
**	One value per state of the character-driven state machine in
**	SGML_character(). The order of the enumerators is kept as is.
*/
typedef enum _sgml_state {
    /* Plain text and start tags */
    S_text,
    S_literal,
    S_tag,
    S_tag_gap,
    /* Attributes inside a start tag */
    S_attr,
    S_attr_gap,
    S_equals,
    S_value,
    S_after_open,
    /* Newline handling */
    S_nl,
    S_nl_tago,
    /* Entity (&) and character (&#) reference open */
    S_ero,
    S_cro,
#ifdef ISO_2022_JP
    /* ISO-2022-JP escape sequences */
    S_esc,
    S_dollar,
    S_paren,
    S_nonascii_text,
#endif
    /* Quoted values, end tags, entities and ignored tags */
    S_squoted,
    S_dquoted,
    S_end,
    S_entity,
    S_junk_tag
} sgml_state;
57:
58:
1.2 timbl 59: /* Internal Context Data Structure
60: ** -------------------------------
61: */
62: struct _HTStream {
63:
1.38 frystyk 64: const HTStreamClass * isa; /* inherited from HTStream */
1.2 timbl 65:
1.38 frystyk 66: const SGML_dtd *dtd;
1.2 timbl 67: HTStructuredClass *actions; /* target class */
68: HTStructured *target; /* target object */
69:
1.1 timbl 70: HTTag *current_tag;
1.2 timbl 71: int current_attribute_number;
1.1 timbl 72: HTChunk *string;
73: HTElement *element_stack;
1.21 frystyk 74: sgml_state state;
1.2 timbl 75: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
76: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
77: } ;
78:
79:
/*
**	Send one character to the target's put_character method.
**	Relies on a variable named `context' being in scope at the call site.
**	The argument is parenthesised so that any expression can be passed.
*/
#define PUTC(ch) ((*context->actions->put_character)(context->target, (ch)))
81:
1.1 timbl 82:
1.17 timbl 83: /* Find Attribute Number
84: ** ---------------------
85: */
86:
1.40 ! frystyk 87: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.17 timbl 88: {
89: attr* attributes = tag->attributes;
90:
91: int high, low, i, diff; /* Binary search for attribute name */
92: for(low=0, high=tag->number_of_attributes;
93: high > low ;
94: diff < 0 ? (low = i+1) : (high = i) ) {
95: i = (low + (high-low)/2);
96: diff = strcasecomp(attributes[i].name, s);
97: if (diff==0) return i; /* success: found it */
98: } /* for */
99:
100: return -1;
101: }
102:
1.1 timbl 103:
104: /* Handle Attribute
105: ** ----------------
106: */
1.38 frystyk 107: /* PUBLIC const char * SGML_default = ""; ?? */
1.1 timbl 108:
1.38 frystyk 109: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.1 timbl 110: {
1.2 timbl 111:
112: HTTag * tag = context->current_tag;
113:
1.17 timbl 114: int i = SGMLFindAttribute(tag, s);
115: if (i>=0) {
116: context->current_attribute_number = i;
117: context->present[i] = YES;
118: if (context->value[i]) {
1.36 frystyk 119: HT_FREE(context->value[i]);
1.17 timbl 120: context->value[i] = NULL;
121: }
122: return;
123: } /* if */
1.2 timbl 124:
1.20 frystyk 125: if (SGML_TRACE)
1.37 eric 126: HTTrace("SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 127: s, context->current_tag->name);
128: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 129: }
130:
131:
132: /* Handle attribute value
133: ** ----------------------
134: */
1.38 frystyk 135: PRIVATE void handle_attribute_value (HTStream * context, const char * s)
1.1 timbl 136: {
1.2 timbl 137: if (context->current_attribute_number != INVALID) {
138: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 139: } else {
1.37 eric 140: if (SGML_TRACE) HTTrace("SGML: Attribute value %s ignored\n", s);
1.1 timbl 141: }
1.2 timbl 142: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
144:
1.2 timbl 145:
1.1 timbl 146: /* Handle entity
147: ** -------------
148: **
149: ** On entry,
150: ** s contains the entity name zero terminated
151: ** Bugs:
152: ** If the entity name is unknown, the terminator is treated as
153: ** a printable non-special character in all cases, even if it is '<'
154: */
1.31 frystyk 155: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 156: {
1.2 timbl 157:
1.38 frystyk 158: const char ** entities = context->dtd->entity_names;
159: const char *s = context->string->data;
1.2 timbl 160:
161: int high, low, i, diff;
162: for(low=0, high = context->dtd->number_of_entities;
163: high > low ;
164: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
165: i = (low + (high-low)/2);
166: diff = strcmp(entities[i], s); /* Csse sensitive! */
167: if (diff==0) { /* success: found it */
168: (*context->actions->put_entity)(context->target, i);
169: return;
1.1 timbl 170: }
171: }
172: /* If entity string not found, display as text */
1.20 frystyk 173: if (SGML_TRACE)
1.37 eric 174: HTTrace("SGML: Unknown entity %s\n", s);
1.2 timbl 175: PUTC('&');
1.1 timbl 176: {
1.38 frystyk 177: const char *p;
1.1 timbl 178: for (p=s; *p; p++) {
1.2 timbl 179: PUTC(*p);
1.1 timbl 180: }
181: }
1.2 timbl 182: PUTC(term);
1.1 timbl 183: }
184:
1.35 frystyk 185: /*
186: ** Helper function to check if the tag is on the stack
187: */
188: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
189: {
190: HTElement* elem;
191: for (elem = stack; elem != NULL; elem = elem->next)
192: {
193: if (elem->tag == tag) return YES;
194: }
195: return NO;
196: }
1.2 timbl 197:
1.1 timbl 198: /* End element
1.2 timbl 199: ** -----------
1.1 timbl 200: */
1.31 frystyk 201: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 202: {
1.37 eric 203: if (SGML_TRACE) HTTrace("SGML: End </%s>\n", old_tag->name);
1.2 timbl 204: if (old_tag->contents == SGML_EMPTY) {
1.37 eric 205: if (SGML_TRACE) HTTrace("SGML: Illegal end tag </%s> found.\n",
1.1 timbl 206: old_tag->name);
207: return;
208: }
209: while (context->element_stack) {/* Loop is error path only */
210: HTElement * N = context->element_stack;
211: HTTag * t = N->tag;
212:
213: if (old_tag != t) { /* Mismatch: syntax error */
1.35 frystyk 214: /*
215: ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
216: ** See explanation in ../User/Patch/lib_4.0_1.fix
217: */
218: if (context->element_stack->next /* This is not the last level */
219: && lookup_element_stack(context->element_stack, old_tag)) {
1.37 eric 220: if (SGML_TRACE) HTTrace(
1.1 timbl 221: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
222: old_tag->name, t->name, t->name);
223: } else { /* last level */
1.37 eric 224: if (SGML_TRACE) HTTrace(
1.1 timbl 225: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
226: old_tag->name, t->name, old_tag->name);
227: return; /* Ignore */
228: }
229: }
230:
231: context->element_stack = N->next; /* Remove from stack */
1.36 frystyk 232: HT_FREE(N);
1.2 timbl 233: (*context->actions->end_element)(context->target,
234: t - context->dtd->tags);
1.1 timbl 235: if (old_tag == t) return; /* Correct sequence */
236:
237: /* Syntax error path only */
238:
239: }
1.37 eric 240: if (SGML_TRACE) HTTrace(
1.1 timbl 241: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
242: }
243:
244:
1.17 timbl 245: /* Start an element
246: ** ----------------
1.1 timbl 247: */
1.31 frystyk 248: PRIVATE void start_element (HTStream * context)
1.1 timbl 249: {
250: HTTag * new_tag = context->current_tag;
251:
1.37 eric 252: if (SGML_TRACE) HTTrace("SGML: Start <%s>\n", new_tag->name);
1.2 timbl 253: (*context->actions->start_element)(
254: context->target,
255: new_tag - context->dtd->tags,
256: context->present,
1.38 frystyk 257: (const char**) context->value); /* coerce type for think c */
1.2 timbl 258: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.36 frystyk 259: HTElement * N;
260: if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
261: HT_OUTOFMEM("start_element");
1.1 timbl 262: N->next = context->element_stack;
263: N->tag = new_tag;
264: context->element_stack = N;
265: }
266: }
267:
268:
1.2 timbl 269: /* Find Tag in DTD tag list
270: ** ------------------------
1.1 timbl 271: **
272: ** On entry,
1.2 timbl 273: ** dtd points to dtd structire including valid tag list
274: ** string points to name of tag in question
1.1 timbl 275: **
1.2 timbl 276: ** On exit,
277: ** returns:
1.7 timbl 278: ** NULL tag not found
279: ** else address of tag structure in dtd
1.2 timbl 280: */
1.40 ! frystyk 281: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.2 timbl 282: {
283: int high, low, i, diff;
284: for(low=0, high=dtd->number_of_tags;
285: high > low ;
286: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
287: i = (low + (high-low)/2);
1.3 timbl 288: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 289: if (diff==0) { /* success: found it */
1.7 timbl 290: return &dtd->tags[i];
1.2 timbl 291: }
292: }
1.7 timbl 293: return NULL;
1.2 timbl 294: }
295:
296: /*________________________________________________________________________
297: ** Public Methods
1.1 timbl 298: */
299:
1.2 timbl 300:
301: /* Could check that we are back to bottom of stack! @@ */
1.40 ! frystyk 302: PRIVATE int SGML_flush (HTStream * context)
1.26 frystyk 303: {
304: while (context->element_stack) {
305: HTElement *ptr = context->element_stack;
306: if (SGML_TRACE)
1.37 eric 307: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 308: context->element_stack->tag->name);
309: context->element_stack = ptr->next;
1.36 frystyk 310: HT_FREE(ptr);
1.26 frystyk 311: }
312: return (*context->actions->flush)(context->target);
313: }
1.1 timbl 314:
1.40 ! frystyk 315: PRIVATE int SGML_free (HTStream * context)
1.8 timbl 316: {
1.26 frystyk 317: int status;
1.14 frystyk 318: int cnt;
1.15 frystyk 319: while (context->element_stack) { /* Make sure, that all tags are gone */
320: HTElement *ptr = context->element_stack;
321:
1.26 frystyk 322: if (SGML_TRACE)
1.37 eric 323: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 324: context->element_stack->tag->name);
1.15 frystyk 325: context->element_stack = ptr->next;
1.36 frystyk 326: HT_FREE(ptr);
1.15 frystyk 327: }
1.26 frystyk 328: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
329: return status;
1.33 frystyk 330: HTChunk_delete(context->string);
1.15 frystyk 331: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 332: if(context->value[cnt])
1.36 frystyk 333: HT_FREE(context->value[cnt]);
334: HT_FREE(context);
1.26 frystyk 335: return HT_OK;
1.1 timbl 336: }
337:
1.40 ! frystyk 338: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 339: {
1.14 frystyk 340: int cnt;
1.15 frystyk 341: while (context->element_stack) { /* Make sure, that all tags are gone */
342: HTElement *ptr = context->element_stack;
1.26 frystyk 343: if (SGML_TRACE)
1.37 eric 344: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 345: context->element_stack->tag->name);
1.15 frystyk 346: context->element_stack = ptr->next;
1.36 frystyk 347: HT_FREE(ptr);
1.15 frystyk 348: }
1.8 timbl 349: (*context->actions->abort)(context->target, e);
1.33 frystyk 350: HTChunk_delete(context->string);
1.14 frystyk 351: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
352: if(context->value[cnt])
1.36 frystyk 353: HT_FREE(context->value[cnt]);
354: HT_FREE(context);
1.26 frystyk 355: return HT_ERROR;
1.1 timbl 356: }
357:
1.40 ! frystyk 358: PRIVATE int SGML_character (HTStream * context, char c)
1.1 timbl 359:
360: {
1.38 frystyk 361: const SGML_dtd *dtd = context->dtd;
1.1 timbl 362: HTChunk *string = context->string;
363:
364: switch(context->state) {
1.18 timbl 365:
366: case S_after_open: /* Strip one trainling newline
367: only after opening nonempty element. - SGML:Ugh! */
368: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
369: break;
370: }
371: context->state = S_text;
372: goto normal_text;
373: /* (***falls through***) */
374:
1.1 timbl 375: case S_text:
1.18 timbl 376: normal_text:
377:
1.13 timbl 378: #ifdef ISO_2022_JP
379: if (c=='\033') {
380: context->state = S_esc;
381: PUTC(c);
382: break;
383: }
384: #endif /* ISO_2022_JP */
1.6 timbl 385: if (c=='&' && (!context->element_stack || (
386: context->element_stack->tag &&
387: ( context->element_stack->tag->contents == SGML_MIXED
388: || context->element_stack->tag->contents ==
389: SGML_RCDATA)
390: ))) {
1.1 timbl 391: string->size = 0;
392: context->state = S_ero;
393:
394: } else if (c=='<') {
395: string->size = 0;
396: context->state = (context->element_stack &&
1.13 timbl 397: context->element_stack->tag &&
398: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 399: S_literal : S_tag;
1.18 timbl 400: } else if (c=='\n') { /* Newline - ignore if before tag end! */
401: context->state = S_nl;
1.2 timbl 402: } else PUTC(c);
1.1 timbl 403: break;
1.13 timbl 404:
1.18 timbl 405: case S_nl:
406: if (c=='<') {
407: string->size = 0;
408: context->state = (context->element_stack &&
409: context->element_stack->tag &&
410: context->element_stack->tag->contents == SGML_LITERAL) ?
411: S_literal : S_nl_tago;
412: } else {
413: PUTC('\n');
414: context->state = S_text;
415: goto normal_text;
416: }
417: break;
418:
419: case S_nl_tago: /* Had newline and tag opener */
420: if (c != '/') {
421: PUTC('\n'); /* Only ignore newline before </ */
422: }
423: context->state = S_tag;
424: goto handle_S_tag;
425:
1.13 timbl 426: #ifdef ISO_2022_JP
427: case S_esc:
428: if (c=='$') {
429: context->state = S_dollar;
430: } else if (c=='(') {
431: context->state = S_paren;
432: } else {
433: context->state = S_text;
434: }
435: PUTC(c);
436: break;
437: case S_dollar:
438: if (c=='@' || c=='B') {
439: context->state = S_nonascii_text;
440: } else {
441: context->state = S_text;
442: }
443: PUTC(c);
444: break;
445: case S_paren:
446: if (c=='B' || c=='J') {
447: context->state = S_text;
448: } else {
449: context->state = S_text;
450: }
451: PUTC(c);
452: break;
453: case S_nonascii_text:
454: if (c=='\033') {
455: context->state = S_esc;
456: PUTC(c);
457: } else {
458: PUTC(c);
459: }
460: break;
461: #endif /* ISO_2022_JP */
1.1 timbl 462:
1.12 timbl 463: /* In literal mode, waits only for specific end tag!
1.2 timbl 464: ** Only foir compatibility with old servers.
1.1 timbl 465: */
1.12 timbl 466: case S_literal :
1.33 frystyk 467: HTChunk_putc(string, c);
1.1 timbl 468: if ( TOUPPER(c) != ((string->size ==1) ? '/'
469: : context->element_stack->tag->name[string->size-2])) {
470: int i;
471:
1.12 timbl 472: /* If complete match, end literal */
1.1 timbl 473: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
474: end_element(context, context->element_stack->tag);
475: string->size = 0;
1.2 timbl 476: context->current_attribute_number = INVALID;
1.1 timbl 477: context->state = S_text;
478: break;
479: } /* If Mismatch: recover string. */
1.2 timbl 480: PUTC( '<');
1.1 timbl 481: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 482: PUTC(
1.1 timbl 483: string->data[i]);
484: context->state = S_text;
485: }
486:
487: break;
488:
489: /* Character reference or Entity
490: */
491: case S_ero:
492: if (c=='#') {
493: context->state = S_cro; /* &# is Char Ref Open */
494: break;
495: }
496: context->state = S_entity; /* Fall through! */
497:
498: /* Handle Entities
499: */
500: case S_entity:
501: if (isalnum(c))
1.33 frystyk 502: HTChunk_putc(string, c);
1.1 timbl 503: else {
1.33 frystyk 504: HTChunk_terminate(string);
1.1 timbl 505: handle_entity(context, c);
506: context->state = S_text;
507: }
508: break;
509:
510: /* Character reference
511: */
512: case S_cro:
513: if (isalnum(c))
1.33 frystyk 514: HTChunk_putc(string, c); /* accumulate a character NUMBER */
1.1 timbl 515: else {
516: int value;
1.33 frystyk 517: HTChunk_terminate(string);
1.1 timbl 518: if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 519: PUTC((char) value);
1.1 timbl 520: context->state = S_text;
521: }
522: break;
523:
524: /* Tag
525: */
526: case S_tag: /* new tag */
1.18 timbl 527: handle_S_tag:
528:
1.1 timbl 529: if (isalnum(c))
1.33 frystyk 530: HTChunk_putc(string, c);
1.1 timbl 531: else { /* End of tag name */
1.7 timbl 532: HTTag * t;
1.1 timbl 533: if (c=='/') {
1.20 frystyk 534: if (SGML_TRACE) if (string->size!=0)
1.37 eric 535: HTTrace("SGML: `<%s/' found!\n", string->data);
1.1 timbl 536: context->state = S_end;
537: break;
538: }
1.33 frystyk 539: HTChunk_terminate(string) ;
1.2 timbl 540:
1.10 timbl 541: t = SGMLFindTag(dtd, string->data);
1.7 timbl 542: if (!t) {
1.37 eric 543: if(SGML_TRACE) HTTrace("SGML: *** Unknown element %s\n",
1.1 timbl 544: string->data);
545: context->state = (c=='>') ? S_text : S_junk_tag;
546: break;
547: }
1.7 timbl 548: context->current_tag = t;
1.2 timbl 549:
550: /* Clear out attributes
551: */
1.1 timbl 552:
1.2 timbl 553: {
554: int i;
555: for (i=0; i< context->current_tag->number_of_attributes; i++)
556: context->present[i] = NO;
1.1 timbl 557: }
558: string->size = 0;
1.2 timbl 559: context->current_attribute_number = INVALID;
1.1 timbl 560:
561: if (c=='>') {
562: if (context->current_tag->name) start_element(context);
1.18 timbl 563: context->state = S_after_open;
1.1 timbl 564: } else {
565: context->state = S_tag_gap;
566: }
567: }
568: break;
569:
570:
571: case S_tag_gap: /* Expecting attribute or > */
572: if (WHITE(c)) break; /* Gap between attributes */
573: if (c=='>') { /* End of tag */
574: if (context->current_tag->name) start_element(context);
1.18 timbl 575: context->state = S_after_open;
1.1 timbl 576: break;
577: }
1.33 frystyk 578: HTChunk_putc(string, c);
1.1 timbl 579: context->state = S_attr; /* Get attribute */
580: break;
581:
582: /* accumulating value */
583: case S_attr:
584: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
1.33 frystyk 585: HTChunk_terminate(string) ;
1.1 timbl 586: handle_attribute_name(context, string->data);
587: string->size = 0;
588: if (c=='>') { /* End of tag */
589: if (context->current_tag->name) start_element(context);
1.18 timbl 590: context->state = S_after_open;
1.1 timbl 591: break;
592: }
593: context->state = (c=='=' ? S_equals: S_attr_gap);
594: } else {
1.33 frystyk 595: HTChunk_putc(string, c);
1.1 timbl 596: }
597: break;
598:
599: case S_attr_gap: /* Expecting attribute or = or > */
600: if (WHITE(c)) break; /* Gap after attribute */
601: if (c=='>') { /* End of tag */
602: if (context->current_tag->name) start_element(context);
1.18 timbl 603: context->state = S_after_open;
1.1 timbl 604: break;
605: } else if (c=='=') {
606: context->state = S_equals;
607: break;
608: }
1.33 frystyk 609: HTChunk_putc(string, c);
1.1 timbl 610: context->state = S_attr; /* Get next attribute */
611: break;
612:
613: case S_equals: /* After attr = */
614: if (WHITE(c)) break; /* Before attribute value */
615: if (c=='>') { /* End of tag */
1.37 eric 616: if (SGML_TRACE) HTTrace("SGML: found = but no value\n");
1.1 timbl 617: if (context->current_tag->name) start_element(context);
1.18 timbl 618: context->state = S_after_open;
1.1 timbl 619: break;
620:
621: } else if (c=='\'') {
622: context->state = S_squoted;
623: break;
624:
625: } else if (c=='"') {
626: context->state = S_dquoted;
627: break;
628: }
1.33 frystyk 629: HTChunk_putc(string, c);
1.1 timbl 630: context->state = S_value;
631: break;
632:
633: case S_value:
634: if (WHITE(c) || (c=='>')) { /* End of word */
1.33 frystyk 635: HTChunk_terminate(string) ;
1.1 timbl 636: handle_attribute_value(context, string->data);
637: string->size = 0;
638: if (c=='>') { /* End of tag */
639: if (context->current_tag->name) start_element(context);
1.18 timbl 640: context->state = S_after_open;
1.1 timbl 641: break;
642: }
643: else context->state = S_tag_gap;
644: } else {
1.33 frystyk 645: HTChunk_putc(string, c);
1.1 timbl 646: }
647: break;
648:
649: case S_squoted: /* Quoted attribute value */
650: if (c=='\'') { /* End of attribute value */
1.33 frystyk 651: HTChunk_terminate(string) ;
1.1 timbl 652: handle_attribute_value(context, string->data);
653: string->size = 0;
654: context->state = S_tag_gap;
655: } else {
1.33 frystyk 656: HTChunk_putc(string, c);
1.1 timbl 657: }
658: break;
659:
660: case S_dquoted: /* Quoted attribute value */
661: if (c=='"') { /* End of attribute value */
1.33 frystyk 662: HTChunk_terminate(string) ;
1.1 timbl 663: handle_attribute_value(context, string->data);
664: string->size = 0;
665: context->state = S_tag_gap;
666: } else {
1.33 frystyk 667: HTChunk_putc(string, c);
1.1 timbl 668: }
669: break;
670:
671: case S_end: /* </ */
672: if (isalnum(c))
1.33 frystyk 673: HTChunk_putc(string, c);
1.1 timbl 674: else { /* End of end tag name */
1.7 timbl 675: HTTag * t;
1.33 frystyk 676: HTChunk_terminate(string) ;
1.7 timbl 677: if (!*string->data) { /* Empty end tag */
678: t = context->element_stack->tag;
679: } else {
1.10 timbl 680: t = SGMLFindTag(dtd, string->data);
1.1 timbl 681: }
1.7 timbl 682: if (!t) {
1.37 eric 683: if(SGML_TRACE) HTTrace(
1.1 timbl 684: "Unknown end tag </%s>\n", string->data);
1.2 timbl 685: } else {
1.7 timbl 686: context->current_tag = t;
1.2 timbl 687: end_element( context, context->current_tag);
1.1 timbl 688: }
1.2 timbl 689:
1.1 timbl 690: string->size = 0;
1.2 timbl 691: context->current_attribute_number = INVALID;
1.7 timbl 692: if (c!='>') {
1.20 frystyk 693: if (SGML_TRACE && !WHITE(c))
1.37 eric 694: HTTrace("SGML: `</%s%c' found!\n",
1.7 timbl 695: string->data, c);
696: context->state = S_junk_tag;
697: } else {
698: context->state = S_text;
699: }
1.1 timbl 700: }
701: break;
702:
703:
704: case S_junk_tag:
705: if (c=='>') {
706: context->state = S_text;
707: }
708: } /* switch on context->state */
1.26 frystyk 709: return HT_OK;
710: }
1.2 timbl 711:
712:
1.40 ! frystyk 713: PRIVATE int SGML_string (HTStream * context, const char* s)
1.2 timbl 714: {
1.26 frystyk 715: while (*s)
716: SGML_character(context, *s++);
717: return HT_OK;
1.2 timbl 718: }
719:
720:
1.40 ! frystyk 721: PRIVATE int SGML_write (HTStream * context, const char* b, int l)
1.2 timbl 722: {
1.26 frystyk 723: while (l-- > 0)
724: SGML_character(context, *b++);
725: return HT_OK;
1.2 timbl 726: }
727:
728: /*_______________________________________________________________________
729: */
730:
731: /* Structured Object Class
732: ** -----------------------
733: */
1.38 frystyk 734: PRIVATE const HTStreamClass SGMLParser =
1.2 timbl 735: {
1.32 frystyk 736: "SGMLParser",
737: SGML_flush,
738: SGML_free,
739: SGML_abort,
740: SGML_character,
741: SGML_string,
742: SGML_write,
1.2 timbl 743: };
744:
745: /* Create SGML Engine
746: ** ------------------
747: **
748: ** On entry,
749: ** dtd represents the DTD, along with
750: ** actions is the sink for the data as a set of routines.
751: **
752: */
1.38 frystyk 753: PUBLIC HTStream * SGML_new (const SGML_dtd * dtd, HTStructured * target)
1.2 timbl 754: {
755: int i;
1.36 frystyk 756: HTStream* context;
1.40 ! frystyk 757: if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
1.36 frystyk 758: HT_OUTOFMEM("SGML_begin");
1.2 timbl 759:
760: context->isa = &SGMLParser;
1.33 frystyk 761: context->string = HTChunk_new(128); /* Grow by this much */
1.2 timbl 762: context->dtd = dtd;
763: context->target = target;
764: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
765: /* Ugh: no OO */
766: context->state = S_text;
767: context->element_stack = 0; /* empty */
768: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
769:
770: return context;
771: }
Webmaster