Annotation of libwww/Library/src/SGML.c, revision 1.38
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This module implements an HTStream object. To parse an
1.1 timbl 8: ** SGML file, create this object which is a parser. The object
1.2 timbl 9: ** is (currently) created by being passed a DTD structure,
10: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **
1.19 duns 12: ** 6 Feb 93 Binary searches used. Interface modified.
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
15:
1.25 frystyk 16: /* Library include files */
1.38 ! frystyk 17: #include "sysdep.h"
1.1 timbl 18: #include "HTUtils.h"
1.25 frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22:
1.2 timbl 23: #define INVALID (-1)
24:
1.1 timbl 25: /* The State (context) of the parser
26: **
1.2 timbl 27: ** This is passed with each call to make the parser reentrant
1.1 timbl 28: **
29: */
30:
1.16 frystyk 31:
1.2 timbl 32:
33:
34: /* Element Stack
35: ** -------------
36: ** This allows us to return down the stack reselcting styles.
37: ** As we return, attribute values will be garbage in general.
38: */
39: typedef struct _HTElement HTElement;
40: struct _HTElement {
41: HTElement * next; /* Previously nested element or 0 */
42: HTTag* tag; /* The tag at this level */
43: };
44:
45:
/*	Parser states
**	-------------
**	One value per state of the character-driven state machine implemented
**	in SGML_character(). The order of the enumerators is unchanged.
*/
typedef enum _sgml_state {
    S_text,		/* Ordinary character data */
    S_literal,		/* Inside a literal element, waiting for its end tag */
    S_tag,		/* After '<': accumulating a tag name */
    S_tag_gap,		/* Inside a tag: expecting an attribute or '>' */
    S_attr,		/* Accumulating an attribute name */
    S_attr_gap,		/* After an attribute name: expecting attribute, '=' or '>' */
    S_equals,		/* After '=': expecting an attribute value */
    S_value,		/* Accumulating an unquoted attribute value */
    S_after_open,	/* Just after a start tag: one newline may be stripped */
    S_nl,		/* A newline has been seen in text and is being held back */
    S_nl_tago,		/* Held-back newline followed by '<' */
    S_ero,		/* After '&' */
    S_cro,		/* After "&#": accumulating a character number */
#ifdef ISO_2022_JP
    S_esc,		/* After ESC */
    S_dollar,		/* After ESC '$' */
    S_paren,		/* After ESC '(' */
    S_nonascii_text,	/* Text passed through until the next ESC */
#endif
    S_squoted,		/* Inside a '...' attribute value */
    S_dquoted,		/* Inside a "..." attribute value */
    S_end,		/* After "</": accumulating an end tag name */
    S_entity,		/* Accumulating an entity name */
    S_junk_tag		/* Skipping everything up to the next '>' */
} sgml_state;
56:
57:
1.2 timbl 58: /* Internal Context Data Structure
59: ** -------------------------------
60: */
61: struct _HTStream {
62:
1.38 ! frystyk 63: const HTStreamClass * isa; /* inherited from HTStream */
1.2 timbl 64:
1.38 ! frystyk 65: const SGML_dtd *dtd;
1.2 timbl 66: HTStructuredClass *actions; /* target class */
67: HTStructured *target; /* target object */
68:
1.1 timbl 69: HTTag *current_tag;
1.2 timbl 70: int current_attribute_number;
1.1 timbl 71: HTChunk *string;
72: HTElement *element_stack;
1.21 frystyk 73: sgml_state state;
1.2 timbl 74: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
75: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
76: } ;
77:
78:
/* Send one character to the target; expects a variable `context' in scope */
#define PUTC(ch) (context->actions->put_character(context->target, ch))
80:
1.1 timbl 81:
1.17 timbl 82: /* Find Attribute Number
83: ** ---------------------
84: */
85:
1.38 ! frystyk 86: PUBLIC int SGMLFindAttribute (HTTag* tag, const char * s)
1.17 timbl 87: {
88: attr* attributes = tag->attributes;
89:
90: int high, low, i, diff; /* Binary search for attribute name */
91: for(low=0, high=tag->number_of_attributes;
92: high > low ;
93: diff < 0 ? (low = i+1) : (high = i) ) {
94: i = (low + (high-low)/2);
95: diff = strcasecomp(attributes[i].name, s);
96: if (diff==0) return i; /* success: found it */
97: } /* for */
98:
99: return -1;
100: }
101:
1.1 timbl 102:
103: /* Handle Attribute
104: ** ----------------
105: */
1.38 ! frystyk 106: /* PUBLIC const char * SGML_default = ""; ?? */
1.1 timbl 107:
1.38 ! frystyk 108: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.1 timbl 109: {
1.2 timbl 110:
111: HTTag * tag = context->current_tag;
112:
1.17 timbl 113: int i = SGMLFindAttribute(tag, s);
114: if (i>=0) {
115: context->current_attribute_number = i;
116: context->present[i] = YES;
117: if (context->value[i]) {
1.36 frystyk 118: HT_FREE(context->value[i]);
1.17 timbl 119: context->value[i] = NULL;
120: }
121: return;
122: } /* if */
1.2 timbl 123:
1.20 frystyk 124: if (SGML_TRACE)
1.37 eric 125: HTTrace("SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 126: s, context->current_tag->name);
127: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 128: }
129:
130:
131: /* Handle attribute value
132: ** ----------------------
133: */
1.38 ! frystyk 134: PRIVATE void handle_attribute_value (HTStream * context, const char * s)
1.1 timbl 135: {
1.2 timbl 136: if (context->current_attribute_number != INVALID) {
137: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 138: } else {
1.37 eric 139: if (SGML_TRACE) HTTrace("SGML: Attribute value %s ignored\n", s);
1.1 timbl 140: }
1.2 timbl 141: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 142: }
143:
1.2 timbl 144:
1.1 timbl 145: /* Handle entity
146: ** -------------
147: **
148: ** On entry,
149: ** s contains the entity name zero terminated
150: ** Bugs:
151: ** If the entity name is unknown, the terminator is treated as
152: ** a printable non-special character in all cases, even if it is '<'
153: */
1.31 frystyk 154: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 155: {
1.2 timbl 156:
1.38 ! frystyk 157: const char ** entities = context->dtd->entity_names;
! 158: const char *s = context->string->data;
1.2 timbl 159:
160: int high, low, i, diff;
161: for(low=0, high = context->dtd->number_of_entities;
162: high > low ;
163: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
164: i = (low + (high-low)/2);
165: diff = strcmp(entities[i], s); /* Csse sensitive! */
166: if (diff==0) { /* success: found it */
167: (*context->actions->put_entity)(context->target, i);
168: return;
1.1 timbl 169: }
170: }
171: /* If entity string not found, display as text */
1.20 frystyk 172: if (SGML_TRACE)
1.37 eric 173: HTTrace("SGML: Unknown entity %s\n", s);
1.2 timbl 174: PUTC('&');
1.1 timbl 175: {
1.38 ! frystyk 176: const char *p;
1.1 timbl 177: for (p=s; *p; p++) {
1.2 timbl 178: PUTC(*p);
1.1 timbl 179: }
180: }
1.2 timbl 181: PUTC(term);
1.1 timbl 182: }
183:
1.35 frystyk 184: /*
185: ** Helper function to check if the tag is on the stack
186: */
187: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
188: {
189: HTElement* elem;
190: for (elem = stack; elem != NULL; elem = elem->next)
191: {
192: if (elem->tag == tag) return YES;
193: }
194: return NO;
195: }
1.2 timbl 196:
1.1 timbl 197: /* End element
1.2 timbl 198: ** -----------
1.1 timbl 199: */
1.31 frystyk 200: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 201: {
1.37 eric 202: if (SGML_TRACE) HTTrace("SGML: End </%s>\n", old_tag->name);
1.2 timbl 203: if (old_tag->contents == SGML_EMPTY) {
1.37 eric 204: if (SGML_TRACE) HTTrace("SGML: Illegal end tag </%s> found.\n",
1.1 timbl 205: old_tag->name);
206: return;
207: }
208: while (context->element_stack) {/* Loop is error path only */
209: HTElement * N = context->element_stack;
210: HTTag * t = N->tag;
211:
212: if (old_tag != t) { /* Mismatch: syntax error */
1.35 frystyk 213: /*
214: ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
215: ** See explanation in ../User/Patch/lib_4.0_1.fix
216: */
217: if (context->element_stack->next /* This is not the last level */
218: && lookup_element_stack(context->element_stack, old_tag)) {
1.37 eric 219: if (SGML_TRACE) HTTrace(
1.1 timbl 220: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
221: old_tag->name, t->name, t->name);
222: } else { /* last level */
1.37 eric 223: if (SGML_TRACE) HTTrace(
1.1 timbl 224: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
225: old_tag->name, t->name, old_tag->name);
226: return; /* Ignore */
227: }
228: }
229:
230: context->element_stack = N->next; /* Remove from stack */
1.36 frystyk 231: HT_FREE(N);
1.2 timbl 232: (*context->actions->end_element)(context->target,
233: t - context->dtd->tags);
1.1 timbl 234: if (old_tag == t) return; /* Correct sequence */
235:
236: /* Syntax error path only */
237:
238: }
1.37 eric 239: if (SGML_TRACE) HTTrace(
1.1 timbl 240: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
241: }
242:
243:
1.17 timbl 244: /* Start an element
245: ** ----------------
1.1 timbl 246: */
1.31 frystyk 247: PRIVATE void start_element (HTStream * context)
1.1 timbl 248: {
249: HTTag * new_tag = context->current_tag;
250:
1.37 eric 251: if (SGML_TRACE) HTTrace("SGML: Start <%s>\n", new_tag->name);
1.2 timbl 252: (*context->actions->start_element)(
253: context->target,
254: new_tag - context->dtd->tags,
255: context->present,
1.38 ! frystyk 256: (const char**) context->value); /* coerce type for think c */
1.2 timbl 257: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.36 frystyk 258: HTElement * N;
259: if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
260: HT_OUTOFMEM("start_element");
1.1 timbl 261: N->next = context->element_stack;
262: N->tag = new_tag;
263: context->element_stack = N;
264: }
265: }
266:
267:
1.2 timbl 268: /* Find Tag in DTD tag list
269: ** ------------------------
1.1 timbl 270: **
271: ** On entry,
1.2 timbl 272: ** dtd points to dtd structire including valid tag list
273: ** string points to name of tag in question
1.1 timbl 274: **
1.2 timbl 275: ** On exit,
276: ** returns:
1.7 timbl 277: ** NULL tag not found
278: ** else address of tag structure in dtd
1.2 timbl 279: */
1.38 ! frystyk 280: PUBLIC HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.2 timbl 281: {
282: int high, low, i, diff;
283: for(low=0, high=dtd->number_of_tags;
284: high > low ;
285: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
286: i = (low + (high-low)/2);
1.3 timbl 287: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 288: if (diff==0) { /* success: found it */
1.7 timbl 289: return &dtd->tags[i];
1.2 timbl 290: }
291: }
1.7 timbl 292: return NULL;
1.2 timbl 293: }
294:
295: /*________________________________________________________________________
296: ** Public Methods
1.1 timbl 297: */
298:
1.2 timbl 299:
300: /* Could check that we are back to bottom of stack! @@ */
1.31 frystyk 301: PUBLIC int SGML_flush (HTStream * context)
1.26 frystyk 302: {
303: while (context->element_stack) {
304: HTElement *ptr = context->element_stack;
305: if (SGML_TRACE)
1.37 eric 306: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 307: context->element_stack->tag->name);
308: context->element_stack = ptr->next;
1.36 frystyk 309: HT_FREE(ptr);
1.26 frystyk 310: }
311: return (*context->actions->flush)(context->target);
312: }
1.1 timbl 313:
1.31 frystyk 314: PUBLIC int SGML_free (HTStream * context)
1.8 timbl 315: {
1.26 frystyk 316: int status;
1.14 frystyk 317: int cnt;
1.15 frystyk 318: while (context->element_stack) { /* Make sure, that all tags are gone */
319: HTElement *ptr = context->element_stack;
320:
1.26 frystyk 321: if (SGML_TRACE)
1.37 eric 322: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 323: context->element_stack->tag->name);
1.15 frystyk 324: context->element_stack = ptr->next;
1.36 frystyk 325: HT_FREE(ptr);
1.15 frystyk 326: }
1.26 frystyk 327: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
328: return status;
1.33 frystyk 329: HTChunk_delete(context->string);
1.15 frystyk 330: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 331: if(context->value[cnt])
1.36 frystyk 332: HT_FREE(context->value[cnt]);
333: HT_FREE(context);
1.26 frystyk 334: return HT_OK;
1.1 timbl 335: }
336:
1.31 frystyk 337: PUBLIC int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 338: {
1.14 frystyk 339: int cnt;
1.15 frystyk 340: while (context->element_stack) { /* Make sure, that all tags are gone */
341: HTElement *ptr = context->element_stack;
1.26 frystyk 342: if (SGML_TRACE)
1.37 eric 343: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 344: context->element_stack->tag->name);
1.15 frystyk 345: context->element_stack = ptr->next;
1.36 frystyk 346: HT_FREE(ptr);
1.15 frystyk 347: }
1.8 timbl 348: (*context->actions->abort)(context->target, e);
1.33 frystyk 349: HTChunk_delete(context->string);
1.14 frystyk 350: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
351: if(context->value[cnt])
1.36 frystyk 352: HT_FREE(context->value[cnt]);
353: HT_FREE(context);
1.26 frystyk 354: return HT_ERROR;
1.1 timbl 355: }
356:
1.31 frystyk 357: PUBLIC int SGML_character (HTStream * context, char c)
1.1 timbl 358:
359: {
1.38 ! frystyk 360: const SGML_dtd *dtd = context->dtd;
1.1 timbl 361: HTChunk *string = context->string;
362:
363: switch(context->state) {
1.18 timbl 364:
365: case S_after_open: /* Strip one trainling newline
366: only after opening nonempty element. - SGML:Ugh! */
367: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
368: break;
369: }
370: context->state = S_text;
371: goto normal_text;
372: /* (***falls through***) */
373:
1.1 timbl 374: case S_text:
1.18 timbl 375: normal_text:
376:
1.13 timbl 377: #ifdef ISO_2022_JP
378: if (c=='\033') {
379: context->state = S_esc;
380: PUTC(c);
381: break;
382: }
383: #endif /* ISO_2022_JP */
1.6 timbl 384: if (c=='&' && (!context->element_stack || (
385: context->element_stack->tag &&
386: ( context->element_stack->tag->contents == SGML_MIXED
387: || context->element_stack->tag->contents ==
388: SGML_RCDATA)
389: ))) {
1.1 timbl 390: string->size = 0;
391: context->state = S_ero;
392:
393: } else if (c=='<') {
394: string->size = 0;
395: context->state = (context->element_stack &&
1.13 timbl 396: context->element_stack->tag &&
397: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 398: S_literal : S_tag;
1.18 timbl 399: } else if (c=='\n') { /* Newline - ignore if before tag end! */
400: context->state = S_nl;
1.2 timbl 401: } else PUTC(c);
1.1 timbl 402: break;
1.13 timbl 403:
1.18 timbl 404: case S_nl:
405: if (c=='<') {
406: string->size = 0;
407: context->state = (context->element_stack &&
408: context->element_stack->tag &&
409: context->element_stack->tag->contents == SGML_LITERAL) ?
410: S_literal : S_nl_tago;
411: } else {
412: PUTC('\n');
413: context->state = S_text;
414: goto normal_text;
415: }
416: break;
417:
418: case S_nl_tago: /* Had newline and tag opener */
419: if (c != '/') {
420: PUTC('\n'); /* Only ignore newline before </ */
421: }
422: context->state = S_tag;
423: goto handle_S_tag;
424:
1.13 timbl 425: #ifdef ISO_2022_JP
426: case S_esc:
427: if (c=='$') {
428: context->state = S_dollar;
429: } else if (c=='(') {
430: context->state = S_paren;
431: } else {
432: context->state = S_text;
433: }
434: PUTC(c);
435: break;
436: case S_dollar:
437: if (c=='@' || c=='B') {
438: context->state = S_nonascii_text;
439: } else {
440: context->state = S_text;
441: }
442: PUTC(c);
443: break;
444: case S_paren:
445: if (c=='B' || c=='J') {
446: context->state = S_text;
447: } else {
448: context->state = S_text;
449: }
450: PUTC(c);
451: break;
452: case S_nonascii_text:
453: if (c=='\033') {
454: context->state = S_esc;
455: PUTC(c);
456: } else {
457: PUTC(c);
458: }
459: break;
460: #endif /* ISO_2022_JP */
1.1 timbl 461:
1.12 timbl 462: /* In literal mode, waits only for specific end tag!
1.2 timbl 463: ** Only foir compatibility with old servers.
1.1 timbl 464: */
1.12 timbl 465: case S_literal :
1.33 frystyk 466: HTChunk_putc(string, c);
1.1 timbl 467: if ( TOUPPER(c) != ((string->size ==1) ? '/'
468: : context->element_stack->tag->name[string->size-2])) {
469: int i;
470:
1.12 timbl 471: /* If complete match, end literal */
1.1 timbl 472: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
473: end_element(context, context->element_stack->tag);
474: string->size = 0;
1.2 timbl 475: context->current_attribute_number = INVALID;
1.1 timbl 476: context->state = S_text;
477: break;
478: } /* If Mismatch: recover string. */
1.2 timbl 479: PUTC( '<');
1.1 timbl 480: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 481: PUTC(
1.1 timbl 482: string->data[i]);
483: context->state = S_text;
484: }
485:
486: break;
487:
488: /* Character reference or Entity
489: */
490: case S_ero:
491: if (c=='#') {
492: context->state = S_cro; /* &# is Char Ref Open */
493: break;
494: }
495: context->state = S_entity; /* Fall through! */
496:
497: /* Handle Entities
498: */
499: case S_entity:
500: if (isalnum(c))
1.33 frystyk 501: HTChunk_putc(string, c);
1.1 timbl 502: else {
1.33 frystyk 503: HTChunk_terminate(string);
1.1 timbl 504: handle_entity(context, c);
505: context->state = S_text;
506: }
507: break;
508:
509: /* Character reference
510: */
511: case S_cro:
512: if (isalnum(c))
1.33 frystyk 513: HTChunk_putc(string, c); /* accumulate a character NUMBER */
1.1 timbl 514: else {
515: int value;
1.33 frystyk 516: HTChunk_terminate(string);
1.1 timbl 517: if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 518: PUTC((char) value);
1.1 timbl 519: context->state = S_text;
520: }
521: break;
522:
523: /* Tag
524: */
525: case S_tag: /* new tag */
1.18 timbl 526: handle_S_tag:
527:
1.1 timbl 528: if (isalnum(c))
1.33 frystyk 529: HTChunk_putc(string, c);
1.1 timbl 530: else { /* End of tag name */
1.7 timbl 531: HTTag * t;
1.1 timbl 532: if (c=='/') {
1.20 frystyk 533: if (SGML_TRACE) if (string->size!=0)
1.37 eric 534: HTTrace("SGML: `<%s/' found!\n", string->data);
1.1 timbl 535: context->state = S_end;
536: break;
537: }
1.33 frystyk 538: HTChunk_terminate(string) ;
1.2 timbl 539:
1.10 timbl 540: t = SGMLFindTag(dtd, string->data);
1.7 timbl 541: if (!t) {
1.37 eric 542: if(SGML_TRACE) HTTrace("SGML: *** Unknown element %s\n",
1.1 timbl 543: string->data);
544: context->state = (c=='>') ? S_text : S_junk_tag;
545: break;
546: }
1.7 timbl 547: context->current_tag = t;
1.2 timbl 548:
549: /* Clear out attributes
550: */
1.1 timbl 551:
1.2 timbl 552: {
553: int i;
554: for (i=0; i< context->current_tag->number_of_attributes; i++)
555: context->present[i] = NO;
1.1 timbl 556: }
557: string->size = 0;
1.2 timbl 558: context->current_attribute_number = INVALID;
1.1 timbl 559:
560: if (c=='>') {
561: if (context->current_tag->name) start_element(context);
1.18 timbl 562: context->state = S_after_open;
1.1 timbl 563: } else {
564: context->state = S_tag_gap;
565: }
566: }
567: break;
568:
569:
570: case S_tag_gap: /* Expecting attribute or > */
571: if (WHITE(c)) break; /* Gap between attributes */
572: if (c=='>') { /* End of tag */
573: if (context->current_tag->name) start_element(context);
1.18 timbl 574: context->state = S_after_open;
1.1 timbl 575: break;
576: }
1.33 frystyk 577: HTChunk_putc(string, c);
1.1 timbl 578: context->state = S_attr; /* Get attribute */
579: break;
580:
581: /* accumulating value */
582: case S_attr:
583: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
1.33 frystyk 584: HTChunk_terminate(string) ;
1.1 timbl 585: handle_attribute_name(context, string->data);
586: string->size = 0;
587: if (c=='>') { /* End of tag */
588: if (context->current_tag->name) start_element(context);
1.18 timbl 589: context->state = S_after_open;
1.1 timbl 590: break;
591: }
592: context->state = (c=='=' ? S_equals: S_attr_gap);
593: } else {
1.33 frystyk 594: HTChunk_putc(string, c);
1.1 timbl 595: }
596: break;
597:
598: case S_attr_gap: /* Expecting attribute or = or > */
599: if (WHITE(c)) break; /* Gap after attribute */
600: if (c=='>') { /* End of tag */
601: if (context->current_tag->name) start_element(context);
1.18 timbl 602: context->state = S_after_open;
1.1 timbl 603: break;
604: } else if (c=='=') {
605: context->state = S_equals;
606: break;
607: }
1.33 frystyk 608: HTChunk_putc(string, c);
1.1 timbl 609: context->state = S_attr; /* Get next attribute */
610: break;
611:
612: case S_equals: /* After attr = */
613: if (WHITE(c)) break; /* Before attribute value */
614: if (c=='>') { /* End of tag */
1.37 eric 615: if (SGML_TRACE) HTTrace("SGML: found = but no value\n");
1.1 timbl 616: if (context->current_tag->name) start_element(context);
1.18 timbl 617: context->state = S_after_open;
1.1 timbl 618: break;
619:
620: } else if (c=='\'') {
621: context->state = S_squoted;
622: break;
623:
624: } else if (c=='"') {
625: context->state = S_dquoted;
626: break;
627: }
1.33 frystyk 628: HTChunk_putc(string, c);
1.1 timbl 629: context->state = S_value;
630: break;
631:
632: case S_value:
633: if (WHITE(c) || (c=='>')) { /* End of word */
1.33 frystyk 634: HTChunk_terminate(string) ;
1.1 timbl 635: handle_attribute_value(context, string->data);
636: string->size = 0;
637: if (c=='>') { /* End of tag */
638: if (context->current_tag->name) start_element(context);
1.18 timbl 639: context->state = S_after_open;
1.1 timbl 640: break;
641: }
642: else context->state = S_tag_gap;
643: } else {
1.33 frystyk 644: HTChunk_putc(string, c);
1.1 timbl 645: }
646: break;
647:
648: case S_squoted: /* Quoted attribute value */
649: if (c=='\'') { /* End of attribute value */
1.33 frystyk 650: HTChunk_terminate(string) ;
1.1 timbl 651: handle_attribute_value(context, string->data);
652: string->size = 0;
653: context->state = S_tag_gap;
654: } else {
1.33 frystyk 655: HTChunk_putc(string, c);
1.1 timbl 656: }
657: break;
658:
659: case S_dquoted: /* Quoted attribute value */
660: if (c=='"') { /* End of attribute value */
1.33 frystyk 661: HTChunk_terminate(string) ;
1.1 timbl 662: handle_attribute_value(context, string->data);
663: string->size = 0;
664: context->state = S_tag_gap;
665: } else {
1.33 frystyk 666: HTChunk_putc(string, c);
1.1 timbl 667: }
668: break;
669:
670: case S_end: /* </ */
671: if (isalnum(c))
1.33 frystyk 672: HTChunk_putc(string, c);
1.1 timbl 673: else { /* End of end tag name */
1.7 timbl 674: HTTag * t;
1.33 frystyk 675: HTChunk_terminate(string) ;
1.7 timbl 676: if (!*string->data) { /* Empty end tag */
677: t = context->element_stack->tag;
678: } else {
1.10 timbl 679: t = SGMLFindTag(dtd, string->data);
1.1 timbl 680: }
1.7 timbl 681: if (!t) {
1.37 eric 682: if(SGML_TRACE) HTTrace(
1.1 timbl 683: "Unknown end tag </%s>\n", string->data);
1.2 timbl 684: } else {
1.7 timbl 685: context->current_tag = t;
1.2 timbl 686: end_element( context, context->current_tag);
1.1 timbl 687: }
1.2 timbl 688:
1.1 timbl 689: string->size = 0;
1.2 timbl 690: context->current_attribute_number = INVALID;
1.7 timbl 691: if (c!='>') {
1.20 frystyk 692: if (SGML_TRACE && !WHITE(c))
1.37 eric 693: HTTrace("SGML: `</%s%c' found!\n",
1.7 timbl 694: string->data, c);
695: context->state = S_junk_tag;
696: } else {
697: context->state = S_text;
698: }
1.1 timbl 699: }
700: break;
701:
702:
703: case S_junk_tag:
704: if (c=='>') {
705: context->state = S_text;
706: }
707: } /* switch on context->state */
1.26 frystyk 708: return HT_OK;
709: }
1.2 timbl 710:
711:
1.38 ! frystyk 712: PUBLIC int SGML_string (HTStream * context, const char* s)
1.2 timbl 713: {
1.26 frystyk 714: while (*s)
715: SGML_character(context, *s++);
716: return HT_OK;
1.2 timbl 717: }
718:
719:
1.38 ! frystyk 720: PUBLIC int SGML_write (HTStream * context, const char* b, int l)
1.2 timbl 721: {
1.26 frystyk 722: while (l-- > 0)
723: SGML_character(context, *b++);
724: return HT_OK;
1.2 timbl 725: }
726:
727: /*_______________________________________________________________________
728: */
729:
730: /* Structured Object Class
731: ** -----------------------
732: */
1.38 ! frystyk 733: PRIVATE const HTStreamClass SGMLParser =
1.2 timbl 734: {
1.32 frystyk 735: "SGMLParser",
736: SGML_flush,
737: SGML_free,
738: SGML_abort,
739: SGML_character,
740: SGML_string,
741: SGML_write,
1.2 timbl 742: };
743:
744: /* Create SGML Engine
745: ** ------------------
746: **
747: ** On entry,
748: ** dtd represents the DTD, along with
749: ** actions is the sink for the data as a set of routines.
750: **
751: */
1.38 ! frystyk 752: PUBLIC HTStream * SGML_new (const SGML_dtd * dtd, HTStructured * target)
1.2 timbl 753: {
754: int i;
1.36 frystyk 755: HTStream* context;
756: if ((context = (HTStream *) HT_MALLOC(sizeof(*context))) == NULL)
757: HT_OUTOFMEM("SGML_begin");
1.2 timbl 758:
759: context->isa = &SGMLParser;
1.33 frystyk 760: context->string = HTChunk_new(128); /* Grow by this much */
1.2 timbl 761: context->dtd = dtd;
762: context->target = target;
763: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
764: /* Ugh: no OO */
765: context->state = S_text;
766: context->element_stack = 0; /* empty */
767: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
768:
769: return context;
770: }
Webmaster