Annotation of libwww/Library/src/SGML.c, revision 1.41
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.41 ! frystyk 6: ** @(#) $Id: SGML.c,v 1.40 1996/06/02 00:35:05 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This module implements an HTStream object. To parse an
1.1 timbl 9: ** SGML file, create this object which is a parser. The object
1.2 timbl 10: ** is (currently) created by being passed a DTD structure,
11: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **
1.19 duns 13: ** 6 Feb 93 Binary searches used. Interface modified.
14: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 15: */
16:
1.25 frystyk 17: /* Library include files */
1.38 frystyk 18: #include "sysdep.h"
1.1 timbl 19: #include "HTUtils.h"
1.25 frystyk 20: #include "HTString.h"
1.1 timbl 21: #include "HTChunk.h"
1.20 frystyk 22: #include "SGML.h"
1.1 timbl 23:
1.2 timbl 24: #define INVALID (-1)
25:
1.1 timbl 26: /* The State (context) of the parser
27: **
1.2 timbl 28: ** This is passed with each call to make the parser reentrant
1.1 timbl 29: **
30: */
31:
1.16 frystyk 32:
1.2 timbl 33:
34:
35: /* Element Stack
36: ** -------------
37: ** This allows us to return down the stack reselcting styles.
38: ** As we return, attribute values will be garbage in general.
39: */
40: typedef struct _HTElement HTElement;
41: struct _HTElement {
42: HTElement * next; /* Previously nested element or 0 */
43: HTTag* tag; /* The tag at this level */
44: };
45:
46:
/*	Parser States
**	-------------
**	One enumerator per state of the character driven state machine in
**	SGML_write(). The order of the enumerators is unchanged so that their
**	numeric values stay the same.
*/
typedef enum _sgml_state {
    S_text,		/* ordinary character data */
    S_literal,		/* inside a literal element, matching its end tag */
    S_tag,		/* after '<', accumulating a tag name */
    S_tag_gap,		/* inside a tag, expecting an attribute or '>' */
    S_attr,		/* accumulating an attribute name */
    S_attr_gap,		/* after an attribute name: attribute, '=' or '>' */
    S_equals,		/* after '=', expecting a value */
    S_value,		/* accumulating an unquoted attribute value */
    S_after_open,	/* directly after a start tag */
    S_nl,		/* a newline was seen in text and is being held back */
    S_nl_tago,		/* held-back newline followed by '<' */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#", accumulating a character number */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* text passed through until the next ESC */
#endif
    S_squoted,		/* inside a '...' attribute value */
    S_dquoted,		/* inside a "..." attribute value */
    S_end,		/* after "</", accumulating an end tag name */
    S_entity,		/* accumulating an entity name */
    S_junk_tag		/* skipping everything up to the next '>' */
} sgml_state;
57:
58:
1.2 timbl 59: /* Internal Context Data Structure
60: ** -------------------------------
61: */
62: struct _HTStream {
63:
1.38 frystyk 64: const HTStreamClass * isa; /* inherited from HTStream */
1.2 timbl 65:
1.38 frystyk 66: const SGML_dtd *dtd;
1.2 timbl 67: HTStructuredClass *actions; /* target class */
68: HTStructured *target; /* target object */
69:
1.1 timbl 70: HTTag *current_tag;
1.2 timbl 71: int current_attribute_number;
1.1 timbl 72: HTChunk *string;
73: HTElement *element_stack;
1.21 frystyk 74: sgml_state state;
1.2 timbl 75: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
76: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
77: } ;
78:
79:
/*
**	Hand one character to the target through its put_character method.
**	The expansion refers to a variable named `context', so the macro can
**	only be used where such a variable is in scope. The argument is
**	parenthesised so that any expression can be passed.
*/
#define PUTC(ch) ((*context->actions->put_character)(context->target, (ch)))
81:
1.1 timbl 82:
1.17 timbl 83: /* Find Attribute Number
84: ** ---------------------
85: */
86:
1.40 frystyk 87: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.17 timbl 88: {
89: attr* attributes = tag->attributes;
90:
91: int high, low, i, diff; /* Binary search for attribute name */
92: for(low=0, high=tag->number_of_attributes;
93: high > low ;
94: diff < 0 ? (low = i+1) : (high = i) ) {
95: i = (low + (high-low)/2);
96: diff = strcasecomp(attributes[i].name, s);
97: if (diff==0) return i; /* success: found it */
98: } /* for */
99:
100: return -1;
101: }
102:
1.1 timbl 103:
104: /* Handle Attribute
105: ** ----------------
106: */
1.38 frystyk 107: /* PUBLIC const char * SGML_default = ""; ?? */
1.1 timbl 108:
1.38 frystyk 109: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.1 timbl 110: {
1.2 timbl 111:
112: HTTag * tag = context->current_tag;
113:
1.17 timbl 114: int i = SGMLFindAttribute(tag, s);
115: if (i>=0) {
116: context->current_attribute_number = i;
117: context->present[i] = YES;
118: if (context->value[i]) {
1.36 frystyk 119: HT_FREE(context->value[i]);
1.17 timbl 120: context->value[i] = NULL;
121: }
122: return;
123: } /* if */
1.2 timbl 124:
1.20 frystyk 125: if (SGML_TRACE)
1.41 ! frystyk 126: HTTrace("SGML Parser. Unknown attribute %s for tag %s\n",
1.2 timbl 127: s, context->current_tag->name);
128: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 129: }
130:
131:
132: /* Handle attribute value
133: ** ----------------------
134: */
1.38 frystyk 135: PRIVATE void handle_attribute_value (HTStream * context, const char * s)
1.1 timbl 136: {
1.2 timbl 137: if (context->current_attribute_number != INVALID) {
138: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 139: } else {
1.41 ! frystyk 140: if (SGML_TRACE) HTTrace("SGML Parser. Attribute value %s ignored\n", s);
1.1 timbl 141: }
1.2 timbl 142: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
144:
1.2 timbl 145:
1.1 timbl 146: /* Handle entity
147: ** -------------
148: **
149: ** On entry,
150: ** s contains the entity name zero terminated
151: ** Bugs:
152: ** If the entity name is unknown, the terminator is treated as
153: ** a printable non-special character in all cases, even if it is '<'
154: */
1.31 frystyk 155: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 156: {
1.2 timbl 157:
1.38 frystyk 158: const char ** entities = context->dtd->entity_names;
159: const char *s = context->string->data;
1.2 timbl 160:
161: int high, low, i, diff;
162: for(low=0, high = context->dtd->number_of_entities;
163: high > low ;
164: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
165: i = (low + (high-low)/2);
166: diff = strcmp(entities[i], s); /* Csse sensitive! */
167: if (diff==0) { /* success: found it */
168: (*context->actions->put_entity)(context->target, i);
169: return;
1.1 timbl 170: }
171: }
172: /* If entity string not found, display as text */
1.20 frystyk 173: if (SGML_TRACE)
1.41 ! frystyk 174: HTTrace("SGML Parser. Unknown entity %s\n", s);
1.2 timbl 175: PUTC('&');
1.1 timbl 176: {
1.38 frystyk 177: const char *p;
1.1 timbl 178: for (p=s; *p; p++) {
1.2 timbl 179: PUTC(*p);
1.1 timbl 180: }
181: }
1.2 timbl 182: PUTC(term);
1.1 timbl 183: }
184:
1.35 frystyk 185: /*
186: ** Helper function to check if the tag is on the stack
187: */
188: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
189: {
190: HTElement* elem;
191: for (elem = stack; elem != NULL; elem = elem->next)
192: {
193: if (elem->tag == tag) return YES;
194: }
195: return NO;
196: }
1.2 timbl 197:
1.1 timbl 198: /* End element
1.2 timbl 199: ** -----------
1.1 timbl 200: */
1.31 frystyk 201: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 202: {
1.41 ! frystyk 203: if (SGML_TRACE) HTTrace("SGML Parser. End </%s>\n", old_tag->name);
1.2 timbl 204: if (old_tag->contents == SGML_EMPTY) {
1.41 ! frystyk 205: if (SGML_TRACE) HTTrace("SGML Parser. Illegal end tag </%s> found.\n",
1.1 timbl 206: old_tag->name);
207: return;
208: }
209: while (context->element_stack) {/* Loop is error path only */
210: HTElement * N = context->element_stack;
211: HTTag * t = N->tag;
212:
213: if (old_tag != t) { /* Mismatch: syntax error */
1.35 frystyk 214: /*
215: ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
216: ** See explanation in ../User/Patch/lib_4.0_1.fix
217: */
218: if (context->element_stack->next /* This is not the last level */
219: && lookup_element_stack(context->element_stack, old_tag)) {
1.37 eric 220: if (SGML_TRACE) HTTrace(
1.41 ! frystyk 221: "SGML Parser. Found </%s> when expecting </%s>. </%s> assumed.\n",
1.1 timbl 222: old_tag->name, t->name, t->name);
223: } else { /* last level */
1.37 eric 224: if (SGML_TRACE) HTTrace(
1.41 ! frystyk 225: "SGML Parser. Found </%s> when expecting </%s>. </%s> Ignored.\n",
1.1 timbl 226: old_tag->name, t->name, old_tag->name);
227: return; /* Ignore */
228: }
229: }
230:
231: context->element_stack = N->next; /* Remove from stack */
1.36 frystyk 232: HT_FREE(N);
1.2 timbl 233: (*context->actions->end_element)(context->target,
234: t - context->dtd->tags);
1.1 timbl 235: if (old_tag == t) return; /* Correct sequence */
236:
237: /* Syntax error path only */
238:
239: }
1.37 eric 240: if (SGML_TRACE) HTTrace(
1.41 ! frystyk 241: "SGML Parser. Extra end tag </%s> found and ignored.\n", old_tag->name);
1.1 timbl 242: }
243:
244:
1.17 timbl 245: /* Start an element
246: ** ----------------
1.1 timbl 247: */
1.31 frystyk 248: PRIVATE void start_element (HTStream * context)
1.1 timbl 249: {
250: HTTag * new_tag = context->current_tag;
251:
1.41 ! frystyk 252: if (SGML_TRACE) HTTrace("SGML Parser. Start <%s>\n", new_tag->name);
1.2 timbl 253: (*context->actions->start_element)(
254: context->target,
255: new_tag - context->dtd->tags,
256: context->present,
1.38 frystyk 257: (const char**) context->value); /* coerce type for think c */
1.2 timbl 258: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.36 frystyk 259: HTElement * N;
260: if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
261: HT_OUTOFMEM("start_element");
1.1 timbl 262: N->next = context->element_stack;
263: N->tag = new_tag;
264: context->element_stack = N;
265: }
266: }
267:
268:
1.2 timbl 269: /* Find Tag in DTD tag list
270: ** ------------------------
1.1 timbl 271: **
272: ** On entry,
1.2 timbl 273: ** dtd points to dtd structire including valid tag list
274: ** string points to name of tag in question
1.1 timbl 275: **
1.2 timbl 276: ** On exit,
277: ** returns:
1.7 timbl 278: ** NULL tag not found
279: ** else address of tag structure in dtd
1.2 timbl 280: */
1.40 frystyk 281: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.2 timbl 282: {
283: int high, low, i, diff;
284: for(low=0, high=dtd->number_of_tags;
285: high > low ;
286: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
287: i = (low + (high-low)/2);
1.3 timbl 288: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 289: if (diff==0) { /* success: found it */
1.7 timbl 290: return &dtd->tags[i];
1.2 timbl 291: }
292: }
1.7 timbl 293: return NULL;
1.2 timbl 294: }
295:
296: /*________________________________________________________________________
297: ** Public Methods
1.1 timbl 298: */
299:
1.2 timbl 300:
301: /* Could check that we are back to bottom of stack! @@ */
1.40 frystyk 302: PRIVATE int SGML_flush (HTStream * context)
1.26 frystyk 303: {
304: while (context->element_stack) {
305: HTElement *ptr = context->element_stack;
306: if (SGML_TRACE)
1.37 eric 307: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 308: context->element_stack->tag->name);
309: context->element_stack = ptr->next;
1.36 frystyk 310: HT_FREE(ptr);
1.26 frystyk 311: }
312: return (*context->actions->flush)(context->target);
313: }
1.1 timbl 314:
1.40 frystyk 315: PRIVATE int SGML_free (HTStream * context)
1.8 timbl 316: {
1.26 frystyk 317: int status;
1.14 frystyk 318: int cnt;
1.15 frystyk 319: while (context->element_stack) { /* Make sure, that all tags are gone */
320: HTElement *ptr = context->element_stack;
321:
1.26 frystyk 322: if (SGML_TRACE)
1.37 eric 323: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 324: context->element_stack->tag->name);
1.15 frystyk 325: context->element_stack = ptr->next;
1.36 frystyk 326: HT_FREE(ptr);
1.15 frystyk 327: }
1.26 frystyk 328: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
329: return status;
1.33 frystyk 330: HTChunk_delete(context->string);
1.15 frystyk 331: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 332: if(context->value[cnt])
1.36 frystyk 333: HT_FREE(context->value[cnt]);
334: HT_FREE(context);
1.26 frystyk 335: return HT_OK;
1.1 timbl 336: }
337:
1.40 frystyk 338: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 339: {
1.14 frystyk 340: int cnt;
1.15 frystyk 341: while (context->element_stack) { /* Make sure, that all tags are gone */
342: HTElement *ptr = context->element_stack;
1.26 frystyk 343: if (SGML_TRACE)
1.37 eric 344: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 345: context->element_stack->tag->name);
1.15 frystyk 346: context->element_stack = ptr->next;
1.36 frystyk 347: HT_FREE(ptr);
1.15 frystyk 348: }
1.8 timbl 349: (*context->actions->abort)(context->target, e);
1.33 frystyk 350: HTChunk_delete(context->string);
1.14 frystyk 351: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
352: if(context->value[cnt])
1.36 frystyk 353: HT_FREE(context->value[cnt]);
354: HT_FREE(context);
1.26 frystyk 355: return HT_ERROR;
1.1 timbl 356: }
357:
1.41 ! frystyk 358: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.1 timbl 359: {
1.38 frystyk 360: const SGML_dtd *dtd = context->dtd;
1.1 timbl 361: HTChunk *string = context->string;
362:
1.41 ! frystyk 363: while (l-- > 0) {
! 364: char c = *b++;
! 365: switch(context->state) {
1.18 timbl 366:
1.41 ! frystyk 367: case S_after_open: /* Strip one trainling newline
! 368: only after opening nonempty element. - SGML: Ugh! */
1.18 timbl 369: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
370: break;
371: }
372: context->state = S_text;
373: goto normal_text;
374: /* (***falls through***) */
375:
1.1 timbl 376: case S_text:
1.18 timbl 377: normal_text:
378:
1.13 timbl 379: #ifdef ISO_2022_JP
380: if (c=='\033') {
381: context->state = S_esc;
382: PUTC(c);
383: break;
384: }
385: #endif /* ISO_2022_JP */
1.6 timbl 386: if (c=='&' && (!context->element_stack || (
387: context->element_stack->tag &&
388: ( context->element_stack->tag->contents == SGML_MIXED
389: || context->element_stack->tag->contents ==
390: SGML_RCDATA)
391: ))) {
1.1 timbl 392: string->size = 0;
393: context->state = S_ero;
394:
395: } else if (c=='<') {
396: string->size = 0;
397: context->state = (context->element_stack &&
1.13 timbl 398: context->element_stack->tag &&
399: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 400: S_literal : S_tag;
1.18 timbl 401: } else if (c=='\n') { /* Newline - ignore if before tag end! */
402: context->state = S_nl;
1.2 timbl 403: } else PUTC(c);
1.1 timbl 404: break;
1.13 timbl 405:
1.18 timbl 406: case S_nl:
407: if (c=='<') {
408: string->size = 0;
409: context->state = (context->element_stack &&
410: context->element_stack->tag &&
411: context->element_stack->tag->contents == SGML_LITERAL) ?
412: S_literal : S_nl_tago;
413: } else {
414: PUTC('\n');
415: context->state = S_text;
416: goto normal_text;
417: }
418: break;
419:
420: case S_nl_tago: /* Had newline and tag opener */
421: if (c != '/') {
422: PUTC('\n'); /* Only ignore newline before </ */
423: }
424: context->state = S_tag;
425: goto handle_S_tag;
426:
1.13 timbl 427: #ifdef ISO_2022_JP
428: case S_esc:
429: if (c=='$') {
430: context->state = S_dollar;
431: } else if (c=='(') {
432: context->state = S_paren;
433: } else {
434: context->state = S_text;
435: }
436: PUTC(c);
437: break;
438: case S_dollar:
439: if (c=='@' || c=='B') {
440: context->state = S_nonascii_text;
441: } else {
442: context->state = S_text;
443: }
444: PUTC(c);
445: break;
446: case S_paren:
447: if (c=='B' || c=='J') {
448: context->state = S_text;
449: } else {
450: context->state = S_text;
451: }
452: PUTC(c);
453: break;
454: case S_nonascii_text:
455: if (c=='\033') {
456: context->state = S_esc;
457: PUTC(c);
458: } else {
459: PUTC(c);
460: }
461: break;
462: #endif /* ISO_2022_JP */
1.1 timbl 463:
1.12 timbl 464: /* In literal mode, waits only for specific end tag!
1.2 timbl 465: ** Only foir compatibility with old servers.
1.1 timbl 466: */
1.12 timbl 467: case S_literal :
1.33 frystyk 468: HTChunk_putc(string, c);
1.1 timbl 469: if ( TOUPPER(c) != ((string->size ==1) ? '/'
470: : context->element_stack->tag->name[string->size-2])) {
471: int i;
472:
1.12 timbl 473: /* If complete match, end literal */
1.1 timbl 474: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
475: end_element(context, context->element_stack->tag);
476: string->size = 0;
1.2 timbl 477: context->current_attribute_number = INVALID;
1.1 timbl 478: context->state = S_text;
479: break;
480: } /* If Mismatch: recover string. */
1.2 timbl 481: PUTC( '<');
1.1 timbl 482: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 483: PUTC(
1.1 timbl 484: string->data[i]);
485: context->state = S_text;
486: }
487:
488: break;
489:
490: /* Character reference or Entity
491: */
492: case S_ero:
493: if (c=='#') {
494: context->state = S_cro; /* &# is Char Ref Open */
495: break;
496: }
497: context->state = S_entity; /* Fall through! */
498:
499: /* Handle Entities
500: */
501: case S_entity:
502: if (isalnum(c))
1.33 frystyk 503: HTChunk_putc(string, c);
1.1 timbl 504: else {
1.33 frystyk 505: HTChunk_terminate(string);
1.1 timbl 506: handle_entity(context, c);
507: context->state = S_text;
508: }
509: break;
510:
511: /* Character reference
512: */
513: case S_cro:
514: if (isalnum(c))
1.33 frystyk 515: HTChunk_putc(string, c); /* accumulate a character NUMBER */
1.1 timbl 516: else {
517: int value;
1.33 frystyk 518: HTChunk_terminate(string);
1.1 timbl 519: if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 520: PUTC((char) value);
1.1 timbl 521: context->state = S_text;
522: }
523: break;
524:
525: /* Tag
526: */
527: case S_tag: /* new tag */
1.18 timbl 528: handle_S_tag:
529:
1.1 timbl 530: if (isalnum(c))
1.33 frystyk 531: HTChunk_putc(string, c);
1.1 timbl 532: else { /* End of tag name */
1.7 timbl 533: HTTag * t;
1.1 timbl 534: if (c=='/') {
1.20 frystyk 535: if (SGML_TRACE) if (string->size!=0)
1.41 ! frystyk 536: HTTrace("SGML Parser. `<%s/' found!\n", string->data);
1.1 timbl 537: context->state = S_end;
538: break;
539: }
1.33 frystyk 540: HTChunk_terminate(string) ;
1.2 timbl 541:
1.10 timbl 542: t = SGMLFindTag(dtd, string->data);
1.7 timbl 543: if (!t) {
1.41 ! frystyk 544: if(SGML_TRACE) HTTrace("SGML Parser. *** Unknown element %s\n",
1.1 timbl 545: string->data);
546: context->state = (c=='>') ? S_text : S_junk_tag;
547: break;
548: }
1.7 timbl 549: context->current_tag = t;
1.2 timbl 550:
551: /* Clear out attributes
552: */
1.1 timbl 553:
1.2 timbl 554: {
555: int i;
556: for (i=0; i< context->current_tag->number_of_attributes; i++)
557: context->present[i] = NO;
1.1 timbl 558: }
559: string->size = 0;
1.2 timbl 560: context->current_attribute_number = INVALID;
1.1 timbl 561:
562: if (c=='>') {
563: if (context->current_tag->name) start_element(context);
1.18 timbl 564: context->state = S_after_open;
1.1 timbl 565: } else {
566: context->state = S_tag_gap;
567: }
568: }
569: break;
570:
571:
572: case S_tag_gap: /* Expecting attribute or > */
573: if (WHITE(c)) break; /* Gap between attributes */
574: if (c=='>') { /* End of tag */
575: if (context->current_tag->name) start_element(context);
1.18 timbl 576: context->state = S_after_open;
1.1 timbl 577: break;
578: }
1.33 frystyk 579: HTChunk_putc(string, c);
1.1 timbl 580: context->state = S_attr; /* Get attribute */
581: break;
582:
583: /* accumulating value */
584: case S_attr:
585: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
1.33 frystyk 586: HTChunk_terminate(string) ;
1.1 timbl 587: handle_attribute_name(context, string->data);
588: string->size = 0;
589: if (c=='>') { /* End of tag */
590: if (context->current_tag->name) start_element(context);
1.18 timbl 591: context->state = S_after_open;
1.1 timbl 592: break;
593: }
594: context->state = (c=='=' ? S_equals: S_attr_gap);
595: } else {
1.33 frystyk 596: HTChunk_putc(string, c);
1.1 timbl 597: }
598: break;
599:
600: case S_attr_gap: /* Expecting attribute or = or > */
601: if (WHITE(c)) break; /* Gap after attribute */
602: if (c=='>') { /* End of tag */
603: if (context->current_tag->name) start_element(context);
1.18 timbl 604: context->state = S_after_open;
1.1 timbl 605: break;
606: } else if (c=='=') {
607: context->state = S_equals;
608: break;
609: }
1.33 frystyk 610: HTChunk_putc(string, c);
1.1 timbl 611: context->state = S_attr; /* Get next attribute */
612: break;
613:
614: case S_equals: /* After attr = */
615: if (WHITE(c)) break; /* Before attribute value */
616: if (c=='>') { /* End of tag */
1.41 ! frystyk 617: if (SGML_TRACE) HTTrace("SGML Parser. found = but no value\n");
1.1 timbl 618: if (context->current_tag->name) start_element(context);
1.18 timbl 619: context->state = S_after_open;
1.1 timbl 620: break;
621:
622: } else if (c=='\'') {
623: context->state = S_squoted;
624: break;
625:
626: } else if (c=='"') {
627: context->state = S_dquoted;
628: break;
629: }
1.33 frystyk 630: HTChunk_putc(string, c);
1.1 timbl 631: context->state = S_value;
632: break;
633:
634: case S_value:
635: if (WHITE(c) || (c=='>')) { /* End of word */
1.33 frystyk 636: HTChunk_terminate(string) ;
1.1 timbl 637: handle_attribute_value(context, string->data);
638: string->size = 0;
639: if (c=='>') { /* End of tag */
640: if (context->current_tag->name) start_element(context);
1.18 timbl 641: context->state = S_after_open;
1.1 timbl 642: break;
643: }
644: else context->state = S_tag_gap;
645: } else {
1.33 frystyk 646: HTChunk_putc(string, c);
1.1 timbl 647: }
648: break;
649:
650: case S_squoted: /* Quoted attribute value */
651: if (c=='\'') { /* End of attribute value */
1.33 frystyk 652: HTChunk_terminate(string) ;
1.1 timbl 653: handle_attribute_value(context, string->data);
654: string->size = 0;
655: context->state = S_tag_gap;
656: } else {
1.33 frystyk 657: HTChunk_putc(string, c);
1.1 timbl 658: }
659: break;
660:
661: case S_dquoted: /* Quoted attribute value */
662: if (c=='"') { /* End of attribute value */
1.33 frystyk 663: HTChunk_terminate(string) ;
1.1 timbl 664: handle_attribute_value(context, string->data);
665: string->size = 0;
666: context->state = S_tag_gap;
667: } else {
1.33 frystyk 668: HTChunk_putc(string, c);
1.1 timbl 669: }
670: break;
671:
672: case S_end: /* </ */
673: if (isalnum(c))
1.33 frystyk 674: HTChunk_putc(string, c);
1.1 timbl 675: else { /* End of end tag name */
1.7 timbl 676: HTTag * t;
1.33 frystyk 677: HTChunk_terminate(string) ;
1.7 timbl 678: if (!*string->data) { /* Empty end tag */
679: t = context->element_stack->tag;
680: } else {
1.10 timbl 681: t = SGMLFindTag(dtd, string->data);
1.1 timbl 682: }
1.7 timbl 683: if (!t) {
1.37 eric 684: if(SGML_TRACE) HTTrace(
1.1 timbl 685: "Unknown end tag </%s>\n", string->data);
1.2 timbl 686: } else {
1.7 timbl 687: context->current_tag = t;
1.2 timbl 688: end_element( context, context->current_tag);
1.1 timbl 689: }
1.2 timbl 690:
1.1 timbl 691: string->size = 0;
1.2 timbl 692: context->current_attribute_number = INVALID;
1.7 timbl 693: if (c!='>') {
1.20 frystyk 694: if (SGML_TRACE && !WHITE(c))
1.41 ! frystyk 695: HTTrace("SGML Parser. `</%s%c' found!\n",
1.7 timbl 696: string->data, c);
697: context->state = S_junk_tag;
698: } else {
699: context->state = S_text;
700: }
1.1 timbl 701: }
702: break;
703:
704:
705: case S_junk_tag:
706: if (c=='>') {
707: context->state = S_text;
708: }
709: } /* switch on context->state */
1.41 ! frystyk 710: }
1.26 frystyk 711: return HT_OK;
712: }
1.2 timbl 713:
714:
1.40 frystyk 715: PRIVATE int SGML_string (HTStream * context, const char* s)
1.2 timbl 716: {
1.41 ! frystyk 717: return SGML_write(context, s, (int) strlen(s));
1.2 timbl 718: }
719:
720:
1.41 ! frystyk 721: PRIVATE int SGML_character (HTStream * context, char c)
1.2 timbl 722: {
1.41 ! frystyk 723: return SGML_write(context, &c, 1);
1.2 timbl 724: }
725:
726: /*_______________________________________________________________________
727: */
728:
729: /* Structured Object Class
730: ** -----------------------
731: */
1.38 frystyk 732: PRIVATE const HTStreamClass SGMLParser =
1.2 timbl 733: {
1.32 frystyk 734: "SGMLParser",
735: SGML_flush,
736: SGML_free,
737: SGML_abort,
738: SGML_character,
739: SGML_string,
740: SGML_write,
1.2 timbl 741: };
742:
743: /* Create SGML Engine
744: ** ------------------
745: **
746: ** On entry,
747: ** dtd represents the DTD, along with
748: ** actions is the sink for the data as a set of routines.
749: **
750: */
1.38 frystyk 751: PUBLIC HTStream * SGML_new (const SGML_dtd * dtd, HTStructured * target)
1.2 timbl 752: {
753: int i;
1.36 frystyk 754: HTStream* context;
1.40 frystyk 755: if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
1.36 frystyk 756: HT_OUTOFMEM("SGML_begin");
1.2 timbl 757:
758: context->isa = &SGMLParser;
1.33 frystyk 759: context->string = HTChunk_new(128); /* Grow by this much */
1.2 timbl 760: context->dtd = dtd;
761: context->target = target;
762: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
763: /* Ugh: no OO */
764: context->state = S_text;
765: context->element_stack = 0; /* empty */
766: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
767:
768: return context;
769: }
Webmaster