Annotation of libwww/Library/src/SGML.c, revision 1.37
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This module implements an HTStream object. To parse an
1.1 timbl 8: ** SGML file, create this object which is a parser. The object
1.2 timbl 9: ** is (currently) created by being passed a DTD structure,
10: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **
1.19 duns 12: ** 6 Feb 93 Binary searches used. Interface modified.
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
15:
1.25 frystyk 16: /* Library include files */
17: #include "tcp.h"
1.1 timbl 18: #include "HTUtils.h"
1.25 frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22:
1.2 timbl 23: #define INVALID (-1)
24:
1.1 timbl 25: /* The State (context) of the parser
26: **
1.2 timbl 27: ** This is passed with each call to make the parser reentrant
1.1 timbl 28: **
29: */
30:
1.16 frystyk 31:
1.2 timbl 32:
33:
34: /* Element Stack
35: ** -------------
36: ** This allows us to return down the stack reselcting styles.
37: ** As we return, attribute values will be garbage in general.
38: */
39: typedef struct _HTElement HTElement;
40: struct _HTElement {
41: HTElement * next; /* Previously nested element or 0 */
42: HTTag* tag; /* The tag at this level */
43: };
44:
45:
/*	Parser states
**	-------------
**	One enumerator per state of the per-character state machine in
**	SGML_character(). The order of the enumerators is unchanged.
*/
typedef enum _sgml_state {
    S_text,			/* Ordinary text */
    S_literal,			/* Inside a literal element, waiting for its end tag */
    S_tag,			/* After '<': accumulating a tag name */
    S_tag_gap,			/* Inside a tag: expecting attribute or '>' */
    S_attr,			/* Accumulating an attribute name */
    S_attr_gap,			/* After attribute name: expecting attribute, '=' or '>' */
    S_equals,			/* After "attr =" */
    S_value,			/* Accumulating an unquoted attribute value */
    S_after_open,		/* Immediately after a start tag was closed with '>' */
    S_nl,			/* Newline seen in text, not yet emitted */
    S_nl_tago,			/* Had newline and tag opener */
    S_ero,			/* After '&' */
    S_cro,			/* After "&#": character reference */
#ifdef ISO_2022_JP
    S_esc,			/* After ESC */
    S_dollar,			/* After ESC '$' */
    S_paren,			/* After ESC '(' */
    S_nonascii_text,		/* Text passed through until the next ESC */
#endif
    S_squoted,			/* Inside a '...' attribute value */
    S_dquoted,			/* Inside a "..." attribute value */
    S_end,			/* After "</": accumulating an end tag name */
    S_entity,			/* Accumulating an entity name */
    S_junk_tag			/* Skipping everything up to the next '>' */
} sgml_state;
56:
57:
1.2 timbl 58: /* Internal Context Data Structure
59: ** -------------------------------
60: */
61: struct _HTStream {
62:
63: CONST HTStreamClass * isa; /* inherited from HTStream */
64:
65: CONST SGML_dtd *dtd;
66: HTStructuredClass *actions; /* target class */
67: HTStructured *target; /* target object */
68:
1.1 timbl 69: HTTag *current_tag;
1.2 timbl 70: int current_attribute_number;
1.1 timbl 71: HTChunk *string;
72: HTElement *element_stack;
1.21 frystyk 73: sgml_state state;
1.2 timbl 74: #ifdef CALLERDATA
1.1 timbl 75: void * callerData;
1.2 timbl 76: #endif
77: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
78: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
79: } ;
80:
81:
/* Hand one character to the target's put_character method. Relies on a
** variable called `context' being in scope at the point of use. */
#define PUTC(ch) \
    ((*context->actions->put_character)(context->target, (ch)))
83:
1.1 timbl 84:
1.17 timbl 85: /* Find Attribute Number
86: ** ---------------------
87: */
88:
1.31 frystyk 89: PUBLIC int SGMLFindAttribute (HTTag* tag, CONST char * s)
1.17 timbl 90: {
91: attr* attributes = tag->attributes;
92:
93: int high, low, i, diff; /* Binary search for attribute name */
94: for(low=0, high=tag->number_of_attributes;
95: high > low ;
96: diff < 0 ? (low = i+1) : (high = i) ) {
97: i = (low + (high-low)/2);
98: diff = strcasecomp(attributes[i].name, s);
99: if (diff==0) return i; /* success: found it */
100: } /* for */
101:
102: return -1;
103: }
104:
1.1 timbl 105:
106: /* Handle Attribute
107: ** ----------------
108: */
109: /* PUBLIC CONST char * SGML_default = ""; ?? */
110:
1.31 frystyk 111: PRIVATE void handle_attribute_name (HTStream * context, CONST char * s)
1.1 timbl 112: {
1.2 timbl 113:
114: HTTag * tag = context->current_tag;
115:
1.17 timbl 116: int i = SGMLFindAttribute(tag, s);
117: if (i>=0) {
118: context->current_attribute_number = i;
119: context->present[i] = YES;
120: if (context->value[i]) {
1.36 frystyk 121: HT_FREE(context->value[i]);
1.17 timbl 122: context->value[i] = NULL;
123: }
124: return;
125: } /* if */
1.2 timbl 126:
1.20 frystyk 127: if (SGML_TRACE)
1.37 ! eric 128: HTTrace("SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 129: s, context->current_tag->name);
130: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 131: }
132:
133:
134: /* Handle attribute value
135: ** ----------------------
136: */
1.31 frystyk 137: PRIVATE void handle_attribute_value (HTStream * context, CONST char * s)
1.1 timbl 138: {
1.2 timbl 139: if (context->current_attribute_number != INVALID) {
140: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 141: } else {
1.37 ! eric 142: if (SGML_TRACE) HTTrace("SGML: Attribute value %s ignored\n", s);
1.1 timbl 143: }
1.2 timbl 144: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 145: }
146:
1.2 timbl 147:
1.1 timbl 148: /* Handle entity
149: ** -------------
150: **
151: ** On entry,
152: ** s contains the entity name zero terminated
153: ** Bugs:
154: ** If the entity name is unknown, the terminator is treated as
155: ** a printable non-special character in all cases, even if it is '<'
156: */
1.31 frystyk 157: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 158: {
1.2 timbl 159:
1.3 timbl 160: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 161: CONST char *s = context->string->data;
1.2 timbl 162:
163: int high, low, i, diff;
164: for(low=0, high = context->dtd->number_of_entities;
165: high > low ;
166: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
167: i = (low + (high-low)/2);
168: diff = strcmp(entities[i], s); /* Csse sensitive! */
169: if (diff==0) { /* success: found it */
170: (*context->actions->put_entity)(context->target, i);
171: return;
1.1 timbl 172: }
173: }
174: /* If entity string not found, display as text */
1.20 frystyk 175: if (SGML_TRACE)
1.37 ! eric 176: HTTrace("SGML: Unknown entity %s\n", s);
1.2 timbl 177: PUTC('&');
1.1 timbl 178: {
179: CONST char *p;
180: for (p=s; *p; p++) {
1.2 timbl 181: PUTC(*p);
1.1 timbl 182: }
183: }
1.2 timbl 184: PUTC(term);
1.1 timbl 185: }
186:
1.35 frystyk 187: /*
188: ** Helper function to check if the tag is on the stack
189: */
190: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
191: {
192: HTElement* elem;
193: for (elem = stack; elem != NULL; elem = elem->next)
194: {
195: if (elem->tag == tag) return YES;
196: }
197: return NO;
198: }
1.2 timbl 199:
1.1 timbl 200: /* End element
1.2 timbl 201: ** -----------
1.1 timbl 202: */
1.31 frystyk 203: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 204: {
1.37 ! eric 205: if (SGML_TRACE) HTTrace("SGML: End </%s>\n", old_tag->name);
1.2 timbl 206: if (old_tag->contents == SGML_EMPTY) {
1.37 ! eric 207: if (SGML_TRACE) HTTrace("SGML: Illegal end tag </%s> found.\n",
1.1 timbl 208: old_tag->name);
209: return;
210: }
211: while (context->element_stack) {/* Loop is error path only */
212: HTElement * N = context->element_stack;
213: HTTag * t = N->tag;
214:
215: if (old_tag != t) { /* Mismatch: syntax error */
1.35 frystyk 216: /*
217: ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
218: ** See explanation in ../User/Patch/lib_4.0_1.fix
219: */
220: if (context->element_stack->next /* This is not the last level */
221: && lookup_element_stack(context->element_stack, old_tag)) {
1.37 ! eric 222: if (SGML_TRACE) HTTrace(
1.1 timbl 223: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
224: old_tag->name, t->name, t->name);
225: } else { /* last level */
1.37 ! eric 226: if (SGML_TRACE) HTTrace(
1.1 timbl 227: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
228: old_tag->name, t->name, old_tag->name);
229: return; /* Ignore */
230: }
231: }
232:
233: context->element_stack = N->next; /* Remove from stack */
1.36 frystyk 234: HT_FREE(N);
1.2 timbl 235: (*context->actions->end_element)(context->target,
236: t - context->dtd->tags);
1.1 timbl 237: if (old_tag == t) return; /* Correct sequence */
238:
239: /* Syntax error path only */
240:
241: }
1.37 ! eric 242: if (SGML_TRACE) HTTrace(
1.1 timbl 243: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
244: }
245:
246:
1.17 timbl 247: /* Start an element
248: ** ----------------
1.1 timbl 249: */
1.31 frystyk 250: PRIVATE void start_element (HTStream * context)
1.1 timbl 251: {
252: HTTag * new_tag = context->current_tag;
253:
1.37 ! eric 254: if (SGML_TRACE) HTTrace("SGML: Start <%s>\n", new_tag->name);
1.2 timbl 255: (*context->actions->start_element)(
256: context->target,
257: new_tag - context->dtd->tags,
258: context->present,
1.3 timbl 259: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 260: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.36 frystyk 261: HTElement * N;
262: if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
263: HT_OUTOFMEM("start_element");
1.1 timbl 264: N->next = context->element_stack;
265: N->tag = new_tag;
266: context->element_stack = N;
267: }
268: }
269:
270:
1.2 timbl 271: /* Find Tag in DTD tag list
272: ** ------------------------
1.1 timbl 273: **
274: ** On entry,
1.2 timbl 275: ** dtd points to dtd structire including valid tag list
276: ** string points to name of tag in question
1.1 timbl 277: **
1.2 timbl 278: ** On exit,
279: ** returns:
1.7 timbl 280: ** NULL tag not found
281: ** else address of tag structure in dtd
1.2 timbl 282: */
1.31 frystyk 283: PUBLIC HTTag * SGMLFindTag (CONST SGML_dtd* dtd, CONST char * string)
1.2 timbl 284: {
285: int high, low, i, diff;
286: for(low=0, high=dtd->number_of_tags;
287: high > low ;
288: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
289: i = (low + (high-low)/2);
1.3 timbl 290: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 291: if (diff==0) { /* success: found it */
1.7 timbl 292: return &dtd->tags[i];
1.2 timbl 293: }
294: }
1.7 timbl 295: return NULL;
1.2 timbl 296: }
297:
298: /*________________________________________________________________________
299: ** Public Methods
1.1 timbl 300: */
301:
1.2 timbl 302:
303: /* Could check that we are back to bottom of stack! @@ */
1.31 frystyk 304: PUBLIC int SGML_flush (HTStream * context)
1.26 frystyk 305: {
306: while (context->element_stack) {
307: HTElement *ptr = context->element_stack;
308: if (SGML_TRACE)
1.37 ! eric 309: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 310: context->element_stack->tag->name);
311: context->element_stack = ptr->next;
1.36 frystyk 312: HT_FREE(ptr);
1.26 frystyk 313: }
314: return (*context->actions->flush)(context->target);
315: }
1.1 timbl 316:
1.31 frystyk 317: PUBLIC int SGML_free (HTStream * context)
1.8 timbl 318: {
1.26 frystyk 319: int status;
1.14 frystyk 320: int cnt;
1.15 frystyk 321: while (context->element_stack) { /* Make sure, that all tags are gone */
322: HTElement *ptr = context->element_stack;
323:
1.26 frystyk 324: if (SGML_TRACE)
1.37 ! eric 325: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 326: context->element_stack->tag->name);
1.15 frystyk 327: context->element_stack = ptr->next;
1.36 frystyk 328: HT_FREE(ptr);
1.15 frystyk 329: }
1.26 frystyk 330: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
331: return status;
1.33 frystyk 332: HTChunk_delete(context->string);
1.15 frystyk 333: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 334: if(context->value[cnt])
1.36 frystyk 335: HT_FREE(context->value[cnt]);
336: HT_FREE(context);
1.26 frystyk 337: return HT_OK;
1.1 timbl 338: }
339:
1.31 frystyk 340: PUBLIC int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 341: {
1.14 frystyk 342: int cnt;
1.15 frystyk 343: while (context->element_stack) { /* Make sure, that all tags are gone */
344: HTElement *ptr = context->element_stack;
1.26 frystyk 345: if (SGML_TRACE)
1.37 ! eric 346: HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 347: context->element_stack->tag->name);
1.15 frystyk 348: context->element_stack = ptr->next;
1.36 frystyk 349: HT_FREE(ptr);
1.15 frystyk 350: }
1.8 timbl 351: (*context->actions->abort)(context->target, e);
1.33 frystyk 352: HTChunk_delete(context->string);
1.14 frystyk 353: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
354: if(context->value[cnt])
1.36 frystyk 355: HT_FREE(context->value[cnt]);
356: HT_FREE(context);
1.26 frystyk 357: return HT_ERROR;
1.1 timbl 358: }
359:
1.2 timbl 360:
/*	Read and write user callback handle
**	-----------------------------------
**	The callbacks from the SGML parser have an SGML context parameter.
**	This pair of calls lets the caller attach his own context to a
**	particular SGML context and get it back later.
**
**	Without CALLERDATA nothing is stored; do-nothing versions are then
**	provided only when building with WWW_WIN_DLL.
*/
#ifdef CALLERDATA

PUBLIC void* SGML_callerData (HTStream * context)
{
    return context->callerData;
}

PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
    context->callerData = data;
}

#else /* !CALLERDATA */
#ifdef WWW_WIN_DLL

PUBLIC void * SGML_callerData (HTStream * context)
{
    return NULL;
}

PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
}

#endif /* WWW_WIN_DLL */
#endif /* CALLERDATA */
1.1 timbl 385:
1.31 frystyk 386: PUBLIC int SGML_character (HTStream * context, char c)
1.1 timbl 387:
388: {
1.2 timbl 389: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 390: HTChunk *string = context->string;
391:
392: switch(context->state) {
1.18 timbl 393:
394: case S_after_open: /* Strip one trainling newline
395: only after opening nonempty element. - SGML:Ugh! */
396: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
397: break;
398: }
399: context->state = S_text;
400: goto normal_text;
401: /* (***falls through***) */
402:
1.1 timbl 403: case S_text:
1.18 timbl 404: normal_text:
405:
1.13 timbl 406: #ifdef ISO_2022_JP
407: if (c=='\033') {
408: context->state = S_esc;
409: PUTC(c);
410: break;
411: }
412: #endif /* ISO_2022_JP */
1.6 timbl 413: if (c=='&' && (!context->element_stack || (
414: context->element_stack->tag &&
415: ( context->element_stack->tag->contents == SGML_MIXED
416: || context->element_stack->tag->contents ==
417: SGML_RCDATA)
418: ))) {
1.1 timbl 419: string->size = 0;
420: context->state = S_ero;
421:
422: } else if (c=='<') {
423: string->size = 0;
424: context->state = (context->element_stack &&
1.13 timbl 425: context->element_stack->tag &&
426: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 427: S_literal : S_tag;
1.18 timbl 428: } else if (c=='\n') { /* Newline - ignore if before tag end! */
429: context->state = S_nl;
1.2 timbl 430: } else PUTC(c);
1.1 timbl 431: break;
1.13 timbl 432:
1.18 timbl 433: case S_nl:
434: if (c=='<') {
435: string->size = 0;
436: context->state = (context->element_stack &&
437: context->element_stack->tag &&
438: context->element_stack->tag->contents == SGML_LITERAL) ?
439: S_literal : S_nl_tago;
440: } else {
441: PUTC('\n');
442: context->state = S_text;
443: goto normal_text;
444: }
445: break;
446:
447: case S_nl_tago: /* Had newline and tag opener */
448: if (c != '/') {
449: PUTC('\n'); /* Only ignore newline before </ */
450: }
451: context->state = S_tag;
452: goto handle_S_tag;
453:
1.13 timbl 454: #ifdef ISO_2022_JP
455: case S_esc:
456: if (c=='$') {
457: context->state = S_dollar;
458: } else if (c=='(') {
459: context->state = S_paren;
460: } else {
461: context->state = S_text;
462: }
463: PUTC(c);
464: break;
465: case S_dollar:
466: if (c=='@' || c=='B') {
467: context->state = S_nonascii_text;
468: } else {
469: context->state = S_text;
470: }
471: PUTC(c);
472: break;
473: case S_paren:
474: if (c=='B' || c=='J') {
475: context->state = S_text;
476: } else {
477: context->state = S_text;
478: }
479: PUTC(c);
480: break;
481: case S_nonascii_text:
482: if (c=='\033') {
483: context->state = S_esc;
484: PUTC(c);
485: } else {
486: PUTC(c);
487: }
488: break;
489: #endif /* ISO_2022_JP */
1.1 timbl 490:
1.12 timbl 491: /* In literal mode, waits only for specific end tag!
1.2 timbl 492: ** Only foir compatibility with old servers.
1.1 timbl 493: */
1.12 timbl 494: case S_literal :
1.33 frystyk 495: HTChunk_putc(string, c);
1.1 timbl 496: if ( TOUPPER(c) != ((string->size ==1) ? '/'
497: : context->element_stack->tag->name[string->size-2])) {
498: int i;
499:
1.12 timbl 500: /* If complete match, end literal */
1.1 timbl 501: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
502: end_element(context, context->element_stack->tag);
503: string->size = 0;
1.2 timbl 504: context->current_attribute_number = INVALID;
1.1 timbl 505: context->state = S_text;
506: break;
507: } /* If Mismatch: recover string. */
1.2 timbl 508: PUTC( '<');
1.1 timbl 509: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 510: PUTC(
1.1 timbl 511: string->data[i]);
512: context->state = S_text;
513: }
514:
515: break;
516:
517: /* Character reference or Entity
518: */
519: case S_ero:
520: if (c=='#') {
521: context->state = S_cro; /* &# is Char Ref Open */
522: break;
523: }
524: context->state = S_entity; /* Fall through! */
525:
526: /* Handle Entities
527: */
528: case S_entity:
529: if (isalnum(c))
1.33 frystyk 530: HTChunk_putc(string, c);
1.1 timbl 531: else {
1.33 frystyk 532: HTChunk_terminate(string);
1.1 timbl 533: handle_entity(context, c);
534: context->state = S_text;
535: }
536: break;
537:
538: /* Character reference
539: */
540: case S_cro:
541: if (isalnum(c))
1.33 frystyk 542: HTChunk_putc(string, c); /* accumulate a character NUMBER */
1.1 timbl 543: else {
544: int value;
1.33 frystyk 545: HTChunk_terminate(string);
1.1 timbl 546: if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 547: PUTC((char) value);
1.1 timbl 548: context->state = S_text;
549: }
550: break;
551:
552: /* Tag
553: */
554: case S_tag: /* new tag */
1.18 timbl 555: handle_S_tag:
556:
1.1 timbl 557: if (isalnum(c))
1.33 frystyk 558: HTChunk_putc(string, c);
1.1 timbl 559: else { /* End of tag name */
1.7 timbl 560: HTTag * t;
1.1 timbl 561: if (c=='/') {
1.20 frystyk 562: if (SGML_TRACE) if (string->size!=0)
1.37 ! eric 563: HTTrace("SGML: `<%s/' found!\n", string->data);
1.1 timbl 564: context->state = S_end;
565: break;
566: }
1.33 frystyk 567: HTChunk_terminate(string) ;
1.2 timbl 568:
1.10 timbl 569: t = SGMLFindTag(dtd, string->data);
1.7 timbl 570: if (!t) {
1.37 ! eric 571: if(SGML_TRACE) HTTrace("SGML: *** Unknown element %s\n",
1.1 timbl 572: string->data);
573: context->state = (c=='>') ? S_text : S_junk_tag;
574: break;
575: }
1.7 timbl 576: context->current_tag = t;
1.2 timbl 577:
578: /* Clear out attributes
579: */
1.1 timbl 580:
1.2 timbl 581: {
582: int i;
583: for (i=0; i< context->current_tag->number_of_attributes; i++)
584: context->present[i] = NO;
1.1 timbl 585: }
586: string->size = 0;
1.2 timbl 587: context->current_attribute_number = INVALID;
1.1 timbl 588:
589: if (c=='>') {
590: if (context->current_tag->name) start_element(context);
1.18 timbl 591: context->state = S_after_open;
1.1 timbl 592: } else {
593: context->state = S_tag_gap;
594: }
595: }
596: break;
597:
598:
599: case S_tag_gap: /* Expecting attribute or > */
600: if (WHITE(c)) break; /* Gap between attributes */
601: if (c=='>') { /* End of tag */
602: if (context->current_tag->name) start_element(context);
1.18 timbl 603: context->state = S_after_open;
1.1 timbl 604: break;
605: }
1.33 frystyk 606: HTChunk_putc(string, c);
1.1 timbl 607: context->state = S_attr; /* Get attribute */
608: break;
609:
610: /* accumulating value */
611: case S_attr:
612: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
1.33 frystyk 613: HTChunk_terminate(string) ;
1.1 timbl 614: handle_attribute_name(context, string->data);
615: string->size = 0;
616: if (c=='>') { /* End of tag */
617: if (context->current_tag->name) start_element(context);
1.18 timbl 618: context->state = S_after_open;
1.1 timbl 619: break;
620: }
621: context->state = (c=='=' ? S_equals: S_attr_gap);
622: } else {
1.33 frystyk 623: HTChunk_putc(string, c);
1.1 timbl 624: }
625: break;
626:
627: case S_attr_gap: /* Expecting attribute or = or > */
628: if (WHITE(c)) break; /* Gap after attribute */
629: if (c=='>') { /* End of tag */
630: if (context->current_tag->name) start_element(context);
1.18 timbl 631: context->state = S_after_open;
1.1 timbl 632: break;
633: } else if (c=='=') {
634: context->state = S_equals;
635: break;
636: }
1.33 frystyk 637: HTChunk_putc(string, c);
1.1 timbl 638: context->state = S_attr; /* Get next attribute */
639: break;
640:
641: case S_equals: /* After attr = */
642: if (WHITE(c)) break; /* Before attribute value */
643: if (c=='>') { /* End of tag */
1.37 ! eric 644: if (SGML_TRACE) HTTrace("SGML: found = but no value\n");
1.1 timbl 645: if (context->current_tag->name) start_element(context);
1.18 timbl 646: context->state = S_after_open;
1.1 timbl 647: break;
648:
649: } else if (c=='\'') {
650: context->state = S_squoted;
651: break;
652:
653: } else if (c=='"') {
654: context->state = S_dquoted;
655: break;
656: }
1.33 frystyk 657: HTChunk_putc(string, c);
1.1 timbl 658: context->state = S_value;
659: break;
660:
661: case S_value:
662: if (WHITE(c) || (c=='>')) { /* End of word */
1.33 frystyk 663: HTChunk_terminate(string) ;
1.1 timbl 664: handle_attribute_value(context, string->data);
665: string->size = 0;
666: if (c=='>') { /* End of tag */
667: if (context->current_tag->name) start_element(context);
1.18 timbl 668: context->state = S_after_open;
1.1 timbl 669: break;
670: }
671: else context->state = S_tag_gap;
672: } else {
1.33 frystyk 673: HTChunk_putc(string, c);
1.1 timbl 674: }
675: break;
676:
677: case S_squoted: /* Quoted attribute value */
678: if (c=='\'') { /* End of attribute value */
1.33 frystyk 679: HTChunk_terminate(string) ;
1.1 timbl 680: handle_attribute_value(context, string->data);
681: string->size = 0;
682: context->state = S_tag_gap;
683: } else {
1.33 frystyk 684: HTChunk_putc(string, c);
1.1 timbl 685: }
686: break;
687:
688: case S_dquoted: /* Quoted attribute value */
689: if (c=='"') { /* End of attribute value */
1.33 frystyk 690: HTChunk_terminate(string) ;
1.1 timbl 691: handle_attribute_value(context, string->data);
692: string->size = 0;
693: context->state = S_tag_gap;
694: } else {
1.33 frystyk 695: HTChunk_putc(string, c);
1.1 timbl 696: }
697: break;
698:
699: case S_end: /* </ */
700: if (isalnum(c))
1.33 frystyk 701: HTChunk_putc(string, c);
1.1 timbl 702: else { /* End of end tag name */
1.7 timbl 703: HTTag * t;
1.33 frystyk 704: HTChunk_terminate(string) ;
1.7 timbl 705: if (!*string->data) { /* Empty end tag */
706: t = context->element_stack->tag;
707: } else {
1.10 timbl 708: t = SGMLFindTag(dtd, string->data);
1.1 timbl 709: }
1.7 timbl 710: if (!t) {
1.37 ! eric 711: if(SGML_TRACE) HTTrace(
1.1 timbl 712: "Unknown end tag </%s>\n", string->data);
1.2 timbl 713: } else {
1.7 timbl 714: context->current_tag = t;
1.2 timbl 715: end_element( context, context->current_tag);
1.1 timbl 716: }
1.2 timbl 717:
1.1 timbl 718: string->size = 0;
1.2 timbl 719: context->current_attribute_number = INVALID;
1.7 timbl 720: if (c!='>') {
1.20 frystyk 721: if (SGML_TRACE && !WHITE(c))
1.37 ! eric 722: HTTrace("SGML: `</%s%c' found!\n",
1.7 timbl 723: string->data, c);
724: context->state = S_junk_tag;
725: } else {
726: context->state = S_text;
727: }
1.1 timbl 728: }
729: break;
730:
731:
732: case S_junk_tag:
733: if (c=='>') {
734: context->state = S_text;
735: }
736: } /* switch on context->state */
1.26 frystyk 737: return HT_OK;
738: }
1.2 timbl 739:
740:
1.31 frystyk 741: PUBLIC int SGML_string (HTStream * context, CONST char* s)
1.2 timbl 742: {
1.26 frystyk 743: while (*s)
744: SGML_character(context, *s++);
745: return HT_OK;
1.2 timbl 746: }
747:
748:
1.31 frystyk 749: PUBLIC int SGML_write (HTStream * context, CONST char* b, int l)
1.2 timbl 750: {
1.26 frystyk 751: while (l-- > 0)
752: SGML_character(context, *b++);
753: return HT_OK;
1.2 timbl 754: }
755:
756: /*_______________________________________________________________________
757: */
758:
759: /* Structured Object Class
760: ** -----------------------
761: */
1.32 frystyk 762: PRIVATE CONST HTStreamClass SGMLParser =
1.2 timbl 763: {
1.32 frystyk 764: "SGMLParser",
765: SGML_flush,
766: SGML_free,
767: SGML_abort,
768: SGML_character,
769: SGML_string,
770: SGML_write,
1.2 timbl 771: };
772:
773: /* Create SGML Engine
774: ** ------------------
775: **
776: ** On entry,
777: ** dtd represents the DTD, along with
778: ** actions is the sink for the data as a set of routines.
779: **
780: */
1.32 frystyk 781: PUBLIC HTStream * SGML_new (CONST SGML_dtd * dtd, HTStructured * target)
1.2 timbl 782: {
783: int i;
1.36 frystyk 784: HTStream* context;
785: if ((context = (HTStream *) HT_MALLOC(sizeof(*context))) == NULL)
786: HT_OUTOFMEM("SGML_begin");
1.2 timbl 787:
788: context->isa = &SGMLParser;
1.33 frystyk 789: context->string = HTChunk_new(128); /* Grow by this much */
1.2 timbl 790: context->dtd = dtd;
791: context->target = target;
792: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
793: /* Ugh: no OO */
794: context->state = S_text;
795: context->element_stack = 0; /* empty */
796: #ifdef CALLERDATA
797: context->callerData = (void*) callerData;
798: #endif
799: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
800:
801: return context;
802: }
Webmaster