Annotation of libwww/Library/src/SGML.c, revision 1.24
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This module implements an HTStream object. To parse an
1.1 timbl 8: ** SGML file, create this object which is a parser. The object
1.2 timbl 9: ** is (currently) created by being passed a DTD structure,
10: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **
1.19 duns 12: ** 6 Feb 93 Binary searches used. Interface modified.
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
15:
1.20 frystyk 16: /* System dependent stuff */
1.24 ! roeber 17: #include "sysdep.h"
1.20 frystyk 18:
19: /* Library includes */
1.1 timbl 20: #include "HTUtils.h"
21: #include "HTChunk.h"
1.20 frystyk 22: #include "SGML.h"
1.1 timbl 23:
1.2 timbl 24: #define INVALID (-1)
25:
1.1 timbl 26: /* The State (context) of the parser
27: **
1.2 timbl 28: ** This is passed with each call to make the parser reentrant
1.1 timbl 29: **
30: */
31:
1.16 frystyk 32:
1.2 timbl 33:
34:
35: /* Element Stack
36: ** -------------
37: ** This allows us to return down the stack reselecting styles.
38: ** As we return, attribute values will be garbage in general.
** Each node is a malloc'd singly linked list entry; the head of the list
** (context->element_stack) is the innermost currently open element.
39: */
40: typedef struct _HTElement HTElement;
41: struct _HTElement {
42: HTElement * next; /* Previously nested element or 0 */
43: HTTag* tag; /* The tag at this level */
44: };
45:
46:
/* Parser states, one per position in the character-level state machine
** driven by SGML_character().
*/
1.21 frystyk 47: typedef enum _sgml_state {
48: S_text, S_literal, S_tag, S_tag_gap, /* plain text; inside a literal element after '<'; reading a tag name; between attributes */
49: S_attr, S_attr_gap, S_equals, S_value, S_after_open, /* attribute name; after a name; after '='; unquoted value; just after a start tag's '>' */
50: S_nl, S_nl_tago, /* newline held back; newline followed by '<' */
51: S_ero, S_cro, /* after '&'; after "&#" (character reference) */
52: #ifdef ISO_2022_JP
53: S_esc, S_dollar, S_paren, S_nonascii_text, /* escape-sequence handling; text is passed through unparsed */
54: #endif
55: S_squoted, S_dquoted, S_end, S_entity, S_junk_tag /* quoted values; after "</"; entity name; skipping to '>' */
56: } sgml_state;
57:
58:
1.2 timbl 59: /* Internal Context Data Structure
60: ** -------------------------------
** One instance per parser, allocated by SGML_new() and released by
** SGML_free() or SGML_abort().
61: */
62: struct _HTStream {
63:
64: CONST HTStreamClass * isa; /* inherited from HTStream */
65:
66: CONST SGML_dtd *dtd; /* tag and entity tables used for lookups */
67: HTStructuredClass *actions; /* target class */
68: HTStructured *target; /* target object */
69:
1.1 timbl 70: HTTag *current_tag; /* tag most recently recognised in S_tag / S_end */
1.2 timbl 71: int current_attribute_number; /* index into present[]/value[], or INVALID */
1.1 timbl 72: HTChunk *string; /* accumulates the name or value being read */
73: HTElement *element_stack; /* currently open non-empty elements, innermost first */
1.21 frystyk 74: sgml_state state; /* current state of the character state machine */
1.2 timbl 75: #ifdef CALLERDATA
1.1 timbl 76: void * callerData; /* opaque pointer stored and returned for the caller */
1.2 timbl 77: #endif
78: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
79: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
80: } ;
81:
82:
/* Send one character to the target's put_character method.
** Relies on a variable named `context' (HTStream *) being in scope.
*/
83: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
84:
1.1 timbl 85:
1.17 timbl 86: /* Find Attribute Number
87: ** ---------------------
88: */
89:
90: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
91: {
92: attr* attributes = tag->attributes;
93:
94: int high, low, i, diff; /* Binary search for attribute name */
95: for(low=0, high=tag->number_of_attributes;
96: high > low ;
97: diff < 0 ? (low = i+1) : (high = i) ) {
98: i = (low + (high-low)/2);
99: diff = strcasecomp(attributes[i].name, s);
100: if (diff==0) return i; /* success: found it */
101: } /* for */
102:
103: return -1;
104: }
105:
1.1 timbl 106:
107: /* Handle Attribute
108: ** ----------------
109: */
110: /* PUBLIC CONST char * SGML_default = ""; ?? */
111:
1.21 frystyk 112: PRIVATE void handle_attribute_name ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 113: {
1.2 timbl 114:
115: HTTag * tag = context->current_tag;
116:
1.17 timbl 117: int i = SGMLFindAttribute(tag, s);
118: if (i>=0) {
119: context->current_attribute_number = i;
120: context->present[i] = YES;
121: if (context->value[i]) {
122: free(context->value[i]);
123: context->value[i] = NULL;
124: }
125: return;
126: } /* if */
1.2 timbl 127:
1.20 frystyk 128: if (SGML_TRACE)
1.2 timbl 129: fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
130: s, context->current_tag->name);
131: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 132: }
133:
134:
135: /* Handle attribute value
136: ** ----------------------
137: */
1.21 frystyk 138: PRIVATE void handle_attribute_value ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 139: {
1.2 timbl 140: if (context->current_attribute_number != INVALID) {
141: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 142: } else {
1.20 frystyk 143: if (SGML_TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 144: }
1.2 timbl 145: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 146: }
147:
1.2 timbl 148:
1.1 timbl 149: /* Handle entity
150: ** -------------
151: **
152: ** On entry,
153: ** s contains the entity name zero terminated
154: ** Bugs:
155: ** If the entity name is unknown, the terminator is treated as
156: ** a printable non-special character in all cases, even if it is '<'
157: */
1.21 frystyk 158: PRIVATE void handle_entity ARGS2(HTStream *, context, char, term)
1.1 timbl 159: {
1.2 timbl 160:
1.3 timbl 161: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 162: CONST char *s = context->string->data;
1.2 timbl 163:
164: int high, low, i, diff;
165: for(low=0, high = context->dtd->number_of_entities;
166: high > low ;
167: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
168: i = (low + (high-low)/2);
169: diff = strcmp(entities[i], s); /* Csse sensitive! */
170: if (diff==0) { /* success: found it */
171: (*context->actions->put_entity)(context->target, i);
172: return;
1.1 timbl 173: }
174: }
175: /* If entity string not found, display as text */
1.20 frystyk 176: if (SGML_TRACE)
1.1 timbl 177: fprintf(stderr, "SGML: Unknown entity %s\n", s);
1.2 timbl 178: PUTC('&');
1.1 timbl 179: {
180: CONST char *p;
181: for (p=s; *p; p++) {
1.2 timbl 182: PUTC(*p);
1.1 timbl 183: }
184: }
1.2 timbl 185: PUTC(term);
1.1 timbl 186: }
187:
1.2 timbl 188:
1.1 timbl 189: /* End element
1.2 timbl 190: ** -----------
1.1 timbl 191: */
1.21 frystyk 192: PRIVATE void end_element ARGS2(HTStream *, context, HTTag *, old_tag)
1.1 timbl 193: {
1.20 frystyk 194: if (SGML_TRACE) fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 195: if (old_tag->contents == SGML_EMPTY) {
1.20 frystyk 196: if (SGML_TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 197: old_tag->name);
198: return;
199: }
200: while (context->element_stack) {/* Loop is error path only */
201: HTElement * N = context->element_stack;
202: HTTag * t = N->tag;
203:
204: if (old_tag != t) { /* Mismatch: syntax error */
205: if (context->element_stack->next) { /* This is not the last level */
1.20 frystyk 206: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 207: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
208: old_tag->name, t->name, t->name);
209: } else { /* last level */
1.20 frystyk 210: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 211: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
212: old_tag->name, t->name, old_tag->name);
213: return; /* Ignore */
214: }
215: }
216:
217: context->element_stack = N->next; /* Remove from stack */
218: free(N);
1.2 timbl 219: (*context->actions->end_element)(context->target,
220: t - context->dtd->tags);
1.1 timbl 221: if (old_tag == t) return; /* Correct sequence */
222:
223: /* Syntax error path only */
224:
225: }
1.20 frystyk 226: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 227: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
228: }
229:
230:
1.17 timbl 231: /* Start an element
232: ** ----------------
1.1 timbl 233: */
1.21 frystyk 234: PRIVATE void start_element ARGS1(HTStream *, context)
1.1 timbl 235: {
236: HTTag * new_tag = context->current_tag;
237:
1.20 frystyk 238: if (SGML_TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 239: (*context->actions->start_element)(
240: context->target,
241: new_tag - context->dtd->tags,
242: context->present,
1.3 timbl 243: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 244: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 245: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
246: if (N == NULL) outofmem(__FILE__, "start_element");
247: N->next = context->element_stack;
248: N->tag = new_tag;
249: context->element_stack = N;
250: }
251: }
252:
253:
1.2 timbl 254: /* Find Tag in DTD tag list
255: ** ------------------------
1.1 timbl 256: **
257: ** On entry,
1.2 timbl 258: ** dtd points to dtd structire including valid tag list
259: ** string points to name of tag in question
1.1 timbl 260: **
1.2 timbl 261: ** On exit,
262: ** returns:
1.7 timbl 263: ** NULL tag not found
264: ** else address of tag structure in dtd
1.2 timbl 265: */
1.11 timbl 266: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 267: {
268: int high, low, i, diff;
269: for(low=0, high=dtd->number_of_tags;
270: high > low ;
271: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
272: i = (low + (high-low)/2);
1.3 timbl 273: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 274: if (diff==0) { /* success: found it */
1.7 timbl 275: return &dtd->tags[i];
1.2 timbl 276: }
277: }
1.7 timbl 278: return NULL;
1.2 timbl 279: }
280:
281: /*________________________________________________________________________
282: ** Public Methods
1.1 timbl 283: */
284:
1.2 timbl 285:
286: /* Could check that we are back to bottom of stack! @@ */
1.1 timbl 287:
1.22 frystyk 288: PUBLIC int SGML_free ARGS1(HTStream *, context)
1.8 timbl 289: {
1.14 frystyk 290: int cnt;
291:
1.15 frystyk 292: while (context->element_stack) { /* Make sure, that all tags are gone */
293: HTElement *ptr = context->element_stack;
294:
1.20 frystyk 295: if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 296: context->element_stack->tag->name);
297: context->element_stack = ptr->next;
298: free(ptr);
299: }
1.19 duns 300: (*context->actions->_free)(context->target);
1.8 timbl 301: HTChunkFree(context->string);
1.15 frystyk 302: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 303: if(context->value[cnt])
304: free(context->value[cnt]);
1.8 timbl 305: free(context);
1.22 frystyk 306: return 0;
1.1 timbl 307: }
308:
1.22 frystyk 309: PUBLIC int SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 310: {
1.14 frystyk 311: int cnt;
312:
1.15 frystyk 313: while (context->element_stack) { /* Make sure, that all tags are gone */
314: HTElement *ptr = context->element_stack;
315:
1.20 frystyk 316: if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 317: context->element_stack->tag->name);
318: context->element_stack = ptr->next;
319: free(ptr);
320: }
1.8 timbl 321: (*context->actions->abort)(context->target, e);
1.1 timbl 322: HTChunkFree(context->string);
1.14 frystyk 323: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
324: if(context->value[cnt])
325: free(context->value[cnt]);
1.1 timbl 326: free(context);
1.22 frystyk 327: return EOF;
1.1 timbl 328: }
329:
1.2 timbl 330:
/*	Read and write user callback handle
**	-----------------------------------
**
**   The callbacks from the SGML parser have an SGML context parameter.
**   These calls allow the caller to associate his own context with a
**   particular SGML context.
**
**   Both routines exist only when CALLERDATA is defined.
*/

#ifdef CALLERDATA
/*	Return the pointer last stored with SGML_setCallerData()
*/
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * stored = context->callerData;

    return stored;
}

/*	Remember the caller's own pointer in this parser context
*/
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
    return;
}
#endif
1.1 timbl 350:
1.2 timbl 351: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 352:
353: {
1.2 timbl 354: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 355: HTChunk *string = context->string;
356:
357: switch(context->state) {
1.18 timbl 358:
359: case S_after_open: /* Strip one trainling newline
360: only after opening nonempty element. - SGML:Ugh! */
361: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
362: break;
363: }
364: context->state = S_text;
365: goto normal_text;
366: /* (***falls through***) */
367:
1.1 timbl 368: case S_text:
1.18 timbl 369: normal_text:
370:
1.13 timbl 371: #ifdef ISO_2022_JP
372: if (c=='\033') {
373: context->state = S_esc;
374: PUTC(c);
375: break;
376: }
377: #endif /* ISO_2022_JP */
1.6 timbl 378: if (c=='&' && (!context->element_stack || (
379: context->element_stack->tag &&
380: ( context->element_stack->tag->contents == SGML_MIXED
381: || context->element_stack->tag->contents ==
382: SGML_RCDATA)
383: ))) {
1.1 timbl 384: string->size = 0;
385: context->state = S_ero;
386:
387: } else if (c=='<') {
388: string->size = 0;
389: context->state = (context->element_stack &&
1.13 timbl 390: context->element_stack->tag &&
391: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 392: S_literal : S_tag;
1.18 timbl 393: } else if (c=='\n') { /* Newline - ignore if before tag end! */
394: context->state = S_nl;
1.2 timbl 395: } else PUTC(c);
1.1 timbl 396: break;
1.13 timbl 397:
1.18 timbl 398: case S_nl:
399: if (c=='<') {
400: string->size = 0;
401: context->state = (context->element_stack &&
402: context->element_stack->tag &&
403: context->element_stack->tag->contents == SGML_LITERAL) ?
404: S_literal : S_nl_tago;
405: } else {
406: PUTC('\n');
407: context->state = S_text;
408: goto normal_text;
409: }
410: break;
411:
412: case S_nl_tago: /* Had newline and tag opener */
413: if (c != '/') {
414: PUTC('\n'); /* Only ignore newline before </ */
415: }
416: context->state = S_tag;
417: goto handle_S_tag;
418:
1.13 timbl 419: #ifdef ISO_2022_JP
420: case S_esc:
421: if (c=='$') {
422: context->state = S_dollar;
423: } else if (c=='(') {
424: context->state = S_paren;
425: } else {
426: context->state = S_text;
427: }
428: PUTC(c);
429: break;
430: case S_dollar:
431: if (c=='@' || c=='B') {
432: context->state = S_nonascii_text;
433: } else {
434: context->state = S_text;
435: }
436: PUTC(c);
437: break;
438: case S_paren:
439: if (c=='B' || c=='J') {
440: context->state = S_text;
441: } else {
442: context->state = S_text;
443: }
444: PUTC(c);
445: break;
446: case S_nonascii_text:
447: if (c=='\033') {
448: context->state = S_esc;
449: PUTC(c);
450: } else {
451: PUTC(c);
452: }
453: break;
454: #endif /* ISO_2022_JP */
1.1 timbl 455:
1.12 timbl 456: /* In literal mode, waits only for specific end tag!
1.2 timbl 457: ** Only foir compatibility with old servers.
1.1 timbl 458: */
1.12 timbl 459: case S_literal :
1.1 timbl 460: HTChunkPutc(string, c);
461: if ( TOUPPER(c) != ((string->size ==1) ? '/'
462: : context->element_stack->tag->name[string->size-2])) {
463: int i;
464:
1.12 timbl 465: /* If complete match, end literal */
1.1 timbl 466: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
467: end_element(context, context->element_stack->tag);
468: string->size = 0;
1.2 timbl 469: context->current_attribute_number = INVALID;
1.1 timbl 470: context->state = S_text;
471: break;
472: } /* If Mismatch: recover string. */
1.2 timbl 473: PUTC( '<');
1.1 timbl 474: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 475: PUTC(
1.1 timbl 476: string->data[i]);
477: context->state = S_text;
478: }
479:
480: break;
481:
482: /* Character reference or Entity
483: */
484: case S_ero:
485: if (c=='#') {
486: context->state = S_cro; /* &# is Char Ref Open */
487: break;
488: }
489: context->state = S_entity; /* Fall through! */
490:
491: /* Handle Entities
492: */
493: case S_entity:
494: if (isalnum(c))
495: HTChunkPutc(string, c);
496: else {
497: HTChunkTerminate(string);
498: handle_entity(context, c);
499: context->state = S_text;
500: }
501: break;
502:
503: /* Character reference
504: */
505: case S_cro:
506: if (isalnum(c))
507: HTChunkPutc(string, c); /* accumulate a character NUMBER */
508: else {
509: int value;
510: HTChunkTerminate(string);
511: if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 512: PUTC(FROMASCII((char)value));
1.1 timbl 513: context->state = S_text;
514: }
515: break;
516:
517: /* Tag
518: */
519: case S_tag: /* new tag */
1.18 timbl 520: handle_S_tag:
521:
1.1 timbl 522: if (isalnum(c))
523: HTChunkPutc(string, c);
524: else { /* End of tag name */
1.7 timbl 525: HTTag * t;
1.1 timbl 526: if (c=='/') {
1.20 frystyk 527: if (SGML_TRACE) if (string->size!=0)
1.1 timbl 528: fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
529: context->state = S_end;
530: break;
531: }
532: HTChunkTerminate(string) ;
1.2 timbl 533:
1.10 timbl 534: t = SGMLFindTag(dtd, string->data);
1.7 timbl 535: if (!t) {
1.20 frystyk 536: if(SGML_TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 537: string->data);
538: context->state = (c=='>') ? S_text : S_junk_tag;
539: break;
540: }
1.7 timbl 541: context->current_tag = t;
1.2 timbl 542:
543: /* Clear out attributes
544: */
1.1 timbl 545:
1.2 timbl 546: {
547: int i;
548: for (i=0; i< context->current_tag->number_of_attributes; i++)
549: context->present[i] = NO;
1.1 timbl 550: }
551: string->size = 0;
1.2 timbl 552: context->current_attribute_number = INVALID;
1.1 timbl 553:
554: if (c=='>') {
555: if (context->current_tag->name) start_element(context);
1.18 timbl 556: context->state = S_after_open;
1.1 timbl 557: } else {
558: context->state = S_tag_gap;
559: }
560: }
561: break;
562:
563:
564: case S_tag_gap: /* Expecting attribute or > */
565: if (WHITE(c)) break; /* Gap between attributes */
566: if (c=='>') { /* End of tag */
567: if (context->current_tag->name) start_element(context);
1.18 timbl 568: context->state = S_after_open;
1.1 timbl 569: break;
570: }
571: HTChunkPutc(string, c);
572: context->state = S_attr; /* Get attribute */
573: break;
574:
575: /* accumulating value */
576: case S_attr:
577: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
578: HTChunkTerminate(string) ;
579: handle_attribute_name(context, string->data);
580: string->size = 0;
581: if (c=='>') { /* End of tag */
582: if (context->current_tag->name) start_element(context);
1.18 timbl 583: context->state = S_after_open;
1.1 timbl 584: break;
585: }
586: context->state = (c=='=' ? S_equals: S_attr_gap);
587: } else {
588: HTChunkPutc(string, c);
589: }
590: break;
591:
592: case S_attr_gap: /* Expecting attribute or = or > */
593: if (WHITE(c)) break; /* Gap after attribute */
594: if (c=='>') { /* End of tag */
595: if (context->current_tag->name) start_element(context);
1.18 timbl 596: context->state = S_after_open;
1.1 timbl 597: break;
598: } else if (c=='=') {
599: context->state = S_equals;
600: break;
601: }
602: HTChunkPutc(string, c);
603: context->state = S_attr; /* Get next attribute */
604: break;
605:
606: case S_equals: /* After attr = */
607: if (WHITE(c)) break; /* Before attribute value */
608: if (c=='>') { /* End of tag */
1.20 frystyk 609: if (SGML_TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 610: if (context->current_tag->name) start_element(context);
1.18 timbl 611: context->state = S_after_open;
1.1 timbl 612: break;
613:
614: } else if (c=='\'') {
615: context->state = S_squoted;
616: break;
617:
618: } else if (c=='"') {
619: context->state = S_dquoted;
620: break;
621: }
622: HTChunkPutc(string, c);
623: context->state = S_value;
624: break;
625:
626: case S_value:
627: if (WHITE(c) || (c=='>')) { /* End of word */
628: HTChunkTerminate(string) ;
629: handle_attribute_value(context, string->data);
630: string->size = 0;
631: if (c=='>') { /* End of tag */
632: if (context->current_tag->name) start_element(context);
1.18 timbl 633: context->state = S_after_open;
1.1 timbl 634: break;
635: }
636: else context->state = S_tag_gap;
637: } else {
638: HTChunkPutc(string, c);
639: }
640: break;
641:
642: case S_squoted: /* Quoted attribute value */
643: if (c=='\'') { /* End of attribute value */
644: HTChunkTerminate(string) ;
645: handle_attribute_value(context, string->data);
646: string->size = 0;
647: context->state = S_tag_gap;
648: } else {
649: HTChunkPutc(string, c);
650: }
651: break;
652:
653: case S_dquoted: /* Quoted attribute value */
654: if (c=='"') { /* End of attribute value */
655: HTChunkTerminate(string) ;
656: handle_attribute_value(context, string->data);
657: string->size = 0;
658: context->state = S_tag_gap;
659: } else {
660: HTChunkPutc(string, c);
661: }
662: break;
663:
664: case S_end: /* </ */
665: if (isalnum(c))
666: HTChunkPutc(string, c);
667: else { /* End of end tag name */
1.7 timbl 668: HTTag * t;
1.1 timbl 669: HTChunkTerminate(string) ;
1.7 timbl 670: if (!*string->data) { /* Empty end tag */
671: t = context->element_stack->tag;
672: } else {
1.10 timbl 673: t = SGMLFindTag(dtd, string->data);
1.1 timbl 674: }
1.7 timbl 675: if (!t) {
1.20 frystyk 676: if(SGML_TRACE) fprintf(stderr,
1.1 timbl 677: "Unknown end tag </%s>\n", string->data);
1.2 timbl 678: } else {
1.7 timbl 679: context->current_tag = t;
1.2 timbl 680: end_element( context, context->current_tag);
1.1 timbl 681: }
1.2 timbl 682:
1.1 timbl 683: string->size = 0;
1.2 timbl 684: context->current_attribute_number = INVALID;
1.7 timbl 685: if (c!='>') {
1.20 frystyk 686: if (SGML_TRACE && !WHITE(c))
1.7 timbl 687: fprintf(stderr,"SGML: `</%s%c' found!\n",
688: string->data, c);
689: context->state = S_junk_tag;
690: } else {
691: context->state = S_text;
692: }
1.1 timbl 693: }
694: break;
695:
696:
697: case S_junk_tag:
698: if (c=='>') {
699: context->state = S_text;
700: }
701:
702: } /* switch on context->state */
703:
704: } /* SGML_character */
1.2 timbl 705:
706:
707: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
708: {
709: CONST char *p;
710: for(p=str; *p; p++)
711: SGML_character(context, *p);
712: }
713:
714:
715: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
716: {
717: CONST char *p;
718: CONST char *e = str+l;
719: for(p=str; p<e; p++)
720: SGML_character(context, *p);
721: }
722:
723: /*_______________________________________________________________________
724: */
725:
726: /* Structured Object Class
727: ** -----------------------
** Stream class record installed as context->isa by SGML_new().
** NOTE(review): the member order is dictated by the HTStreamClass
** declaration, which is not visible in this file - confirm before reordering.
728: */
729: PUBLIC CONST HTStreamClass SGMLParser =
730: {
731: "SGMLParser", /* presumably the class name */
732: SGML_free, /* normal end of stream */
1.8 timbl 733: SGML_abort, /* abnormal termination */
1.9 timbl 734: SGML_character, /* parse one character */
735: SGML_string, /* parse a zero terminated string */
736: SGML_write, /* parse a counted buffer */
1.2 timbl 737: };
738:
739: /* Create SGML Engine
740: ** ------------------
741: **
742: ** On entry,
743: ** dtd represents the DTD, along with
744: ** actions is the sink for the data as a set of routines.
745: **
746: */
747:
748: PUBLIC HTStream* SGML_new ARGS2(
749: CONST SGML_dtd *, dtd,
750: HTStructured *, target)
751: {
752: int i;
753: HTStream* context = (HTStream *) malloc(sizeof(*context));
754: if (!context) outofmem(__FILE__, "SGML_begin");
755:
756: context->isa = &SGMLParser;
757: context->string = HTChunkCreate(128); /* Grow by this much */
758: context->dtd = dtd;
759: context->target = target;
760: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
761: /* Ugh: no OO */
762: context->state = S_text;
763: context->element_stack = 0; /* empty */
764: #ifdef CALLERDATA
765: context->callerData = (void*) callerData;
766: #endif
767: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
768:
769: return context;
770: }
1.14 frystyk 771:
772:
773:
774:
775:
776:
777:
778:
779:
780:
781:
1.2 timbl 782:
Webmaster