Annotation of libwww/Library/src/SGML.c, revision 1.18
1.1 timbl 1: /* General SGML Parser code SGML.c
2: ** ========================
3: **
1.2 timbl 4: ** This module implements an HTStream object. To parse an
1.1 timbl 5: ** SGML file, create this object which is a parser. The object
1.2 timbl 6: ** is (currently) created by being passed a DTD structure,
7: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 8: **
1.2 timbl 9: ** 6 Feb 93 Binary searches used. Interface modified.
1.1 timbl 10: */
11: #include "SGML.h"
12:
13: #include <ctype.h>
14: #include <stdio.h>
15: #include "HTUtils.h"
16: #include "HTChunk.h"
17: #include "tcp.h" /* For FROMASCII */
18:
1.2 timbl 19: #define INVALID (-1)
20:
1.1 timbl 21: /* The State (context) of the parser
22: **
1.2 timbl 23: ** This is passed with each call to make the parser reentrant
1.1 timbl 24: **
25: */
26:
1.16 frystyk 27:
1.2 timbl 28:
29:
30: /* Element Stack
31: ** -------------
32: ** This allows us to return down the stack reselcting styles.
33: ** As we return, attribute values will be garbage in general.
34: */
35: typedef struct _HTElement HTElement;
36: struct _HTElement {
37: HTElement * next; /* Previously nested element or 0 */
38: HTTag* tag; /* The tag at this level */
39: };
40:
41:
42: /* Internal Context Data Structure
43: ** -------------------------------
44: */
45: struct _HTStream {
46:
47: CONST HTStreamClass * isa; /* inherited from HTStream */
48:
49: CONST SGML_dtd *dtd;
50: HTStructuredClass *actions; /* target class */
51: HTStructured *target; /* target object */
52:
1.1 timbl 53: HTTag *current_tag;
1.2 timbl 54: int current_attribute_number;
1.1 timbl 55: HTChunk *string;
56: HTElement *element_stack;
1.12 timbl 57: enum sgml_state { S_text, S_literal, S_tag, S_tag_gap,
1.18 ! timbl 58: S_attr, S_attr_gap, S_equals, S_value, S_after_open,
! 59: S_nl, S_nl_tago,
1.1 timbl 60: S_ero, S_cro,
1.13 timbl 61: #ifdef ISO_2022_JP
62: S_esc, S_dollar, S_paren, S_nonascii_text,
63: #endif
1.1 timbl 64: S_squoted, S_dquoted, S_end, S_entity, S_junk_tag} state;
1.2 timbl 65: #ifdef CALLERDATA
1.1 timbl 66: void * callerData;
1.2 timbl 67: #endif
68: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
69: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
70: } ;
71:
72:
73: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
74:
1.1 timbl 75:
1.17 timbl 76: /* Find Attribute Number
77: ** ---------------------
78: */
79:
80: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
81: {
82: attr* attributes = tag->attributes;
83:
84: int high, low, i, diff; /* Binary search for attribute name */
85: for(low=0, high=tag->number_of_attributes;
86: high > low ;
87: diff < 0 ? (low = i+1) : (high = i) ) {
88: i = (low + (high-low)/2);
89: diff = strcasecomp(attributes[i].name, s);
90: if (diff==0) return i; /* success: found it */
91: } /* for */
92:
93: return -1;
94: }
95:
1.1 timbl 96:
97: /* Handle Attribute
98: ** ----------------
99: */
100: /* PUBLIC CONST char * SGML_default = ""; ?? */
101:
102: #ifdef __STDC__
1.17 timbl 103: PRIVATE void handle_attribute_name(HTStream * context, CONST char * s)
1.1 timbl 104: #else
105: PRIVATE void handle_attribute_name(context, s)
1.2 timbl 106: HTStream * context;
1.1 timbl 107: char *s;
108: #endif
109: {
1.2 timbl 110:
111: HTTag * tag = context->current_tag;
112:
1.17 timbl 113: int i = SGMLFindAttribute(tag, s);
114: if (i>=0) {
115: context->current_attribute_number = i;
116: context->present[i] = YES;
117: if (context->value[i]) {
118: free(context->value[i]);
119: context->value[i] = NULL;
120: }
121: return;
122: } /* if */
1.2 timbl 123:
124: if (TRACE)
125: fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
126: s, context->current_tag->name);
127: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 128: }
129:
130:
131: /* Handle attribute value
132: ** ----------------------
133: */
134: #ifdef __STDC__
1.2 timbl 135: PRIVATE void handle_attribute_value(HTStream * context, const char * s)
1.1 timbl 136: #else
137: PRIVATE void handle_attribute_value(context, s)
1.2 timbl 138: HTStream * context;
1.1 timbl 139: char *s;
140: #endif
141: {
1.2 timbl 142: if (context->current_attribute_number != INVALID) {
143: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 144: } else {
145: if (TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
146: }
1.2 timbl 147: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 148: }
149:
1.2 timbl 150:
1.1 timbl 151: /* Handle entity
152: ** -------------
153: **
154: ** On entry,
155: ** s contains the entity name zero terminated
156: ** Bugs:
157: ** If the entity name is unknown, the terminator is treated as
158: ** a printable non-special character in all cases, even if it is '<'
159: */
160: #ifdef __STDC__
1.2 timbl 161: PRIVATE void handle_entity(HTStream * context, char term)
1.1 timbl 162: #else
163: PRIVATE void handle_entity(context, term)
1.2 timbl 164: HTStream * context;
1.1 timbl 165: char term;
166: #endif
167: {
1.2 timbl 168:
1.3 timbl 169: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 170: CONST char *s = context->string->data;
1.2 timbl 171:
172: int high, low, i, diff;
173: for(low=0, high = context->dtd->number_of_entities;
174: high > low ;
175: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
176: i = (low + (high-low)/2);
177: diff = strcmp(entities[i], s); /* Csse sensitive! */
178: if (diff==0) { /* success: found it */
179: (*context->actions->put_entity)(context->target, i);
180: return;
1.1 timbl 181: }
182: }
183: /* If entity string not found, display as text */
184: if (TRACE)
185: fprintf(stderr, "SGML: Unknown entity %s\n", s);
1.2 timbl 186: PUTC('&');
1.1 timbl 187: {
188: CONST char *p;
189: for (p=s; *p; p++) {
1.2 timbl 190: PUTC(*p);
1.1 timbl 191: }
192: }
1.2 timbl 193: PUTC(term);
1.1 timbl 194: }
195:
1.2 timbl 196:
1.1 timbl 197: /* End element
1.2 timbl 198: ** -----------
1.1 timbl 199: */
200: #ifdef __STDC__
1.2 timbl 201: PRIVATE void end_element(HTStream * context, HTTag * old_tag)
1.1 timbl 202: #else
203: PRIVATE void end_element(context, old_tag)
204: HTTag * old_tag;
1.2 timbl 205: HTStream * context;
1.1 timbl 206: #endif
207: {
208: if (TRACE) fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 209: if (old_tag->contents == SGML_EMPTY) {
1.1 timbl 210: if (TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
211: old_tag->name);
212: return;
213: }
214: while (context->element_stack) {/* Loop is error path only */
215: HTElement * N = context->element_stack;
216: HTTag * t = N->tag;
217:
218: if (old_tag != t) { /* Mismatch: syntax error */
219: if (context->element_stack->next) { /* This is not the last level */
220: if (TRACE) fprintf(stderr,
221: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
222: old_tag->name, t->name, t->name);
223: } else { /* last level */
224: if (TRACE) fprintf(stderr,
225: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
226: old_tag->name, t->name, old_tag->name);
227: return; /* Ignore */
228: }
229: }
230:
231: context->element_stack = N->next; /* Remove from stack */
232: free(N);
1.2 timbl 233: (*context->actions->end_element)(context->target,
234: t - context->dtd->tags);
1.1 timbl 235: if (old_tag == t) return; /* Correct sequence */
236:
237: /* Syntax error path only */
238:
239: }
1.5 timbl 240: if (TRACE) fprintf(stderr,
1.1 timbl 241: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
242: }
243:
244:
1.17 timbl 245: /* Start an element
246: ** ----------------
1.1 timbl 247: */
248: #ifdef __STDC__
1.2 timbl 249: PRIVATE void start_element(HTStream * context)
1.1 timbl 250: #else
251: PRIVATE void start_element(context)
1.2 timbl 252: HTStream * context;
1.1 timbl 253: #endif
254: {
255: HTTag * new_tag = context->current_tag;
256:
257: if (TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 258: (*context->actions->start_element)(
259: context->target,
260: new_tag - context->dtd->tags,
261: context->present,
1.3 timbl 262: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 263: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 264: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
265: if (N == NULL) outofmem(__FILE__, "start_element");
266: N->next = context->element_stack;
267: N->tag = new_tag;
268: context->element_stack = N;
269: }
270: }
271:
272:
1.2 timbl 273: /* Find Tag in DTD tag list
274: ** ------------------------
1.1 timbl 275: **
276: ** On entry,
1.2 timbl 277: ** dtd points to dtd structire including valid tag list
278: ** string points to name of tag in question
1.1 timbl 279: **
1.2 timbl 280: ** On exit,
281: ** returns:
1.7 timbl 282: ** NULL tag not found
283: ** else address of tag structure in dtd
1.2 timbl 284: */
1.11 timbl 285: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 286: {
287: int high, low, i, diff;
288: for(low=0, high=dtd->number_of_tags;
289: high > low ;
290: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
291: i = (low + (high-low)/2);
1.3 timbl 292: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 293: if (diff==0) { /* success: found it */
1.7 timbl 294: return &dtd->tags[i];
1.2 timbl 295: }
296: }
1.7 timbl 297: return NULL;
1.2 timbl 298: }
299:
300: /*________________________________________________________________________
301: ** Public Methods
1.1 timbl 302: */
303:
1.2 timbl 304:
305: /* Could check that we are back to bottom of stack! @@ */
1.1 timbl 306:
1.8 timbl 307: PUBLIC void SGML_free ARGS1(HTStream *, context)
308: {
1.14 frystyk 309: int cnt;
310:
1.15 frystyk 311: while (context->element_stack) { /* Make sure, that all tags are gone */
312: HTElement *ptr = context->element_stack;
313:
314: if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
315: context->element_stack->tag->name);
316: context->element_stack = ptr->next;
317: free(ptr);
318: }
1.8 timbl 319: (*context->actions->free)(context->target);
320: HTChunkFree(context->string);
1.15 frystyk 321: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 322: if(context->value[cnt])
323: free(context->value[cnt]);
1.8 timbl 324: free(context);
1.1 timbl 325: }
326:
1.8 timbl 327: PUBLIC void SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 328: {
1.14 frystyk 329: int cnt;
330:
1.15 frystyk 331: while (context->element_stack) { /* Make sure, that all tags are gone */
332: HTElement *ptr = context->element_stack;
333:
334: if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
335: context->element_stack->tag->name);
336: context->element_stack = ptr->next;
337: free(ptr);
338: }
1.8 timbl 339: (*context->actions->abort)(context->target, e);
1.1 timbl 340: HTChunkFree(context->string);
1.14 frystyk 341: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
342: if(context->value[cnt])
343: free(context->value[cnt]);
1.1 timbl 344: free(context);
345: }
346:
1.2 timbl 347:
/*	Read and write user callback handle
**	-----------------------------------
**
**   The callbacks from the SGML parser have an SGML context parameter.
**   These calls allow the caller to associate his own context with a
**   particular SGML context.  The parser only stores the pointer.
**   Both calls exist only when CALLERDATA is defined.
*/

#ifdef CALLERDATA
/* Return the handle last stored in this context */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * handle = context->callerData;
    return handle;
}

/* Store the caller's handle in this context */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
}
#endif
1.1 timbl 367:
1.2 timbl 368: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 369:
370: {
1.2 timbl 371: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 372: HTChunk *string = context->string;
373:
374: switch(context->state) {
1.18 ! timbl 375:
! 376: case S_after_open: /* Strip one trainling newline
! 377: only after opening nonempty element. - SGML:Ugh! */
! 378: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
! 379: break;
! 380: }
! 381: context->state = S_text;
! 382: goto normal_text;
! 383: /* (***falls through***) */
! 384:
1.1 timbl 385: case S_text:
1.18 ! timbl 386: normal_text:
! 387:
1.13 timbl 388: #ifdef ISO_2022_JP
389: if (c=='\033') {
390: context->state = S_esc;
391: PUTC(c);
392: break;
393: }
394: #endif /* ISO_2022_JP */
1.6 timbl 395: if (c=='&' && (!context->element_stack || (
396: context->element_stack->tag &&
397: ( context->element_stack->tag->contents == SGML_MIXED
398: || context->element_stack->tag->contents ==
399: SGML_RCDATA)
400: ))) {
1.1 timbl 401: string->size = 0;
402: context->state = S_ero;
403:
404: } else if (c=='<') {
405: string->size = 0;
406: context->state = (context->element_stack &&
1.13 timbl 407: context->element_stack->tag &&
408: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 409: S_literal : S_tag;
1.18 ! timbl 410: } else if (c=='\n') { /* Newline - ignore if before tag end! */
! 411: context->state = S_nl;
1.2 timbl 412: } else PUTC(c);
1.1 timbl 413: break;
1.13 timbl 414:
1.18 ! timbl 415: case S_nl:
! 416: if (c=='<') {
! 417: string->size = 0;
! 418: context->state = (context->element_stack &&
! 419: context->element_stack->tag &&
! 420: context->element_stack->tag->contents == SGML_LITERAL) ?
! 421: S_literal : S_nl_tago;
! 422: } else {
! 423: PUTC('\n');
! 424: context->state = S_text;
! 425: goto normal_text;
! 426: }
! 427: break;
! 428:
! 429: case S_nl_tago: /* Had newline and tag opener */
! 430: if (c != '/') {
! 431: PUTC('\n'); /* Only ignore newline before </ */
! 432: }
! 433: context->state = S_tag;
! 434: goto handle_S_tag;
! 435:
1.13 timbl 436: #ifdef ISO_2022_JP
437: case S_esc:
438: if (c=='$') {
439: context->state = S_dollar;
440: } else if (c=='(') {
441: context->state = S_paren;
442: } else {
443: context->state = S_text;
444: }
445: PUTC(c);
446: break;
447: case S_dollar:
448: if (c=='@' || c=='B') {
449: context->state = S_nonascii_text;
450: } else {
451: context->state = S_text;
452: }
453: PUTC(c);
454: break;
455: case S_paren:
456: if (c=='B' || c=='J') {
457: context->state = S_text;
458: } else {
459: context->state = S_text;
460: }
461: PUTC(c);
462: break;
463: case S_nonascii_text:
464: if (c=='\033') {
465: context->state = S_esc;
466: PUTC(c);
467: } else {
468: PUTC(c);
469: }
470: break;
471: #endif /* ISO_2022_JP */
1.1 timbl 472:
1.12 timbl 473: /* In literal mode, waits only for specific end tag!
1.2 timbl 474: ** Only foir compatibility with old servers.
1.1 timbl 475: */
1.12 timbl 476: case S_literal :
1.1 timbl 477: HTChunkPutc(string, c);
478: if ( TOUPPER(c) != ((string->size ==1) ? '/'
479: : context->element_stack->tag->name[string->size-2])) {
480: int i;
481:
1.12 timbl 482: /* If complete match, end literal */
1.1 timbl 483: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
484: end_element(context, context->element_stack->tag);
485: string->size = 0;
1.2 timbl 486: context->current_attribute_number = INVALID;
1.1 timbl 487: context->state = S_text;
488: break;
489: } /* If Mismatch: recover string. */
1.2 timbl 490: PUTC( '<');
1.1 timbl 491: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 492: PUTC(
1.1 timbl 493: string->data[i]);
494: context->state = S_text;
495: }
496:
497: break;
498:
499: /* Character reference or Entity
500: */
501: case S_ero:
502: if (c=='#') {
503: context->state = S_cro; /* &# is Char Ref Open */
504: break;
505: }
506: context->state = S_entity; /* Fall through! */
507:
508: /* Handle Entities
509: */
510: case S_entity:
511: if (isalnum(c))
512: HTChunkPutc(string, c);
513: else {
514: HTChunkTerminate(string);
515: handle_entity(context, c);
516: context->state = S_text;
517: }
518: break;
519:
520: /* Character reference
521: */
522: case S_cro:
523: if (isalnum(c))
524: HTChunkPutc(string, c); /* accumulate a character NUMBER */
525: else {
526: int value;
527: HTChunkTerminate(string);
528: if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 529: PUTC(FROMASCII((char)value));
1.1 timbl 530: context->state = S_text;
531: }
532: break;
533:
534: /* Tag
535: */
536: case S_tag: /* new tag */
1.18 ! timbl 537: handle_S_tag:
! 538:
1.1 timbl 539: if (isalnum(c))
540: HTChunkPutc(string, c);
541: else { /* End of tag name */
1.7 timbl 542: HTTag * t;
1.1 timbl 543: if (c=='/') {
544: if (TRACE) if (string->size!=0)
545: fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
546: context->state = S_end;
547: break;
548: }
549: HTChunkTerminate(string) ;
1.2 timbl 550:
1.10 timbl 551: t = SGMLFindTag(dtd, string->data);
1.7 timbl 552: if (!t) {
1.2 timbl 553: if(TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 554: string->data);
555: context->state = (c=='>') ? S_text : S_junk_tag;
556: break;
557: }
1.7 timbl 558: context->current_tag = t;
1.2 timbl 559:
560: /* Clear out attributes
561: */
1.1 timbl 562:
1.2 timbl 563: {
564: int i;
565: for (i=0; i< context->current_tag->number_of_attributes; i++)
566: context->present[i] = NO;
1.1 timbl 567: }
568: string->size = 0;
1.2 timbl 569: context->current_attribute_number = INVALID;
1.1 timbl 570:
571: if (c=='>') {
572: if (context->current_tag->name) start_element(context);
1.18 ! timbl 573: context->state = S_after_open;
1.1 timbl 574: } else {
575: context->state = S_tag_gap;
576: }
577: }
578: break;
579:
580:
581: case S_tag_gap: /* Expecting attribute or > */
582: if (WHITE(c)) break; /* Gap between attributes */
583: if (c=='>') { /* End of tag */
584: if (context->current_tag->name) start_element(context);
1.18 ! timbl 585: context->state = S_after_open;
1.1 timbl 586: break;
587: }
588: HTChunkPutc(string, c);
589: context->state = S_attr; /* Get attribute */
590: break;
591:
592: /* accumulating value */
593: case S_attr:
594: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
595: HTChunkTerminate(string) ;
596: handle_attribute_name(context, string->data);
597: string->size = 0;
598: if (c=='>') { /* End of tag */
599: if (context->current_tag->name) start_element(context);
1.18 ! timbl 600: context->state = S_after_open;
1.1 timbl 601: break;
602: }
603: context->state = (c=='=' ? S_equals: S_attr_gap);
604: } else {
605: HTChunkPutc(string, c);
606: }
607: break;
608:
609: case S_attr_gap: /* Expecting attribute or = or > */
610: if (WHITE(c)) break; /* Gap after attribute */
611: if (c=='>') { /* End of tag */
612: if (context->current_tag->name) start_element(context);
1.18 ! timbl 613: context->state = S_after_open;
1.1 timbl 614: break;
615: } else if (c=='=') {
616: context->state = S_equals;
617: break;
618: }
619: HTChunkPutc(string, c);
620: context->state = S_attr; /* Get next attribute */
621: break;
622:
623: case S_equals: /* After attr = */
624: if (WHITE(c)) break; /* Before attribute value */
625: if (c=='>') { /* End of tag */
1.5 timbl 626: if (TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 627: if (context->current_tag->name) start_element(context);
1.18 ! timbl 628: context->state = S_after_open;
1.1 timbl 629: break;
630:
631: } else if (c=='\'') {
632: context->state = S_squoted;
633: break;
634:
635: } else if (c=='"') {
636: context->state = S_dquoted;
637: break;
638: }
639: HTChunkPutc(string, c);
640: context->state = S_value;
641: break;
642:
643: case S_value:
644: if (WHITE(c) || (c=='>')) { /* End of word */
645: HTChunkTerminate(string) ;
646: handle_attribute_value(context, string->data);
647: string->size = 0;
648: if (c=='>') { /* End of tag */
649: if (context->current_tag->name) start_element(context);
1.18 ! timbl 650: context->state = S_after_open;
1.1 timbl 651: break;
652: }
653: else context->state = S_tag_gap;
654: } else {
655: HTChunkPutc(string, c);
656: }
657: break;
658:
659: case S_squoted: /* Quoted attribute value */
660: if (c=='\'') { /* End of attribute value */
661: HTChunkTerminate(string) ;
662: handle_attribute_value(context, string->data);
663: string->size = 0;
664: context->state = S_tag_gap;
665: } else {
666: HTChunkPutc(string, c);
667: }
668: break;
669:
670: case S_dquoted: /* Quoted attribute value */
671: if (c=='"') { /* End of attribute value */
672: HTChunkTerminate(string) ;
673: handle_attribute_value(context, string->data);
674: string->size = 0;
675: context->state = S_tag_gap;
676: } else {
677: HTChunkPutc(string, c);
678: }
679: break;
680:
681: case S_end: /* </ */
682: if (isalnum(c))
683: HTChunkPutc(string, c);
684: else { /* End of end tag name */
1.7 timbl 685: HTTag * t;
1.1 timbl 686: HTChunkTerminate(string) ;
1.7 timbl 687: if (!*string->data) { /* Empty end tag */
688: t = context->element_stack->tag;
689: } else {
1.10 timbl 690: t = SGMLFindTag(dtd, string->data);
1.1 timbl 691: }
1.7 timbl 692: if (!t) {
1.1 timbl 693: if(TRACE) fprintf(stderr,
694: "Unknown end tag </%s>\n", string->data);
1.2 timbl 695: } else {
1.7 timbl 696: context->current_tag = t;
1.2 timbl 697: end_element( context, context->current_tag);
1.1 timbl 698: }
1.2 timbl 699:
1.1 timbl 700: string->size = 0;
1.2 timbl 701: context->current_attribute_number = INVALID;
1.7 timbl 702: if (c!='>') {
703: if (TRACE && !WHITE(c))
704: fprintf(stderr,"SGML: `</%s%c' found!\n",
705: string->data, c);
706: context->state = S_junk_tag;
707: } else {
708: context->state = S_text;
709: }
1.1 timbl 710: }
711: break;
712:
713:
714: case S_junk_tag:
715: if (c=='>') {
716: context->state = S_text;
717: }
718:
719: } /* switch on context->state */
720:
721: } /* SGML_character */
1.2 timbl 722:
723:
724: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
725: {
726: CONST char *p;
727: for(p=str; *p; p++)
728: SGML_character(context, *p);
729: }
730:
731:
732: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
733: {
734: CONST char *p;
735: CONST char *e = str+l;
736: for(p=str; p<e; p++)
737: SGML_character(context, *p);
738: }
739:
740: /*_______________________________________________________________________
741: */
742:
743: /* Structured Object Class
744: ** -----------------------
745: */
746: PUBLIC CONST HTStreamClass SGMLParser =
747: {
748: "SGMLParser",
749: SGML_free,
1.8 timbl 750: SGML_abort,
1.9 timbl 751: SGML_character,
752: SGML_string,
753: SGML_write,
1.2 timbl 754: };
755:
756: /* Create SGML Engine
757: ** ------------------
758: **
759: ** On entry,
760: ** dtd represents the DTD, along with
761: ** actions is the sink for the data as a set of routines.
762: **
763: */
764:
765: PUBLIC HTStream* SGML_new ARGS2(
766: CONST SGML_dtd *, dtd,
767: HTStructured *, target)
768: {
769: int i;
770: HTStream* context = (HTStream *) malloc(sizeof(*context));
771: if (!context) outofmem(__FILE__, "SGML_begin");
772:
773: context->isa = &SGMLParser;
774: context->string = HTChunkCreate(128); /* Grow by this much */
775: context->dtd = dtd;
776: context->target = target;
777: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
778: /* Ugh: no OO */
779: context->state = S_text;
780: context->element_stack = 0; /* empty */
781: #ifdef CALLERDATA
782: context->callerData = (void*) callerData;
783: #endif
784: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
785:
786: return context;
787: }
1.14 frystyk 788:
789:
790:
791:
792:
793:
794:
795:
796:
797:
798:
1.2 timbl 799:
Webmaster