Annotation of libwww/Library/src/SGML.c, revision 1.19
1.1 timbl 1: /* General SGML Parser code SGML.c
2: ** ========================
3: **
1.2 timbl 4: ** This module implements an HTStream object. To parse an
1.1 timbl 5: ** SGML file, create this object which is a parser. The object
1.2 timbl 6: ** is (currently) created by being passed a DTD structure,
7: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 8: **
1.19 ! duns 9: ** 6 Feb 93 Binary searches used. Interface modified.
! 10: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 11: */
12: #include "SGML.h"
13:
14: #include <ctype.h>
15: #include <stdio.h>
16: #include "HTUtils.h"
17: #include "HTChunk.h"
18: #include "tcp.h" /* For FROMASCII */
19:
/* Sentinel kept in current_attribute_number while no known attribute
** is waiting to receive a value.
*/
#define INVALID (-1)
21:
1.1 timbl 22: /* The State (context) of the parser
23: **
1.2 timbl 24: ** This is passed with each call to make the parser reentrant
1.1 timbl 25: **
26: */
27:
1.16 frystyk 28:
1.2 timbl 29:
30:
31: /* Element Stack
32: ** -------------
33: ** This allows us to return down the stack reselcting styles.
34: ** As we return, attribute values will be garbage in general.
35: */
36: typedef struct _HTElement HTElement;
37: struct _HTElement {
38: HTElement * next; /* Previously nested element or 0 */
39: HTTag* tag; /* The tag at this level */
40: };
41:
42:
43: /* Internal Context Data Structure
44: ** -------------------------------
45: */
46: struct _HTStream {
47:
48: CONST HTStreamClass * isa; /* inherited from HTStream */
49:
50: CONST SGML_dtd *dtd;
51: HTStructuredClass *actions; /* target class */
52: HTStructured *target; /* target object */
53:
1.1 timbl 54: HTTag *current_tag;
1.2 timbl 55: int current_attribute_number;
1.1 timbl 56: HTChunk *string;
57: HTElement *element_stack;
1.12 timbl 58: enum sgml_state { S_text, S_literal, S_tag, S_tag_gap,
1.18 timbl 59: S_attr, S_attr_gap, S_equals, S_value, S_after_open,
60: S_nl, S_nl_tago,
1.1 timbl 61: S_ero, S_cro,
1.13 timbl 62: #ifdef ISO_2022_JP
63: S_esc, S_dollar, S_paren, S_nonascii_text,
64: #endif
1.1 timbl 65: S_squoted, S_dquoted, S_end, S_entity, S_junk_tag} state;
1.2 timbl 66: #ifdef CALLERDATA
1.1 timbl 67: void * callerData;
1.2 timbl 68: #endif
69: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
70: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
71: } ;
72:
73:
/* Pass one character of text on to the target object.  The expansion
** refers to a variable called `context', which must be the HTStream in
** scope wherever the macro is used.
*/
#define PUTC(ch) \
	((*context->actions->put_character)(context->target, (ch)))
75:
1.1 timbl 76:
1.17 timbl 77: /* Find Attribute Number
78: ** ---------------------
79: */
80:
81: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
82: {
83: attr* attributes = tag->attributes;
84:
85: int high, low, i, diff; /* Binary search for attribute name */
86: for(low=0, high=tag->number_of_attributes;
87: high > low ;
88: diff < 0 ? (low = i+1) : (high = i) ) {
89: i = (low + (high-low)/2);
90: diff = strcasecomp(attributes[i].name, s);
91: if (diff==0) return i; /* success: found it */
92: } /* for */
93:
94: return -1;
95: }
96:
1.1 timbl 97:
98: /* Handle Attribute
99: ** ----------------
100: */
101: /* PUBLIC CONST char * SGML_default = ""; ?? */
102:
103: #ifdef __STDC__
1.17 timbl 104: PRIVATE void handle_attribute_name(HTStream * context, CONST char * s)
1.1 timbl 105: #else
106: PRIVATE void handle_attribute_name(context, s)
1.2 timbl 107: HTStream * context;
1.1 timbl 108: char *s;
109: #endif
110: {
1.2 timbl 111:
112: HTTag * tag = context->current_tag;
113:
1.17 timbl 114: int i = SGMLFindAttribute(tag, s);
115: if (i>=0) {
116: context->current_attribute_number = i;
117: context->present[i] = YES;
118: if (context->value[i]) {
119: free(context->value[i]);
120: context->value[i] = NULL;
121: }
122: return;
123: } /* if */
1.2 timbl 124:
125: if (TRACE)
126: fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
127: s, context->current_tag->name);
128: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 129: }
130:
131:
132: /* Handle attribute value
133: ** ----------------------
134: */
135: #ifdef __STDC__
1.2 timbl 136: PRIVATE void handle_attribute_value(HTStream * context, const char * s)
1.1 timbl 137: #else
138: PRIVATE void handle_attribute_value(context, s)
1.2 timbl 139: HTStream * context;
1.1 timbl 140: char *s;
141: #endif
142: {
1.2 timbl 143: if (context->current_attribute_number != INVALID) {
144: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 145: } else {
146: if (TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
147: }
1.2 timbl 148: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 149: }
150:
1.2 timbl 151:
1.1 timbl 152: /* Handle entity
153: ** -------------
154: **
155: ** On entry,
156: ** s contains the entity name zero terminated
157: ** Bugs:
158: ** If the entity name is unknown, the terminator is treated as
159: ** a printable non-special character in all cases, even if it is '<'
160: */
161: #ifdef __STDC__
1.2 timbl 162: PRIVATE void handle_entity(HTStream * context, char term)
1.1 timbl 163: #else
164: PRIVATE void handle_entity(context, term)
1.2 timbl 165: HTStream * context;
1.1 timbl 166: char term;
167: #endif
168: {
1.2 timbl 169:
1.3 timbl 170: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 171: CONST char *s = context->string->data;
1.2 timbl 172:
173: int high, low, i, diff;
174: for(low=0, high = context->dtd->number_of_entities;
175: high > low ;
176: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
177: i = (low + (high-low)/2);
178: diff = strcmp(entities[i], s); /* Csse sensitive! */
179: if (diff==0) { /* success: found it */
180: (*context->actions->put_entity)(context->target, i);
181: return;
1.1 timbl 182: }
183: }
184: /* If entity string not found, display as text */
185: if (TRACE)
186: fprintf(stderr, "SGML: Unknown entity %s\n", s);
1.2 timbl 187: PUTC('&');
1.1 timbl 188: {
189: CONST char *p;
190: for (p=s; *p; p++) {
1.2 timbl 191: PUTC(*p);
1.1 timbl 192: }
193: }
1.2 timbl 194: PUTC(term);
1.1 timbl 195: }
196:
1.2 timbl 197:
1.1 timbl 198: /* End element
1.2 timbl 199: ** -----------
1.1 timbl 200: */
201: #ifdef __STDC__
1.2 timbl 202: PRIVATE void end_element(HTStream * context, HTTag * old_tag)
1.1 timbl 203: #else
204: PRIVATE void end_element(context, old_tag)
205: HTTag * old_tag;
1.2 timbl 206: HTStream * context;
1.1 timbl 207: #endif
208: {
209: if (TRACE) fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 210: if (old_tag->contents == SGML_EMPTY) {
1.1 timbl 211: if (TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
212: old_tag->name);
213: return;
214: }
215: while (context->element_stack) {/* Loop is error path only */
216: HTElement * N = context->element_stack;
217: HTTag * t = N->tag;
218:
219: if (old_tag != t) { /* Mismatch: syntax error */
220: if (context->element_stack->next) { /* This is not the last level */
221: if (TRACE) fprintf(stderr,
222: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
223: old_tag->name, t->name, t->name);
224: } else { /* last level */
225: if (TRACE) fprintf(stderr,
226: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
227: old_tag->name, t->name, old_tag->name);
228: return; /* Ignore */
229: }
230: }
231:
232: context->element_stack = N->next; /* Remove from stack */
233: free(N);
1.2 timbl 234: (*context->actions->end_element)(context->target,
235: t - context->dtd->tags);
1.1 timbl 236: if (old_tag == t) return; /* Correct sequence */
237:
238: /* Syntax error path only */
239:
240: }
1.5 timbl 241: if (TRACE) fprintf(stderr,
1.1 timbl 242: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
243: }
244:
245:
1.17 timbl 246: /* Start an element
247: ** ----------------
1.1 timbl 248: */
249: #ifdef __STDC__
1.2 timbl 250: PRIVATE void start_element(HTStream * context)
1.1 timbl 251: #else
252: PRIVATE void start_element(context)
1.2 timbl 253: HTStream * context;
1.1 timbl 254: #endif
255: {
256: HTTag * new_tag = context->current_tag;
257:
258: if (TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 259: (*context->actions->start_element)(
260: context->target,
261: new_tag - context->dtd->tags,
262: context->present,
1.3 timbl 263: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 264: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 265: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
266: if (N == NULL) outofmem(__FILE__, "start_element");
267: N->next = context->element_stack;
268: N->tag = new_tag;
269: context->element_stack = N;
270: }
271: }
272:
273:
1.2 timbl 274: /* Find Tag in DTD tag list
275: ** ------------------------
1.1 timbl 276: **
277: ** On entry,
1.2 timbl 278: ** dtd points to dtd structire including valid tag list
279: ** string points to name of tag in question
1.1 timbl 280: **
1.2 timbl 281: ** On exit,
282: ** returns:
1.7 timbl 283: ** NULL tag not found
284: ** else address of tag structure in dtd
1.2 timbl 285: */
1.11 timbl 286: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 287: {
288: int high, low, i, diff;
289: for(low=0, high=dtd->number_of_tags;
290: high > low ;
291: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
292: i = (low + (high-low)/2);
1.3 timbl 293: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 294: if (diff==0) { /* success: found it */
1.7 timbl 295: return &dtd->tags[i];
1.2 timbl 296: }
297: }
1.7 timbl 298: return NULL;
1.2 timbl 299: }
300:
301: /*________________________________________________________________________
302: ** Public Methods
1.1 timbl 303: */
304:
1.2 timbl 305:
306: /* Could check that we are back to bottom of stack! @@ */
1.1 timbl 307:
1.8 timbl 308: PUBLIC void SGML_free ARGS1(HTStream *, context)
309: {
1.14 frystyk 310: int cnt;
311:
1.15 frystyk 312: while (context->element_stack) { /* Make sure, that all tags are gone */
313: HTElement *ptr = context->element_stack;
314:
315: if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
316: context->element_stack->tag->name);
317: context->element_stack = ptr->next;
318: free(ptr);
319: }
1.19 ! duns 320: (*context->actions->_free)(context->target);
1.8 timbl 321: HTChunkFree(context->string);
1.15 frystyk 322: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 323: if(context->value[cnt])
324: free(context->value[cnt]);
1.8 timbl 325: free(context);
1.1 timbl 326: }
327:
1.8 timbl 328: PUBLIC void SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 329: {
1.14 frystyk 330: int cnt;
331:
1.15 frystyk 332: while (context->element_stack) { /* Make sure, that all tags are gone */
333: HTElement *ptr = context->element_stack;
334:
335: if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
336: context->element_stack->tag->name);
337: context->element_stack = ptr->next;
338: free(ptr);
339: }
1.8 timbl 340: (*context->actions->abort)(context->target, e);
1.1 timbl 341: HTChunkFree(context->string);
1.14 frystyk 342: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
343: if(context->value[cnt])
344: free(context->value[cnt]);
1.1 timbl 345: free(context);
346: }
347:
1.2 timbl 348:
/*	Read and write user callback handle
**	-----------------------------------
**
**   The callbacks from the SGML parser have an SGML context parameter.
**   These calls allow the caller to associate his own context with a
**   particular SGML context.  They are compiled only when CALLERDATA
**   is defined.
*/

#ifdef CALLERDATA
/* Return the handle last stored in this parser context */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * handle = context->callerData;

    return handle;
}

/* Store the caller's handle in this parser context */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
    return;
}
#endif
1.1 timbl 368:
1.2 timbl 369: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 370:
371: {
1.2 timbl 372: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 373: HTChunk *string = context->string;
374:
375: switch(context->state) {
1.18 timbl 376:
377: case S_after_open: /* Strip one trainling newline
378: only after opening nonempty element. - SGML:Ugh! */
379: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
380: break;
381: }
382: context->state = S_text;
383: goto normal_text;
384: /* (***falls through***) */
385:
1.1 timbl 386: case S_text:
1.18 timbl 387: normal_text:
388:
1.13 timbl 389: #ifdef ISO_2022_JP
390: if (c=='\033') {
391: context->state = S_esc;
392: PUTC(c);
393: break;
394: }
395: #endif /* ISO_2022_JP */
1.6 timbl 396: if (c=='&' && (!context->element_stack || (
397: context->element_stack->tag &&
398: ( context->element_stack->tag->contents == SGML_MIXED
399: || context->element_stack->tag->contents ==
400: SGML_RCDATA)
401: ))) {
1.1 timbl 402: string->size = 0;
403: context->state = S_ero;
404:
405: } else if (c=='<') {
406: string->size = 0;
407: context->state = (context->element_stack &&
1.13 timbl 408: context->element_stack->tag &&
409: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 410: S_literal : S_tag;
1.18 timbl 411: } else if (c=='\n') { /* Newline - ignore if before tag end! */
412: context->state = S_nl;
1.2 timbl 413: } else PUTC(c);
1.1 timbl 414: break;
1.13 timbl 415:
1.18 timbl 416: case S_nl:
417: if (c=='<') {
418: string->size = 0;
419: context->state = (context->element_stack &&
420: context->element_stack->tag &&
421: context->element_stack->tag->contents == SGML_LITERAL) ?
422: S_literal : S_nl_tago;
423: } else {
424: PUTC('\n');
425: context->state = S_text;
426: goto normal_text;
427: }
428: break;
429:
430: case S_nl_tago: /* Had newline and tag opener */
431: if (c != '/') {
432: PUTC('\n'); /* Only ignore newline before </ */
433: }
434: context->state = S_tag;
435: goto handle_S_tag;
436:
1.13 timbl 437: #ifdef ISO_2022_JP
438: case S_esc:
439: if (c=='$') {
440: context->state = S_dollar;
441: } else if (c=='(') {
442: context->state = S_paren;
443: } else {
444: context->state = S_text;
445: }
446: PUTC(c);
447: break;
448: case S_dollar:
449: if (c=='@' || c=='B') {
450: context->state = S_nonascii_text;
451: } else {
452: context->state = S_text;
453: }
454: PUTC(c);
455: break;
456: case S_paren:
457: if (c=='B' || c=='J') {
458: context->state = S_text;
459: } else {
460: context->state = S_text;
461: }
462: PUTC(c);
463: break;
464: case S_nonascii_text:
465: if (c=='\033') {
466: context->state = S_esc;
467: PUTC(c);
468: } else {
469: PUTC(c);
470: }
471: break;
472: #endif /* ISO_2022_JP */
1.1 timbl 473:
1.12 timbl 474: /* In literal mode, waits only for specific end tag!
1.2 timbl 475: ** Only foir compatibility with old servers.
1.1 timbl 476: */
1.12 timbl 477: case S_literal :
1.1 timbl 478: HTChunkPutc(string, c);
479: if ( TOUPPER(c) != ((string->size ==1) ? '/'
480: : context->element_stack->tag->name[string->size-2])) {
481: int i;
482:
1.12 timbl 483: /* If complete match, end literal */
1.1 timbl 484: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
485: end_element(context, context->element_stack->tag);
486: string->size = 0;
1.2 timbl 487: context->current_attribute_number = INVALID;
1.1 timbl 488: context->state = S_text;
489: break;
490: } /* If Mismatch: recover string. */
1.2 timbl 491: PUTC( '<');
1.1 timbl 492: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 493: PUTC(
1.1 timbl 494: string->data[i]);
495: context->state = S_text;
496: }
497:
498: break;
499:
500: /* Character reference or Entity
501: */
502: case S_ero:
503: if (c=='#') {
504: context->state = S_cro; /* &# is Char Ref Open */
505: break;
506: }
507: context->state = S_entity; /* Fall through! */
508:
509: /* Handle Entities
510: */
511: case S_entity:
512: if (isalnum(c))
513: HTChunkPutc(string, c);
514: else {
515: HTChunkTerminate(string);
516: handle_entity(context, c);
517: context->state = S_text;
518: }
519: break;
520:
521: /* Character reference
522: */
523: case S_cro:
524: if (isalnum(c))
525: HTChunkPutc(string, c); /* accumulate a character NUMBER */
526: else {
527: int value;
528: HTChunkTerminate(string);
529: if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 530: PUTC(FROMASCII((char)value));
1.1 timbl 531: context->state = S_text;
532: }
533: break;
534:
535: /* Tag
536: */
537: case S_tag: /* new tag */
1.18 timbl 538: handle_S_tag:
539:
1.1 timbl 540: if (isalnum(c))
541: HTChunkPutc(string, c);
542: else { /* End of tag name */
1.7 timbl 543: HTTag * t;
1.1 timbl 544: if (c=='/') {
545: if (TRACE) if (string->size!=0)
546: fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
547: context->state = S_end;
548: break;
549: }
550: HTChunkTerminate(string) ;
1.2 timbl 551:
1.10 timbl 552: t = SGMLFindTag(dtd, string->data);
1.7 timbl 553: if (!t) {
1.2 timbl 554: if(TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 555: string->data);
556: context->state = (c=='>') ? S_text : S_junk_tag;
557: break;
558: }
1.7 timbl 559: context->current_tag = t;
1.2 timbl 560:
561: /* Clear out attributes
562: */
1.1 timbl 563:
1.2 timbl 564: {
565: int i;
566: for (i=0; i< context->current_tag->number_of_attributes; i++)
567: context->present[i] = NO;
1.1 timbl 568: }
569: string->size = 0;
1.2 timbl 570: context->current_attribute_number = INVALID;
1.1 timbl 571:
572: if (c=='>') {
573: if (context->current_tag->name) start_element(context);
1.18 timbl 574: context->state = S_after_open;
1.1 timbl 575: } else {
576: context->state = S_tag_gap;
577: }
578: }
579: break;
580:
581:
582: case S_tag_gap: /* Expecting attribute or > */
583: if (WHITE(c)) break; /* Gap between attributes */
584: if (c=='>') { /* End of tag */
585: if (context->current_tag->name) start_element(context);
1.18 timbl 586: context->state = S_after_open;
1.1 timbl 587: break;
588: }
589: HTChunkPutc(string, c);
590: context->state = S_attr; /* Get attribute */
591: break;
592:
593: /* accumulating value */
594: case S_attr:
595: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
596: HTChunkTerminate(string) ;
597: handle_attribute_name(context, string->data);
598: string->size = 0;
599: if (c=='>') { /* End of tag */
600: if (context->current_tag->name) start_element(context);
1.18 timbl 601: context->state = S_after_open;
1.1 timbl 602: break;
603: }
604: context->state = (c=='=' ? S_equals: S_attr_gap);
605: } else {
606: HTChunkPutc(string, c);
607: }
608: break;
609:
610: case S_attr_gap: /* Expecting attribute or = or > */
611: if (WHITE(c)) break; /* Gap after attribute */
612: if (c=='>') { /* End of tag */
613: if (context->current_tag->name) start_element(context);
1.18 timbl 614: context->state = S_after_open;
1.1 timbl 615: break;
616: } else if (c=='=') {
617: context->state = S_equals;
618: break;
619: }
620: HTChunkPutc(string, c);
621: context->state = S_attr; /* Get next attribute */
622: break;
623:
624: case S_equals: /* After attr = */
625: if (WHITE(c)) break; /* Before attribute value */
626: if (c=='>') { /* End of tag */
1.5 timbl 627: if (TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 628: if (context->current_tag->name) start_element(context);
1.18 timbl 629: context->state = S_after_open;
1.1 timbl 630: break;
631:
632: } else if (c=='\'') {
633: context->state = S_squoted;
634: break;
635:
636: } else if (c=='"') {
637: context->state = S_dquoted;
638: break;
639: }
640: HTChunkPutc(string, c);
641: context->state = S_value;
642: break;
643:
644: case S_value:
645: if (WHITE(c) || (c=='>')) { /* End of word */
646: HTChunkTerminate(string) ;
647: handle_attribute_value(context, string->data);
648: string->size = 0;
649: if (c=='>') { /* End of tag */
650: if (context->current_tag->name) start_element(context);
1.18 timbl 651: context->state = S_after_open;
1.1 timbl 652: break;
653: }
654: else context->state = S_tag_gap;
655: } else {
656: HTChunkPutc(string, c);
657: }
658: break;
659:
660: case S_squoted: /* Quoted attribute value */
661: if (c=='\'') { /* End of attribute value */
662: HTChunkTerminate(string) ;
663: handle_attribute_value(context, string->data);
664: string->size = 0;
665: context->state = S_tag_gap;
666: } else {
667: HTChunkPutc(string, c);
668: }
669: break;
670:
671: case S_dquoted: /* Quoted attribute value */
672: if (c=='"') { /* End of attribute value */
673: HTChunkTerminate(string) ;
674: handle_attribute_value(context, string->data);
675: string->size = 0;
676: context->state = S_tag_gap;
677: } else {
678: HTChunkPutc(string, c);
679: }
680: break;
681:
682: case S_end: /* </ */
683: if (isalnum(c))
684: HTChunkPutc(string, c);
685: else { /* End of end tag name */
1.7 timbl 686: HTTag * t;
1.1 timbl 687: HTChunkTerminate(string) ;
1.7 timbl 688: if (!*string->data) { /* Empty end tag */
689: t = context->element_stack->tag;
690: } else {
1.10 timbl 691: t = SGMLFindTag(dtd, string->data);
1.1 timbl 692: }
1.7 timbl 693: if (!t) {
1.1 timbl 694: if(TRACE) fprintf(stderr,
695: "Unknown end tag </%s>\n", string->data);
1.2 timbl 696: } else {
1.7 timbl 697: context->current_tag = t;
1.2 timbl 698: end_element( context, context->current_tag);
1.1 timbl 699: }
1.2 timbl 700:
1.1 timbl 701: string->size = 0;
1.2 timbl 702: context->current_attribute_number = INVALID;
1.7 timbl 703: if (c!='>') {
704: if (TRACE && !WHITE(c))
705: fprintf(stderr,"SGML: `</%s%c' found!\n",
706: string->data, c);
707: context->state = S_junk_tag;
708: } else {
709: context->state = S_text;
710: }
1.1 timbl 711: }
712: break;
713:
714:
715: case S_junk_tag:
716: if (c=='>') {
717: context->state = S_text;
718: }
719:
720: } /* switch on context->state */
721:
722: } /* SGML_character */
1.2 timbl 723:
724:
725: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
726: {
727: CONST char *p;
728: for(p=str; *p; p++)
729: SGML_character(context, *p);
730: }
731:
732:
733: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
734: {
735: CONST char *p;
736: CONST char *e = str+l;
737: for(p=str; p<e; p++)
738: SGML_character(context, *p);
739: }
740:
741: /*_______________________________________________________________________
742: */
743:
744: /* Structured Object Class
745: ** -----------------------
746: */
747: PUBLIC CONST HTStreamClass SGMLParser =
748: {
749: "SGMLParser",
750: SGML_free,
1.8 timbl 751: SGML_abort,
1.9 timbl 752: SGML_character,
753: SGML_string,
754: SGML_write,
1.2 timbl 755: };
756:
757: /* Create SGML Engine
758: ** ------------------
759: **
760: ** On entry,
761: ** dtd represents the DTD, along with
762: ** actions is the sink for the data as a set of routines.
763: **
764: */
765:
766: PUBLIC HTStream* SGML_new ARGS2(
767: CONST SGML_dtd *, dtd,
768: HTStructured *, target)
769: {
770: int i;
771: HTStream* context = (HTStream *) malloc(sizeof(*context));
772: if (!context) outofmem(__FILE__, "SGML_begin");
773:
774: context->isa = &SGMLParser;
775: context->string = HTChunkCreate(128); /* Grow by this much */
776: context->dtd = dtd;
777: context->target = target;
778: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
779: /* Ugh: no OO */
780: context->state = S_text;
781: context->element_stack = 0; /* empty */
782: #ifdef CALLERDATA
783: context->callerData = (void*) callerData;
784: #endif
785: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
786:
787: return context;
788: }
1.14 frystyk 789:
790:
791:
792:
793:
794:
795:
796:
797:
798:
799:
1.2 timbl 800:
Webmaster