Annotation of libwww/Library/src/SGML.c, revision 1.22
1.1 timbl 1: /* General SGML Parser code SGML.c
2: ** ========================
3: **
1.2 timbl 4: ** This module implements an HTStream object. To parse an
1.1 timbl 5: ** SGML file, create this object which is a parser. The object
1.2 timbl 6: ** is (currently) created by being passed a DTD structure,
7: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 8: **
1.19 duns 9: ** 6 Feb 93 Binary searches used. Interface modified.
10: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 11: */
12:
1.20 frystyk 13: /* System dependent stuff */
14: #include "tcp.h" /* For FROMASCII */
15:
16: /* Library includes */
1.1 timbl 17: #include "HTUtils.h"
18: #include "HTChunk.h"
1.20 frystyk 19: #include "SGML.h"
1.1 timbl 20:
1.2 timbl 21: #define INVALID (-1) /* current_attribute_number value meaning "no attribute selected" */
22:
1.1 timbl 23: /* The State (context) of the parser
24: **
1.2 timbl 25: ** This is passed with each call to make the parser reentrant
1.1 timbl 26: **
27: */
28:
1.16 frystyk 29:
1.2 timbl 30:
31:
32: /* Element Stack
33: ** -------------
34: ** This allows us to return down the stack reselcting styles.
35: ** As we return, attribute values will be garbage in general.
36: */
37: typedef struct _HTElement HTElement;
38: struct _HTElement {
39: HTElement * next; /* Previously nested element or 0 */
40: HTTag* tag; /* The tag at this level */
41: };
42:
43:
/*	Parser states
**	-------------
**	SGML_character() switches on one of these for every input character.
**	The order of the enumerators is kept exactly as before.
*/
typedef enum _sgml_state {
    S_text,		/* ordinary character data */
    S_literal,		/* inside a literal element, matching its end tag */
    S_tag,		/* after '<': collecting a tag name */
    S_tag_gap,		/* inside a start tag, expecting attribute or '>' */
    S_attr,		/* collecting an attribute name */
    S_attr_gap,		/* after an attribute name: attribute, '=' or '>' */
    S_equals,		/* after '=': expecting a value */
    S_value,		/* collecting an unquoted attribute value */
    S_after_open,	/* just after the '>' of a start tag */
    S_nl,		/* newline seen in text, not yet emitted */
    S_nl_tago,		/* newline followed by '<' */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#": collecting a character number */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* text passed through until the next ESC */
#endif
    S_squoted,		/* inside a '...' attribute value */
    S_dquoted,		/* inside a "..." attribute value */
    S_end,		/* after "</": collecting an end tag name */
    S_entity,		/* collecting an entity name */
    S_junk_tag		/* skipping everything up to the next '>' */
} sgml_state;
54:
55:
1.2 timbl 56: /* Internal Context Data Structure
57: ** -------------------------------
58: */
59: struct _HTStream {
60:
61: CONST HTStreamClass * isa; /* inherited from HTStream */
62:
63: CONST SGML_dtd *dtd;
64: HTStructuredClass *actions; /* target class */
65: HTStructured *target; /* target object */
66:
1.1 timbl 67: HTTag *current_tag;
1.2 timbl 68: int current_attribute_number;
1.1 timbl 69: HTChunk *string;
70: HTElement *element_stack;
1.21 frystyk 71: sgml_state state;
1.2 timbl 72: #ifdef CALLERDATA
1.1 timbl 73: void * callerData;
1.2 timbl 74: #endif
75: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
76: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
77: } ;
78:
79:
80: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
81:
1.1 timbl 82:
1.17 timbl 83: /* Find Attribute Number
84: ** ---------------------
85: */
86:
87: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
88: {
89: attr* attributes = tag->attributes;
90:
91: int high, low, i, diff; /* Binary search for attribute name */
92: for(low=0, high=tag->number_of_attributes;
93: high > low ;
94: diff < 0 ? (low = i+1) : (high = i) ) {
95: i = (low + (high-low)/2);
96: diff = strcasecomp(attributes[i].name, s);
97: if (diff==0) return i; /* success: found it */
98: } /* for */
99:
100: return -1;
101: }
102:
1.1 timbl 103:
104: /* Handle Attribute
105: ** ----------------
106: */
107: /* PUBLIC CONST char * SGML_default = ""; ?? */
108:
1.21 frystyk 109: PRIVATE void handle_attribute_name ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 110: {
1.2 timbl 111:
112: HTTag * tag = context->current_tag;
113:
1.17 timbl 114: int i = SGMLFindAttribute(tag, s);
115: if (i>=0) {
116: context->current_attribute_number = i;
117: context->present[i] = YES;
118: if (context->value[i]) {
119: free(context->value[i]);
120: context->value[i] = NULL;
121: }
122: return;
123: } /* if */
1.2 timbl 124:
1.20 frystyk 125: if (SGML_TRACE)
1.2 timbl 126: fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
127: s, context->current_tag->name);
128: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 129: }
130:
131:
132: /* Handle attribute value
133: ** ----------------------
134: */
1.21 frystyk 135: PRIVATE void handle_attribute_value ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 136: {
1.2 timbl 137: if (context->current_attribute_number != INVALID) {
138: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 139: } else {
1.20 frystyk 140: if (SGML_TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 141: }
1.2 timbl 142: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
144:
1.2 timbl 145:
1.1 timbl 146: /* Handle entity
147: ** -------------
148: **
149: ** On entry,
150: ** s contains the entity name zero terminated
151: ** Bugs:
152: ** If the entity name is unknown, the terminator is treated as
153: ** a printable non-special character in all cases, even if it is '<'
154: */
1.21 frystyk 155: PRIVATE void handle_entity ARGS2(HTStream *, context, char, term)
1.1 timbl 156: {
1.2 timbl 157:
1.3 timbl 158: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 159: CONST char *s = context->string->data;
1.2 timbl 160:
161: int high, low, i, diff;
162: for(low=0, high = context->dtd->number_of_entities;
163: high > low ;
164: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
165: i = (low + (high-low)/2);
166: diff = strcmp(entities[i], s); /* Csse sensitive! */
167: if (diff==0) { /* success: found it */
168: (*context->actions->put_entity)(context->target, i);
169: return;
1.1 timbl 170: }
171: }
172: /* If entity string not found, display as text */
1.20 frystyk 173: if (SGML_TRACE)
1.1 timbl 174: fprintf(stderr, "SGML: Unknown entity %s\n", s);
1.2 timbl 175: PUTC('&');
1.1 timbl 176: {
177: CONST char *p;
178: for (p=s; *p; p++) {
1.2 timbl 179: PUTC(*p);
1.1 timbl 180: }
181: }
1.2 timbl 182: PUTC(term);
1.1 timbl 183: }
184:
1.2 timbl 185:
1.1 timbl 186: /* End element
1.2 timbl 187: ** -----------
1.1 timbl 188: */
1.21 frystyk 189: PRIVATE void end_element ARGS2(HTStream *, context, HTTag *, old_tag)
1.1 timbl 190: {
1.20 frystyk 191: if (SGML_TRACE) fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 192: if (old_tag->contents == SGML_EMPTY) {
1.20 frystyk 193: if (SGML_TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 194: old_tag->name);
195: return;
196: }
197: while (context->element_stack) {/* Loop is error path only */
198: HTElement * N = context->element_stack;
199: HTTag * t = N->tag;
200:
201: if (old_tag != t) { /* Mismatch: syntax error */
202: if (context->element_stack->next) { /* This is not the last level */
1.20 frystyk 203: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 204: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
205: old_tag->name, t->name, t->name);
206: } else { /* last level */
1.20 frystyk 207: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 208: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
209: old_tag->name, t->name, old_tag->name);
210: return; /* Ignore */
211: }
212: }
213:
214: context->element_stack = N->next; /* Remove from stack */
215: free(N);
1.2 timbl 216: (*context->actions->end_element)(context->target,
217: t - context->dtd->tags);
1.1 timbl 218: if (old_tag == t) return; /* Correct sequence */
219:
220: /* Syntax error path only */
221:
222: }
1.20 frystyk 223: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 224: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
225: }
226:
227:
1.17 timbl 228: /* Start an element
229: ** ----------------
1.1 timbl 230: */
1.21 frystyk 231: PRIVATE void start_element ARGS1(HTStream *, context)
1.1 timbl 232: {
233: HTTag * new_tag = context->current_tag;
234:
1.20 frystyk 235: if (SGML_TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 236: (*context->actions->start_element)(
237: context->target,
238: new_tag - context->dtd->tags,
239: context->present,
1.3 timbl 240: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 241: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 242: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
243: if (N == NULL) outofmem(__FILE__, "start_element");
244: N->next = context->element_stack;
245: N->tag = new_tag;
246: context->element_stack = N;
247: }
248: }
249:
250:
1.2 timbl 251: /* Find Tag in DTD tag list
252: ** ------------------------
1.1 timbl 253: **
254: ** On entry,
1.2 timbl 255: ** dtd points to dtd structire including valid tag list
256: ** string points to name of tag in question
1.1 timbl 257: **
1.2 timbl 258: ** On exit,
259: ** returns:
1.7 timbl 260: ** NULL tag not found
261: ** else address of tag structure in dtd
1.2 timbl 262: */
1.11 timbl 263: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 264: {
265: int high, low, i, diff;
266: for(low=0, high=dtd->number_of_tags;
267: high > low ;
268: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
269: i = (low + (high-low)/2);
1.3 timbl 270: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 271: if (diff==0) { /* success: found it */
1.7 timbl 272: return &dtd->tags[i];
1.2 timbl 273: }
274: }
1.7 timbl 275: return NULL;
1.2 timbl 276: }
277:
278: /*________________________________________________________________________
279: ** Public Methods
1.1 timbl 280: */
281:
1.2 timbl 282:
283: /* Could check that we are back to bottom of stack! @@ */
1.1 timbl 284:
1.22 ! frystyk 285: PUBLIC int SGML_free ARGS1(HTStream *, context)
1.8 timbl 286: {
1.14 frystyk 287: int cnt;
288:
1.15 frystyk 289: while (context->element_stack) { /* Make sure, that all tags are gone */
290: HTElement *ptr = context->element_stack;
291:
1.20 frystyk 292: if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 293: context->element_stack->tag->name);
294: context->element_stack = ptr->next;
295: free(ptr);
296: }
1.19 duns 297: (*context->actions->_free)(context->target);
1.8 timbl 298: HTChunkFree(context->string);
1.15 frystyk 299: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 300: if(context->value[cnt])
301: free(context->value[cnt]);
1.8 timbl 302: free(context);
1.22 ! frystyk 303: return 0;
1.1 timbl 304: }
305:
1.22 ! frystyk 306: PUBLIC int SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 307: {
1.14 frystyk 308: int cnt;
309:
1.15 frystyk 310: while (context->element_stack) { /* Make sure, that all tags are gone */
311: HTElement *ptr = context->element_stack;
312:
1.20 frystyk 313: if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 314: context->element_stack->tag->name);
315: context->element_stack = ptr->next;
316: free(ptr);
317: }
1.8 timbl 318: (*context->actions->abort)(context->target, e);
1.1 timbl 319: HTChunkFree(context->string);
1.14 frystyk 320: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
321: if(context->value[cnt])
322: free(context->value[cnt]);
1.1 timbl 323: free(context);
1.22 ! frystyk 324: return EOF;
1.1 timbl 325: }
326:
1.2 timbl 327:
1.1 timbl 328: /* Read and write user callback handle
329: ** -----------------------------------
330: **
331: ** The callbacks from the SGML parser have an SGML context parameter.
332: ** These calls allow the caller to associate his own context with a
333: ** particular SGML context.
334: */
335:
#ifdef CALLERDATA
/* Return the caller's handle stored in this parser context. */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void *handle = context->callerData;

    return handle;
}

/* Store the caller's handle in this parser context. */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
    return;
}
#endif
1.1 timbl 347:
1.2 timbl 348: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 349:
350: {
1.2 timbl 351: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 352: HTChunk *string = context->string;
353:
354: switch(context->state) {
1.18 timbl 355:
356: case S_after_open: /* Strip one trainling newline
357: only after opening nonempty element. - SGML:Ugh! */
358: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
359: break;
360: }
361: context->state = S_text;
362: goto normal_text;
363: /* (***falls through***) */
364:
1.1 timbl 365: case S_text:
1.18 timbl 366: normal_text:
367:
1.13 timbl 368: #ifdef ISO_2022_JP
369: if (c=='\033') {
370: context->state = S_esc;
371: PUTC(c);
372: break;
373: }
374: #endif /* ISO_2022_JP */
1.6 timbl 375: if (c=='&' && (!context->element_stack || (
376: context->element_stack->tag &&
377: ( context->element_stack->tag->contents == SGML_MIXED
378: || context->element_stack->tag->contents ==
379: SGML_RCDATA)
380: ))) {
1.1 timbl 381: string->size = 0;
382: context->state = S_ero;
383:
384: } else if (c=='<') {
385: string->size = 0;
386: context->state = (context->element_stack &&
1.13 timbl 387: context->element_stack->tag &&
388: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 389: S_literal : S_tag;
1.18 timbl 390: } else if (c=='\n') { /* Newline - ignore if before tag end! */
391: context->state = S_nl;
1.2 timbl 392: } else PUTC(c);
1.1 timbl 393: break;
1.13 timbl 394:
1.18 timbl 395: case S_nl:
396: if (c=='<') {
397: string->size = 0;
398: context->state = (context->element_stack &&
399: context->element_stack->tag &&
400: context->element_stack->tag->contents == SGML_LITERAL) ?
401: S_literal : S_nl_tago;
402: } else {
403: PUTC('\n');
404: context->state = S_text;
405: goto normal_text;
406: }
407: break;
408:
409: case S_nl_tago: /* Had newline and tag opener */
410: if (c != '/') {
411: PUTC('\n'); /* Only ignore newline before </ */
412: }
413: context->state = S_tag;
414: goto handle_S_tag;
415:
1.13 timbl 416: #ifdef ISO_2022_JP
417: case S_esc:
418: if (c=='$') {
419: context->state = S_dollar;
420: } else if (c=='(') {
421: context->state = S_paren;
422: } else {
423: context->state = S_text;
424: }
425: PUTC(c);
426: break;
427: case S_dollar:
428: if (c=='@' || c=='B') {
429: context->state = S_nonascii_text;
430: } else {
431: context->state = S_text;
432: }
433: PUTC(c);
434: break;
435: case S_paren:
436: if (c=='B' || c=='J') {
437: context->state = S_text;
438: } else {
439: context->state = S_text;
440: }
441: PUTC(c);
442: break;
443: case S_nonascii_text:
444: if (c=='\033') {
445: context->state = S_esc;
446: PUTC(c);
447: } else {
448: PUTC(c);
449: }
450: break;
451: #endif /* ISO_2022_JP */
1.1 timbl 452:
1.12 timbl 453: /* In literal mode, waits only for specific end tag!
1.2 timbl 454: ** Only foir compatibility with old servers.
1.1 timbl 455: */
1.12 timbl 456: case S_literal :
1.1 timbl 457: HTChunkPutc(string, c);
458: if ( TOUPPER(c) != ((string->size ==1) ? '/'
459: : context->element_stack->tag->name[string->size-2])) {
460: int i;
461:
1.12 timbl 462: /* If complete match, end literal */
1.1 timbl 463: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
464: end_element(context, context->element_stack->tag);
465: string->size = 0;
1.2 timbl 466: context->current_attribute_number = INVALID;
1.1 timbl 467: context->state = S_text;
468: break;
469: } /* If Mismatch: recover string. */
1.2 timbl 470: PUTC( '<');
1.1 timbl 471: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 472: PUTC(
1.1 timbl 473: string->data[i]);
474: context->state = S_text;
475: }
476:
477: break;
478:
479: /* Character reference or Entity
480: */
481: case S_ero:
482: if (c=='#') {
483: context->state = S_cro; /* &# is Char Ref Open */
484: break;
485: }
486: context->state = S_entity; /* Fall through! */
487:
488: /* Handle Entities
489: */
490: case S_entity:
491: if (isalnum(c))
492: HTChunkPutc(string, c);
493: else {
494: HTChunkTerminate(string);
495: handle_entity(context, c);
496: context->state = S_text;
497: }
498: break;
499:
500: /* Character reference
501: */
502: case S_cro:
503: if (isalnum(c))
504: HTChunkPutc(string, c); /* accumulate a character NUMBER */
505: else {
506: int value;
507: HTChunkTerminate(string);
508: if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 509: PUTC(FROMASCII((char)value));
1.1 timbl 510: context->state = S_text;
511: }
512: break;
513:
514: /* Tag
515: */
516: case S_tag: /* new tag */
1.18 timbl 517: handle_S_tag:
518:
1.1 timbl 519: if (isalnum(c))
520: HTChunkPutc(string, c);
521: else { /* End of tag name */
1.7 timbl 522: HTTag * t;
1.1 timbl 523: if (c=='/') {
1.20 frystyk 524: if (SGML_TRACE) if (string->size!=0)
1.1 timbl 525: fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
526: context->state = S_end;
527: break;
528: }
529: HTChunkTerminate(string) ;
1.2 timbl 530:
1.10 timbl 531: t = SGMLFindTag(dtd, string->data);
1.7 timbl 532: if (!t) {
1.20 frystyk 533: if(SGML_TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 534: string->data);
535: context->state = (c=='>') ? S_text : S_junk_tag;
536: break;
537: }
1.7 timbl 538: context->current_tag = t;
1.2 timbl 539:
540: /* Clear out attributes
541: */
1.1 timbl 542:
1.2 timbl 543: {
544: int i;
545: for (i=0; i< context->current_tag->number_of_attributes; i++)
546: context->present[i] = NO;
1.1 timbl 547: }
548: string->size = 0;
1.2 timbl 549: context->current_attribute_number = INVALID;
1.1 timbl 550:
551: if (c=='>') {
552: if (context->current_tag->name) start_element(context);
1.18 timbl 553: context->state = S_after_open;
1.1 timbl 554: } else {
555: context->state = S_tag_gap;
556: }
557: }
558: break;
559:
560:
561: case S_tag_gap: /* Expecting attribute or > */
562: if (WHITE(c)) break; /* Gap between attributes */
563: if (c=='>') { /* End of tag */
564: if (context->current_tag->name) start_element(context);
1.18 timbl 565: context->state = S_after_open;
1.1 timbl 566: break;
567: }
568: HTChunkPutc(string, c);
569: context->state = S_attr; /* Get attribute */
570: break;
571:
572: /* accumulating value */
573: case S_attr:
574: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
575: HTChunkTerminate(string) ;
576: handle_attribute_name(context, string->data);
577: string->size = 0;
578: if (c=='>') { /* End of tag */
579: if (context->current_tag->name) start_element(context);
1.18 timbl 580: context->state = S_after_open;
1.1 timbl 581: break;
582: }
583: context->state = (c=='=' ? S_equals: S_attr_gap);
584: } else {
585: HTChunkPutc(string, c);
586: }
587: break;
588:
589: case S_attr_gap: /* Expecting attribute or = or > */
590: if (WHITE(c)) break; /* Gap after attribute */
591: if (c=='>') { /* End of tag */
592: if (context->current_tag->name) start_element(context);
1.18 timbl 593: context->state = S_after_open;
1.1 timbl 594: break;
595: } else if (c=='=') {
596: context->state = S_equals;
597: break;
598: }
599: HTChunkPutc(string, c);
600: context->state = S_attr; /* Get next attribute */
601: break;
602:
603: case S_equals: /* After attr = */
604: if (WHITE(c)) break; /* Before attribute value */
605: if (c=='>') { /* End of tag */
1.20 frystyk 606: if (SGML_TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 607: if (context->current_tag->name) start_element(context);
1.18 timbl 608: context->state = S_after_open;
1.1 timbl 609: break;
610:
611: } else if (c=='\'') {
612: context->state = S_squoted;
613: break;
614:
615: } else if (c=='"') {
616: context->state = S_dquoted;
617: break;
618: }
619: HTChunkPutc(string, c);
620: context->state = S_value;
621: break;
622:
623: case S_value:
624: if (WHITE(c) || (c=='>')) { /* End of word */
625: HTChunkTerminate(string) ;
626: handle_attribute_value(context, string->data);
627: string->size = 0;
628: if (c=='>') { /* End of tag */
629: if (context->current_tag->name) start_element(context);
1.18 timbl 630: context->state = S_after_open;
1.1 timbl 631: break;
632: }
633: else context->state = S_tag_gap;
634: } else {
635: HTChunkPutc(string, c);
636: }
637: break;
638:
639: case S_squoted: /* Quoted attribute value */
640: if (c=='\'') { /* End of attribute value */
641: HTChunkTerminate(string) ;
642: handle_attribute_value(context, string->data);
643: string->size = 0;
644: context->state = S_tag_gap;
645: } else {
646: HTChunkPutc(string, c);
647: }
648: break;
649:
650: case S_dquoted: /* Quoted attribute value */
651: if (c=='"') { /* End of attribute value */
652: HTChunkTerminate(string) ;
653: handle_attribute_value(context, string->data);
654: string->size = 0;
655: context->state = S_tag_gap;
656: } else {
657: HTChunkPutc(string, c);
658: }
659: break;
660:
661: case S_end: /* </ */
662: if (isalnum(c))
663: HTChunkPutc(string, c);
664: else { /* End of end tag name */
1.7 timbl 665: HTTag * t;
1.1 timbl 666: HTChunkTerminate(string) ;
1.7 timbl 667: if (!*string->data) { /* Empty end tag */
668: t = context->element_stack->tag;
669: } else {
1.10 timbl 670: t = SGMLFindTag(dtd, string->data);
1.1 timbl 671: }
1.7 timbl 672: if (!t) {
1.20 frystyk 673: if(SGML_TRACE) fprintf(stderr,
1.1 timbl 674: "Unknown end tag </%s>\n", string->data);
1.2 timbl 675: } else {
1.7 timbl 676: context->current_tag = t;
1.2 timbl 677: end_element( context, context->current_tag);
1.1 timbl 678: }
1.2 timbl 679:
1.1 timbl 680: string->size = 0;
1.2 timbl 681: context->current_attribute_number = INVALID;
1.7 timbl 682: if (c!='>') {
1.20 frystyk 683: if (SGML_TRACE && !WHITE(c))
1.7 timbl 684: fprintf(stderr,"SGML: `</%s%c' found!\n",
685: string->data, c);
686: context->state = S_junk_tag;
687: } else {
688: context->state = S_text;
689: }
1.1 timbl 690: }
691: break;
692:
693:
694: case S_junk_tag:
695: if (c=='>') {
696: context->state = S_text;
697: }
698:
699: } /* switch on context->state */
700:
701: } /* SGML_character */
1.2 timbl 702:
703:
704: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
705: {
706: CONST char *p;
707: for(p=str; *p; p++)
708: SGML_character(context, *p);
709: }
710:
711:
712: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
713: {
714: CONST char *p;
715: CONST char *e = str+l;
716: for(p=str; p<e; p++)
717: SGML_character(context, *p);
718: }
719:
720: /*_______________________________________________________________________
721: */
722:
723: /* Structured Object Class
724: ** -----------------------
725: */
726: PUBLIC CONST HTStreamClass SGMLParser =
727: {
728: "SGMLParser",
729: SGML_free,
1.8 timbl 730: SGML_abort,
1.9 timbl 731: SGML_character,
732: SGML_string,
733: SGML_write,
1.2 timbl 734: };
735:
736: /* Create SGML Engine
737: ** ------------------
738: **
739: ** On entry,
740: ** dtd represents the DTD, along with
741: ** actions is the sink for the data as a set of routines.
742: **
743: */
744:
745: PUBLIC HTStream* SGML_new ARGS2(
746: CONST SGML_dtd *, dtd,
747: HTStructured *, target)
748: {
749: int i;
750: HTStream* context = (HTStream *) malloc(sizeof(*context));
751: if (!context) outofmem(__FILE__, "SGML_begin");
752:
753: context->isa = &SGMLParser;
754: context->string = HTChunkCreate(128); /* Grow by this much */
755: context->dtd = dtd;
756: context->target = target;
757: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
758: /* Ugh: no OO */
759: context->state = S_text;
760: context->element_stack = 0; /* empty */
761: #ifdef CALLERDATA
762: context->callerData = (void*) callerData;
763: #endif
764: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
765:
766: return context;
767: }
1.14 frystyk 768:
769:
770:
771:
772:
773:
774:
775:
776:
777:
778:
1.2 timbl 779:
Webmaster