Annotation of libwww/Library/src/SGML.c, revision 1.25
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This module implements an HTStream object. To parse an
1.1 timbl 8: ** SGML file, create this object which is a parser. The object
1.2 timbl 9: ** is (currently) created by being passed a DTD structure,
10: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **
1.19 duns 12: ** 6 Feb 93 Binary searches used. Interface modified.
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
15:
1.25 ! frystyk 16: /* Library include files */
! 17: #include "tcp.h"
1.1 timbl 18: #include "HTUtils.h"
1.25 ! frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22:
/* Sentinel stored in current_attribute_number when no attribute is
** selected: the attribute name was unknown, or its value has already
** been consumed. */
1.2 timbl 23: #define INVALID (-1)
24:
1.1 timbl 25: /* The State (context) of the parser
26: **
1.2 timbl 27: ** This is passed with each call to make the parser reentrant
1.1 timbl 28: **
29: */
30:
1.16 frystyk 31:
1.2 timbl 32:
33:
34: /* Element Stack
35: ** -------------
36: ** This allows us to return down the stack reselcting styles.
37: ** As we return, attribute values will be garbage in general.
38: */
39: typedef struct _HTElement HTElement;
40: struct _HTElement {
41: HTElement * next; /* Previously nested element or 0 */
42: HTTag* tag; /* The tag at this level */
43: };
44:
45:
/*	States of the character-driven parser in SGML_character().
**	The order of the enumerators is kept exactly as before.
*/
typedef enum _sgml_state {
    S_text,             /* ordinary character data */
    S_literal,          /* inside a literal element, matching its end tag */
    S_tag,              /* accumulating a start tag name after '<' */
    S_tag_gap,          /* inside a tag, expecting an attribute or '>' */
    S_attr,             /* accumulating an attribute name */
    S_attr_gap,         /* after an attribute name: attribute, '=' or '>' */
    S_equals,           /* after '=', expecting a value */
    S_value,            /* accumulating an unquoted value */
    S_after_open,       /* just after a start tag was completed */
    S_nl,               /* a newline in text is being held back */
    S_nl_tago,          /* held newline followed by '<' */
    S_ero,              /* after '&' */
    S_cro,              /* after "&#" */
#ifdef ISO_2022_JP
    S_esc,              /* after ESC */
    S_dollar,           /* after ESC '$' */
    S_paren,            /* after ESC '(' */
    S_nonascii_text,    /* text passed through until the next ESC */
#endif
    S_squoted,          /* inside a '...' attribute value */
    S_dquoted,          /* inside a "..." attribute value */
    S_end,              /* accumulating an end tag name after "</" */
    S_entity,           /* accumulating an entity name */
    S_junk_tag          /* skipping up to the next '>' */
} sgml_state;
56:
57:
1.2 timbl 58: /* Internal Context Data Structure
59: ** -------------------------------
60: */
61: struct _HTStream {
62:
63: CONST HTStreamClass * isa; /* inherited from HTStream */
64:
65: CONST SGML_dtd *dtd;
66: HTStructuredClass *actions; /* target class */
67: HTStructured *target; /* target object */
68:
1.1 timbl 69: HTTag *current_tag;
1.2 timbl 70: int current_attribute_number;
1.1 timbl 71: HTChunk *string;
72: HTElement *element_stack;
1.21 frystyk 73: sgml_state state;
1.2 timbl 74: #ifdef CALLERDATA
1.1 timbl 75: void * callerData;
1.2 timbl 76: #endif
77: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
78: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
79: } ;
80:
81:
/* Pass one character on to the target through its put_character action.
** Expands to a use of a variable named `context`, which must be in scope
** wherever the macro is used. */
82: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
83:
1.1 timbl 84:
1.17 timbl 85: /* Find Attribute Number
86: ** ---------------------
87: */
88:
89: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
90: {
91: attr* attributes = tag->attributes;
92:
93: int high, low, i, diff; /* Binary search for attribute name */
94: for(low=0, high=tag->number_of_attributes;
95: high > low ;
96: diff < 0 ? (low = i+1) : (high = i) ) {
97: i = (low + (high-low)/2);
98: diff = strcasecomp(attributes[i].name, s);
99: if (diff==0) return i; /* success: found it */
100: } /* for */
101:
102: return -1;
103: }
104:
1.1 timbl 105:
106: /* Handle Attribute
107: ** ----------------
108: */
109: /* PUBLIC CONST char * SGML_default = ""; ?? */
110:
1.21 frystyk 111: PRIVATE void handle_attribute_name ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 112: {
1.2 timbl 113:
114: HTTag * tag = context->current_tag;
115:
1.17 timbl 116: int i = SGMLFindAttribute(tag, s);
117: if (i>=0) {
118: context->current_attribute_number = i;
119: context->present[i] = YES;
120: if (context->value[i]) {
121: free(context->value[i]);
122: context->value[i] = NULL;
123: }
124: return;
125: } /* if */
1.2 timbl 126:
1.20 frystyk 127: if (SGML_TRACE)
1.25 ! frystyk 128: fprintf(TDEST, "SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 129: s, context->current_tag->name);
130: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 131: }
132:
133:
134: /* Handle attribute value
135: ** ----------------------
136: */
1.21 frystyk 137: PRIVATE void handle_attribute_value ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 138: {
1.2 timbl 139: if (context->current_attribute_number != INVALID) {
140: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 141: } else {
1.25 ! frystyk 142: if (SGML_TRACE) fprintf(TDEST, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 143: }
1.2 timbl 144: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 145: }
146:
1.2 timbl 147:
1.1 timbl 148: /* Handle entity
149: ** -------------
150: **
151: ** On entry,
152: ** s contains the entity name zero terminated
153: ** Bugs:
154: ** If the entity name is unknown, the terminator is treated as
155: ** a printable non-special character in all cases, even if it is '<'
156: */
1.21 frystyk 157: PRIVATE void handle_entity ARGS2(HTStream *, context, char, term)
1.1 timbl 158: {
1.2 timbl 159:
1.3 timbl 160: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 161: CONST char *s = context->string->data;
1.2 timbl 162:
163: int high, low, i, diff;
164: for(low=0, high = context->dtd->number_of_entities;
165: high > low ;
166: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
167: i = (low + (high-low)/2);
168: diff = strcmp(entities[i], s); /* Csse sensitive! */
169: if (diff==0) { /* success: found it */
170: (*context->actions->put_entity)(context->target, i);
171: return;
1.1 timbl 172: }
173: }
174: /* If entity string not found, display as text */
1.20 frystyk 175: if (SGML_TRACE)
1.25 ! frystyk 176: fprintf(TDEST, "SGML: Unknown entity %s\n", s);
1.2 timbl 177: PUTC('&');
1.1 timbl 178: {
179: CONST char *p;
180: for (p=s; *p; p++) {
1.2 timbl 181: PUTC(*p);
1.1 timbl 182: }
183: }
1.2 timbl 184: PUTC(term);
1.1 timbl 185: }
186:
1.2 timbl 187:
1.1 timbl 188: /* End element
1.2 timbl 189: ** -----------
1.1 timbl 190: */
1.21 frystyk 191: PRIVATE void end_element ARGS2(HTStream *, context, HTTag *, old_tag)
1.1 timbl 192: {
1.25 ! frystyk 193: if (SGML_TRACE) fprintf(TDEST, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 194: if (old_tag->contents == SGML_EMPTY) {
1.25 ! frystyk 195: if (SGML_TRACE) fprintf(TDEST,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 196: old_tag->name);
197: return;
198: }
199: while (context->element_stack) {/* Loop is error path only */
200: HTElement * N = context->element_stack;
201: HTTag * t = N->tag;
202:
203: if (old_tag != t) { /* Mismatch: syntax error */
204: if (context->element_stack->next) { /* This is not the last level */
1.25 ! frystyk 205: if (SGML_TRACE) fprintf(TDEST,
1.1 timbl 206: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
207: old_tag->name, t->name, t->name);
208: } else { /* last level */
1.25 ! frystyk 209: if (SGML_TRACE) fprintf(TDEST,
1.1 timbl 210: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
211: old_tag->name, t->name, old_tag->name);
212: return; /* Ignore */
213: }
214: }
215:
216: context->element_stack = N->next; /* Remove from stack */
217: free(N);
1.2 timbl 218: (*context->actions->end_element)(context->target,
219: t - context->dtd->tags);
1.1 timbl 220: if (old_tag == t) return; /* Correct sequence */
221:
222: /* Syntax error path only */
223:
224: }
1.25 ! frystyk 225: if (SGML_TRACE) fprintf(TDEST,
1.1 timbl 226: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
227: }
228:
229:
1.17 timbl 230: /* Start an element
231: ** ----------------
1.1 timbl 232: */
1.21 frystyk 233: PRIVATE void start_element ARGS1(HTStream *, context)
1.1 timbl 234: {
235: HTTag * new_tag = context->current_tag;
236:
1.25 ! frystyk 237: if (SGML_TRACE) fprintf(TDEST, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 238: (*context->actions->start_element)(
239: context->target,
240: new_tag - context->dtd->tags,
241: context->present,
1.3 timbl 242: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 243: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 244: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
245: if (N == NULL) outofmem(__FILE__, "start_element");
246: N->next = context->element_stack;
247: N->tag = new_tag;
248: context->element_stack = N;
249: }
250: }
251:
252:
1.2 timbl 253: /* Find Tag in DTD tag list
254: ** ------------------------
1.1 timbl 255: **
256: ** On entry,
1.2 timbl 257: ** dtd points to dtd structire including valid tag list
258: ** string points to name of tag in question
1.1 timbl 259: **
1.2 timbl 260: ** On exit,
261: ** returns:
1.7 timbl 262: ** NULL tag not found
263: ** else address of tag structure in dtd
1.2 timbl 264: */
1.11 timbl 265: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 266: {
267: int high, low, i, diff;
268: for(low=0, high=dtd->number_of_tags;
269: high > low ;
270: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
271: i = (low + (high-low)/2);
1.3 timbl 272: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 273: if (diff==0) { /* success: found it */
1.7 timbl 274: return &dtd->tags[i];
1.2 timbl 275: }
276: }
1.7 timbl 277: return NULL;
1.2 timbl 278: }
279:
280: /*________________________________________________________________________
281: ** Public Methods
1.1 timbl 282: */
283:
1.2 timbl 284:
285: /* Could check that we are back to bottom of stack! @@ */
1.1 timbl 286:
1.22 frystyk 287: PUBLIC int SGML_free ARGS1(HTStream *, context)
1.8 timbl 288: {
1.14 frystyk 289: int cnt;
290:
1.15 frystyk 291: while (context->element_stack) { /* Make sure, that all tags are gone */
292: HTElement *ptr = context->element_stack;
293:
1.25 ! frystyk 294: if(SGML_TRACE) fprintf(TDEST, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 295: context->element_stack->tag->name);
296: context->element_stack = ptr->next;
297: free(ptr);
298: }
1.19 duns 299: (*context->actions->_free)(context->target);
1.8 timbl 300: HTChunkFree(context->string);
1.15 frystyk 301: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 302: if(context->value[cnt])
303: free(context->value[cnt]);
1.8 timbl 304: free(context);
1.22 frystyk 305: return 0;
1.1 timbl 306: }
307:
1.22 frystyk 308: PUBLIC int SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 309: {
1.14 frystyk 310: int cnt;
311:
1.15 frystyk 312: while (context->element_stack) { /* Make sure, that all tags are gone */
313: HTElement *ptr = context->element_stack;
314:
1.25 ! frystyk 315: if(SGML_TRACE) fprintf(TDEST, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 316: context->element_stack->tag->name);
317: context->element_stack = ptr->next;
318: free(ptr);
319: }
1.8 timbl 320: (*context->actions->abort)(context->target, e);
1.1 timbl 321: HTChunkFree(context->string);
1.14 frystyk 322: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
323: if(context->value[cnt])
324: free(context->value[cnt]);
1.1 timbl 325: free(context);
1.22 frystyk 326: return EOF;
1.1 timbl 327: }
328:
1.2 timbl 329:
/*	Read and write user callback handle
**	-----------------------------------
**
**	The callbacks from the SGML parser have an SGML context parameter.
**	These two calls let the caller attach a handle of his own to a
**	particular SGML context and fetch it back later.  They exist only
**	when CALLERDATA is defined.
*/

#ifdef CALLERDATA
/* Return the handle last stored in this context */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * handle = context->callerData;
    return handle;
}

/* Store a handle in this context; the parser never looks inside it */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    HTStream * me = context;
    me->callerData = data;
}
#endif
1.1 timbl 349:
1.2 timbl 350: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 351:
352: {
1.2 timbl 353: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 354: HTChunk *string = context->string;
355:
356: switch(context->state) {
1.18 timbl 357:
358: case S_after_open: /* Strip one trainling newline
359: only after opening nonempty element. - SGML:Ugh! */
360: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
361: break;
362: }
363: context->state = S_text;
364: goto normal_text;
365: /* (***falls through***) */
366:
1.1 timbl 367: case S_text:
1.18 timbl 368: normal_text:
369:
1.13 timbl 370: #ifdef ISO_2022_JP
371: if (c=='\033') {
372: context->state = S_esc;
373: PUTC(c);
374: break;
375: }
376: #endif /* ISO_2022_JP */
1.6 timbl 377: if (c=='&' && (!context->element_stack || (
378: context->element_stack->tag &&
379: ( context->element_stack->tag->contents == SGML_MIXED
380: || context->element_stack->tag->contents ==
381: SGML_RCDATA)
382: ))) {
1.1 timbl 383: string->size = 0;
384: context->state = S_ero;
385:
386: } else if (c=='<') {
387: string->size = 0;
388: context->state = (context->element_stack &&
1.13 timbl 389: context->element_stack->tag &&
390: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 391: S_literal : S_tag;
1.18 timbl 392: } else if (c=='\n') { /* Newline - ignore if before tag end! */
393: context->state = S_nl;
1.2 timbl 394: } else PUTC(c);
1.1 timbl 395: break;
1.13 timbl 396:
1.18 timbl 397: case S_nl:
398: if (c=='<') {
399: string->size = 0;
400: context->state = (context->element_stack &&
401: context->element_stack->tag &&
402: context->element_stack->tag->contents == SGML_LITERAL) ?
403: S_literal : S_nl_tago;
404: } else {
405: PUTC('\n');
406: context->state = S_text;
407: goto normal_text;
408: }
409: break;
410:
411: case S_nl_tago: /* Had newline and tag opener */
412: if (c != '/') {
413: PUTC('\n'); /* Only ignore newline before </ */
414: }
415: context->state = S_tag;
416: goto handle_S_tag;
417:
1.13 timbl 418: #ifdef ISO_2022_JP
419: case S_esc:
420: if (c=='$') {
421: context->state = S_dollar;
422: } else if (c=='(') {
423: context->state = S_paren;
424: } else {
425: context->state = S_text;
426: }
427: PUTC(c);
428: break;
429: case S_dollar:
430: if (c=='@' || c=='B') {
431: context->state = S_nonascii_text;
432: } else {
433: context->state = S_text;
434: }
435: PUTC(c);
436: break;
437: case S_paren:
438: if (c=='B' || c=='J') {
439: context->state = S_text;
440: } else {
441: context->state = S_text;
442: }
443: PUTC(c);
444: break;
445: case S_nonascii_text:
446: if (c=='\033') {
447: context->state = S_esc;
448: PUTC(c);
449: } else {
450: PUTC(c);
451: }
452: break;
453: #endif /* ISO_2022_JP */
1.1 timbl 454:
1.12 timbl 455: /* In literal mode, waits only for specific end tag!
1.2 timbl 456: ** Only foir compatibility with old servers.
1.1 timbl 457: */
1.12 timbl 458: case S_literal :
1.1 timbl 459: HTChunkPutc(string, c);
460: if ( TOUPPER(c) != ((string->size ==1) ? '/'
461: : context->element_stack->tag->name[string->size-2])) {
462: int i;
463:
1.12 timbl 464: /* If complete match, end literal */
1.1 timbl 465: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
466: end_element(context, context->element_stack->tag);
467: string->size = 0;
1.2 timbl 468: context->current_attribute_number = INVALID;
1.1 timbl 469: context->state = S_text;
470: break;
471: } /* If Mismatch: recover string. */
1.2 timbl 472: PUTC( '<');
1.1 timbl 473: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 474: PUTC(
1.1 timbl 475: string->data[i]);
476: context->state = S_text;
477: }
478:
479: break;
480:
481: /* Character reference or Entity
482: */
483: case S_ero:
484: if (c=='#') {
485: context->state = S_cro; /* &# is Char Ref Open */
486: break;
487: }
488: context->state = S_entity; /* Fall through! */
489:
490: /* Handle Entities
491: */
492: case S_entity:
493: if (isalnum(c))
494: HTChunkPutc(string, c);
495: else {
496: HTChunkTerminate(string);
497: handle_entity(context, c);
498: context->state = S_text;
499: }
500: break;
501:
502: /* Character reference
503: */
504: case S_cro:
505: if (isalnum(c))
506: HTChunkPutc(string, c); /* accumulate a character NUMBER */
507: else {
508: int value;
509: HTChunkTerminate(string);
510: if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 511: PUTC(FROMASCII((char)value));
1.1 timbl 512: context->state = S_text;
513: }
514: break;
515:
516: /* Tag
517: */
518: case S_tag: /* new tag */
1.18 timbl 519: handle_S_tag:
520:
1.1 timbl 521: if (isalnum(c))
522: HTChunkPutc(string, c);
523: else { /* End of tag name */
1.7 timbl 524: HTTag * t;
1.1 timbl 525: if (c=='/') {
1.20 frystyk 526: if (SGML_TRACE) if (string->size!=0)
1.25 ! frystyk 527: fprintf(TDEST,"SGML: `<%s/' found!\n", string->data);
1.1 timbl 528: context->state = S_end;
529: break;
530: }
531: HTChunkTerminate(string) ;
1.2 timbl 532:
1.10 timbl 533: t = SGMLFindTag(dtd, string->data);
1.7 timbl 534: if (!t) {
1.25 ! frystyk 535: if(SGML_TRACE) fprintf(TDEST, "SGML: *** Unknown element %s\n",
1.1 timbl 536: string->data);
537: context->state = (c=='>') ? S_text : S_junk_tag;
538: break;
539: }
1.7 timbl 540: context->current_tag = t;
1.2 timbl 541:
542: /* Clear out attributes
543: */
1.1 timbl 544:
1.2 timbl 545: {
546: int i;
547: for (i=0; i< context->current_tag->number_of_attributes; i++)
548: context->present[i] = NO;
1.1 timbl 549: }
550: string->size = 0;
1.2 timbl 551: context->current_attribute_number = INVALID;
1.1 timbl 552:
553: if (c=='>') {
554: if (context->current_tag->name) start_element(context);
1.18 timbl 555: context->state = S_after_open;
1.1 timbl 556: } else {
557: context->state = S_tag_gap;
558: }
559: }
560: break;
561:
562:
563: case S_tag_gap: /* Expecting attribute or > */
564: if (WHITE(c)) break; /* Gap between attributes */
565: if (c=='>') { /* End of tag */
566: if (context->current_tag->name) start_element(context);
1.18 timbl 567: context->state = S_after_open;
1.1 timbl 568: break;
569: }
570: HTChunkPutc(string, c);
571: context->state = S_attr; /* Get attribute */
572: break;
573:
574: /* accumulating value */
575: case S_attr:
576: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
577: HTChunkTerminate(string) ;
578: handle_attribute_name(context, string->data);
579: string->size = 0;
580: if (c=='>') { /* End of tag */
581: if (context->current_tag->name) start_element(context);
1.18 timbl 582: context->state = S_after_open;
1.1 timbl 583: break;
584: }
585: context->state = (c=='=' ? S_equals: S_attr_gap);
586: } else {
587: HTChunkPutc(string, c);
588: }
589: break;
590:
591: case S_attr_gap: /* Expecting attribute or = or > */
592: if (WHITE(c)) break; /* Gap after attribute */
593: if (c=='>') { /* End of tag */
594: if (context->current_tag->name) start_element(context);
1.18 timbl 595: context->state = S_after_open;
1.1 timbl 596: break;
597: } else if (c=='=') {
598: context->state = S_equals;
599: break;
600: }
601: HTChunkPutc(string, c);
602: context->state = S_attr; /* Get next attribute */
603: break;
604:
605: case S_equals: /* After attr = */
606: if (WHITE(c)) break; /* Before attribute value */
607: if (c=='>') { /* End of tag */
1.25 ! frystyk 608: if (SGML_TRACE) fprintf(TDEST, "SGML: found = but no value\n");
1.1 timbl 609: if (context->current_tag->name) start_element(context);
1.18 timbl 610: context->state = S_after_open;
1.1 timbl 611: break;
612:
613: } else if (c=='\'') {
614: context->state = S_squoted;
615: break;
616:
617: } else if (c=='"') {
618: context->state = S_dquoted;
619: break;
620: }
621: HTChunkPutc(string, c);
622: context->state = S_value;
623: break;
624:
625: case S_value:
626: if (WHITE(c) || (c=='>')) { /* End of word */
627: HTChunkTerminate(string) ;
628: handle_attribute_value(context, string->data);
629: string->size = 0;
630: if (c=='>') { /* End of tag */
631: if (context->current_tag->name) start_element(context);
1.18 timbl 632: context->state = S_after_open;
1.1 timbl 633: break;
634: }
635: else context->state = S_tag_gap;
636: } else {
637: HTChunkPutc(string, c);
638: }
639: break;
640:
641: case S_squoted: /* Quoted attribute value */
642: if (c=='\'') { /* End of attribute value */
643: HTChunkTerminate(string) ;
644: handle_attribute_value(context, string->data);
645: string->size = 0;
646: context->state = S_tag_gap;
647: } else {
648: HTChunkPutc(string, c);
649: }
650: break;
651:
652: case S_dquoted: /* Quoted attribute value */
653: if (c=='"') { /* End of attribute value */
654: HTChunkTerminate(string) ;
655: handle_attribute_value(context, string->data);
656: string->size = 0;
657: context->state = S_tag_gap;
658: } else {
659: HTChunkPutc(string, c);
660: }
661: break;
662:
663: case S_end: /* </ */
664: if (isalnum(c))
665: HTChunkPutc(string, c);
666: else { /* End of end tag name */
1.7 timbl 667: HTTag * t;
1.1 timbl 668: HTChunkTerminate(string) ;
1.7 timbl 669: if (!*string->data) { /* Empty end tag */
670: t = context->element_stack->tag;
671: } else {
1.10 timbl 672: t = SGMLFindTag(dtd, string->data);
1.1 timbl 673: }
1.7 timbl 674: if (!t) {
1.25 ! frystyk 675: if(SGML_TRACE) fprintf(TDEST,
1.1 timbl 676: "Unknown end tag </%s>\n", string->data);
1.2 timbl 677: } else {
1.7 timbl 678: context->current_tag = t;
1.2 timbl 679: end_element( context, context->current_tag);
1.1 timbl 680: }
1.2 timbl 681:
1.1 timbl 682: string->size = 0;
1.2 timbl 683: context->current_attribute_number = INVALID;
1.7 timbl 684: if (c!='>') {
1.20 frystyk 685: if (SGML_TRACE && !WHITE(c))
1.25 ! frystyk 686: fprintf(TDEST,"SGML: `</%s%c' found!\n",
1.7 timbl 687: string->data, c);
688: context->state = S_junk_tag;
689: } else {
690: context->state = S_text;
691: }
1.1 timbl 692: }
693: break;
694:
695:
696: case S_junk_tag:
697: if (c=='>') {
698: context->state = S_text;
699: }
700:
701: } /* switch on context->state */
702:
703: } /* SGML_character */
1.2 timbl 704:
705:
706: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
707: {
708: CONST char *p;
709: for(p=str; *p; p++)
710: SGML_character(context, *p);
711: }
712:
713:
714: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
715: {
716: CONST char *p;
717: CONST char *e = str+l;
718: for(p=str; p<e; p++)
719: SGML_character(context, *p);
720: }
721:
722: /*_______________________________________________________________________
723: */
724:
725: /* Structured Object Class
726: ** -----------------------
727: */
/* Class record of the SGML parser stream.  SGML_new() stores its address
** in context->isa.  It holds the class name string followed by the free,
** abort, character, string and block-write routines defined in this file,
** in that order. */
728: PUBLIC CONST HTStreamClass SGMLParser =
729: {
730: "SGMLParser",
731: SGML_free,
1.8 timbl 732: SGML_abort,
1.9 timbl 733: SGML_character,
734: SGML_string,
735: SGML_write,
1.2 timbl 736: };
737:
738: /* Create SGML Engine
739: ** ------------------
740: **
741: ** On entry,
742: ** dtd represents the DTD, along with
743: ** actions is the sink for the data as a set of routines.
744: **
745: */
746:
747: PUBLIC HTStream* SGML_new ARGS2(
748: CONST SGML_dtd *, dtd,
749: HTStructured *, target)
750: {
751: int i;
752: HTStream* context = (HTStream *) malloc(sizeof(*context));
753: if (!context) outofmem(__FILE__, "SGML_begin");
754:
755: context->isa = &SGMLParser;
756: context->string = HTChunkCreate(128); /* Grow by this much */
757: context->dtd = dtd;
758: context->target = target;
759: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
760: /* Ugh: no OO */
761: context->state = S_text;
762: context->element_stack = 0; /* empty */
763: #ifdef CALLERDATA
764: context->callerData = (void*) callerData;
765: #endif
766: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
767:
768: return context;
769: }
1.14 frystyk 770:
771:
772:
773:
774:
775:
776:
777:
778:
779:
780:
1.2 timbl 781:
Webmaster