Annotation of libwww/Library/src/SGML.c, revision 1.20
1.1 timbl 1: /* General SGML Parser code SGML.c
2: ** ========================
3: **
1.2 timbl 4: ** This module implements an HTStream object. To parse an
1.1 timbl 5: ** SGML file, create this object which is a parser. The object
1.2 timbl 6: ** is (currently) created by being passed a DTD structure,
7: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 8: **
1.19 duns 9: ** 6 Feb 93 Binary searches used. Interface modified.
10: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 11: */
12:
1.20 ! frystyk 13: /* System dependent stuff */
! 14: #include "tcp.h" /* For FROMASCII */
! 15:
! 16: /* Library includes */
1.1 timbl 17: #include "HTUtils.h"
18: #include "HTChunk.h"
1.20 ! frystyk 19: #include "SGML.h"
1.1 timbl 20:
1.2 timbl 21: #define INVALID (-1) /* current_attribute_number value meaning "no known attribute is awaiting a value" */
22:
1.1 timbl 23: /* The State (context) of the parser
24: **
1.2 timbl 25: ** This is passed with each call to make the parser reentrant
1.1 timbl 26: **
27: */
28:
1.16 frystyk 29:
1.2 timbl 30:
31:
32: /* Element Stack
33: ** -------------
34: ** This allows us to return down the stack reselcting styles.
35: ** As we return, attribute values will be garbage in general.
36: */
37: typedef struct _HTElement HTElement;
38: struct _HTElement {
39: HTElement * next; /* Previously nested element or 0 */
40: HTTag* tag; /* The tag at this level */
41: };
42:
43:
44: /* Internal Context Data Structure
45: ** -------------------------------
46: */
47: struct _HTStream {
48:
49: CONST HTStreamClass * isa; /* inherited from HTStream */
50:
51: CONST SGML_dtd *dtd;
52: HTStructuredClass *actions; /* target class */
53: HTStructured *target; /* target object */
54:
1.1 timbl 55: HTTag *current_tag;
1.2 timbl 56: int current_attribute_number;
1.1 timbl 57: HTChunk *string;
58: HTElement *element_stack;
1.12 timbl 59: enum sgml_state { S_text, S_literal, S_tag, S_tag_gap,
1.18 timbl 60: S_attr, S_attr_gap, S_equals, S_value, S_after_open,
61: S_nl, S_nl_tago,
1.1 timbl 62: S_ero, S_cro,
1.13 timbl 63: #ifdef ISO_2022_JP
64: S_esc, S_dollar, S_paren, S_nonascii_text,
65: #endif
1.1 timbl 66: S_squoted, S_dquoted, S_end, S_entity, S_junk_tag} state;
1.2 timbl 67: #ifdef CALLERDATA
1.1 timbl 68: void * callerData;
1.2 timbl 69: #endif
70: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
71: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
72: } ;
73:
74:
/* Send one character to the target object through the target class's
** put_character routine. The macro refers to a variable called
** `context', which must be in scope wherever it is used.
*/
#define PUTC(ch) \
    ((*context->actions->put_character)(context->target, ch))
76:
1.1 timbl 77:
1.17 timbl 78: /* Find Attribute Number
79: ** ---------------------
80: */
81:
82: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
83: {
84: attr* attributes = tag->attributes;
85:
86: int high, low, i, diff; /* Binary search for attribute name */
87: for(low=0, high=tag->number_of_attributes;
88: high > low ;
89: diff < 0 ? (low = i+1) : (high = i) ) {
90: i = (low + (high-low)/2);
91: diff = strcasecomp(attributes[i].name, s);
92: if (diff==0) return i; /* success: found it */
93: } /* for */
94:
95: return -1;
96: }
97:
1.1 timbl 98:
99: /* Handle Attribute
100: ** ----------------
101: */
102: /* PUBLIC CONST char * SGML_default = ""; ?? */
103:
104: #ifdef __STDC__
1.17 timbl 105: PRIVATE void handle_attribute_name(HTStream * context, CONST char * s)
1.1 timbl 106: #else
107: PRIVATE void handle_attribute_name(context, s)
1.2 timbl 108: HTStream * context;
1.1 timbl 109: char *s;
110: #endif
111: {
1.2 timbl 112:
113: HTTag * tag = context->current_tag;
114:
1.17 timbl 115: int i = SGMLFindAttribute(tag, s);
116: if (i>=0) {
117: context->current_attribute_number = i;
118: context->present[i] = YES;
119: if (context->value[i]) {
120: free(context->value[i]);
121: context->value[i] = NULL;
122: }
123: return;
124: } /* if */
1.2 timbl 125:
1.20 ! frystyk 126: if (SGML_TRACE)
1.2 timbl 127: fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
128: s, context->current_tag->name);
129: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 130: }
131:
132:
133: /* Handle attribute value
134: ** ----------------------
135: */
136: #ifdef __STDC__
1.2 timbl 137: PRIVATE void handle_attribute_value(HTStream * context, const char * s)
1.1 timbl 138: #else
139: PRIVATE void handle_attribute_value(context, s)
1.2 timbl 140: HTStream * context;
1.1 timbl 141: char *s;
142: #endif
143: {
1.2 timbl 144: if (context->current_attribute_number != INVALID) {
145: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 146: } else {
1.20 ! frystyk 147: if (SGML_TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 148: }
1.2 timbl 149: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 150: }
151:
1.2 timbl 152:
1.1 timbl 153: /* Handle entity
154: ** -------------
155: **
156: ** On entry,
157: ** s contains the entity name zero terminated
158: ** Bugs:
159: ** If the entity name is unknown, the terminator is treated as
160: ** a printable non-special character in all cases, even if it is '<'
161: */
162: #ifdef __STDC__
1.2 timbl 163: PRIVATE void handle_entity(HTStream * context, char term)
1.1 timbl 164: #else
165: PRIVATE void handle_entity(context, term)
1.2 timbl 166: HTStream * context;
1.1 timbl 167: char term;
168: #endif
169: {
1.2 timbl 170:
1.3 timbl 171: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 172: CONST char *s = context->string->data;
1.2 timbl 173:
174: int high, low, i, diff;
175: for(low=0, high = context->dtd->number_of_entities;
176: high > low ;
177: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
178: i = (low + (high-low)/2);
179: diff = strcmp(entities[i], s); /* Csse sensitive! */
180: if (diff==0) { /* success: found it */
181: (*context->actions->put_entity)(context->target, i);
182: return;
1.1 timbl 183: }
184: }
185: /* If entity string not found, display as text */
1.20 ! frystyk 186: if (SGML_TRACE)
1.1 timbl 187: fprintf(stderr, "SGML: Unknown entity %s\n", s);
1.2 timbl 188: PUTC('&');
1.1 timbl 189: {
190: CONST char *p;
191: for (p=s; *p; p++) {
1.2 timbl 192: PUTC(*p);
1.1 timbl 193: }
194: }
1.2 timbl 195: PUTC(term);
1.1 timbl 196: }
197:
1.2 timbl 198:
1.1 timbl 199: /* End element
1.2 timbl 200: ** -----------
1.1 timbl 201: */
202: #ifdef __STDC__
1.2 timbl 203: PRIVATE void end_element(HTStream * context, HTTag * old_tag)
1.1 timbl 204: #else
205: PRIVATE void end_element(context, old_tag)
206: HTTag * old_tag;
1.2 timbl 207: HTStream * context;
1.1 timbl 208: #endif
209: {
1.20 ! frystyk 210: if (SGML_TRACE) fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 211: if (old_tag->contents == SGML_EMPTY) {
1.20 ! frystyk 212: if (SGML_TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 213: old_tag->name);
214: return;
215: }
216: while (context->element_stack) {/* Loop is error path only */
217: HTElement * N = context->element_stack;
218: HTTag * t = N->tag;
219:
220: if (old_tag != t) { /* Mismatch: syntax error */
221: if (context->element_stack->next) { /* This is not the last level */
1.20 ! frystyk 222: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 223: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
224: old_tag->name, t->name, t->name);
225: } else { /* last level */
1.20 ! frystyk 226: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 227: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
228: old_tag->name, t->name, old_tag->name);
229: return; /* Ignore */
230: }
231: }
232:
233: context->element_stack = N->next; /* Remove from stack */
234: free(N);
1.2 timbl 235: (*context->actions->end_element)(context->target,
236: t - context->dtd->tags);
1.1 timbl 237: if (old_tag == t) return; /* Correct sequence */
238:
239: /* Syntax error path only */
240:
241: }
1.20 ! frystyk 242: if (SGML_TRACE) fprintf(stderr,
1.1 timbl 243: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
244: }
245:
246:
1.17 timbl 247: /* Start an element
248: ** ----------------
1.1 timbl 249: */
250: #ifdef __STDC__
1.2 timbl 251: PRIVATE void start_element(HTStream * context)
1.1 timbl 252: #else
253: PRIVATE void start_element(context)
1.2 timbl 254: HTStream * context;
1.1 timbl 255: #endif
256: {
257: HTTag * new_tag = context->current_tag;
258:
1.20 ! frystyk 259: if (SGML_TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 260: (*context->actions->start_element)(
261: context->target,
262: new_tag - context->dtd->tags,
263: context->present,
1.3 timbl 264: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 265: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 266: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
267: if (N == NULL) outofmem(__FILE__, "start_element");
268: N->next = context->element_stack;
269: N->tag = new_tag;
270: context->element_stack = N;
271: }
272: }
273:
274:
1.2 timbl 275: /* Find Tag in DTD tag list
276: ** ------------------------
1.1 timbl 277: **
278: ** On entry,
1.2 timbl 279: ** dtd points to dtd structire including valid tag list
280: ** string points to name of tag in question
1.1 timbl 281: **
1.2 timbl 282: ** On exit,
283: ** returns:
1.7 timbl 284: ** NULL tag not found
285: ** else address of tag structure in dtd
1.2 timbl 286: */
1.11 timbl 287: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 288: {
289: int high, low, i, diff;
290: for(low=0, high=dtd->number_of_tags;
291: high > low ;
292: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
293: i = (low + (high-low)/2);
1.3 timbl 294: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 295: if (diff==0) { /* success: found it */
1.7 timbl 296: return &dtd->tags[i];
1.2 timbl 297: }
298: }
1.7 timbl 299: return NULL;
1.2 timbl 300: }
301:
302: /*________________________________________________________________________
303: ** Public Methods
1.1 timbl 304: */
305:
1.2 timbl 306:
307: /* Could check that we are back to bottom of stack! @@ */
1.1 timbl 308:
1.8 timbl 309: PUBLIC void SGML_free ARGS1(HTStream *, context)
310: {
1.14 frystyk 311: int cnt;
312:
1.15 frystyk 313: while (context->element_stack) { /* Make sure, that all tags are gone */
314: HTElement *ptr = context->element_stack;
315:
1.20 ! frystyk 316: if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 317: context->element_stack->tag->name);
318: context->element_stack = ptr->next;
319: free(ptr);
320: }
1.19 duns 321: (*context->actions->_free)(context->target);
1.8 timbl 322: HTChunkFree(context->string);
1.15 frystyk 323: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 324: if(context->value[cnt])
325: free(context->value[cnt]);
1.8 timbl 326: free(context);
1.1 timbl 327: }
328:
1.8 timbl 329: PUBLIC void SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 330: {
1.14 frystyk 331: int cnt;
332:
1.15 frystyk 333: while (context->element_stack) { /* Make sure, that all tags are gone */
334: HTElement *ptr = context->element_stack;
335:
1.20 ! frystyk 336: if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 337: context->element_stack->tag->name);
338: context->element_stack = ptr->next;
339: free(ptr);
340: }
1.8 timbl 341: (*context->actions->abort)(context->target, e);
1.1 timbl 342: HTChunkFree(context->string);
1.14 frystyk 343: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
344: if(context->value[cnt])
345: free(context->value[cnt]);
1.1 timbl 346: free(context);
347: }
348:
1.2 timbl 349:
/* Read and write user callback handle
** -----------------------------------
**
** The callbacks from the SGML parser have an SGML context parameter.
** These calls allow the caller to associate his own context with a
** particular SGML context. They are only compiled when CALLERDATA
** is defined.
*/

#ifdef CALLERDATA
/* Return the handle last stored in this context.
*/
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * handle = context->callerData;
    return handle;
}

/* Store the caller's handle in this context.
*/
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    HTStream * me = context;
    me->callerData = data;
}
#endif
1.1 timbl 369:
1.2 timbl 370: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 371:
372: {
1.2 timbl 373: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 374: HTChunk *string = context->string;
375:
376: switch(context->state) {
1.18 timbl 377:
378: case S_after_open: /* Strip one trainling newline
379: only after opening nonempty element. - SGML:Ugh! */
380: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
381: break;
382: }
383: context->state = S_text;
384: goto normal_text;
385: /* (***falls through***) */
386:
1.1 timbl 387: case S_text:
1.18 timbl 388: normal_text:
389:
1.13 timbl 390: #ifdef ISO_2022_JP
391: if (c=='\033') {
392: context->state = S_esc;
393: PUTC(c);
394: break;
395: }
396: #endif /* ISO_2022_JP */
1.6 timbl 397: if (c=='&' && (!context->element_stack || (
398: context->element_stack->tag &&
399: ( context->element_stack->tag->contents == SGML_MIXED
400: || context->element_stack->tag->contents ==
401: SGML_RCDATA)
402: ))) {
1.1 timbl 403: string->size = 0;
404: context->state = S_ero;
405:
406: } else if (c=='<') {
407: string->size = 0;
408: context->state = (context->element_stack &&
1.13 timbl 409: context->element_stack->tag &&
410: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 411: S_literal : S_tag;
1.18 timbl 412: } else if (c=='\n') { /* Newline - ignore if before tag end! */
413: context->state = S_nl;
1.2 timbl 414: } else PUTC(c);
1.1 timbl 415: break;
1.13 timbl 416:
1.18 timbl 417: case S_nl:
418: if (c=='<') {
419: string->size = 0;
420: context->state = (context->element_stack &&
421: context->element_stack->tag &&
422: context->element_stack->tag->contents == SGML_LITERAL) ?
423: S_literal : S_nl_tago;
424: } else {
425: PUTC('\n');
426: context->state = S_text;
427: goto normal_text;
428: }
429: break;
430:
431: case S_nl_tago: /* Had newline and tag opener */
432: if (c != '/') {
433: PUTC('\n'); /* Only ignore newline before </ */
434: }
435: context->state = S_tag;
436: goto handle_S_tag;
437:
1.13 timbl 438: #ifdef ISO_2022_JP
439: case S_esc:
440: if (c=='$') {
441: context->state = S_dollar;
442: } else if (c=='(') {
443: context->state = S_paren;
444: } else {
445: context->state = S_text;
446: }
447: PUTC(c);
448: break;
449: case S_dollar:
450: if (c=='@' || c=='B') {
451: context->state = S_nonascii_text;
452: } else {
453: context->state = S_text;
454: }
455: PUTC(c);
456: break;
457: case S_paren:
458: if (c=='B' || c=='J') {
459: context->state = S_text;
460: } else {
461: context->state = S_text;
462: }
463: PUTC(c);
464: break;
465: case S_nonascii_text:
466: if (c=='\033') {
467: context->state = S_esc;
468: PUTC(c);
469: } else {
470: PUTC(c);
471: }
472: break;
473: #endif /* ISO_2022_JP */
1.1 timbl 474:
1.12 timbl 475: /* In literal mode, waits only for specific end tag!
1.2 timbl 476: ** Only foir compatibility with old servers.
1.1 timbl 477: */
1.12 timbl 478: case S_literal :
1.1 timbl 479: HTChunkPutc(string, c);
480: if ( TOUPPER(c) != ((string->size ==1) ? '/'
481: : context->element_stack->tag->name[string->size-2])) {
482: int i;
483:
1.12 timbl 484: /* If complete match, end literal */
1.1 timbl 485: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
486: end_element(context, context->element_stack->tag);
487: string->size = 0;
1.2 timbl 488: context->current_attribute_number = INVALID;
1.1 timbl 489: context->state = S_text;
490: break;
491: } /* If Mismatch: recover string. */
1.2 timbl 492: PUTC( '<');
1.1 timbl 493: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 494: PUTC(
1.1 timbl 495: string->data[i]);
496: context->state = S_text;
497: }
498:
499: break;
500:
501: /* Character reference or Entity
502: */
503: case S_ero:
504: if (c=='#') {
505: context->state = S_cro; /* &# is Char Ref Open */
506: break;
507: }
508: context->state = S_entity; /* Fall through! */
509:
510: /* Handle Entities
511: */
512: case S_entity:
513: if (isalnum(c))
514: HTChunkPutc(string, c);
515: else {
516: HTChunkTerminate(string);
517: handle_entity(context, c);
518: context->state = S_text;
519: }
520: break;
521:
522: /* Character reference
523: */
524: case S_cro:
525: if (isalnum(c))
526: HTChunkPutc(string, c); /* accumulate a character NUMBER */
527: else {
528: int value;
529: HTChunkTerminate(string);
530: if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 531: PUTC(FROMASCII((char)value));
1.1 timbl 532: context->state = S_text;
533: }
534: break;
535:
536: /* Tag
537: */
538: case S_tag: /* new tag */
1.18 timbl 539: handle_S_tag:
540:
1.1 timbl 541: if (isalnum(c))
542: HTChunkPutc(string, c);
543: else { /* End of tag name */
1.7 timbl 544: HTTag * t;
1.1 timbl 545: if (c=='/') {
1.20 ! frystyk 546: if (SGML_TRACE) if (string->size!=0)
1.1 timbl 547: fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
548: context->state = S_end;
549: break;
550: }
551: HTChunkTerminate(string) ;
1.2 timbl 552:
1.10 timbl 553: t = SGMLFindTag(dtd, string->data);
1.7 timbl 554: if (!t) {
1.20 ! frystyk 555: if(SGML_TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 556: string->data);
557: context->state = (c=='>') ? S_text : S_junk_tag;
558: break;
559: }
1.7 timbl 560: context->current_tag = t;
1.2 timbl 561:
562: /* Clear out attributes
563: */
1.1 timbl 564:
1.2 timbl 565: {
566: int i;
567: for (i=0; i< context->current_tag->number_of_attributes; i++)
568: context->present[i] = NO;
1.1 timbl 569: }
570: string->size = 0;
1.2 timbl 571: context->current_attribute_number = INVALID;
1.1 timbl 572:
573: if (c=='>') {
574: if (context->current_tag->name) start_element(context);
1.18 timbl 575: context->state = S_after_open;
1.1 timbl 576: } else {
577: context->state = S_tag_gap;
578: }
579: }
580: break;
581:
582:
583: case S_tag_gap: /* Expecting attribute or > */
584: if (WHITE(c)) break; /* Gap between attributes */
585: if (c=='>') { /* End of tag */
586: if (context->current_tag->name) start_element(context);
1.18 timbl 587: context->state = S_after_open;
1.1 timbl 588: break;
589: }
590: HTChunkPutc(string, c);
591: context->state = S_attr; /* Get attribute */
592: break;
593:
594: /* accumulating value */
595: case S_attr:
596: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
597: HTChunkTerminate(string) ;
598: handle_attribute_name(context, string->data);
599: string->size = 0;
600: if (c=='>') { /* End of tag */
601: if (context->current_tag->name) start_element(context);
1.18 timbl 602: context->state = S_after_open;
1.1 timbl 603: break;
604: }
605: context->state = (c=='=' ? S_equals: S_attr_gap);
606: } else {
607: HTChunkPutc(string, c);
608: }
609: break;
610:
611: case S_attr_gap: /* Expecting attribute or = or > */
612: if (WHITE(c)) break; /* Gap after attribute */
613: if (c=='>') { /* End of tag */
614: if (context->current_tag->name) start_element(context);
1.18 timbl 615: context->state = S_after_open;
1.1 timbl 616: break;
617: } else if (c=='=') {
618: context->state = S_equals;
619: break;
620: }
621: HTChunkPutc(string, c);
622: context->state = S_attr; /* Get next attribute */
623: break;
624:
625: case S_equals: /* After attr = */
626: if (WHITE(c)) break; /* Before attribute value */
627: if (c=='>') { /* End of tag */
1.20 ! frystyk 628: if (SGML_TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 629: if (context->current_tag->name) start_element(context);
1.18 timbl 630: context->state = S_after_open;
1.1 timbl 631: break;
632:
633: } else if (c=='\'') {
634: context->state = S_squoted;
635: break;
636:
637: } else if (c=='"') {
638: context->state = S_dquoted;
639: break;
640: }
641: HTChunkPutc(string, c);
642: context->state = S_value;
643: break;
644:
645: case S_value:
646: if (WHITE(c) || (c=='>')) { /* End of word */
647: HTChunkTerminate(string) ;
648: handle_attribute_value(context, string->data);
649: string->size = 0;
650: if (c=='>') { /* End of tag */
651: if (context->current_tag->name) start_element(context);
1.18 timbl 652: context->state = S_after_open;
1.1 timbl 653: break;
654: }
655: else context->state = S_tag_gap;
656: } else {
657: HTChunkPutc(string, c);
658: }
659: break;
660:
661: case S_squoted: /* Quoted attribute value */
662: if (c=='\'') { /* End of attribute value */
663: HTChunkTerminate(string) ;
664: handle_attribute_value(context, string->data);
665: string->size = 0;
666: context->state = S_tag_gap;
667: } else {
668: HTChunkPutc(string, c);
669: }
670: break;
671:
672: case S_dquoted: /* Quoted attribute value */
673: if (c=='"') { /* End of attribute value */
674: HTChunkTerminate(string) ;
675: handle_attribute_value(context, string->data);
676: string->size = 0;
677: context->state = S_tag_gap;
678: } else {
679: HTChunkPutc(string, c);
680: }
681: break;
682:
683: case S_end: /* </ */
684: if (isalnum(c))
685: HTChunkPutc(string, c);
686: else { /* End of end tag name */
1.7 timbl 687: HTTag * t;
1.1 timbl 688: HTChunkTerminate(string) ;
1.7 timbl 689: if (!*string->data) { /* Empty end tag */
690: t = context->element_stack->tag;
691: } else {
1.10 timbl 692: t = SGMLFindTag(dtd, string->data);
1.1 timbl 693: }
1.7 timbl 694: if (!t) {
1.20 ! frystyk 695: if(SGML_TRACE) fprintf(stderr,
1.1 timbl 696: "Unknown end tag </%s>\n", string->data);
1.2 timbl 697: } else {
1.7 timbl 698: context->current_tag = t;
1.2 timbl 699: end_element( context, context->current_tag);
1.1 timbl 700: }
1.2 timbl 701:
1.1 timbl 702: string->size = 0;
1.2 timbl 703: context->current_attribute_number = INVALID;
1.7 timbl 704: if (c!='>') {
1.20 ! frystyk 705: if (SGML_TRACE && !WHITE(c))
1.7 timbl 706: fprintf(stderr,"SGML: `</%s%c' found!\n",
707: string->data, c);
708: context->state = S_junk_tag;
709: } else {
710: context->state = S_text;
711: }
1.1 timbl 712: }
713: break;
714:
715:
716: case S_junk_tag:
717: if (c=='>') {
718: context->state = S_text;
719: }
720:
721: } /* switch on context->state */
722:
723: } /* SGML_character */
1.2 timbl 724:
725:
726: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
727: {
728: CONST char *p;
729: for(p=str; *p; p++)
730: SGML_character(context, *p);
731: }
732:
733:
734: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
735: {
736: CONST char *p;
737: CONST char *e = str+l;
738: for(p=str; p<e; p++)
739: SGML_character(context, *p);
740: }
741:
742: /*_______________________________________________________________________
743: */
744:
745: /* Structured Object Class
746: ** -----------------------
** Class record for the SGML parser stream. SGML_new() stores the address
** of this record in the isa field of every parser context it creates.
** The initializer is positional; HTStreamClass itself is declared in
** another file, so the slot meanings noted below are read off the
** routines placed in them -- confirm against that declaration.
747: */
748: PUBLIC CONST HTStreamClass SGMLParser =
749: {
750: "SGMLParser", /* string naming the class (presumably its name slot) */
751: SGML_free, /* normal end: calls the target's _free, then frees the parser */
1.8 timbl 752: SGML_abort, /* abnormal end: calls the target's abort, then frees the parser */
1.9 timbl 753: SGML_character, /* parse a single character */
754: SGML_string, /* parse a zero-terminated string */
755: SGML_write, /* parse a buffer of given length */
1.2 timbl 756: };
757:
758: /* Create SGML Engine
759: ** ------------------
760: **
761: ** On entry,
762: ** dtd represents the DTD, along with
763: ** actions is the sink for the data as a set of routines.
764: **
765: */
766:
767: PUBLIC HTStream* SGML_new ARGS2(
768: CONST SGML_dtd *, dtd,
769: HTStructured *, target)
770: {
771: int i;
772: HTStream* context = (HTStream *) malloc(sizeof(*context));
773: if (!context) outofmem(__FILE__, "SGML_begin");
774:
775: context->isa = &SGMLParser;
776: context->string = HTChunkCreate(128); /* Grow by this much */
777: context->dtd = dtd;
778: context->target = target;
779: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
780: /* Ugh: no OO */
781: context->state = S_text;
782: context->element_stack = 0; /* empty */
783: #ifdef CALLERDATA
784: context->callerData = (void*) callerData;
785: #endif
786: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
787:
788: return context;
789: }
1.14 frystyk 790:
791:
792:
793:
794:
795:
796:
797:
798:
799:
800:
1.2 timbl 801:
Webmaster