Annotation of libwww/Library/src/SGML.c, revision 1.34
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This module implements an HTStream object. To parse an
1.1 timbl 8: ** SGML file, create this object which is a parser. The object
1.2 timbl 9: ** is (currently) created by being passed a DTD structure,
10: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **
1.19 duns 12: ** 6 Feb 93 Binary searches used. Interface modified.
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
15:
1.25 frystyk 16: /* Library include files */
17: #include "tcp.h"
1.1 timbl 18: #include "HTUtils.h"
1.25 frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22:
1.2 timbl 23: #define INVALID (-1) /* current_attribute_number value meaning "no attribute is waiting for a value" */
24:
1.1 timbl 25: /* The State (context) of the parser
26: **
1.2 timbl 27: ** This is passed with each call to make the parser reentrant
1.1 timbl 28: **
29: */
30:
1.16 frystyk 31:
1.2 timbl 32:
33:
34: /* Element Stack
35: ** -------------
36: ** Singly linked list of the elements currently open, innermost first. This allows us to return down the stack reselecting styles.
37: ** As we return, attribute values will be garbage in general.
38: */
39: typedef struct _HTElement HTElement;
40: struct _HTElement {
41: HTElement * next; /* Previously nested (enclosing) element, or 0 at the bottom of the stack */
42: HTTag* tag; /* The tag that opened this level */
43: };
44:
45:
1.21 frystyk 46: typedef enum _sgml_state { /* Parser states; SGML_character() switches on the current one */
47: S_text, S_literal, S_tag, S_tag_gap, /* plain text; inside a literal element waiting for its end tag; reading a tag name; gap before an attribute name or > */
48: S_attr, S_attr_gap, S_equals, S_value, S_after_open, /* reading an attribute name; gap after a name; after =; reading an unquoted value; just after a start tag */
49: S_nl, S_nl_tago, /* newline seen in text; newline followed by < */
50: S_ero, S_cro, /* after & (entity reference open); after &# (character reference open) */
51: #ifdef ISO_2022_JP
52: S_esc, S_dollar, S_paren, S_nonascii_text, /* after ESC; after ESC $; after ESC (; inside non-ASCII text */
53: #endif
54: S_squoted, S_dquoted, S_end, S_entity, S_junk_tag /* single-quoted value; double-quoted value; reading an end tag name after </; reading an entity name; skipping up to the next > */
55: } sgml_state;
56:
57:
1.2 timbl 58: /* Internal Context Data Structure
59: ** ------------------------------- One of these is allocated by SGML_new() for each parser and released by SGML_free() or SGML_abort().
60: */
61: struct _HTStream {
62:
63: CONST HTStreamClass * isa; /* inherited from HTStream; SGML_new() points it at SGMLParser */
64:
65: CONST SGML_dtd *dtd; /* DTD supplying the tag and entity tables that are searched */
66: HTStructuredClass *actions; /* target class */
67: HTStructured *target; /* target object */
68:
1.1 timbl 69: HTTag *current_tag; /* tag most recently recognised in a start or end tag */
1.2 timbl 70: int current_attribute_number; /* index of the attribute awaiting a value, or INVALID */
1.1 timbl 71: HTChunk *string; /* accumulates the tag, attribute, value or entity text being read */
72: HTElement *element_stack; /* currently open elements, innermost first; 0 when none */
1.21 frystyk 73: sgml_state state; /* current state of the character-level parser */
1.2 timbl 74: #ifdef CALLERDATA
1.1 timbl 75: void * callerData; /* opaque caller handle, see SGML_callerData() / SGML_setCallerData() */
1.2 timbl 76: #endif
77: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
78: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
79: } ;
80:
81:
82: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch)) /* Pass one character to the target; needs a variable named context in scope */
83:
1.1 timbl 84:
1.17 timbl 85: /* Find Attribute Number
86: ** ---------------------
87: */
88:
1.31 frystyk 89: PUBLIC int SGMLFindAttribute (HTTag* tag, CONST char * s)
1.17 timbl 90: {
91: attr* attributes = tag->attributes;
92:
93: int high, low, i, diff; /* Binary search for attribute name */
94: for(low=0, high=tag->number_of_attributes;
95: high > low ;
96: diff < 0 ? (low = i+1) : (high = i) ) {
97: i = (low + (high-low)/2);
98: diff = strcasecomp(attributes[i].name, s);
99: if (diff==0) return i; /* success: found it */
100: } /* for */
101:
102: return -1;
103: }
104:
1.1 timbl 105:
106: /* Handle Attribute
107: ** ----------------
108: */
109: /* PUBLIC CONST char * SGML_default = ""; ?? */
110:
1.31 frystyk 111: PRIVATE void handle_attribute_name (HTStream * context, CONST char * s)
1.1 timbl 112: {
1.2 timbl 113:
114: HTTag * tag = context->current_tag;
115:
1.17 timbl 116: int i = SGMLFindAttribute(tag, s);
117: if (i>=0) {
118: context->current_attribute_number = i;
119: context->present[i] = YES;
120: if (context->value[i]) {
121: free(context->value[i]);
122: context->value[i] = NULL;
123: }
124: return;
125: } /* if */
1.2 timbl 126:
1.20 frystyk 127: if (SGML_TRACE)
1.29 frystyk 128: TTYPrint(TDEST, "SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 129: s, context->current_tag->name);
130: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 131: }
132:
133:
134: /* Handle attribute value
135: ** ----------------------
136: */
1.31 frystyk 137: PRIVATE void handle_attribute_value (HTStream * context, CONST char * s)
1.1 timbl 138: {
1.2 timbl 139: if (context->current_attribute_number != INVALID) {
140: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 141: } else {
1.29 frystyk 142: if (SGML_TRACE) TTYPrint(TDEST, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 143: }
1.2 timbl 144: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 145: }
146:
1.2 timbl 147:
1.1 timbl 148: /* Handle entity
149: ** -------------
150: **
151: ** On entry,
152: ** s contains the entity name zero terminated
153: ** Bugs:
154: ** If the entity name is unknown, the terminator is treated as
155: ** a printable non-special character in all cases, even if it is '<'
156: */
1.31 frystyk 157: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 158: {
1.2 timbl 159:
1.3 timbl 160: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 161: CONST char *s = context->string->data;
1.2 timbl 162:
163: int high, low, i, diff;
164: for(low=0, high = context->dtd->number_of_entities;
165: high > low ;
166: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
167: i = (low + (high-low)/2);
168: diff = strcmp(entities[i], s); /* Csse sensitive! */
169: if (diff==0) { /* success: found it */
170: (*context->actions->put_entity)(context->target, i);
171: return;
1.1 timbl 172: }
173: }
174: /* If entity string not found, display as text */
1.20 frystyk 175: if (SGML_TRACE)
1.29 frystyk 176: TTYPrint(TDEST, "SGML: Unknown entity %s\n", s);
1.2 timbl 177: PUTC('&');
1.1 timbl 178: {
179: CONST char *p;
180: for (p=s; *p; p++) {
1.2 timbl 181: PUTC(*p);
1.1 timbl 182: }
183: }
1.2 timbl 184: PUTC(term);
1.1 timbl 185: }
186:
1.2 timbl 187:
1.1 timbl 188: /* End element
1.2 timbl 189: ** -----------
1.1 timbl 190: */
1.31 frystyk 191: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 192: {
1.29 frystyk 193: if (SGML_TRACE) TTYPrint(TDEST, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 194: if (old_tag->contents == SGML_EMPTY) {
1.29 frystyk 195: if (SGML_TRACE) TTYPrint(TDEST,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 196: old_tag->name);
197: return;
198: }
199: while (context->element_stack) {/* Loop is error path only */
200: HTElement * N = context->element_stack;
201: HTTag * t = N->tag;
202:
203: if (old_tag != t) { /* Mismatch: syntax error */
204: if (context->element_stack->next) { /* This is not the last level */
1.29 frystyk 205: if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 206: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
207: old_tag->name, t->name, t->name);
208: } else { /* last level */
1.29 frystyk 209: if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 210: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
211: old_tag->name, t->name, old_tag->name);
212: return; /* Ignore */
213: }
214: }
215:
216: context->element_stack = N->next; /* Remove from stack */
217: free(N);
1.2 timbl 218: (*context->actions->end_element)(context->target,
219: t - context->dtd->tags);
1.1 timbl 220: if (old_tag == t) return; /* Correct sequence */
221:
222: /* Syntax error path only */
223:
224: }
1.29 frystyk 225: if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 226: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
227: }
228:
229:
1.17 timbl 230: /* Start an element
231: ** ----------------
1.1 timbl 232: */
1.31 frystyk 233: PRIVATE void start_element (HTStream * context)
1.1 timbl 234: {
235: HTTag * new_tag = context->current_tag;
236:
1.29 frystyk 237: if (SGML_TRACE) TTYPrint(TDEST, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 238: (*context->actions->start_element)(
239: context->target,
240: new_tag - context->dtd->tags,
241: context->present,
1.3 timbl 242: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 243: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 244: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
245: if (N == NULL) outofmem(__FILE__, "start_element");
246: N->next = context->element_stack;
247: N->tag = new_tag;
248: context->element_stack = N;
249: }
250: }
251:
252:
1.2 timbl 253: /* Find Tag in DTD tag list
254: ** ------------------------
1.1 timbl 255: **
256: ** On entry,
1.2 timbl 257: ** dtd points to dtd structire including valid tag list
258: ** string points to name of tag in question
1.1 timbl 259: **
1.2 timbl 260: ** On exit,
261: ** returns:
1.7 timbl 262: ** NULL tag not found
263: ** else address of tag structure in dtd
1.2 timbl 264: */
1.31 frystyk 265: PUBLIC HTTag * SGMLFindTag (CONST SGML_dtd* dtd, CONST char * string)
1.2 timbl 266: {
267: int high, low, i, diff;
268: for(low=0, high=dtd->number_of_tags;
269: high > low ;
270: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
271: i = (low + (high-low)/2);
1.3 timbl 272: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 273: if (diff==0) { /* success: found it */
1.7 timbl 274: return &dtd->tags[i];
1.2 timbl 275: }
276: }
1.7 timbl 277: return NULL;
1.2 timbl 278: }
279:
280: /*________________________________________________________________________
281: ** Public Methods
1.1 timbl 282: */
283:
1.2 timbl 284:
285: /* Could check that we are back to bottom of stack! @@ */
1.31 frystyk 286: PUBLIC int SGML_flush (HTStream * context)
1.26 frystyk 287: {
288: while (context->element_stack) {
289: HTElement *ptr = context->element_stack;
290: if (SGML_TRACE)
1.29 frystyk 291: TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 292: context->element_stack->tag->name);
293: context->element_stack = ptr->next;
294: free(ptr);
295: }
296: return (*context->actions->flush)(context->target);
297: }
1.1 timbl 298:
1.31 frystyk 299: PUBLIC int SGML_free (HTStream * context)
1.8 timbl 300: {
1.26 frystyk 301: int status;
1.14 frystyk 302: int cnt;
1.15 frystyk 303: while (context->element_stack) { /* Make sure, that all tags are gone */
304: HTElement *ptr = context->element_stack;
305:
1.26 frystyk 306: if (SGML_TRACE)
1.29 frystyk 307: TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 308: context->element_stack->tag->name);
1.15 frystyk 309: context->element_stack = ptr->next;
310: free(ptr);
311: }
1.26 frystyk 312: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
313: return status;
1.33 frystyk 314: HTChunk_delete(context->string);
1.15 frystyk 315: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 316: if(context->value[cnt])
317: free(context->value[cnt]);
1.8 timbl 318: free(context);
1.26 frystyk 319: return HT_OK;
1.1 timbl 320: }
321:
1.31 frystyk 322: PUBLIC int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 323: {
1.14 frystyk 324: int cnt;
1.15 frystyk 325: while (context->element_stack) { /* Make sure, that all tags are gone */
326: HTElement *ptr = context->element_stack;
1.26 frystyk 327: if (SGML_TRACE)
1.29 frystyk 328: TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 329: context->element_stack->tag->name);
1.15 frystyk 330: context->element_stack = ptr->next;
331: free(ptr);
332: }
1.8 timbl 333: (*context->actions->abort)(context->target, e);
1.33 frystyk 334: HTChunk_delete(context->string);
1.14 frystyk 335: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
336: if(context->value[cnt])
337: free(context->value[cnt]);
1.1 timbl 338: free(context);
1.26 frystyk 339: return HT_ERROR;
1.1 timbl 340: }
341:
1.2 timbl 342:
/*	Read and write user callback handle
**	-----------------------------------
**
**	The callbacks from the SGML parser have an SGML context parameter.
**	These calls allow the caller to associate his own context with a
**	particular SGML context.
**
**	The real accessors exist only when CALLERDATA is defined. Without
**	it, do-nothing versions are compiled when WWW_WIN_DLL is defined,
**	and nothing at all otherwise.
*/

#if defined(CALLERDATA)

/* Return the handle stored by SGML_setCallerData() */
PUBLIC void* SGML_callerData (HTStream * context)
{
    return context->callerData;
}

/* Store the caller's handle in the parser context */
PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
    context->callerData = data;
}

#elif defined(WWW_WIN_DLL)

/* Stub: there is no handle to return */
PUBLIC void * SGML_callerData (HTStream * context)
{
    return NULL;
}

/* Stub: the handle is discarded */
PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
}

#endif /* CALLERDATA / WWW_WIN_DLL */
1.1 timbl 367:
1.31 frystyk 368: PUBLIC int SGML_character (HTStream * context, char c)
1.1 timbl 369:
370: {
1.2 timbl 371: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 372: HTChunk *string = context->string;
373:
374: switch(context->state) {
1.18 timbl 375:
376: case S_after_open: /* Strip one trainling newline
377: only after opening nonempty element. - SGML:Ugh! */
378: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
379: break;
380: }
381: context->state = S_text;
382: goto normal_text;
383: /* (***falls through***) */
384:
1.1 timbl 385: case S_text:
1.18 timbl 386: normal_text:
387:
1.13 timbl 388: #ifdef ISO_2022_JP
389: if (c=='\033') {
390: context->state = S_esc;
391: PUTC(c);
392: break;
393: }
394: #endif /* ISO_2022_JP */
1.6 timbl 395: if (c=='&' && (!context->element_stack || (
396: context->element_stack->tag &&
397: ( context->element_stack->tag->contents == SGML_MIXED
398: || context->element_stack->tag->contents ==
399: SGML_RCDATA)
400: ))) {
1.1 timbl 401: string->size = 0;
402: context->state = S_ero;
403:
404: } else if (c=='<') {
405: string->size = 0;
406: context->state = (context->element_stack &&
1.13 timbl 407: context->element_stack->tag &&
408: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 409: S_literal : S_tag;
1.18 timbl 410: } else if (c=='\n') { /* Newline - ignore if before tag end! */
411: context->state = S_nl;
1.2 timbl 412: } else PUTC(c);
1.1 timbl 413: break;
1.13 timbl 414:
1.18 timbl 415: case S_nl:
416: if (c=='<') {
417: string->size = 0;
418: context->state = (context->element_stack &&
419: context->element_stack->tag &&
420: context->element_stack->tag->contents == SGML_LITERAL) ?
421: S_literal : S_nl_tago;
422: } else {
423: PUTC('\n');
424: context->state = S_text;
425: goto normal_text;
426: }
427: break;
428:
429: case S_nl_tago: /* Had newline and tag opener */
430: if (c != '/') {
431: PUTC('\n'); /* Only ignore newline before </ */
432: }
433: context->state = S_tag;
434: goto handle_S_tag;
435:
1.13 timbl 436: #ifdef ISO_2022_JP
437: case S_esc:
438: if (c=='$') {
439: context->state = S_dollar;
440: } else if (c=='(') {
441: context->state = S_paren;
442: } else {
443: context->state = S_text;
444: }
445: PUTC(c);
446: break;
447: case S_dollar:
448: if (c=='@' || c=='B') {
449: context->state = S_nonascii_text;
450: } else {
451: context->state = S_text;
452: }
453: PUTC(c);
454: break;
455: case S_paren:
456: if (c=='B' || c=='J') {
457: context->state = S_text;
458: } else {
459: context->state = S_text;
460: }
461: PUTC(c);
462: break;
463: case S_nonascii_text:
464: if (c=='\033') {
465: context->state = S_esc;
466: PUTC(c);
467: } else {
468: PUTC(c);
469: }
470: break;
471: #endif /* ISO_2022_JP */
1.1 timbl 472:
1.12 timbl 473: /* In literal mode, waits only for specific end tag!
1.2 timbl 474: ** Only foir compatibility with old servers.
1.1 timbl 475: */
1.12 timbl 476: case S_literal :
1.33 frystyk 477: HTChunk_putc(string, c);
1.1 timbl 478: if ( TOUPPER(c) != ((string->size ==1) ? '/'
479: : context->element_stack->tag->name[string->size-2])) {
480: int i;
481:
1.12 timbl 482: /* If complete match, end literal */
1.1 timbl 483: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
484: end_element(context, context->element_stack->tag);
485: string->size = 0;
1.2 timbl 486: context->current_attribute_number = INVALID;
1.1 timbl 487: context->state = S_text;
488: break;
489: } /* If Mismatch: recover string. */
1.2 timbl 490: PUTC( '<');
1.1 timbl 491: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 492: PUTC(
1.1 timbl 493: string->data[i]);
494: context->state = S_text;
495: }
496:
497: break;
498:
499: /* Character reference or Entity
500: */
501: case S_ero:
502: if (c=='#') {
503: context->state = S_cro; /* &# is Char Ref Open */
504: break;
505: }
506: context->state = S_entity; /* Fall through! */
507:
508: /* Handle Entities
509: */
510: case S_entity:
511: if (isalnum(c))
1.33 frystyk 512: HTChunk_putc(string, c);
1.1 timbl 513: else {
1.33 frystyk 514: HTChunk_terminate(string);
1.1 timbl 515: handle_entity(context, c);
516: context->state = S_text;
517: }
518: break;
519:
520: /* Character reference
521: */
522: case S_cro:
523: if (isalnum(c))
1.33 frystyk 524: HTChunk_putc(string, c); /* accumulate a character NUMBER */
1.1 timbl 525: else {
526: int value;
1.33 frystyk 527: HTChunk_terminate(string);
1.1 timbl 528: if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 529: PUTC((char) value);
1.1 timbl 530: context->state = S_text;
531: }
532: break;
533:
534: /* Tag
535: */
536: case S_tag: /* new tag */
1.18 timbl 537: handle_S_tag:
538:
1.1 timbl 539: if (isalnum(c))
1.33 frystyk 540: HTChunk_putc(string, c);
1.1 timbl 541: else { /* End of tag name */
1.7 timbl 542: HTTag * t;
1.1 timbl 543: if (c=='/') {
1.20 frystyk 544: if (SGML_TRACE) if (string->size!=0)
1.29 frystyk 545: TTYPrint(TDEST,"SGML: `<%s/' found!\n", string->data);
1.1 timbl 546: context->state = S_end;
547: break;
548: }
1.33 frystyk 549: HTChunk_terminate(string) ;
1.2 timbl 550:
1.10 timbl 551: t = SGMLFindTag(dtd, string->data);
1.7 timbl 552: if (!t) {
1.29 frystyk 553: if(SGML_TRACE) TTYPrint(TDEST, "SGML: *** Unknown element %s\n",
1.1 timbl 554: string->data);
555: context->state = (c=='>') ? S_text : S_junk_tag;
556: break;
557: }
1.7 timbl 558: context->current_tag = t;
1.2 timbl 559:
560: /* Clear out attributes
561: */
1.1 timbl 562:
1.2 timbl 563: {
564: int i;
565: for (i=0; i< context->current_tag->number_of_attributes; i++)
566: context->present[i] = NO;
1.1 timbl 567: }
568: string->size = 0;
1.2 timbl 569: context->current_attribute_number = INVALID;
1.1 timbl 570:
571: if (c=='>') {
572: if (context->current_tag->name) start_element(context);
1.18 timbl 573: context->state = S_after_open;
1.1 timbl 574: } else {
575: context->state = S_tag_gap;
576: }
577: }
578: break;
579:
580:
581: case S_tag_gap: /* Expecting attribute or > */
582: if (WHITE(c)) break; /* Gap between attributes */
583: if (c=='>') { /* End of tag */
584: if (context->current_tag->name) start_element(context);
1.18 timbl 585: context->state = S_after_open;
1.1 timbl 586: break;
587: }
1.33 frystyk 588: HTChunk_putc(string, c);
1.1 timbl 589: context->state = S_attr; /* Get attribute */
590: break;
591:
592: /* accumulating value */
593: case S_attr:
594: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
1.33 frystyk 595: HTChunk_terminate(string) ;
1.1 timbl 596: handle_attribute_name(context, string->data);
597: string->size = 0;
598: if (c=='>') { /* End of tag */
599: if (context->current_tag->name) start_element(context);
1.18 timbl 600: context->state = S_after_open;
1.1 timbl 601: break;
602: }
603: context->state = (c=='=' ? S_equals: S_attr_gap);
604: } else {
1.33 frystyk 605: HTChunk_putc(string, c);
1.1 timbl 606: }
607: break;
608:
609: case S_attr_gap: /* Expecting attribute or = or > */
610: if (WHITE(c)) break; /* Gap after attribute */
611: if (c=='>') { /* End of tag */
612: if (context->current_tag->name) start_element(context);
1.18 timbl 613: context->state = S_after_open;
1.1 timbl 614: break;
615: } else if (c=='=') {
616: context->state = S_equals;
617: break;
618: }
1.33 frystyk 619: HTChunk_putc(string, c);
1.1 timbl 620: context->state = S_attr; /* Get next attribute */
621: break;
622:
623: case S_equals: /* After attr = */
624: if (WHITE(c)) break; /* Before attribute value */
625: if (c=='>') { /* End of tag */
1.29 frystyk 626: if (SGML_TRACE) TTYPrint(TDEST, "SGML: found = but no value\n");
1.1 timbl 627: if (context->current_tag->name) start_element(context);
1.18 timbl 628: context->state = S_after_open;
1.1 timbl 629: break;
630:
631: } else if (c=='\'') {
632: context->state = S_squoted;
633: break;
634:
635: } else if (c=='"') {
636: context->state = S_dquoted;
637: break;
638: }
1.33 frystyk 639: HTChunk_putc(string, c);
1.1 timbl 640: context->state = S_value;
641: break;
642:
643: case S_value:
644: if (WHITE(c) || (c=='>')) { /* End of word */
1.33 frystyk 645: HTChunk_terminate(string) ;
1.1 timbl 646: handle_attribute_value(context, string->data);
647: string->size = 0;
648: if (c=='>') { /* End of tag */
649: if (context->current_tag->name) start_element(context);
1.18 timbl 650: context->state = S_after_open;
1.1 timbl 651: break;
652: }
653: else context->state = S_tag_gap;
654: } else {
1.33 frystyk 655: HTChunk_putc(string, c);
1.1 timbl 656: }
657: break;
658:
659: case S_squoted: /* Quoted attribute value */
660: if (c=='\'') { /* End of attribute value */
1.33 frystyk 661: HTChunk_terminate(string) ;
1.1 timbl 662: handle_attribute_value(context, string->data);
663: string->size = 0;
664: context->state = S_tag_gap;
665: } else {
1.33 frystyk 666: HTChunk_putc(string, c);
1.1 timbl 667: }
668: break;
669:
670: case S_dquoted: /* Quoted attribute value */
671: if (c=='"') { /* End of attribute value */
1.33 frystyk 672: HTChunk_terminate(string) ;
1.1 timbl 673: handle_attribute_value(context, string->data);
674: string->size = 0;
675: context->state = S_tag_gap;
676: } else {
1.33 frystyk 677: HTChunk_putc(string, c);
1.1 timbl 678: }
679: break;
680:
681: case S_end: /* </ */
682: if (isalnum(c))
1.33 frystyk 683: HTChunk_putc(string, c);
1.1 timbl 684: else { /* End of end tag name */
1.7 timbl 685: HTTag * t;
1.33 frystyk 686: HTChunk_terminate(string) ;
1.7 timbl 687: if (!*string->data) { /* Empty end tag */
688: t = context->element_stack->tag;
689: } else {
1.10 timbl 690: t = SGMLFindTag(dtd, string->data);
1.1 timbl 691: }
1.7 timbl 692: if (!t) {
1.29 frystyk 693: if(SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 694: "Unknown end tag </%s>\n", string->data);
1.2 timbl 695: } else {
1.7 timbl 696: context->current_tag = t;
1.2 timbl 697: end_element( context, context->current_tag);
1.1 timbl 698: }
1.2 timbl 699:
1.1 timbl 700: string->size = 0;
1.2 timbl 701: context->current_attribute_number = INVALID;
1.7 timbl 702: if (c!='>') {
1.20 frystyk 703: if (SGML_TRACE && !WHITE(c))
1.29 frystyk 704: TTYPrint(TDEST,"SGML: `</%s%c' found!\n",
1.7 timbl 705: string->data, c);
706: context->state = S_junk_tag;
707: } else {
708: context->state = S_text;
709: }
1.1 timbl 710: }
711: break;
712:
713:
714: case S_junk_tag:
715: if (c=='>') {
716: context->state = S_text;
717: }
718: } /* switch on context->state */
1.26 frystyk 719: return HT_OK;
720: }
1.2 timbl 721:
722:
1.31 frystyk 723: PUBLIC int SGML_string (HTStream * context, CONST char* s)
1.2 timbl 724: {
1.26 frystyk 725: while (*s)
726: SGML_character(context, *s++);
727: return HT_OK;
1.2 timbl 728: }
729:
730:
1.31 frystyk 731: PUBLIC int SGML_write (HTStream * context, CONST char* b, int l)
1.2 timbl 732: {
1.26 frystyk 733: while (l-- > 0)
734: SGML_character(context, *b++);
735: return HT_OK;
1.2 timbl 736: }
737:
738: /*_______________________________________________________________________
739: */
740:
741: /* Structured Object Class
742: ** ----------------------- Stream class table of the SGML parser; SGML_new() stores its address in the isa field of every parser it creates.
743: */
1.32 frystyk 744: PRIVATE CONST HTStreamClass SGMLParser =
1.2 timbl 745: {
1.32 frystyk 746: "SGMLParser", /* presumably the class name; HTStreamClass is declared elsewhere */
747: SGML_flush, /* the entries below are the stream methods defined in this file */
748: SGML_free,
749: SGML_abort,
750: SGML_character,
751: SGML_string,
752: SGML_write,
1.2 timbl 753: };
754:
755: /* Create SGML Engine
756: ** ------------------
757: **
758: ** On entry,
759: ** dtd represents the DTD, along with
760: ** actions is the sink for the data as a set of routines.
761: **
762: */
1.32 frystyk 763: PUBLIC HTStream * SGML_new (CONST SGML_dtd * dtd, HTStructured * target)
1.2 timbl 764: {
765: int i;
766: HTStream* context = (HTStream *) malloc(sizeof(*context));
767: if (!context) outofmem(__FILE__, "SGML_begin");
768:
769: context->isa = &SGMLParser;
1.33 frystyk 770: context->string = HTChunk_new(128); /* Grow by this much */
1.2 timbl 771: context->dtd = dtd;
772: context->target = target;
773: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
774: /* Ugh: no OO */
775: context->state = S_text;
776: context->element_stack = 0; /* empty */
777: #ifdef CALLERDATA
778: context->callerData = (void*) callerData;
779: #endif
780: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
781:
782: return context;
783: }
Webmaster