Annotation of libwww/Library/src/SGML.c, revision 1.35
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This module implements an HTStream object. To parse an
1.1 timbl 8: ** SGML file, create this object which is a parser. The object
1.2 timbl 9: ** is (currently) created by being passed a DTD structure,
10: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **
1.19 duns 12: ** 6 Feb 93 Binary searches used. Interface modified.
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
15:
1.25 frystyk 16: /* Library include files */
17: #include "tcp.h"
1.1 timbl 18: #include "HTUtils.h"
1.25 frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22:
1.2 timbl 23: #define INVALID (-1)
24:
1.1 timbl 25: /* The State (context) of the parser
26: **
1.2 timbl 27: ** This is passed with each call to make the parser reentrant
1.1 timbl 28: **
29: */
30:
1.16 frystyk 31:
1.2 timbl 32:
33:
34: /* Element Stack
35: ** -------------
36: ** This allows us to return down the stack reselcting styles.
37: ** As we return, attribute values will be garbage in general.
38: */
39: typedef struct _HTElement HTElement;
40: struct _HTElement {
41: HTElement * next; /* Previously nested element or 0 */
42: HTTag* tag; /* The tag at this level */
43: };
44:
45:
/*	Parser states
**	-------------
**	States of the character-driven state machine in SGML_character().
**	The order of the enumerators is unchanged.
*/
typedef enum _sgml_state {
    S_text,             /* Ordinary character data */
    S_literal,          /* '<' seen inside a literal element */
    S_tag,              /* '<' seen: accumulating a tag name */
    S_tag_gap,          /* Inside a start tag: expecting attribute or '>' */
    S_attr,             /* Accumulating an attribute name */
    S_attr_gap,         /* After an attribute name: attribute, '=' or '>' */
    S_equals,           /* After '=': expecting a value */
    S_value,            /* Accumulating an unquoted attribute value */
    S_after_open,       /* Just after the '>' of a start tag */
    S_nl,               /* Newline seen in text, not yet passed on */
    S_nl_tago,          /* Newline followed by '<' */
    S_ero,              /* '&' seen: entity reference open */
    S_cro,              /* "&#" seen: character reference open */
#ifdef ISO_2022_JP
    S_esc,              /* ESC seen */
    S_dollar,           /* ESC '$' seen */
    S_paren,            /* ESC '(' seen */
    S_nonascii_text,    /* Text passed through until the next ESC */
#endif
    S_squoted,          /* Inside a '...' attribute value */
    S_dquoted,          /* Inside a "..." attribute value */
    S_end,              /* "</" seen: accumulating an end tag name */
    S_entity,           /* Accumulating an entity name */
    S_junk_tag          /* Skipping everything up to the next '>' */
} sgml_state;
56:
57:
1.2 timbl 58: /* Internal Context Data Structure
59: ** -------------------------------
60: */
61: struct _HTStream {
62:
63: CONST HTStreamClass * isa; /* inherited from HTStream */
64:
65: CONST SGML_dtd *dtd;
66: HTStructuredClass *actions; /* target class */
67: HTStructured *target; /* target object */
68:
1.1 timbl 69: HTTag *current_tag;
1.2 timbl 70: int current_attribute_number;
1.1 timbl 71: HTChunk *string;
72: HTElement *element_stack;
1.21 frystyk 73: sgml_state state;
1.2 timbl 74: #ifdef CALLERDATA
1.1 timbl 75: void * callerData;
1.2 timbl 76: #endif
77: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
78: char * value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
79: } ;
80:
81:
/*	Pass one character to the target's put_character method.
**	Relies on a variable called `context' being in scope.
*/
#define PUTC(ch) \
    ((*context->actions->put_character)(context->target, (ch)))
83:
1.1 timbl 84:
1.17 timbl 85: /* Find Attribute Number
86: ** ---------------------
87: */
88:
1.31 frystyk 89: PUBLIC int SGMLFindAttribute (HTTag* tag, CONST char * s)
1.17 timbl 90: {
91: attr* attributes = tag->attributes;
92:
93: int high, low, i, diff; /* Binary search for attribute name */
94: for(low=0, high=tag->number_of_attributes;
95: high > low ;
96: diff < 0 ? (low = i+1) : (high = i) ) {
97: i = (low + (high-low)/2);
98: diff = strcasecomp(attributes[i].name, s);
99: if (diff==0) return i; /* success: found it */
100: } /* for */
101:
102: return -1;
103: }
104:
1.1 timbl 105:
106: /* Handle Attribute
107: ** ----------------
108: */
109: /* PUBLIC CONST char * SGML_default = ""; ?? */
110:
1.31 frystyk 111: PRIVATE void handle_attribute_name (HTStream * context, CONST char * s)
1.1 timbl 112: {
1.2 timbl 113:
114: HTTag * tag = context->current_tag;
115:
1.17 timbl 116: int i = SGMLFindAttribute(tag, s);
117: if (i>=0) {
118: context->current_attribute_number = i;
119: context->present[i] = YES;
120: if (context->value[i]) {
121: free(context->value[i]);
122: context->value[i] = NULL;
123: }
124: return;
125: } /* if */
1.2 timbl 126:
1.20 frystyk 127: if (SGML_TRACE)
1.29 frystyk 128: TTYPrint(TDEST, "SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 129: s, context->current_tag->name);
130: context->current_attribute_number = INVALID; /* Invalid */
1.1 timbl 131: }
132:
133:
134: /* Handle attribute value
135: ** ----------------------
136: */
1.31 frystyk 137: PRIVATE void handle_attribute_value (HTStream * context, CONST char * s)
1.1 timbl 138: {
1.2 timbl 139: if (context->current_attribute_number != INVALID) {
140: StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 141: } else {
1.29 frystyk 142: if (SGML_TRACE) TTYPrint(TDEST, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 143: }
1.2 timbl 144: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 145: }
146:
1.2 timbl 147:
1.1 timbl 148: /* Handle entity
149: ** -------------
150: **
151: ** On entry,
152: ** s contains the entity name zero terminated
153: ** Bugs:
154: ** If the entity name is unknown, the terminator is treated as
155: ** a printable non-special character in all cases, even if it is '<'
156: */
1.31 frystyk 157: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 158: {
1.2 timbl 159:
1.3 timbl 160: CONST char ** entities = context->dtd->entity_names;
1.1 timbl 161: CONST char *s = context->string->data;
1.2 timbl 162:
163: int high, low, i, diff;
164: for(low=0, high = context->dtd->number_of_entities;
165: high > low ;
166: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
167: i = (low + (high-low)/2);
168: diff = strcmp(entities[i], s); /* Csse sensitive! */
169: if (diff==0) { /* success: found it */
170: (*context->actions->put_entity)(context->target, i);
171: return;
1.1 timbl 172: }
173: }
174: /* If entity string not found, display as text */
1.20 frystyk 175: if (SGML_TRACE)
1.29 frystyk 176: TTYPrint(TDEST, "SGML: Unknown entity %s\n", s);
1.2 timbl 177: PUTC('&');
1.1 timbl 178: {
179: CONST char *p;
180: for (p=s; *p; p++) {
1.2 timbl 181: PUTC(*p);
1.1 timbl 182: }
183: }
1.2 timbl 184: PUTC(term);
1.1 timbl 185: }
186:
1.35 ! frystyk 187: /*
! 188: ** Helper function to check if the tag is on the stack
! 189: */
! 190: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
! 191: {
! 192: HTElement* elem;
! 193: for (elem = stack; elem != NULL; elem = elem->next)
! 194: {
! 195: if (elem->tag == tag) return YES;
! 196: }
! 197: return NO;
! 198: }
1.2 timbl 199:
1.1 timbl 200: /* End element
1.2 timbl 201: ** -----------
1.1 timbl 202: */
1.31 frystyk 203: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 204: {
1.29 frystyk 205: if (SGML_TRACE) TTYPrint(TDEST, "SGML: End </%s>\n", old_tag->name);
1.2 timbl 206: if (old_tag->contents == SGML_EMPTY) {
1.29 frystyk 207: if (SGML_TRACE) TTYPrint(TDEST,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 208: old_tag->name);
209: return;
210: }
211: while (context->element_stack) {/* Loop is error path only */
212: HTElement * N = context->element_stack;
213: HTTag * t = N->tag;
214:
215: if (old_tag != t) { /* Mismatch: syntax error */
1.35 ! frystyk 216: /*
! 217: ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
! 218: ** See explanation in ../User/Patch/lib_4.0_1.fix
! 219: */
! 220: if (context->element_stack->next /* This is not the last level */
! 221: && lookup_element_stack(context->element_stack, old_tag)) {
1.29 frystyk 222: if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 223: "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
224: old_tag->name, t->name, t->name);
225: } else { /* last level */
1.29 frystyk 226: if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 227: "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
228: old_tag->name, t->name, old_tag->name);
229: return; /* Ignore */
230: }
231: }
232:
233: context->element_stack = N->next; /* Remove from stack */
234: free(N);
1.2 timbl 235: (*context->actions->end_element)(context->target,
236: t - context->dtd->tags);
1.1 timbl 237: if (old_tag == t) return; /* Correct sequence */
238:
239: /* Syntax error path only */
240:
241: }
1.29 frystyk 242: if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 243: "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
244: }
245:
246:
1.17 timbl 247: /* Start an element
248: ** ----------------
1.1 timbl 249: */
1.31 frystyk 250: PRIVATE void start_element (HTStream * context)
1.1 timbl 251: {
252: HTTag * new_tag = context->current_tag;
253:
1.29 frystyk 254: if (SGML_TRACE) TTYPrint(TDEST, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 255: (*context->actions->start_element)(
256: context->target,
257: new_tag - context->dtd->tags,
258: context->present,
1.3 timbl 259: (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 260: if (new_tag->contents != SGML_EMPTY) { /* i.e. tag not empty */
1.1 timbl 261: HTElement * N = (HTElement *)malloc(sizeof(HTElement));
262: if (N == NULL) outofmem(__FILE__, "start_element");
263: N->next = context->element_stack;
264: N->tag = new_tag;
265: context->element_stack = N;
266: }
267: }
268:
269:
1.2 timbl 270: /* Find Tag in DTD tag list
271: ** ------------------------
1.1 timbl 272: **
273: ** On entry,
1.2 timbl 274: ** dtd points to dtd structire including valid tag list
275: ** string points to name of tag in question
1.1 timbl 276: **
1.2 timbl 277: ** On exit,
278: ** returns:
1.7 timbl 279: ** NULL tag not found
280: ** else address of tag structure in dtd
1.2 timbl 281: */
1.31 frystyk 282: PUBLIC HTTag * SGMLFindTag (CONST SGML_dtd* dtd, CONST char * string)
1.2 timbl 283: {
284: int high, low, i, diff;
285: for(low=0, high=dtd->number_of_tags;
286: high > low ;
287: diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */
288: i = (low + (high-low)/2);
1.3 timbl 289: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 290: if (diff==0) { /* success: found it */
1.7 timbl 291: return &dtd->tags[i];
1.2 timbl 292: }
293: }
1.7 timbl 294: return NULL;
1.2 timbl 295: }
296:
297: /*________________________________________________________________________
298: ** Public Methods
1.1 timbl 299: */
300:
1.2 timbl 301:
302: /* Could check that we are back to bottom of stack! @@ */
1.31 frystyk 303: PUBLIC int SGML_flush (HTStream * context)
1.26 frystyk 304: {
305: while (context->element_stack) {
306: HTElement *ptr = context->element_stack;
307: if (SGML_TRACE)
1.29 frystyk 308: TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 309: context->element_stack->tag->name);
310: context->element_stack = ptr->next;
311: free(ptr);
312: }
313: return (*context->actions->flush)(context->target);
314: }
1.1 timbl 315:
1.31 frystyk 316: PUBLIC int SGML_free (HTStream * context)
1.8 timbl 317: {
1.26 frystyk 318: int status;
1.14 frystyk 319: int cnt;
1.15 frystyk 320: while (context->element_stack) { /* Make sure, that all tags are gone */
321: HTElement *ptr = context->element_stack;
322:
1.26 frystyk 323: if (SGML_TRACE)
1.29 frystyk 324: TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 325: context->element_stack->tag->name);
1.15 frystyk 326: context->element_stack = ptr->next;
327: free(ptr);
328: }
1.26 frystyk 329: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
330: return status;
1.33 frystyk 331: HTChunk_delete(context->string);
1.15 frystyk 332: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
1.14 frystyk 333: if(context->value[cnt])
334: free(context->value[cnt]);
1.8 timbl 335: free(context);
1.26 frystyk 336: return HT_OK;
1.1 timbl 337: }
338:
1.31 frystyk 339: PUBLIC int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 340: {
1.14 frystyk 341: int cnt;
1.15 frystyk 342: while (context->element_stack) { /* Make sure, that all tags are gone */
343: HTElement *ptr = context->element_stack;
1.26 frystyk 344: if (SGML_TRACE)
1.29 frystyk 345: TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 346: context->element_stack->tag->name);
1.15 frystyk 347: context->element_stack = ptr->next;
348: free(ptr);
349: }
1.8 timbl 350: (*context->actions->abort)(context->target, e);
1.33 frystyk 351: HTChunk_delete(context->string);
1.14 frystyk 352: for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
353: if(context->value[cnt])
354: free(context->value[cnt]);
1.1 timbl 355: free(context);
1.26 frystyk 356: return HT_ERROR;
1.1 timbl 357: }
358:
1.2 timbl 359:
/*	Read and write user callback handle
**	-----------------------------------
**
**	The callbacks from the SGML parser have an SGML context parameter.
**	These calls allow the caller to associate his own context with a
**	particular SGML context.
**
**	The real accessors exist only when CALLERDATA is defined.  Without
**	it, do-nothing versions are compiled when WWW_WIN_DLL is defined.
*/
#ifdef CALLERDATA

PUBLIC void* SGML_callerData (HTStream * context)
{
    return context->callerData;
}

PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
    context->callerData = data;
}

#elif defined(WWW_WIN_DLL)

PUBLIC void * SGML_callerData (HTStream * context)
{
    return NULL;
}

PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
}

#endif /* CALLERDATA / WWW_WIN_DLL */
1.1 timbl 384:
1.31 frystyk 385: PUBLIC int SGML_character (HTStream * context, char c)
1.1 timbl 386:
387: {
1.2 timbl 388: CONST SGML_dtd *dtd = context->dtd;
1.1 timbl 389: HTChunk *string = context->string;
390:
391: switch(context->state) {
1.18 timbl 392:
393: case S_after_open: /* Strip one trainling newline
394: only after opening nonempty element. - SGML:Ugh! */
395: if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
396: break;
397: }
398: context->state = S_text;
399: goto normal_text;
400: /* (***falls through***) */
401:
1.1 timbl 402: case S_text:
1.18 timbl 403: normal_text:
404:
1.13 timbl 405: #ifdef ISO_2022_JP
406: if (c=='\033') {
407: context->state = S_esc;
408: PUTC(c);
409: break;
410: }
411: #endif /* ISO_2022_JP */
1.6 timbl 412: if (c=='&' && (!context->element_stack || (
413: context->element_stack->tag &&
414: ( context->element_stack->tag->contents == SGML_MIXED
415: || context->element_stack->tag->contents ==
416: SGML_RCDATA)
417: ))) {
1.1 timbl 418: string->size = 0;
419: context->state = S_ero;
420:
421: } else if (c=='<') {
422: string->size = 0;
423: context->state = (context->element_stack &&
1.13 timbl 424: context->element_stack->tag &&
425: context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 426: S_literal : S_tag;
1.18 timbl 427: } else if (c=='\n') { /* Newline - ignore if before tag end! */
428: context->state = S_nl;
1.2 timbl 429: } else PUTC(c);
1.1 timbl 430: break;
1.13 timbl 431:
1.18 timbl 432: case S_nl:
433: if (c=='<') {
434: string->size = 0;
435: context->state = (context->element_stack &&
436: context->element_stack->tag &&
437: context->element_stack->tag->contents == SGML_LITERAL) ?
438: S_literal : S_nl_tago;
439: } else {
440: PUTC('\n');
441: context->state = S_text;
442: goto normal_text;
443: }
444: break;
445:
446: case S_nl_tago: /* Had newline and tag opener */
447: if (c != '/') {
448: PUTC('\n'); /* Only ignore newline before </ */
449: }
450: context->state = S_tag;
451: goto handle_S_tag;
452:
1.13 timbl 453: #ifdef ISO_2022_JP
454: case S_esc:
455: if (c=='$') {
456: context->state = S_dollar;
457: } else if (c=='(') {
458: context->state = S_paren;
459: } else {
460: context->state = S_text;
461: }
462: PUTC(c);
463: break;
464: case S_dollar:
465: if (c=='@' || c=='B') {
466: context->state = S_nonascii_text;
467: } else {
468: context->state = S_text;
469: }
470: PUTC(c);
471: break;
472: case S_paren:
473: if (c=='B' || c=='J') {
474: context->state = S_text;
475: } else {
476: context->state = S_text;
477: }
478: PUTC(c);
479: break;
480: case S_nonascii_text:
481: if (c=='\033') {
482: context->state = S_esc;
483: PUTC(c);
484: } else {
485: PUTC(c);
486: }
487: break;
488: #endif /* ISO_2022_JP */
1.1 timbl 489:
1.12 timbl 490: /* In literal mode, waits only for specific end tag!
1.2 timbl 491: ** Only foir compatibility with old servers.
1.1 timbl 492: */
1.12 timbl 493: case S_literal :
1.33 frystyk 494: HTChunk_putc(string, c);
1.1 timbl 495: if ( TOUPPER(c) != ((string->size ==1) ? '/'
496: : context->element_stack->tag->name[string->size-2])) {
497: int i;
498:
1.12 timbl 499: /* If complete match, end literal */
1.1 timbl 500: if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
501: end_element(context, context->element_stack->tag);
502: string->size = 0;
1.2 timbl 503: context->current_attribute_number = INVALID;
1.1 timbl 504: context->state = S_text;
505: break;
506: } /* If Mismatch: recover string. */
1.2 timbl 507: PUTC( '<');
1.1 timbl 508: for (i=0; i<string->size; i++) /* recover */
1.2 timbl 509: PUTC(
1.1 timbl 510: string->data[i]);
511: context->state = S_text;
512: }
513:
514: break;
515:
516: /* Character reference or Entity
517: */
518: case S_ero:
519: if (c=='#') {
520: context->state = S_cro; /* &# is Char Ref Open */
521: break;
522: }
523: context->state = S_entity; /* Fall through! */
524:
525: /* Handle Entities
526: */
527: case S_entity:
528: if (isalnum(c))
1.33 frystyk 529: HTChunk_putc(string, c);
1.1 timbl 530: else {
1.33 frystyk 531: HTChunk_terminate(string);
1.1 timbl 532: handle_entity(context, c);
533: context->state = S_text;
534: }
535: break;
536:
537: /* Character reference
538: */
539: case S_cro:
540: if (isalnum(c))
1.33 frystyk 541: HTChunk_putc(string, c); /* accumulate a character NUMBER */
1.1 timbl 542: else {
543: int value;
1.33 frystyk 544: HTChunk_terminate(string);
1.1 timbl 545: if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 546: PUTC((char) value);
1.1 timbl 547: context->state = S_text;
548: }
549: break;
550:
551: /* Tag
552: */
553: case S_tag: /* new tag */
1.18 timbl 554: handle_S_tag:
555:
1.1 timbl 556: if (isalnum(c))
1.33 frystyk 557: HTChunk_putc(string, c);
1.1 timbl 558: else { /* End of tag name */
1.7 timbl 559: HTTag * t;
1.1 timbl 560: if (c=='/') {
1.20 frystyk 561: if (SGML_TRACE) if (string->size!=0)
1.29 frystyk 562: TTYPrint(TDEST,"SGML: `<%s/' found!\n", string->data);
1.1 timbl 563: context->state = S_end;
564: break;
565: }
1.33 frystyk 566: HTChunk_terminate(string) ;
1.2 timbl 567:
1.10 timbl 568: t = SGMLFindTag(dtd, string->data);
1.7 timbl 569: if (!t) {
1.29 frystyk 570: if(SGML_TRACE) TTYPrint(TDEST, "SGML: *** Unknown element %s\n",
1.1 timbl 571: string->data);
572: context->state = (c=='>') ? S_text : S_junk_tag;
573: break;
574: }
1.7 timbl 575: context->current_tag = t;
1.2 timbl 576:
577: /* Clear out attributes
578: */
1.1 timbl 579:
1.2 timbl 580: {
581: int i;
582: for (i=0; i< context->current_tag->number_of_attributes; i++)
583: context->present[i] = NO;
1.1 timbl 584: }
585: string->size = 0;
1.2 timbl 586: context->current_attribute_number = INVALID;
1.1 timbl 587:
588: if (c=='>') {
589: if (context->current_tag->name) start_element(context);
1.18 timbl 590: context->state = S_after_open;
1.1 timbl 591: } else {
592: context->state = S_tag_gap;
593: }
594: }
595: break;
596:
597:
598: case S_tag_gap: /* Expecting attribute or > */
599: if (WHITE(c)) break; /* Gap between attributes */
600: if (c=='>') { /* End of tag */
601: if (context->current_tag->name) start_element(context);
1.18 timbl 602: context->state = S_after_open;
1.1 timbl 603: break;
604: }
1.33 frystyk 605: HTChunk_putc(string, c);
1.1 timbl 606: context->state = S_attr; /* Get attribute */
607: break;
608:
609: /* accumulating value */
610: case S_attr:
611: if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
1.33 frystyk 612: HTChunk_terminate(string) ;
1.1 timbl 613: handle_attribute_name(context, string->data);
614: string->size = 0;
615: if (c=='>') { /* End of tag */
616: if (context->current_tag->name) start_element(context);
1.18 timbl 617: context->state = S_after_open;
1.1 timbl 618: break;
619: }
620: context->state = (c=='=' ? S_equals: S_attr_gap);
621: } else {
1.33 frystyk 622: HTChunk_putc(string, c);
1.1 timbl 623: }
624: break;
625:
626: case S_attr_gap: /* Expecting attribute or = or > */
627: if (WHITE(c)) break; /* Gap after attribute */
628: if (c=='>') { /* End of tag */
629: if (context->current_tag->name) start_element(context);
1.18 timbl 630: context->state = S_after_open;
1.1 timbl 631: break;
632: } else if (c=='=') {
633: context->state = S_equals;
634: break;
635: }
1.33 frystyk 636: HTChunk_putc(string, c);
1.1 timbl 637: context->state = S_attr; /* Get next attribute */
638: break;
639:
640: case S_equals: /* After attr = */
641: if (WHITE(c)) break; /* Before attribute value */
642: if (c=='>') { /* End of tag */
1.29 frystyk 643: if (SGML_TRACE) TTYPrint(TDEST, "SGML: found = but no value\n");
1.1 timbl 644: if (context->current_tag->name) start_element(context);
1.18 timbl 645: context->state = S_after_open;
1.1 timbl 646: break;
647:
648: } else if (c=='\'') {
649: context->state = S_squoted;
650: break;
651:
652: } else if (c=='"') {
653: context->state = S_dquoted;
654: break;
655: }
1.33 frystyk 656: HTChunk_putc(string, c);
1.1 timbl 657: context->state = S_value;
658: break;
659:
660: case S_value:
661: if (WHITE(c) || (c=='>')) { /* End of word */
1.33 frystyk 662: HTChunk_terminate(string) ;
1.1 timbl 663: handle_attribute_value(context, string->data);
664: string->size = 0;
665: if (c=='>') { /* End of tag */
666: if (context->current_tag->name) start_element(context);
1.18 timbl 667: context->state = S_after_open;
1.1 timbl 668: break;
669: }
670: else context->state = S_tag_gap;
671: } else {
1.33 frystyk 672: HTChunk_putc(string, c);
1.1 timbl 673: }
674: break;
675:
676: case S_squoted: /* Quoted attribute value */
677: if (c=='\'') { /* End of attribute value */
1.33 frystyk 678: HTChunk_terminate(string) ;
1.1 timbl 679: handle_attribute_value(context, string->data);
680: string->size = 0;
681: context->state = S_tag_gap;
682: } else {
1.33 frystyk 683: HTChunk_putc(string, c);
1.1 timbl 684: }
685: break;
686:
687: case S_dquoted: /* Quoted attribute value */
688: if (c=='"') { /* End of attribute value */
1.33 frystyk 689: HTChunk_terminate(string) ;
1.1 timbl 690: handle_attribute_value(context, string->data);
691: string->size = 0;
692: context->state = S_tag_gap;
693: } else {
1.33 frystyk 694: HTChunk_putc(string, c);
1.1 timbl 695: }
696: break;
697:
698: case S_end: /* </ */
699: if (isalnum(c))
1.33 frystyk 700: HTChunk_putc(string, c);
1.1 timbl 701: else { /* End of end tag name */
1.7 timbl 702: HTTag * t;
1.33 frystyk 703: HTChunk_terminate(string) ;
1.7 timbl 704: if (!*string->data) { /* Empty end tag */
705: t = context->element_stack->tag;
706: } else {
1.10 timbl 707: t = SGMLFindTag(dtd, string->data);
1.1 timbl 708: }
1.7 timbl 709: if (!t) {
1.29 frystyk 710: if(SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 711: "Unknown end tag </%s>\n", string->data);
1.2 timbl 712: } else {
1.7 timbl 713: context->current_tag = t;
1.2 timbl 714: end_element( context, context->current_tag);
1.1 timbl 715: }
1.2 timbl 716:
1.1 timbl 717: string->size = 0;
1.2 timbl 718: context->current_attribute_number = INVALID;
1.7 timbl 719: if (c!='>') {
1.20 frystyk 720: if (SGML_TRACE && !WHITE(c))
1.29 frystyk 721: TTYPrint(TDEST,"SGML: `</%s%c' found!\n",
1.7 timbl 722: string->data, c);
723: context->state = S_junk_tag;
724: } else {
725: context->state = S_text;
726: }
1.1 timbl 727: }
728: break;
729:
730:
731: case S_junk_tag:
732: if (c=='>') {
733: context->state = S_text;
734: }
735: } /* switch on context->state */
1.26 frystyk 736: return HT_OK;
737: }
1.2 timbl 738:
739:
1.31 frystyk 740: PUBLIC int SGML_string (HTStream * context, CONST char* s)
1.2 timbl 741: {
1.26 frystyk 742: while (*s)
743: SGML_character(context, *s++);
744: return HT_OK;
1.2 timbl 745: }
746:
747:
1.31 frystyk 748: PUBLIC int SGML_write (HTStream * context, CONST char* b, int l)
1.2 timbl 749: {
1.26 frystyk 750: while (l-- > 0)
751: SGML_character(context, *b++);
752: return HT_OK;
1.2 timbl 753: }
754:
755: /*_______________________________________________________________________
756: */
757:
758: /* Structured Object Class
759: ** -----------------------
760: */
1.32 frystyk 761: PRIVATE CONST HTStreamClass SGMLParser =
1.2 timbl 762: {
1.32 frystyk 763: "SGMLParser",
764: SGML_flush,
765: SGML_free,
766: SGML_abort,
767: SGML_character,
768: SGML_string,
769: SGML_write,
1.2 timbl 770: };
771:
772: /* Create SGML Engine
773: ** ------------------
774: **
775: ** On entry,
776: ** dtd represents the DTD, along with
777: ** actions is the sink for the data as a set of routines.
778: **
779: */
1.32 frystyk 780: PUBLIC HTStream * SGML_new (CONST SGML_dtd * dtd, HTStructured * target)
1.2 timbl 781: {
782: int i;
783: HTStream* context = (HTStream *) malloc(sizeof(*context));
784: if (!context) outofmem(__FILE__, "SGML_begin");
785:
786: context->isa = &SGMLParser;
1.33 frystyk 787: context->string = HTChunk_new(128); /* Grow by this much */
1.2 timbl 788: context->dtd = dtd;
789: context->target = target;
790: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
791: /* Ugh: no OO */
792: context->state = S_text;
793: context->element_stack = 0; /* empty */
794: #ifdef CALLERDATA
795: context->callerData = (void*) callerData;
796: #endif
797: for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
798:
799: return context;
800: }
Webmaster