Annotation of libwww/Library/src/SGML.c, revision 1.43
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.43 ! frystyk 6: ** @(#) $Id: SGML.c,v 1.42 1997/03/21 19:33:28 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This module implements an HTStream object. To parse an
1.1 timbl 9: ** SGML file, create this object which is a parser. The object
1.2 timbl 10: ** is (currently) created by being passed a DTD structure,
11: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **
1.19 duns 13: ** 6 Feb 93 Binary searches used. Interface modified.
14: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.42 frystyk 15: ** Nov 1996 msa Strip down the parser to minimal HTML tokenizer,
16: ** Stop allocating space for the attribute values,
17: ** use pointers to the string chunk instead.
1.1 timbl 18: */
19:
1.25 frystyk 20: /* Library include files */
1.38 frystyk 21: #include "sysdep.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26:
1.2 timbl 27: #define INVALID (-1)
28:
1.1 timbl 29: /* The State (context) of the parser
30: **
1.2 timbl 31: ** This is passed with each call to make the parser reentrant
1.1 timbl 32: **
33: */
1.42 frystyk 34: typedef enum _sgml_state
35: {
36: S_text, S_literal, S_tag, S_tag_gap,
37: S_attr, S_attr_gap, S_equals, S_value, S_after_open,
38: S_nl, S_nl_tago,
39: S_ero, S_cro,
1.21 frystyk 40: #ifdef ISO_2022_JP
1.42 frystyk 41: S_esc, S_dollar, S_paren, S_nonascii_text,
1.21 frystyk 42: #endif
1.42 frystyk 43: S_squoted, S_dquoted, S_end, S_entity, S_junk_tag,
44: S_md, S_md_sqs, S_md_dqs, S_com_1, S_com, S_com_2
45: } sgml_state;
1.21 frystyk 46:
47:
1.2 timbl 48: /* Internal Context Data Structure
49: ** -------------------------------
50: */
1.42 frystyk 51: struct _HTStream
52: {
53: const HTStreamClass *isa; /* inherited from HTStream */
54: const SGML_dtd *dtd;
55: HTStructuredClass *actions; /* target class */
56: HTStructured *target; /* target object */
1.2 timbl 57:
1.42 frystyk 58: HTTag *current_tag;
59: int current_attribute_number;
60: SGMLContent contents; /* current content mode */
61: HTChunk *string;
62: int token; /* ptr into string buffer */
63: sgml_state state;
64: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
65: int value[MAX_ATTRIBUTES]; /* Offset pointers to the string */
66: };
1.2 timbl 67:
68:
69: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
1.42 frystyk 70: #define PUTB(b,l) ((*context->actions->put_block)(context->target, b, l))
1.2 timbl 71:
1.42 frystyk 72: #define TRACE1(f,a) \
73: do {if (SGML_TRACE) HTTrace("SGML Parser. " f,a); } while(0)
74: #define TRACE2(f,a,b) \
75: do {if (SGML_TRACE) HTTrace("SGML Parser. " f,a,b); } while(0)
1.1 timbl 76:
1.17 timbl 77: /* Find Attribute Number
78: ** ---------------------
79: */
1.40 frystyk 80: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 frystyk 81: {
82: attr* attributes = tag->attributes;
1.17 timbl 83:
1.42 frystyk 84: int high, low, i, diff; /* Binary search for attribute name */
85: for(low=0, high=tag->number_of_attributes;
86: high > low ;
87: diff < 0 ? (low = i+1) : (high = i) )
88: {
89: i = (low + (high-low)/2);
90: diff = strcasecomp(attributes[i].name, s);
91: if (diff==0)
92: return i; /* success: found it */
93: }
94: return -1;
95: }
1.17 timbl 96:
1.1 timbl 97:
98: /* Handle Attribute
99: ** ----------------
100: */
1.38 frystyk 101: /* PUBLIC const char * SGML_default = ""; ?? */
1.1 timbl 102:
1.38 frystyk 103: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 frystyk 104: {
105: HTTag * tag = context->current_tag;
1.2 timbl 106:
1.42 frystyk 107: /* Note: if tag==NULL, we are skipping unknown tag... */
108: if (tag)
109: {
110: int i = SGMLFindAttribute(tag, s);
111: if (i >= 0)
112: {
113: context->current_attribute_number = i;
114: context->present[i] = YES;
115: return;
116: }
117: TRACE2("Unknown attribute %s for tag %s\n",
118: s, context->current_tag->name);
119: }
120: context->current_attribute_number = INVALID; /* Invalid */
121: }
1.2 timbl 122:
1.1 timbl 123:
124: /* Handle attribute value
125: ** ----------------------
126: */
1.42 frystyk 127: PRIVATE void handle_attribute_value (HTStream * context)
128: {
129: /* Deal with attributes only if tag is known,
130: ignore silently otherwise */
131:
132: if (context->current_tag)
133: {
134: if (context->current_attribute_number != INVALID)
135: context->value[context->current_attribute_number] =
136: context->token;
137: else
138: TRACE1("Attribute value %s ignored\n",
139: context->string->data + context->token);
140:
141: }
142: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
144:
145: /* Handle entity
146: ** -------------
147: **
148: ** On entry,
149: ** s contains the entity name zero terminated
150: */
1.42 frystyk 151: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 152: {
1.42 frystyk 153: const char ** entities = context->dtd->entity_names;
154: const char *s = context->string->data;
1.1 timbl 155:
1.42 frystyk 156: int high, low, i, diff;
157: for(low=0, high = context->dtd->number_of_entities;
158: high > low ;
159: diff < 0 ? (low = i+1) : (high = i))
160: {
161: i = (low + (high-low)/2);
162: diff = strcmp(entities[i], s); /* Case sensitive! */
163: if (diff==0)
164: { /* success: found it */
165: (*context->actions->put_entity)(context->target, i);
166: return;
167: }
168: }
169: /* If entity string not found, display as text */
170: TRACE1("Unknown entity %s\n", s);
171: PUTC('&');
172: {
173: const char *p;
174: for (p=s; *p; p++)
175: PUTC(*p);
176: }
1.35 frystyk 177: }
1.2 timbl 178:
1.1 timbl 179: /* End element
1.2 timbl 180: ** -----------
1.1 timbl 181: */
1.42 frystyk 182: PRIVATE void end_element (HTStream * context, HTTag *tag)
183: {
184: TRACE1("End </%s>\n", tag->name);
185: (*context->actions->end_element)
186: (context->target, tag - context->dtd->tags);
1.1 timbl 187: }
188:
1.17 timbl 189: /* Start an element
190: ** ----------------
1.1 timbl 191: */
1.31 frystyk 192: PRIVATE void start_element (HTStream * context)
1.42 frystyk 193: {
194: int i;
195: char *value[MAX_ATTRIBUTES];
196: HTTag *tag = context->current_tag;
197:
198: TRACE1("Start <%s>\n", tag->name);
199: context->contents = tag->contents;
200:
201: /*
202: ** Build the actual pointers to the value strings stored in the
203: ** chunk buffer. (Must use offsets while collecting the values,
204: ** because the string chunk may get resized during the collection
205: ** and potentially relocated).
206: */
207: for (i = 0; i < MAX_ATTRIBUTES; ++i)
208: value[i] = context->value[i] < 0 ? NULL :
209: context->string->data + context->value[i];
210: (*context->actions->start_element)
211: (context->target,
212: tag - context->dtd->tags,
213: context->present,
214: (const char**)value); /* coerce type for think c */
1.1 timbl 215: }
216:
217:
1.2 timbl 218: /* Find Tag in DTD tag list
219: ** ------------------------
1.1 timbl 220: **
221: ** On entry,
1.2 timbl 222: ** dtd points to dtd structire including valid tag list
223: ** string points to name of tag in question
1.1 timbl 224: **
1.2 timbl 225: ** On exit,
226: ** returns:
1.7 timbl 227: ** NULL tag not found
228: ** else address of tag structure in dtd
1.2 timbl 229: */
1.40 frystyk 230: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 frystyk 231: {
232: int high, low, i, diff;
233: for(low=0, high=dtd->number_of_tags;
234: high > low ;
235: diff < 0 ? (low = i+1) : (high = i))
236: { /* Binary serach */
237: i = (low + (high-low)/2);
238: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
239: if (diff==0)
240: /* success: found it */
241: return &dtd->tags[i];
242: }
243: return NULL;
1.2 timbl 244: }
245:
246: /*________________________________________________________________________
247: ** Public Methods
1.1 timbl 248: */
249:
1.2 timbl 250:
251: /* Could check that we are back to bottom of stack! @@ */
1.40 frystyk 252: PRIVATE int SGML_flush (HTStream * context)
1.42 frystyk 253: {
254: return (*context->actions->flush)(context->target);
1.26 frystyk 255: }
1.1 timbl 256:
1.40 frystyk 257: PRIVATE int SGML_free (HTStream * context)
1.42 frystyk 258: {
259: int status;
1.15 frystyk 260:
1.42 frystyk 261: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
262: return status;
263: HTChunk_delete(context->string);
264: HT_FREE(context);
265: return HT_OK;
1.15 frystyk 266: }
1.1 timbl 267:
1.40 frystyk 268: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 frystyk 269: {
270: (*context->actions->abort)(context->target, e);
271: HTChunk_delete(context->string);
272: HT_FREE(context);
273: return HT_ERROR;
1.15 frystyk 274: }
1.1 timbl 275:
1.41 frystyk 276: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 frystyk 277: {
278: const SGML_dtd *dtd = context->dtd;
279: HTChunk *string = context->string;
280: const char *text = b;
281: int count = 0;
1.18 timbl 282:
1.42 frystyk 283: while (l-- > 0)
284: {
285: char c = *b++;
286: switch(context->state)
287: {
288: got_element_open:
289: /*
290: ** The label is jumped when the '>' of a the element
291: ** start tag has been detected. This DOES NOT FALL TO
292: ** THE CODE S_after_open, only processes the tag and
293: ** sets the state (c should still contain the
294: ** terminating character of the tag ('>'))
295: */
296: if (context->current_tag && context->current_tag->name)
297: start_element(context);
298: context->state = S_after_open;
299: break;
1.18 timbl 300:
1.42 frystyk 301: case S_after_open:
302: /*
303: ** State S_after_open is entered only for single
304: ** character after the element opening tag to test
305: ** against newline. Strip one trainling newline only
306: ** after opening nonempty element. - SGML: Ugh!
307: */
308: text = b;
309: count = 0;
310: if (c == '\n' && (context->contents != SGML_EMPTY))
311: {
312: context->state = S_text;
313: break;
314: }
315: --text;
316: goto S_text;
317:
318: S_text:
319: context->state = S_text;
320: case S_text:
1.13 timbl 321: #ifdef ISO_2022_JP
1.42 frystyk 322: if (c == '\033')
323: {
324: context->state = S_esc;
325: ++count;
326: break;
327: }
1.13 timbl 328: #endif /* ISO_2022_JP */
1.42 frystyk 329: if (c == '&')
330: {
331: if (count > 0)
332: PUTB(text, count);
333: count = 0;
334: string->size = 0;
335: context->state = S_ero;
336: }
337: else if (c == '<')
338: {
339: if (count > 0)
340: PUTB(text, count);
341: count = 0;
342: string->size = 0;
343: /* should scrap LITERAL, and use CDATA and
344: RCDATA -- msa */
345: context->state =
346: (context->contents == SGML_LITERAL) ?
347: S_literal : S_tag;
348: }
349: else if (c == '\n')
350: /* Newline - ignore if before end tag! */
351: context->state = S_nl;
352: else
353: ++count;
354: break;
1.13 timbl 355:
1.42 frystyk 356: case S_nl:
357: if (c == '<')
358: {
359: if (count > 0)
360: PUTB(text, count);
361: count = 0;
362: string->size = 0;
363: context->state =
364: (context->contents == SGML_LITERAL) ?
365: S_literal : S_nl_tago;
366: }
367: else
368: {
369: ++count;
370: goto S_text;
371: }
372: break;
1.18 timbl 373:
1.42 frystyk 374: case S_nl_tago: /* Had newline and tag opener */
375: if (c != '/')
376: PUTC('\n'); /* Only ignore newline before </ */
377: context->state = S_tag;
378: goto handle_S_tag;
1.18 timbl 379:
1.13 timbl 380: #ifdef ISO_2022_JP
1.42 frystyk 381: case S_esc:
382: if (c=='$')
383: context->state = S_dollar;
384: else if (c=='(')
385: context->state = S_paren;
386: else
387: context->state = S_text;
388: ++count;
389: break;
390:
391: case S_dollar:
392: if (c=='@' || c=='B')
393: context->state = S_nonascii_text;
394: else
395: context->state = S_text;
396: ++count;
397: break;
398:
399: case S_paren:
400: if (c=='B' || c=='J')
401: context->state = S_text;
402: else
403: context->state = S_text;
404: ++count;
405: break;
406:
407: case S_nonascii_text:
408: if (c == '\033')
409: context->state = S_esc;
410: ++count;
411: break;
1.13 timbl 412: #endif /* ISO_2022_JP */
1.1 timbl 413:
1.42 frystyk 414: /* In literal mode, waits only for specific end tag!
415: ** Only foir compatibility with old servers.
416: */
417: case S_literal:
418: HTChunk_putc(string, c);
419: if ( TOUPPER(c) !=
420: ((string->size == 1) ? '/'
421: : context->current_tag->name[string->size-2]))
422: {
1.1 timbl 423:
1.42 frystyk 424: /* If complete match, end literal */
425: if ((c == '>') &&
426: (!context->current_tag->name[string->size-2]))
427: {
428: end_element
429: (context,context->current_tag);
430: /*
431: ...setting SGML_MIXED below is a
432: bit of kludge, but a good guess that
433: currently works, anything other than
434: SGML_LITERAL would work... -- msa */
435: context->contents = SGML_MIXED;
436: }
437: else
438: {
439: /* If Mismatch: recover string. */
440: PUTC( '<');
441: PUTB(string->data, string->size);
442: }
443: context->state = S_text;
444: text = b;
445: count = 0;
446: }
447: break;
1.1 timbl 448:
1.42 frystyk 449: /*
450: ** Character reference or Entity
451: */
452: case S_ero:
453: if (c == '#')
454: {
455: /* &# is Char Ref Open */
456: context->state = S_cro;
457: break;
458: }
459: context->state = S_entity;
1.1 timbl 460:
1.42 frystyk 461: /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 462:
1.42 frystyk 463: /*
464: ** Handle Entities
465: */
466: case S_entity:
1.43 ! frystyk 467: if (isalnum((int) c))
1.42 frystyk 468: HTChunk_putc(string, c);
469: else
470: {
471: HTChunk_terminate(string);
472: handle_entity(context);
473: text = b;
474: count = 0;
475: if (c != ';')
476: {
477: --text;
478: goto S_text;
479: }
480: context->state = S_text;
481: }
482: break;
1.2 timbl 483:
1.42 frystyk 484: /* Character reference
485: */
486: case S_cro:
1.43 ! frystyk 487: if (isalnum((int)c))
1.42 frystyk 488: /* accumulate a character NUMBER */
489: HTChunk_putc(string, c);
490: else
491: {
492: int value;
493: HTChunk_terminate(string);
494: if (sscanf(string->data, "%d", &value)==1)
495: PUTC((char)value);
496: else
497: {
498: PUTB("&#", 2);
499: PUTB(string->data, string->size-1);
500: }
501: text = b;
502: count = 0;
503: if (c != ';')
504: {
505: --text;
506: goto S_text;
507: }
508: context->state = S_text;
509: }
510: break;
1.1 timbl 511:
1.42 frystyk 512: case S_tag: /* new tag */
513: handle_S_tag:
1.43 ! frystyk 514: if (isalnum((int)c))
1.42 frystyk 515: HTChunk_putc(string, c);
516: else
517: { /* End of tag name */
518: int i;
1.1 timbl 519:
1.42 frystyk 520: if (c == '/')
521: {
522: if (string->size != 0)
523: TRACE1("`<%s/' found!\n",
524: string->data);
525: context->state = S_end;
526: break;
527: }
528: else if (c == '!')
529: {
530: if (string->size != 0)
531: TRACE1(" `<%s!' found!\n",
532: string->data);
533: context->state = S_md;
534: break;
535: }
536: HTChunk_terminate(string);
537: context->current_tag = SGMLFindTag(dtd, string->data);
538: if (context->current_tag == NULL)
539: TRACE1("*** Unknown element %s\n",
540: string->data);
541: else for (i=0;
542: i < context->current_tag->number_of_attributes; i++)
543: {
544: context->present[i] = NO;
545: context->value[i] = -1;
546: }
547: context->token = string->size = 0;
548: context->current_attribute_number = INVALID;
549: goto S_tag_gap;
550: }
551: break;
552:
553: S_tag_gap:
554: context->state = S_tag_gap;
555: case S_tag_gap: /* Expecting attribute or > */
1.43 ! frystyk 556: if (isspace((int) c))
1.42 frystyk 557: break; /* Gap between attributes */
558:
559: if (c == '>')
560: goto got_element_open;
561: else
562: goto S_attr;
563:
564: S_attr:
565: /*
566: ** Start collecting the attribute name and collect
567: ** it in S_attr.
568: */
569: context->state = S_attr;
570: string->size = context->token;
571: case S_attr:
1.43 ! frystyk 572: if (isspace((int) c) || c == '>' || c == '=')
1.42 frystyk 573: goto got_attribute_name;
574: else
575: HTChunk_putc(string, c);
576: break;
577:
578: got_attribute_name:
579: /*
580: ** This label is entered when attribute name has been
581: ** collected. Process it and enter S_attr_gap for
582: ** potential value or start of the next attribute.
583: */
584: HTChunk_terminate(string) ;
585: handle_attribute_name
586: (context, string->data + context->token);
587: string->size = context->token;
588: context->state = S_attr_gap;
589: case S_attr_gap: /* Expecting attribute or = or > */
1.43 ! frystyk 590: if (isspace((int) c))
1.42 frystyk 591: break; /* Gap after attribute */
592:
593: if (c == '>')
594: goto got_element_open;
595: else if (c == '=')
596: context->state = S_equals;
597: else
598: goto S_attr; /* Get next attribute */
599: break;
600:
601: case S_equals: /* After attr = */
1.43 ! frystyk 602: if (isspace((int) c))
1.42 frystyk 603: break; /* Before attribute value */
604:
605: if (c == '>')
606: { /* End of tag */
607: TRACE1("found = but no value\n", NULL);
608: goto got_element_open;
609: }
610: else if (c == '\'')
611: context->state = S_squoted;
612: else if (c == '"')
613: context->state = S_dquoted;
614: else
615: goto S_value;
616: break;
617:
618: S_value:
619: context->state = S_value;
620: string->size = context->token;
621: case S_value:
1.43 ! frystyk 622: if (isspace((int) c) || c == '>')
1.42 frystyk 623: {
624: HTChunk_terminate(string);
625: handle_attribute_value(context);
626: context->token = string->size;
627: goto S_tag_gap;
628: }
629: else
630: HTChunk_putc(string, c);
631: break;
1.1 timbl 632:
1.42 frystyk 633: case S_squoted: /* Quoted attribute value */
634: if (c == '\'')
635: {
636: HTChunk_terminate(string);
637: handle_attribute_value(context);
638: context->token = string->size;
639: context->state = S_tag_gap;
640: }
641: else if (c && c != '\n' && c != '\r')
642: HTChunk_putc(string, c);
643: break;
1.1 timbl 644:
1.42 frystyk 645: case S_dquoted: /* Quoted attribute value */
646: if (c == '"')
647: {
648: HTChunk_terminate(string);
649: handle_attribute_value(context);
650: context->token = string->size;
651: context->state = S_tag_gap;
652: }
653: else if (c && c != '\n' && c != '\r')
654: HTChunk_putc(string, c);
655: break;
1.2 timbl 656:
1.42 frystyk 657: case S_end: /* </ */
1.43 ! frystyk 658: if (isalnum((int) c))
1.42 frystyk 659: HTChunk_putc(string, c);
660: else
661: { /* End of end tag name */
662: HTTag *t;
663:
664: HTChunk_terminate(string);
665: if (*string->data)
666: t = SGMLFindTag(dtd, string->data);
667: else
668: /* Empty end tag */
669: /* Original code popped here one
670: from the stack. If this feature
671: is required, I have to put the
672: stack back... -- msa */
673: t = NULL;
674: if (!t)
675: TRACE1("Unknown end tag </%s>\n",
676: string->data);
677: else
678: {
679: context->current_tag = NULL;
680: end_element(context, t);
681: }
682: string->size = 0;
683: context->current_attribute_number = INVALID;
684: if (c != '>')
685: {
1.43 ! frystyk 686: if (!isspace((int) c))
1.42 frystyk 687: TRACE2("`</%s%c' found!\n",
688: string->data, c);
689: context->state = S_junk_tag;
690: }
691: else
692: {
693: text = b;
694: count = 0;
695: context->state = S_text;
696: }
697: }
698: break;
699:
700: S_junk_tag:
701: context->state = S_junk_tag;
702: case S_junk_tag:
703: if (c == '>')
704: {
705: text = b;
706: count = 0;
707: context->state = S_text;
708: }
709: break;
710:
711: /*
712: ** Scanning (actually skipping) declarations
713: */
714: case S_md:
715: if (c == '-')
716: context->state = S_com_1;
717: else if (c == '"')
718: context->state = S_md_dqs;
719: else if (c == '\'')
720: context->state = S_md_sqs;
721: else if (c == '>')
722: {
723: text = b;
724: count = 0;
725: context->state = S_text;
726: }
727: break;
728:
729: case S_md_dqs: /* Skip double quoted string */
730: if (c == '"')
731: context->state = S_md;
732: break;
733:
734: case S_md_sqs: /* Skip single quoted string */
735: if (c == '\'')
736: context->state = S_md;
737: break;
738:
739: case S_com_1: /* Starting a comment? */
740: context->state = (c == '-') ? S_com : S_md;
741: break;
742:
743: case S_com: /* ..within comment */
744: if (c == '-')
745: context->state = S_com_2;
746: break;
747:
748: case S_com_2: /* Ending a comment ? */
749: context->state = (c == '-') ? S_md : S_com;
750: break;
751: }
1.7 timbl 752: }
1.42 frystyk 753: if (count > 0)
754: PUTB(text, count);
755: return HT_OK;
756: }
1.1 timbl 757:
1.2 timbl 758:
1.40 frystyk 759: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 frystyk 760: {
761: return SGML_write(context, s, (int) strlen(s));
762: }
1.2 timbl 763:
764:
1.41 frystyk 765: PRIVATE int SGML_character (HTStream * context, char c)
1.42 frystyk 766: {
767: return SGML_write(context, &c, 1);
768: }
1.2 timbl 769:
770: /*_______________________________________________________________________
771: */
772:
773: /* Structured Object Class
774: ** -----------------------
775: */
1.38 frystyk 776: PRIVATE const HTStreamClass SGMLParser =
1.42 frystyk 777: {
778: "SGMLParser",
779: SGML_flush,
780: SGML_free,
781: SGML_abort,
782: SGML_character,
783: SGML_string,
784: SGML_write,
785: };
1.2 timbl 786:
787: /* Create SGML Engine
788: ** ------------------
789: **
790: ** On entry,
791: ** dtd represents the DTD, along with
792: ** actions is the sink for the data as a set of routines.
793: **
794: */
1.42 frystyk 795: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
796: {
797: int i;
798: HTStream* context;
799: if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
800: HT_OUTOFMEM("SGML_begin");
1.2 timbl 801:
1.42 frystyk 802: context->isa = &SGMLParser;
803: context->string = HTChunk_new(128); /* Grow by this much */
804: context->dtd = dtd;
805: context->target = target;
806: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
807: /* Ugh: no OO */
808: context->state = S_text;
809: for(i=0; i<MAX_ATTRIBUTES; i++)
810: context->value[i] = 0;
811: return context;
812: }
Webmaster