Annotation of libwww/Library/src/SGML.c, revision 1.47
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.47 ! frystyk 6: ** @(#) $Id: SGML.c,v 1.46 1998/09/18 02:45:33 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This module implements an HTStream object. To parse an
1.1 timbl 9: ** SGML file, create this object which is a parser. The object
1.2 timbl 10: ** is (currently) created by being passed a DTD structure,
11: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **
1.19 duns 13: ** 6 Feb 93 Binary searches used. Interface modified.
14: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.42 frystyk 15: ** Nov 1996 msa Strip down the parser to minimal HTML tokenizer,
16: ** Stop allocating space for the attribute values,
17: ** use pointers to the string chunk instead.
1.1 timbl 18: */
19:
1.25 frystyk 20: /* Library include files */
1.45 frystyk 21: #include "wwwsys.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26:
1.2 timbl 27: #define INVALID (-1)
28:
/*	The State (context) of the parser
**	---------------------------------
**	The state is kept in the stream context and handed to every call,
**	which is what makes the parser reentrant.  The order of the
**	enumerators is significant only in that it fixes their values.
*/
typedef enum _sgml_state
{
    S_text,			/* ordinary character data */
    S_literal,			/* literal content, waiting for its end tag */
    S_tag,			/* collecting a tag name after '<' */
    S_tag_gap,			/* inside a tag: expecting attribute or '>' */
    S_attr,			/* collecting an attribute name */
    S_attr_gap,			/* after a name: expecting attribute, '=' or '>' */
    S_equals,			/* after '=': expecting a value */
    S_value,			/* collecting an unquoted attribute value */
    S_after_open,		/* first character after an element start tag */
    S_nl,			/* newline seen in text */
    S_nl_tago,			/* newline followed by '<' */
    S_ero,			/* after '&' */
    S_cro,			/* after "&#": collecting a character number */
#ifdef ISO_2022_JP
    S_esc,			/* after ESC */
    S_dollar,			/* after ESC '$' */
    S_paren,			/* after ESC '(' */
    S_nonascii_text,		/* text following ESC '$' '@' or ESC '$' 'B' */
#endif
    S_squoted,			/* inside a single quoted attribute value */
    S_dquoted,			/* inside a double quoted attribute value */
    S_end,			/* collecting an end tag name after "</" */
    S_entity,			/* collecting an entity name */
    S_junk_tag,			/* skipping the rest of an end tag up to '>' */
    S_md,			/* inside a "<!" declaration */
    S_md_sqs,			/* single quoted string inside a declaration */
    S_md_dqs,			/* double quoted string inside a declaration */
    S_com_1,			/* '-' seen inside a declaration */
    S_com,			/* within a comment */
    S_com_2,			/* '-' seen within a comment */
    S_com_2a			/* "--" seen within a comment */
} sgml_state;
1.21 frystyk 46:
47:
1.2 timbl 48: /* Internal Context Data Structure
49: ** -------------------------------
50: */
1.42 frystyk 51: struct _HTStream
52: {
53: const HTStreamClass *isa; /* inherited from HTStream */
54: const SGML_dtd *dtd;
55: HTStructuredClass *actions; /* target class */
56: HTStructured *target; /* target object */
1.2 timbl 57:
1.42 frystyk 58: HTTag *current_tag;
59: int current_attribute_number;
60: SGMLContent contents; /* current content mode */
61: HTChunk *string;
62: int token; /* ptr into string buffer */
63: sgml_state state;
64: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
65: int value[MAX_ATTRIBUTES]; /* Offset pointers to the string */
66: };
1.2 timbl 67:
68:
/*
**  Output helpers.  Both expect a variable named `context' (the parser
**  stream) to be in scope and forward to the structured target.
*/
#define PUTC(ch) \
    ((*context->actions->put_character)(context->target, ch))
#define PUTB(b,l) \
    ((*context->actions->put_block)(context->target, b, l))

/*
**  Trace helpers with one or two arguments; they only call HTTrace when
**  SGML_TRACE is set.
*/
#define TRACE1(f,a) \
    do { if (SGML_TRACE) HTTrace((f),(a)); } while(0)
#define TRACE2(f,a,b) \
    do { if (SGML_TRACE) HTTrace((f),(a),(b)); } while(0)
1.1 timbl 76:
1.17 timbl 77: /* Find Attribute Number
78: ** ---------------------
79: */
1.40 frystyk 80: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 frystyk 81: {
1.47 ! frystyk 82: HTAttr* attributes = tag->attributes;
1.17 timbl 83:
1.42 frystyk 84: int high, low, i, diff; /* Binary search for attribute name */
85: for(low=0, high=tag->number_of_attributes;
86: high > low ;
87: diff < 0 ? (low = i+1) : (high = i) )
88: {
89: i = (low + (high-low)/2);
90: diff = strcasecomp(attributes[i].name, s);
91: if (diff==0)
92: return i; /* success: found it */
93: }
94: return -1;
95: }
1.17 timbl 96:
1.1 timbl 97:
98: /* Handle Attribute
99: ** ----------------
100: */
1.38 frystyk 101: /* PUBLIC const char * SGML_default = ""; ?? */
1.1 timbl 102:
1.38 frystyk 103: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 frystyk 104: {
105: HTTag * tag = context->current_tag;
1.2 timbl 106:
1.42 frystyk 107: /* Note: if tag==NULL, we are skipping unknown tag... */
108: if (tag)
109: {
110: int i = SGMLFindAttribute(tag, s);
111: if (i >= 0)
112: {
113: context->current_attribute_number = i;
114: context->present[i] = YES;
115: return;
116: }
117: TRACE2("Unknown attribute %s for tag %s\n",
118: s, context->current_tag->name);
119: }
120: context->current_attribute_number = INVALID; /* Invalid */
121: }
1.2 timbl 122:
1.1 timbl 123:
124: /* Handle attribute value
125: ** ----------------------
126: */
1.42 frystyk 127: PRIVATE void handle_attribute_value (HTStream * context)
128: {
129: /* Deal with attributes only if tag is known,
130: ignore silently otherwise */
131:
132: if (context->current_tag)
133: {
134: if (context->current_attribute_number != INVALID)
135: context->value[context->current_attribute_number] =
136: context->token;
137: else
138: TRACE1("Attribute value %s ignored\n",
139: context->string->data + context->token);
140:
141: }
142: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
144:
145: /* Handle entity
146: ** -------------
147: **
148: ** On entry,
149: ** s contains the entity name zero terminated
150: */
1.42 frystyk 151: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 152: {
1.42 frystyk 153: const char ** entities = context->dtd->entity_names;
154: const char *s = context->string->data;
1.1 timbl 155:
1.42 frystyk 156: int high, low, i, diff;
157: for(low=0, high = context->dtd->number_of_entities;
158: high > low ;
159: diff < 0 ? (low = i+1) : (high = i))
160: {
161: i = (low + (high-low)/2);
162: diff = strcmp(entities[i], s); /* Case sensitive! */
163: if (diff==0)
164: { /* success: found it */
165: (*context->actions->put_entity)(context->target, i);
166: return;
167: }
168: }
1.47 ! frystyk 169:
! 170: /* If entity string not found */
1.42 frystyk 171: TRACE1("Unknown entity %s\n", s);
1.47 ! frystyk 172: (*context->actions->unparsed_entity)
! 173: (context->target, context->string->data, context->string->size);
1.35 frystyk 174: }
1.2 timbl 175:
1.1 timbl 176: /* End element
1.2 timbl 177: ** -----------
1.1 timbl 178: */
1.42 frystyk 179: PRIVATE void end_element (HTStream * context, HTTag *tag)
180: {
181: TRACE1("End </%s>\n", tag->name);
182: (*context->actions->end_element)
183: (context->target, tag - context->dtd->tags);
1.1 timbl 184: }
185:
1.17 timbl 186: /* Start an element
187: ** ----------------
1.1 timbl 188: */
1.31 frystyk 189: PRIVATE void start_element (HTStream * context)
1.42 frystyk 190: {
191: int i;
192: char *value[MAX_ATTRIBUTES];
193: HTTag *tag = context->current_tag;
194:
195: TRACE1("Start <%s>\n", tag->name);
196: context->contents = tag->contents;
197:
198: /*
199: ** Build the actual pointers to the value strings stored in the
200: ** chunk buffer. (Must use offsets while collecting the values,
201: ** because the string chunk may get resized during the collection
202: ** and potentially relocated).
203: */
204: for (i = 0; i < MAX_ATTRIBUTES; ++i)
205: value[i] = context->value[i] < 0 ? NULL :
206: context->string->data + context->value[i];
207: (*context->actions->start_element)
208: (context->target,
209: tag - context->dtd->tags,
210: context->present,
211: (const char**)value); /* coerce type for think c */
1.1 timbl 212: }
213:
214:
1.2 timbl 215: /* Find Tag in DTD tag list
216: ** ------------------------
1.1 timbl 217: **
218: ** On entry,
1.2 timbl 219: ** dtd points to dtd structire including valid tag list
220: ** string points to name of tag in question
1.1 timbl 221: **
1.2 timbl 222: ** On exit,
223: ** returns:
1.7 timbl 224: ** NULL tag not found
225: ** else address of tag structure in dtd
1.2 timbl 226: */
1.40 frystyk 227: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 frystyk 228: {
229: int high, low, i, diff;
230: for(low=0, high=dtd->number_of_tags;
231: high > low ;
232: diff < 0 ? (low = i+1) : (high = i))
233: { /* Binary serach */
234: i = (low + (high-low)/2);
235: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
236: if (diff==0)
237: /* success: found it */
238: return &dtd->tags[i];
239: }
240: return NULL;
1.2 timbl 241: }
242:
243: /*________________________________________________________________________
244: ** Public Methods
1.1 timbl 245: */
246:
1.2 timbl 247:
248: /* Could check that we are back to bottom of stack! @@ */
1.40 frystyk 249: PRIVATE int SGML_flush (HTStream * context)
1.42 frystyk 250: {
251: return (*context->actions->flush)(context->target);
1.26 frystyk 252: }
1.1 timbl 253:
1.40 frystyk 254: PRIVATE int SGML_free (HTStream * context)
1.42 frystyk 255: {
256: int status;
1.15 frystyk 257:
1.42 frystyk 258: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
259: return status;
260: HTChunk_delete(context->string);
261: HT_FREE(context);
262: return HT_OK;
1.15 frystyk 263: }
1.1 timbl 264:
1.40 frystyk 265: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 frystyk 266: {
267: (*context->actions->abort)(context->target, e);
268: HTChunk_delete(context->string);
269: HT_FREE(context);
270: return HT_ERROR;
1.15 frystyk 271: }
1.1 timbl 272:
1.41 frystyk 273: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 frystyk 274: {
275: const SGML_dtd *dtd = context->dtd;
276: HTChunk *string = context->string;
277: const char *text = b;
278: int count = 0;
1.18 timbl 279:
1.42 frystyk 280: while (l-- > 0)
281: {
282: char c = *b++;
283: switch(context->state)
284: {
285: got_element_open:
286: /*
287: ** The label is jumped when the '>' of a the element
288: ** start tag has been detected. This DOES NOT FALL TO
289: ** THE CODE S_after_open, only processes the tag and
290: ** sets the state (c should still contain the
291: ** terminating character of the tag ('>'))
292: */
293: if (context->current_tag && context->current_tag->name)
294: start_element(context);
295: context->state = S_after_open;
296: break;
1.18 timbl 297:
1.42 frystyk 298: case S_after_open:
299: /*
300: ** State S_after_open is entered only for single
301: ** character after the element opening tag to test
302: ** against newline. Strip one trainling newline only
303: ** after opening nonempty element. - SGML: Ugh!
304: */
305: text = b;
306: count = 0;
307: if (c == '\n' && (context->contents != SGML_EMPTY))
308: {
309: context->state = S_text;
310: break;
311: }
312: --text;
313: goto S_text;
314:
315: S_text:
316: context->state = S_text;
317: case S_text:
1.13 timbl 318: #ifdef ISO_2022_JP
1.42 frystyk 319: if (c == '\033')
320: {
321: context->state = S_esc;
322: ++count;
323: break;
324: }
1.13 timbl 325: #endif /* ISO_2022_JP */
1.42 frystyk 326: if (c == '&')
327: {
328: if (count > 0)
329: PUTB(text, count);
330: count = 0;
331: string->size = 0;
332: context->state = S_ero;
333: }
334: else if (c == '<')
335: {
336: if (count > 0)
337: PUTB(text, count);
338: count = 0;
339: string->size = 0;
340: /* should scrap LITERAL, and use CDATA and
341: RCDATA -- msa */
342: context->state =
343: (context->contents == SGML_LITERAL) ?
344: S_literal : S_tag;
345: }
346: else if (c == '\n')
347: /* Newline - ignore if before end tag! */
348: context->state = S_nl;
349: else
350: ++count;
351: break;
1.13 timbl 352:
1.42 frystyk 353: case S_nl:
354: if (c == '<')
355: {
356: if (count > 0)
357: PUTB(text, count);
358: count = 0;
359: string->size = 0;
360: context->state =
361: (context->contents == SGML_LITERAL) ?
362: S_literal : S_nl_tago;
363: }
364: else
365: {
366: ++count;
367: goto S_text;
368: }
369: break;
1.18 timbl 370:
1.42 frystyk 371: case S_nl_tago: /* Had newline and tag opener */
372: if (c != '/')
373: PUTC('\n'); /* Only ignore newline before </ */
374: context->state = S_tag;
375: goto handle_S_tag;
1.18 timbl 376:
1.13 timbl 377: #ifdef ISO_2022_JP
1.42 frystyk 378: case S_esc:
379: if (c=='$')
380: context->state = S_dollar;
381: else if (c=='(')
382: context->state = S_paren;
383: else
384: context->state = S_text;
385: ++count;
386: break;
387:
388: case S_dollar:
389: if (c=='@' || c=='B')
390: context->state = S_nonascii_text;
391: else
392: context->state = S_text;
393: ++count;
394: break;
395:
396: case S_paren:
397: if (c=='B' || c=='J')
398: context->state = S_text;
399: else
400: context->state = S_text;
401: ++count;
402: break;
403:
404: case S_nonascii_text:
405: if (c == '\033')
406: context->state = S_esc;
407: ++count;
408: break;
1.13 timbl 409: #endif /* ISO_2022_JP */
1.1 timbl 410:
1.42 frystyk 411: /* In literal mode, waits only for specific end tag!
412: ** Only foir compatibility with old servers.
413: */
414: case S_literal:
415: HTChunk_putc(string, c);
416: if ( TOUPPER(c) !=
417: ((string->size == 1) ? '/'
418: : context->current_tag->name[string->size-2]))
419: {
1.1 timbl 420:
1.42 frystyk 421: /* If complete match, end literal */
422: if ((c == '>') &&
423: (!context->current_tag->name[string->size-2]))
424: {
425: end_element
426: (context,context->current_tag);
427: /*
428: ...setting SGML_MIXED below is a
429: bit of kludge, but a good guess that
430: currently works, anything other than
431: SGML_LITERAL would work... -- msa */
432: context->contents = SGML_MIXED;
433: }
434: else
435: {
436: /* If Mismatch: recover string. */
437: PUTC( '<');
438: PUTB(string->data, string->size);
439: }
440: context->state = S_text;
441: text = b;
442: count = 0;
443: }
444: break;
1.1 timbl 445:
1.42 frystyk 446: /*
447: ** Character reference or Entity
448: */
449: case S_ero:
450: if (c == '#')
451: {
452: /* &# is Char Ref Open */
453: context->state = S_cro;
454: break;
455: }
456: context->state = S_entity;
1.1 timbl 457:
1.42 frystyk 458: /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 459:
1.42 frystyk 460: /*
461: ** Handle Entities
462: */
463: case S_entity:
1.43 frystyk 464: if (isalnum((int) c))
1.42 frystyk 465: HTChunk_putc(string, c);
466: else
467: {
468: HTChunk_terminate(string);
469: handle_entity(context);
470: text = b;
471: count = 0;
472: if (c != ';')
473: {
474: --text;
475: goto S_text;
476: }
477: context->state = S_text;
478: }
479: break;
1.2 timbl 480:
1.42 frystyk 481: /* Character reference
482: */
483: case S_cro:
1.43 frystyk 484: if (isalnum((int)c))
1.42 frystyk 485: /* accumulate a character NUMBER */
486: HTChunk_putc(string, c);
487: else
488: {
489: int value;
490: HTChunk_terminate(string);
491: if (sscanf(string->data, "%d", &value)==1)
492: PUTC((char)value);
493: else
494: {
495: PUTB("&#", 2);
496: PUTB(string->data, string->size-1);
497: }
498: text = b;
499: count = 0;
500: if (c != ';')
501: {
502: --text;
503: goto S_text;
504: }
505: context->state = S_text;
506: }
507: break;
1.1 timbl 508:
1.42 frystyk 509: case S_tag: /* new tag */
510: handle_S_tag:
1.43 frystyk 511: if (isalnum((int)c))
1.42 frystyk 512: HTChunk_putc(string, c);
513: else
514: { /* End of tag name */
515: int i;
1.1 timbl 516:
1.42 frystyk 517: if (c == '/')
518: {
519: if (string->size != 0)
520: TRACE1("`<%s/' found!\n",
521: string->data);
522: context->state = S_end;
523: break;
524: }
525: else if (c == '!')
526: {
527: if (string->size != 0)
528: TRACE1(" `<%s!' found!\n",
529: string->data);
530: context->state = S_md;
531: break;
532: }
533: HTChunk_terminate(string);
534: context->current_tag = SGMLFindTag(dtd, string->data);
1.47 ! frystyk 535: if (context->current_tag == NULL) {
! 536: TRACE1("*** Unknown element %s\n", string->data);
! 537: (*context->actions->unparsed_begin_element)
! 538: (context->target, string->data, string->size);
! 539: } else {
! 540: for (i=0; i<context->current_tag->number_of_attributes; i++) {
1.42 frystyk 541: context->present[i] = NO;
542: context->value[i] = -1;
543: }
1.47 ! frystyk 544: }
1.42 frystyk 545: context->token = string->size = 0;
546: context->current_attribute_number = INVALID;
547: goto S_tag_gap;
548: }
549: break;
550:
551: S_tag_gap:
552: context->state = S_tag_gap;
553: case S_tag_gap: /* Expecting attribute or > */
1.43 frystyk 554: if (isspace((int) c))
1.42 frystyk 555: break; /* Gap between attributes */
556:
557: if (c == '>')
558: goto got_element_open;
559: else
560: goto S_attr;
561:
562: S_attr:
563: /*
564: ** Start collecting the attribute name and collect
565: ** it in S_attr.
566: */
567: context->state = S_attr;
568: string->size = context->token;
569: case S_attr:
1.43 frystyk 570: if (isspace((int) c) || c == '>' || c == '=')
1.42 frystyk 571: goto got_attribute_name;
572: else
573: HTChunk_putc(string, c);
574: break;
575:
576: got_attribute_name:
577: /*
578: ** This label is entered when attribute name has been
579: ** collected. Process it and enter S_attr_gap for
580: ** potential value or start of the next attribute.
581: */
582: HTChunk_terminate(string) ;
583: handle_attribute_name
584: (context, string->data + context->token);
585: string->size = context->token;
586: context->state = S_attr_gap;
587: case S_attr_gap: /* Expecting attribute or = or > */
1.43 frystyk 588: if (isspace((int) c))
1.42 frystyk 589: break; /* Gap after attribute */
590:
591: if (c == '>')
592: goto got_element_open;
593: else if (c == '=')
594: context->state = S_equals;
595: else
596: goto S_attr; /* Get next attribute */
597: break;
598:
599: case S_equals: /* After attr = */
1.43 frystyk 600: if (isspace((int) c))
1.42 frystyk 601: break; /* Before attribute value */
602:
603: if (c == '>')
604: { /* End of tag */
605: TRACE1("found = but no value\n", NULL);
606: goto got_element_open;
607: }
608: else if (c == '\'')
609: context->state = S_squoted;
610: else if (c == '"')
611: context->state = S_dquoted;
612: else
613: goto S_value;
614: break;
615:
616: S_value:
617: context->state = S_value;
618: string->size = context->token;
619: case S_value:
1.43 frystyk 620: if (isspace((int) c) || c == '>')
1.42 frystyk 621: {
622: HTChunk_terminate(string);
623: handle_attribute_value(context);
624: context->token = string->size;
625: goto S_tag_gap;
626: }
627: else
628: HTChunk_putc(string, c);
629: break;
1.1 timbl 630:
1.42 frystyk 631: case S_squoted: /* Quoted attribute value */
632: if (c == '\'')
633: {
634: HTChunk_terminate(string);
635: handle_attribute_value(context);
636: context->token = string->size;
637: context->state = S_tag_gap;
638: }
639: else if (c && c != '\n' && c != '\r')
640: HTChunk_putc(string, c);
641: break;
1.1 timbl 642:
1.42 frystyk 643: case S_dquoted: /* Quoted attribute value */
644: if (c == '"')
645: {
646: HTChunk_terminate(string);
647: handle_attribute_value(context);
648: context->token = string->size;
649: context->state = S_tag_gap;
650: }
651: else if (c && c != '\n' && c != '\r')
652: HTChunk_putc(string, c);
653: break;
1.2 timbl 654:
1.42 frystyk 655: case S_end: /* </ */
1.43 frystyk 656: if (isalnum((int) c))
1.42 frystyk 657: HTChunk_putc(string, c);
658: else
659: { /* End of end tag name */
660: HTTag *t;
661:
662: HTChunk_terminate(string);
663: if (*string->data)
664: t = SGMLFindTag(dtd, string->data);
665: else
666: /* Empty end tag */
667: /* Original code popped here one
668: from the stack. If this feature
669: is required, I have to put the
670: stack back... -- msa */
671: t = NULL;
1.47 ! frystyk 672: if (!t) {
! 673: TRACE1("Unknown end tag </%s>\n", string->data);
! 674: (*context->actions->unparsed_end_element)
! 675: (context->target, string->data, string->size);
! 676: } else {
! 677: context->current_tag = NULL;
! 678: end_element(context, t);
! 679: }
1.42 frystyk 680: string->size = 0;
681: context->current_attribute_number = INVALID;
682: if (c != '>')
683: {
1.43 frystyk 684: if (!isspace((int) c))
1.42 frystyk 685: TRACE2("`</%s%c' found!\n",
686: string->data, c);
687: context->state = S_junk_tag;
688: }
689: else
690: {
691: text = b;
692: count = 0;
693: context->state = S_text;
694: }
695: }
696: break;
697:
698: case S_junk_tag:
699: if (c == '>')
700: {
701: text = b;
702: count = 0;
703: context->state = S_text;
704: }
705: break;
706:
707: /*
708: ** Scanning (actually skipping) declarations
709: */
710: case S_md:
711: if (c == '-')
712: context->state = S_com_1;
713: else if (c == '"')
714: context->state = S_md_dqs;
715: else if (c == '\'')
716: context->state = S_md_sqs;
717: else if (c == '>')
718: {
719: text = b;
720: count = 0;
721: context->state = S_text;
722: }
723: break;
724:
725: case S_md_dqs: /* Skip double quoted string */
726: if (c == '"')
727: context->state = S_md;
1.46 frystyk 728: else if (c == '>')
729: {
730: text = b;
731: count = 0;
732: context->state = S_text;
733: }
1.42 frystyk 734: break;
735:
736: case S_md_sqs: /* Skip single quoted string */
737: if (c == '\'')
738: context->state = S_md;
1.46 frystyk 739: else if (c == '>')
740: {
741: text = b;
742: count = 0;
743: context->state = S_text;
744: }
1.42 frystyk 745: break;
746:
747: case S_com_1: /* Starting a comment? */
748: context->state = (c == '-') ? S_com : S_md;
1.46 frystyk 749: if (c == '>')
750: {
751: text = b;
752: count = 0;
753: context->state = S_text;
754: }
1.42 frystyk 755: break;
756:
757: case S_com: /* ..within comment */
758: if (c == '-')
759: context->state = S_com_2;
760: break;
761:
762: case S_com_2: /* Ending a comment ? */
1.44 frystyk 763: context->state = (c == '-') ? S_com_2a : S_com;
764: break;
765:
766: case S_com_2a:
767: if (c == '>') {
768: text = b;
769: count = 0;
770: context->state = S_text;
771: } else
772: context->state = S_com;
1.42 frystyk 773: break;
774: }
1.7 timbl 775: }
1.42 frystyk 776: if (count > 0)
777: PUTB(text, count);
778: return HT_OK;
779: }
1.1 timbl 780:
1.2 timbl 781:
1.40 frystyk 782: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 frystyk 783: {
784: return SGML_write(context, s, (int) strlen(s));
785: }
1.2 timbl 786:
787:
1.41 frystyk 788: PRIVATE int SGML_character (HTStream * context, char c)
1.42 frystyk 789: {
790: return SGML_write(context, &c, 1);
791: }
1.2 timbl 792:
793: /*_______________________________________________________________________
794: */
795:
796: /* Structured Object Class
797: ** -----------------------
798: */
1.38 frystyk 799: PRIVATE const HTStreamClass SGMLParser =
1.47 ! frystyk 800: {
! 801: "SGML",
! 802: SGML_flush,
! 803: SGML_free,
! 804: SGML_abort,
! 805: SGML_character,
! 806: SGML_string,
! 807: SGML_write
! 808: };
1.2 timbl 809:
810: /* Create SGML Engine
811: ** ------------------
812: **
813: ** On entry,
814: ** dtd represents the DTD, along with
815: ** actions is the sink for the data as a set of routines.
816: **
817: */
1.42 frystyk 818: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
1.47 ! frystyk 819: {
! 820: int i;
! 821: HTStream* context;
! 822: if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
! 823: HT_OUTOFMEM("SGML_begin");
! 824:
! 825: context->isa = &SGMLParser;
! 826: context->string = HTChunk_new(128); /* Grow by this much */
! 827: context->dtd = dtd;
! 828: context->target = target;
! 829: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
! 830: /* Ugh: no OO */
! 831: context->state = S_text;
! 832: for(i=0; i<MAX_ATTRIBUTES; i++)
! 833: context->value[i] = 0;
! 834: return context;
! 835: }
! 836:
! 837: PUBLIC HTTag * SGML_findTag (SGML_dtd * dtd, int element_number)
! 838: {
! 839: return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
! 840: (dtd->tags+element_number) : NULL;
! 841: }
! 842:
! 843: PUBLIC char * SGML_findTagName (SGML_dtd * dtd, int element_number)
! 844: {
! 845: return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
! 846: (dtd->tags+element_number)->name : NULL;
! 847: }
! 848:
! 849: PUBLIC SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number)
! 850: {
! 851: return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
! 852: (dtd->tags+element_number)->contents : SGML_ELEMENT;
! 853: }
! 854:
! 855: PUBLIC char * HTTag_name (HTTag * tag)
! 856: {
! 857: return tag ? tag->name : NULL;
! 858: }
! 859:
! 860: PUBLIC int HTTag_attributes (HTTag * tag)
! 861: {
! 862: return tag ? tag->number_of_attributes : -1;
! 863: }
! 864:
! 865: PUBLIC char * HTTag_attributeName (HTTag * tag, int attribute_number)
! 866: {
! 867: return (tag && attribute_number>=0 && attribute_number<tag->number_of_attributes) ?
! 868: (tag->attributes+attribute_number)->name : NULL;
! 869: }
! 870:
! 871: PUBLIC char * HTAttr_name (HTAttr * attr)
! 872: {
! 873: return attr ? attr->name : NULL;
! 874: }
Webmaster