Annotation of libwww/Library/src/SGML.c, revision 1.50
1.23 frystyk 1: /* SGML.c
2: ** GENERAL SGML PARSER CODE
3: **
1.27 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.50 ! frystyk 6: ** @(#) $Id: SGML.c,v 1.49 1999/02/18 00:34:52 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This module implements an HTStream object. To parse an
1.1 timbl 9: ** SGML file, create this object which is a parser. The object
1.2 timbl 10: ** is (currently) created by being passed a DTD structure,
11: ** and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **
1.19 duns 13: ** 6 Feb 93 Binary searches used. Interface modified.
14: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.42 frystyk 15: ** Nov 1996 msa Strip down the parser to minimal HTML tokenizer,
16: ** Stop allocating space for the attribute values,
17: ** use pointers to the string chunk instead.
1.1 timbl 18: */
19:
1.25 frystyk 20: /* Library include files */
1.45 frystyk 21: #include "wwwsys.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26:
1.2 timbl 27: #define INVALID (-1)
28:
/*	Parser states
**	-------------
**	The current state is kept in the stream context and is picked up
**	again on every call, which is what makes the parser reentrant.
**	The order of the enumerators is significant for their numeric values
**	and must not be changed.
*/
typedef enum _sgml_state
{
    S_text,		/* plain character data */
    S_literal,		/* literal content, waiting for the matching end tag */
    S_tag,		/* collecting a tag name after '<' */
    S_tag_gap,		/* inside a start tag: expecting attribute or '>' */
    S_attr,		/* collecting an attribute name */
    S_attr_gap,		/* after attribute name: expecting attribute, '=' or '>' */
    S_equals,		/* after "attr =" */
    S_value,		/* collecting an unquoted attribute value */
    S_after_open,	/* the single character following a start tag */
    S_nl,		/* had a newline in text */
    S_nl_tago,		/* had a newline followed by a tag opener */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#" (character reference) */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* inside non-ASCII text */
#endif
    S_squoted,		/* inside a single quoted attribute value */
    S_dquoted,		/* inside a double quoted attribute value */
    S_end,		/* collecting an end tag name after "</" */
    S_entity,		/* collecting an entity name */
    S_junk_tag,		/* skipping the remainder of a tag up to '>' */
    S_md,		/* inside a markup declaration ("<!") */
    S_md_sqs,		/* single quoted string inside a declaration */
    S_md_dqs,		/* double quoted string inside a declaration */
    S_com_1,		/* declaration: had one '-' (starting a comment?) */
    S_com,		/* within a comment */
    S_com_2,		/* comment: had one '-' (ending a comment?) */
    S_com_2a		/* comment: had "--", expecting '>' */
} sgml_state;
1.21 frystyk 46:
47:
1.2 timbl 48: /* Internal Context Data Structure
49: ** -------------------------------
50: */
1.42 frystyk 51: struct _HTStream
52: {
53: const HTStreamClass *isa; /* inherited from HTStream */
54: const SGML_dtd *dtd;
55: HTStructuredClass *actions; /* target class */
56: HTStructured *target; /* target object */
1.2 timbl 57:
1.42 frystyk 58: HTTag *current_tag;
59: int current_attribute_number;
60: SGMLContent contents; /* current content mode */
61: HTChunk *string;
62: int token; /* ptr into string buffer */
63: sgml_state state;
64: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
65: int value[MAX_ATTRIBUTES]; /* Offset pointers to the string */
66: };
1.2 timbl 67:
68:
/*
**  Output helpers: hand one character or a block of characters to the
**  target. Both expect a variable `context' (HTStream *) in scope.
*/
#define PUTC(ch) \
    ((*context->actions->put_character)(context->target, (ch)))
#define PUTB(b,l) \
    ((*context->actions->put_block)(context->target, (b), (l)))

/*
**  Trace helpers: print a message with one or two arguments, but only
**  when SGML_TRACE is set. Wrapped in do/while so they act as a statement.
*/
#define TRACE1(f,a) \
    do { if (SGML_TRACE) HTTrace((f), (a)); } while (0)
#define TRACE2(f,a,b) \
    do { if (SGML_TRACE) HTTrace((f), (a), (b)); } while (0)
1.1 timbl 76:
1.17 timbl 77: /* Find Attribute Number
78: ** ---------------------
79: */
1.40 frystyk 80: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 frystyk 81: {
1.47 frystyk 82: HTAttr* attributes = tag->attributes;
1.17 timbl 83:
1.42 frystyk 84: int high, low, i, diff; /* Binary search for attribute name */
85: for(low=0, high=tag->number_of_attributes;
86: high > low ;
87: diff < 0 ? (low = i+1) : (high = i) )
88: {
89: i = (low + (high-low)/2);
90: diff = strcasecomp(attributes[i].name, s);
91: if (diff==0)
92: return i; /* success: found it */
93: }
94: return -1;
95: }
1.17 timbl 96:
1.1 timbl 97:
98: /* Handle Attribute
99: ** ----------------
100: */
1.38 frystyk 101: /* PUBLIC const char * SGML_default = ""; ?? */
1.1 timbl 102:
1.38 frystyk 103: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 frystyk 104: {
105: HTTag * tag = context->current_tag;
1.2 timbl 106:
1.42 frystyk 107: /* Note: if tag==NULL, we are skipping unknown tag... */
108: if (tag)
109: {
110: int i = SGMLFindAttribute(tag, s);
111: if (i >= 0)
112: {
113: context->current_attribute_number = i;
114: context->present[i] = YES;
115: return;
116: }
117: TRACE2("Unknown attribute %s for tag %s\n",
118: s, context->current_tag->name);
119: }
120: context->current_attribute_number = INVALID; /* Invalid */
121: }
1.2 timbl 122:
1.1 timbl 123:
124: /* Handle attribute value
125: ** ----------------------
126: */
1.42 frystyk 127: PRIVATE void handle_attribute_value (HTStream * context)
128: {
129: /* Deal with attributes only if tag is known,
130: ignore silently otherwise */
131:
132: if (context->current_tag)
133: {
134: if (context->current_attribute_number != INVALID)
135: context->value[context->current_attribute_number] =
136: context->token;
1.48 frystyk 137: else {
138: char * data = HTChunk_data(context->string);
139: TRACE1("Attribute value %s ignored\n",
140: data ? data+context->token : "<null>");
141: }
1.42 frystyk 142: }
143: context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 144: }
145:
146: /* Handle entity
147: ** -------------
148: **
149: ** On entry,
150: ** s contains the entity name zero terminated
151: */
1.42 frystyk 152: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 153: {
1.42 frystyk 154: const char ** entities = context->dtd->entity_names;
1.48 frystyk 155: const char *s = HTChunk_data(context->string);
1.1 timbl 156:
1.42 frystyk 157: int high, low, i, diff;
158: for(low=0, high = context->dtd->number_of_entities;
159: high > low ;
160: diff < 0 ? (low = i+1) : (high = i))
161: {
162: i = (low + (high-low)/2);
163: diff = strcmp(entities[i], s); /* Case sensitive! */
164: if (diff==0)
165: { /* success: found it */
166: (*context->actions->put_entity)(context->target, i);
167: return;
168: }
169: }
1.47 frystyk 170:
171: /* If entity string not found */
1.42 frystyk 172: TRACE1("Unknown entity %s\n", s);
1.47 frystyk 173: (*context->actions->unparsed_entity)
1.48 frystyk 174: (context->target, HTChunk_data(context->string), HTChunk_size(context->string));
1.35 frystyk 175: }
1.2 timbl 176:
1.1 timbl 177: /* End element
1.2 timbl 178: ** -----------
1.1 timbl 179: */
1.42 frystyk 180: PRIVATE void end_element (HTStream * context, HTTag *tag)
181: {
182: TRACE1("End </%s>\n", tag->name);
183: (*context->actions->end_element)
184: (context->target, tag - context->dtd->tags);
1.1 timbl 185: }
186:
1.17 timbl 187: /* Start an element
188: ** ----------------
1.1 timbl 189: */
1.31 frystyk 190: PRIVATE void start_element (HTStream * context)
1.42 frystyk 191: {
192: int i;
193: char *value[MAX_ATTRIBUTES];
194: HTTag *tag = context->current_tag;
195:
196: TRACE1("Start <%s>\n", tag->name);
197: context->contents = tag->contents;
198:
199: /*
200: ** Build the actual pointers to the value strings stored in the
201: ** chunk buffer. (Must use offsets while collecting the values,
202: ** because the string chunk may get resized during the collection
203: ** and potentially relocated).
204: */
205: for (i = 0; i < MAX_ATTRIBUTES; ++i)
206: value[i] = context->value[i] < 0 ? NULL :
1.48 frystyk 207: HTChunk_data(context->string) + context->value[i];
1.42 frystyk 208: (*context->actions->start_element)
209: (context->target,
210: tag - context->dtd->tags,
211: context->present,
212: (const char**)value); /* coerce type for think c */
1.1 timbl 213: }
214:
215:
1.2 timbl 216: /* Find Tag in DTD tag list
217: ** ------------------------
1.1 timbl 218: **
219: ** On entry,
1.2 timbl 220: ** dtd points to dtd structire including valid tag list
221: ** string points to name of tag in question
1.1 timbl 222: **
1.2 timbl 223: ** On exit,
224: ** returns:
1.7 timbl 225: ** NULL tag not found
226: ** else address of tag structure in dtd
1.2 timbl 227: */
1.40 frystyk 228: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 frystyk 229: {
230: int high, low, i, diff;
231: for(low=0, high=dtd->number_of_tags;
232: high > low ;
233: diff < 0 ? (low = i+1) : (high = i))
234: { /* Binary serach */
235: i = (low + (high-low)/2);
236: diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
237: if (diff==0)
238: /* success: found it */
239: return &dtd->tags[i];
240: }
241: return NULL;
1.2 timbl 242: }
243:
244: /*________________________________________________________________________
245: ** Public Methods
1.1 timbl 246: */
247:
1.2 timbl 248:
249: /* Could check that we are back to bottom of stack! @@ */
1.40 frystyk 250: PRIVATE int SGML_flush (HTStream * context)
1.42 frystyk 251: {
252: return (*context->actions->flush)(context->target);
1.26 frystyk 253: }
1.1 timbl 254:
1.40 frystyk 255: PRIVATE int SGML_free (HTStream * context)
1.42 frystyk 256: {
257: int status;
1.15 frystyk 258:
1.42 frystyk 259: if ((status = (*context->actions->_free)(context->target)) != HT_OK)
260: return status;
261: HTChunk_delete(context->string);
262: HT_FREE(context);
263: return HT_OK;
1.15 frystyk 264: }
1.1 timbl 265:
1.40 frystyk 266: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 frystyk 267: {
268: (*context->actions->abort)(context->target, e);
269: HTChunk_delete(context->string);
270: HT_FREE(context);
271: return HT_ERROR;
1.15 frystyk 272: }
1.1 timbl 273:
1.41 frystyk 274: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 frystyk 275: {
276: const SGML_dtd *dtd = context->dtd;
277: HTChunk *string = context->string;
278: const char *text = b;
279: int count = 0;
1.18 timbl 280:
1.42 frystyk 281: while (l-- > 0)
282: {
283: char c = *b++;
284: switch(context->state)
285: {
286: got_element_open:
287: /*
288: ** The label is jumped when the '>' of a the element
289: ** start tag has been detected. This DOES NOT FALL TO
290: ** THE CODE S_after_open, only processes the tag and
291: ** sets the state (c should still contain the
292: ** terminating character of the tag ('>'))
293: */
294: if (context->current_tag && context->current_tag->name)
295: start_element(context);
296: context->state = S_after_open;
297: break;
1.18 timbl 298:
1.42 frystyk 299: case S_after_open:
300: /*
301: ** State S_after_open is entered only for single
302: ** character after the element opening tag to test
303: ** against newline. Strip one trainling newline only
304: ** after opening nonempty element. - SGML: Ugh!
305: */
306: text = b;
307: count = 0;
308: if (c == '\n' && (context->contents != SGML_EMPTY))
309: {
310: context->state = S_text;
311: break;
312: }
313: --text;
314: goto S_text;
315:
316: S_text:
317: context->state = S_text;
318: case S_text:
1.13 timbl 319: #ifdef ISO_2022_JP
1.42 frystyk 320: if (c == '\033')
321: {
322: context->state = S_esc;
323: ++count;
324: break;
325: }
1.13 timbl 326: #endif /* ISO_2022_JP */
1.42 frystyk 327: if (c == '&')
328: {
329: if (count > 0)
330: PUTB(text, count);
331: count = 0;
1.48 frystyk 332: HTChunk_clear(string);
1.42 frystyk 333: context->state = S_ero;
334: }
335: else if (c == '<')
336: {
337: if (count > 0)
338: PUTB(text, count);
339: count = 0;
1.48 frystyk 340: HTChunk_clear(string);
1.42 frystyk 341: /* should scrap LITERAL, and use CDATA and
342: RCDATA -- msa */
343: context->state =
344: (context->contents == SGML_LITERAL) ?
345: S_literal : S_tag;
346: }
347: else if (c == '\n')
348: /* Newline - ignore if before end tag! */
349: context->state = S_nl;
350: else
351: ++count;
352: break;
1.13 timbl 353:
1.42 frystyk 354: case S_nl:
355: if (c == '<')
356: {
357: if (count > 0)
358: PUTB(text, count);
359: count = 0;
1.48 frystyk 360: HTChunk_clear(string);
1.42 frystyk 361: context->state =
362: (context->contents == SGML_LITERAL) ?
363: S_literal : S_nl_tago;
364: }
365: else
366: {
367: ++count;
368: goto S_text;
369: }
370: break;
1.18 timbl 371:
1.42 frystyk 372: case S_nl_tago: /* Had newline and tag opener */
373: if (c != '/')
374: PUTC('\n'); /* Only ignore newline before </ */
375: context->state = S_tag;
376: goto handle_S_tag;
1.18 timbl 377:
1.13 timbl 378: #ifdef ISO_2022_JP
1.42 frystyk 379: case S_esc:
380: if (c=='$')
381: context->state = S_dollar;
382: else if (c=='(')
383: context->state = S_paren;
384: else
385: context->state = S_text;
386: ++count;
387: break;
388:
389: case S_dollar:
390: if (c=='@' || c=='B')
391: context->state = S_nonascii_text;
392: else
393: context->state = S_text;
394: ++count;
395: break;
396:
397: case S_paren:
398: if (c=='B' || c=='J')
399: context->state = S_text;
400: else
401: context->state = S_text;
402: ++count;
403: break;
404:
405: case S_nonascii_text:
406: if (c == '\033')
407: context->state = S_esc;
408: ++count;
409: break;
1.13 timbl 410: #endif /* ISO_2022_JP */
1.1 timbl 411:
1.42 frystyk 412: /* In literal mode, waits only for specific end tag!
413: ** Only foir compatibility with old servers.
414: */
415: case S_literal:
416: HTChunk_putc(string, c);
417: if ( TOUPPER(c) !=
1.48 frystyk 418: ((HTChunk_size(string) == 1) ? '/'
419: : context->current_tag->name[HTChunk_size(string)-2]))
1.42 frystyk 420: {
1.1 timbl 421:
1.42 frystyk 422: /* If complete match, end literal */
423: if ((c == '>') &&
1.48 frystyk 424: (!context->current_tag->name[HTChunk_size(string)-2]))
1.42 frystyk 425: {
426: end_element
427: (context,context->current_tag);
428: /*
429: ...setting SGML_MIXED below is a
430: bit of kludge, but a good guess that
431: currently works, anything other than
432: SGML_LITERAL would work... -- msa */
433: context->contents = SGML_MIXED;
434: }
435: else
436: {
437: /* If Mismatch: recover string. */
438: PUTC( '<');
1.48 frystyk 439: PUTB(HTChunk_data(string), HTChunk_size(string));
1.42 frystyk 440: }
441: context->state = S_text;
442: text = b;
443: count = 0;
444: }
445: break;
1.1 timbl 446:
1.42 frystyk 447: /*
448: ** Character reference or Entity
449: */
450: case S_ero:
451: if (c == '#')
452: {
453: /* &# is Char Ref Open */
454: context->state = S_cro;
455: break;
456: }
457: context->state = S_entity;
1.1 timbl 458:
1.42 frystyk 459: /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 460:
1.42 frystyk 461: /*
462: ** Handle Entities
463: */
464: case S_entity:
1.43 frystyk 465: if (isalnum((int) c))
1.42 frystyk 466: HTChunk_putc(string, c);
467: else
468: {
469: HTChunk_terminate(string);
470: handle_entity(context);
471: text = b;
472: count = 0;
473: if (c != ';')
474: {
475: --text;
476: goto S_text;
477: }
478: context->state = S_text;
479: }
480: break;
1.2 timbl 481:
1.42 frystyk 482: /* Character reference
483: */
484: case S_cro:
1.43 frystyk 485: if (isalnum((int)c))
1.42 frystyk 486: /* accumulate a character NUMBER */
487: HTChunk_putc(string, c);
488: else
489: {
490: int value;
491: HTChunk_terminate(string);
1.48 frystyk 492: if (sscanf(HTChunk_data(string), "%d", &value)==1)
1.42 frystyk 493: PUTC((char)value);
494: else
495: {
496: PUTB("&#", 2);
1.48 frystyk 497: PUTB(HTChunk_data(string), HTChunk_size(string)-1);
1.42 frystyk 498: }
499: text = b;
500: count = 0;
501: if (c != ';')
502: {
503: --text;
504: goto S_text;
505: }
506: context->state = S_text;
507: }
508: break;
1.1 timbl 509:
1.42 frystyk 510: case S_tag: /* new tag */
511: handle_S_tag:
1.43 frystyk 512: if (isalnum((int)c))
1.42 frystyk 513: HTChunk_putc(string, c);
1.48 frystyk 514: else { /* End of tag name */
515: int i;
516: if (c == '/') {
517: if (HTChunk_size(string) > 0)
518: TRACE1("`<%s/' found!\n", HTChunk_data(string));
519: context->state = S_end;
520: break;
521: } else if (c == '!') {
522: if (HTChunk_size(string) > 0)
523: TRACE1(" `<%s!' found!\n", HTChunk_data(string));
524: context->state = S_md;
525: break;
526: }
527: HTChunk_terminate(string);
528: context->current_tag = SGMLFindTag(dtd, HTChunk_data(string));
529: if (context->current_tag == NULL) {
530: TRACE1("*** Unknown element %s\n", HTChunk_data(string));
531: (*context->actions->unparsed_begin_element)
532: (context->target, HTChunk_data(string), HTChunk_size(string));
533: } else {
534: for (i=0; i<context->current_tag->number_of_attributes; i++) {
535: context->present[i] = NO;
536: context->value[i] = -1;
1.47 frystyk 537: }
1.42 frystyk 538: }
1.48 frystyk 539: context->token = 0;
540: HTChunk_clear(string);
541: context->current_attribute_number = INVALID;
542: goto S_tag_gap;
543: }
1.42 frystyk 544: break;
545:
546: S_tag_gap:
547: context->state = S_tag_gap;
548: case S_tag_gap: /* Expecting attribute or > */
1.43 frystyk 549: if (isspace((int) c))
1.42 frystyk 550: break; /* Gap between attributes */
551:
552: if (c == '>')
553: goto got_element_open;
554: else
555: goto S_attr;
556:
557: S_attr:
558: /*
559: ** Start collecting the attribute name and collect
560: ** it in S_attr.
561: */
562: context->state = S_attr;
1.48 frystyk 563: HTChunk_truncate(string, context->token);
1.42 frystyk 564: case S_attr:
1.43 frystyk 565: if (isspace((int) c) || c == '>' || c == '=')
1.42 frystyk 566: goto got_attribute_name;
567: else
568: HTChunk_putc(string, c);
569: break;
570:
571: got_attribute_name:
572: /*
573: ** This label is entered when attribute name has been
574: ** collected. Process it and enter S_attr_gap for
575: ** potential value or start of the next attribute.
576: */
577: HTChunk_terminate(string) ;
578: handle_attribute_name
1.48 frystyk 579: (context, HTChunk_data(string) + context->token);
580: HTChunk_truncate(string, context->token);
1.42 frystyk 581: context->state = S_attr_gap;
582: case S_attr_gap: /* Expecting attribute or = or > */
1.43 frystyk 583: if (isspace((int) c))
1.42 frystyk 584: break; /* Gap after attribute */
585:
586: if (c == '>')
587: goto got_element_open;
588: else if (c == '=')
589: context->state = S_equals;
590: else
591: goto S_attr; /* Get next attribute */
592: break;
593:
594: case S_equals: /* After attr = */
1.43 frystyk 595: if (isspace((int) c))
1.42 frystyk 596: break; /* Before attribute value */
597:
598: if (c == '>')
599: { /* End of tag */
600: TRACE1("found = but no value\n", NULL);
601: goto got_element_open;
602: }
603: else if (c == '\'')
604: context->state = S_squoted;
605: else if (c == '"')
606: context->state = S_dquoted;
607: else
608: goto S_value;
609: break;
610:
611: S_value:
612: context->state = S_value;
1.48 frystyk 613: HTChunk_truncate(string, context->token);
1.42 frystyk 614: case S_value:
1.43 frystyk 615: if (isspace((int) c) || c == '>')
1.42 frystyk 616: {
617: HTChunk_terminate(string);
618: handle_attribute_value(context);
1.48 frystyk 619: context->token = HTChunk_size(string);
1.42 frystyk 620: goto S_tag_gap;
621: }
622: else
623: HTChunk_putc(string, c);
624: break;
1.1 timbl 625:
1.42 frystyk 626: case S_squoted: /* Quoted attribute value */
627: if (c == '\'')
628: {
629: HTChunk_terminate(string);
630: handle_attribute_value(context);
1.48 frystyk 631: context->token = HTChunk_size(string);
1.42 frystyk 632: context->state = S_tag_gap;
633: }
634: else if (c && c != '\n' && c != '\r')
635: HTChunk_putc(string, c);
636: break;
1.1 timbl 637:
1.42 frystyk 638: case S_dquoted: /* Quoted attribute value */
639: if (c == '"')
640: {
641: HTChunk_terminate(string);
642: handle_attribute_value(context);
1.48 frystyk 643: context->token = HTChunk_size(string);
1.42 frystyk 644: context->state = S_tag_gap;
645: }
646: else if (c && c != '\n' && c != '\r')
647: HTChunk_putc(string, c);
648: break;
1.2 timbl 649:
1.42 frystyk 650: case S_end: /* </ */
1.43 frystyk 651: if (isalnum((int) c))
1.42 frystyk 652: HTChunk_putc(string, c);
653: else
654: { /* End of end tag name */
655: HTTag *t;
1.48 frystyk 656: char * first;
1.42 frystyk 657: HTChunk_terminate(string);
1.48 frystyk 658: if ((first=HTChunk_data(string))!=NULL && *first != '\0')
659: t = SGMLFindTag(dtd, HTChunk_data(string));
1.42 frystyk 660: else
661: /* Empty end tag */
662: /* Original code popped here one
663: from the stack. If this feature
664: is required, I have to put the
665: stack back... -- msa */
666: t = NULL;
1.47 frystyk 667: if (!t) {
1.48 frystyk 668: TRACE1("Unknown end tag </%s>\n", HTChunk_data(string));
1.47 frystyk 669: (*context->actions->unparsed_end_element)
1.48 frystyk 670: (context->target, HTChunk_data(string), HTChunk_size(string));
1.47 frystyk 671: } else {
672: context->current_tag = NULL;
673: end_element(context, t);
674: }
1.48 frystyk 675: HTChunk_clear(string);
1.42 frystyk 676: context->current_attribute_number = INVALID;
677: if (c != '>')
678: {
1.43 frystyk 679: if (!isspace((int) c))
1.42 frystyk 680: TRACE2("`</%s%c' found!\n",
1.48 frystyk 681: HTChunk_data(string), c);
1.42 frystyk 682: context->state = S_junk_tag;
683: }
684: else
685: {
686: text = b;
687: count = 0;
688: context->state = S_text;
689: }
690: }
691: break;
692:
693: case S_junk_tag:
694: if (c == '>')
695: {
696: text = b;
697: count = 0;
698: context->state = S_text;
699: }
700: break;
701:
702: /*
703: ** Scanning (actually skipping) declarations
704: */
705: case S_md:
706: if (c == '-')
707: context->state = S_com_1;
708: else if (c == '"')
709: context->state = S_md_dqs;
710: else if (c == '\'')
711: context->state = S_md_sqs;
712: else if (c == '>')
713: {
714: text = b;
715: count = 0;
716: context->state = S_text;
717: }
718: break;
719:
720: case S_md_dqs: /* Skip double quoted string */
721: if (c == '"')
722: context->state = S_md;
1.46 frystyk 723: else if (c == '>')
724: {
725: text = b;
726: count = 0;
727: context->state = S_text;
728: }
1.42 frystyk 729: break;
730:
731: case S_md_sqs: /* Skip single quoted string */
732: if (c == '\'')
733: context->state = S_md;
1.46 frystyk 734: else if (c == '>')
735: {
736: text = b;
737: count = 0;
738: context->state = S_text;
739: }
1.42 frystyk 740: break;
741:
742: case S_com_1: /* Starting a comment? */
743: context->state = (c == '-') ? S_com : S_md;
1.46 frystyk 744: if (c == '>')
745: {
746: text = b;
747: count = 0;
748: context->state = S_text;
749: }
1.42 frystyk 750: break;
751:
752: case S_com: /* ..within comment */
753: if (c == '-')
754: context->state = S_com_2;
755: break;
756:
757: case S_com_2: /* Ending a comment ? */
1.44 frystyk 758: context->state = (c == '-') ? S_com_2a : S_com;
759: break;
760:
761: case S_com_2a:
762: if (c == '>') {
763: text = b;
764: count = 0;
765: context->state = S_text;
766: } else
767: context->state = S_com;
1.42 frystyk 768: break;
769: }
1.7 timbl 770: }
1.42 frystyk 771: if (count > 0)
772: PUTB(text, count);
773: return HT_OK;
774: }
1.1 timbl 775:
1.2 timbl 776:
1.40 frystyk 777: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 frystyk 778: {
779: return SGML_write(context, s, (int) strlen(s));
780: }
1.2 timbl 781:
782:
1.41 frystyk 783: PRIVATE int SGML_character (HTStream * context, char c)
1.42 frystyk 784: {
785: return SGML_write(context, &c, 1);
786: }
1.2 timbl 787:
788: /*_______________________________________________________________________
789: */
790:
791: /* Structured Object Class
792: ** -----------------------
793: */
1.38 frystyk 794: PRIVATE const HTStreamClass SGMLParser =
1.47 frystyk 795: {
796: "SGML",
797: SGML_flush,
798: SGML_free,
799: SGML_abort,
800: SGML_character,
801: SGML_string,
802: SGML_write
803: };
1.2 timbl 804:
805: /* Create SGML Engine
806: ** ------------------
807: **
808: ** On entry,
809: ** dtd represents the DTD, along with
810: ** actions is the sink for the data as a set of routines.
811: **
812: */
1.42 frystyk 813: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
1.47 frystyk 814: {
815: int i;
816: HTStream* context;
817: if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
818: HT_OUTOFMEM("SGML_begin");
819:
820: context->isa = &SGMLParser;
821: context->string = HTChunk_new(128); /* Grow by this much */
822: context->dtd = dtd;
823: context->target = target;
824: context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
825: /* Ugh: no OO */
826: context->state = S_text;
827: for(i=0; i<MAX_ATTRIBUTES; i++)
828: context->value[i] = 0;
829: return context;
830: }
831:
832: PUBLIC HTTag * SGML_findTag (SGML_dtd * dtd, int element_number)
833: {
834: return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
835: (dtd->tags+element_number) : NULL;
836: }
837:
838: PUBLIC char * SGML_findTagName (SGML_dtd * dtd, int element_number)
839: {
840: return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
841: (dtd->tags+element_number)->name : NULL;
842: }
843:
844: PUBLIC SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number)
845: {
846: return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
847: (dtd->tags+element_number)->contents : SGML_ELEMENT;
848: }
849:
1.50 ! frystyk 850: PUBLIC int SGML_findElementNumber (SGML_dtd * dtd, char * name_element)
! 851: {
! 852: if (dtd && name_element) {
! 853: int i;
! 854: HTTag *ct;
! 855: for (i = 0; i< dtd->number_of_tags; i++) {
! 856: ct = &(dtd->tags[i]);
! 857: if (!strcasecomp(ct->name,name_element))
! 858: return i;
! 859: }
! 860: }
! 861: return -1;
! 862: }
! 863:
1.47 frystyk 864: PUBLIC char * HTTag_name (HTTag * tag)
865: {
866: return tag ? tag->name : NULL;
1.49 frystyk 867: }
868:
869: PUBLIC SGMLContent HTTag_content (HTTag * tag)
870: {
871: return tag ? tag->contents : SGML_EMPTY;
1.47 frystyk 872: }
873:
874: PUBLIC int HTTag_attributes (HTTag * tag)
875: {
876: return tag ? tag->number_of_attributes : -1;
877: }
878:
879: PUBLIC char * HTTag_attributeName (HTTag * tag, int attribute_number)
880: {
881: return (tag && attribute_number>=0 && attribute_number<tag->number_of_attributes) ?
882: (tag->attributes+attribute_number)->name : NULL;
883: }
884:
885: PUBLIC char * HTAttr_name (HTAttr * attr)
886: {
887: return attr ? attr->name : NULL;
888: }
1.50 ! frystyk 889:
! 890:
! 891:
Webmaster