Annotation of libwww/Library/src/HTML.c, revision 1.77
1.39 frystyk 1: /* HTML.c
1.75 frystyk 2: ** SIMPLE HTML PARSER WITHOUT ANY PRESENTATION CODE
1.39 frystyk 3: **
1.43 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.77 ! frystyk 6: ** @(#) $Id: HTML.c,v 1.76 1999/01/22 13:54:17 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This generates a hypertext object. It converts from the
1.75 frystyk 9: ** structured stream interface of HTML events into the style-
10: ** oriented interface of the HText interface.
1.1 timbl 11: **
1.35 duns 12: ** HISTORY:
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
1.16 timbl 15:
1.41 frystyk 16: /* Library include files */
1.72 frystyk 17: #include "wwwsys.h"
1.63 frystyk 18: #include "WWWUtil.h"
19: #include "WWWCore.h"
20: #include "WWWHTML.h"
1.73 frystyk 21: #include "HTML.h"
1.75 frystyk 22: #include "HTextImp.h"
1.73 frystyk 23:
/*
**  Shorthands for driving the downstream target stream of a structured
**  object `t' through the method table reached via t->target->isa.
**
**  Every macro takes the owning object as its first argument.
**  ABORT_TARGET additionally expects a variable named `e' to be in scope
**  where it is expanded; that variable is handed to the abort method.
*/
#define PUTC(t,c)        (*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)        (*(t)->target->isa->put_string)((t)->target, (s))
#define PUTB(t,b,l)      (*(t)->target->isa->put_block)((t)->target, (b), (l))
#define FLUSH_TARGET(t)  (*(t)->target->isa->flush)((t)->target)
#define FREE_TARGET(t)   (*(t)->target->isa->_free)((t)->target)
#define ABORT_TARGET(t)  (*(t)->target->isa->abort)((t)->target, e)

/* Number of slots in the element stack of struct _HTStructured */
#define MAX_NESTING 40
1.1 timbl 32:
1.75 frystyk 33: struct _HTStream {
34: const HTStreamClass * isa;
35: /* .... */
36: };
1.2 timbl 37:
38: struct _HTStructured {
1.60 frystyk 39: const HTStructuredClass * isa;
1.54 frystyk 40: HTRequest * request;
1.2 timbl 41: HTParentAnchor * node_anchor;
1.75 frystyk 42: HTextImp * text;
43: HTStream * target;
44: HTChunk * title;
45: BOOL in_word;
46: SGML_dtd * dtd;
1.2 timbl 47: char * comment_start; /* for literate programming */
48: char * comment_end;
1.75 frystyk 49: BOOL started;
1.1 timbl 50:
1.75 frystyk 51: int overflow;
52: int * sp;
53: int stack[MAX_NESTING];
1.2 timbl 54: };
1.1 timbl 55:
/*
**  Entity values -- local ISO Latin 1 representation
**
**  One single-character string per entity, given as an octal escape.
**  The order MUST match exactly the entity table referred to in the DTD,
**  because HTML_put_entity() indexes this array by entity number.
*/
#define ENTITY_SIZE 67
static char * ISO_Latin1[ENTITY_SIZE] = {
    /* --- capitals --- */
    "\306",     /* capital AE diphthong (ligature) */
    "\301",     /* capital A, acute accent */
    "\302",     /* capital A, circumflex accent */
    "\300",     /* capital A, grave accent */
    "\305",     /* capital A, ring */
    "\303",     /* capital A, tilde */
    "\304",     /* capital A, dieresis or umlaut mark */
    "\307",     /* capital C, cedilla */
    "\320",     /* capital Eth, Icelandic */
    "\311",     /* capital E, acute accent */
    "\312",     /* capital E, circumflex accent */
    "\310",     /* capital E, grave accent */
    "\313",     /* capital E, dieresis or umlaut mark */
    "\315",     /* capital I, acute accent */
    "\316",     /* capital I, circumflex accent */
    "\314",     /* capital I, grave accent */
    "\317",     /* capital I, dieresis or umlaut mark */
    "\321",     /* capital N, tilde */
    "\323",     /* capital O, acute accent */
    "\324",     /* capital O, circumflex accent */
    "\322",     /* capital O, grave accent */
    "\330",     /* capital O, slash */
    "\325",     /* capital O, tilde */
    "\326",     /* capital O, dieresis or umlaut mark */
    "\336",     /* capital THORN, Icelandic */
    "\332",     /* capital U, acute accent */
    "\333",     /* capital U, circumflex accent */
    "\331",     /* capital U, grave accent */
    "\334",     /* capital U, dieresis or umlaut mark */
    "\335",     /* capital Y, acute accent */

    /* --- small letters and markup characters --- */
    "\341",     /* small a, acute accent */
    "\342",     /* small a, circumflex accent */
    "\346",     /* small ae diphthong (ligature) */
    "\340",     /* small a, grave accent */
    "\046",     /* ampersand */
    "\345",     /* small a, ring */
    "\343",     /* small a, tilde */
    "\344",     /* small a, dieresis or umlaut mark */
    "\347",     /* small c, cedilla */
    "\351",     /* small e, acute accent */
    "\352",     /* small e, circumflex accent */
    "\350",     /* small e, grave accent */
    "\360",     /* small eth, Icelandic */
    "\353",     /* small e, dieresis or umlaut mark */
    "\076",     /* greater than */
    "\355",     /* small i, acute accent */
    "\356",     /* small i, circumflex accent */
    "\354",     /* small i, grave accent */
    "\357",     /* small i, dieresis or umlaut mark */
    "\074",     /* less than */
    "\040",     /* non-breaking space (emitted as an ordinary space) */
    "\361",     /* small n, tilde */
    "\363",     /* small o, acute accent */
    "\364",     /* small o, circumflex accent */
    "\362",     /* small o, grave accent */
    "\370",     /* small o, slash */
    "\365",     /* small o, tilde */
    "\366",     /* small o, dieresis or umlaut mark */
    "\042",     /* double quote sign - June 94 */
    "\337",     /* small sharp s, German (sz ligature) */
    "\376",     /* small thorn, Icelandic */
    "\372",     /* small u, acute accent */
    "\373",     /* small u, circumflex accent */
    "\371",     /* small u, grave accent */
    "\374",     /* small u, dieresis or umlaut mark */
    "\375",     /* small y, acute accent */
    "\377",     /* small y, dieresis or umlaut mark */
};
130:
1.75 frystyk 131: PRIVATE char ** CurrentEntityValues = ISO_Latin1;
1.2 timbl 132:
1.75 frystyk 133: PUBLIC BOOL HTMLUseCharacterSet (HTMLCharacterSet i)
1.1 timbl 134: {
1.75 frystyk 135: if (i == HTML_ISO_LATIN1) {
136: CurrentEntityValues = ISO_Latin1;
137: return YES;
1.1 timbl 138: } else {
1.75 frystyk 139: if (SGML_TRACE) HTTrace("HTML Parser. Doesn't support this character set\n");
140: return NO;
1.1 timbl 141: }
142: }
143:
1.75 frystyk 144: PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
1.1 timbl 145: {
1.75 frystyk 146: if (!me->started) {
147: HTextImp_build(me->text, HTEXT_BEGIN);
148: me->started = YES;
1.1 timbl 149: }
150:
1.75 frystyk 151: /* Look at what we got */
152: switch (me->sp[0]) {
1.2 timbl 153:
1.75 frystyk 154: case HTML_COMMENT:
155: break; /* Do Nothing */
156:
157: case HTML_TITLE:
158: HTChunk_putb(me->title, b, l);
159: /* Fall through */
1.71 frystyk 160:
1.75 frystyk 161: default:
162: HTextImp_addText(me->text, b, l);
1.71 frystyk 163: }
1.42 frystyk 164: return HT_OK;
1.1 timbl 165: }
166:
1.71 frystyk 167: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 168: {
1.71 frystyk 169: return HTML_write(me, &c, sizeof(char));
1.1 timbl 170: }
171:
1.64 frystyk 172: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1 timbl 173: {
1.71 frystyk 174: return HTML_write(me, s, (int) strlen(s));
1.1 timbl 175: }
1.2 timbl 176:
1.75 frystyk 177: PRIVATE void HTML_start_element (HTStructured * me,
178: int element_number,
179: const BOOL * present,
180: const char ** value)
181: {
182: HTChildAnchor * address = NULL;
183: if (!me->started) {
184: HTextImp_build(me->text, HTEXT_BEGIN);
185: me->started = YES;
1.73 frystyk 186: }
187:
1.75 frystyk 188: /* Look at what element was started */
189: switch (element_number) {
1.2 timbl 190: case HTML_A:
1.75 frystyk 191: if (present[HTML_A_HREF] && value[HTML_A_HREF]) {
192: address = HTAnchor_findChildAndLink(
193: me->node_anchor, /* parent */
194: present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */
195: value[HTML_A_HREF], /* Addresss */
196: present[HTML_A_REL] && value[HTML_A_REL] ?
197: (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);
1.2 timbl 198:
1.75 frystyk 199: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
200: HTLink * link = HTAnchor_mainLink((HTAnchor *) address);
201: HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
202: if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
203: }
204: HTextImp_foundLink(me->text, element_number, HTML_A_HREF,
205: address, present, value);
206: if (SGML_TRACE)
207: HTTrace("HTML Parser. Anchor `%s\'\n", value[HTML_A_HREF]);
208: }
209: break;
210:
211: case HTML_AREA:
212: if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) {
213: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
214: value[HTML_AREA_HREF], NULL);
215: HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF,
216: address, present, value);
217: if (SGML_TRACE)
218: HTTrace("HTML Parser. Image map area `%s\'\n", value[HTML_AREA_HREF]);
1.2 timbl 219: }
1.75 frystyk 220: break;
221:
222: case HTML_BASE:
223: if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) {
224: HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]);
225: if (SGML_TRACE)
226: HTTrace("HTML Parser. New base `%s\'\n", value[HTML_BASE_HREF]);
227: }
228: break;
229:
230: case HTML_BODY:
231: if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) {
232: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
233: value[HTML_BODY_BACKGROUND], NULL);
234: HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND,
235: address, present, value);
236: if (SGML_TRACE)
237: HTTrace("HTML Parser. Background `%s\'\n", value[HTML_BODY_BACKGROUND]);
238: }
239: break;
240:
1.77 ! frystyk 241: case HTML_FORM:
! 242: if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) {
! 243: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
! 244: value[HTML_FORM_ACTION], NULL);
! 245: HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION,
! 246: address, present, value);
! 247: }
! 248: break;
! 249:
1.75 frystyk 250: case HTML_FRAME:
251: if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) {
252: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
253: value[HTML_FRAME_SRC], NULL);
254: HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC,
255: address, present, value);
256: if (SGML_TRACE)
257: HTTrace("HTML Parser. Frame `%s\'\n", value[HTML_FRAME_SRC]);
258: }
259: break;
260:
1.77 ! frystyk 261: case HTML_INPUT:
! 262: if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) {
! 263: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
! 264: value[HTML_INPUT_SRC], NULL);
! 265: HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC,
! 266: address, present, value);
! 267: }
! 268: break;
! 269:
1.75 frystyk 270: case HTML_IMG:
271: if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) {
272: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
273: value[HTML_IMG_SRC], NULL);
274: HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC,
275: address, present, value);
276: }
277: break;
278:
279: case HTML_ISINDEX:
280: HTAnchor_setIndex(me->node_anchor);
281: break;
1.2 timbl 282:
1.63 frystyk 283: case HTML_LINK:
1.69 frystyk 284: if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
1.75 frystyk 285: HTParentAnchor * dest = NULL;
286: address = HTAnchor_findChildAndLink(
1.73 frystyk 287: me->node_anchor, /* parent */
288: present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */
289: present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL, /* Addresss */
290: NULL); /* Rels */
1.75 frystyk 291: dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address));
1.69 frystyk 292:
293: /* If forward reference */
294: if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
295: char * strval = NULL;
296: char * ptr = NULL;
297: char * relation = NULL;
298: StrAllocCopy(strval, value[HTML_LINK_REL]);
299: ptr = strval;
300: while ((relation = HTNextLWSToken(&ptr)) != NULL) {
301: HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
302: (HTLinkType) HTAtom_caseFor(relation),
303: METHOD_INVALID);
304: }
305: HT_FREE(strval);
306: }
307:
308: /* If reverse reference */
309: if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
310: char * strval = NULL;
311: char * ptr = NULL;
312: char * relation = NULL;
313: StrAllocCopy(strval, value[HTML_LINK_REV]);
314: ptr = strval;
315: while ((relation = HTNextLWSToken(&ptr)) != NULL) {
316: HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
317: (HTLinkType) HTAtom_caseFor(relation),
318: METHOD_INVALID);
319: }
320: HT_FREE(strval);
321: }
1.63 frystyk 322:
1.69 frystyk 323: /* If we got any type information as well */
324: if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
325: if (HTAnchor_format(dest) == WWW_UNKNOWN)
326: HTAnchor_setFormat(dest,
327: (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
328: }
1.63 frystyk 329:
1.75 frystyk 330: /* Call out to the layout engine */
331: HTextImp_foundLink(me->text, element_number, HTML_LINK_HREF,
332: address, present, value);
1.70 frystyk 333: }
1.75 frystyk 334: break;
1.70 frystyk 335:
336: case HTML_META:
337: if (present[HTML_META_NAME] && value[HTML_META_NAME]) {
338: HTAnchor_addMeta (me->node_anchor,
339: value[HTML_META_NAME],
340: (present[HTML_META_CONTENT] && value[HTML_META_CONTENT]) ?
341: value[HTML_META_CONTENT] : "");
1.69 frystyk 342: }
1.75 frystyk 343: break;
344:
345: case HTML_OBJECT:
346: if (present[HTML_OBJECT_CLASSID] && value[HTML_OBJECT_CLASSID]) {
347: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
348: value[HTML_OBJECT_CLASSID], NULL);
349: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CLASSID,
350: address, present, value);
351: }
352:
353: if (present[HTML_OBJECT_CODEBASE] && value[HTML_OBJECT_CODEBASE]) {
354: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
355: value[HTML_OBJECT_CODEBASE], NULL);
356: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CODEBASE,
357: address, present, value);
358: }
359:
360: if (present[HTML_OBJECT_DATA] && value[HTML_OBJECT_DATA]) {
361: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
362: value[HTML_OBJECT_DATA], NULL);
363: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_DATA,
364: address, present, value);
365: }
1.63 frystyk 366:
1.75 frystyk 367: if (present[HTML_OBJECT_ARCHIVE] && value[HTML_OBJECT_ARCHIVE]) {
368: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
369: value[HTML_OBJECT_ARCHIVE], NULL);
370: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_ARCHIVE,
371: address, present, value);
372: }
1.2 timbl 373:
1.75 frystyk 374: if (present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP]) {
375: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
376: value[HTML_OBJECT_USEMAP], NULL);
377: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_USEMAP,
378: address, present, value);
1.2 timbl 379: }
380: break;
381:
382: case HTML_PRE:
1.4 timbl 383: if (me->comment_end)
1.75 frystyk 384: HTextImp_addText(me->text, me->comment_end, strlen(me->comment_end));
1.2 timbl 385: break;
1.11 timbl 386:
1.75 frystyk 387: case HTML_TITLE:
388: HTChunk_clear(me->title);
1.2 timbl 389: break;
1.75 frystyk 390: }
1.2 timbl 391:
1.75 frystyk 392: /* Update our parse stack */
393: if (SGML_findTagContents(me->dtd, element_number) != SGML_EMPTY) {
1.13 timbl 394: if (me->sp == me->stack) {
1.44 frystyk 395: if (SGML_TRACE)
1.75 frystyk 396: HTTrace("HTML Parser. Maximum nesting of %d exceded!\n", MAX_NESTING);
1.44 frystyk 397: me->overflow++;
1.12 timbl 398: return;
399: }
1.4 timbl 400: --(me->sp);
1.75 frystyk 401: me->sp[0] = element_number;
1.10 timbl 402: }
1.75 frystyk 403:
404: /* Call out to the layout engine */
405: HTextImp_beginElement(me->text, element_number, present, value);
1.1 timbl 406: }
1.10 timbl 407:
1.53 frystyk 408: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 409: {
1.75 frystyk 410: if (!me->started) {
411: HTextImp_build(me->text, HTEXT_BEGIN);
412: me->started = YES;
1.1 timbl 413: }
1.44 frystyk 414:
1.75 frystyk 415: /* Update our parse stack */
1.44 frystyk 416: if (me->overflow > 0) {
417: me->overflow--;
418: return;
419: }
1.75 frystyk 420: me->sp++;
1.67 frystyk 421: if (me->sp > me->stack + MAX_NESTING - 1) {
1.75 frystyk 422: if (SGML_TRACE) HTTrace("HTML Parser. Bottom of parse stack reached\n");
1.67 frystyk 423: me->sp = me->stack + MAX_NESTING - 1;
424: }
1.44 frystyk 425:
1.75 frystyk 426: /* Look at what element was closed */
1.2 timbl 427: switch(element_number) {
428: case HTML_TITLE:
1.56 frystyk 429: HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 430: break;
431:
432: case HTML_PRE:
1.4 timbl 433: if (me->comment_start)
1.75 frystyk 434: HTextImp_addText(me->text, me->comment_start, strlen(me->comment_start));
435: break;
436: }
1.44 frystyk 437:
1.75 frystyk 438: /* Call out to the layout engine */
439: HTextImp_endElement(me->text, element_number);
1.1 timbl 440: }
441:
1.53 frystyk 442: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 443: {
1.75 frystyk 444: if (!me->started) {
445: HTextImp_build(me->text, HTEXT_BEGIN);
446: me->started = YES;
447: }
448: if (entity_number>=0 && entity_number<ENTITY_SIZE)
449: HTML_put_string(me, *(CurrentEntityValues+entity_number));
1.1 timbl 450: }
1.2 timbl 451:
1.53 frystyk 452: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 453: {
1.75 frystyk 454: if (!me->started) {
455: HTextImp_build(me->text, HTEXT_BEGIN);
456: me->started = YES;
457: }
458: if (me->comment_end) HTML_put_string(me, me->comment_end);
459: return me->target ? FLUSH_TARGET(me) : HT_OK;
460: }
461:
462: PRIVATE int HTML_unparsedBeginElement (HTStructured * me, const char * b, int l)
463: {
464: if (!me->started) {
465: HTextImp_build(me->text, HTEXT_BEGIN);
466: me->started = YES;
467: }
468: HTextImp_unparsedBeginElement(me->text, b, l);
469: return HT_OK;
1.42 frystyk 470: }
1.2 timbl 471:
1.75 frystyk 472: PRIVATE int HTML_unparsedEndElement (HTStructured * me, const char * b, int l)
1.1 timbl 473: {
1.75 frystyk 474: if (!me->started) {
475: HTextImp_build(me->text, HTEXT_BEGIN);
476: me->started = YES;
477: }
478: HTextImp_unparsedEndElement(me->text, b, l);
479: return HT_OK;
480: }
1.4 timbl 481:
1.75 frystyk 482: PRIVATE int HTML_unparsedEntity (HTStructured * me, const char * b, int l)
483: {
484: if (!me->started) {
485: HTextImp_build(me->text, HTEXT_BEGIN);
486: me->started = YES;
1.2 timbl 487: }
1.75 frystyk 488: HTextImp_unparsedEntity(me->text, b, l);
489: return HT_OK;
490: }
491:
492: PUBLIC int HTML_free (HTStructured * me)
493: {
494: if (!me->started) HTextImp_build(me->text, HTEXT_BEGIN);
495: if (me->comment_end) HTML_put_string(me, me->comment_end);
496: HTextImp_build(me->text, HTEXT_END);
1.76 frystyk 497: HTextImp_delete(me->text);
1.56 frystyk 498: HTChunk_delete(me->title);
1.75 frystyk 499: if (me->target) FREE_TARGET(me);
1.58 frystyk 500: HT_FREE(me);
1.42 frystyk 501: return HT_OK;
1.1 timbl 502: }
503:
1.53 frystyk 504: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.14 timbl 505: {
1.75 frystyk 506: if (!me->started) HTextImp_build(me->text, HTEXT_BEGIN);
507: HTextImp_build(me->text, HTEXT_ABORT);
1.76 frystyk 508: HTextImp_delete(me->text);
1.56 frystyk 509: HTChunk_delete(me->title);
1.75 frystyk 510: if (me->target) ABORT_TARGET(me);
1.58 frystyk 511: HT_FREE(me);
1.42 frystyk 512: return HT_ERROR;
1.1 timbl 513: }
514:
1.2 timbl 515: /* Structured Object Class
516: ** -----------------------
517: */
1.60 frystyk 518: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 519: {
1.75 frystyk 520: "text/html",
521: HTML_flush,
522: HTML_free,
523: HTML_abort,
524: HTML_put_character,
525: HTML_put_string,
526: HTML_write,
527: HTML_start_element,
528: HTML_end_element,
529: HTML_put_entity,
530: HTML_unparsedBeginElement,
531: HTML_unparsedEndElement,
532: HTML_unparsedEntity
533: };
1.4 timbl 534:
1.75 frystyk 535: /* Structured Text object
536: ** ----------------------
1.2 timbl 537: **
1.16 timbl 538: ** The structured stream can generate either presentation,
1.4 timbl 539: ** or plain text, or HTML.
1.1 timbl 540: */
1.75 frystyk 541: PRIVATE HTStructured * HTML_new (HTRequest * request,
542: void * param,
543: HTFormat input_format,
544: HTFormat output_format,
545: HTStream * output_stream)
546: {
547: HTStructured * me = NULL;
548: if (request) {
549: if ((me = (HTStructured *) HT_CALLOC(1, sizeof(HTStructured))) == NULL)
550: HT_OUTOFMEM("HTML_new");
551: me->isa = &HTMLPresentation;
552: me->dtd = HTML_dtd();
553: me->request = request;
554: me->node_anchor = HTRequest_anchor(request);
555: me->title = HTChunk_new(128);
556: me->comment_start = NULL;
557: me->comment_end = NULL;
558: me->target = output_stream;
559: me->sp = me->stack + MAX_NESTING - 1;
560:
561: /* Create the text object */
562: me->text = HTextImp_new(me->request, me->node_anchor, me->target);
563: }
564: return me;
1.1 timbl 565: }
566:
1.2 timbl 567: /* HTConverter for HTML to plain text
568: ** ----------------------------------
1.1 timbl 569: **
1.2 timbl 570: ** This will convert from HTML to presentation or plain text.
1.1 timbl 571: */
1.75 frystyk 572: PUBLIC HTStream * HTMLToPlain (HTRequest * request,
573: void * param,
574: HTFormat input_format,
575: HTFormat output_format,
576: HTStream * output_stream)
1.1 timbl 577: {
1.75 frystyk 578: return SGML_new(HTML_dtd(), HTML_new(
1.16 timbl 579: request, NULL, input_format, output_format, output_stream));
1.1 timbl 580: }
581:
582:
1.2 timbl 583: /* HTConverter for HTML to C code
584: ** ------------------------------
585: **
1.36 frystyk 586: ** C code is like plain text but all non-preformatted code
1.2 timbl 587: ** is commented out.
588: ** This will convert from HTML to presentation or plain text.
589: */
1.75 frystyk 590: PUBLIC HTStream * HTMLToC (HTRequest * request,
591: void * param,
592: HTFormat input_format,
593: HTFormat output_format,
594: HTStream * output_stream)
595: {
596: if (output_stream) {
597: HTStructured * html = NULL;
598: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
599: html = HTML_new(request, NULL, input_format, output_format, output_stream);
600: html->comment_start = "\n/* ";
601: html->dtd = HTML_dtd();
602: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
603: return SGML_new(HTML_dtd(), html);
604: } else
605: return HTErrorStream();
1.1 timbl 606: }
607:
608:
1.2 timbl 609: /* Presenter for HTML
610: ** ------------------
611: **
612: ** This will convert from HTML to presentation or plain text.
613: **
614: ** Override this if you have a windows version
1.1 timbl 615: */
1.75 frystyk 616: PUBLIC HTStream * HTMLPresent (HTRequest * request,
617: void * param,
618: HTFormat input_format,
619: HTFormat output_format,
620: HTStream * output_stream)
1.1 timbl 621: {
1.75 frystyk 622: return SGML_new(HTML_dtd(), HTML_new(
1.16 timbl 623: request, NULL, input_format, output_format, output_stream));
1.1 timbl 624: }
1.29 frystyk 625:
Webmaster