Annotation of libwww/Library/src/HTML.c, revision 1.81
1.39 frystyk 1: /* HTML.c
1.75 frystyk 2: ** SIMPLE HTML PARSER WITHOUT ANY PRESENTATION CODE
1.39 frystyk 3: **
1.43 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.81 ! kahan 6: ** @(#) $Id: HTML.c,v 1.80 1999/06/25 19:09:20 raff Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This generates a hypertext object. It converts from the
1.75 frystyk 9: ** structured stream interface of HTML events into the style-
10: ** oriented interface of the HText interface.
1.1 timbl 11: **
1.35 duns 12: ** HISTORY:
13: ** 8 Jul 94 FM Insulate free() from _free structure element.
1.1 timbl 14: */
1.16 timbl 15:
1.41 frystyk 16: /* Library include files */
1.72 frystyk 17: #include "wwwsys.h"
1.63 frystyk 18: #include "WWWUtil.h"
19: #include "WWWCore.h"
20: #include "WWWHTML.h"
1.73 frystyk 21: #include "HTML.h"
1.75 frystyk 22: #include "HTextImp.h"
1.73 frystyk 23:
/*
**  Shorthands for calling the target stream of a structured object `t'
**  through the target's class table (t->target->isa). None of them checks
**  t->target for NULL; callers must do that themselves.
*/
#define PUTC(t,c)        (*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)        (*(t)->target->isa->put_string)((t)->target, (s))
/* First parameter used to be spelled `s' while the expansion used `t' */
#define PUTB(t,b,l)      (*(t)->target->isa->put_block)((t)->target, (b), (l))
#define FLUSH_TARGET(t)  (*(t)->target->isa->flush)((t)->target)
#define FREE_TARGET(t)   (*(t)->target->isa->_free)((t)->target)
/* NOTE: expects a variable named `e' to be in scope where it is expanded */
#define ABORT_TARGET(t)  (*(t)->target->isa->abort)((t)->target, e)

/* Number of slots in the element stack of struct _HTStructured */
#define MAX_NESTING      40
1.1 timbl 32:
1.75 frystyk 33: struct _HTStream {
34: const HTStreamClass * isa;
35: /* .... */
36: };
1.2 timbl 37:
38: struct _HTStructured {
1.60 frystyk 39: const HTStructuredClass * isa;
1.54 frystyk 40: HTRequest * request;
1.2 timbl 41: HTParentAnchor * node_anchor;
1.75 frystyk 42: HTextImp * text;
43: HTStream * target;
44: HTChunk * title;
45: BOOL in_word;
46: SGML_dtd * dtd;
1.2 timbl 47: char * comment_start; /* for literate programming */
48: char * comment_end;
1.75 frystyk 49: BOOL started;
1.1 timbl 50:
1.75 frystyk 51: int overflow;
52: int * sp;
53: int stack[MAX_NESTING];
1.2 timbl 54: };
1.1 timbl 55:
1.75 frystyk 56: /*
57: ** Entity values -- for ISO Latin 1 local representation
1.2 timbl 58: ** This MUST match exactly the table referred to in the DTD!
59: */
1.80 raff 60: static char * ISO_Latin1[HTML_ENTITIES] = {
61: /* 00 */
1.2 timbl 62: "\306", /* capital AE diphthong (ligature) */
63: "\301", /* capital A, acute accent */
64: "\302", /* capital A, circumflex accent */
65: "\300", /* capital A, grave accent */
66: "\305", /* capital A, ring */
67: "\303", /* capital A, tilde */
68: "\304", /* capital A, dieresis or umlaut mark */
69: "\307", /* capital C, cedilla */
70: "\320", /* capital Eth, Icelandic */
71: "\311", /* capital E, acute accent */
1.80 raff 72: /* 10 */
1.2 timbl 73: "\312", /* capital E, circumflex accent */
74: "\310", /* capital E, grave accent */
75: "\313", /* capital E, dieresis or umlaut mark */
76: "\315", /* capital I, acute accent */
77: "\316", /* capital I, circumflex accent */
78: "\314", /* capital I, grave accent */
79: "\317", /* capital I, dieresis or umlaut mark */
80: "\321", /* capital N, tilde */
81: "\323", /* capital O, acute accent */
82: "\324", /* capital O, circumflex accent */
1.80 raff 83: /* 20 */
1.2 timbl 84: "\322", /* capital O, grave accent */
85: "\330", /* capital O, slash */
86: "\325", /* capital O, tilde */
87: "\326", /* capital O, dieresis or umlaut mark */
88: "\336", /* capital THORN, Icelandic */
89: "\332", /* capital U, acute accent */
90: "\333", /* capital U, circumflex accent */
91: "\331", /* capital U, grave accent */
92: "\334", /* capital U, dieresis or umlaut mark */
93: "\335", /* capital Y, acute accent */
1.80 raff 94: /* 30 */
1.2 timbl 95: "\341", /* small a, acute accent */
96: "\342", /* small a, circumflex accent */
1.80 raff 97: "\264", /* acute accent */
1.2 timbl 98: "\346", /* small ae diphthong (ligature) */
99: "\340", /* small a, grave accent */
100: "\046", /* ampersand */
101: "\345", /* small a, ring */
102: "\343", /* small a, tilde */
103: "\344", /* small a, dieresis or umlaut mark */
1.80 raff 104: "\246", /* broken vertical bar */
105: /* 40 */
1.2 timbl 106: "\347", /* small c, cedilla */
1.80 raff 107: "\270", /* cedilla */
108: "\242", /* cent sign */
109: "\251", /* copyright */
110: "\244", /* general currency sign */
111: "\260", /* degree sign */
112: "\367", /* division sign */
1.2 timbl 113: "\351", /* small e, acute accent */
114: "\352", /* small e, circumflex accent */
115: "\350", /* small e, grave accent */
1.80 raff 116: /* 50 */
1.2 timbl 117: "\360", /* small eth, Icelandic */
118: "\353", /* small e, dieresis or umlaut mark */
1.80 raff 119: "\275", /* fraction one-half */
120: "\274", /* fraction one-fourth */
121: "\276", /* fraction three-fourth */
1.2 timbl 122: "\076", /* greater than */
123: "\355", /* small i, acute accent */
124: "\356", /* small i, circumflex accent */
1.80 raff 125: "\241", /* inverted exclamation */
1.2 timbl 126: "\354", /* small i, grave accent */
1.80 raff 127: /* 60 */
128: "\277", /* inverted question mark */
1.2 timbl 129: "\357", /* small i, dieresis or umlaut mark */
1.80 raff 130: "\253", /* left angle quote */
1.2 timbl 131: "\074", /* less than */
1.80 raff 132: "\257", /* macron accent */
133: "\265", /* micro sign (greek mu) */
134: "\267", /* middle dot */
1.62 frystyk 135: "\040", /* non-breaking space */
1.80 raff 136: "\254", /* not sign */
1.2 timbl 137: "\361", /* small n, tilde */
1.80 raff 138: /* 70 */
1.2 timbl 139: "\363", /* small o, acute accent */
140: "\364", /* small o, circumflex accent */
141: "\362", /* small o, grave accent */
1.80 raff 142: "\252", /* feminine ordinal */
143: "\272", /* masculine ordinal */
1.2 timbl 144: "\370", /* small o, slash */
145: "\365", /* small o, tilde */
146: "\366", /* small o, dieresis or umlaut mark */
1.80 raff 147: "\266", /* paragraph sign */
148: "\261", /* plus or minus */
149: /* 80 */
150: "\243", /* pound sign */
1.36 frystyk 151: "\042", /* double quote sign - June 94 */
1.80 raff 152: "\273", /* right angle quote */
153: "\256", /* registered trademark */
154: "\247", /* section sign */
155: "\255", /* soft hyphen */
156: "\271", /* superscript 1 */
157: "\262", /* superscript 2 */
158: "\263", /* superscript 3 */
1.2 timbl 159: "\337", /* small sharp s, German (sz ligature) */
1.80 raff 160: /* 90 */
1.2 timbl 161: "\376", /* small thorn, Icelandic */
1.80 raff 162: "\327", /* multiply sign */
1.2 timbl 163: "\372", /* small u, acute accent */
164: "\373", /* small u, circumflex accent */
165: "\371", /* small u, grave accent */
1.80 raff 166: "\250", /* dieresis or umlaut mark */
1.2 timbl 167: "\374", /* small u, dieresis or umlaut mark */
168: "\375", /* small y, acute accent */
1.80 raff 169: "\245", /* yen sign */
170: "\377" /* small y, dieresis or umlaut mark */
171: /* 100 */
1.1 timbl 172: };
173:
1.75 frystyk 174: PRIVATE char ** CurrentEntityValues = ISO_Latin1;
1.2 timbl 175:
1.75 frystyk 176: PUBLIC BOOL HTMLUseCharacterSet (HTMLCharacterSet i)
1.1 timbl 177: {
1.75 frystyk 178: if (i == HTML_ISO_LATIN1) {
179: CurrentEntityValues = ISO_Latin1;
180: return YES;
1.1 timbl 181: } else {
1.78 frystyk 182: HTTRACE(SGML_TRACE, "HTML Parser. Doesn't support this character set\n");
1.75 frystyk 183: return NO;
1.1 timbl 184: }
185: }
186:
1.75 frystyk 187: PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
1.1 timbl 188: {
1.75 frystyk 189: if (!me->started) {
190: HTextImp_build(me->text, HTEXT_BEGIN);
191: me->started = YES;
1.1 timbl 192: }
193:
1.75 frystyk 194: /* Look at what we got */
195: switch (me->sp[0]) {
1.2 timbl 196:
1.75 frystyk 197: case HTML_TITLE:
198: HTChunk_putb(me->title, b, l);
199: /* Fall through */
1.71 frystyk 200:
1.75 frystyk 201: default:
202: HTextImp_addText(me->text, b, l);
1.71 frystyk 203: }
1.42 frystyk 204: return HT_OK;
1.1 timbl 205: }
206:
1.71 frystyk 207: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 208: {
1.71 frystyk 209: return HTML_write(me, &c, sizeof(char));
1.1 timbl 210: }
211:
1.64 frystyk 212: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1 timbl 213: {
1.71 frystyk 214: return HTML_write(me, s, (int) strlen(s));
1.1 timbl 215: }
1.2 timbl 216:
1.75 frystyk 217: PRIVATE void HTML_start_element (HTStructured * me,
218: int element_number,
219: const BOOL * present,
220: const char ** value)
221: {
222: HTChildAnchor * address = NULL;
223: if (!me->started) {
224: HTextImp_build(me->text, HTEXT_BEGIN);
225: me->started = YES;
1.73 frystyk 226: }
227:
1.75 frystyk 228: /* Look at what element was started */
229: switch (element_number) {
1.2 timbl 230: case HTML_A:
1.75 frystyk 231: if (present[HTML_A_HREF] && value[HTML_A_HREF]) {
232: address = HTAnchor_findChildAndLink(
233: me->node_anchor, /* parent */
234: present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */
235: value[HTML_A_HREF], /* Addresss */
236: present[HTML_A_REL] && value[HTML_A_REL] ?
237: (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);
1.2 timbl 238:
1.75 frystyk 239: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
240: HTLink * link = HTAnchor_mainLink((HTAnchor *) address);
241: HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
242: if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
243: }
244: HTextImp_foundLink(me->text, element_number, HTML_A_HREF,
245: address, present, value);
1.78 frystyk 246: HTTRACE(SGML_TRACE, "HTML Parser. Anchor `%s\'\n" _ value[HTML_A_HREF]);
1.75 frystyk 247: }
248: break;
249:
250: case HTML_AREA:
251: if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) {
252: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
253: value[HTML_AREA_HREF], NULL);
254: HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF,
255: address, present, value);
1.78 frystyk 256: HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]);
1.2 timbl 257: }
1.75 frystyk 258: break;
259:
260: case HTML_BASE:
261: if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) {
262: HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]);
1.78 frystyk 263: HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]);
1.75 frystyk 264: }
265: break;
266:
267: case HTML_BODY:
268: if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) {
269: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
270: value[HTML_BODY_BACKGROUND], NULL);
271: HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND,
272: address, present, value);
1.78 frystyk 273: HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]);
1.75 frystyk 274: }
275: break;
276:
1.77 frystyk 277: case HTML_FORM:
278: if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) {
279: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
280: value[HTML_FORM_ACTION], NULL);
281: HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION,
282: address, present, value);
283: }
284: break;
285:
1.75 frystyk 286: case HTML_FRAME:
287: if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) {
288: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
289: value[HTML_FRAME_SRC], NULL);
290: HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC,
291: address, present, value);
1.78 frystyk 292: HTTRACE(SGML_TRACE, "HTML Parser. Frame `%s\'\n" _ value[HTML_FRAME_SRC]);
1.75 frystyk 293: }
294: break;
295:
1.77 frystyk 296: case HTML_INPUT:
297: if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) {
298: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
299: value[HTML_INPUT_SRC], NULL);
300: HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC,
301: address, present, value);
302: }
303: break;
304:
1.75 frystyk 305: case HTML_IMG:
306: if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) {
307: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
308: value[HTML_IMG_SRC], NULL);
309: HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC,
310: address, present, value);
311: }
312: break;
313:
314: case HTML_ISINDEX:
315: HTAnchor_setIndex(me->node_anchor);
316: break;
1.2 timbl 317:
1.63 frystyk 318: case HTML_LINK:
1.69 frystyk 319: if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
1.75 frystyk 320: HTParentAnchor * dest = NULL;
321: address = HTAnchor_findChildAndLink(
1.73 frystyk 322: me->node_anchor, /* parent */
323: present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */
324: present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL, /* Addresss */
325: NULL); /* Rels */
1.75 frystyk 326: dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address));
1.69 frystyk 327:
328: /* If forward reference */
329: if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
330: char * strval = NULL;
331: char * ptr = NULL;
332: char * relation = NULL;
333: StrAllocCopy(strval, value[HTML_LINK_REL]);
334: ptr = strval;
335: while ((relation = HTNextLWSToken(&ptr)) != NULL) {
336: HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
337: (HTLinkType) HTAtom_caseFor(relation),
338: METHOD_INVALID);
339: }
340: HT_FREE(strval);
341: }
342:
343: /* If reverse reference */
344: if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
345: char * strval = NULL;
346: char * ptr = NULL;
347: char * relation = NULL;
348: StrAllocCopy(strval, value[HTML_LINK_REV]);
349: ptr = strval;
350: while ((relation = HTNextLWSToken(&ptr)) != NULL) {
351: HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
352: (HTLinkType) HTAtom_caseFor(relation),
353: METHOD_INVALID);
354: }
355: HT_FREE(strval);
356: }
1.63 frystyk 357:
1.69 frystyk 358: /* If we got any type information as well */
359: if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
360: if (HTAnchor_format(dest) == WWW_UNKNOWN)
361: HTAnchor_setFormat(dest,
362: (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
363: }
1.63 frystyk 364:
1.75 frystyk 365: /* Call out to the layout engine */
366: HTextImp_foundLink(me->text, element_number, HTML_LINK_HREF,
367: address, present, value);
1.70 frystyk 368: }
1.75 frystyk 369: break;
1.70 frystyk 370:
371: case HTML_META:
372: if (present[HTML_META_NAME] && value[HTML_META_NAME]) {
373: HTAnchor_addMeta (me->node_anchor,
374: value[HTML_META_NAME],
375: (present[HTML_META_CONTENT] && value[HTML_META_CONTENT]) ?
376: value[HTML_META_CONTENT] : "");
1.69 frystyk 377: }
1.75 frystyk 378: break;
379:
380: case HTML_OBJECT:
381: if (present[HTML_OBJECT_CLASSID] && value[HTML_OBJECT_CLASSID]) {
382: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
383: value[HTML_OBJECT_CLASSID], NULL);
384: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CLASSID,
385: address, present, value);
386: }
387:
388: if (present[HTML_OBJECT_CODEBASE] && value[HTML_OBJECT_CODEBASE]) {
389: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
390: value[HTML_OBJECT_CODEBASE], NULL);
391: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CODEBASE,
392: address, present, value);
393: }
394:
395: if (present[HTML_OBJECT_DATA] && value[HTML_OBJECT_DATA]) {
396: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
397: value[HTML_OBJECT_DATA], NULL);
398: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_DATA,
399: address, present, value);
400: }
1.63 frystyk 401:
1.75 frystyk 402: if (present[HTML_OBJECT_ARCHIVE] && value[HTML_OBJECT_ARCHIVE]) {
403: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
404: value[HTML_OBJECT_ARCHIVE], NULL);
405: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_ARCHIVE,
406: address, present, value);
407: }
1.2 timbl 408:
1.75 frystyk 409: if (present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP]) {
410: address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
411: value[HTML_OBJECT_USEMAP], NULL);
412: HTextImp_foundLink(me->text, element_number, HTML_OBJECT_USEMAP,
413: address, present, value);
1.2 timbl 414: }
415: break;
416:
417: case HTML_PRE:
1.4 timbl 418: if (me->comment_end)
1.75 frystyk 419: HTextImp_addText(me->text, me->comment_end, strlen(me->comment_end));
1.2 timbl 420: break;
1.11 timbl 421:
1.75 frystyk 422: case HTML_TITLE:
1.81 ! kahan 423: HTChunk_truncate(me->title,0);
1.2 timbl 424: break;
1.75 frystyk 425: }
1.2 timbl 426:
1.75 frystyk 427: /* Update our parse stack */
428: if (SGML_findTagContents(me->dtd, element_number) != SGML_EMPTY) {
1.13 timbl 429: if (me->sp == me->stack) {
1.78 frystyk 430: HTTRACE(SGML_TRACE, "HTML Parser. Maximum nesting of %d exceded!\n" _ MAX_NESTING);
1.44 frystyk 431: me->overflow++;
1.12 timbl 432: return;
433: }
1.4 timbl 434: --(me->sp);
1.75 frystyk 435: me->sp[0] = element_number;
1.10 timbl 436: }
1.75 frystyk 437:
438: /* Call out to the layout engine */
439: HTextImp_beginElement(me->text, element_number, present, value);
1.1 timbl 440: }
1.10 timbl 441:
1.53 frystyk 442: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 443: {
1.75 frystyk 444: if (!me->started) {
445: HTextImp_build(me->text, HTEXT_BEGIN);
446: me->started = YES;
1.1 timbl 447: }
1.44 frystyk 448:
1.75 frystyk 449: /* Update our parse stack */
1.44 frystyk 450: if (me->overflow > 0) {
451: me->overflow--;
452: return;
453: }
1.75 frystyk 454: me->sp++;
1.67 frystyk 455: if (me->sp > me->stack + MAX_NESTING - 1) {
1.78 frystyk 456: HTTRACE(SGML_TRACE, "HTML Parser. Bottom of parse stack reached\n");
1.67 frystyk 457: me->sp = me->stack + MAX_NESTING - 1;
458: }
1.44 frystyk 459:
1.75 frystyk 460: /* Look at what element was closed */
1.2 timbl 461: switch(element_number) {
462: case HTML_TITLE:
1.56 frystyk 463: HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 464: break;
465:
466: case HTML_PRE:
1.4 timbl 467: if (me->comment_start)
1.75 frystyk 468: HTextImp_addText(me->text, me->comment_start, strlen(me->comment_start));
469: break;
470: }
1.44 frystyk 471:
1.75 frystyk 472: /* Call out to the layout engine */
473: HTextImp_endElement(me->text, element_number);
1.1 timbl 474: }
475:
1.53 frystyk 476: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 477: {
1.75 frystyk 478: if (!me->started) {
479: HTextImp_build(me->text, HTEXT_BEGIN);
480: me->started = YES;
481: }
1.80 raff 482: if (entity_number>=0 && entity_number<HTML_ENTITIES)
1.75 frystyk 483: HTML_put_string(me, *(CurrentEntityValues+entity_number));
1.1 timbl 484: }
1.2 timbl 485:
1.53 frystyk 486: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 487: {
1.75 frystyk 488: if (!me->started) {
489: HTextImp_build(me->text, HTEXT_BEGIN);
490: me->started = YES;
491: }
492: if (me->comment_end) HTML_put_string(me, me->comment_end);
493: return me->target ? FLUSH_TARGET(me) : HT_OK;
494: }
495:
496: PRIVATE int HTML_unparsedBeginElement (HTStructured * me, const char * b, int l)
497: {
498: if (!me->started) {
499: HTextImp_build(me->text, HTEXT_BEGIN);
500: me->started = YES;
501: }
502: HTextImp_unparsedBeginElement(me->text, b, l);
503: return HT_OK;
1.42 frystyk 504: }
1.2 timbl 505:
1.75 frystyk 506: PRIVATE int HTML_unparsedEndElement (HTStructured * me, const char * b, int l)
1.1 timbl 507: {
1.75 frystyk 508: if (!me->started) {
509: HTextImp_build(me->text, HTEXT_BEGIN);
510: me->started = YES;
511: }
512: HTextImp_unparsedEndElement(me->text, b, l);
513: return HT_OK;
514: }
1.4 timbl 515:
1.75 frystyk 516: PRIVATE int HTML_unparsedEntity (HTStructured * me, const char * b, int l)
517: {
518: if (!me->started) {
519: HTextImp_build(me->text, HTEXT_BEGIN);
520: me->started = YES;
1.2 timbl 521: }
1.75 frystyk 522: HTextImp_unparsedEntity(me->text, b, l);
523: return HT_OK;
524: }
525:
526: PUBLIC int HTML_free (HTStructured * me)
527: {
528: if (!me->started) HTextImp_build(me->text, HTEXT_BEGIN);
529: if (me->comment_end) HTML_put_string(me, me->comment_end);
530: HTextImp_build(me->text, HTEXT_END);
1.76 frystyk 531: HTextImp_delete(me->text);
1.56 frystyk 532: HTChunk_delete(me->title);
1.75 frystyk 533: if (me->target) FREE_TARGET(me);
1.58 frystyk 534: HT_FREE(me);
1.42 frystyk 535: return HT_OK;
1.1 timbl 536: }
537:
1.53 frystyk 538: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.14 timbl 539: {
1.75 frystyk 540: if (!me->started) HTextImp_build(me->text, HTEXT_BEGIN);
541: HTextImp_build(me->text, HTEXT_ABORT);
1.76 frystyk 542: HTextImp_delete(me->text);
1.56 frystyk 543: HTChunk_delete(me->title);
1.75 frystyk 544: if (me->target) ABORT_TARGET(me);
1.58 frystyk 545: HT_FREE(me);
1.42 frystyk 546: return HT_ERROR;
1.1 timbl 547: }
548:
1.2 timbl 549: /* Structured Object Class
550: ** -----------------------
551: */
1.60 frystyk 552: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 553: {
1.75 frystyk 554: "text/html",
555: HTML_flush,
556: HTML_free,
557: HTML_abort,
558: HTML_put_character,
559: HTML_put_string,
560: HTML_write,
561: HTML_start_element,
562: HTML_end_element,
563: HTML_put_entity,
564: HTML_unparsedBeginElement,
565: HTML_unparsedEndElement,
566: HTML_unparsedEntity
567: };
1.4 timbl 568:
1.75 frystyk 569: /* Structured Text object
570: ** ----------------------
1.2 timbl 571: **
1.16 timbl 572: ** The structured stream can generate either presentation,
1.4 timbl 573: ** or plain text, or HTML.
1.1 timbl 574: */
1.75 frystyk 575: PRIVATE HTStructured * HTML_new (HTRequest * request,
576: void * param,
577: HTFormat input_format,
578: HTFormat output_format,
579: HTStream * output_stream)
580: {
581: HTStructured * me = NULL;
582: if (request) {
583: if ((me = (HTStructured *) HT_CALLOC(1, sizeof(HTStructured))) == NULL)
584: HT_OUTOFMEM("HTML_new");
585: me->isa = &HTMLPresentation;
586: me->dtd = HTML_dtd();
587: me->request = request;
588: me->node_anchor = HTRequest_anchor(request);
589: me->title = HTChunk_new(128);
590: me->comment_start = NULL;
591: me->comment_end = NULL;
592: me->target = output_stream;
593: me->sp = me->stack + MAX_NESTING - 1;
594:
595: /* Create the text object */
596: me->text = HTextImp_new(me->request, me->node_anchor, me->target);
597: }
598: return me;
1.1 timbl 599: }
600:
1.2 timbl 601: /* HTConverter for HTML to plain text
602: ** ----------------------------------
1.1 timbl 603: **
1.2 timbl 604: ** This will convert from HTML to presentation or plain text.
1.1 timbl 605: */
1.75 frystyk 606: PUBLIC HTStream * HTMLToPlain (HTRequest * request,
607: void * param,
608: HTFormat input_format,
609: HTFormat output_format,
610: HTStream * output_stream)
1.1 timbl 611: {
1.75 frystyk 612: return SGML_new(HTML_dtd(), HTML_new(
1.16 timbl 613: request, NULL, input_format, output_format, output_stream));
1.1 timbl 614: }
615:
616:
1.2 timbl 617: /* HTConverter for HTML to C code
618: ** ------------------------------
619: **
1.36 frystyk 620: ** C code is like plain text but all non-preformatted code
1.2 timbl 621: ** is commented out.
622: ** This will convert from HTML to presentation or plain text.
623: */
1.75 frystyk 624: PUBLIC HTStream * HTMLToC (HTRequest * request,
625: void * param,
626: HTFormat input_format,
627: HTFormat output_format,
628: HTStream * output_stream)
629: {
630: if (output_stream) {
631: HTStructured * html = NULL;
632: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
633: html = HTML_new(request, NULL, input_format, output_format, output_stream);
634: html->comment_start = "\n/* ";
635: html->dtd = HTML_dtd();
636: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
637: return SGML_new(HTML_dtd(), html);
638: } else
639: return HTErrorStream();
1.1 timbl 640: }
641:
642:
1.2 timbl 643: /* Presenter for HTML
644: ** ------------------
645: **
646: ** This will convert from HTML to presentation or plain text.
647: **
648: ** Override this if you have a windows version
1.1 timbl 649: */
1.75 frystyk 650: PUBLIC HTStream * HTMLPresent (HTRequest * request,
651: void * param,
652: HTFormat input_format,
653: HTFormat output_format,
654: HTStream * output_stream)
1.1 timbl 655: {
1.75 frystyk 656: return SGML_new(HTML_dtd(), HTML_new(
1.16 timbl 657: request, NULL, input_format, output_format, output_stream));
1.1 timbl 658: }
1.29 frystyk 659:
Webmaster