Annotation of libwww/Library/src/HTML.c, revision 1.13
1.2 timbl 1: /* Structured stream to Rich hypertext converter
2: ** ============================================
1.1 timbl 3: **
1.2 timbl 4: ** This generates a hypertext object. It converts from the
5: ** structured stream interface for HTML events into the style-
6: ** oriented interface of HText.h. This module is
7: ** only used in clients and should not be linked into servers.
1.1 timbl 8: **
1.6 timbl 9: ** Override this module if making a new GUI browser.
1.1 timbl 10: **
11: */
12: #include "HTML.h"
13:
1.6 timbl 14: /* #define CAREFUL Check nesting here notreally necessary */
1.2 timbl 15:
1.1 timbl 16: #include <ctype.h>
17: #include <stdio.h>
18:
19: #include "HTAtom.h"
20: #include "HTChunk.h"
21: #include "HText.h"
22: #include "HTStyle.h"
23:
1.3 timbl 24: #include "HTAlert.h"
1.4 timbl 25: #include "HTMLGen.h"
1.8 timbl 26: #include "HTParse.h"
1.1 timbl 27:
28: extern HTStyleSheet * styleSheet; /* Application-wide */
29:
30: /* Module-wide style cache
31: */
32: PRIVATE int got_styles = 0;
1.2 timbl 33: PRIVATE HTStyle *styles[HTML_ELEMENTS];
34: PRIVATE HTStyle *default_style;
1.1 timbl 35:
36:
37: /* HTML Object
38: ** -----------
39: */
1.2 timbl 40: #define MAX_NESTING 20 /* Should be checked by parser */
41:
42: typedef struct _stack_element {
43: HTStyle * style;
44: int tag_number;
45: } stack_element;
46:
47: struct _HTStructured {
48: CONST HTStructuredClass * isa;
49: HTParentAnchor * node_anchor;
50: HText * text;
51:
52: HTStream* target; /* Output stream */
53: HTStreamClass targetClass; /* Output routines */
54:
55: HTChunk title; /* Grow by 128 */
56:
57: char * comment_start; /* for literate programming */
58: char * comment_end;
59:
60: HTTag * current_tag;
61: BOOL style_change;
62: HTStyle * new_style;
63: HTStyle * old_style;
64: BOOL in_word; /* Have just had a non-white char */
65: stack_element stack[MAX_NESTING];
66: stack_element *sp; /* Style stack pointer */
1.1 timbl 67: };
68:
1.2 timbl 69: struct _HTStream {
70: CONST HTStreamClass * isa;
71: /* .... */
72: };
1.1 timbl 73:
74: /* Forward declarations of routines
75: */
76: PRIVATE void get_styles NOPARAMS;
77:
78:
1.4 timbl 79: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 80: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 81:
82: /* Style buffering avoids dummy paragraph begin/ends.
83: */
1.4 timbl 84: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 85:
86:
1.2 timbl 87: #ifdef OLD_CODE
1.1 timbl 88: /* The following accented characters are from peter Flynn, curia project */
89:
90: /* these ifdefs don't solve the problem of a simple terminal emulator
91: ** with a different character set to the client machine. But nothing does,
92: ** except looking at the TERM setting */
93:
1.2 timbl 94:
1.1 timbl 95: { "ocus" , "&" }, /* for CURIA */
96: #ifdef IBMPC
97: { "aacute" , "\240" }, /* For PC display */
98: { "eacute" , "\202" },
99: { "iacute" , "\241" },
100: { "oacute" , "\242" },
101: { "uacute" , "\243" },
102: { "Aacute" , "\101" },
103: { "Eacute" , "\220" },
104: { "Iacute" , "\111" },
105: { "Oacute" , "\117" },
106: { "Uacute" , "\125" },
107: #else
108: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
109: { "eacute" , "\351" },
110: { "iacute" , "\355" },
111: { "oacute" , "\363" },
112: { "uacute" , "\372" },
113: { "Aacute" , "\301" },
114: { "Eacute" , "\310" },
115: { "Iacute" , "\315" },
116: { "Oacute" , "\323" },
117: { "Uacute" , "\332" },
118: #endif
119: { 0, 0 } /* Terminate list */
120: };
1.2 timbl 121: #endif
1.1 timbl 122:
123:
/* Entity values -- for ISO Latin 1 local representation
**
** This MUST match exactly the table referred to in the DTD!
**
** Each entry is a one-character string; the number in each comment is
** the entry's index in the table.
*/
static char * ISO_Latin1[] = {
    "\306",     /*  0: capital AE diphthong (ligature) */
    "\301",     /*  1: capital A, acute accent */
    "\302",     /*  2: capital A, circumflex accent */
    "\300",     /*  3: capital A, grave accent */
    "\305",     /*  4: capital A, ring */
    "\303",     /*  5: capital A, tilde */
    "\304",     /*  6: capital A, dieresis or umlaut mark */
    "\307",     /*  7: capital C, cedilla */
    "\320",     /*  8: capital Eth, Icelandic */
    "\311",     /*  9: capital E, acute accent */
    "\312",     /* 10: capital E, circumflex accent */
    "\310",     /* 11: capital E, grave accent */
    "\313",     /* 12: capital E, dieresis or umlaut mark */
    "\315",     /* 13: capital I, acute accent */
    "\316",     /* 14: capital I, circumflex accent */
    "\314",     /* 15: capital I, grave accent */
    "\317",     /* 16: capital I, dieresis or umlaut mark */
    "\321",     /* 17: capital N, tilde */
    "\323",     /* 18: capital O, acute accent */
    "\324",     /* 19: capital O, circumflex accent */
    "\322",     /* 20: capital O, grave accent */
    "\330",     /* 21: capital O, slash */
    "\325",     /* 22: capital O, tilde */
    "\326",     /* 23: capital O, dieresis or umlaut mark */
    "\336",     /* 24: capital THORN, Icelandic */
    "\332",     /* 25: capital U, acute accent */
    "\333",     /* 26: capital U, circumflex accent */
    "\331",     /* 27: capital U, grave accent */
    "\334",     /* 28: capital U, dieresis or umlaut mark */
    "\335",     /* 29: capital Y, acute accent */
    "\341",     /* 30: small a, acute accent */
    "\342",     /* 31: small a, circumflex accent */
    "\346",     /* 32: small ae diphthong (ligature) */
    "\340",     /* 33: small a, grave accent */
    "\046",     /* 34: ampersand */
    "\345",     /* 35: small a, ring */
    "\343",     /* 36: small a, tilde */
    "\344",     /* 37: small a, dieresis or umlaut mark */
    "\347",     /* 38: small c, cedilla */
    "\351",     /* 39: small e, acute accent */
    "\352",     /* 40: small e, circumflex accent */
    "\350",     /* 41: small e, grave accent */
    "\360",     /* 42: small eth, Icelandic */
    "\353",     /* 43: small e, dieresis or umlaut mark */
    "\076",     /* 44: greater than */
    "\355",     /* 45: small i, acute accent */
    "\356",     /* 46: small i, circumflex accent */
    "\354",     /* 47: small i, grave accent */
    "\357",     /* 48: small i, dieresis or umlaut mark */
    "\074",     /* 49: less than */
    "\361",     /* 50: small n, tilde */
    "\363",     /* 51: small o, acute accent */
    "\364",     /* 52: small o, circumflex accent */
    "\362",     /* 53: small o, grave accent */
    "\370",     /* 54: small o, slash */
    "\365",     /* 55: small o, tilde */
    "\366",     /* 56: small o, dieresis or umlaut mark */
    "\337",     /* 57: small sharp s, German (sz ligature) */
    "\376",     /* 58: small thorn, Icelandic */
    "\372",     /* 59: small u, acute accent */
    "\373",     /* 60: small u, circumflex accent */
    "\371",     /* 61: small u, grave accent */
    "\374",     /* 62: small u, dieresis or umlaut mark */
    "\375",     /* 63: small y, acute accent */
    "\377",     /* 64: small y, dieresis or umlaut mark */
};
195:
1.2 timbl 196:
/* Entity values -- for NeXT local representation
**
** This MUST match exactly the table referred to in the DTD!
**
** Each entry is a one-character string; the number in each comment is
** the entry's index in the table.
*/
static char * NeXTCharacters[] = {
    "\341",     /*  0: capital AE diphthong (ligature) */
    "\202",     /*  1: capital A, acute accent */
    "\203",     /*  2: capital A, circumflex accent */
    "\201",     /*  3: capital A, grave accent */
    "\206",     /*  4: capital A, ring */
    "\204",     /*  5: capital A, tilde */
    "\205",     /*  6: capital A, dieresis or umlaut mark */
    "\207",     /*  7: capital C, cedilla */
    "\220",     /*  8: capital Eth, Icelandic */
    "\211",     /*  9: capital E, acute accent */
    "\212",     /* 10: capital E, circumflex accent */
    "\210",     /* 11: capital E, grave accent */
    "\213",     /* 12: capital E, dieresis or umlaut mark */
    "\215",     /* 13: capital I, acute accent */
    "\216",     /* 14: capital I, circumflex accent     these are */
    "\214",     /* 15: capital I, grave accent          ISO -100 hex */
    "\217",     /* 16: capital I, dieresis or umlaut mark */
    "\221",     /* 17: capital N, tilde */
    "\223",     /* 18: capital O, acute accent */
    "\224",     /* 19: capital O, circumflex accent */
    "\222",     /* 20: capital O, grave accent */
    "\351",     /* 21: capital O, slash                 'cept this */
    "\225",     /* 22: capital O, tilde */
    "\226",     /* 23: capital O, dieresis or umlaut mark */
    "\234",     /* 24: capital THORN, Icelandic */
    "\230",     /* 25: capital U, acute accent */
    "\231",     /* 26: capital U, circumflex accent */
    "\227",     /* 27: capital U, grave accent */
    "\232",     /* 28: capital U, dieresis or umlaut mark */
    "\233",     /* 29: capital Y, acute accent */
    "\326",     /* 30: small a, acute accent */
    "\327",     /* 31: small a, circumflex accent */
    "\361",     /* 32: small ae diphthong (ligature) */
    "\325",     /* 33: small a, grave accent */
    "\046",     /* 34: ampersand */
    "\332",     /* 35: small a, ring */
    "\330",     /* 36: small a, tilde */
    "\331",     /* 37: small a, dieresis or umlaut mark */
    "\333",     /* 38: small c, cedilla */
    "\335",     /* 39: small e, acute accent */
    "\336",     /* 40: small e, circumflex accent */
    "\334",     /* 41: small e, grave accent */
    "\346",     /* 42: small eth, Icelandic */
    "\337",     /* 43: small e, dieresis or umlaut mark */
    "\076",     /* 44: greater than */
    "\342",     /* 45: small i, acute accent */
    "\344",     /* 46: small i, circumflex accent */
    "\340",     /* 47: small i, grave accent */
    "\345",     /* 48: small i, dieresis or umlaut mark */
    "\074",     /* 49: less than */
    "\347",     /* 50: small n, tilde */
    "\355",     /* 51: small o, acute accent */
    "\356",     /* 52: small o, circumflex accent */
    "\354",     /* 53: small o, grave accent */
    "\371",     /* 54: small o, slash */
    "\357",     /* 55: small o, tilde */
    "\360",     /* 56: small o, dieresis or umlaut mark */
    "\373",     /* 57: small sharp s, German (sz ligature) */
    "\374",     /* 58: small thorn, Icelandic */
    "\363",     /* 59: small u, acute accent */
    "\364",     /* 60: small u, circumflex accent */
    "\362",     /* 61: small u, grave accent */
    "\366",     /* 62: small u, dieresis or umlaut mark */
    "\367",     /* 63: small y, acute accent */
    "\375",     /* 64: small y, dieresis or umlaut mark */
};
269:
1.2 timbl 270: /* Entity values -- for IBM/PC Code Page 850 (International)
271: **
272: ** This MUST match exactly the table referred to in the DTD!
273: **
274: */
275: /* @@@@@@@@@@@@@@@@@ TBD */
276:
277:
278:
279: /* Set character set
280: ** ----------------
281: */
282:
283: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 284:
1.2 timbl 285: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
286: {
287: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
288: : ISO_Latin1;
289: }
1.1 timbl 290:
291:
292: /* Flattening the style structure
293: ** ------------------------------
294: **
295: On the NeXT, and on any read-only browser, it is simpler for the text to have
296: a sequence of styles, rather than a nested tree of styles. In this
297: case we have to flatten the structure as it arrives from SGML tags into
298: a sequence of styles.
299: */
300:
301: /* If style really needs to be set, call this
302: */
1.4 timbl 303: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 304: {
1.4 timbl 305: if (!me->text) { /* First time through */
306: me->text = HText_new2(me->node_anchor, me->target);
307: HText_beginAppend(me->text);
308: HText_setStyle(me->text, me->new_style);
309: me->in_word = NO;
1.1 timbl 310: } else {
1.4 timbl 311: HText_setStyle(me->text, me->new_style);
1.1 timbl 312: }
1.4 timbl 313: me->old_style = me->new_style;
314: me->style_change = NO;
1.1 timbl 315: }
316:
317: /* If you THINK you need to change style, call this
318: */
319:
1.11 timbl 320: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 321: {
1.4 timbl 322: if (me->new_style!=style) {
323: me->style_change = YES;
324: me->new_style = style;
1.1 timbl 325: }
1.11 timbl 326: me->in_word = NO;
1.1 timbl 327: }
328:
1.2 timbl 329: /*_________________________________________________________________________
330: **
331: ** A C T I O N R O U T I N E S
332: */
333:
334: /* Character handling
335: ** ------------------
1.1 timbl 336: */
1.4 timbl 337: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 338: {
1.2 timbl 339:
1.4 timbl 340: switch (me->sp[0].tag_number) {
1.2 timbl 341: case HTML_COMMENT:
342: break; /* Do Nothing */
343:
344: case HTML_TITLE:
1.4 timbl 345: HTChunkPutc(&me->title, c);
1.2 timbl 346: break;
347:
348:
349: case HTML_LISTING: /* Litteral text */
350: case HTML_XMP:
351: case HTML_PLAINTEXT:
352: case HTML_PRE:
353: /* We guarrantee that the style is up-to-date in begin_litteral
354: */
1.4 timbl 355: HText_appendCharacter(me->text, c);
1.2 timbl 356: break;
357:
358: default: /* Free format text */
1.4 timbl 359: if (me->style_change) {
1.2 timbl 360: if ((c=='\n') || (c==' ')) return; /* Ignore it */
361: UPDATE_STYLE;
362: }
363: if (c=='\n') {
1.4 timbl 364: if (me->in_word) {
365: HText_appendCharacter(me->text, ' ');
366: me->in_word = NO;
1.2 timbl 367: }
368: } else {
1.4 timbl 369: HText_appendCharacter(me->text, c);
370: me->in_word = YES;
1.2 timbl 371: }
372: } /* end switch */
1.1 timbl 373: }
374:
1.2 timbl 375:
376:
377: /* String handling
378: ** ---------------
379: **
380: ** This is written separately from put_character becuase the loop can
1.11 timbl 381: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 382: */
1.4 timbl 383: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 384: {
1.2 timbl 385:
1.4 timbl 386: switch (me->sp[0].tag_number) {
1.2 timbl 387: case HTML_COMMENT:
388: break; /* Do Nothing */
389:
390: case HTML_TITLE:
1.4 timbl 391: HTChunkPuts(&me->title, s);
1.2 timbl 392: break;
393:
394:
395: case HTML_LISTING: /* Litteral text */
396: case HTML_XMP:
397: case HTML_PLAINTEXT:
398: case HTML_PRE:
399:
400: /* We guarrantee that the style is up-to-date in begin_litteral
401: */
1.4 timbl 402: HText_appendText(me->text, s);
1.2 timbl 403: break;
404:
405: default: /* Free format text */
406: {
407: CONST char *p = s;
1.4 timbl 408: if (me->style_change) {
1.2 timbl 409: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
410: if (!*p) return;
411: UPDATE_STYLE;
412: }
413: for(; *p; p++) {
1.4 timbl 414: if (me->style_change) {
1.2 timbl 415: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
416: UPDATE_STYLE;
417: }
418: if (*p=='\n') {
1.4 timbl 419: if (me->in_word) {
420: HText_appendCharacter(me->text, ' ');
421: me->in_word = NO;
1.2 timbl 422: }
423: } else {
1.4 timbl 424: HText_appendCharacter(me->text, *p);
425: me->in_word = YES;
1.2 timbl 426: }
427: } /* for */
428: }
429: } /* end switch */
1.1 timbl 430: }
431:
432:
1.2 timbl 433: /* Buffer write
1.3 timbl 434: ** ------------
1.1 timbl 435: */
1.4 timbl 436: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 437: {
1.2 timbl 438: CONST char* p;
439: CONST char* e = s+l;
1.4 timbl 440: for (p=s; s<e; p++) HTML_put_character(me, *p);
1.1 timbl 441: }
1.2 timbl 442:
443:
444: /* Start Element
445: ** -------------
446: */
447: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 448: HTStructured *, me,
1.2 timbl 449: int, element_number,
1.3 timbl 450: CONST BOOL*, present,
451: CONST char **, value)
1.2 timbl 452: {
453: switch (element_number) {
454: case HTML_A:
455: {
1.8 timbl 456: HTChildAnchor * source;
1.9 timbl 457: char * href = NULL;
458: if (present[HTML_A_HREF]) {
459: StrAllocCopy(href, value[HTML_A_HREF]);
460: HTSimplify(href);
461: }
1.8 timbl 462: source = HTAnchor_findChildAndLink(
1.4 timbl 463: me->node_anchor, /* parent */
1.2 timbl 464: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 465: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.2 timbl 466: present[HTML_A_TYPE] && value[HTML_A_TYPE] ?
467: (HTLinkType*)HTAtom_for(value[HTML_A_TYPE])
468: : 0);
469:
470: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
471: HTParentAnchor * dest =
472: HTAnchor_parent(
473: HTAnchor_followMainLink((HTAnchor*)source)
474: );
475: if (!HTAnchor_title(dest))
476: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
477: }
478: UPDATE_STYLE;
1.4 timbl 479: HText_beginAnchor(me->text, source);
1.2 timbl 480: }
481: break;
482:
483: case HTML_TITLE:
1.4 timbl 484: HTChunkClear(&me->title);
1.2 timbl 485: break;
486:
487: case HTML_NEXTID:
488: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 489: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 490: break;
491:
492: case HTML_ISINDEX:
1.4 timbl 493: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 494: break;
495:
496: case HTML_P:
497: UPDATE_STYLE;
1.4 timbl 498: HText_appendParagraph(me->text);
499: me->in_word = NO;
1.2 timbl 500: break;
501:
502: case HTML_DL:
1.11 timbl 503: change_paragraph_style(me, present && present[DL_COMPACT]
1.2 timbl 504: ? styles[HTML_DLC]
505: : styles[HTML_DL]);
506: break;
507:
508: case HTML_DT:
1.4 timbl 509: if (!me->style_change) {
510: HText_appendParagraph(me->text);
511: me->in_word = NO;
1.2 timbl 512: }
513: break;
514:
515: case HTML_DD:
516: UPDATE_STYLE;
1.4 timbl 517: HTML_put_character(me, '\t'); /* Just tab out one stop */
518: me->in_word = NO;
519: break;
1.2 timbl 520:
521: case HTML_UL:
522: case HTML_OL:
523: case HTML_MENU:
524: case HTML_DIR:
1.11 timbl 525: change_paragraph_style(me, styles[element_number]);
1.2 timbl 526: break;
527:
528: case HTML_LI:
529: UPDATE_STYLE;
1.7 timbl 530: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 531: HText_appendParagraph(me->text);
1.2 timbl 532: else
1.4 timbl 533: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
534: me->in_word = NO;
1.2 timbl 535: break;
536:
537: case HTML_LISTING: /* Litteral text */
538: case HTML_XMP:
539: case HTML_PLAINTEXT:
540: case HTML_PRE:
1.11 timbl 541: change_paragraph_style(me, styles[element_number]);
1.2 timbl 542: UPDATE_STYLE;
1.4 timbl 543: if (me->comment_end)
544: HText_appendText(me->text, me->comment_end);
1.2 timbl 545: break;
1.11 timbl 546:
547: case HTML_HTML: /* Ignore these altogether */
548: case HTML_HEAD:
549: case HTML_BODY:
550:
1.10 timbl 551: case HTML_IMG: /* Images -- ignore */
552:
553: case HTML_TT: /* Physical character highlighting */
554: case HTML_B: /* Currently ignored */
555: case HTML_I:
556: case HTML_U:
557:
558: case HTML_EM: /* Logical character highlighting */
559: case HTML_STRONG: /* Currently ignored */
560: case HTML_CODE:
561: case HTML_SAMP:
562: case HTML_KBD:
563: case HTML_VAR:
564: case HTML_DFN:
565: case HTML_CITE:
566: break;
567:
1.11 timbl 568: case HTML_H1: /* paragraph styles */
569: case HTML_H2:
570: case HTML_H3:
571: case HTML_H4:
572: case HTML_H5:
573: case HTML_H6:
574: case HTML_H7:
575: case HTML_ADDRESS:
576: case HTML_BLOCKQUOTE:
577: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 578: break;
579:
580: } /* end switch */
581:
582: if (HTML_dtd.tags[element_number].contents!= SGML_EMPTY) {
1.13 ! timbl 583: if (me->sp == me->stack) {
1.12 timbl 584: fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
585: MAX_NESTING);
586: return;
587: }
1.4 timbl 588: --(me->sp);
589: me->sp[0].style = me->new_style; /* Stack new style */
590: me->sp[0].tag_number = element_number;
1.10 timbl 591: }
1.1 timbl 592: }
1.10 timbl 593:
1.2 timbl 594:
1.1 timbl 595: /* End Element
1.2 timbl 596: ** -----------
1.1 timbl 597: **
1.2 timbl 598: */
599: /* When we end an element, the style must be returned to that
1.1 timbl 600: ** in effect before that element. Note that anchors (etc?)
601: ** don't have an associated style, so that we must scan down the
602: ** stack for an element with a defined style. (In fact, the styles
603: ** should be linked to the whole stack not just the top one.)
604: ** TBL 921119
1.6 timbl 605: **
606: ** We don't turn on "CAREFUL" check because the parser produces
607: ** (internal code errors apart) good nesting. The parser checks
608: ** incoming code errors, not this module.
1.1 timbl 609: */
1.4 timbl 610: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 611: {
1.2 timbl 612: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 613: if (element_number != me->sp[0].tag_number) {
1.2 timbl 614: fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
615: HTML_dtd.tags[element_number].name,
1.4 timbl 616: HTML_dtd.tags[me->sp->tag_number].name);
1.6 timbl 617: /* panic */
1.1 timbl 618: }
1.2 timbl 619: #endif
620:
1.4 timbl 621: me->sp++; /* Pop state off stack */
1.2 timbl 622:
623: switch(element_number) {
624:
625: case HTML_A:
626: UPDATE_STYLE;
1.4 timbl 627: HText_endAnchor(me->text);
1.2 timbl 628: break;
629:
630: case HTML_TITLE:
1.4 timbl 631: HTChunkTerminate(&me->title);
632: HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 633: break;
634:
635: case HTML_LISTING: /* Litteral text */
636: case HTML_XMP:
637: case HTML_PLAINTEXT:
638: case HTML_PRE:
1.4 timbl 639: if (me->comment_start)
640: HText_appendText(me->text, me->comment_start);
1.2 timbl 641: /* Fall through */
642:
643: default:
644:
1.11 timbl 645: change_paragraph_style(me, me->sp->style); /* Often won't really change */
1.2 timbl 646: break;
647:
648: } /* switch */
1.1 timbl 649: }
650:
1.2 timbl 651:
652: /* Expanding entities
653: ** ------------------
654: */
655: /* (In fact, they all shrink!)
1.1 timbl 656: */
1.2 timbl 657:
1.4 timbl 658: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 659: {
1.4 timbl 660: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 661: }
662:
1.2 timbl 663:
664:
665: /* Free an HTML object
666: ** -------------------
667: **
1.4 timbl 668: ** If the document is empty, the text object will not yet exist.
669: So we could in fact abandon creating the document and return
670: an error code. In fact an empty document is an important type
671: of document, so we don't.
672: **
673: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 674: ** Otherwise, the interactive object is left.
675: */
1.4 timbl 676: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1 timbl 677: {
1.4 timbl 678: UPDATE_STYLE; /* Creates empty document here! */
679: if (me->comment_end)
680: HTML_put_string(me,me->comment_end);
681: HText_endAppend(me->text);
682:
683: if (me->target) {
684: (*me->targetClass.end_document)(me->target);
685: (*me->targetClass.free)(me->target);
686: /* HText_free(me->text); */ /* @@@@@@@@@@@@@@@ */
1.2 timbl 687: }
1.4 timbl 688: free(me);
1.1 timbl 689: }
690:
691:
1.4 timbl 692: PRIVATE void HTML_end_document ARGS1(HTStructured *, me)
1.1 timbl 693:
1.4 timbl 694: { /* Obsolete */
1.1 timbl 695: }
696:
1.2 timbl 697:
698: /* Get Styles from style sheet
699: ** ---------------------------
700: */
701: PRIVATE void get_styles NOARGS
1.1 timbl 702: {
1.2 timbl 703: got_styles = YES;
704:
705: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 706:
1.2 timbl 707: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
708: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
709: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
710: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
711: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
712: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
713: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
714:
715: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
716: styles[HTML_UL] =
717: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
718: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
719: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
720: styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact");
721: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
722: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
723: styles[HTML_PLAINTEXT] =
724: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
725: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
726: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
727: }
728: /* P U B L I C
729: */
730:
731: /* Structured Object Class
732: ** -----------------------
733: */
734: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
735: {
736: "text/html",
737: HTML_free,
738: HTML_end_document,
739: HTML_put_character, HTML_put_string, HTML_write,
740: HTML_start_element, HTML_end_element,
741: HTML_put_entity
742: };
1.1 timbl 743:
1.4 timbl 744:
1.2 timbl 745: /* New Structured Text object
746: ** --------------------------
747: **
1.4 timbl 748: ** The strutcured stream can generate either presentation,
749: ** or plain text, or HTML.
1.1 timbl 750: */
1.4 timbl 751: PUBLIC HTStructured* HTML_new ARGS3(
1.2 timbl 752: HTParentAnchor *, anchor,
1.4 timbl 753: HTFormat, format_out,
1.2 timbl 754: HTStream*, stream)
1.1 timbl 755: {
756:
1.4 timbl 757: HTStructured * me;
758:
759: if (format_out != WWW_PLAINTEXT && format_out != WWW_PRESENT) {
1.6 timbl 760: HTStream * intermediate = HTStreamStack(WWW_HTML, format_out,
761: stream, anchor);
762: if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 763: fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
764: HTAtom_name(format_out));
765: exit (-99);
766: }
767:
768: me = (HTStructured*) malloc(sizeof(*me));
769: if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 770:
771: if (!got_styles) get_styles();
772:
1.4 timbl 773: me->isa = &HTMLPresentation;
774: me->node_anchor = anchor;
775: me->title.size = 0;
776: me->title.growby = 128;
777: me->title.allocated = 0;
778: me->title.data = 0;
779: me->text = 0;
780: me->style_change = YES; /* Force check leading to text creation */
781: me->new_style = default_style;
782: me->old_style = 0;
783: me->sp = me->stack + MAX_NESTING - 1;
784: me->sp->tag_number = -1; /* INVALID */
785: me->sp->style = default_style; /* INVALID */
1.1 timbl 786:
1.4 timbl 787: me->comment_start = NULL;
788: me->comment_end = NULL;
789: me->target = stream;
790: if (stream) me->targetClass = *stream->isa; /* Copy pointers */
1.1 timbl 791:
1.4 timbl 792: return (HTStructured*) me;
1.1 timbl 793: }
794:
795:
1.2 timbl 796: /* HTConverter for HTML to plain text
797: ** ----------------------------------
1.1 timbl 798: **
1.2 timbl 799: ** This will convert from HTML to presentation or plain text.
1.1 timbl 800: */
1.2 timbl 801: PUBLIC HTStream* HTMLToPlain ARGS3(
802: HTPresentation *, pres,
803: HTParentAnchor *, anchor,
804: HTStream *, sink)
1.1 timbl 805: {
1.4 timbl 806: return SGML_new(&HTML_dtd, HTML_new(anchor, pres->rep_out, sink));
1.1 timbl 807: }
808:
809:
1.2 timbl 810: /* HTConverter for HTML to C code
811: ** ------------------------------
812: **
813: ** C copde is like plain text but all non-preformatted code
814: ** is commented out.
815: ** This will convert from HTML to presentation or plain text.
816: */
817: PUBLIC HTStream* HTMLToC ARGS3(
818: HTPresentation *, pres,
819: HTParentAnchor *, anchor,
820: HTStream *, sink)
1.1 timbl 821: {
1.4 timbl 822:
823: HTStructured * html;
824:
825: (*sink->isa->put_string)(sink, "/* "); /* Before even title */
826: html = HTML_new(anchor, WWW_PLAINTEXT, sink);
1.2 timbl 827: html->comment_start = "/* ";
828: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.4 timbl 829: /* HTML_put_string(html,html->comment_start); */
1.2 timbl 830: return SGML_new(&HTML_dtd, html);
1.1 timbl 831: }
832:
833:
1.2 timbl 834: /* Presenter for HTML
835: ** ------------------
836: **
837: ** This will convert from HTML to presentation or plain text.
838: **
839: ** Override this if you have a windows version
1.1 timbl 840: */
1.2 timbl 841: #ifndef GUI
842: PUBLIC HTStream* HTMLPresent ARGS3(
843: HTPresentation *, pres,
844: HTParentAnchor *, anchor,
845: HTStream *, sink)
1.1 timbl 846: {
1.4 timbl 847: return SGML_new(&HTML_dtd, HTML_new(anchor, WWW_PRESENT, NULL));
1.1 timbl 848: }
1.2 timbl 849: #endif
1.1 timbl 850:
851:
1.2 timbl 852: /* Record error message as a hypertext object
853: ** ------------------------------------------
854: **
855: ** The error message should be marked as an error so that
856: ** it can be reloaded later.
857: ** This implementation just throws up an error message
858: ** and leaves the document unloaded.
1.9 timbl 859: ** A smarter implementation would load an error document,
860: ** marking at such so that it is retried on reload.
1.1 timbl 861: **
1.2 timbl 862: ** On entry,
863: ** sink is a stream to the output device if any
864: ** number is the HTTP error number
865: ** message is the human readable message.
1.9 timbl 866: **
867: ** On exit,
868: ** returns a negative number to indicate lack of success in the load.
1.1 timbl 869: */
1.2 timbl 870:
871: PUBLIC int HTLoadError ARGS3(
872: HTStream *, sink,
873: int, number,
874: CONST char *, message)
875: {
876: HTAlert(message); /* @@@@@@@@@@@@@@@@@@@ */
877: return -number;
878: }
879:
Webmaster