Annotation of libwww/Library/src/HTML.c, revision 1.62
1.39 frystyk 1: /* HTML.c
2: ** STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
3: **
1.43 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.62 ! frystyk 6: ** @(#) $Id: HTML.c,v 1.61 1996/04/12 17:47:44 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This generates a hypertext object. It converts from the
9: ** structured stream interface for HTML events into the style-
1.47 frystyk 10: ** oriented interface of the HText interface. This module is
1.2 timbl 11: ** only used in clients and should not be linked into servers.
1.1 timbl 12: **
1.6 timbl 13: ** Override this module if making a new GUI browser.
1.1 timbl 14: **
1.35 duns 15: ** HISTORY:
16: ** 8 Jul 94 FM Insulate free() from _free structure element.
17: **
1.1 timbl 18: */
1.16 timbl 19:
1.41 frystyk 20: /* Library include files */
1.60 frystyk 21: #include "sysdep.h"
1.41 frystyk 22: #include "HTUtils.h"
23: #include "HTString.h"
1.1 timbl 24: #include "HTAtom.h"
25: #include "HTChunk.h"
26: #include "HText.h"
27: #include "HTStyle.h"
1.3 timbl 28: #include "HTAlert.h"
1.4 timbl 29: #include "HTMLGen.h"
1.8 timbl 30: #include "HTParse.h"
1.41 frystyk 31: #include "HTML.h"
1.1 timbl 32:
33: extern HTStyleSheet * styleSheet; /* Application-wide */
34:
35: /* Module-wide style cache
36: */
37: PRIVATE int got_styles = 0;
1.16 timbl 38: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 39: PRIVATE HTStyle *default_style;
1.1 timbl 40:
1.62 ! frystyk 41: #define TAB '\0'
1.1 timbl 42:
43: /* HTML Object
44: ** -----------
45: */
1.2 timbl 46: #define MAX_NESTING 20 /* Should be checked by parser */
47:
48: typedef struct _stack_element {
49: HTStyle * style;
50: int tag_number;
51: } stack_element;
52:
53: struct _HTStructured {
1.60 frystyk 54: const HTStructuredClass * isa;
1.54 frystyk 55: HTRequest * request;
1.2 timbl 56: HTParentAnchor * node_anchor;
57: HText * text;
58:
59: HTStream* target; /* Output stream */
60: HTStreamClass targetClass; /* Output routines */
61:
1.56 frystyk 62: HTChunk * title; /* Grow by 128 */
1.2 timbl 63:
64: char * comment_start; /* for literate programming */
65: char * comment_end;
1.16 timbl 66:
1.60 frystyk 67: const SGML_dtd* dtd;
1.16 timbl 68:
1.2 timbl 69: HTTag * current_tag;
70: BOOL style_change;
71: HTStyle * new_style;
72: HTStyle * old_style;
73: BOOL in_word; /* Have just had a non-white char */
1.44 frystyk 74:
75: stack_element stack[MAX_NESTING];
76: stack_element *sp; /* Style stack pointer */
77: int overflow; /* Keep track of overflow nesting */
1.1 timbl 78: };
79:
1.2 timbl 80: struct _HTStream {
1.60 frystyk 81: const HTStreamClass * isa;
1.2 timbl 82: /* .... */
83: };
1.1 timbl 84:
85: /* Forward declarations of routines
86: */
1.52 frystyk 87: PRIVATE void get_styles (void);
1.1 timbl 88:
89:
1.52 frystyk 90: PRIVATE void actually_set_style (HTStructured * me);
91: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1 timbl 92:
93: /* Style buffering avoids dummy paragraph begin/ends.
94: */
1.4 timbl 95: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 96:
97:
1.2 timbl 98: #ifdef OLD_CODE
1.1 timbl 99: /* The following accented characters are from peter Flynn, curia project */
100:
101: /* these ifdefs don't solve the problem of a simple terminal emulator
102: ** with a different character set to the client machine. But nothing does,
103: ** except looking at the TERM setting */
104:
1.2 timbl 105:
1.1 timbl 106: { "ocus" , "&" }, /* for CURIA */
107: #ifdef IBMPC
108: { "aacute" , "\240" }, /* For PC display */
109: { "eacute" , "\202" },
110: { "iacute" , "\241" },
111: { "oacute" , "\242" },
112: { "uacute" , "\243" },
113: { "Aacute" , "\101" },
114: { "Eacute" , "\220" },
115: { "Iacute" , "\111" },
116: { "Oacute" , "\117" },
117: { "Uacute" , "\125" },
118: #else
119: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
120: { "eacute" , "\351" },
121: { "iacute" , "\355" },
122: { "oacute" , "\363" },
123: { "uacute" , "\372" },
124: { "Aacute" , "\301" },
125: { "Eacute" , "\310" },
126: { "Iacute" , "\315" },
127: { "Oacute" , "\323" },
128: { "Uacute" , "\332" },
129: #endif
130: { 0, 0 } /* Terminate list */
131: };
1.2 timbl 132: #endif
1.1 timbl 133:
134:
/*	Entity values -- for ISO Latin 1 local representation
**
**	One single-character string per entity.
**	This MUST match exactly the table referred to in the DTD!
*/
static char * ISO_Latin1[] = {
    "\xC6",	/* capital AE diphthong (ligature) */
    "\xC1",	/* capital A, acute accent */
    "\xC2",	/* capital A, circumflex accent */
    "\xC0",	/* capital A, grave accent */
    "\xC5",	/* capital A, ring */
    "\xC3",	/* capital A, tilde */
    "\xC4",	/* capital A, dieresis or umlaut mark */
    "\xC7",	/* capital C, cedilla */
    "\xD0",	/* capital Eth, Icelandic */
    "\xC9",	/* capital E, acute accent */
    "\xCA",	/* capital E, circumflex accent */
    "\xC8",	/* capital E, grave accent */
    "\xCB",	/* capital E, dieresis or umlaut mark */
    "\xCD",	/* capital I, acute accent */
    "\xCE",	/* capital I, circumflex accent */
    "\xCC",	/* capital I, grave accent */
    "\xCF",	/* capital I, dieresis or umlaut mark */
    "\xD1",	/* capital N, tilde */
    "\xD3",	/* capital O, acute accent */
    "\xD4",	/* capital O, circumflex accent */
    "\xD2",	/* capital O, grave accent */
    "\xD8",	/* capital O, slash */
    "\xD5",	/* capital O, tilde */
    "\xD6",	/* capital O, dieresis or umlaut mark */
    "\xDE",	/* capital THORN, Icelandic */
    "\xDA",	/* capital U, acute accent */
    "\xDB",	/* capital U, circumflex accent */
    "\xD9",	/* capital U, grave accent */
    "\xDC",	/* capital U, dieresis or umlaut mark */
    "\xDD",	/* capital Y, acute accent */
    "\xE1",	/* small a, acute accent */
    "\xE2",	/* small a, circumflex accent */
    "\xE6",	/* small ae diphthong (ligature) */
    "\xE0",	/* small a, grave accent */
    "\x26",	/* ampersand */
    "\xE5",	/* small a, ring */
    "\xE3",	/* small a, tilde */
    "\xE4",	/* small a, dieresis or umlaut mark */
    "\xE7",	/* small c, cedilla */
    "\xE9",	/* small e, acute accent */
    "\xEA",	/* small e, circumflex accent */
    "\xE8",	/* small e, grave accent */
    "\xF0",	/* small eth, Icelandic */
    "\xEB",	/* small e, dieresis or umlaut mark */
    "\x3E",	/* greater than */
    "\xED",	/* small i, acute accent */
    "\xEE",	/* small i, circumflex accent */
    "\xEC",	/* small i, grave accent */
    "\xEF",	/* small i, dieresis or umlaut mark */
    "\x3C",	/* less than */
    "\x20",	/* non-breaking space (an ordinary space is emitted) */
    "\xF1",	/* small n, tilde */
    "\xF3",	/* small o, acute accent */
    "\xF4",	/* small o, circumflex accent */
    "\xF2",	/* small o, grave accent */
    "\xF8",	/* small o, slash */
    "\xF5",	/* small o, tilde */
    "\xF6",	/* small o, dieresis or umlaut mark */
    "\x22",	/* double quote sign - June 94 */
    "\xDF",	/* small sharp s, German (sz ligature) */
    "\xFE",	/* small thorn, Icelandic */
    "\xFA",	/* small u, acute accent */
    "\xFB",	/* small u, circumflex accent */
    "\xF9",	/* small u, grave accent */
    "\xFC",	/* small u, dieresis or umlaut mark */
    "\xFD",	/* small y, acute accent */
    "\xFF",	/* small y, dieresis or umlaut mark */
};
208:
1.2 timbl 209:
/*	Entity values -- for NeXT local representation
**
**	One single-character string per entity, in the same order as the
**	ISO Latin 1 table.
**	This MUST match exactly the table referred to in the DTD!
*/
static char * NeXTCharacters[] = {
    "\xE1",	/* capital AE diphthong (ligature) */
    "\x82",	/* capital A, acute accent */
    "\x83",	/* capital A, circumflex accent */
    "\x81",	/* capital A, grave accent */
    "\x86",	/* capital A, ring */
    "\x84",	/* capital A, tilde */
    "\x85",	/* capital A, dieresis or umlaut mark */
    "\x87",	/* capital C, cedilla */
    "\x90",	/* capital Eth, Icelandic */
    "\x89",	/* capital E, acute accent */
    "\x8A",	/* capital E, circumflex accent */
    "\x88",	/* capital E, grave accent */
    "\x8B",	/* capital E, dieresis or umlaut mark */
    "\x8D",	/* capital I, acute accent */
    "\x8E",	/* capital I, circumflex accent */
    "\x8C",	/* capital I, grave accent */
    "\x8F",	/* capital I, dieresis or umlaut mark */
    "\x91",	/* capital N, tilde */
    "\x93",	/* capital O, acute accent */
    "\x94",	/* capital O, circumflex accent */
    "\x92",	/* capital O, grave accent */
    "\xE9",	/* capital O, slash */
    "\x95",	/* capital O, tilde */
    "\x96",	/* capital O, dieresis or umlaut mark */
    "\x9C",	/* capital THORN, Icelandic */
    "\x98",	/* capital U, acute accent */
    "\x99",	/* capital U, circumflex accent */
    "\x97",	/* capital U, grave accent */
    "\x9A",	/* capital U, dieresis or umlaut mark */
    "\x9B",	/* capital Y, acute accent */
    "\xD6",	/* small a, acute accent */
    "\xD7",	/* small a, circumflex accent */
    "\xF1",	/* small ae diphthong (ligature) */
    "\xD5",	/* small a, grave accent */
    "\x26",	/* ampersand */
    "\xDA",	/* small a, ring */
    "\xD8",	/* small a, tilde */
    "\xD9",	/* small a, dieresis or umlaut mark */
    "\xDB",	/* small c, cedilla */
    "\xDD",	/* small e, acute accent */
    "\xDE",	/* small e, circumflex accent */
    "\xDC",	/* small e, grave accent */
    "\xE6",	/* small eth, Icelandic */
    "\xDF",	/* small e, dieresis or umlaut mark */
    "\x3E",	/* greater than */
    "\xE2",	/* small i, acute accent */
    "\xE4",	/* small i, circumflex accent */
    "\xE0",	/* small i, grave accent */
    "\xE5",	/* small i, dieresis or umlaut mark */
    "\x3C",	/* less than */
    "\x20",	/* non-breaking space (an ordinary space is emitted) */
    "\xE7",	/* small n, tilde */
    "\xED",	/* small o, acute accent */
    "\xEE",	/* small o, circumflex accent */
    "\xEC",	/* small o, grave accent */
    "\xF9",	/* small o, slash */
    "\xEF",	/* small o, tilde */
    "\xF0",	/* small o, dieresis or umlaut mark */
    "\x22",	/* double quote sign - June 94 */
    "\xFB",	/* small sharp s, German (sz ligature) */
    "\xFC",	/* small thorn, Icelandic */
    "\xF3",	/* small u, acute accent */
    "\xF4",	/* small u, circumflex accent */
    "\xF2",	/* small u, grave accent */
    "\xF6",	/* small u, dieresis or umlaut mark */
    "\xF7",	/* small y, acute accent */
    "\xFD",	/* small y, dieresis or umlaut mark */
};
284:
1.2 timbl 285: /* Entity values -- for IBM/PC Code Page 850 (International)
286: **
287: ** This MUST match exactly the table referred to in the DTD!
288: **
289: */
290: /* @@@@@@@@@@@@@@@@@ TBD */
291:
292:
293:
294: /* Set character set
295: ** ----------------
296: */
297:
298: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 299:
1.53 frystyk 300: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2 timbl 301: {
302: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
303: : ISO_Latin1;
304: }
1.1 timbl 305:
306:
307: /* Flattening the style structure
308: ** ------------------------------
309: **
310: On the NeXT, and on any read-only browser, it is simpler for the text to have
311: a sequence of styles, rather than a nested tree of styles. In this
312: case we have to flatten the structure as it arrives from SGML tags into
313: a sequence of styles.
314: */
315:
316: /* If style really needs to be set, call this
317: */
1.53 frystyk 318: PRIVATE void actually_set_style (HTStructured * me)
1.1 timbl 319: {
1.4 timbl 320: if (!me->text) { /* First time through */
1.54 frystyk 321: me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4 timbl 322: HText_beginAppend(me->text);
323: HText_setStyle(me->text, me->new_style);
324: me->in_word = NO;
1.1 timbl 325: } else {
1.4 timbl 326: HText_setStyle(me->text, me->new_style);
1.1 timbl 327: }
1.4 timbl 328: me->old_style = me->new_style;
329: me->style_change = NO;
1.1 timbl 330: }
331:
332: /* If you THINK you need to change style, call this
333: */
334:
1.53 frystyk 335: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1 timbl 336: {
1.4 timbl 337: if (me->new_style!=style) {
338: me->style_change = YES;
339: me->new_style = style;
1.1 timbl 340: }
1.11 timbl 341: me->in_word = NO;
1.1 timbl 342: }
343:
1.2 timbl 344: /*_________________________________________________________________________
345: **
346: ** A C T I O N R O U T I N E S
347: */
348:
349: /* Character handling
350: ** ------------------
1.1 timbl 351: */
1.53 frystyk 352: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 353: {
1.2 timbl 354:
1.4 timbl 355: switch (me->sp[0].tag_number) {
1.2 timbl 356: case HTML_COMMENT:
357: break; /* Do Nothing */
358:
359: case HTML_TITLE:
1.56 frystyk 360: HTChunk_putb(me->title, &c, 1);
1.2 timbl 361: break;
362:
363:
364: case HTML_LISTING: /* Litteral text */
365: case HTML_XMP:
366: case HTML_PLAINTEXT:
367: case HTML_PRE:
368: /* We guarrantee that the style is up-to-date in begin_litteral
369: */
1.4 timbl 370: HText_appendCharacter(me->text, c);
1.2 timbl 371: break;
372:
373: default: /* Free format text */
1.4 timbl 374: if (me->style_change) {
1.42 frystyk 375: if ((c=='\n') || (c==' ')) return HT_OK; /* Ignore it */
1.2 timbl 376: UPDATE_STYLE;
377: }
1.62 ! frystyk 378: if (c == TAB)
! 379: HText_appendCharacter(me->text, '\t');
! 380: else if (WHITE(c)) {
1.4 timbl 381: if (me->in_word) {
382: HText_appendCharacter(me->text, ' ');
383: me->in_word = NO;
1.2 timbl 384: }
385: } else {
1.4 timbl 386: HText_appendCharacter(me->text, c);
387: me->in_word = YES;
1.2 timbl 388: }
389: } /* end switch */
1.42 frystyk 390: return HT_OK;
1.1 timbl 391: }
392:
1.2 timbl 393:
394:
395: /* String handling
396: ** ---------------
397: **
398: ** This is written separately from put_character becuase the loop can
1.11 timbl 399: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 400: */
1.60 frystyk 401: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1 timbl 402: {
1.2 timbl 403:
1.4 timbl 404: switch (me->sp[0].tag_number) {
1.2 timbl 405: case HTML_COMMENT:
406: break; /* Do Nothing */
407:
408: case HTML_TITLE:
1.56 frystyk 409: HTChunk_putb(me->title, s, strlen(s));
1.2 timbl 410: break;
411:
412:
413: case HTML_LISTING: /* Litteral text */
414: case HTML_XMP:
415: case HTML_PLAINTEXT:
416: case HTML_PRE:
417:
418: /* We guarrantee that the style is up-to-date in begin_litteral
419: */
1.4 timbl 420: HText_appendText(me->text, s);
1.2 timbl 421: break;
422:
423: default: /* Free format text */
424: {
1.60 frystyk 425: const char *p = s;
1.4 timbl 426: if (me->style_change) {
1.2 timbl 427: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
1.42 frystyk 428: if (!*p) return HT_OK;
1.2 timbl 429: UPDATE_STYLE;
430: }
431: for(; *p; p++) {
1.4 timbl 432: if (me->style_change) {
1.2 timbl 433: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
434: UPDATE_STYLE;
435: }
436: if (*p=='\n') {
1.4 timbl 437: if (me->in_word) {
438: HText_appendCharacter(me->text, ' ');
439: me->in_word = NO;
1.2 timbl 440: }
441: } else {
1.4 timbl 442: HText_appendCharacter(me->text, *p);
443: me->in_word = YES;
1.2 timbl 444: }
445: } /* for */
446: }
447: } /* end switch */
1.42 frystyk 448: return HT_OK;
1.1 timbl 449: }
450:
451:
1.2 timbl 452: /* Buffer write
1.3 timbl 453: ** ------------
1.1 timbl 454: */
1.60 frystyk 455: PRIVATE int HTML_write (HTStructured * me, const char* s, int l)
1.1 timbl 456: {
1.38 frystyk 457: while (l-- > 0)
458: HTML_put_character(me, *s++);
1.42 frystyk 459: return HT_OK;
1.1 timbl 460: }
1.2 timbl 461:
462:
463: /* Start Element
464: ** -------------
465: */
1.53 frystyk 466: PRIVATE void HTML_start_element (
467: HTStructured * me,
468: int element_number,
1.60 frystyk 469: const BOOL* present,
470: const char ** value)
1.2 timbl 471: {
472: switch (element_number) {
473: case HTML_A:
474: {
1.8 timbl 475: HTChildAnchor * source;
1.9 timbl 476: char * href = NULL;
1.42 frystyk 477: if (present[HTML_A_HREF])
1.9 timbl 478: StrAllocCopy(href, value[HTML_A_HREF]);
1.8 timbl 479: source = HTAnchor_findChildAndLink(
1.4 timbl 480: me->node_anchor, /* parent */
1.2 timbl 481: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 482: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 483: present[HTML_A_REL] && value[HTML_A_REL] ?
1.54 frystyk 484: (HTLinkType) HTAtom_for(value[HTML_A_REL])
485: : 0);
1.2 timbl 486:
487: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
488: HTParentAnchor * dest =
489: HTAnchor_parent(
490: HTAnchor_followMainLink((HTAnchor*)source)
491: );
492: if (!HTAnchor_title(dest))
493: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
494: }
495: UPDATE_STYLE;
1.4 timbl 496: HText_beginAnchor(me->text, source);
1.58 frystyk 497: HT_FREE(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 498: }
499: break;
500:
501: case HTML_TITLE:
1.56 frystyk 502: HTChunk_clear(me->title);
1.2 timbl 503: break;
504:
505: case HTML_NEXTID:
506: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 507: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 508: break;
509:
510: case HTML_ISINDEX:
1.4 timbl 511: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 512: break;
513:
1.15 timbl 514: case HTML_BR:
515: UPDATE_STYLE;
516: HText_appendCharacter(me->text, '\n');
517: me->in_word = NO;
518: break;
519:
520: case HTML_HR:
521: UPDATE_STYLE;
522: HText_appendCharacter(me->text, '\n');
1.16 timbl 523: HText_appendText(me->text, "___________________________________");
1.15 timbl 524: HText_appendCharacter(me->text, '\n');
525: me->in_word = NO;
526: break;
527:
1.2 timbl 528: case HTML_P:
529: UPDATE_STYLE;
1.4 timbl 530: HText_appendParagraph(me->text);
531: me->in_word = NO;
1.2 timbl 532: break;
533:
534: case HTML_DL:
1.11 timbl 535: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 536: ? styles[HTML_DL]
1.2 timbl 537: : styles[HTML_DL]);
538: break;
539:
540: case HTML_DT:
1.4 timbl 541: if (!me->style_change) {
542: HText_appendParagraph(me->text);
543: me->in_word = NO;
1.2 timbl 544: }
545: break;
546:
547: case HTML_DD:
548: UPDATE_STYLE;
1.62 ! frystyk 549: HTML_put_character(me, TAB); /* Just tab out one stop */
1.4 timbl 550: me->in_word = NO;
551: break;
1.2 timbl 552:
553: case HTML_UL:
554: case HTML_OL:
555: case HTML_MENU:
556: case HTML_DIR:
1.11 timbl 557: change_paragraph_style(me, styles[element_number]);
1.2 timbl 558: break;
559:
560: case HTML_LI:
561: UPDATE_STYLE;
1.7 timbl 562: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 563: HText_appendParagraph(me->text);
1.2 timbl 564: else
1.62 ! frystyk 565: HText_appendCharacter(me->text, TAB);
1.4 timbl 566: me->in_word = NO;
1.2 timbl 567: break;
568:
569: case HTML_LISTING: /* Litteral text */
570: case HTML_XMP:
571: case HTML_PLAINTEXT:
572: case HTML_PRE:
1.11 timbl 573: change_paragraph_style(me, styles[element_number]);
1.2 timbl 574: UPDATE_STYLE;
1.4 timbl 575: if (me->comment_end)
576: HText_appendText(me->text, me->comment_end);
1.2 timbl 577: break;
1.11 timbl 578:
1.23 frystyk 579: case HTML_IMG: /* Images */
580: {
581: HTChildAnchor *source;
582: char *src = NULL;
1.49 frystyk 583: if (present[HTML_IMG_SRC])
1.23 frystyk 584: StrAllocCopy(src, value[HTML_IMG_SRC]);
585: source = HTAnchor_findChildAndLink(
586: me->node_anchor, /* parent */
587: 0, /* Tag */
588: src ? src : 0, /* Addresss */
589: 0);
590: UPDATE_STYLE;
591: HText_appendImage(me->text, source,
1.24 frystyk 592: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
593: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
594: present[HTML_IMG_ISMAP] ? YES : NO);
1.58 frystyk 595: HT_FREE(src);
1.24 frystyk 596: }
597: break;
598:
599: case HTML_HTML: /* Ignore these altogether */
600: case HTML_HEAD:
601: case HTML_BODY:
1.62 ! frystyk 602: break;
1.24 frystyk 603:
1.10 timbl 604: case HTML_TT: /* Physical character highlighting */
605: case HTML_B: /* Currently ignored */
606: case HTML_I:
607: case HTML_U:
1.62 ! frystyk 608: UPDATE_STYLE;
! 609: HText_appendCharacter(me->text, '_');
! 610: me->in_word = NO;
! 611: break;
1.10 timbl 612:
613: case HTML_EM: /* Logical character highlighting */
614: case HTML_STRONG: /* Currently ignored */
615: case HTML_CODE:
616: case HTML_SAMP:
617: case HTML_KBD:
618: case HTML_VAR:
619: case HTML_DFN:
620: case HTML_CITE:
621: break;
622:
1.11 timbl 623: case HTML_H1: /* paragraph styles */
624: case HTML_H2:
625: case HTML_H3:
626: case HTML_H4:
627: case HTML_H5:
628: case HTML_H6:
629: case HTML_H7:
630: case HTML_ADDRESS:
631: case HTML_BLOCKQUOTE:
632: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 633: break;
634:
635: } /* end switch */
636:
1.16 timbl 637: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 638: if (me->sp == me->stack) {
1.44 frystyk 639: if (SGML_TRACE)
1.59 eric 640: HTTrace("HTML........ Maximum nesting of %d exceded!\n",
1.44 frystyk 641: MAX_NESTING);
642: me->overflow++;
1.12 timbl 643: return;
644: }
1.4 timbl 645: --(me->sp);
646: me->sp[0].style = me->new_style; /* Stack new style */
647: me->sp[0].tag_number = element_number;
1.10 timbl 648: }
1.1 timbl 649: }
1.10 timbl 650:
1.2 timbl 651:
1.1 timbl 652: /* End Element
1.2 timbl 653: ** -----------
1.1 timbl 654: **
1.2 timbl 655: */
656: /* When we end an element, the style must be returned to that
1.1 timbl 657: ** in effect before that element. Note that anchors (etc?)
658: ** don't have an associated style, so that we must scan down the
659: ** stack for an element with a defined style. (In fact, the styles
660: ** should be linked to the whole stack not just the top one.)
661: ** TBL 921119
1.6 timbl 662: **
663: ** We don't turn on "CAREFUL" check because the parser produces
664: ** (internal code errors apart) good nesting. The parser checks
665: ** incoming code errors, not this module.
1.1 timbl 666: */
1.53 frystyk 667: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 668: {
1.2 timbl 669: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 670: if (element_number != me->sp[0].tag_number) {
1.59 eric 671: HTTrace("HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 672: me->dtd->tags[element_number].name,
673: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 674: /* panic */
1.1 timbl 675: }
1.2 timbl 676: #endif
1.44 frystyk 677:
678: /* HFN, If overflow of nestings, we need to get back to reality */
679: if (me->overflow > 0) {
680: me->overflow--;
681: return;
682: }
683:
1.4 timbl 684: me->sp++; /* Pop state off stack */
1.44 frystyk 685:
1.2 timbl 686: switch(element_number) {
687:
688: case HTML_A:
689: UPDATE_STYLE;
1.4 timbl 690: HText_endAnchor(me->text);
1.2 timbl 691: break;
692:
693: case HTML_TITLE:
1.56 frystyk 694: HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 695: break;
696:
1.62 ! frystyk 697: case HTML_TT: /* Physical character highlighting */
! 698: case HTML_B: /* Currently ignored */
! 699: case HTML_I:
! 700: case HTML_U:
! 701: UPDATE_STYLE;
! 702: HText_appendCharacter(me->text, '_');
! 703: break;
! 704:
1.2 timbl 705: case HTML_LISTING: /* Litteral text */
706: case HTML_XMP:
707: case HTML_PLAINTEXT:
708: case HTML_PRE:
1.4 timbl 709: if (me->comment_start)
710: HText_appendText(me->text, me->comment_start);
1.2 timbl 711: /* Fall through */
712:
713: default:
1.44 frystyk 714:
715: /* Often won't really change */
716: change_paragraph_style(me, me->sp->style);
1.2 timbl 717: break;
718:
719: } /* switch */
1.1 timbl 720: }
721:
1.2 timbl 722:
723: /* Expanding entities
724: ** ------------------
725: */
726: /* (In fact, they all shrink!)
1.1 timbl 727: */
1.2 timbl 728:
1.53 frystyk 729: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 730: {
1.4 timbl 731: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 732: }
1.2 timbl 733:
1.42 frystyk 734: /* Flush an HTML object
735: ** --------------------
736: */
1.53 frystyk 737: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 738: {
739: UPDATE_STYLE; /* Creates empty document here! */
1.57 frystyk 740: if (me->comment_end) HTML_put_string(me,me->comment_end);
741: return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42 frystyk 742: }
1.2 timbl 743:
744: /* Free an HTML object
745: ** -------------------
746: **
1.4 timbl 747: ** If the document is empty, the text object will not yet exist.
748: So we could in fact abandon creating the document and return
749: an error code. In fact an empty document is an important type
750: of document, so we don't.
751: **
752: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 753: ** Otherwise, the interactive object is left.
754: */
1.53 frystyk 755: PUBLIC int HTML_free (HTStructured * me)
1.1 timbl 756: {
1.4 timbl 757: UPDATE_STYLE; /* Creates empty document here! */
758: if (me->comment_end)
759: HTML_put_string(me,me->comment_end);
760: HText_endAppend(me->text);
761:
762: if (me->target) {
1.35 duns 763: (*me->targetClass._free)(me->target);
1.2 timbl 764: }
1.56 frystyk 765: HTChunk_delete(me->title);
1.58 frystyk 766: HT_FREE(me);
1.42 frystyk 767: return HT_OK;
1.1 timbl 768: }
769:
770:
1.53 frystyk 771: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1 timbl 772:
1.14 timbl 773: {
774: if (me->target) {
775: (*me->targetClass.abort)(me->target, e);
776: }
1.56 frystyk 777: HTChunk_delete(me->title);
1.58 frystyk 778: HT_FREE(me);
1.42 frystyk 779: return HT_ERROR;
1.1 timbl 780: }
781:
1.2 timbl 782:
783: /* Get Styles from style sheet
784: ** ---------------------------
785: */
1.53 frystyk 786: PRIVATE void get_styles (void)
1.1 timbl 787: {
1.2 timbl 788: got_styles = YES;
789:
790: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 791:
1.2 timbl 792: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
793: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
794: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
795: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
796: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
797: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
798: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
799:
800: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
801: styles[HTML_UL] =
802: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
803: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
804: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 805: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 806: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
807: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
808: styles[HTML_PLAINTEXT] =
809: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
810: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
811: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
812: }
813: /* P U B L I C
814: */
815:
816: /* Structured Object Class
817: ** -----------------------
818: */
1.60 frystyk 819: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 820: {
821: "text/html",
1.42 frystyk 822: HTML_flush,
1.2 timbl 823: HTML_free,
1.14 timbl 824: HTML_abort,
1.2 timbl 825: HTML_put_character, HTML_put_string, HTML_write,
826: HTML_start_element, HTML_end_element,
827: HTML_put_entity
828: };
1.1 timbl 829:
1.4 timbl 830:
1.2 timbl 831: /* New Structured Text object
832: ** --------------------------
833: **
1.16 timbl 834: ** The structured stream can generate either presentation,
1.4 timbl 835: ** or plain text, or HTML.
1.1 timbl 836: */
1.53 frystyk 837: PRIVATE HTStructured* HTML_new (HTRequest * request,
838: void * param,
839: HTFormat input_format,
840: HTFormat output_format,
841: HTStream * output_stream)
1.1 timbl 842: {
843:
1.4 timbl 844: HTStructured * me;
845:
1.47 frystyk 846: #if 0
1.16 timbl 847: if (output_format != WWW_PLAINTEXT
848: && output_format != WWW_PRESENT
849: && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 850: HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
851: output_stream, request, NO);
1.6 timbl 852: if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 853: if (SGML_TRACE)
1.59 eric 854: HTTrace("HTML........ Can't parse HTML to %s\n",
1.44 frystyk 855: HTAtom_name(output_format));
1.4 timbl 856: exit (-99);
857: }
1.47 frystyk 858: #endif
1.4 timbl 859:
1.58 frystyk 860: if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
861: HT_OUTOFMEM("HTML_new");
1.1 timbl 862:
863: if (!got_styles) get_styles();
864:
1.4 timbl 865: me->isa = &HTMLPresentation;
1.47 frystyk 866: me->dtd = &HTMLP_dtd;
1.54 frystyk 867: me->request = request;
1.48 frystyk 868: me->node_anchor = HTRequest_anchor(request);
1.56 frystyk 869: me->title = HTChunk_new(128);
1.4 timbl 870: me->text = 0;
871: me->style_change = YES; /* Force check leading to text creation */
872: me->new_style = default_style;
873: me->old_style = 0;
874: me->sp = me->stack + MAX_NESTING - 1;
875: me->sp->tag_number = -1; /* INVALID */
876: me->sp->style = default_style; /* INVALID */
1.1 timbl 877:
1.4 timbl 878: me->comment_start = NULL;
879: me->comment_end = NULL;
1.16 timbl 880: me->target = output_stream;
881: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 882:
1.4 timbl 883: return (HTStructured*) me;
1.1 timbl 884: }
885:
886:
1.2 timbl 887: /* HTConverter for HTML to plain text
888: ** ----------------------------------
1.1 timbl 889: **
1.2 timbl 890: ** This will convert from HTML to presentation or plain text.
1.1 timbl 891: */
1.53 frystyk 892: PUBLIC HTStream* HTMLToPlain (
893: HTRequest * request,
894: void * param,
895: HTFormat input_format,
896: HTFormat output_format,
897: HTStream * output_stream)
1.1 timbl 898: {
1.47 frystyk 899: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 900: request, NULL, input_format, output_format, output_stream));
1.1 timbl 901: }
902:
903:
1.2 timbl 904: /* HTConverter for HTML to C code
905: ** ------------------------------
906: **
1.36 frystyk 907: ** C code is like plain text but all non-preformatted code
1.2 timbl 908: ** is commented out.
909: ** This will convert from HTML to presentation or plain text.
910: */
1.53 frystyk 911: PUBLIC HTStream* HTMLToC (
912: HTRequest * request,
913: void * param,
914: HTFormat input_format,
915: HTFormat output_format,
916: HTStream * output_stream)
1.1 timbl 917: {
1.4 timbl 918:
919: HTStructured * html;
920:
1.36 frystyk 921: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 922: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 923: html->comment_start = "\n/* ";
1.47 frystyk 924: html->dtd = &HTMLP_dtd;
1.2 timbl 925: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.47 frystyk 926: return SGML_new(&HTMLP_dtd, html);
1.1 timbl 927: }
928:
929:
1.2 timbl 930: /* Presenter for HTML
931: ** ------------------
932: **
933: ** This will convert from HTML to presentation or plain text.
934: **
935: ** Override this if you have a windows version
1.1 timbl 936: */
1.2 timbl 937: #ifndef GUI
1.53 frystyk 938: PUBLIC HTStream* HTMLPresent (
939: HTRequest * request,
940: void * param,
941: HTFormat input_format,
942: HTFormat output_format,
943: HTStream * output_stream)
1.1 timbl 944: {
1.47 frystyk 945: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 946: request, NULL, input_format, output_format, output_stream));
1.1 timbl 947: }
1.2 timbl 948: #endif
1.29 frystyk 949:
Webmaster