Annotation of libwww/Library/src/HTML.c, revision 1.61
1.39 frystyk 1: /* HTML.c
2: ** STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
3: **
1.43 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.61 ! frystyk 6: ** @(#) $Id: Date Author State $
1.1 timbl 7: **
1.2 timbl 8: ** This generates a hypertext object. It converts from the
9: ** structured stream interface for HTML events into the style-
1.47 frystyk 10: ** oriented interface of the HText interface. This module is
1.2 timbl 11: ** only used in clients and should not be linked into servers.
1.1 timbl 12: **
1.6 timbl 13: ** Override this module if making a new GUI browser.
1.1 timbl 14: **
1.35 duns 15: ** HISTORY:
16: ** 8 Jul 94 FM Insulate free() from _free structure element.
17: **
1.1 timbl 18: */
1.16 timbl 19:
1.41 frystyk 20: /* Library include files */
1.60 frystyk 21: #include "sysdep.h"
1.41 frystyk 22: #include "HTUtils.h"
23: #include "HTString.h"
1.1 timbl 24: #include "HTAtom.h"
25: #include "HTChunk.h"
26: #include "HText.h"
27: #include "HTStyle.h"
1.3 timbl 28: #include "HTAlert.h"
1.4 timbl 29: #include "HTMLGen.h"
1.8 timbl 30: #include "HTParse.h"
1.41 frystyk 31: #include "HTML.h"
1.1 timbl 32:
33: extern HTStyleSheet * styleSheet; /* Application-wide */
34:
35: /* Module-wide style cache
36: */
37: PRIVATE int got_styles = 0;
1.16 timbl 38: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 39: PRIVATE HTStyle *default_style;
1.1 timbl 40:
41:
42: /* HTML Object
43: ** -----------
44: */
1.2 timbl 45: #define MAX_NESTING 20 /* Should be checked by parser */
46:
47: typedef struct _stack_element {
48: HTStyle * style;
49: int tag_number;
50: } stack_element;
51:
52: struct _HTStructured {
1.60 frystyk 53: const HTStructuredClass * isa;
1.54 frystyk 54: HTRequest * request;
1.2 timbl 55: HTParentAnchor * node_anchor;
56: HText * text;
57:
58: HTStream* target; /* Output stream */
59: HTStreamClass targetClass; /* Output routines */
60:
1.56 frystyk 61: HTChunk * title; /* Grow by 128 */
1.2 timbl 62:
63: char * comment_start; /* for literate programming */
64: char * comment_end;
1.16 timbl 65:
1.60 frystyk 66: const SGML_dtd* dtd;
1.16 timbl 67:
1.2 timbl 68: HTTag * current_tag;
69: BOOL style_change;
70: HTStyle * new_style;
71: HTStyle * old_style;
72: BOOL in_word; /* Have just had a non-white char */
1.44 frystyk 73:
74: stack_element stack[MAX_NESTING];
75: stack_element *sp; /* Style stack pointer */
76: int overflow; /* Keep track of overflow nesting */
1.1 timbl 77: };
78:
1.2 timbl 79: struct _HTStream {
1.60 frystyk 80: const HTStreamClass * isa;
1.2 timbl 81: /* .... */
82: };
1.1 timbl 83:
84: /* Forward declarations of routines
85: */
1.52 frystyk 86: PRIVATE void get_styles (void);
1.1 timbl 87:
88:
1.52 frystyk 89: PRIVATE void actually_set_style (HTStructured * me);
90: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1 timbl 91:
/*  Style buffering avoids dummy paragraph begin/ends.
**
**  Expects a variable `me' in scope and applies a pending style change,
**  if there is one.  The do/while(0) wrapper makes `UPDATE_STYLE;' a
**  single statement, so it can safely be used as the body of an if/else.
*/
#define UPDATE_STYLE \
    do { if (me->style_change) { actually_set_style(me); } } while (0)
1.1 timbl 95:
96:
#ifdef OLD_CODE
/*  Orphaned remainder of an old accented-character entity table (the
**  entries are from Peter Flynn, CURIA project).  It is only seen by the
**  compiler when OLD_CODE is defined, and the opening declaration of the
**  array is not part of the fragment, so it is kept for reference only.
**
**  These ifdefs don't solve the problem of a simple terminal emulator
**  with a different character set to the client machine.  But nothing
**  does, except looking at the TERM setting.
*/
    { "ocus" ,   "&" },         /* for CURIA */
#ifdef IBMPC
    { "aacute" , "\240" },      /* For PC display */
    { "eacute" , "\202" },
    { "iacute" , "\241" },
    { "oacute" , "\242" },
    { "uacute" , "\243" },
    { "Aacute" , "\101" },
    { "Eacute" , "\220" },
    { "Iacute" , "\111" },
    { "Oacute" , "\117" },
    { "Uacute" , "\125" },
#else
    { "aacute" , "\341" },      /* Works for openwindows -- Peter Flynn */
    { "eacute" , "\351" },
    { "iacute" , "\355" },
    { "oacute" , "\363" },
    { "uacute" , "\372" },
    { "Aacute" , "\301" },
    { "Eacute" , "\311" },      /* Was "\310", the code used for capital
                                   E grave in the ISO_Latin1 table */
    { "Iacute" , "\315" },
    { "Oacute" , "\323" },
    { "Uacute" , "\332" },
#endif
    { 0, 0 }                    /* Terminate list */
};
#endif
1.1 timbl 132:
133:
/*  Entity values -- ISO Latin 1 local representation
**  -------------------------------------------------
**  One single-character string per entity.  The order of the entries
**  MUST match exactly the entity table referred to in the DTD, because
**  the table is indexed by entity number.
*/
static char * ISO_Latin1[] = {
    /*  0 */ "\306",    /* AE diphthong (ligature), capital */
    /*  1 */ "\301",    /* A acute, capital */
    /*  2 */ "\302",    /* A circumflex, capital */
    /*  3 */ "\300",    /* A grave, capital */
    /*  4 */ "\305",    /* A ring, capital */
    /*  5 */ "\303",    /* A tilde, capital */
    /*  6 */ "\304",    /* A dieresis/umlaut, capital */
    /*  7 */ "\307",    /* C cedilla, capital */
    /*  8 */ "\320",    /* Eth, Icelandic, capital */
    /*  9 */ "\311",    /* E acute, capital */
    /* 10 */ "\312",    /* E circumflex, capital */
    /* 11 */ "\310",    /* E grave, capital */
    /* 12 */ "\313",    /* E dieresis/umlaut, capital */
    /* 13 */ "\315",    /* I acute, capital */
    /* 14 */ "\316",    /* I circumflex, capital */
    /* 15 */ "\314",    /* I grave, capital */
    /* 16 */ "\317",    /* I dieresis/umlaut, capital */
    /* 17 */ "\321",    /* N tilde, capital */
    /* 18 */ "\323",    /* O acute, capital */
    /* 19 */ "\324",    /* O circumflex, capital */
    /* 20 */ "\322",    /* O grave, capital */
    /* 21 */ "\330",    /* O slash, capital */
    /* 22 */ "\325",    /* O tilde, capital */
    /* 23 */ "\326",    /* O dieresis/umlaut, capital */
    /* 24 */ "\336",    /* THORN, Icelandic, capital */
    /* 25 */ "\332",    /* U acute, capital */
    /* 26 */ "\333",    /* U circumflex, capital */
    /* 27 */ "\331",    /* U grave, capital */
    /* 28 */ "\334",    /* U dieresis/umlaut, capital */
    /* 29 */ "\335",    /* Y acute, capital */
    /* 30 */ "\341",    /* a acute, small */
    /* 31 */ "\342",    /* a circumflex, small */
    /* 32 */ "\346",    /* ae diphthong (ligature), small */
    /* 33 */ "\340",    /* a grave, small */
    /* 34 */ "\046",    /* ampersand */
    /* 35 */ "\345",    /* a ring, small */
    /* 36 */ "\343",    /* a tilde, small */
    /* 37 */ "\344",    /* a dieresis/umlaut, small */
    /* 38 */ "\347",    /* c cedilla, small */
    /* 39 */ "\351",    /* e acute, small */
    /* 40 */ "\352",    /* e circumflex, small */
    /* 41 */ "\350",    /* e grave, small */
    /* 42 */ "\360",    /* eth, Icelandic, small */
    /* 43 */ "\353",    /* e dieresis/umlaut, small */
    /* 44 */ "\076",    /* greater than */
    /* 45 */ "\355",    /* i acute, small */
    /* 46 */ "\356",    /* i circumflex, small */
    /* 47 */ "\354",    /* i grave, small */
    /* 48 */ "\357",    /* i dieresis/umlaut, small */
    /* 49 */ "\074",    /* less than */
    /* 50 */ "\361",    /* n tilde, small */
    /* 51 */ "\363",    /* o acute, small */
    /* 52 */ "\364",    /* o circumflex, small */
    /* 53 */ "\362",    /* o grave, small */
    /* 54 */ "\370",    /* o slash, small */
    /* 55 */ "\365",    /* o tilde, small */
    /* 56 */ "\366",    /* o dieresis/umlaut, small */
    /* 57 */ "\042",    /* double quote sign - June 94 */
    /* 58 */ "\337",    /* sharp s, German (sz ligature), small */
    /* 59 */ "\376",    /* thorn, Icelandic, small */
    /* 60 */ "\372",    /* u acute, small */
    /* 61 */ "\373",    /* u circumflex, small */
    /* 62 */ "\371",    /* u grave, small */
    /* 63 */ "\374",    /* u dieresis/umlaut, small */
    /* 64 */ "\375",    /* y acute, small */
    /* 65 */ "\377",    /* y dieresis/umlaut, small */
};
206:
1.2 timbl 207:
/*  Entity values -- NeXT local representation
**  ------------------------------------------
**  Same layout as the ISO Latin 1 table: one single-character string per
**  entity, in exactly the order of the entity table referred to in the
**  DTD.  The original author's side note on the capital-letter entries
**  reads "these are ISO -100 hex", with capital O slash marked as the
**  exception.
*/
static char * NeXTCharacters[] = {
    /*  0 */ "\341",    /* AE diphthong (ligature), capital */
    /*  1 */ "\202",    /* A acute, capital */
    /*  2 */ "\203",    /* A circumflex, capital */
    /*  3 */ "\201",    /* A grave, capital */
    /*  4 */ "\206",    /* A ring, capital */
    /*  5 */ "\204",    /* A tilde, capital */
    /*  6 */ "\205",    /* A dieresis/umlaut, capital */
    /*  7 */ "\207",    /* C cedilla, capital */
    /*  8 */ "\220",    /* Eth, Icelandic, capital */
    /*  9 */ "\211",    /* E acute, capital */
    /* 10 */ "\212",    /* E circumflex, capital */
    /* 11 */ "\210",    /* E grave, capital */
    /* 12 */ "\213",    /* E dieresis/umlaut, capital */
    /* 13 */ "\215",    /* I acute, capital */
    /* 14 */ "\216",    /* I circumflex, capital */
    /* 15 */ "\214",    /* I grave, capital */
    /* 16 */ "\217",    /* I dieresis/umlaut, capital */
    /* 17 */ "\221",    /* N tilde, capital */
    /* 18 */ "\223",    /* O acute, capital */
    /* 19 */ "\224",    /* O circumflex, capital */
    /* 20 */ "\222",    /* O grave, capital */
    /* 21 */ "\351",    /* O slash, capital ('cept this) */
    /* 22 */ "\225",    /* O tilde, capital */
    /* 23 */ "\226",    /* O dieresis/umlaut, capital */
    /* 24 */ "\234",    /* THORN, Icelandic, capital */
    /* 25 */ "\230",    /* U acute, capital */
    /* 26 */ "\231",    /* U circumflex, capital */
    /* 27 */ "\227",    /* U grave, capital */
    /* 28 */ "\232",    /* U dieresis/umlaut, capital */
    /* 29 */ "\233",    /* Y acute, capital */
    /* 30 */ "\326",    /* a acute, small */
    /* 31 */ "\327",    /* a circumflex, small */
    /* 32 */ "\361",    /* ae diphthong (ligature), small */
    /* 33 */ "\325",    /* a grave, small */
    /* 34 */ "\046",    /* ampersand */
    /* 35 */ "\332",    /* a ring, small */
    /* 36 */ "\330",    /* a tilde, small */
    /* 37 */ "\331",    /* a dieresis/umlaut, small */
    /* 38 */ "\333",    /* c cedilla, small */
    /* 39 */ "\335",    /* e acute, small */
    /* 40 */ "\336",    /* e circumflex, small */
    /* 41 */ "\334",    /* e grave, small */
    /* 42 */ "\346",    /* eth, Icelandic, small */
    /* 43 */ "\337",    /* e dieresis/umlaut, small */
    /* 44 */ "\076",    /* greater than */
    /* 45 */ "\342",    /* i acute, small */
    /* 46 */ "\344",    /* i circumflex, small */
    /* 47 */ "\340",    /* i grave, small */
    /* 48 */ "\345",    /* i dieresis/umlaut, small */
    /* 49 */ "\074",    /* less than */
    /* 50 */ "\347",    /* n tilde, small */
    /* 51 */ "\355",    /* o acute, small */
    /* 52 */ "\356",    /* o circumflex, small */
    /* 53 */ "\354",    /* o grave, small */
    /* 54 */ "\371",    /* o slash, small */
    /* 55 */ "\357",    /* o tilde, small */
    /* 56 */ "\360",    /* o dieresis/umlaut, small */
    /* 57 */ "\042",    /* double quote sign - June 94 */
    /* 58 */ "\373",    /* sharp s, German (sz ligature), small */
    /* 59 */ "\374",    /* thorn, Icelandic, small */
    /* 60 */ "\363",    /* u acute, small */
    /* 61 */ "\364",    /* u circumflex, small */
    /* 62 */ "\362",    /* u grave, small */
    /* 63 */ "\366",    /* u dieresis/umlaut, small */
    /* 64 */ "\367",    /* y acute, small */
    /* 65 */ "\375",    /* y dieresis/umlaut, small */
};
281:
1.2 timbl 282: /* Entity values -- for IBM/PC Code Page 850 (International)
283: **
284: ** This MUST match exactly the table referred to in the DTD!
285: **
286: */
287: /* @@@@@@@@@@@@@@@@@ TBD */
288:
289:
290:
291: /* Set character set
292: ** ----------------
293: */
294:
295: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 296:
1.53 frystyk 297: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2 timbl 298: {
299: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
300: : ISO_Latin1;
301: }
1.1 timbl 302:
303:
304: /* Flattening the style structure
305: ** ------------------------------
306: **
307: On the NeXT, and on any read-only browser, it is simpler for the text to have
308: a sequence of styles, rather than a nested tree of styles. In this
309: case we have to flatten the structure as it arrives from SGML tags into
310: a sequence of styles.
311: */
312:
313: /* If style really needs to be set, call this
314: */
1.53 frystyk 315: PRIVATE void actually_set_style (HTStructured * me)
1.1 timbl 316: {
1.4 timbl 317: if (!me->text) { /* First time through */
1.54 frystyk 318: me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4 timbl 319: HText_beginAppend(me->text);
320: HText_setStyle(me->text, me->new_style);
321: me->in_word = NO;
1.1 timbl 322: } else {
1.4 timbl 323: HText_setStyle(me->text, me->new_style);
1.1 timbl 324: }
1.4 timbl 325: me->old_style = me->new_style;
326: me->style_change = NO;
1.1 timbl 327: }
328:
329: /* If you THINK you need to change style, call this
330: */
331:
1.53 frystyk 332: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1 timbl 333: {
1.4 timbl 334: if (me->new_style!=style) {
335: me->style_change = YES;
336: me->new_style = style;
1.1 timbl 337: }
1.11 timbl 338: me->in_word = NO;
1.1 timbl 339: }
340:
1.2 timbl 341: /*_________________________________________________________________________
342: **
343: ** A C T I O N R O U T I N E S
344: */
345:
346: /* Character handling
347: ** ------------------
1.1 timbl 348: */
1.53 frystyk 349: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 350: {
1.2 timbl 351:
1.4 timbl 352: switch (me->sp[0].tag_number) {
1.2 timbl 353: case HTML_COMMENT:
354: break; /* Do Nothing */
355:
356: case HTML_TITLE:
1.56 frystyk 357: HTChunk_putb(me->title, &c, 1);
1.2 timbl 358: break;
359:
360:
361: case HTML_LISTING: /* Litteral text */
362: case HTML_XMP:
363: case HTML_PLAINTEXT:
364: case HTML_PRE:
365: /* We guarrantee that the style is up-to-date in begin_litteral
366: */
1.4 timbl 367: HText_appendCharacter(me->text, c);
1.2 timbl 368: break;
369:
370: default: /* Free format text */
1.4 timbl 371: if (me->style_change) {
1.42 frystyk 372: if ((c=='\n') || (c==' ')) return HT_OK; /* Ignore it */
1.2 timbl 373: UPDATE_STYLE;
374: }
375: if (c=='\n') {
1.4 timbl 376: if (me->in_word) {
377: HText_appendCharacter(me->text, ' ');
378: me->in_word = NO;
1.2 timbl 379: }
380: } else {
1.4 timbl 381: HText_appendCharacter(me->text, c);
382: me->in_word = YES;
1.2 timbl 383: }
384: } /* end switch */
1.42 frystyk 385: return HT_OK;
1.1 timbl 386: }
387:
1.2 timbl 388:
389:
390: /* String handling
391: ** ---------------
392: **
393: ** This is written separately from put_character becuase the loop can
1.11 timbl 394: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 395: */
1.60 frystyk 396: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1 timbl 397: {
1.2 timbl 398:
1.4 timbl 399: switch (me->sp[0].tag_number) {
1.2 timbl 400: case HTML_COMMENT:
401: break; /* Do Nothing */
402:
403: case HTML_TITLE:
1.56 frystyk 404: HTChunk_putb(me->title, s, strlen(s));
1.2 timbl 405: break;
406:
407:
408: case HTML_LISTING: /* Litteral text */
409: case HTML_XMP:
410: case HTML_PLAINTEXT:
411: case HTML_PRE:
412:
413: /* We guarrantee that the style is up-to-date in begin_litteral
414: */
1.4 timbl 415: HText_appendText(me->text, s);
1.2 timbl 416: break;
417:
418: default: /* Free format text */
419: {
1.60 frystyk 420: const char *p = s;
1.4 timbl 421: if (me->style_change) {
1.2 timbl 422: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
1.42 frystyk 423: if (!*p) return HT_OK;
1.2 timbl 424: UPDATE_STYLE;
425: }
426: for(; *p; p++) {
1.4 timbl 427: if (me->style_change) {
1.2 timbl 428: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
429: UPDATE_STYLE;
430: }
431: if (*p=='\n') {
1.4 timbl 432: if (me->in_word) {
433: HText_appendCharacter(me->text, ' ');
434: me->in_word = NO;
1.2 timbl 435: }
436: } else {
1.4 timbl 437: HText_appendCharacter(me->text, *p);
438: me->in_word = YES;
1.2 timbl 439: }
440: } /* for */
441: }
442: } /* end switch */
1.42 frystyk 443: return HT_OK;
1.1 timbl 444: }
445:
446:
1.2 timbl 447: /* Buffer write
1.3 timbl 448: ** ------------
1.1 timbl 449: */
1.60 frystyk 450: PRIVATE int HTML_write (HTStructured * me, const char* s, int l)
1.1 timbl 451: {
1.38 frystyk 452: while (l-- > 0)
453: HTML_put_character(me, *s++);
1.42 frystyk 454: return HT_OK;
1.1 timbl 455: }
1.2 timbl 456:
457:
458: /* Start Element
459: ** -------------
460: */
1.53 frystyk 461: PRIVATE void HTML_start_element (
462: HTStructured * me,
463: int element_number,
1.60 frystyk 464: const BOOL* present,
465: const char ** value)
1.2 timbl 466: {
467: switch (element_number) {
468: case HTML_A:
469: {
1.8 timbl 470: HTChildAnchor * source;
1.9 timbl 471: char * href = NULL;
1.42 frystyk 472: if (present[HTML_A_HREF])
1.9 timbl 473: StrAllocCopy(href, value[HTML_A_HREF]);
1.8 timbl 474: source = HTAnchor_findChildAndLink(
1.4 timbl 475: me->node_anchor, /* parent */
1.2 timbl 476: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 477: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 478: present[HTML_A_REL] && value[HTML_A_REL] ?
1.54 frystyk 479: (HTLinkType) HTAtom_for(value[HTML_A_REL])
480: : 0);
1.2 timbl 481:
482: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
483: HTParentAnchor * dest =
484: HTAnchor_parent(
485: HTAnchor_followMainLink((HTAnchor*)source)
486: );
487: if (!HTAnchor_title(dest))
488: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
489: }
490: UPDATE_STYLE;
1.4 timbl 491: HText_beginAnchor(me->text, source);
1.58 frystyk 492: HT_FREE(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 493: }
494: break;
495:
496: case HTML_TITLE:
1.56 frystyk 497: HTChunk_clear(me->title);
1.2 timbl 498: break;
499:
500: case HTML_NEXTID:
501: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 502: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 503: break;
504:
505: case HTML_ISINDEX:
1.4 timbl 506: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 507: break;
508:
1.15 timbl 509: case HTML_BR:
510: UPDATE_STYLE;
511: HText_appendCharacter(me->text, '\n');
512: me->in_word = NO;
513: break;
514:
515: case HTML_HR:
516: UPDATE_STYLE;
517: HText_appendCharacter(me->text, '\n');
1.16 timbl 518: HText_appendText(me->text, "___________________________________");
1.15 timbl 519: HText_appendCharacter(me->text, '\n');
520: me->in_word = NO;
521: break;
522:
1.2 timbl 523: case HTML_P:
524: UPDATE_STYLE;
1.4 timbl 525: HText_appendParagraph(me->text);
526: me->in_word = NO;
1.2 timbl 527: break;
528:
529: case HTML_DL:
1.11 timbl 530: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 531: ? styles[HTML_DL]
1.2 timbl 532: : styles[HTML_DL]);
533: break;
534:
535: case HTML_DT:
1.4 timbl 536: if (!me->style_change) {
537: HText_appendParagraph(me->text);
538: me->in_word = NO;
1.2 timbl 539: }
540: break;
541:
542: case HTML_DD:
543: UPDATE_STYLE;
1.4 timbl 544: HTML_put_character(me, '\t'); /* Just tab out one stop */
545: me->in_word = NO;
546: break;
1.2 timbl 547:
548: case HTML_UL:
549: case HTML_OL:
550: case HTML_MENU:
551: case HTML_DIR:
1.11 timbl 552: change_paragraph_style(me, styles[element_number]);
1.2 timbl 553: break;
554:
555: case HTML_LI:
556: UPDATE_STYLE;
1.7 timbl 557: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 558: HText_appendParagraph(me->text);
1.2 timbl 559: else
1.4 timbl 560: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
561: me->in_word = NO;
1.2 timbl 562: break;
563:
564: case HTML_LISTING: /* Litteral text */
565: case HTML_XMP:
566: case HTML_PLAINTEXT:
567: case HTML_PRE:
1.11 timbl 568: change_paragraph_style(me, styles[element_number]);
1.2 timbl 569: UPDATE_STYLE;
1.4 timbl 570: if (me->comment_end)
571: HText_appendText(me->text, me->comment_end);
1.2 timbl 572: break;
1.11 timbl 573:
1.23 frystyk 574: case HTML_IMG: /* Images */
575: {
576: HTChildAnchor *source;
577: char *src = NULL;
1.49 frystyk 578: if (present[HTML_IMG_SRC])
1.23 frystyk 579: StrAllocCopy(src, value[HTML_IMG_SRC]);
580: source = HTAnchor_findChildAndLink(
581: me->node_anchor, /* parent */
582: 0, /* Tag */
583: src ? src : 0, /* Addresss */
584: 0);
585: UPDATE_STYLE;
586: HText_appendImage(me->text, source,
1.24 frystyk 587: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
588: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
589: present[HTML_IMG_ISMAP] ? YES : NO);
1.58 frystyk 590: HT_FREE(src);
1.24 frystyk 591: }
592: break;
593:
594: case HTML_HTML: /* Ignore these altogether */
595: case HTML_HEAD:
596: case HTML_BODY:
597:
1.10 timbl 598: case HTML_TT: /* Physical character highlighting */
599: case HTML_B: /* Currently ignored */
600: case HTML_I:
601: case HTML_U:
602:
603: case HTML_EM: /* Logical character highlighting */
604: case HTML_STRONG: /* Currently ignored */
605: case HTML_CODE:
606: case HTML_SAMP:
607: case HTML_KBD:
608: case HTML_VAR:
609: case HTML_DFN:
610: case HTML_CITE:
611: break;
612:
1.11 timbl 613: case HTML_H1: /* paragraph styles */
614: case HTML_H2:
615: case HTML_H3:
616: case HTML_H4:
617: case HTML_H5:
618: case HTML_H6:
619: case HTML_H7:
620: case HTML_ADDRESS:
621: case HTML_BLOCKQUOTE:
622: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 623: break;
624:
625: } /* end switch */
626:
1.16 timbl 627: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 628: if (me->sp == me->stack) {
1.44 frystyk 629: if (SGML_TRACE)
1.59 eric 630: HTTrace("HTML........ Maximum nesting of %d exceded!\n",
1.44 frystyk 631: MAX_NESTING);
632: me->overflow++;
1.12 timbl 633: return;
634: }
1.4 timbl 635: --(me->sp);
636: me->sp[0].style = me->new_style; /* Stack new style */
637: me->sp[0].tag_number = element_number;
1.10 timbl 638: }
1.1 timbl 639: }
1.10 timbl 640:
1.2 timbl 641:
1.1 timbl 642: /* End Element
1.2 timbl 643: ** -----------
1.1 timbl 644: **
1.2 timbl 645: */
646: /* When we end an element, the style must be returned to that
1.1 timbl 647: ** in effect before that element. Note that anchors (etc?)
648: ** don't have an associated style, so that we must scan down the
649: ** stack for an element with a defined style. (In fact, the styles
650: ** should be linked to the whole stack not just the top one.)
651: ** TBL 921119
1.6 timbl 652: **
653: ** We don't turn on "CAREFUL" check because the parser produces
654: ** (internal code errors apart) good nesting. The parser checks
655: ** incoming code errors, not this module.
1.1 timbl 656: */
1.53 frystyk 657: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 658: {
1.2 timbl 659: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 660: if (element_number != me->sp[0].tag_number) {
1.59 eric 661: HTTrace("HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 662: me->dtd->tags[element_number].name,
663: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 664: /* panic */
1.1 timbl 665: }
1.2 timbl 666: #endif
1.44 frystyk 667:
668: /* HFN, If overflow of nestings, we need to get back to reality */
669: if (me->overflow > 0) {
670: me->overflow--;
671: return;
672: }
673:
1.4 timbl 674: me->sp++; /* Pop state off stack */
1.44 frystyk 675:
1.2 timbl 676: switch(element_number) {
677:
678: case HTML_A:
679: UPDATE_STYLE;
1.4 timbl 680: HText_endAnchor(me->text);
1.2 timbl 681: break;
682:
683: case HTML_TITLE:
1.56 frystyk 684: HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 685: break;
686:
687: case HTML_LISTING: /* Litteral text */
688: case HTML_XMP:
689: case HTML_PLAINTEXT:
690: case HTML_PRE:
1.4 timbl 691: if (me->comment_start)
692: HText_appendText(me->text, me->comment_start);
1.2 timbl 693: /* Fall through */
694:
695: default:
1.44 frystyk 696:
697: /* Often won't really change */
698: change_paragraph_style(me, me->sp->style);
1.2 timbl 699: break;
700:
701: } /* switch */
1.1 timbl 702: }
703:
1.2 timbl 704:
705: /* Expanding entities
706: ** ------------------
707: */
708: /* (In fact, they all shrink!)
1.1 timbl 709: */
1.2 timbl 710:
1.53 frystyk 711: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 712: {
1.4 timbl 713: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 714: }
1.2 timbl 715:
1.42 frystyk 716: /* Flush an HTML object
717: ** --------------------
718: */
1.53 frystyk 719: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 720: {
721: UPDATE_STYLE; /* Creates empty document here! */
1.57 frystyk 722: if (me->comment_end) HTML_put_string(me,me->comment_end);
723: return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42 frystyk 724: }
1.2 timbl 725:
726: /* Free an HTML object
727: ** -------------------
728: **
1.4 timbl 729: ** If the document is empty, the text object will not yet exist.
730: So we could in fact abandon creating the document and return
731: an error code. In fact an empty document is an important type
732: of document, so we don't.
733: **
734: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 735: ** Otherwise, the interactive object is left.
736: */
1.53 frystyk 737: PUBLIC int HTML_free (HTStructured * me)
1.1 timbl 738: {
1.4 timbl 739: UPDATE_STYLE; /* Creates empty document here! */
740: if (me->comment_end)
741: HTML_put_string(me,me->comment_end);
742: HText_endAppend(me->text);
743:
744: if (me->target) {
1.35 duns 745: (*me->targetClass._free)(me->target);
1.2 timbl 746: }
1.56 frystyk 747: HTChunk_delete(me->title);
1.58 frystyk 748: HT_FREE(me);
1.42 frystyk 749: return HT_OK;
1.1 timbl 750: }
751:
752:
1.53 frystyk 753: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1 timbl 754:
1.14 timbl 755: {
756: if (me->target) {
757: (*me->targetClass.abort)(me->target, e);
758: }
1.56 frystyk 759: HTChunk_delete(me->title);
1.58 frystyk 760: HT_FREE(me);
1.42 frystyk 761: return HT_ERROR;
1.1 timbl 762: }
763:
1.2 timbl 764:
765: /* Get Styles from style sheet
766: ** ---------------------------
767: */
1.53 frystyk 768: PRIVATE void get_styles (void)
1.1 timbl 769: {
1.2 timbl 770: got_styles = YES;
771:
772: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 773:
1.2 timbl 774: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
775: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
776: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
777: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
778: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
779: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
780: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
781:
782: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
783: styles[HTML_UL] =
784: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
785: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
786: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 787: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 788: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
789: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
790: styles[HTML_PLAINTEXT] =
791: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
792: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
793: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
794: }
795: /* P U B L I C
796: */
797:
798: /* Structured Object Class
799: ** -----------------------
800: */
1.60 frystyk 801: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 802: {
803: "text/html",
1.42 frystyk 804: HTML_flush,
1.2 timbl 805: HTML_free,
1.14 timbl 806: HTML_abort,
1.2 timbl 807: HTML_put_character, HTML_put_string, HTML_write,
808: HTML_start_element, HTML_end_element,
809: HTML_put_entity
810: };
1.1 timbl 811:
1.4 timbl 812:
1.2 timbl 813: /* New Structured Text object
814: ** --------------------------
815: **
1.16 timbl 816: ** The structured stream can generate either presentation,
1.4 timbl 817: ** or plain text, or HTML.
1.1 timbl 818: */
1.53 frystyk 819: PRIVATE HTStructured* HTML_new (HTRequest * request,
820: void * param,
821: HTFormat input_format,
822: HTFormat output_format,
823: HTStream * output_stream)
1.1 timbl 824: {
825:
1.4 timbl 826: HTStructured * me;
827:
1.47 frystyk 828: #if 0
1.16 timbl 829: if (output_format != WWW_PLAINTEXT
830: && output_format != WWW_PRESENT
831: && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 832: HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
833: output_stream, request, NO);
1.6 timbl 834: if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 835: if (SGML_TRACE)
1.59 eric 836: HTTrace("HTML........ Can't parse HTML to %s\n",
1.44 frystyk 837: HTAtom_name(output_format));
1.4 timbl 838: exit (-99);
839: }
1.47 frystyk 840: #endif
1.4 timbl 841:
1.58 frystyk 842: if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
843: HT_OUTOFMEM("HTML_new");
1.1 timbl 844:
845: if (!got_styles) get_styles();
846:
1.4 timbl 847: me->isa = &HTMLPresentation;
1.47 frystyk 848: me->dtd = &HTMLP_dtd;
1.54 frystyk 849: me->request = request;
1.48 frystyk 850: me->node_anchor = HTRequest_anchor(request);
1.56 frystyk 851: me->title = HTChunk_new(128);
1.4 timbl 852: me->text = 0;
853: me->style_change = YES; /* Force check leading to text creation */
854: me->new_style = default_style;
855: me->old_style = 0;
856: me->sp = me->stack + MAX_NESTING - 1;
857: me->sp->tag_number = -1; /* INVALID */
858: me->sp->style = default_style; /* INVALID */
1.1 timbl 859:
1.4 timbl 860: me->comment_start = NULL;
861: me->comment_end = NULL;
1.16 timbl 862: me->target = output_stream;
863: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 864:
1.4 timbl 865: return (HTStructured*) me;
1.1 timbl 866: }
867:
868:
1.2 timbl 869: /* HTConverter for HTML to plain text
870: ** ----------------------------------
1.1 timbl 871: **
1.2 timbl 872: ** This will convert from HTML to presentation or plain text.
1.1 timbl 873: */
1.53 frystyk 874: PUBLIC HTStream* HTMLToPlain (
875: HTRequest * request,
876: void * param,
877: HTFormat input_format,
878: HTFormat output_format,
879: HTStream * output_stream)
1.1 timbl 880: {
1.47 frystyk 881: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 882: request, NULL, input_format, output_format, output_stream));
1.1 timbl 883: }
884:
885:
1.2 timbl 886: /* HTConverter for HTML to C code
887: ** ------------------------------
888: **
1.36 frystyk 889: ** C code is like plain text but all non-preformatted code
1.2 timbl 890: ** is commented out.
891: ** This will convert from HTML to presentation or plain text.
892: */
1.53 frystyk 893: PUBLIC HTStream* HTMLToC (
894: HTRequest * request,
895: void * param,
896: HTFormat input_format,
897: HTFormat output_format,
898: HTStream * output_stream)
1.1 timbl 899: {
1.4 timbl 900:
901: HTStructured * html;
902:
1.36 frystyk 903: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 904: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 905: html->comment_start = "\n/* ";
1.47 frystyk 906: html->dtd = &HTMLP_dtd;
1.2 timbl 907: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.47 frystyk 908: return SGML_new(&HTMLP_dtd, html);
1.1 timbl 909: }
910:
911:
1.2 timbl 912: /* Presenter for HTML
913: ** ------------------
914: **
915: ** This will convert from HTML to presentation or plain text.
916: **
917: ** Override this if you have a windows version
1.1 timbl 918: */
1.2 timbl 919: #ifndef GUI
1.53 frystyk 920: PUBLIC HTStream* HTMLPresent (
921: HTRequest * request,
922: void * param,
923: HTFormat input_format,
924: HTFormat output_format,
925: HTStream * output_stream)
1.1 timbl 926: {
1.47 frystyk 927: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 928: request, NULL, input_format, output_format, output_stream));
1.1 timbl 929: }
1.2 timbl 930: #endif
1.29 frystyk 931:
Webmaster