Annotation of libwww/Library/src/HTML.c, revision 1.57
1.39 frystyk 1: /* HTML.c
2: ** STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
3: **
1.43 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This generates a hypertext object. It converts from the
8: ** structured stream interface of HTML events into the style-
1.47 frystyk 9: ** oriented interface of the HText module. This module is
1.2 timbl 10: ** only used in clients and should not be linked into servers.
1.1 timbl 11: **
1.6 timbl 12: ** Override this module if making a new GUI browser.
1.1 timbl 13: **
1.35 duns 14: ** HISTORY:
15: ** 8 Jul 94 FM Insulate free() from _free structure element.
16: **
1.1 timbl 17: */
1.16 timbl 18:
1.41 frystyk 19: /* Library include files */
20: #include "tcp.h"
21: #include "HTUtils.h"
22: #include "HTString.h"
1.1 timbl 23: #include "HTAtom.h"
24: #include "HTChunk.h"
25: #include "HText.h"
26: #include "HTStyle.h"
1.3 timbl 27: #include "HTAlert.h"
1.4 timbl 28: #include "HTMLGen.h"
1.8 timbl 29: #include "HTParse.h"
1.41 frystyk 30: #include "HTML.h"
1.1 timbl 31:
32: extern HTStyleSheet * styleSheet; /* Application-wide */
33:
34: /* Module-wide style cache
35: */
36: PRIVATE int got_styles = 0;
1.16 timbl 37: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 38: PRIVATE HTStyle *default_style;
1.1 timbl 39:
40:
41: /* HTML Object
42: ** -----------
43: */
1.2 timbl 44: #define MAX_NESTING 20 /* Should be checked by parser */
45:
46: typedef struct _stack_element {
47: HTStyle * style;
48: int tag_number;
49: } stack_element;
50:
51: struct _HTStructured {
52: CONST HTStructuredClass * isa;
1.54 frystyk 53: HTRequest * request;
1.2 timbl 54: HTParentAnchor * node_anchor;
55: HText * text;
56:
57: HTStream* target; /* Output stream */
58: HTStreamClass targetClass; /* Output routines */
59:
1.56 frystyk 60: HTChunk * title; /* Grow by 128 */
1.2 timbl 61:
62: char * comment_start; /* for literate programming */
63: char * comment_end;
1.16 timbl 64:
65: CONST SGML_dtd* dtd;
66:
1.2 timbl 67: HTTag * current_tag;
68: BOOL style_change;
69: HTStyle * new_style;
70: HTStyle * old_style;
71: BOOL in_word; /* Have just had a non-white char */
1.44 frystyk 72:
73: stack_element stack[MAX_NESTING];
74: stack_element *sp; /* Style stack pointer */
75: int overflow; /* Keep track of overflow nesting */
1.1 timbl 76: };
77:
1.2 timbl 78: struct _HTStream {
79: CONST HTStreamClass * isa;
80: /* .... */
81: };
1.1 timbl 82:
83: /* Forward declarations of routines
84: */
1.52 frystyk 85: PRIVATE void get_styles (void);
1.1 timbl 86:
87:
1.52 frystyk 88: PRIVATE void actually_set_style (HTStructured * me);
89: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1 timbl 90:
91: /* Style buffering avoids dummy paragraph begin/ends.
92: */
1.4 timbl 93: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 94:
95:
#ifdef OLD_CODE
/*	Remains of an older entity table, kept for reference only.
**
**	The following accented characters are from Peter Flynn, curia project.
**
**	These ifdefs don't solve the problem of a simple terminal emulator
**	with a different character set to the client machine. But nothing
**	does, except looking at the TERM setting.
**
**	NOTE(review): the opening of the array declaration is missing, so
**	this fragment cannot compile if OLD_CODE is ever defined.  In the
**	non-IBMPC branch "Eacute" is "\310", whereas the ISO_Latin1 table
**	in this file uses "\311" for capital E acute.
*/

    { "ocus",	"&" },		/* for CURIA */
#ifdef IBMPC
    { "aacute",	"\240" },	/* For PC display */
    { "eacute",	"\202" },
    { "iacute",	"\241" },
    { "oacute",	"\242" },
    { "uacute",	"\243" },
    { "Aacute",	"\101" },
    { "Eacute",	"\220" },
    { "Iacute",	"\111" },
    { "Oacute",	"\117" },
    { "Uacute",	"\125" },
#else
    { "aacute",	"\341" },	/* Works for openwindows -- Peter Flynn */
    { "eacute",	"\351" },
    { "iacute",	"\355" },
    { "oacute",	"\363" },
    { "uacute",	"\372" },
    { "Aacute",	"\301" },
    { "Eacute",	"\310" },
    { "Iacute",	"\315" },
    { "Oacute",	"\323" },
    { "Uacute",	"\332" },
#endif
    { 0, 0 }			/* Terminate list */
};
#endif
1.1 timbl 131:
132:
/*	Entity values -- for ISO Latin 1 local representation
**
**	One single-character string per entity, indexed by entity number.
**	This MUST match exactly the table referred to in the DTD!
*/
static char * ISO_Latin1[] = {
    /* Capital letters */
    "\306",	/* AE diphthong (ligature) */
    "\301",	/* A, acute accent */
    "\302",	/* A, circumflex accent */
    "\300",	/* A, grave accent */
    "\305",	/* A, ring */
    "\303",	/* A, tilde */
    "\304",	/* A, dieresis or umlaut mark */
    "\307",	/* C, cedilla */
    "\320",	/* Eth, Icelandic */
    "\311",	/* E, acute accent */
    "\312",	/* E, circumflex accent */
    "\310",	/* E, grave accent */
    "\313",	/* E, dieresis or umlaut mark */
    "\315",	/* I, acute accent */
    "\316",	/* I, circumflex accent */
    "\314",	/* I, grave accent */
    "\317",	/* I, dieresis or umlaut mark */
    "\321",	/* N, tilde */
    "\323",	/* O, acute accent */
    "\324",	/* O, circumflex accent */
    "\322",	/* O, grave accent */
    "\330",	/* O, slash */
    "\325",	/* O, tilde */
    "\326",	/* O, dieresis or umlaut mark */
    "\336",	/* THORN, Icelandic */
    "\332",	/* U, acute accent */
    "\333",	/* U, circumflex accent */
    "\331",	/* U, grave accent */
    "\334",	/* U, dieresis or umlaut mark */
    "\335",	/* Y, acute accent */

    /* Small letters and markup characters */
    "\341",	/* a, acute accent */
    "\342",	/* a, circumflex accent */
    "\346",	/* ae diphthong (ligature) */
    "\340",	/* a, grave accent */
    "\046",	/* ampersand */
    "\345",	/* a, ring */
    "\343",	/* a, tilde */
    "\344",	/* a, dieresis or umlaut mark */
    "\347",	/* c, cedilla */
    "\351",	/* e, acute accent */
    "\352",	/* e, circumflex accent */
    "\350",	/* e, grave accent */
    "\360",	/* eth, Icelandic */
    "\353",	/* e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\355",	/* i, acute accent */
    "\356",	/* i, circumflex accent */
    "\354",	/* i, grave accent */
    "\357",	/* i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\361",	/* n, tilde */
    "\363",	/* o, acute accent */
    "\364",	/* o, circumflex accent */
    "\362",	/* o, grave accent */
    "\370",	/* o, slash */
    "\365",	/* o, tilde */
    "\366",	/* o, dieresis or umlaut mark */
    "\042",	/* double quote sign - June 94 */
    "\337",	/* sharp s, German (sz ligature) */
    "\376",	/* thorn, Icelandic */
    "\372",	/* u, acute accent */
    "\373",	/* u, circumflex accent */
    "\371",	/* u, grave accent */
    "\374",	/* u, dieresis or umlaut mark */
    "\375",	/* y, acute accent */
    "\377",	/* y, dieresis or umlaut mark */
};
205:
1.2 timbl 206:
/*	Entity values -- for NeXT local representation
**
**	Same order and length as ISO_Latin1.
**	This MUST match exactly the table referred to in the DTD!
**
**	Original note on the capital letters: "these are ISO -100 hex",
**	except capital O slash.
*/
static char * NeXTCharacters[] = {
    /* Capital letters */
    "\341",	/* AE diphthong (ligature) */
    "\202",	/* A, acute accent */
    "\203",	/* A, circumflex accent */
    "\201",	/* A, grave accent */
    "\206",	/* A, ring */
    "\204",	/* A, tilde */
    "\205",	/* A, dieresis or umlaut mark */
    "\207",	/* C, cedilla */
    "\220",	/* Eth, Icelandic */
    "\211",	/* E, acute accent */
    "\212",	/* E, circumflex accent */
    "\210",	/* E, grave accent */
    "\213",	/* E, dieresis or umlaut mark */
    "\215",	/* I, acute accent */
    "\216",	/* I, circumflex accent */
    "\214",	/* I, grave accent */
    "\217",	/* I, dieresis or umlaut mark */
    "\221",	/* N, tilde */
    "\223",	/* O, acute accent */
    "\224",	/* O, circumflex accent */
    "\222",	/* O, grave accent */
    "\351",	/* O, slash ('cept this) */
    "\225",	/* O, tilde */
    "\226",	/* O, dieresis or umlaut mark */
    "\234",	/* THORN, Icelandic */
    "\230",	/* U, acute accent */
    "\231",	/* U, circumflex accent */
    "\227",	/* U, grave accent */
    "\232",	/* U, dieresis or umlaut mark */
    "\233",	/* Y, acute accent */

    /* Small letters and markup characters */
    "\326",	/* a, acute accent */
    "\327",	/* a, circumflex accent */
    "\361",	/* ae diphthong (ligature) */
    "\325",	/* a, grave accent */
    "\046",	/* ampersand */
    "\332",	/* a, ring */
    "\330",	/* a, tilde */
    "\331",	/* a, dieresis or umlaut mark */
    "\333",	/* c, cedilla */
    "\335",	/* e, acute accent */
    "\336",	/* e, circumflex accent */
    "\334",	/* e, grave accent */
    "\346",	/* eth, Icelandic */
    "\337",	/* e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\342",	/* i, acute accent */
    "\344",	/* i, circumflex accent */
    "\340",	/* i, grave accent */
    "\345",	/* i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\347",	/* n, tilde */
    "\355",	/* o, acute accent */
    "\356",	/* o, circumflex accent */
    "\354",	/* o, grave accent */
    "\371",	/* o, slash */
    "\357",	/* o, tilde */
    "\360",	/* o, dieresis or umlaut mark */
    "\042",	/* double quote sign - June 94 */
    "\373",	/* sharp s, German (sz ligature) */
    "\374",	/* thorn, Icelandic */
    "\363",	/* u, acute accent */
    "\364",	/* u, circumflex accent */
    "\362",	/* u, grave accent */
    "\366",	/* u, dieresis or umlaut mark */
    "\367",	/* y, acute accent */
    "\375",	/* y, dieresis or umlaut mark */
};
280:
1.2 timbl 281: /* Entity values -- for IBM/PC Code Page 850 (International)
282: **
283: ** This MUST match exactly the table referred to in the DTD!
284: **
285: */
286: /* @@@@@@@@@@@@@@@@@ TBD */
287:
288:
289:
290: /* Set character set
291: ** ----------------
292: */
293:
294: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 295:
1.53 frystyk 296: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2 timbl 297: {
298: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
299: : ISO_Latin1;
300: }
1.1 timbl 301:
302:
303: /* Flattening the style structure
304: ** ------------------------------
305: **
306: On the NeXT, and on any read-only browser, it is simpler for the text to have
307: a sequence of styles, rather than a nested tree of styles. In this
308: case we have to flatten the structure as it arrives from SGML tags into
309: a sequence of styles.
310: */
311:
312: /* If style really needs to be set, call this
313: */
1.53 frystyk 314: PRIVATE void actually_set_style (HTStructured * me)
1.1 timbl 315: {
1.4 timbl 316: if (!me->text) { /* First time through */
1.54 frystyk 317: me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4 timbl 318: HText_beginAppend(me->text);
319: HText_setStyle(me->text, me->new_style);
320: me->in_word = NO;
1.1 timbl 321: } else {
1.4 timbl 322: HText_setStyle(me->text, me->new_style);
1.1 timbl 323: }
1.4 timbl 324: me->old_style = me->new_style;
325: me->style_change = NO;
1.1 timbl 326: }
327:
328: /* If you THINK you need to change style, call this
329: */
330:
1.53 frystyk 331: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1 timbl 332: {
1.4 timbl 333: if (me->new_style!=style) {
334: me->style_change = YES;
335: me->new_style = style;
1.1 timbl 336: }
1.11 timbl 337: me->in_word = NO;
1.1 timbl 338: }
339:
1.2 timbl 340: /*_________________________________________________________________________
341: **
342: ** A C T I O N R O U T I N E S
343: */
344:
345: /* Character handling
346: ** ------------------
1.1 timbl 347: */
1.53 frystyk 348: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 349: {
1.2 timbl 350:
1.4 timbl 351: switch (me->sp[0].tag_number) {
1.2 timbl 352: case HTML_COMMENT:
353: break; /* Do Nothing */
354:
355: case HTML_TITLE:
1.56 frystyk 356: HTChunk_putb(me->title, &c, 1);
1.2 timbl 357: break;
358:
359:
360: case HTML_LISTING: /* Litteral text */
361: case HTML_XMP:
362: case HTML_PLAINTEXT:
363: case HTML_PRE:
364: /* We guarrantee that the style is up-to-date in begin_litteral
365: */
1.4 timbl 366: HText_appendCharacter(me->text, c);
1.2 timbl 367: break;
368:
369: default: /* Free format text */
1.4 timbl 370: if (me->style_change) {
1.42 frystyk 371: if ((c=='\n') || (c==' ')) return HT_OK; /* Ignore it */
1.2 timbl 372: UPDATE_STYLE;
373: }
374: if (c=='\n') {
1.4 timbl 375: if (me->in_word) {
376: HText_appendCharacter(me->text, ' ');
377: me->in_word = NO;
1.2 timbl 378: }
379: } else {
1.4 timbl 380: HText_appendCharacter(me->text, c);
381: me->in_word = YES;
1.2 timbl 382: }
383: } /* end switch */
1.42 frystyk 384: return HT_OK;
1.1 timbl 385: }
386:
1.2 timbl 387:
388:
389: /* String handling
390: ** ---------------
391: **
392: ** This is written separately from put_character becuase the loop can
1.11 timbl 393: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 394: */
1.53 frystyk 395: PRIVATE int HTML_put_string (HTStructured * me, CONST char* s)
1.1 timbl 396: {
1.2 timbl 397:
1.4 timbl 398: switch (me->sp[0].tag_number) {
1.2 timbl 399: case HTML_COMMENT:
400: break; /* Do Nothing */
401:
402: case HTML_TITLE:
1.56 frystyk 403: HTChunk_putb(me->title, s, strlen(s));
1.2 timbl 404: break;
405:
406:
407: case HTML_LISTING: /* Litteral text */
408: case HTML_XMP:
409: case HTML_PLAINTEXT:
410: case HTML_PRE:
411:
412: /* We guarrantee that the style is up-to-date in begin_litteral
413: */
1.4 timbl 414: HText_appendText(me->text, s);
1.2 timbl 415: break;
416:
417: default: /* Free format text */
418: {
419: CONST char *p = s;
1.4 timbl 420: if (me->style_change) {
1.2 timbl 421: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
1.42 frystyk 422: if (!*p) return HT_OK;
1.2 timbl 423: UPDATE_STYLE;
424: }
425: for(; *p; p++) {
1.4 timbl 426: if (me->style_change) {
1.2 timbl 427: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
428: UPDATE_STYLE;
429: }
430: if (*p=='\n') {
1.4 timbl 431: if (me->in_word) {
432: HText_appendCharacter(me->text, ' ');
433: me->in_word = NO;
1.2 timbl 434: }
435: } else {
1.4 timbl 436: HText_appendCharacter(me->text, *p);
437: me->in_word = YES;
1.2 timbl 438: }
439: } /* for */
440: }
441: } /* end switch */
1.42 frystyk 442: return HT_OK;
1.1 timbl 443: }
444:
445:
1.2 timbl 446: /* Buffer write
1.3 timbl 447: ** ------------
1.1 timbl 448: */
1.53 frystyk 449: PRIVATE int HTML_write (HTStructured * me, CONST char* s, int l)
1.1 timbl 450: {
1.38 frystyk 451: while (l-- > 0)
452: HTML_put_character(me, *s++);
1.42 frystyk 453: return HT_OK;
1.1 timbl 454: }
1.2 timbl 455:
456:
457: /* Start Element
458: ** -------------
459: */
1.53 frystyk 460: PRIVATE void HTML_start_element (
461: HTStructured * me,
462: int element_number,
463: CONST BOOL* present,
464: CONST char ** value)
1.2 timbl 465: {
466: switch (element_number) {
467: case HTML_A:
468: {
1.8 timbl 469: HTChildAnchor * source;
1.9 timbl 470: char * href = NULL;
1.42 frystyk 471: if (present[HTML_A_HREF])
1.9 timbl 472: StrAllocCopy(href, value[HTML_A_HREF]);
1.8 timbl 473: source = HTAnchor_findChildAndLink(
1.4 timbl 474: me->node_anchor, /* parent */
1.2 timbl 475: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 476: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 477: present[HTML_A_REL] && value[HTML_A_REL] ?
1.54 frystyk 478: (HTLinkType) HTAtom_for(value[HTML_A_REL])
479: : 0);
1.2 timbl 480:
481: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
482: HTParentAnchor * dest =
483: HTAnchor_parent(
484: HTAnchor_followMainLink((HTAnchor*)source)
485: );
486: if (!HTAnchor_title(dest))
487: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
488: }
489: UPDATE_STYLE;
1.4 timbl 490: HText_beginAnchor(me->text, source);
1.42 frystyk 491: FREE(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 492: }
493: break;
494:
495: case HTML_TITLE:
1.56 frystyk 496: HTChunk_clear(me->title);
1.2 timbl 497: break;
498:
499: case HTML_NEXTID:
500: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 501: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 502: break;
503:
504: case HTML_ISINDEX:
1.4 timbl 505: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 506: break;
507:
1.15 timbl 508: case HTML_BR:
509: UPDATE_STYLE;
510: HText_appendCharacter(me->text, '\n');
511: me->in_word = NO;
512: break;
513:
514: case HTML_HR:
515: UPDATE_STYLE;
516: HText_appendCharacter(me->text, '\n');
1.16 timbl 517: HText_appendText(me->text, "___________________________________");
1.15 timbl 518: HText_appendCharacter(me->text, '\n');
519: me->in_word = NO;
520: break;
521:
1.2 timbl 522: case HTML_P:
523: UPDATE_STYLE;
1.4 timbl 524: HText_appendParagraph(me->text);
525: me->in_word = NO;
1.2 timbl 526: break;
527:
528: case HTML_DL:
1.11 timbl 529: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 530: ? styles[HTML_DL]
1.2 timbl 531: : styles[HTML_DL]);
532: break;
533:
534: case HTML_DT:
1.4 timbl 535: if (!me->style_change) {
536: HText_appendParagraph(me->text);
537: me->in_word = NO;
1.2 timbl 538: }
539: break;
540:
541: case HTML_DD:
542: UPDATE_STYLE;
1.4 timbl 543: HTML_put_character(me, '\t'); /* Just tab out one stop */
544: me->in_word = NO;
545: break;
1.2 timbl 546:
547: case HTML_UL:
548: case HTML_OL:
549: case HTML_MENU:
550: case HTML_DIR:
1.11 timbl 551: change_paragraph_style(me, styles[element_number]);
1.2 timbl 552: break;
553:
554: case HTML_LI:
555: UPDATE_STYLE;
1.7 timbl 556: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 557: HText_appendParagraph(me->text);
1.2 timbl 558: else
1.4 timbl 559: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
560: me->in_word = NO;
1.2 timbl 561: break;
562:
563: case HTML_LISTING: /* Litteral text */
564: case HTML_XMP:
565: case HTML_PLAINTEXT:
566: case HTML_PRE:
1.11 timbl 567: change_paragraph_style(me, styles[element_number]);
1.2 timbl 568: UPDATE_STYLE;
1.4 timbl 569: if (me->comment_end)
570: HText_appendText(me->text, me->comment_end);
1.2 timbl 571: break;
1.11 timbl 572:
1.23 frystyk 573: case HTML_IMG: /* Images */
574: {
575: HTChildAnchor *source;
576: char *src = NULL;
1.49 frystyk 577: if (present[HTML_IMG_SRC])
1.23 frystyk 578: StrAllocCopy(src, value[HTML_IMG_SRC]);
579: source = HTAnchor_findChildAndLink(
580: me->node_anchor, /* parent */
581: 0, /* Tag */
582: src ? src : 0, /* Addresss */
583: 0);
584: UPDATE_STYLE;
585: HText_appendImage(me->text, source,
1.24 frystyk 586: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
587: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
588: present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 589: free(src);
1.24 frystyk 590: }
591: break;
592:
593: case HTML_HTML: /* Ignore these altogether */
594: case HTML_HEAD:
595: case HTML_BODY:
596:
1.10 timbl 597: case HTML_TT: /* Physical character highlighting */
598: case HTML_B: /* Currently ignored */
599: case HTML_I:
600: case HTML_U:
601:
602: case HTML_EM: /* Logical character highlighting */
603: case HTML_STRONG: /* Currently ignored */
604: case HTML_CODE:
605: case HTML_SAMP:
606: case HTML_KBD:
607: case HTML_VAR:
608: case HTML_DFN:
609: case HTML_CITE:
610: break;
611:
1.11 timbl 612: case HTML_H1: /* paragraph styles */
613: case HTML_H2:
614: case HTML_H3:
615: case HTML_H4:
616: case HTML_H5:
617: case HTML_H6:
618: case HTML_H7:
619: case HTML_ADDRESS:
620: case HTML_BLOCKQUOTE:
621: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 622: break;
623:
624: } /* end switch */
625:
1.16 timbl 626: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 627: if (me->sp == me->stack) {
1.44 frystyk 628: if (SGML_TRACE)
1.50 frystyk 629: TTYPrint(TDEST, "HTML........ Maximum nesting of %d exceded!\n",
1.44 frystyk 630: MAX_NESTING);
631: me->overflow++;
1.12 timbl 632: return;
633: }
1.4 timbl 634: --(me->sp);
635: me->sp[0].style = me->new_style; /* Stack new style */
636: me->sp[0].tag_number = element_number;
1.10 timbl 637: }
1.1 timbl 638: }
1.10 timbl 639:
1.2 timbl 640:
1.1 timbl 641: /* End Element
1.2 timbl 642: ** -----------
1.1 timbl 643: **
1.2 timbl 644: */
645: /* When we end an element, the style must be returned to that
1.1 timbl 646: ** in effect before that element. Note that anchors (etc?)
647: ** don't have an associated style, so that we must scan down the
648: ** stack for an element with a defined style. (In fact, the styles
649: ** should be linked to the whole stack not just the top one.)
650: ** TBL 921119
1.6 timbl 651: **
652: ** We don't turn on "CAREFUL" check because the parser produces
653: ** (internal code errors apart) good nesting. The parser checks
654: ** incoming code errors, not this module.
1.1 timbl 655: */
1.53 frystyk 656: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 657: {
1.2 timbl 658: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 659: if (element_number != me->sp[0].tag_number) {
1.50 frystyk 660: TTYPrint(TDEST, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 661: me->dtd->tags[element_number].name,
662: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 663: /* panic */
1.1 timbl 664: }
1.2 timbl 665: #endif
1.44 frystyk 666:
667: /* HFN, If overflow of nestings, we need to get back to reality */
668: if (me->overflow > 0) {
669: me->overflow--;
670: return;
671: }
672:
1.4 timbl 673: me->sp++; /* Pop state off stack */
1.44 frystyk 674:
1.2 timbl 675: switch(element_number) {
676:
677: case HTML_A:
678: UPDATE_STYLE;
1.4 timbl 679: HText_endAnchor(me->text);
1.2 timbl 680: break;
681:
682: case HTML_TITLE:
1.56 frystyk 683: HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 684: break;
685:
686: case HTML_LISTING: /* Litteral text */
687: case HTML_XMP:
688: case HTML_PLAINTEXT:
689: case HTML_PRE:
1.4 timbl 690: if (me->comment_start)
691: HText_appendText(me->text, me->comment_start);
1.2 timbl 692: /* Fall through */
693:
694: default:
1.44 frystyk 695:
696: /* Often won't really change */
697: change_paragraph_style(me, me->sp->style);
1.2 timbl 698: break;
699:
700: } /* switch */
1.1 timbl 701: }
702:
1.2 timbl 703:
704: /* Expanding entities
705: ** ------------------
706: */
707: /* (In fact, they all shrink!)
1.1 timbl 708: */
1.2 timbl 709:
1.53 frystyk 710: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 711: {
1.4 timbl 712: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 713: }
1.2 timbl 714:
1.42 frystyk 715: /* Flush an HTML object
716: ** --------------------
717: */
1.53 frystyk 718: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 719: {
720: UPDATE_STYLE; /* Creates empty document here! */
1.57 ! frystyk 721: if (me->comment_end) HTML_put_string(me,me->comment_end);
! 722: return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42 frystyk 723: }
1.2 timbl 724:
725: /* Free an HTML object
726: ** -------------------
727: **
1.4 timbl 728: ** If the document is empty, the text object will not yet exist.
729: So we could in fact abandon creating the document and return
730: an error code. In fact an empty document is an important type
731: of document, so we don't.
732: **
733: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 734: ** Otherwise, the interactive object is left.
735: */
1.53 frystyk 736: PUBLIC int HTML_free (HTStructured * me)
1.1 timbl 737: {
1.4 timbl 738: UPDATE_STYLE; /* Creates empty document here! */
739: if (me->comment_end)
740: HTML_put_string(me,me->comment_end);
741: HText_endAppend(me->text);
742:
743: if (me->target) {
1.35 duns 744: (*me->targetClass._free)(me->target);
1.2 timbl 745: }
1.56 frystyk 746: HTChunk_delete(me->title);
1.4 timbl 747: free(me);
1.42 frystyk 748: return HT_OK;
1.1 timbl 749: }
750:
751:
1.53 frystyk 752: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1 timbl 753:
1.14 timbl 754: {
755: if (me->target) {
756: (*me->targetClass.abort)(me->target, e);
757: }
1.56 frystyk 758: HTChunk_delete(me->title);
1.14 timbl 759: free(me);
1.42 frystyk 760: return HT_ERROR;
1.1 timbl 761: }
762:
1.2 timbl 763:
764: /* Get Styles from style sheet
765: ** ---------------------------
766: */
1.53 frystyk 767: PRIVATE void get_styles (void)
1.1 timbl 768: {
1.2 timbl 769: got_styles = YES;
770:
771: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 772:
1.2 timbl 773: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
774: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
775: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
776: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
777: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
778: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
779: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
780:
781: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
782: styles[HTML_UL] =
783: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
784: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
785: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 786: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 787: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
788: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
789: styles[HTML_PLAINTEXT] =
790: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
791: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
792: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
793: }
794: /* P U B L I C
795: */
796:
797: /* Structured Object Class
798: ** -----------------------
799: */
1.47 frystyk 800: PRIVATE CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 801: {
802: "text/html",
1.42 frystyk 803: HTML_flush,
1.2 timbl 804: HTML_free,
1.14 timbl 805: HTML_abort,
1.2 timbl 806: HTML_put_character, HTML_put_string, HTML_write,
807: HTML_start_element, HTML_end_element,
808: HTML_put_entity
809: };
1.1 timbl 810:
1.4 timbl 811:
1.2 timbl 812: /* New Structured Text object
813: ** --------------------------
814: **
1.16 timbl 815: ** The structured stream can generate either presentation,
1.4 timbl 816: ** or plain text, or HTML.
1.1 timbl 817: */
1.53 frystyk 818: PRIVATE HTStructured* HTML_new (HTRequest * request,
819: void * param,
820: HTFormat input_format,
821: HTFormat output_format,
822: HTStream * output_stream)
1.1 timbl 823: {
824:
1.4 timbl 825: HTStructured * me;
826:
1.47 frystyk 827: #if 0
1.16 timbl 828: if (output_format != WWW_PLAINTEXT
829: && output_format != WWW_PRESENT
830: && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 831: HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
832: output_stream, request, NO);
1.6 timbl 833: if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 834: if (SGML_TRACE)
1.50 frystyk 835: TTYPrint(TDEST, "HTML........ Can't parse HTML to %s\n",
1.44 frystyk 836: HTAtom_name(output_format));
1.4 timbl 837: exit (-99);
838: }
1.47 frystyk 839: #endif
1.4 timbl 840:
1.44 frystyk 841: if ((me = (HTStructured*) calloc(1, sizeof(*me))) == NULL)
842: outofmem(__FILE__, "HTML_new");
1.1 timbl 843:
844: if (!got_styles) get_styles();
845:
1.4 timbl 846: me->isa = &HTMLPresentation;
1.47 frystyk 847: me->dtd = &HTMLP_dtd;
1.54 frystyk 848: me->request = request;
1.48 frystyk 849: me->node_anchor = HTRequest_anchor(request);
1.56 frystyk 850: me->title = HTChunk_new(128);
1.4 timbl 851: me->text = 0;
852: me->style_change = YES; /* Force check leading to text creation */
853: me->new_style = default_style;
854: me->old_style = 0;
855: me->sp = me->stack + MAX_NESTING - 1;
856: me->sp->tag_number = -1; /* INVALID */
857: me->sp->style = default_style; /* INVALID */
1.1 timbl 858:
1.4 timbl 859: me->comment_start = NULL;
860: me->comment_end = NULL;
1.16 timbl 861: me->target = output_stream;
862: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 863:
1.4 timbl 864: return (HTStructured*) me;
1.1 timbl 865: }
866:
867:
1.2 timbl 868: /* HTConverter for HTML to plain text
869: ** ----------------------------------
1.1 timbl 870: **
1.2 timbl 871: ** This will convert from HTML to presentation or plain text.
1.1 timbl 872: */
1.53 frystyk 873: PUBLIC HTStream* HTMLToPlain (
874: HTRequest * request,
875: void * param,
876: HTFormat input_format,
877: HTFormat output_format,
878: HTStream * output_stream)
1.1 timbl 879: {
1.47 frystyk 880: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 881: request, NULL, input_format, output_format, output_stream));
1.1 timbl 882: }
883:
884:
1.2 timbl 885: /* HTConverter for HTML to C code
886: ** ------------------------------
887: **
1.36 frystyk 888: ** C code is like plain text but all non-preformatted code
1.2 timbl 889: ** is commented out.
890: ** This will convert from HTML to presentation or plain text.
891: */
1.53 frystyk 892: PUBLIC HTStream* HTMLToC (
893: HTRequest * request,
894: void * param,
895: HTFormat input_format,
896: HTFormat output_format,
897: HTStream * output_stream)
1.1 timbl 898: {
1.4 timbl 899:
900: HTStructured * html;
901:
1.36 frystyk 902: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 903: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 904: html->comment_start = "\n/* ";
1.47 frystyk 905: html->dtd = &HTMLP_dtd;
1.2 timbl 906: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.47 frystyk 907: return SGML_new(&HTMLP_dtd, html);
1.1 timbl 908: }
909:
910:
1.2 timbl 911: /* Presenter for HTML
912: ** ------------------
913: **
914: ** This will convert from HTML to presentation or plain text.
915: **
916: ** Override this if you have a windows version
1.1 timbl 917: */
1.2 timbl 918: #ifndef GUI
1.53 frystyk 919: PUBLIC HTStream* HTMLPresent (
920: HTRequest * request,
921: void * param,
922: HTFormat input_format,
923: HTFormat output_format,
924: HTStream * output_stream)
1.1 timbl 925: {
1.47 frystyk 926: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 927: request, NULL, input_format, output_format, output_stream));
1.1 timbl 928: }
1.2 timbl 929: #endif
1.29 frystyk 930:
Webmaster