Annotation of libwww/Library/src/HTML.c, revision 1.40
1.39 frystyk 1: /* HTML.c
2: ** STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This generates a hypertext object. It converts from the
8: ** structured stream interface of HTML events into the style-
9: ** oriented interface of the HText.h interface. This module is
10: ** only used in clients and should not be linked into servers.
1.1 timbl 11: **
1.6 timbl 12: ** Override this module if making a new GUI browser.
1.1 timbl 13: **
1.35 duns 14: ** HISTORY:
15: ** 8 Jul 94 FM Insulate free() from _free structure element.
16: **
1.1 timbl 17: */
1.16 timbl 18:
1.40 ! roeber 19: #include "sysdep.h"
! 20:
1.1 timbl 21: #include "HTML.h"
22:
1.16 timbl 23: /* #define CAREFUL Check nesting here not really necessary */
1.1 timbl 24:
25: #include "HTAtom.h"
26: #include "HTChunk.h"
27: #include "HText.h"
28: #include "HTStyle.h"
29:
1.3 timbl 30: #include "HTAlert.h"
1.4 timbl 31: #include "HTMLGen.h"
1.8 timbl 32: #include "HTParse.h"
1.1 timbl 33:
34: extern HTStyleSheet * styleSheet; /* Application-wide */
35:
36: /* Module-wide style cache
37: */
38: PRIVATE int got_styles = 0;
1.16 timbl 39: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 40: PRIVATE HTStyle *default_style;
1.1 timbl 41:
42:
43: /* HTML Object
44: ** -----------
45: */
1.2 timbl 46: #define MAX_NESTING 20 /* Should be checked by parser */
47:
48: typedef struct _stack_element {
49: HTStyle * style;
50: int tag_number;
51: } stack_element;
52:
53: struct _HTStructured {
54: CONST HTStructuredClass * isa;
55: HTParentAnchor * node_anchor;
56: HText * text;
57:
58: HTStream* target; /* Output stream */
59: HTStreamClass targetClass; /* Output routines */
60:
61: HTChunk title; /* Grow by 128 */
62:
63: char * comment_start; /* for literate programming */
64: char * comment_end;
1.16 timbl 65:
66: CONST SGML_dtd* dtd;
67:
1.2 timbl 68: HTTag * current_tag;
69: BOOL style_change;
70: HTStyle * new_style;
71: HTStyle * old_style;
72: BOOL in_word; /* Have just had a non-white char */
73: stack_element stack[MAX_NESTING];
74: stack_element *sp; /* Style stack pointer */
1.1 timbl 75: };
76:
1.2 timbl 77: struct _HTStream {
78: CONST HTStreamClass * isa;
79: /* .... */
80: };
1.1 timbl 81:
82: /* Forward declarations of routines
83: */
84: PRIVATE void get_styles NOPARAMS;
85:
86:
1.4 timbl 87: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 88: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 89:
90: /* Style buffering avoids dummy paragraph begin/ends.
91: */
1.4 timbl 92: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 93:
94:
1.2 timbl 95: #ifdef OLD_CODE
1.1 timbl 96: /* The following accented characters are from peter Flynn, curia project */
97:
98: /* these ifdefs don't solve the problem of a simple terminal emulator
99: ** with a different character set to the client machine. But nothing does,
100: ** except looking at the TERM setting */
101:
1.2 timbl 102:
1.1 timbl 103: { "ocus" , "&" }, /* for CURIA */
104: #ifdef IBMPC
105: { "aacute" , "\240" }, /* For PC display */
106: { "eacute" , "\202" },
107: { "iacute" , "\241" },
108: { "oacute" , "\242" },
109: { "uacute" , "\243" },
110: { "Aacute" , "\101" },
111: { "Eacute" , "\220" },
112: { "Iacute" , "\111" },
113: { "Oacute" , "\117" },
114: { "Uacute" , "\125" },
115: #else
116: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
117: { "eacute" , "\351" },
118: { "iacute" , "\355" },
119: { "oacute" , "\363" },
120: { "uacute" , "\372" },
121: { "Aacute" , "\301" },
122: { "Eacute" , "\310" },
123: { "Iacute" , "\315" },
124: { "Oacute" , "\323" },
125: { "Uacute" , "\332" },
126: #endif
127: { 0, 0 } /* Terminate list */
128: };
1.2 timbl 129: #endif
1.1 timbl 130:
131:
/*      Entity values -- for ISO Latin 1 local representation
**
**      Each entry is a one-character string, indexed by entity number.
**      This MUST match exactly the table referred to in the DTD!
*/
static char * ISO_Latin1[] = {
    /* --- capital letters --- */
    "\306",     /* capital AE diphthong (ligature) */
    "\301",     /* capital A, acute accent */
    "\302",     /* capital A, circumflex accent */
    "\300",     /* capital A, grave accent */
    "\305",     /* capital A, ring */
    "\303",     /* capital A, tilde */
    "\304",     /* capital A, dieresis or umlaut mark */
    "\307",     /* capital C, cedilla */
    "\320",     /* capital Eth, Icelandic */
    "\311",     /* capital E, acute accent */
    "\312",     /* capital E, circumflex accent */
    "\310",     /* capital E, grave accent */
    "\313",     /* capital E, dieresis or umlaut mark */
    "\315",     /* capital I, acute accent */
    "\316",     /* capital I, circumflex accent */
    "\314",     /* capital I, grave accent */
    "\317",     /* capital I, dieresis or umlaut mark */
    "\321",     /* capital N, tilde */
    "\323",     /* capital O, acute accent */
    "\324",     /* capital O, circumflex accent */
    "\322",     /* capital O, grave accent */
    "\330",     /* capital O, slash */
    "\325",     /* capital O, tilde */
    "\326",     /* capital O, dieresis or umlaut mark */
    "\336",     /* capital THORN, Icelandic */
    "\332",     /* capital U, acute accent */
    "\333",     /* capital U, circumflex accent */
    "\331",     /* capital U, grave accent */
    "\334",     /* capital U, dieresis or umlaut mark */
    "\335",     /* capital Y, acute accent */

    /* --- small letters and markup characters --- */
    "\341",     /* small a, acute accent */
    "\342",     /* small a, circumflex accent */
    "\346",     /* small ae diphthong (ligature) */
    "\340",     /* small a, grave accent */
    "\046",     /* ampersand */
    "\345",     /* small a, ring */
    "\343",     /* small a, tilde */
    "\344",     /* small a, dieresis or umlaut mark */
    "\347",     /* small c, cedilla */
    "\351",     /* small e, acute accent */
    "\352",     /* small e, circumflex accent */
    "\350",     /* small e, grave accent */
    "\360",     /* small eth, Icelandic */
    "\353",     /* small e, dieresis or umlaut mark */
    "\076",     /* greater than */
    "\355",     /* small i, acute accent */
    "\356",     /* small i, circumflex accent */
    "\354",     /* small i, grave accent */
    "\357",     /* small i, dieresis or umlaut mark */
    "\074",     /* less than */
    "\361",     /* small n, tilde */
    "\363",     /* small o, acute accent */
    "\364",     /* small o, circumflex accent */
    "\362",     /* small o, grave accent */
    "\370",     /* small o, slash */
    "\365",     /* small o, tilde */
    "\366",     /* small o, dieresis or umlaut mark */
    "\042",     /* double quote sign - June 94 */
    "\337",     /* small sharp s, German (sz ligature) */
    "\376",     /* small thorn, Icelandic */
    "\372",     /* small u, acute accent */
    "\373",     /* small u, circumflex accent */
    "\371",     /* small u, grave accent */
    "\374",     /* small u, dieresis or umlaut mark */
    "\375",     /* small y, acute accent */
    "\377",     /* small y, dieresis or umlaut mark */
};
204:
1.2 timbl 205:
/*      Entity values -- for NeXT local representation
**
**      Each entry is a one-character string, indexed by entity number.
**      This MUST match exactly the table referred to in the DTD!
*/
static char * NeXTCharacters[] = {
    /* --- capital letters --- */
    "\341",     /* capital AE diphthong (ligature) */
    "\202",     /* capital A, acute accent */
    "\203",     /* capital A, circumflex accent */
    "\201",     /* capital A, grave accent */
    "\206",     /* capital A, ring */
    "\204",     /* capital A, tilde */
    "\205",     /* capital A, dieresis or umlaut mark */
    "\207",     /* capital C, cedilla */
    "\220",     /* capital Eth, Icelandic */
    "\211",     /* capital E, acute accent */
    "\212",     /* capital E, circumflex accent */
    "\210",     /* capital E, grave accent */
    "\213",     /* capital E, dieresis or umlaut mark */
    "\215",     /* capital I, acute accent */
    "\216",     /* capital I, circumflex accent */
    "\214",     /* capital I, grave accent */
    "\217",     /* capital I, dieresis or umlaut mark */
    "\221",     /* capital N, tilde */
    "\223",     /* capital O, acute accent */
    "\224",     /* capital O, circumflex accent */
    "\222",     /* capital O, grave accent */
    "\351",     /* capital O, slash */
    "\225",     /* capital O, tilde */
    "\226",     /* capital O, dieresis or umlaut mark */
    "\234",     /* capital THORN, Icelandic */
    "\230",     /* capital U, acute accent */
    "\231",     /* capital U, circumflex accent */
    "\227",     /* capital U, grave accent */
    "\232",     /* capital U, dieresis or umlaut mark */
    "\233",     /* capital Y, acute accent */

    /* --- small letters and markup characters --- */
    "\326",     /* small a, acute accent */
    "\327",     /* small a, circumflex accent */
    "\361",     /* small ae diphthong (ligature) */
    "\325",     /* small a, grave accent */
    "\046",     /* ampersand */
    "\332",     /* small a, ring */
    "\330",     /* small a, tilde */
    "\331",     /* small a, dieresis or umlaut mark */
    "\333",     /* small c, cedilla */
    "\335",     /* small e, acute accent */
    "\336",     /* small e, circumflex accent */
    "\334",     /* small e, grave accent */
    "\346",     /* small eth, Icelandic */
    "\337",     /* small e, dieresis or umlaut mark */
    "\076",     /* greater than */
    "\342",     /* small i, acute accent */
    "\344",     /* small i, circumflex accent */
    "\340",     /* small i, grave accent */
    "\345",     /* small i, dieresis or umlaut mark */
    "\074",     /* less than */
    "\347",     /* small n, tilde */
    "\355",     /* small o, acute accent */
    "\356",     /* small o, circumflex accent */
    "\354",     /* small o, grave accent */
    "\371",     /* small o, slash */
    "\357",     /* small o, tilde */
    "\360",     /* small o, dieresis or umlaut mark */
    "\042",     /* double quote sign - June 94 */
    "\373",     /* small sharp s, German (sz ligature) */
    "\374",     /* small thorn, Icelandic */
    "\363",     /* small u, acute accent */
    "\364",     /* small u, circumflex accent */
    "\362",     /* small u, grave accent */
    "\366",     /* small u, dieresis or umlaut mark */
    "\367",     /* small y, acute accent */
    "\375",     /* small y, dieresis or umlaut mark */
};
279:
1.2 timbl 280: /* Entity values -- for IBM/PC Code Page 850 (International)
281: **
282: ** This MUST match exactly the table referred to in the DTD!
283: **
284: */
285: /* @@@@@@@@@@@@@@@@@ TBD */
286:
287:
288:
289: /* Set character set
290: ** ----------------
291: */
292:
293: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 294:
1.2 timbl 295: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
296: {
297: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
298: : ISO_Latin1;
299: }
1.1 timbl 300:
301:
302: /* Flattening the style structure
303: ** ------------------------------
304: **
305: On the NeXT, and on any read-only browser, it is simpler for the text to have
306: a sequence of styles, rather than a nested tree of styles. In this
307: case we have to flatten the structure as it arrives from SGML tags into
308: a sequence of styles.
309: */
310:
311: /* If style really needs to be set, call this
312: */
1.4 timbl 313: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 314: {
1.4 timbl 315: if (!me->text) { /* First time through */
316: me->text = HText_new2(me->node_anchor, me->target);
317: HText_beginAppend(me->text);
318: HText_setStyle(me->text, me->new_style);
319: me->in_word = NO;
1.1 timbl 320: } else {
1.4 timbl 321: HText_setStyle(me->text, me->new_style);
1.1 timbl 322: }
1.4 timbl 323: me->old_style = me->new_style;
324: me->style_change = NO;
1.1 timbl 325: }
326:
327: /* If you THINK you need to change style, call this
328: */
329:
1.11 timbl 330: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 331: {
1.4 timbl 332: if (me->new_style!=style) {
333: me->style_change = YES;
334: me->new_style = style;
1.1 timbl 335: }
1.11 timbl 336: me->in_word = NO;
1.1 timbl 337: }
338:
1.2 timbl 339: /*_________________________________________________________________________
340: **
341: ** A C T I O N R O U T I N E S
342: */
343:
344: /* Character handling
345: ** ------------------
1.1 timbl 346: */
1.4 timbl 347: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 348: {
1.2 timbl 349:
1.4 timbl 350: switch (me->sp[0].tag_number) {
1.2 timbl 351: case HTML_COMMENT:
352: break; /* Do Nothing */
353:
354: case HTML_TITLE:
1.4 timbl 355: HTChunkPutc(&me->title, c);
1.2 timbl 356: break;
357:
358:
359: case HTML_LISTING: /* Litteral text */
360: case HTML_XMP:
361: case HTML_PLAINTEXT:
362: case HTML_PRE:
363: /* We guarrantee that the style is up-to-date in begin_litteral
364: */
1.4 timbl 365: HText_appendCharacter(me->text, c);
1.2 timbl 366: break;
367:
368: default: /* Free format text */
1.4 timbl 369: if (me->style_change) {
1.2 timbl 370: if ((c=='\n') || (c==' ')) return; /* Ignore it */
371: UPDATE_STYLE;
372: }
373: if (c=='\n') {
1.4 timbl 374: if (me->in_word) {
375: HText_appendCharacter(me->text, ' ');
376: me->in_word = NO;
1.2 timbl 377: }
378: } else {
1.4 timbl 379: HText_appendCharacter(me->text, c);
380: me->in_word = YES;
1.2 timbl 381: }
382: } /* end switch */
1.1 timbl 383: }
384:
1.2 timbl 385:
386:
387: /* String handling
388: ** ---------------
389: **
390: ** This is written separately from put_character becuase the loop can
1.11 timbl 391: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 392: */
1.4 timbl 393: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 394: {
1.2 timbl 395:
1.4 timbl 396: switch (me->sp[0].tag_number) {
1.2 timbl 397: case HTML_COMMENT:
398: break; /* Do Nothing */
399:
400: case HTML_TITLE:
1.4 timbl 401: HTChunkPuts(&me->title, s);
1.2 timbl 402: break;
403:
404:
405: case HTML_LISTING: /* Litteral text */
406: case HTML_XMP:
407: case HTML_PLAINTEXT:
408: case HTML_PRE:
409:
410: /* We guarrantee that the style is up-to-date in begin_litteral
411: */
1.4 timbl 412: HText_appendText(me->text, s);
1.2 timbl 413: break;
414:
415: default: /* Free format text */
416: {
417: CONST char *p = s;
1.4 timbl 418: if (me->style_change) {
1.2 timbl 419: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
420: if (!*p) return;
421: UPDATE_STYLE;
422: }
423: for(; *p; p++) {
1.4 timbl 424: if (me->style_change) {
1.2 timbl 425: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
426: UPDATE_STYLE;
427: }
428: if (*p=='\n') {
1.4 timbl 429: if (me->in_word) {
430: HText_appendCharacter(me->text, ' ');
431: me->in_word = NO;
1.2 timbl 432: }
433: } else {
1.4 timbl 434: HText_appendCharacter(me->text, *p);
435: me->in_word = YES;
1.2 timbl 436: }
437: } /* for */
438: }
439: } /* end switch */
1.1 timbl 440: }
441:
442:
1.2 timbl 443: /* Buffer write
1.3 timbl 444: ** ------------
1.1 timbl 445: */
1.4 timbl 446: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 447: {
1.38 frystyk 448: while (l-- > 0)
449: HTML_put_character(me, *s++);
1.1 timbl 450: }
1.2 timbl 451:
452:
453: /* Start Element
454: ** -------------
455: */
456: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 457: HTStructured *, me,
1.16 timbl 458: int, element_number,
1.3 timbl 459: CONST BOOL*, present,
1.16 timbl 460: CONST char **, value)
1.2 timbl 461: {
462: switch (element_number) {
463: case HTML_A:
464: {
1.8 timbl 465: HTChildAnchor * source;
1.9 timbl 466: char * href = NULL;
467: if (present[HTML_A_HREF]) {
468: StrAllocCopy(href, value[HTML_A_HREF]);
1.36 frystyk 469: #ifdef OLD_CODE
1.9 timbl 470: HTSimplify(href);
1.36 frystyk 471: #endif
1.9 timbl 472: }
1.8 timbl 473: source = HTAnchor_findChildAndLink(
1.4 timbl 474: me->node_anchor, /* parent */
1.2 timbl 475: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 476: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 477: present[HTML_A_REL] && value[HTML_A_REL] ?
478: (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 479: : 0);
480:
481: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
482: HTParentAnchor * dest =
483: HTAnchor_parent(
484: HTAnchor_followMainLink((HTAnchor*)source)
485: );
486: if (!HTAnchor_title(dest))
487: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
488: }
489: UPDATE_STYLE;
1.4 timbl 490: HText_beginAnchor(me->text, source);
1.18 frystyk 491: free(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 492: }
493: break;
494:
495: case HTML_TITLE:
1.4 timbl 496: HTChunkClear(&me->title);
1.2 timbl 497: break;
498:
499: case HTML_NEXTID:
500: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 501: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 502: break;
503:
504: case HTML_ISINDEX:
1.4 timbl 505: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 506: break;
507:
1.15 timbl 508: case HTML_BR:
509: UPDATE_STYLE;
510: HText_appendCharacter(me->text, '\n');
511: me->in_word = NO;
512: break;
513:
514: case HTML_HR:
515: UPDATE_STYLE;
516: HText_appendCharacter(me->text, '\n');
1.16 timbl 517: HText_appendText(me->text, "___________________________________");
1.15 timbl 518: HText_appendCharacter(me->text, '\n');
519: me->in_word = NO;
520: break;
521:
1.2 timbl 522: case HTML_P:
523: UPDATE_STYLE;
1.4 timbl 524: HText_appendParagraph(me->text);
525: me->in_word = NO;
1.2 timbl 526: break;
527:
528: case HTML_DL:
1.11 timbl 529: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 530: ? styles[HTML_DL]
1.2 timbl 531: : styles[HTML_DL]);
532: break;
533:
534: case HTML_DT:
1.4 timbl 535: if (!me->style_change) {
536: HText_appendParagraph(me->text);
537: me->in_word = NO;
1.2 timbl 538: }
539: break;
540:
541: case HTML_DD:
542: UPDATE_STYLE;
1.4 timbl 543: HTML_put_character(me, '\t'); /* Just tab out one stop */
544: me->in_word = NO;
545: break;
1.2 timbl 546:
547: case HTML_UL:
548: case HTML_OL:
549: case HTML_MENU:
550: case HTML_DIR:
1.11 timbl 551: change_paragraph_style(me, styles[element_number]);
1.2 timbl 552: break;
553:
554: case HTML_LI:
555: UPDATE_STYLE;
1.7 timbl 556: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 557: HText_appendParagraph(me->text);
1.2 timbl 558: else
1.4 timbl 559: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
560: me->in_word = NO;
1.2 timbl 561: break;
562:
563: case HTML_LISTING: /* Litteral text */
564: case HTML_XMP:
565: case HTML_PLAINTEXT:
566: case HTML_PRE:
1.11 timbl 567: change_paragraph_style(me, styles[element_number]);
1.2 timbl 568: UPDATE_STYLE;
1.4 timbl 569: if (me->comment_end)
570: HText_appendText(me->text, me->comment_end);
1.2 timbl 571: break;
1.11 timbl 572:
1.23 frystyk 573: case HTML_IMG: /* Images */
574: {
575: HTChildAnchor *source;
576: char *src = NULL;
577: if (present[HTML_IMG_SRC]) {
578: StrAllocCopy(src, value[HTML_IMG_SRC]);
1.36 frystyk 579: #ifdef OLD_CODE
1.23 frystyk 580: HTSimplify(src);
1.36 frystyk 581: #endif
1.23 frystyk 582: }
583: source = HTAnchor_findChildAndLink(
584: me->node_anchor, /* parent */
585: 0, /* Tag */
586: src ? src : 0, /* Addresss */
587: 0);
588: UPDATE_STYLE;
589: HText_appendImage(me->text, source,
1.24 frystyk 590: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
591: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
592: present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 593: free(src);
1.24 frystyk 594: }
595: break;
596:
597: case HTML_HTML: /* Ignore these altogether */
598: case HTML_HEAD:
599: case HTML_BODY:
600:
1.10 timbl 601: case HTML_TT: /* Physical character highlighting */
602: case HTML_B: /* Currently ignored */
603: case HTML_I:
604: case HTML_U:
605:
606: case HTML_EM: /* Logical character highlighting */
607: case HTML_STRONG: /* Currently ignored */
608: case HTML_CODE:
609: case HTML_SAMP:
610: case HTML_KBD:
611: case HTML_VAR:
612: case HTML_DFN:
613: case HTML_CITE:
614: break;
615:
1.11 timbl 616: case HTML_H1: /* paragraph styles */
617: case HTML_H2:
618: case HTML_H3:
619: case HTML_H4:
620: case HTML_H5:
621: case HTML_H6:
622: case HTML_H7:
623: case HTML_ADDRESS:
624: case HTML_BLOCKQUOTE:
625: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 626: break;
627:
628: } /* end switch */
629:
1.16 timbl 630: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 631: if (me->sp == me->stack) {
1.12 timbl 632: fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
633: MAX_NESTING);
634: return;
635: }
1.4 timbl 636: --(me->sp);
637: me->sp[0].style = me->new_style; /* Stack new style */
638: me->sp[0].tag_number = element_number;
1.10 timbl 639: }
1.1 timbl 640: }
1.10 timbl 641:
1.2 timbl 642:
1.1 timbl 643: /* End Element
1.2 timbl 644: ** -----------
1.1 timbl 645: **
1.2 timbl 646: */
647: /* When we end an element, the style must be returned to that
1.1 timbl 648: ** in effect before that element. Note that anchors (etc?)
649: ** don't have an associated style, so that we must scan down the
650: ** stack for an element with a defined style. (In fact, the styles
651: ** should be linked to the whole stack not just the top one.)
652: ** TBL 921119
1.6 timbl 653: **
654: ** We don't turn on "CAREFUL" check because the parser produces
655: ** (internal code errors apart) good nesting. The parser checks
656: ** incoming code errors, not this module.
1.1 timbl 657: */
1.4 timbl 658: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 659: {
1.2 timbl 660: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 661: if (element_number != me->sp[0].tag_number) {
1.2 timbl 662: fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 663: me->dtd->tags[element_number].name,
664: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 665: /* panic */
1.1 timbl 666: }
1.2 timbl 667: #endif
668:
1.4 timbl 669: me->sp++; /* Pop state off stack */
1.2 timbl 670:
671: switch(element_number) {
672:
673: case HTML_A:
674: UPDATE_STYLE;
1.4 timbl 675: HText_endAnchor(me->text);
1.2 timbl 676: break;
677:
678: case HTML_TITLE:
1.4 timbl 679: HTChunkTerminate(&me->title);
680: HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 681: break;
682:
683: case HTML_LISTING: /* Litteral text */
684: case HTML_XMP:
685: case HTML_PLAINTEXT:
686: case HTML_PRE:
1.4 timbl 687: if (me->comment_start)
688: HText_appendText(me->text, me->comment_start);
1.2 timbl 689: /* Fall through */
690:
691: default:
692:
1.11 timbl 693: change_paragraph_style(me, me->sp->style); /* Often won't really change */
1.2 timbl 694: break;
695:
696: } /* switch */
1.1 timbl 697: }
698:
1.2 timbl 699:
700: /* Expanding entities
701: ** ------------------
702: */
703: /* (In fact, they all shrink!)
1.1 timbl 704: */
1.2 timbl 705:
1.4 timbl 706: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 707: {
1.4 timbl 708: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 709: }
1.2 timbl 710:
711:
712: /* Free an HTML object
713: ** -------------------
714: **
1.4 timbl 715: ** If the document is empty, the text object will not yet exist.
716: So we could in fact abandon creating the document and return
717: an error code. In fact an empty document is an important type
718: of document, so we don't.
719: **
720: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 721: ** Otherwise, the interactive object is left.
722: */
1.37 frystyk 723: PUBLIC int HTML_free ARGS1(HTStructured *, me)
1.1 timbl 724: {
1.4 timbl 725: UPDATE_STYLE; /* Creates empty document here! */
726: if (me->comment_end)
727: HTML_put_string(me,me->comment_end);
728: HText_endAppend(me->text);
729:
730: if (me->target) {
1.35 duns 731: (*me->targetClass._free)(me->target);
1.2 timbl 732: }
1.19 frystyk 733: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 734: free(me);
1.37 frystyk 735: return 0;
1.1 timbl 736: }
737:
738:
1.37 frystyk 739: PRIVATE int HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 740:
1.14 timbl 741: {
742: if (me->target) {
743: (*me->targetClass.abort)(me->target, e);
744: }
1.19 frystyk 745: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 746: free(me);
1.37 frystyk 747: return EOF;
1.1 timbl 748: }
749:
1.2 timbl 750:
751: /* Get Styles from style sheet
752: ** ---------------------------
753: */
754: PRIVATE void get_styles NOARGS
1.1 timbl 755: {
1.2 timbl 756: got_styles = YES;
757:
758: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 759:
1.2 timbl 760: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
761: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
762: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
763: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
764: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
765: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
766: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
767:
768: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
769: styles[HTML_UL] =
770: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
771: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
772: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 773: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 774: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
775: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
776: styles[HTML_PLAINTEXT] =
777: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
778: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
779: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
780: }
781: /* P U B L I C
782: */
783:
784: /* Structured Object Class
785: ** -----------------------
786: */
787: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
788: {
789: "text/html",
790: HTML_free,
1.14 timbl 791: HTML_abort,
1.2 timbl 792: HTML_put_character, HTML_put_string, HTML_write,
793: HTML_start_element, HTML_end_element,
794: HTML_put_entity
795: };
1.1 timbl 796:
1.4 timbl 797:
1.2 timbl 798: /* New Structured Text object
799: ** --------------------------
800: **
1.16 timbl 801: ** The structured stream can generate either presentation,
1.4 timbl 802: ** or plain text, or HTML.
1.1 timbl 803: */
1.16 timbl 804: PUBLIC HTStructured* HTML_new ARGS5(
805: HTRequest *, request,
806: void *, param,
807: HTFormat, input_format,
808: HTFormat, output_format,
809: HTStream *, output_stream)
1.1 timbl 810: {
811:
1.4 timbl 812: HTStructured * me;
813:
1.16 timbl 814: if (output_format != WWW_PLAINTEXT
815: && output_format != WWW_PRESENT
816: && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 817: HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
818: output_stream, request, NO);
1.6 timbl 819: if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 820: fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
1.16 timbl 821: HTAtom_name(output_format));
1.4 timbl 822: exit (-99);
823: }
824:
825: me = (HTStructured*) malloc(sizeof(*me));
826: if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 827:
828: if (!got_styles) get_styles();
829:
1.4 timbl 830: me->isa = &HTMLPresentation;
1.16 timbl 831: me->dtd = &DTD;
832: me->node_anchor = request->anchor;
1.4 timbl 833: me->title.size = 0;
834: me->title.growby = 128;
835: me->title.allocated = 0;
836: me->title.data = 0;
837: me->text = 0;
838: me->style_change = YES; /* Force check leading to text creation */
839: me->new_style = default_style;
840: me->old_style = 0;
841: me->sp = me->stack + MAX_NESTING - 1;
842: me->sp->tag_number = -1; /* INVALID */
843: me->sp->style = default_style; /* INVALID */
1.1 timbl 844:
1.4 timbl 845: me->comment_start = NULL;
846: me->comment_end = NULL;
1.16 timbl 847: me->target = output_stream;
848: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 849:
1.4 timbl 850: return (HTStructured*) me;
1.1 timbl 851: }
852:
853:
1.2 timbl 854: /* HTConverter for HTML to plain text
855: ** ----------------------------------
1.1 timbl 856: **
1.2 timbl 857: ** This will convert from HTML to presentation or plain text.
1.1 timbl 858: */
1.16 timbl 859: PUBLIC HTStream* HTMLToPlain ARGS5(
860: HTRequest *, request,
861: void *, param,
862: HTFormat, input_format,
863: HTFormat, output_format,
864: HTStream *, output_stream)
1.1 timbl 865: {
1.16 timbl 866: return SGML_new(&DTD, HTML_new(
867: request, NULL, input_format, output_format, output_stream));
1.1 timbl 868: }
869:
870:
1.2 timbl 871: /* HTConverter for HTML to C code
872: ** ------------------------------
873: **
1.36 frystyk 874: ** C code is like plain text but all non-preformatted code
1.2 timbl 875: ** is commented out.
876: ** This will convert from HTML to presentation or plain text.
877: */
1.16 timbl 878: PUBLIC HTStream* HTMLToC ARGS5(
879: HTRequest *, request,
880: void *, param,
881: HTFormat, input_format,
882: HTFormat, output_format,
883: HTStream *, output_stream)
1.1 timbl 884: {
1.4 timbl 885:
886: HTStructured * html;
887:
1.36 frystyk 888: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 889: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2 timbl 890: html->comment_start = "/* ";
1.16 timbl 891: html->dtd = &DTD;
1.2 timbl 892: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.16 timbl 893: return SGML_new(&DTD, html);
1.1 timbl 894: }
895:
896:
1.2 timbl 897: /* Presenter for HTML
898: ** ------------------
899: **
900: ** This will convert from HTML to presentation or plain text.
901: **
902: ** Override this if you have a windows version
1.1 timbl 903: */
1.2 timbl 904: #ifndef GUI
1.16 timbl 905: PUBLIC HTStream* HTMLPresent ARGS5(
906: HTRequest *, request,
907: void *, param,
908: HTFormat, input_format,
909: HTFormat, output_format,
910: HTStream *, output_stream)
1.1 timbl 911: {
1.16 timbl 912: return SGML_new(&DTD, HTML_new(
913: request, NULL, input_format, output_format, output_stream));
1.1 timbl 914: }
1.2 timbl 915: #endif
1.1 timbl 916:
917:
1.2 timbl 918: /* Record error message as a hypertext object
919: ** ------------------------------------------
920: **
921: ** The error message should be marked as an error so that
922: ** it can be reloaded later.
923: ** This implementation just throws up an error message
924: ** and leaves the document unloaded.
1.9 timbl 925: ** A smarter implementation would load an error document,
926: ** marking at such so that it is retried on reload.
1.1 timbl 927: **
1.2 timbl 928: ** On entry,
929: ** sink is a stream to the output device if any
930: ** number is the HTTP error number
931: ** message is the human readable message.
1.9 timbl 932: **
933: ** On exit,
934: ** returns a negative number to indicate lack of success in the load.
1.1 timbl 935: */
1.2 timbl 936:
937: PUBLIC int HTLoadError ARGS3(
1.17 luotonen 938: HTRequest *, req,
1.2 timbl 939: int, number,
940: CONST char *, message)
941: {
1.20 frystyk 942: char *err = "Oh I screwed up!"; /* Dummy pointer not used (I hope) */
1.2 timbl 943: HTAlert(message); /* @@@@@@@@@@@@@@@@@@@ */
1.20 frystyk 944: /* Clean up! Henrik 04/03-94 */
945: if (req && req->output_stream)
946: (*req->output_stream->isa->abort)(req->output_stream, err);
1.33 frystyk 947: #if OLD_CODE
1.25 luotonen 948: HTClearErrors(req);
1.33 frystyk 949: #endif
1.2 timbl 950: return -number;
951: }
1.29 frystyk 952:
Webmaster