Annotation of libwww/Library/src/HTML.c, revision 1.42
1.39 frystyk 1: /* HTML.c
2: ** STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This generates a hypertext object. It converts from the
8: ** structured stream interface for HTML events into the style-
9: ** oriented interface of the HText.h interface. This module is
10: ** only used in clients and should not be linked into servers.
1.1 timbl 11: **
1.6 timbl 12: ** Override this module if making a new GUI browser.
1.1 timbl 13: **
1.35 duns 14: ** HISTORY:
15: ** 8 Jul 94 FM Insulate free() from _free structure element.
16: **
1.1 timbl 17: */
1.16 timbl 18:
1.41 frystyk 19: /* Library include files */
20: #include "tcp.h"
21: #include "HTUtils.h"
22: #include "HTString.h"
1.1 timbl 23: #include "HTAtom.h"
24: #include "HTChunk.h"
25: #include "HText.h"
26: #include "HTStyle.h"
1.3 timbl 27: #include "HTAlert.h"
1.4 timbl 28: #include "HTMLGen.h"
1.8 timbl 29: #include "HTParse.h"
1.41 frystyk 30: #include "HTML.h"
1.1 timbl 31:
32: extern HTStyleSheet * styleSheet; /* Application-wide */
33:
34: /* Module-wide style cache
**
** Filled in once by get_styles() from the application-wide styleSheet;
** HTML_new() calls get_styles() only while got_styles is still zero.
** styles[] is indexed by HTML element number, and default_style is the
** style named "Normal".
35: */
36: PRIVATE int got_styles = 0; /* Set to YES by get_styles() */
1.16 timbl 37: PRIVATE HTStyle *styles[HTMLP_ELEMENTS]; /* Paragraph style per element */
1.2 timbl 38: PRIVATE HTStyle *default_style; /* Style of a new document */
1.1 timbl 39:
40:
41: /* HTML Object
42: ** -----------
43: */
1.2 timbl 44: #define MAX_NESTING 20 /* Should be checked by parser */
45:
/* One entry of the element nesting stack. HTML_start_element() pushes an
** entry for every element whose contents are not SGML_EMPTY, and
** HTML_end_element() pops it again.
*/
46: typedef struct _stack_element {
47: HTStyle * style; /* Paragraph style in effect inside the element */
48: int tag_number; /* HTML element number; -1 in the base entry */
49: } stack_element;
50:
/* State of one HTML-to-HText conversion, created by HTML_new() and
** released by HTML_free() or HTML_abort().
*/
51: struct _HTStructured {
52: CONST HTStructuredClass * isa; /* Set to &HTMLPresentation */
53: HTParentAnchor * node_anchor; /* Anchor of the document (request->anchor) */
54: HText * text; /* Created lazily by actually_set_style() */
55:
56: HTStream* target; /* Output stream */
57: HTStreamClass targetClass; /* Output routines (copied only if target is set) */
58:
59: HTChunk title; /* Grow by 128 */
60:
61: char * comment_start; /* for literate programming: emitted after literal text ends */
62: char * comment_end; /* emitted before literal text starts, and at flush/free */
1.16 timbl 63:
64: CONST SGML_dtd* dtd; /* Set to &DTD */
65:
1.2 timbl 66: HTTag * current_tag; /* NOTE(review): not referenced in the code shown here */
67: BOOL style_change; /* new_style has not yet been applied to text */
68: HTStyle * new_style; /* Style wanted for the next text */
69: HTStyle * old_style; /* Style last applied by actually_set_style() */
70: BOOL in_word; /* Set after appending any char other than newline */
71: stack_element stack[MAX_NESTING];
72: stack_element *sp; /* Style stack pointer: starts at the last slot, grows downward */
1.1 timbl 73: };
74:
/* Minimal view of a stream object: this module only ever reaches the
** leading class pointer (as output_stream->isa).
*/
1.2 timbl 75: struct _HTStream {
76: CONST HTStreamClass * isa; /* Method table of the stream */
77: /* .... */
1.1 timbl 78: };
1.1 timbl 79:
80: /* Forward declarations of routines
81: */
82: PRIVATE void get_styles NOPARAMS;
83:
84:
1.4 timbl 85: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 86: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 87:
88: /* Style buffering avoids dummy paragraph begin/ends.
89: */
1.4 timbl 90: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 91:
92:
1.2 timbl 93: #ifdef OLD_CODE
1.1 timbl 94: /* The following accented characters are from peter Flynn, curia project */
95:
96: /* these ifdefs don't solve the problem of a simple terminal emulator
97: ** with a different character set to the client machine. But nothing does,
98: ** except looking at the TERM setting */
99:
1.2 timbl 100:
1.1 timbl 101: { "ocus" , "&" }, /* for CURIA */
102: #ifdef IBMPC
103: { "aacute" , "\240" }, /* For PC display */
104: { "eacute" , "\202" },
105: { "iacute" , "\241" },
106: { "oacute" , "\242" },
107: { "uacute" , "\243" },
108: { "Aacute" , "\101" },
109: { "Eacute" , "\220" },
110: { "Iacute" , "\111" },
111: { "Oacute" , "\117" },
112: { "Uacute" , "\125" },
113: #else
114: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
115: { "eacute" , "\351" },
116: { "iacute" , "\355" },
117: { "oacute" , "\363" },
118: { "uacute" , "\372" },
119: { "Aacute" , "\301" },
120: { "Eacute" , "\310" },
121: { "Iacute" , "\315" },
122: { "Oacute" , "\323" },
123: { "Uacute" , "\332" },
124: #endif
125: { 0, 0 } /* Terminate list */
126: };
1.2 timbl 127: #endif
1.1 timbl 128:
129:
1.2 timbl 130: /* Entity values -- for ISO Latin 1 local representation
131: **
132: ** This MUST match exactly the table referred to in the DTD!
**
** Each entry is a one-character string. The table is indexed by entity
** number (see HTML_put_entity), so the order of the entries matters.
** It is also the initial value of p_entity_values.
133: */
134: static char * ISO_Latin1[] = {
135: "\306", /* capital AE diphthong (ligature) */
136: "\301", /* capital A, acute accent */
137: "\302", /* capital A, circumflex accent */
138: "\300", /* capital A, grave accent */
139: "\305", /* capital A, ring */
140: "\303", /* capital A, tilde */
141: "\304", /* capital A, dieresis or umlaut mark */
142: "\307", /* capital C, cedilla */
143: "\320", /* capital Eth, Icelandic */
144: "\311", /* capital E, acute accent */
145: "\312", /* capital E, circumflex accent */
146: "\310", /* capital E, grave accent */
147: "\313", /* capital E, dieresis or umlaut mark */
148: "\315", /* capital I, acute accent */
149: "\316", /* capital I, circumflex accent */
150: "\314", /* capital I, grave accent */
151: "\317", /* capital I, dieresis or umlaut mark */
152: "\321", /* capital N, tilde */
153: "\323", /* capital O, acute accent */
154: "\324", /* capital O, circumflex accent */
155: "\322", /* capital O, grave accent */
156: "\330", /* capital O, slash */
157: "\325", /* capital O, tilde */
158: "\326", /* capital O, dieresis or umlaut mark */
159: "\336", /* capital THORN, Icelandic */
160: "\332", /* capital U, acute accent */
161: "\333", /* capital U, circumflex accent */
162: "\331", /* capital U, grave accent */
163: "\334", /* capital U, dieresis or umlaut mark */
164: "\335", /* capital Y, acute accent */
165: "\341", /* small a, acute accent */
166: "\342", /* small a, circumflex accent */
167: "\346", /* small ae diphthong (ligature) */
168: "\340", /* small a, grave accent */
169: "\046", /* ampersand */
170: "\345", /* small a, ring */
171: "\343", /* small a, tilde */
172: "\344", /* small a, dieresis or umlaut mark */
173: "\347", /* small c, cedilla */
174: "\351", /* small e, acute accent */
175: "\352", /* small e, circumflex accent */
176: "\350", /* small e, grave accent */
177: "\360", /* small eth, Icelandic */
178: "\353", /* small e, dieresis or umlaut mark */
179: "\076", /* greater than */
180: "\355", /* small i, acute accent */
181: "\356", /* small i, circumflex accent */
182: "\354", /* small i, grave accent */
183: "\357", /* small i, dieresis or umlaut mark */
184: "\074", /* less than */
185: "\361", /* small n, tilde */
186: "\363", /* small o, acute accent */
187: "\364", /* small o, circumflex accent */
188: "\362", /* small o, grave accent */
189: "\370", /* small o, slash */
190: "\365", /* small o, tilde */
191: "\366", /* small o, dieresis or umlaut mark */
1.36 frystyk 192: "\042", /* double quote sign - June 94 */
1.2 timbl 193: "\337", /* small sharp s, German (sz ligature) */
194: "\376", /* small thorn, Icelandic */
195: "\372", /* small u, acute accent */
196: "\373", /* small u, circumflex accent */
197: "\371", /* small u, grave accent */
198: "\374", /* small u, dieresis or umlaut mark */
199: "\375", /* small y, acute accent */
200: "\377", /* small y, dieresis or umlaut mark */
1.1 timbl 201: };
202:
1.2 timbl 203:
204: /* Entity values -- for NeXT local representation
205: **
206: ** This MUST match exactly the table referred to in the DTD!
207: **
** Same entries, in the same order, as ISO_Latin1 above, but holding
** the NeXT character codes. HTMLUseCharacterSet(HTML_NEXT_CHARS)
** points p_entity_values at this table.
208: */
209: static char * NeXTCharacters[] = {
210: "\341", /* capital AE diphthong (ligature) */
211: "\202", /* capital A, acute accent */
212: "\203", /* capital A, circumflex accent */
213: "\201", /* capital A, grave accent */
214: "\206", /* capital A, ring */
215: "\204", /* capital A, tilde */
216: "\205", /* capital A, dieresis or umlaut mark */
217: "\207", /* capital C, cedilla */
218: "\220", /* capital Eth, Icelandic */
219: "\211", /* capital E, acute accent */
220: "\212", /* capital E, circumflex accent */
221: "\210", /* capital E, grave accent */
222: "\213", /* capital E, dieresis or umlaut mark */
223: "\215", /* capital I, acute accent */
224: "\216", /* capital I, circumflex accent (these I entries are the */
225: "\214", /* capital I, grave accent ISO code minus 100 octal) */
226: "\217", /* capital I, dieresis or umlaut mark */
227: "\221", /* capital N, tilde */
228: "\223", /* capital O, acute accent */
229: "\224", /* capital O, circumflex accent */
230: "\222", /* capital O, grave accent */
231: "\351", /* capital O, slash (except this one) */
232: "\225", /* capital O, tilde */
233: "\226", /* capital O, dieresis or umlaut mark */
234: "\234", /* capital THORN, Icelandic */
235: "\230", /* capital U, acute accent */
236: "\231", /* capital U, circumflex accent */
237: "\227", /* capital U, grave accent */
238: "\232", /* capital U, dieresis or umlaut mark */
239: "\233", /* capital Y, acute accent */
240: "\326", /* small a, acute accent */
241: "\327", /* small a, circumflex accent */
242: "\361", /* small ae diphthong (ligature) */
243: "\325", /* small a, grave accent */
244: "\046", /* ampersand */
245: "\332", /* small a, ring */
246: "\330", /* small a, tilde */
247: "\331", /* small a, dieresis or umlaut mark */
248: "\333", /* small c, cedilla */
249: "\335", /* small e, acute accent */
250: "\336", /* small e, circumflex accent */
251: "\334", /* small e, grave accent */
252: "\346", /* small eth, Icelandic */
253: "\337", /* small e, dieresis or umlaut mark */
254: "\076", /* greater than */
255: "\342", /* small i, acute accent */
256: "\344", /* small i, circumflex accent */
257: "\340", /* small i, grave accent */
258: "\345", /* small i, dieresis or umlaut mark */
259: "\074", /* less than */
260: "\347", /* small n, tilde */
261: "\355", /* small o, acute accent */
262: "\356", /* small o, circumflex accent */
263: "\354", /* small o, grave accent */
264: "\371", /* small o, slash */
265: "\357", /* small o, tilde */
266: "\360", /* small o, dieresis or umlaut mark */
1.36 frystyk 267: "\042", /* double quote sign - June 94 */
1.2 timbl 268: "\373", /* small sharp s, German (sz ligature) */
269: "\374", /* small thorn, Icelandic */
270: "\363", /* small u, acute accent */
271: "\364", /* small u, circumflex accent */
272: "\362", /* small u, grave accent */
273: "\366", /* small u, dieresis or umlaut mark */
274: "\367", /* small y, acute accent */
275: "\375", /* small y, dieresis or umlaut mark */
1.1 timbl 276: };
277:
1.2 timbl 278: /* Entity values -- for IBM/PC Code Page 850 (International)
279: **
280: ** This MUST match exactly the table referred to in the DTD!
281: **
282: */
283: /* @@@@@@@@@@@@@@@@@ TBD */
284:
285:
286:
287: /* Set character set
288: ** ----------------
289: */
290:
291: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 292:
1.2 timbl 293: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
294: {
295: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
296: : ISO_Latin1;
297: }
1.1 timbl 298:
299:
300: /* Flattening the style structure
301: ** ------------------------------
302: **
303: On the NeXT, and on any read-only browser, it is simpler for the text to have
304: a sequence of styles, rather than a nested tree of styles. In this
305: case we have to flatten the structure as it arrives from SGML tags into
306: a sequence of styles.
307: */
308:
309: /* If style really needs to be set, call this
310: */
1.4 timbl 311: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 312: {
1.4 timbl 313: if (!me->text) { /* First time through */
314: me->text = HText_new2(me->node_anchor, me->target);
315: HText_beginAppend(me->text);
316: HText_setStyle(me->text, me->new_style);
317: me->in_word = NO;
1.1 timbl 318: } else {
1.4 timbl 319: HText_setStyle(me->text, me->new_style);
1.1 timbl 320: }
1.4 timbl 321: me->old_style = me->new_style;
322: me->style_change = NO;
1.1 timbl 323: }
324:
325: /* If you THINK you need to change style, call this
326: */
327:
1.11 timbl 328: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 329: {
1.4 timbl 330: if (me->new_style!=style) {
331: me->style_change = YES;
332: me->new_style = style;
1.1 timbl 333: }
1.11 timbl 334: me->in_word = NO;
1.1 timbl 335: }
336:
1.2 timbl 337: /*_________________________________________________________________________
338: **
339: ** A C T I O N R O U T I N E S
340: */
341:
342: /* Character handling
343: ** ------------------
1.1 timbl 344: */
1.42 ! frystyk 345: PRIVATE int HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 346: {
1.2 timbl 347:
1.4 timbl 348: switch (me->sp[0].tag_number) {
1.2 timbl 349: case HTML_COMMENT:
350: break; /* Do Nothing */
351:
352: case HTML_TITLE:
1.4 timbl 353: HTChunkPutc(&me->title, c);
1.2 timbl 354: break;
355:
356:
357: case HTML_LISTING: /* Litteral text */
358: case HTML_XMP:
359: case HTML_PLAINTEXT:
360: case HTML_PRE:
361: /* We guarrantee that the style is up-to-date in begin_litteral
362: */
1.4 timbl 363: HText_appendCharacter(me->text, c);
1.2 timbl 364: break;
365:
366: default: /* Free format text */
1.4 timbl 367: if (me->style_change) {
1.42 ! frystyk 368: if ((c=='\n') || (c==' ')) return HT_OK; /* Ignore it */
1.2 timbl 369: UPDATE_STYLE;
370: }
371: if (c=='\n') {
1.4 timbl 372: if (me->in_word) {
373: HText_appendCharacter(me->text, ' ');
374: me->in_word = NO;
1.2 timbl 375: }
376: } else {
1.4 timbl 377: HText_appendCharacter(me->text, c);
378: me->in_word = YES;
1.2 timbl 379: }
380: } /* end switch */
1.42 ! frystyk 381: return HT_OK;
1.1 timbl 382: }
383:
1.2 timbl 384:
385:
386: /* String handling
387: ** ---------------
388: **
389: ** This is written separately from put_character becuase the loop can
1.11 timbl 390: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 391: */
1.42 ! frystyk 392: PRIVATE int HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 393: {
1.2 timbl 394:
1.4 timbl 395: switch (me->sp[0].tag_number) {
1.2 timbl 396: case HTML_COMMENT:
397: break; /* Do Nothing */
398:
399: case HTML_TITLE:
1.4 timbl 400: HTChunkPuts(&me->title, s);
1.2 timbl 401: break;
402:
403:
404: case HTML_LISTING: /* Litteral text */
405: case HTML_XMP:
406: case HTML_PLAINTEXT:
407: case HTML_PRE:
408:
409: /* We guarrantee that the style is up-to-date in begin_litteral
410: */
1.4 timbl 411: HText_appendText(me->text, s);
1.2 timbl 412: break;
413:
414: default: /* Free format text */
415: {
416: CONST char *p = s;
1.4 timbl 417: if (me->style_change) {
1.2 timbl 418: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
1.42 ! frystyk 419: if (!*p) return HT_OK;
1.2 timbl 420: UPDATE_STYLE;
421: }
422: for(; *p; p++) {
1.4 timbl 423: if (me->style_change) {
1.2 timbl 424: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
425: UPDATE_STYLE;
426: }
427: if (*p=='\n') {
1.4 timbl 428: if (me->in_word) {
429: HText_appendCharacter(me->text, ' ');
430: me->in_word = NO;
1.2 timbl 431: }
432: } else {
1.4 timbl 433: HText_appendCharacter(me->text, *p);
434: me->in_word = YES;
1.2 timbl 435: }
436: } /* for */
437: }
438: } /* end switch */
1.42 ! frystyk 439: return HT_OK;
1.1 timbl 440: }
441:
442:
1.2 timbl 443: /* Buffer write
1.3 timbl 444: ** ------------
1.1 timbl 445: */
1.42 ! frystyk 446: PRIVATE int HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 447: {
1.38 frystyk 448: while (l-- > 0)
449: HTML_put_character(me, *s++);
1.42 ! frystyk 450: return HT_OK;
1.1 timbl 451: }
1.2 timbl 452:
453:
454: /* Start Element
455: ** -------------
456: */
457: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 458: HTStructured *, me,
1.16 timbl 459: int, element_number,
1.3 timbl 460: CONST BOOL*, present,
1.16 timbl 461: CONST char **, value)
1.2 timbl 462: {
463: switch (element_number) {
464: case HTML_A:
465: {
1.8 timbl 466: HTChildAnchor * source;
1.9 timbl 467: char * href = NULL;
1.42 ! frystyk 468: if (present[HTML_A_HREF])
1.9 timbl 469: StrAllocCopy(href, value[HTML_A_HREF]);
1.8 timbl 470: source = HTAnchor_findChildAndLink(
1.4 timbl 471: me->node_anchor, /* parent */
1.2 timbl 472: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 473: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 474: present[HTML_A_REL] && value[HTML_A_REL] ?
475: (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 476: : 0);
477:
478: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
479: HTParentAnchor * dest =
480: HTAnchor_parent(
481: HTAnchor_followMainLink((HTAnchor*)source)
482: );
483: if (!HTAnchor_title(dest))
484: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
485: }
486: UPDATE_STYLE;
1.4 timbl 487: HText_beginAnchor(me->text, source);
1.42 ! frystyk 488: FREE(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 489: }
490: break;
491:
492: case HTML_TITLE:
1.4 timbl 493: HTChunkClear(&me->title);
1.2 timbl 494: break;
495:
496: case HTML_NEXTID:
497: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 498: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 499: break;
500:
501: case HTML_ISINDEX:
1.4 timbl 502: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 503: break;
504:
1.15 timbl 505: case HTML_BR:
506: UPDATE_STYLE;
507: HText_appendCharacter(me->text, '\n');
508: me->in_word = NO;
509: break;
510:
511: case HTML_HR:
512: UPDATE_STYLE;
513: HText_appendCharacter(me->text, '\n');
1.16 timbl 514: HText_appendText(me->text, "___________________________________");
1.15 timbl 515: HText_appendCharacter(me->text, '\n');
516: me->in_word = NO;
517: break;
518:
1.2 timbl 519: case HTML_P:
520: UPDATE_STYLE;
1.4 timbl 521: HText_appendParagraph(me->text);
522: me->in_word = NO;
1.2 timbl 523: break;
524:
525: case HTML_DL:
1.11 timbl 526: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 527: ? styles[HTML_DL]
1.2 timbl 528: : styles[HTML_DL]);
529: break;
530:
531: case HTML_DT:
1.4 timbl 532: if (!me->style_change) {
533: HText_appendParagraph(me->text);
534: me->in_word = NO;
1.2 timbl 535: }
536: break;
537:
538: case HTML_DD:
539: UPDATE_STYLE;
1.4 timbl 540: HTML_put_character(me, '\t'); /* Just tab out one stop */
541: me->in_word = NO;
542: break;
1.2 timbl 543:
544: case HTML_UL:
545: case HTML_OL:
546: case HTML_MENU:
547: case HTML_DIR:
1.11 timbl 548: change_paragraph_style(me, styles[element_number]);
1.2 timbl 549: break;
550:
551: case HTML_LI:
552: UPDATE_STYLE;
1.7 timbl 553: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 554: HText_appendParagraph(me->text);
1.2 timbl 555: else
1.4 timbl 556: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
557: me->in_word = NO;
1.2 timbl 558: break;
559:
560: case HTML_LISTING: /* Litteral text */
561: case HTML_XMP:
562: case HTML_PLAINTEXT:
563: case HTML_PRE:
1.11 timbl 564: change_paragraph_style(me, styles[element_number]);
1.2 timbl 565: UPDATE_STYLE;
1.4 timbl 566: if (me->comment_end)
567: HText_appendText(me->text, me->comment_end);
1.2 timbl 568: break;
1.11 timbl 569:
1.23 frystyk 570: case HTML_IMG: /* Images */
571: {
572: HTChildAnchor *source;
573: char *src = NULL;
574: if (present[HTML_IMG_SRC]) {
575: StrAllocCopy(src, value[HTML_IMG_SRC]);
1.36 frystyk 576: #ifdef OLD_CODE
1.23 frystyk 577: HTSimplify(src);
1.36 frystyk 578: #endif
1.23 frystyk 579: }
580: source = HTAnchor_findChildAndLink(
581: me->node_anchor, /* parent */
582: 0, /* Tag */
583: src ? src : 0, /* Addresss */
584: 0);
585: UPDATE_STYLE;
586: HText_appendImage(me->text, source,
1.24 frystyk 587: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
588: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
589: present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 590: free(src);
1.24 frystyk 591: }
592: break;
593:
594: case HTML_HTML: /* Ignore these altogether */
595: case HTML_HEAD:
596: case HTML_BODY:
597:
1.10 timbl 598: case HTML_TT: /* Physical character highlighting */
599: case HTML_B: /* Currently ignored */
600: case HTML_I:
601: case HTML_U:
602:
603: case HTML_EM: /* Logical character highlighting */
604: case HTML_STRONG: /* Currently ignored */
605: case HTML_CODE:
606: case HTML_SAMP:
607: case HTML_KBD:
608: case HTML_VAR:
609: case HTML_DFN:
610: case HTML_CITE:
611: break;
612:
1.11 timbl 613: case HTML_H1: /* paragraph styles */
614: case HTML_H2:
615: case HTML_H3:
616: case HTML_H4:
617: case HTML_H5:
618: case HTML_H6:
619: case HTML_H7:
620: case HTML_ADDRESS:
621: case HTML_BLOCKQUOTE:
622: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 623: break;
624:
625: } /* end switch */
626:
1.16 timbl 627: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 628: if (me->sp == me->stack) {
1.41 frystyk 629: fprintf(TDEST, "HTML: ****** Maximum nesting of %d exceded!\n",
1.12 timbl 630: MAX_NESTING);
631: return;
632: }
1.4 timbl 633: --(me->sp);
634: me->sp[0].style = me->new_style; /* Stack new style */
635: me->sp[0].tag_number = element_number;
1.10 timbl 636: }
1.1 timbl 637: }
1.10 timbl 638:
1.2 timbl 639:
1.1 timbl 640: /* End Element
1.2 timbl 641: ** -----------
1.1 timbl 642: **
1.2 timbl 643: */
644: /* When we end an element, the style must be returned to that
1.1 timbl 645: ** in effect before that element. Note that anchors (etc?)
646: ** don't have an associated style, so that we must scan down the
647: ** stack for an element with a defined style. (In fact, the styles
648: ** should be linked to the whole stack not just the top one.)
649: ** TBL 921119
1.6 timbl 650: **
651: ** We don't turn on "CAREFUL" check because the parser produces
652: ** (internal code errors apart) good nesting. The parser checks
653: ** incoming code errors, not this module.
1.1 timbl 654: */
1.4 timbl 655: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 656: {
1.2 timbl 657: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 658: if (element_number != me->sp[0].tag_number) {
1.41 frystyk 659: fprintf(TDEST, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 660: me->dtd->tags[element_number].name,
661: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 662: /* panic */
1.1 timbl 663: }
1.2 timbl 664: #endif
665:
1.4 timbl 666: me->sp++; /* Pop state off stack */
1.2 timbl 667:
668: switch(element_number) {
669:
670: case HTML_A:
671: UPDATE_STYLE;
1.4 timbl 672: HText_endAnchor(me->text);
1.2 timbl 673: break;
674:
675: case HTML_TITLE:
1.4 timbl 676: HTChunkTerminate(&me->title);
677: HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 678: break;
679:
680: case HTML_LISTING: /* Litteral text */
681: case HTML_XMP:
682: case HTML_PLAINTEXT:
683: case HTML_PRE:
1.4 timbl 684: if (me->comment_start)
685: HText_appendText(me->text, me->comment_start);
1.2 timbl 686: /* Fall through */
687:
688: default:
689:
1.11 timbl 690: change_paragraph_style(me, me->sp->style); /* Often won't really change */
1.2 timbl 691: break;
692:
693: } /* switch */
1.1 timbl 694: }
695:
1.2 timbl 696:
697: /* Expanding entities
698: ** ------------------
699: */
700: /* (In fact, they all shrink!)
1.1 timbl 701: */
1.2 timbl 702:
1.4 timbl 703: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 704: {
1.4 timbl 705: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 706: }
1.2 timbl 707:
1.42 ! frystyk 708: /* Flush an HTML object
! 709: ** --------------------
! 710: */
! 711: PUBLIC int HTML_flush ARGS1(HTStructured *, me)
! 712: {
! 713: UPDATE_STYLE; /* Creates empty document here! */
! 714: if (me->comment_end)
! 715: HTML_put_string(me,me->comment_end);
! 716: HText_endAppend(me->text);
! 717: return (*me->targetClass.flush)(me->target);
! 718: }
1.2 timbl 719:
720: /* Free an HTML object
721: ** -------------------
722: **
1.4 timbl 723: ** If the document is empty, the text object will not yet exist.
724: So we could in fact abandon creating the document and return
725: an error code. In fact an empty document is an important type
726: of document, so we don't.
727: **
728: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 729: ** Otherwise, the interactive object is left.
730: */
1.37 frystyk 731: PUBLIC int HTML_free ARGS1(HTStructured *, me)
1.1 timbl 732: {
1.4 timbl 733: UPDATE_STYLE; /* Creates empty document here! */
734: if (me->comment_end)
735: HTML_put_string(me,me->comment_end);
736: HText_endAppend(me->text);
737:
738: if (me->target) {
1.35 duns 739: (*me->targetClass._free)(me->target);
1.2 timbl 740: }
1.19 frystyk 741: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 742: free(me);
1.42 ! frystyk 743: return HT_OK;
1.1 timbl 744: }
745:
746:
1.37 frystyk 747: PRIVATE int HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 748:
1.14 timbl 749: {
750: if (me->target) {
751: (*me->targetClass.abort)(me->target, e);
752: }
1.19 frystyk 753: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 754: free(me);
1.42 ! frystyk 755: return HT_ERROR;
1.1 timbl 756: }
757:
1.2 timbl 758:
759: /* Get Styles from style sheet
760: ** ---------------------------
761: */
762: PRIVATE void get_styles NOARGS
1.1 timbl 763: {
1.2 timbl 764: got_styles = YES;
765:
766: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 767:
1.2 timbl 768: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
769: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
770: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
771: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
772: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
773: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
774: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
775:
776: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
777: styles[HTML_UL] =
778: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
779: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
780: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 781: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 782: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
783: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
784: styles[HTML_PLAINTEXT] =
785: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
786: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
787: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
788: }
789: /* P U B L I C
790: */
791:
792: /* Structured Object Class
793: ** -----------------------
794: */
795: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
796: {
797: "text/html",
1.42 ! frystyk 798: HTML_flush,
1.2 timbl 799: HTML_free,
1.14 timbl 800: HTML_abort,
1.2 timbl 801: HTML_put_character, HTML_put_string, HTML_write,
802: HTML_start_element, HTML_end_element,
803: HTML_put_entity
804: };
1.1 timbl 805:
1.4 timbl 806:
1.2 timbl 807: /* New Structured Text object
808: ** --------------------------
809: **
1.16 timbl 810: ** The structured stream can generate either presentation,
1.4 timbl 811: ** or plain text, or HTML.
1.1 timbl 812: */
1.16 timbl 813: PUBLIC HTStructured* HTML_new ARGS5(
814: HTRequest *, request,
815: void *, param,
816: HTFormat, input_format,
817: HTFormat, output_format,
818: HTStream *, output_stream)
1.1 timbl 819: {
820:
1.4 timbl 821: HTStructured * me;
822:
1.16 timbl 823: if (output_format != WWW_PLAINTEXT
824: && output_format != WWW_PRESENT
825: && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 826: HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
827: output_stream, request, NO);
1.6 timbl 828: if (intermediate) return HTMLGenerator(intermediate);
1.41 frystyk 829: fprintf(TDEST, "** Internal error: can't parse HTML to %s\n",
1.16 timbl 830: HTAtom_name(output_format));
1.4 timbl 831: exit (-99);
832: }
833:
834: me = (HTStructured*) malloc(sizeof(*me));
835: if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 836:
837: if (!got_styles) get_styles();
838:
1.4 timbl 839: me->isa = &HTMLPresentation;
1.16 timbl 840: me->dtd = &DTD;
841: me->node_anchor = request->anchor;
1.4 timbl 842: me->title.size = 0;
843: me->title.growby = 128;
844: me->title.allocated = 0;
845: me->title.data = 0;
846: me->text = 0;
847: me->style_change = YES; /* Force check leading to text creation */
848: me->new_style = default_style;
849: me->old_style = 0;
850: me->sp = me->stack + MAX_NESTING - 1;
851: me->sp->tag_number = -1; /* INVALID */
852: me->sp->style = default_style; /* INVALID */
1.1 timbl 853:
1.4 timbl 854: me->comment_start = NULL;
855: me->comment_end = NULL;
1.16 timbl 856: me->target = output_stream;
857: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 858:
1.4 timbl 859: return (HTStructured*) me;
1.1 timbl 860: }
861:
862:
1.2 timbl 863: /* HTConverter for HTML to plain text
864: ** ----------------------------------
1.1 timbl 865: **
1.2 timbl 866: ** This will convert from HTML to presentation or plain text.
1.1 timbl 867: */
1.16 timbl 868: PUBLIC HTStream* HTMLToPlain ARGS5(
869: HTRequest *, request,
870: void *, param,
871: HTFormat, input_format,
872: HTFormat, output_format,
873: HTStream *, output_stream)
1.1 timbl 874: {
1.16 timbl 875: return SGML_new(&DTD, HTML_new(
876: request, NULL, input_format, output_format, output_stream));
1.1 timbl 877: }
878:
879:
1.2 timbl 880: /* HTConverter for HTML to C code
881: ** ------------------------------
882: **
1.36 frystyk 883: ** C code is like plain text but all non-preformatted code
1.2 timbl 884: ** is commented out.
885: ** This will convert from HTML to presentation or plain text.
886: */
1.16 timbl 887: PUBLIC HTStream* HTMLToC ARGS5(
888: HTRequest *, request,
889: void *, param,
890: HTFormat, input_format,
891: HTFormat, output_format,
892: HTStream *, output_stream)
1.1 timbl 893: {
1.4 timbl 894:
895: HTStructured * html;
896:
1.36 frystyk 897: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 898: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2 timbl 899: html->comment_start = "/* ";
1.16 timbl 900: html->dtd = &DTD;
1.2 timbl 901: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.16 timbl 902: return SGML_new(&DTD, html);
1.1 timbl 903: }
904:
905:
1.2 timbl 906: /* Presenter for HTML
907: ** ------------------
908: **
909: ** This will convert from HTML to presentation or plain text.
910: **
911: ** Override this if you have a windows version
1.1 timbl 912: */
1.2 timbl 913: #ifndef GUI
1.16 timbl 914: PUBLIC HTStream* HTMLPresent ARGS5(
915: HTRequest *, request,
916: void *, param,
917: HTFormat, input_format,
918: HTFormat, output_format,
919: HTStream *, output_stream)
1.1 timbl 920: {
1.16 timbl 921: return SGML_new(&DTD, HTML_new(
922: request, NULL, input_format, output_format, output_stream));
1.1 timbl 923: }
1.2 timbl 924: #endif
1.1 timbl 925:
926:
1.2 timbl 927: /* Record error message as a hypertext object
928: ** ------------------------------------------
929: **
930: ** The error message should be marked as an error so that
931: ** it can be reloaded later.
932: ** This implementation just throws up an error message
933: ** and leaves the document unloaded.
1.9 timbl 934: ** A smarter implementation would load an error document,
935: ** marking at such so that it is retried on reload.
1.1 timbl 936: **
1.2 timbl 937: ** On entry,
938: ** sink is a stream to the output device if any
939: ** number is the HTTP error number
940: ** message is the human readable message.
1.9 timbl 941: **
942: ** On exit,
943: ** returns a negative number to indicate lack of success in the load.
1.1 timbl 944: */
1.2 timbl 945:
946: PUBLIC int HTLoadError ARGS3(
1.17 luotonen 947: HTRequest *, req,
1.2 timbl 948: int, number,
949: CONST char *, message)
950: {
1.20 frystyk 951: char *err = "Oh I screwed up!"; /* Dummy pointer not used (I hope) */
1.2 timbl 952: HTAlert(message); /* @@@@@@@@@@@@@@@@@@@ */
1.20 frystyk 953: /* Clean up! Henrik 04/03-94 */
954: if (req && req->output_stream)
955: (*req->output_stream->isa->abort)(req->output_stream, err);
1.33 frystyk 956: #if OLD_CODE
1.25 luotonen 957: HTClearErrors(req);
1.33 frystyk 958: #endif
1.2 timbl 959: return -number;
960: }
1.29 frystyk 961:
Webmaster