Annotation of libwww/Library/src/HTML.c, revision 1.35
1.2 timbl 1: /* Structured stream to Rich hypertext converter
2: ** ============================================
1.1 timbl 3: **
**	This generates a hypertext object.  It converts from the
**	structured stream interface for HTML events into the style-
**	oriented interface of the HText.h interface.  This module is
**	only used in clients and should not be linked into servers.
1.1 timbl 8: **
1.6 timbl 9: ** Override this module if making a new GUI browser.
1.1 timbl 10: **
1.35 ! duns 11: ** HISTORY:
! 12: ** 8 Jul 94 FM Insulate free() from _free structure element.
! 13: **
1.1 timbl 14: */
1.16 timbl 15:
1.1 timbl 16: #include "HTML.h"
17:
1.16 timbl 18: /* #define CAREFUL Check nesting here not really necessary */
1.2 timbl 19:
1.1 timbl 20: #include <ctype.h>
21: #include <stdio.h>
22:
23: #include "HTAtom.h"
24: #include "HTChunk.h"
25: #include "HText.h"
26: #include "HTStyle.h"
27:
1.3 timbl 28: #include "HTAlert.h"
1.4 timbl 29: #include "HTMLGen.h"
1.8 timbl 30: #include "HTParse.h"
1.1 timbl 31:
32: extern HTStyleSheet * styleSheet; /* Application-wide */
33:
/*	Module-wide style cache
**
**	got_styles	is set to YES by get_styles(); HTML_new() calls
**			get_styles() only while it is still zero.
**	styles		is indexed by HTML element number; get_styles()
**			fills only the entries for elements that have a
**			paragraph style, the rest keep their static zero value.
**	default_style	is the style looked up under the name "Normal".
*/
PRIVATE int 		got_styles = 0;
PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
PRIVATE HTStyle *default_style;
1.1 timbl 39:
40:
41: /* HTML Object
42: ** -----------
43: */
1.2 timbl 44: #define MAX_NESTING 20 /* Should be checked by parser */
45:
/*	One entry of the element nesting stack kept in each HTML object.
*/
typedef struct _stack_element {
        HTStyle *	style;		/* Value of new_style recorded when the element was pushed */
	int		tag_number;	/* Element number; HTML_new puts -1 in the base entry */
} stack_element;
50:
/*	State of one HTML-to-HText conversion.
**
**	Objects are created by HTML_new() and released by HTML_free()
**	or HTML_abort().
*/
struct _HTStructured {
    CONST HTStructuredClass * 	isa;		/* Set to &HTMLPresentation by HTML_new */
    HTParentAnchor * 		node_anchor;	/* Taken from request->anchor */
    HText * 			text;		/* Created lazily by actually_set_style */

    HTStream*			target;		/* Output stream */
    HTStreamClass		targetClass;	/* Output routines */
						/* (copy of *target->isa, set only if target is non-null) */

    HTChunk 			title;		/* Grow by 128 */
						/* Collects characters seen inside TITLE */

    char *			comment_start;	/* for literate programming */
    char *			comment_end;	/* Both NULL unless set by HTMLToC */

    CONST SGML_dtd*		dtd;		/* Used to look up element contents and names */

    HTTag *			current_tag;	/* Not referenced elsewhere in the visible code */
    BOOL			style_change;	/* YES while new_style has not yet been applied to text */
    HTStyle *			new_style;	/* Style requested by the most recent change */
    HTStyle *			old_style;	/* Style last applied by actually_set_style */
    BOOL			in_word;  /* Have just had a non-white char */
    stack_element 	stack[MAX_NESTING];
    stack_element 	*sp;		/* Style stack pointer */
					/* (starts at the last entry and is decremented on push) */
};
74:
/*	Minimal view of a stream object: this module only needs the class
**	pointer (it is dereferenced as output_stream->isa in HTML_new,
**	HTMLToC and HTLoadError).
*/
struct _HTStream {
	CONST HTStreamClass *	isa;
	/* .... */
};
1.1 timbl 79:
80: /* Forward declarations of routines
81: */
82: PRIVATE void get_styles NOPARAMS;
83:
84:
1.4 timbl 85: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 86: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 87:
/*	Style buffering avoids dummy paragraph begin/ends.
**
**	Applies a pending style change, if any.  Requires a variable
**	`me' (HTStructured *) in scope and must be used as a statement
**	followed by a semicolon.  The do/while(0) wrapper makes the
**	expansion a single statement so that it cannot capture an `else'
**	belonging to an enclosing `if'.
*/
#define UPDATE_STYLE \
	do { if (me->style_change) { actually_set_style(me); } } while (0)
1.1 timbl 91:
92:
1.2 timbl 93: #ifdef OLD_CODE
1.1 timbl 94: /* The following accented characters are from peter Flynn, curia project */
95:
96: /* these ifdefs don't solve the problem of a simple terminal emulator
97: ** with a different character set to the client machine. But nothing does,
98: ** except looking at the TERM setting */
99:
1.2 timbl 100:
1.1 timbl 101: { "ocus" , "&" }, /* for CURIA */
102: #ifdef IBMPC
103: { "aacute" , "\240" }, /* For PC display */
104: { "eacute" , "\202" },
105: { "iacute" , "\241" },
106: { "oacute" , "\242" },
107: { "uacute" , "\243" },
108: { "Aacute" , "\101" },
109: { "Eacute" , "\220" },
110: { "Iacute" , "\111" },
111: { "Oacute" , "\117" },
112: { "Uacute" , "\125" },
113: #else
114: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
115: { "eacute" , "\351" },
116: { "iacute" , "\355" },
117: { "oacute" , "\363" },
118: { "uacute" , "\372" },
119: { "Aacute" , "\301" },
120: { "Eacute" , "\310" },
121: { "Iacute" , "\315" },
122: { "Oacute" , "\323" },
123: { "Uacute" , "\332" },
124: #endif
125: { 0, 0 } /* Terminate list */
126: };
1.2 timbl 127: #endif
1.1 timbl 128:
129:
/*	Entity values -- for ISO Latin 1 local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	One single-character string per entity, indexed by entity number.
**	The table has 65 entries; NeXTCharacters lists the same entities
**	in the same order.
**	NOTE(review): the order looks like the ASCII-sorted entity names
**	(AElig ... yuml) -- confirm against the DTD's entity name table.
*/
static char * ISO_Latin1[] = {
  	"\306",	/* capital AE diphthong (ligature) */ 
  	"\301",	/* capital A, acute accent */ 
  	"\302",	/* capital A, circumflex accent */ 
  	"\300",	/* capital A, grave accent */ 
  	"\305",	/* capital A, ring */ 
  	"\303",	/* capital A, tilde */ 
  	"\304",	/* capital A, dieresis or umlaut mark */ 
  	"\307",	/* capital C, cedilla */ 
  	"\320",	/* capital Eth, Icelandic */ 
  	"\311",	/* capital E, acute accent */ 
  	"\312",	/* capital E, circumflex accent */ 
  	"\310",	/* capital E, grave accent */ 
  	"\313",	/* capital E, dieresis or umlaut mark */ 
  	"\315",	/* capital I, acute accent */ 
  	"\316",	/* capital I, circumflex accent */ 
  	"\314",	/* capital I, grave accent */ 
  	"\317",	/* capital I, dieresis or umlaut mark */ 
  	"\321",	/* capital N, tilde */ 
  	"\323",	/* capital O, acute accent */ 
  	"\324",	/* capital O, circumflex accent */ 
  	"\322",	/* capital O, grave accent */ 
  	"\330",	/* capital O, slash */ 
  	"\325",	/* capital O, tilde */ 
  	"\326",	/* capital O, dieresis or umlaut mark */ 
  	"\336",	/* capital THORN, Icelandic */ 
  	"\332",	/* capital U, acute accent */ 
  	"\333",	/* capital U, circumflex accent */ 
  	"\331",	/* capital U, grave accent */ 
  	"\334",	/* capital U, dieresis or umlaut mark */ 
  	"\335",	/* capital Y, acute accent */ 
  	"\341",	/* small a, acute accent */ 
  	"\342",	/* small a, circumflex accent */ 
  	"\346",	/* small ae diphthong (ligature) */ 
  	"\340",	/* small a, grave accent */ 
  	"\046",	/* ampersand */ 
  	"\345",	/* small a, ring */ 
  	"\343",	/* small a, tilde */ 
  	"\344",	/* small a, dieresis or umlaut mark */ 
  	"\347",	/* small c, cedilla */ 
  	"\351",	/* small e, acute accent */ 
  	"\352",	/* small e, circumflex accent */ 
  	"\350",	/* small e, grave accent */ 
  	"\360",	/* small eth, Icelandic */ 
  	"\353",	/* small e, dieresis or umlaut mark */ 
  	"\076",	/* greater than */ 
  	"\355",	/* small i, acute accent */ 
  	"\356",	/* small i, circumflex accent */ 
  	"\354",	/* small i, grave accent */ 
  	"\357",	/* small i, dieresis or umlaut mark */ 
  	"\074",	/* less than */ 
  	"\361",	/* small n, tilde */ 
  	"\363",	/* small o, acute accent */ 
  	"\364",	/* small o, circumflex accent */ 
  	"\362",	/* small o, grave accent */ 
  	"\370",	/* small o, slash */ 
  	"\365",	/* small o, tilde */ 
  	"\366",	/* small o, dieresis or umlaut mark */ 
  	"\337",	/* small sharp s, German (sz ligature) */ 
  	"\376",	/* small thorn, Icelandic */ 
  	"\372",	/* small u, acute accent */ 
  	"\373",	/* small u, circumflex accent */ 
  	"\371",	/* small u, grave accent */ 
  	"\374",	/* small u, dieresis or umlaut mark */ 
  	"\375",	/* small y, acute accent */ 
  	"\377",	/* small y, dieresis or umlaut mark */ 
};
201:
1.2 timbl 202:
/*	Entity values -- for NeXT local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	Same 65 entities, in the same order, as ISO_Latin1; only the
**	character codes differ.  Selected by HTMLUseCharacterSet() when
**	it is passed HTML_NEXT_CHARS.
**	NOTE(review): the margin notes beside the capital I and capital
**	O-slash entries ("these are ISO -100 hex", "'cept this") are kept
**	verbatim from the original; the codes listed here are octal
**	escapes, so check that remark against a NeXT encoding table.
*/
static char * NeXTCharacters[] = {
  	"\341",	/* capital AE diphthong (ligature) 	*/ 
  	"\202",	/* capital A, acute accent		*/ 
  	"\203",	/* capital A, circumflex accent 	*/ 
  	"\201",	/* capital A, grave accent 		*/ 
  	"\206",	/* capital A, ring 			*/ 
  	"\204",	/* capital A, tilde 			*/ 
  	"\205",	/* capital A, dieresis or umlaut mark	*/ 
  	"\207",	/* capital C, cedilla 			*/ 
  	"\220",	/* capital Eth, Icelandic 		*/ 
  	"\211",	/* capital E, acute accent 				*/ 
  	"\212",	/* capital E, circumflex accent 			*/ 
  	"\210",	/* capital E, grave accent 				*/ 
  	"\213",	/* capital E, dieresis or umlaut mark 			*/ 
  	"\215",	/* capital I, acute accent 				*/ 
  	"\216",	/* capital I, circumflex accent 	these are	*/ 
  	"\214",	/* capital I, grave accent		ISO -100 hex	*/ 
  	"\217",	/* capital I, dieresis or umlaut mark			*/ 
  	"\221",	/* capital N, tilde 					*/ 
  	"\223",	/* capital O, acute accent 				*/ 
  	"\224",	/* capital O, circumflex accent 			*/ 
  	"\222",	/* capital O, grave accent 				*/ 
  	"\351",	/* capital O, slash 		'cept this */ 
  	"\225",	/* capital O, tilde 					*/ 
  	"\226",	/* capital O, dieresis or umlaut mark			*/ 
  	"\234",	/* capital THORN, Icelandic */ 
  	"\230",	/* capital U, acute accent */ 
  	"\231",	/* capital U, circumflex accent */ 
  	"\227",	/* capital U, grave accent */ 
  	"\232",	/* capital U, dieresis or umlaut mark */ 
  	"\233",	/* capital Y, acute accent */ 
  	"\326",	/* small a, acute accent */ 
  	"\327",	/* small a, circumflex accent */ 
  	"\361",	/* small ae diphthong (ligature) */ 
  	"\325",	/* small a, grave accent */ 
  	"\046",	/* ampersand */ 
  	"\332",	/* small a, ring */ 
  	"\330",	/* small a, tilde */ 
  	"\331",	/* small a, dieresis or umlaut mark */ 
  	"\333",	/* small c, cedilla */ 
  	"\335",	/* small e, acute accent */ 
  	"\336",	/* small e, circumflex accent */ 
  	"\334",	/* small e, grave accent */ 
  	"\346",	/* small eth, Icelandic 	*/ 
  	"\337",	/* small e, dieresis or umlaut mark */ 
  	"\076",	/* greater than */ 
  	"\342",	/* small i, acute accent */ 
  	"\344",	/* small i, circumflex accent */ 
  	"\340",	/* small i, grave accent */ 
  	"\345",	/* small i, dieresis or umlaut mark */ 
  	"\074",	/* less than */ 
  	"\347",	/* small n, tilde */ 
  	"\355",	/* small o, acute accent */ 
  	"\356",	/* small o, circumflex accent */ 
  	"\354",	/* small o, grave accent */ 
  	"\371",	/* small o, slash */ 
  	"\357",	/* small o, tilde */ 
  	"\360",	/* small o, dieresis or umlaut mark */ 
  	"\373",	/* small sharp s, German (sz ligature) */ 
  	"\374",	/* small thorn, Icelandic */ 
  	"\363",	/* small u, acute accent */ 
  	"\364",	/* small u, circumflex accent */ 
  	"\362",	/* small u, grave accent */ 
  	"\366",	/* small u, dieresis or umlaut mark */ 
  	"\367",	/* small y, acute accent */ 
  	"\375",	/* small y, dieresis or umlaut mark */ 
};
275:
1.2 timbl 276: /* Entity values -- for IBM/PC Code Page 850 (International)
277: **
278: ** This MUST match exactly the table referred to in the DTD!
279: **
280: */
281: /* @@@@@@@@@@@@@@@@@ TBD */
282:
283:
284:
285: /* Set character set
286: ** ----------------
287: */
288:
289: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 290:
1.2 timbl 291: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
292: {
293: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
294: : ISO_Latin1;
295: }
1.1 timbl 296:
297:
298: /* Flattening the style structure
299: ** ------------------------------
300: **
301: On the NeXT, and on any read-only browser, it is simpler for the text to have
302: a sequence of styles, rather than a nested tree of styles. In this
303: case we have to flatten the structure as it arrives from SGML tags into
304: a sequence of styles.
305: */
306:
307: /* If style really needs to be set, call this
308: */
1.4 timbl 309: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 310: {
1.4 timbl 311: if (!me->text) { /* First time through */
312: me->text = HText_new2(me->node_anchor, me->target);
313: HText_beginAppend(me->text);
314: HText_setStyle(me->text, me->new_style);
315: me->in_word = NO;
1.1 timbl 316: } else {
1.4 timbl 317: HText_setStyle(me->text, me->new_style);
1.1 timbl 318: }
1.4 timbl 319: me->old_style = me->new_style;
320: me->style_change = NO;
1.1 timbl 321: }
322:
323: /* If you THINK you need to change style, call this
324: */
325:
1.11 timbl 326: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 327: {
1.4 timbl 328: if (me->new_style!=style) {
329: me->style_change = YES;
330: me->new_style = style;
1.1 timbl 331: }
1.11 timbl 332: me->in_word = NO;
1.1 timbl 333: }
334:
1.2 timbl 335: /*_________________________________________________________________________
336: **
337: ** A C T I O N R O U T I N E S
338: */
339:
340: /* Character handling
341: ** ------------------
1.1 timbl 342: */
1.4 timbl 343: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 344: {
1.2 timbl 345:
1.4 timbl 346: switch (me->sp[0].tag_number) {
1.2 timbl 347: case HTML_COMMENT:
348: break; /* Do Nothing */
349:
350: case HTML_TITLE:
1.4 timbl 351: HTChunkPutc(&me->title, c);
1.2 timbl 352: break;
353:
354:
355: case HTML_LISTING: /* Litteral text */
356: case HTML_XMP:
357: case HTML_PLAINTEXT:
358: case HTML_PRE:
359: /* We guarrantee that the style is up-to-date in begin_litteral
360: */
1.4 timbl 361: HText_appendCharacter(me->text, c);
1.2 timbl 362: break;
363:
364: default: /* Free format text */
1.4 timbl 365: if (me->style_change) {
1.2 timbl 366: if ((c=='\n') || (c==' ')) return; /* Ignore it */
367: UPDATE_STYLE;
368: }
369: if (c=='\n') {
1.4 timbl 370: if (me->in_word) {
371: HText_appendCharacter(me->text, ' ');
372: me->in_word = NO;
1.2 timbl 373: }
374: } else {
1.4 timbl 375: HText_appendCharacter(me->text, c);
376: me->in_word = YES;
1.2 timbl 377: }
378: } /* end switch */
1.1 timbl 379: }
380:
1.2 timbl 381:
382:
383: /* String handling
384: ** ---------------
385: **
386: ** This is written separately from put_character becuase the loop can
1.11 timbl 387: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 388: */
1.4 timbl 389: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 390: {
1.2 timbl 391:
1.4 timbl 392: switch (me->sp[0].tag_number) {
1.2 timbl 393: case HTML_COMMENT:
394: break; /* Do Nothing */
395:
396: case HTML_TITLE:
1.4 timbl 397: HTChunkPuts(&me->title, s);
1.2 timbl 398: break;
399:
400:
401: case HTML_LISTING: /* Litteral text */
402: case HTML_XMP:
403: case HTML_PLAINTEXT:
404: case HTML_PRE:
405:
406: /* We guarrantee that the style is up-to-date in begin_litteral
407: */
1.4 timbl 408: HText_appendText(me->text, s);
1.2 timbl 409: break;
410:
411: default: /* Free format text */
412: {
413: CONST char *p = s;
1.4 timbl 414: if (me->style_change) {
1.2 timbl 415: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
416: if (!*p) return;
417: UPDATE_STYLE;
418: }
419: for(; *p; p++) {
1.4 timbl 420: if (me->style_change) {
1.2 timbl 421: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
422: UPDATE_STYLE;
423: }
424: if (*p=='\n') {
1.4 timbl 425: if (me->in_word) {
426: HText_appendCharacter(me->text, ' ');
427: me->in_word = NO;
1.2 timbl 428: }
429: } else {
1.4 timbl 430: HText_appendCharacter(me->text, *p);
431: me->in_word = YES;
1.2 timbl 432: }
433: } /* for */
434: }
435: } /* end switch */
1.1 timbl 436: }
437:
438:
1.2 timbl 439: /* Buffer write
1.3 timbl 440: ** ------------
1.1 timbl 441: */
1.4 timbl 442: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 443: {
1.2 timbl 444: CONST char* p;
445: CONST char* e = s+l;
1.4 timbl 446: for (p=s; s<e; p++) HTML_put_character(me, *p);
1.1 timbl 447: }
1.2 timbl 448:
449:
450: /* Start Element
451: ** -------------
452: */
453: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 454: HTStructured *, me,
1.16 timbl 455: int, element_number,
1.3 timbl 456: CONST BOOL*, present,
1.16 timbl 457: CONST char **, value)
1.2 timbl 458: {
459: switch (element_number) {
460: case HTML_A:
461: {
1.8 timbl 462: HTChildAnchor * source;
1.9 timbl 463: char * href = NULL;
464: if (present[HTML_A_HREF]) {
465: StrAllocCopy(href, value[HTML_A_HREF]);
466: HTSimplify(href);
467: }
1.8 timbl 468: source = HTAnchor_findChildAndLink(
1.4 timbl 469: me->node_anchor, /* parent */
1.2 timbl 470: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 471: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 472: present[HTML_A_REL] && value[HTML_A_REL] ?
473: (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 474: : 0);
475:
476: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
477: HTParentAnchor * dest =
478: HTAnchor_parent(
479: HTAnchor_followMainLink((HTAnchor*)source)
480: );
481: if (!HTAnchor_title(dest))
482: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
483: }
484: UPDATE_STYLE;
1.4 timbl 485: HText_beginAnchor(me->text, source);
1.18 frystyk 486: free(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 487: }
488: break;
489:
490: case HTML_TITLE:
1.4 timbl 491: HTChunkClear(&me->title);
1.2 timbl 492: break;
493:
494: case HTML_NEXTID:
495: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 496: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 497: break;
498:
499: case HTML_ISINDEX:
1.4 timbl 500: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 501: break;
502:
1.15 timbl 503: case HTML_BR:
504: UPDATE_STYLE;
505: HText_appendCharacter(me->text, '\n');
506: me->in_word = NO;
507: break;
508:
509: case HTML_HR:
510: UPDATE_STYLE;
511: HText_appendCharacter(me->text, '\n');
1.16 timbl 512: HText_appendText(me->text, "___________________________________");
1.15 timbl 513: HText_appendCharacter(me->text, '\n');
514: me->in_word = NO;
515: break;
516:
1.2 timbl 517: case HTML_P:
518: UPDATE_STYLE;
1.4 timbl 519: HText_appendParagraph(me->text);
520: me->in_word = NO;
1.2 timbl 521: break;
522:
523: case HTML_DL:
1.11 timbl 524: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 525: ? styles[HTML_DL]
1.2 timbl 526: : styles[HTML_DL]);
527: break;
528:
529: case HTML_DT:
1.4 timbl 530: if (!me->style_change) {
531: HText_appendParagraph(me->text);
532: me->in_word = NO;
1.2 timbl 533: }
534: break;
535:
536: case HTML_DD:
537: UPDATE_STYLE;
1.4 timbl 538: HTML_put_character(me, '\t'); /* Just tab out one stop */
539: me->in_word = NO;
540: break;
1.2 timbl 541:
542: case HTML_UL:
543: case HTML_OL:
544: case HTML_MENU:
545: case HTML_DIR:
1.11 timbl 546: change_paragraph_style(me, styles[element_number]);
1.2 timbl 547: break;
548:
549: case HTML_LI:
550: UPDATE_STYLE;
1.7 timbl 551: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 552: HText_appendParagraph(me->text);
1.2 timbl 553: else
1.4 timbl 554: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
555: me->in_word = NO;
1.2 timbl 556: break;
557:
558: case HTML_LISTING: /* Litteral text */
559: case HTML_XMP:
560: case HTML_PLAINTEXT:
561: case HTML_PRE:
1.11 timbl 562: change_paragraph_style(me, styles[element_number]);
1.2 timbl 563: UPDATE_STYLE;
1.4 timbl 564: if (me->comment_end)
565: HText_appendText(me->text, me->comment_end);
1.2 timbl 566: break;
1.11 timbl 567:
1.23 frystyk 568: case HTML_IMG: /* Images */
569: {
570: HTChildAnchor *source;
571: char *src = NULL;
572: if (present[HTML_IMG_SRC]) {
573: StrAllocCopy(src, value[HTML_IMG_SRC]);
574: HTSimplify(src);
575: }
576: source = HTAnchor_findChildAndLink(
577: me->node_anchor, /* parent */
578: 0, /* Tag */
579: src ? src : 0, /* Addresss */
580: 0);
581: UPDATE_STYLE;
582: HText_appendImage(me->text, source,
1.24 frystyk 583: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
584: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
585: present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 586: free(src);
1.24 frystyk 587: }
588: break;
589:
590: case HTML_HTML: /* Ignore these altogether */
591: case HTML_HEAD:
592: case HTML_BODY:
593:
1.10 timbl 594: case HTML_TT: /* Physical character highlighting */
595: case HTML_B: /* Currently ignored */
596: case HTML_I:
597: case HTML_U:
598:
599: case HTML_EM: /* Logical character highlighting */
600: case HTML_STRONG: /* Currently ignored */
601: case HTML_CODE:
602: case HTML_SAMP:
603: case HTML_KBD:
604: case HTML_VAR:
605: case HTML_DFN:
606: case HTML_CITE:
607: break;
608:
1.11 timbl 609: case HTML_H1: /* paragraph styles */
610: case HTML_H2:
611: case HTML_H3:
612: case HTML_H4:
613: case HTML_H5:
614: case HTML_H6:
615: case HTML_H7:
616: case HTML_ADDRESS:
617: case HTML_BLOCKQUOTE:
618: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 619: break;
620:
621: } /* end switch */
622:
1.16 timbl 623: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 624: if (me->sp == me->stack) {
1.12 timbl 625: fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
626: MAX_NESTING);
627: return;
628: }
1.4 timbl 629: --(me->sp);
630: me->sp[0].style = me->new_style; /* Stack new style */
631: me->sp[0].tag_number = element_number;
1.10 timbl 632: }
1.1 timbl 633: }
1.10 timbl 634:
1.2 timbl 635:
1.1 timbl 636: /* End Element
1.2 timbl 637: ** -----------
1.1 timbl 638: **
1.2 timbl 639: */
640: /* When we end an element, the style must be returned to that
1.1 timbl 641: ** in effect before that element. Note that anchors (etc?)
642: ** don't have an associated style, so that we must scan down the
643: ** stack for an element with a defined style. (In fact, the styles
644: ** should be linked to the whole stack not just the top one.)
645: ** TBL 921119
1.6 timbl 646: **
647: ** We don't turn on "CAREFUL" check because the parser produces
648: ** (internal code errors apart) good nesting. The parser checks
649: ** incoming code errors, not this module.
1.1 timbl 650: */
1.4 timbl 651: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 652: {
1.2 timbl 653: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 654: if (element_number != me->sp[0].tag_number) {
1.2 timbl 655: fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 656: me->dtd->tags[element_number].name,
657: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 658: /* panic */
1.1 timbl 659: }
1.2 timbl 660: #endif
661:
1.4 timbl 662: me->sp++; /* Pop state off stack */
1.2 timbl 663:
664: switch(element_number) {
665:
666: case HTML_A:
667: UPDATE_STYLE;
1.4 timbl 668: HText_endAnchor(me->text);
1.2 timbl 669: break;
670:
671: case HTML_TITLE:
1.4 timbl 672: HTChunkTerminate(&me->title);
673: HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 674: break;
675:
676: case HTML_LISTING: /* Litteral text */
677: case HTML_XMP:
678: case HTML_PLAINTEXT:
679: case HTML_PRE:
1.4 timbl 680: if (me->comment_start)
681: HText_appendText(me->text, me->comment_start);
1.2 timbl 682: /* Fall through */
683:
684: default:
685:
1.11 timbl 686: change_paragraph_style(me, me->sp->style); /* Often won't really change */
1.2 timbl 687: break;
688:
689: } /* switch */
1.1 timbl 690: }
691:
1.2 timbl 692:
693: /* Expanding entities
694: ** ------------------
695: */
696: /* (In fact, they all shrink!)
1.1 timbl 697: */
1.2 timbl 698:
1.4 timbl 699: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 700: {
1.4 timbl 701: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 702: }
1.2 timbl 703:
704:
705: /* Free an HTML object
706: ** -------------------
707: **
1.4 timbl 708: ** If the document is empty, the text object will not yet exist.
709: So we could in fact abandon creating the document and return
710: an error code. In fact an empty document is an important type
711: of document, so we don't.
712: **
713: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 714: ** Otherwise, the interactive object is left.
715: */
1.4 timbl 716: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1 timbl 717: {
1.4 timbl 718: UPDATE_STYLE; /* Creates empty document here! */
719: if (me->comment_end)
720: HTML_put_string(me,me->comment_end);
721: HText_endAppend(me->text);
722:
723: if (me->target) {
1.35 ! duns 724: (*me->targetClass._free)(me->target);
1.2 timbl 725: }
1.19 frystyk 726: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 727: free(me);
1.1 timbl 728: }
729:
730:
1.14 timbl 731: PRIVATE void HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 732:
1.14 timbl 733: {
734: if (me->target) {
735: (*me->targetClass.abort)(me->target, e);
736: }
1.19 frystyk 737: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 738: free(me);
1.1 timbl 739: }
740:
1.2 timbl 741:
742: /* Get Styles from style sheet
743: ** ---------------------------
744: */
745: PRIVATE void get_styles NOARGS
1.1 timbl 746: {
1.2 timbl 747: got_styles = YES;
748:
749: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 750:
1.2 timbl 751: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
752: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
753: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
754: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
755: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
756: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
757: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
758:
759: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
760: styles[HTML_UL] =
761: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
762: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
763: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 764: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 765: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
766: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
767: styles[HTML_PLAINTEXT] =
768: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
769: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
770: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
771: }
772: /* P U B L I C
773: */
774:
/*	Structured Object Class
**	-----------------------
**
**	The class record for objects made by HTML_new(), which stores
**	its address in each object's isa field.  The entries are a name
**	string followed by this module's routines.
**	NOTE(review): the entry order must follow the declaration of
**	HTStructuredClass, which is not visible here -- confirm before
**	reordering.
*/
PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
{		
	"text/html",			/* Name */
	HTML_free,			/* Normal end of the stream */
	HTML_abort,			/* Abnormal end of the stream */
	HTML_put_character, 	HTML_put_string,  HTML_write,	/* Character data */
	HTML_start_element, 	HTML_end_element,	/* Element tags */
	HTML_put_entity			/* Entity by number */
}; 
1.1 timbl 787:
1.4 timbl 788:
1.2 timbl 789: /* New Structured Text object
790: ** --------------------------
791: **
1.16 timbl 792: ** The structured stream can generate either presentation,
1.4 timbl 793: ** or plain text, or HTML.
1.1 timbl 794: */
1.16 timbl 795: PUBLIC HTStructured* HTML_new ARGS5(
796: HTRequest *, request,
797: void *, param,
798: HTFormat, input_format,
799: HTFormat, output_format,
800: HTStream *, output_stream)
1.1 timbl 801: {
802:
1.4 timbl 803: HTStructured * me;
804:
1.16 timbl 805: if (output_format != WWW_PLAINTEXT
806: && output_format != WWW_PRESENT
807: && output_format != HTAtom_for("text/x-c")) {
1.21 luotonen 808: HTStream * intermediate = HTStreamStack(WWW_HTML, request, NO);
1.6 timbl 809: if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 810: fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
1.16 timbl 811: HTAtom_name(output_format));
1.4 timbl 812: exit (-99);
813: }
814:
815: me = (HTStructured*) malloc(sizeof(*me));
816: if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 817:
818: if (!got_styles) get_styles();
819:
1.4 timbl 820: me->isa = &HTMLPresentation;
1.16 timbl 821: me->dtd = &DTD;
822: me->node_anchor = request->anchor;
1.4 timbl 823: me->title.size = 0;
824: me->title.growby = 128;
825: me->title.allocated = 0;
826: me->title.data = 0;
827: me->text = 0;
828: me->style_change = YES; /* Force check leading to text creation */
829: me->new_style = default_style;
830: me->old_style = 0;
831: me->sp = me->stack + MAX_NESTING - 1;
832: me->sp->tag_number = -1; /* INVALID */
833: me->sp->style = default_style; /* INVALID */
1.1 timbl 834:
1.4 timbl 835: me->comment_start = NULL;
836: me->comment_end = NULL;
1.16 timbl 837: me->target = output_stream;
838: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 839:
1.4 timbl 840: return (HTStructured*) me;
1.1 timbl 841: }
842:
843:
1.2 timbl 844: /* HTConverter for HTML to plain text
845: ** ----------------------------------
1.1 timbl 846: **
1.2 timbl 847: ** This will convert from HTML to presentation or plain text.
1.1 timbl 848: */
1.16 timbl 849: PUBLIC HTStream* HTMLToPlain ARGS5(
850: HTRequest *, request,
851: void *, param,
852: HTFormat, input_format,
853: HTFormat, output_format,
854: HTStream *, output_stream)
1.1 timbl 855: {
1.16 timbl 856: return SGML_new(&DTD, HTML_new(
857: request, NULL, input_format, output_format, output_stream));
1.1 timbl 858: }
859:
860:
1.2 timbl 861: /* HTConverter for HTML to C code
862: ** ------------------------------
863: **
864: ** C copde is like plain text but all non-preformatted code
865: ** is commented out.
866: ** This will convert from HTML to presentation or plain text.
867: */
1.16 timbl 868: PUBLIC HTStream* HTMLToC ARGS5(
869: HTRequest *, request,
870: void *, param,
871: HTFormat, input_format,
872: HTFormat, output_format,
873: HTStream *, output_stream)
1.1 timbl 874: {
1.4 timbl 875:
876: HTStructured * html;
877:
1.16 timbl 878: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before even title */
879: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2 timbl 880: html->comment_start = "/* ";
1.16 timbl 881: html->dtd = &DTD;
1.2 timbl 882: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.4 timbl 883: /* HTML_put_string(html,html->comment_start); */
1.16 timbl 884: return SGML_new(&DTD, html);
1.1 timbl 885: }
886:
887:
1.2 timbl 888: /* Presenter for HTML
889: ** ------------------
890: **
891: ** This will convert from HTML to presentation or plain text.
892: **
893: ** Override this if you have a windows version
1.1 timbl 894: */
1.2 timbl 895: #ifndef GUI
1.16 timbl 896: PUBLIC HTStream* HTMLPresent ARGS5(
897: HTRequest *, request,
898: void *, param,
899: HTFormat, input_format,
900: HTFormat, output_format,
901: HTStream *, output_stream)
1.1 timbl 902: {
1.16 timbl 903: return SGML_new(&DTD, HTML_new(
904: request, NULL, input_format, output_format, output_stream));
1.1 timbl 905: }
1.2 timbl 906: #endif
1.1 timbl 907:
908:
1.2 timbl 909: /* Record error message as a hypertext object
910: ** ------------------------------------------
911: **
912: ** The error message should be marked as an error so that
913: ** it can be reloaded later.
914: ** This implementation just throws up an error message
915: ** and leaves the document unloaded.
1.9 timbl 916: ** A smarter implementation would load an error document,
917: ** marking at such so that it is retried on reload.
1.1 timbl 918: **
1.2 timbl 919: ** On entry,
920: ** sink is a stream to the output device if any
921: ** number is the HTTP error number
922: ** message is the human readable message.
1.9 timbl 923: **
924: ** On exit,
925: ** returns a negative number to indicate lack of success in the load.
1.1 timbl 926: */
1.2 timbl 927:
928: PUBLIC int HTLoadError ARGS3(
1.17 luotonen 929: HTRequest *, req,
1.2 timbl 930: int, number,
931: CONST char *, message)
932: {
1.20 frystyk 933: char *err = "Oh I screwed up!"; /* Dummy pointer not used (I hope) */
1.2 timbl 934: HTAlert(message); /* @@@@@@@@@@@@@@@@@@@ */
1.20 frystyk 935: /* Clean up! Henrik 04/03-94 */
936: if (req && req->output_stream)
937: (*req->output_stream->isa->abort)(req->output_stream, err);
1.33 frystyk 938: #if OLD_CODE
1.25 luotonen 939: HTClearErrors(req);
1.33 frystyk 940: #endif
1.2 timbl 941: return -number;
942: }
1.29 frystyk 943:
Webmaster