Annotation of libwww/Library/src/HTML.c, revision 1.26
1.2 timbl 1: /* Structured stream to Rich hypertext converter
2: ** ============================================
1.1 timbl 3: **
1.2 timbl 4: ** This generates a hypertext object. It converts from the
5: ** structured stream interface for HTML events into the style-
6: ** oriented interface of the HText.h module. This module is
7: ** only used in clients and should not be linked into servers.
1.1 timbl 8: **
1.6 timbl 9: ** Override this module if making a new GUI browser.
1.1 timbl 10: **
11: */
1.16 timbl 12:
1.1 timbl 13: #include "HTML.h"
14:
1.16 timbl 15: /* #define CAREFUL Check nesting here not really necessary */
1.2 timbl 16:
1.1 timbl 17: #include <ctype.h>
18: #include <stdio.h>
19:
20: #include "HTAtom.h"
21: #include "HTChunk.h"
22: #include "HText.h"
23: #include "HTStyle.h"
24:
1.3 timbl 25: #include "HTAlert.h"
1.4 timbl 26: #include "HTMLGen.h"
1.8 timbl 27: #include "HTParse.h"
1.1 timbl 28:
29: extern HTStyleSheet * styleSheet; /* Application-wide */
30:
31: /* Module-wide style cache
32: */
33: PRIVATE int got_styles = 0;
1.16 timbl 34: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 35: PRIVATE HTStyle *default_style;
1.1 timbl 36:
37:
38: /* HTML Object
39: ** -----------
40: */
1.2 timbl 41: #define MAX_NESTING 20 /* Should be checked by parser */
42:
43: typedef struct _stack_element {
44: HTStyle * style;
45: int tag_number;
46: } stack_element;
47:
48: struct _HTStructured {
49: CONST HTStructuredClass * isa;
50: HTParentAnchor * node_anchor;
51: HText * text;
52:
53: HTStream* target; /* Output stream */
54: HTStreamClass targetClass; /* Output routines */
55:
56: HTChunk title; /* Grow by 128 */
57:
58: char * comment_start; /* for literate programming */
59: char * comment_end;
1.16 timbl 60:
61: CONST SGML_dtd* dtd;
62:
1.2 timbl 63: HTTag * current_tag;
64: BOOL style_change;
65: HTStyle * new_style;
66: HTStyle * old_style;
67: BOOL in_word; /* Have just had a non-white char */
68: stack_element stack[MAX_NESTING];
69: stack_element *sp; /* Style stack pointer */
1.1 timbl 70: };
71:
1.2 timbl 72: struct _HTStream {
73: CONST HTStreamClass * isa;
74: /* .... */
75: };
1.1 timbl 76:
77: /* Forward declarations of routines
78: */
79: PRIVATE void get_styles NOPARAMS;
80:
81:
1.4 timbl 82: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 83: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 84:
/*	Style buffering avoids dummy paragraph begin/ends.
**
**	A requested style is only applied (through actually_set_style)
**	when output is about to be generated.  The macro expects a variable
**	`me' of type HTStructured * to be in scope.
**
**	It is wrapped in do { } while (0) so that "UPDATE_STYLE;" is exactly
**	one statement and remains correct when it appears before an `else'.
*/
#define UPDATE_STYLE \
    do { if (me->style_change) actually_set_style(me); } while (0)
1.1 timbl 88:
89:
1.2 timbl 90: #ifdef OLD_CODE
1.1 timbl 91: /* The following accented characters are from peter Flynn, curia project */
92:
93: /* these ifdefs don't solve the problem of a simple terminal emulator
94: ** with a different character set to the client machine. But nothing does,
95: ** except looking at the TERM setting */
96:
1.2 timbl 97:
1.1 timbl 98: { "ocus" , "&" }, /* for CURIA */
99: #ifdef IBMPC
100: { "aacute" , "\240" }, /* For PC display */
101: { "eacute" , "\202" },
102: { "iacute" , "\241" },
103: { "oacute" , "\242" },
104: { "uacute" , "\243" },
105: { "Aacute" , "\101" },
106: { "Eacute" , "\220" },
107: { "Iacute" , "\111" },
108: { "Oacute" , "\117" },
109: { "Uacute" , "\125" },
110: #else
111: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
112: { "eacute" , "\351" },
113: { "iacute" , "\355" },
114: { "oacute" , "\363" },
115: { "uacute" , "\372" },
116: { "Aacute" , "\301" },
117: { "Eacute" , "\310" },
118: { "Iacute" , "\315" },
119: { "Oacute" , "\323" },
120: { "Uacute" , "\332" },
121: #endif
122: { 0, 0 } /* Terminate list */
123: };
1.2 timbl 124: #endif
1.1 timbl 125:
126:
/*	Entity values -- ISO Latin 1 local representation
**
**	One single-character string per entity, indexed by entity number.
**	The order MUST match exactly the entity table referred to in the
**	DTD; the number in front of each entry is its index.
*/
static char * ISO_Latin1[] = {
    /*  0 */ "\306",    /* AE ligature (diphthong), capital */
    /*  1 */ "\301",    /* A acute, capital */
    /*  2 */ "\302",    /* A circumflex, capital */
    /*  3 */ "\300",    /* A grave, capital */
    /*  4 */ "\305",    /* A ring, capital */
    /*  5 */ "\303",    /* A tilde, capital */
    /*  6 */ "\304",    /* A dieresis (umlaut), capital */
    /*  7 */ "\307",    /* C cedilla, capital */
    /*  8 */ "\320",    /* Eth, Icelandic, capital */
    /*  9 */ "\311",    /* E acute, capital */
    /* 10 */ "\312",    /* E circumflex, capital */
    /* 11 */ "\310",    /* E grave, capital */
    /* 12 */ "\313",    /* E dieresis (umlaut), capital */
    /* 13 */ "\315",    /* I acute, capital */
    /* 14 */ "\316",    /* I circumflex, capital */
    /* 15 */ "\314",    /* I grave, capital */
    /* 16 */ "\317",    /* I dieresis (umlaut), capital */
    /* 17 */ "\321",    /* N tilde, capital */
    /* 18 */ "\323",    /* O acute, capital */
    /* 19 */ "\324",    /* O circumflex, capital */
    /* 20 */ "\322",    /* O grave, capital */
    /* 21 */ "\330",    /* O slash, capital */
    /* 22 */ "\325",    /* O tilde, capital */
    /* 23 */ "\326",    /* O dieresis (umlaut), capital */
    /* 24 */ "\336",    /* THORN, Icelandic, capital */
    /* 25 */ "\332",    /* U acute, capital */
    /* 26 */ "\333",    /* U circumflex, capital */
    /* 27 */ "\331",    /* U grave, capital */
    /* 28 */ "\334",    /* U dieresis (umlaut), capital */
    /* 29 */ "\335",    /* Y acute, capital */
    /* 30 */ "\341",    /* a acute, small */
    /* 31 */ "\342",    /* a circumflex, small */
    /* 32 */ "\346",    /* ae ligature (diphthong), small */
    /* 33 */ "\340",    /* a grave, small */
    /* 34 */ "\046",    /* ampersand */
    /* 35 */ "\345",    /* a ring, small */
    /* 36 */ "\343",    /* a tilde, small */
    /* 37 */ "\344",    /* a dieresis (umlaut), small */
    /* 38 */ "\347",    /* c cedilla, small */
    /* 39 */ "\351",    /* e acute, small */
    /* 40 */ "\352",    /* e circumflex, small */
    /* 41 */ "\350",    /* e grave, small */
    /* 42 */ "\360",    /* eth, Icelandic, small */
    /* 43 */ "\353",    /* e dieresis (umlaut), small */
    /* 44 */ "\076",    /* greater than */
    /* 45 */ "\355",    /* i acute, small */
    /* 46 */ "\356",    /* i circumflex, small */
    /* 47 */ "\354",    /* i grave, small */
    /* 48 */ "\357",    /* i dieresis (umlaut), small */
    /* 49 */ "\074",    /* less than */
    /* 50 */ "\361",    /* n tilde, small */
    /* 51 */ "\363",    /* o acute, small */
    /* 52 */ "\364",    /* o circumflex, small */
    /* 53 */ "\362",    /* o grave, small */
    /* 54 */ "\370",    /* o slash, small */
    /* 55 */ "\365",    /* o tilde, small */
    /* 56 */ "\366",    /* o dieresis (umlaut), small */
    /* 57 */ "\337",    /* sharp s, German (sz ligature), small */
    /* 58 */ "\376",    /* thorn, Icelandic, small */
    /* 59 */ "\372",    /* u acute, small */
    /* 60 */ "\373",    /* u circumflex, small */
    /* 61 */ "\371",    /* u grave, small */
    /* 62 */ "\374",    /* u dieresis (umlaut), small */
    /* 63 */ "\375",    /* y acute, small */
    /* 64 */ "\377"     /* y dieresis (umlaut), small */
};
198:
1.2 timbl 199:
/*	Entity values -- NeXT local representation
**
**	Same layout as ISO_Latin1: one single-character string per entity,
**	indexed by entity number.  The order MUST match exactly the entity
**	table referred to in the DTD; the number in front of each entry is
**	its index.
*/
static char * NeXTCharacters[] = {
    /*  0 */ "\341",    /* AE ligature (diphthong), capital */
    /*  1 */ "\202",    /* A acute, capital */
    /*  2 */ "\203",    /* A circumflex, capital */
    /*  3 */ "\201",    /* A grave, capital */
    /*  4 */ "\206",    /* A ring, capital */
    /*  5 */ "\204",    /* A tilde, capital */
    /*  6 */ "\205",    /* A dieresis (umlaut), capital */
    /*  7 */ "\207",    /* C cedilla, capital */
    /*  8 */ "\220",    /* Eth, Icelandic, capital */
    /*  9 */ "\211",    /* E acute, capital */
    /* 10 */ "\212",    /* E circumflex, capital */
    /* 11 */ "\210",    /* E grave, capital */
    /* 12 */ "\213",    /* E dieresis (umlaut), capital */
    /* 13 */ "\215",    /* I acute, capital */
    /* 14 */ "\216",    /* I circumflex, capital */
    /* 15 */ "\214",    /* I grave, capital */
    /* 16 */ "\217",    /* I dieresis (umlaut), capital */
    /* 17 */ "\221",    /* N tilde, capital */
    /* 18 */ "\223",    /* O acute, capital */
    /* 19 */ "\224",    /* O circumflex, capital */
    /* 20 */ "\222",    /* O grave, capital */
    /* 21 */ "\351",    /* O slash, capital */
    /* 22 */ "\225",    /* O tilde, capital */
    /* 23 */ "\226",    /* O dieresis (umlaut), capital */
    /* 24 */ "\234",    /* THORN, Icelandic, capital */
    /* 25 */ "\230",    /* U acute, capital */
    /* 26 */ "\231",    /* U circumflex, capital */
    /* 27 */ "\227",    /* U grave, capital */
    /* 28 */ "\232",    /* U dieresis (umlaut), capital */
    /* 29 */ "\233",    /* Y acute, capital */
    /* 30 */ "\326",    /* a acute, small */
    /* 31 */ "\327",    /* a circumflex, small */
    /* 32 */ "\361",    /* ae ligature (diphthong), small */
    /* 33 */ "\325",    /* a grave, small */
    /* 34 */ "\046",    /* ampersand */
    /* 35 */ "\332",    /* a ring, small */
    /* 36 */ "\330",    /* a tilde, small */
    /* 37 */ "\331",    /* a dieresis (umlaut), small */
    /* 38 */ "\333",    /* c cedilla, small */
    /* 39 */ "\335",    /* e acute, small */
    /* 40 */ "\336",    /* e circumflex, small */
    /* 41 */ "\334",    /* e grave, small */
    /* 42 */ "\346",    /* eth, Icelandic, small */
    /* 43 */ "\337",    /* e dieresis (umlaut), small */
    /* 44 */ "\076",    /* greater than */
    /* 45 */ "\342",    /* i acute, small */
    /* 46 */ "\344",    /* i circumflex, small */
    /* 47 */ "\340",    /* i grave, small */
    /* 48 */ "\345",    /* i dieresis (umlaut), small */
    /* 49 */ "\074",    /* less than */
    /* 50 */ "\347",    /* n tilde, small */
    /* 51 */ "\355",    /* o acute, small */
    /* 52 */ "\356",    /* o circumflex, small */
    /* 53 */ "\354",    /* o grave, small */
    /* 54 */ "\371",    /* o slash, small */
    /* 55 */ "\357",    /* o tilde, small */
    /* 56 */ "\360",    /* o dieresis (umlaut), small */
    /* 57 */ "\373",    /* sharp s, German (sz ligature), small */
    /* 58 */ "\374",    /* thorn, Icelandic, small */
    /* 59 */ "\363",    /* u acute, small */
    /* 60 */ "\364",    /* u circumflex, small */
    /* 61 */ "\362",    /* u grave, small */
    /* 62 */ "\366",    /* u dieresis (umlaut), small */
    /* 63 */ "\367",    /* y acute, small */
    /* 64 */ "\375"     /* y dieresis (umlaut), small */
};
272:
1.2 timbl 273: /* Entity values -- for IBM/PC Code Page 850 (International)
274: **
275: ** This MUST match exactly the table referred to in the DTD!
276: **
277: */
278: /* @@@@@@@@@@@@@@@@@ TBD */
279:
280:
281:
282: /* Set character set
283: ** ----------------
284: */
285:
286: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 287:
1.2 timbl 288: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
289: {
290: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
291: : ISO_Latin1;
292: }
1.1 timbl 293:
294:
295: /* Flattening the style structure
296: ** ------------------------------
297: **
298: On the NeXT, and on any read-only browser, it is simpler for the text to have
299: a sequence of styles, rather than a nested tree of styles. In this
300: case we have to flatten the structure as it arrives from SGML tags into
301: a sequence of styles.
302: */
303:
304: /* If style really needs to be set, call this
305: */
1.4 timbl 306: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 307: {
1.4 timbl 308: if (!me->text) { /* First time through */
309: me->text = HText_new2(me->node_anchor, me->target);
310: HText_beginAppend(me->text);
311: HText_setStyle(me->text, me->new_style);
312: me->in_word = NO;
1.1 timbl 313: } else {
1.4 timbl 314: HText_setStyle(me->text, me->new_style);
1.1 timbl 315: }
1.4 timbl 316: me->old_style = me->new_style;
317: me->style_change = NO;
1.1 timbl 318: }
319:
320: /* If you THINK you need to change style, call this
321: */
322:
1.11 timbl 323: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 324: {
1.4 timbl 325: if (me->new_style!=style) {
326: me->style_change = YES;
327: me->new_style = style;
1.1 timbl 328: }
1.11 timbl 329: me->in_word = NO;
1.1 timbl 330: }
331:
1.2 timbl 332: /*_________________________________________________________________________
333: **
334: ** A C T I O N R O U T I N E S
335: */
336:
337: /* Character handling
338: ** ------------------
1.1 timbl 339: */
1.4 timbl 340: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 341: {
1.2 timbl 342:
1.4 timbl 343: switch (me->sp[0].tag_number) {
1.2 timbl 344: case HTML_COMMENT:
345: break; /* Do Nothing */
346:
347: case HTML_TITLE:
1.4 timbl 348: HTChunkPutc(&me->title, c);
1.2 timbl 349: break;
350:
351:
352: case HTML_LISTING: /* Litteral text */
353: case HTML_XMP:
354: case HTML_PLAINTEXT:
355: case HTML_PRE:
356: /* We guarrantee that the style is up-to-date in begin_litteral
357: */
1.4 timbl 358: HText_appendCharacter(me->text, c);
1.2 timbl 359: break;
360:
361: default: /* Free format text */
1.4 timbl 362: if (me->style_change) {
1.2 timbl 363: if ((c=='\n') || (c==' ')) return; /* Ignore it */
364: UPDATE_STYLE;
365: }
366: if (c=='\n') {
1.4 timbl 367: if (me->in_word) {
368: HText_appendCharacter(me->text, ' ');
369: me->in_word = NO;
1.2 timbl 370: }
371: } else {
1.4 timbl 372: HText_appendCharacter(me->text, c);
373: me->in_word = YES;
1.2 timbl 374: }
375: } /* end switch */
1.1 timbl 376: }
377:
1.2 timbl 378:
379:
380: /* String handling
381: ** ---------------
382: **
383: ** This is written separately from put_character becuase the loop can
1.11 timbl 384: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 385: */
1.4 timbl 386: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 387: {
1.2 timbl 388:
1.4 timbl 389: switch (me->sp[0].tag_number) {
1.2 timbl 390: case HTML_COMMENT:
391: break; /* Do Nothing */
392:
393: case HTML_TITLE:
1.4 timbl 394: HTChunkPuts(&me->title, s);
1.2 timbl 395: break;
396:
397:
398: case HTML_LISTING: /* Litteral text */
399: case HTML_XMP:
400: case HTML_PLAINTEXT:
401: case HTML_PRE:
402:
403: /* We guarrantee that the style is up-to-date in begin_litteral
404: */
1.4 timbl 405: HText_appendText(me->text, s);
1.2 timbl 406: break;
407:
408: default: /* Free format text */
409: {
410: CONST char *p = s;
1.4 timbl 411: if (me->style_change) {
1.2 timbl 412: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
413: if (!*p) return;
414: UPDATE_STYLE;
415: }
416: for(; *p; p++) {
1.4 timbl 417: if (me->style_change) {
1.2 timbl 418: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
419: UPDATE_STYLE;
420: }
421: if (*p=='\n') {
1.4 timbl 422: if (me->in_word) {
423: HText_appendCharacter(me->text, ' ');
424: me->in_word = NO;
1.2 timbl 425: }
426: } else {
1.4 timbl 427: HText_appendCharacter(me->text, *p);
428: me->in_word = YES;
1.2 timbl 429: }
430: } /* for */
431: }
432: } /* end switch */
1.1 timbl 433: }
434:
435:
1.2 timbl 436: /* Buffer write
1.3 timbl 437: ** ------------
1.1 timbl 438: */
1.4 timbl 439: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 440: {
1.2 timbl 441: CONST char* p;
442: CONST char* e = s+l;
1.4 timbl 443: for (p=s; s<e; p++) HTML_put_character(me, *p);
1.1 timbl 444: }
1.2 timbl 445:
446:
447: /* Start Element
448: ** -------------
449: */
450: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 451: HTStructured *, me,
1.16 timbl 452: int, element_number,
1.3 timbl 453: CONST BOOL*, present,
1.16 timbl 454: CONST char **, value)
1.2 timbl 455: {
456: switch (element_number) {
457: case HTML_A:
458: {
1.8 timbl 459: HTChildAnchor * source;
1.9 timbl 460: char * href = NULL;
461: if (present[HTML_A_HREF]) {
462: StrAllocCopy(href, value[HTML_A_HREF]);
1.26 ! frystyk 463:
! 464: /* If the reference is simplified to empty string then we
! 465: must use the unsimplified version in order to recognize
! 466: the anchor, Henrik May 9, 1994 */
1.9 timbl 467: HTSimplify(href);
1.26 ! frystyk 468: if (!*href)
! 469: StrAllocCopy(href, value[HTML_A_HREF]);
1.9 timbl 470: }
1.8 timbl 471: source = HTAnchor_findChildAndLink(
1.4 timbl 472: me->node_anchor, /* parent */
1.2 timbl 473: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 474: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 475: present[HTML_A_REL] && value[HTML_A_REL] ?
476: (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 477: : 0);
478:
479: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
480: HTParentAnchor * dest =
481: HTAnchor_parent(
482: HTAnchor_followMainLink((HTAnchor*)source)
483: );
484: if (!HTAnchor_title(dest))
485: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
486: }
487: UPDATE_STYLE;
1.4 timbl 488: HText_beginAnchor(me->text, source);
1.18 frystyk 489: free(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 490: }
491: break;
492:
493: case HTML_TITLE:
1.4 timbl 494: HTChunkClear(&me->title);
1.2 timbl 495: break;
496:
497: case HTML_NEXTID:
498: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 499: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 500: break;
501:
502: case HTML_ISINDEX:
1.4 timbl 503: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 504: break;
505:
1.15 timbl 506: case HTML_BR:
507: UPDATE_STYLE;
508: HText_appendCharacter(me->text, '\n');
509: me->in_word = NO;
510: break;
511:
512: case HTML_HR:
513: UPDATE_STYLE;
514: HText_appendCharacter(me->text, '\n');
1.16 timbl 515: HText_appendText(me->text, "___________________________________");
1.15 timbl 516: HText_appendCharacter(me->text, '\n');
517: me->in_word = NO;
518: break;
519:
1.2 timbl 520: case HTML_P:
521: UPDATE_STYLE;
1.4 timbl 522: HText_appendParagraph(me->text);
523: me->in_word = NO;
1.2 timbl 524: break;
525:
526: case HTML_DL:
1.11 timbl 527: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 528: ? styles[HTML_DL]
1.2 timbl 529: : styles[HTML_DL]);
530: break;
531:
532: case HTML_DT:
1.4 timbl 533: if (!me->style_change) {
534: HText_appendParagraph(me->text);
535: me->in_word = NO;
1.2 timbl 536: }
537: break;
538:
539: case HTML_DD:
540: UPDATE_STYLE;
1.4 timbl 541: HTML_put_character(me, '\t'); /* Just tab out one stop */
542: me->in_word = NO;
543: break;
1.2 timbl 544:
545: case HTML_UL:
546: case HTML_OL:
547: case HTML_MENU:
548: case HTML_DIR:
1.11 timbl 549: change_paragraph_style(me, styles[element_number]);
1.2 timbl 550: break;
551:
552: case HTML_LI:
553: UPDATE_STYLE;
1.7 timbl 554: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 555: HText_appendParagraph(me->text);
1.2 timbl 556: else
1.4 timbl 557: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
558: me->in_word = NO;
1.2 timbl 559: break;
560:
561: case HTML_LISTING: /* Litteral text */
562: case HTML_XMP:
563: case HTML_PLAINTEXT:
564: case HTML_PRE:
1.11 timbl 565: change_paragraph_style(me, styles[element_number]);
1.2 timbl 566: UPDATE_STYLE;
1.4 timbl 567: if (me->comment_end)
568: HText_appendText(me->text, me->comment_end);
1.2 timbl 569: break;
1.11 timbl 570:
1.23 frystyk 571: case HTML_IMG: /* Images */
572: {
573: HTChildAnchor *source;
574: char *src = NULL;
575: if (present[HTML_IMG_SRC]) {
576: StrAllocCopy(src, value[HTML_IMG_SRC]);
577: HTSimplify(src);
578: }
579: source = HTAnchor_findChildAndLink(
580: me->node_anchor, /* parent */
581: 0, /* Tag */
582: src ? src : 0, /* Addresss */
583: 0);
584: UPDATE_STYLE;
585: HText_appendImage(me->text, source,
1.24 frystyk 586: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
587: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
588: present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 589: free(src);
1.24 frystyk 590: }
591: break;
592:
593: case HTML_HTML: /* Ignore these altogether */
594: case HTML_HEAD:
595: case HTML_BODY:
596:
1.10 timbl 597: case HTML_TT: /* Physical character highlighting */
598: case HTML_B: /* Currently ignored */
599: case HTML_I:
600: case HTML_U:
601:
602: case HTML_EM: /* Logical character highlighting */
603: case HTML_STRONG: /* Currently ignored */
604: case HTML_CODE:
605: case HTML_SAMP:
606: case HTML_KBD:
607: case HTML_VAR:
608: case HTML_DFN:
609: case HTML_CITE:
610: break;
611:
1.11 timbl 612: case HTML_H1: /* paragraph styles */
613: case HTML_H2:
614: case HTML_H3:
615: case HTML_H4:
616: case HTML_H5:
617: case HTML_H6:
618: case HTML_H7:
619: case HTML_ADDRESS:
620: case HTML_BLOCKQUOTE:
621: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 622: break;
623:
624: } /* end switch */
625:
1.16 timbl 626: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 627: if (me->sp == me->stack) {
1.12 timbl 628: fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
629: MAX_NESTING);
630: return;
631: }
1.4 timbl 632: --(me->sp);
633: me->sp[0].style = me->new_style; /* Stack new style */
634: me->sp[0].tag_number = element_number;
1.10 timbl 635: }
1.1 timbl 636: }
1.10 timbl 637:
1.2 timbl 638:
1.1 timbl 639: /* End Element
1.2 timbl 640: ** -----------
1.1 timbl 641: **
1.2 timbl 642: */
643: /* When we end an element, the style must be returned to that
1.1 timbl 644: ** in effect before that element. Note that anchors (etc?)
645: ** don't have an associated style, so that we must scan down the
646: ** stack for an element with a defined style. (In fact, the styles
647: ** should be linked to the whole stack not just the top one.)
648: ** TBL 921119
1.6 timbl 649: **
650: ** We don't turn on "CAREFUL" check because the parser produces
651: ** (internal code errors apart) good nesting. The parser checks
652: ** incoming code errors, not this module.
1.1 timbl 653: */
1.4 timbl 654: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 655: {
1.2 timbl 656: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 657: if (element_number != me->sp[0].tag_number) {
1.2 timbl 658: fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 659: me->dtd->tags[element_number].name,
660: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 661: /* panic */
1.1 timbl 662: }
1.2 timbl 663: #endif
664:
1.4 timbl 665: me->sp++; /* Pop state off stack */
1.2 timbl 666:
667: switch(element_number) {
668:
669: case HTML_A:
670: UPDATE_STYLE;
1.4 timbl 671: HText_endAnchor(me->text);
1.2 timbl 672: break;
673:
674: case HTML_TITLE:
1.4 timbl 675: HTChunkTerminate(&me->title);
676: HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 677: break;
678:
679: case HTML_LISTING: /* Litteral text */
680: case HTML_XMP:
681: case HTML_PLAINTEXT:
682: case HTML_PRE:
1.4 timbl 683: if (me->comment_start)
684: HText_appendText(me->text, me->comment_start);
1.2 timbl 685: /* Fall through */
686:
687: default:
688:
1.11 timbl 689: change_paragraph_style(me, me->sp->style); /* Often won't really change */
1.2 timbl 690: break;
691:
692: } /* switch */
1.1 timbl 693: }
694:
1.2 timbl 695:
696: /* Expanding entities
697: ** ------------------
698: */
699: /* (In fact, they all shrink!)
1.1 timbl 700: */
1.2 timbl 701:
1.4 timbl 702: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 703: {
1.4 timbl 704: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 705: }
1.2 timbl 706:
707:
708: /* Free an HTML object
709: ** -------------------
710: **
1.4 timbl 711: ** If the document is empty, the text object will not yet exist.
712: So we could in fact abandon creating the document and return
713: an error code. In fact an empty document is an important type
714: of document, so we don't.
715: **
716: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 717: ** Otherwise, the interactive object is left.
718: */
1.4 timbl 719: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1 timbl 720: {
1.4 timbl 721: UPDATE_STYLE; /* Creates empty document here! */
722: if (me->comment_end)
723: HTML_put_string(me,me->comment_end);
724: HText_endAppend(me->text);
725:
726: if (me->target) {
727: (*me->targetClass.free)(me->target);
1.2 timbl 728: }
1.19 frystyk 729: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 730: free(me);
1.1 timbl 731: }
732:
733:
1.14 timbl 734: PRIVATE void HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 735:
1.14 timbl 736: {
737: if (me->target) {
738: (*me->targetClass.abort)(me->target, e);
739: }
1.19 frystyk 740: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 741: free(me);
1.1 timbl 742: }
743:
1.2 timbl 744:
745: /* Get Styles from style sheet
746: ** ---------------------------
747: */
748: PRIVATE void get_styles NOARGS
1.1 timbl 749: {
1.2 timbl 750: got_styles = YES;
751:
752: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 753:
1.2 timbl 754: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
755: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
756: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
757: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
758: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
759: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
760: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
761:
762: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
763: styles[HTML_UL] =
764: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
765: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
766: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 767: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 768: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
769: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
770: styles[HTML_PLAINTEXT] =
771: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
772: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
773: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
774: }
775: /* P U B L I C
776: */
777:
778: /* Structured Object Class
779: ** -----------------------
780: */
781: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
782: {
783: "text/html",
784: HTML_free,
1.14 timbl 785: HTML_abort,
1.2 timbl 786: HTML_put_character, HTML_put_string, HTML_write,
787: HTML_start_element, HTML_end_element,
788: HTML_put_entity
789: };
1.1 timbl 790:
1.4 timbl 791:
1.2 timbl 792: /* New Structured Text object
793: ** --------------------------
794: **
1.16 timbl 795: ** The structured stream can generate either presentation,
1.4 timbl 796: ** or plain text, or HTML.
1.1 timbl 797: */
1.16 timbl 798: PUBLIC HTStructured* HTML_new ARGS5(
799: HTRequest *, request,
800: void *, param,
801: HTFormat, input_format,
802: HTFormat, output_format,
803: HTStream *, output_stream)
1.1 timbl 804: {
805:
1.4 timbl 806: HTStructured * me;
807:
1.16 timbl 808: if (output_format != WWW_PLAINTEXT
809: && output_format != WWW_PRESENT
810: && output_format != HTAtom_for("text/x-c")) {
1.21 luotonen 811: HTStream * intermediate = HTStreamStack(WWW_HTML, request, NO);
1.6 timbl 812: if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 813: fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
1.16 timbl 814: HTAtom_name(output_format));
1.4 timbl 815: exit (-99);
816: }
817:
818: me = (HTStructured*) malloc(sizeof(*me));
819: if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 820:
821: if (!got_styles) get_styles();
822:
1.4 timbl 823: me->isa = &HTMLPresentation;
1.16 timbl 824: me->dtd = &DTD;
825: me->node_anchor = request->anchor;
1.4 timbl 826: me->title.size = 0;
827: me->title.growby = 128;
828: me->title.allocated = 0;
829: me->title.data = 0;
830: me->text = 0;
831: me->style_change = YES; /* Force check leading to text creation */
832: me->new_style = default_style;
833: me->old_style = 0;
834: me->sp = me->stack + MAX_NESTING - 1;
835: me->sp->tag_number = -1; /* INVALID */
836: me->sp->style = default_style; /* INVALID */
1.1 timbl 837:
1.4 timbl 838: me->comment_start = NULL;
839: me->comment_end = NULL;
1.16 timbl 840: me->target = output_stream;
841: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 842:
1.4 timbl 843: return (HTStructured*) me;
1.1 timbl 844: }
845:
846:
1.2 timbl 847: /* HTConverter for HTML to plain text
848: ** ----------------------------------
1.1 timbl 849: **
1.2 timbl 850: ** This will convert from HTML to presentation or plain text.
1.1 timbl 851: */
1.16 timbl 852: PUBLIC HTStream* HTMLToPlain ARGS5(
853: HTRequest *, request,
854: void *, param,
855: HTFormat, input_format,
856: HTFormat, output_format,
857: HTStream *, output_stream)
1.1 timbl 858: {
1.16 timbl 859: return SGML_new(&DTD, HTML_new(
860: request, NULL, input_format, output_format, output_stream));
1.1 timbl 861: }
862:
863:
1.2 timbl 864: /* HTConverter for HTML to C code
865: ** ------------------------------
866: **
867: ** C copde is like plain text but all non-preformatted code
868: ** is commented out.
869: ** This will convert from HTML to presentation or plain text.
870: */
1.16 timbl 871: PUBLIC HTStream* HTMLToC ARGS5(
872: HTRequest *, request,
873: void *, param,
874: HTFormat, input_format,
875: HTFormat, output_format,
876: HTStream *, output_stream)
1.1 timbl 877: {
1.4 timbl 878:
879: HTStructured * html;
880:
1.16 timbl 881: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before even title */
882: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2 timbl 883: html->comment_start = "/* ";
1.16 timbl 884: html->dtd = &DTD;
1.2 timbl 885: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.4 timbl 886: /* HTML_put_string(html,html->comment_start); */
1.16 timbl 887: return SGML_new(&DTD, html);
1.1 timbl 888: }
889:
890:
1.2 timbl 891: /* Presenter for HTML
892: ** ------------------
893: **
894: ** This will convert from HTML to presentation or plain text.
895: **
896: ** Override this if you have a windows version
1.1 timbl 897: */
1.2 timbl 898: #ifndef GUI
1.16 timbl 899: PUBLIC HTStream* HTMLPresent ARGS5(
900: HTRequest *, request,
901: void *, param,
902: HTFormat, input_format,
903: HTFormat, output_format,
904: HTStream *, output_stream)
1.1 timbl 905: {
1.16 timbl 906: return SGML_new(&DTD, HTML_new(
907: request, NULL, input_format, output_format, output_stream));
1.1 timbl 908: }
1.2 timbl 909: #endif
1.1 timbl 910:
911:
1.2 timbl 912: /* Record error message as a hypertext object
913: ** ------------------------------------------
914: **
915: ** The error message should be marked as an error so that
916: ** it can be reloaded later.
917: ** This implementation just throws up an error message
918: ** and leaves the document unloaded.
1.9 timbl 919: ** A smarter implementation would load an error document,
920: ** marking at such so that it is retried on reload.
1.1 timbl 921: **
1.2 timbl 922: ** On entry,
923: ** sink is a stream to the output device if any
924: ** number is the HTTP error number
925: ** message is the human readable message.
1.9 timbl 926: **
927: ** On exit,
928: ** returns a negative number to indicate lack of success in the load.
1.1 timbl 929: */
1.2 timbl 930:
931: PUBLIC int HTLoadError ARGS3(
1.17 luotonen 932: HTRequest *, req,
1.2 timbl 933: int, number,
934: CONST char *, message)
935: {
1.20 frystyk 936: char *err = "Oh I screwed up!"; /* Dummy pointer not used (I hope) */
1.2 timbl 937: HTAlert(message); /* @@@@@@@@@@@@@@@@@@@ */
1.20 frystyk 938: /* Clean up! Henrik 04/03-94 */
939: if (req && req->output_stream)
940: (*req->output_stream->isa->abort)(req->output_stream, err);
1.25 luotonen 941: HTClearErrors(req);
1.2 timbl 942: return -number;
943: }
944:
Webmaster