Annotation of libwww/Library/src/HTML.c, revision 1.39
1.39 ! frystyk 1: /* HTML.c
! 2: ** STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
! 3: **
! 4: ** (c) COPYRIGHT CERN 1994.
! 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: ** This generates a hypertext object. It converts from the
8: ** structured stream interface for HTML events into the style-
9: ** oriented interface of HText.h. This module is
10: ** only used in clients and should not be linked into servers.
1.1 timbl 11: **
1.6 timbl 12: ** Override this module if making a new GUI browser.
1.1 timbl 13: **
1.35 duns 14: ** HISTORY:
15: ** 8 Jul 94 FM Insulate free() from _free structure element.
16: **
1.1 timbl 17: */
1.16 timbl 18:
1.1 timbl 19: #include "HTML.h"
20:
1.16 timbl 21: /* #define CAREFUL Check nesting here not really necessary */
1.2 timbl 22:
1.1 timbl 23: #include <ctype.h>
24: #include <stdio.h>
25:
26: #include "HTAtom.h"
27: #include "HTChunk.h"
28: #include "HText.h"
29: #include "HTStyle.h"
30:
1.3 timbl 31: #include "HTAlert.h"
1.4 timbl 32: #include "HTMLGen.h"
1.8 timbl 33: #include "HTParse.h"
1.1 timbl 34:
35: extern HTStyleSheet * styleSheet; /* Application-wide */
36:
37: /* Module-wide style cache
38: */
39: PRIVATE int got_styles = 0;
1.16 timbl 40: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 41: PRIVATE HTStyle *default_style;
1.1 timbl 42:
43:
44: /* HTML Object
45: ** -----------
46: */
1.2 timbl 47: #define MAX_NESTING 20 /* Should be checked by parser */
48:
49: typedef struct _stack_element {
50: HTStyle * style;
51: int tag_number;
52: } stack_element;
53:
54: struct _HTStructured {
55: CONST HTStructuredClass * isa;
56: HTParentAnchor * node_anchor;
57: HText * text;
58:
59: HTStream* target; /* Output stream */
60: HTStreamClass targetClass; /* Output routines */
61:
62: HTChunk title; /* Grow by 128 */
63:
64: char * comment_start; /* for literate programming */
65: char * comment_end;
1.16 timbl 66:
67: CONST SGML_dtd* dtd;
68:
1.2 timbl 69: HTTag * current_tag;
70: BOOL style_change;
71: HTStyle * new_style;
72: HTStyle * old_style;
73: BOOL in_word; /* Have just had a non-white char */
74: stack_element stack[MAX_NESTING];
75: stack_element *sp; /* Style stack pointer */
1.1 timbl 76: };
77:
1.2 timbl 78: struct _HTStream {
79: CONST HTStreamClass * isa;
80: /* .... */
81: };
1.1 timbl 82:
83: /* Forward declarations of routines
84: */
85: PRIVATE void get_styles NOPARAMS;
86:
87:
1.4 timbl 88: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 89: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 90:
91: /* Style buffering avoids dummy paragraph begin/ends.
92: */
/*	Apply a pending style change, if there is one.
**
**	Expects a variable `me' (the HTStructured object) in scope.
**	The do/while(0) wrapper makes `UPDATE_STYLE;' exactly one
**	statement, so the macro can be used as the body of an unbraced
**	if/else without detaching the else.
*/
#define UPDATE_STYLE \
    do { if (me->style_change) { actually_set_style(me); } } while (0)
1.1 timbl 94:
95:
1.2 timbl 96: #ifdef OLD_CODE
1.1 timbl 97: /* The following accented characters are from Peter Flynn, CURIA project */
98:
99: /* these ifdefs don't solve the problem of a simple terminal emulator
100: ** with a different character set to the client machine. But nothing does,
101: ** except looking at the TERM setting */
102:
1.2 timbl 103:
1.1 timbl 104: { "ocus" , "&" }, /* for CURIA */
105: #ifdef IBMPC
106: { "aacute" , "\240" }, /* For PC display */
107: { "eacute" , "\202" },
108: { "iacute" , "\241" },
109: { "oacute" , "\242" },
110: { "uacute" , "\243" },
111: { "Aacute" , "\101" },
112: { "Eacute" , "\220" },
113: { "Iacute" , "\111" },
114: { "Oacute" , "\117" },
115: { "Uacute" , "\125" },
116: #else
117: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
118: { "eacute" , "\351" },
119: { "iacute" , "\355" },
120: { "oacute" , "\363" },
121: { "uacute" , "\372" },
122: { "Aacute" , "\301" },
123: { "Eacute" , "\310" },
124: { "Iacute" , "\315" },
125: { "Oacute" , "\323" },
126: { "Uacute" , "\332" },
127: #endif
128: { 0, 0 } /* Terminate list */
129: };
1.2 timbl 130: #endif
1.1 timbl 131:
132:
1.2 timbl 133: /* Entity values -- for ISO Latin 1 local representation
134: **
135: ** This MUST match exactly the table referred to in the DTD!
136: */
/* One single-character string per entity, in DTD entity order.
** Byte values are written as hexadecimal escapes.
*/
static char * ISO_Latin1[] = {
    "\xC6",	/* capital AE diphthong (ligature) */
    "\xC1",	/* capital A, acute accent */
    "\xC2",	/* capital A, circumflex accent */
    "\xC0",	/* capital A, grave accent */
    "\xC5",	/* capital A, ring */
    "\xC3",	/* capital A, tilde */
    "\xC4",	/* capital A, dieresis or umlaut mark */
    "\xC7",	/* capital C, cedilla */
    "\xD0",	/* capital Eth, Icelandic */
    "\xC9",	/* capital E, acute accent */
    "\xCA",	/* capital E, circumflex accent */
    "\xC8",	/* capital E, grave accent */
    "\xCB",	/* capital E, dieresis or umlaut mark */
    "\xCD",	/* capital I, acute accent */
    "\xCE",	/* capital I, circumflex accent */
    "\xCC",	/* capital I, grave accent */
    "\xCF",	/* capital I, dieresis or umlaut mark */
    "\xD1",	/* capital N, tilde */
    "\xD3",	/* capital O, acute accent */
    "\xD4",	/* capital O, circumflex accent */
    "\xD2",	/* capital O, grave accent */
    "\xD8",	/* capital O, slash */
    "\xD5",	/* capital O, tilde */
    "\xD6",	/* capital O, dieresis or umlaut mark */
    "\xDE",	/* capital THORN, Icelandic */
    "\xDA",	/* capital U, acute accent */
    "\xDB",	/* capital U, circumflex accent */
    "\xD9",	/* capital U, grave accent */
    "\xDC",	/* capital U, dieresis or umlaut mark */
    "\xDD",	/* capital Y, acute accent */
    "\xE1",	/* small a, acute accent */
    "\xE2",	/* small a, circumflex accent */
    "\xE6",	/* small ae diphthong (ligature) */
    "\xE0",	/* small a, grave accent */
    "\x26",	/* ampersand */
    "\xE5",	/* small a, ring */
    "\xE3",	/* small a, tilde */
    "\xE4",	/* small a, dieresis or umlaut mark */
    "\xE7",	/* small c, cedilla */
    "\xE9",	/* small e, acute accent */
    "\xEA",	/* small e, circumflex accent */
    "\xE8",	/* small e, grave accent */
    "\xF0",	/* small eth, Icelandic */
    "\xEB",	/* small e, dieresis or umlaut mark */
    "\x3E",	/* greater than */
    "\xED",	/* small i, acute accent */
    "\xEE",	/* small i, circumflex accent */
    "\xEC",	/* small i, grave accent */
    "\xEF",	/* small i, dieresis or umlaut mark */
    "\x3C",	/* less than */
    "\xF1",	/* small n, tilde */
    "\xF3",	/* small o, acute accent */
    "\xF4",	/* small o, circumflex accent */
    "\xF2",	/* small o, grave accent */
    "\xF8",	/* small o, slash */
    "\xF5",	/* small o, tilde */
    "\xF6",	/* small o, dieresis or umlaut mark */
    "\x22",	/* double quote sign - June 94 */
    "\xDF",	/* small sharp s, German (sz ligature) */
    "\xFE",	/* small thorn, Icelandic */
    "\xFA",	/* small u, acute accent */
    "\xFB",	/* small u, circumflex accent */
    "\xF9",	/* small u, grave accent */
    "\xFC",	/* small u, dieresis or umlaut mark */
    "\xFD",	/* small y, acute accent */
    "\xFF",	/* small y, dieresis or umlaut mark */
};
205:
1.2 timbl 206:
207: /* Entity values -- for NeXT local representation
208: **
209: ** This MUST match exactly the table referred to in the DTD!
210: **
211: */
/* One single-character string per entity, in the same order as the
** ISO Latin 1 table.  Byte values are written as hexadecimal escapes.
** The original listing carried the side note that the capital-letter
** codes "are ISO -100 hex" with capital O slash as the exception.
*/
static char * NeXTCharacters[] = {
    "\xE1",	/* capital AE diphthong (ligature) */
    "\x82",	/* capital A, acute accent */
    "\x83",	/* capital A, circumflex accent */
    "\x81",	/* capital A, grave accent */
    "\x86",	/* capital A, ring */
    "\x84",	/* capital A, tilde */
    "\x85",	/* capital A, dieresis or umlaut mark */
    "\x87",	/* capital C, cedilla */
    "\x90",	/* capital Eth, Icelandic */
    "\x89",	/* capital E, acute accent */
    "\x8A",	/* capital E, circumflex accent */
    "\x88",	/* capital E, grave accent */
    "\x8B",	/* capital E, dieresis or umlaut mark */
    "\x8D",	/* capital I, acute accent */
    "\x8E",	/* capital I, circumflex accent */
    "\x8C",	/* capital I, grave accent */
    "\x8F",	/* capital I, dieresis or umlaut mark */
    "\x91",	/* capital N, tilde */
    "\x93",	/* capital O, acute accent */
    "\x94",	/* capital O, circumflex accent */
    "\x92",	/* capital O, grave accent */
    "\xE9",	/* capital O, slash */
    "\x95",	/* capital O, tilde */
    "\x96",	/* capital O, dieresis or umlaut mark */
    "\x9C",	/* capital THORN, Icelandic */
    "\x98",	/* capital U, acute accent */
    "\x99",	/* capital U, circumflex accent */
    "\x97",	/* capital U, grave accent */
    "\x9A",	/* capital U, dieresis or umlaut mark */
    "\x9B",	/* capital Y, acute accent */
    "\xD6",	/* small a, acute accent */
    "\xD7",	/* small a, circumflex accent */
    "\xF1",	/* small ae diphthong (ligature) */
    "\xD5",	/* small a, grave accent */
    "\x26",	/* ampersand */
    "\xDA",	/* small a, ring */
    "\xD8",	/* small a, tilde */
    "\xD9",	/* small a, dieresis or umlaut mark */
    "\xDB",	/* small c, cedilla */
    "\xDD",	/* small e, acute accent */
    "\xDE",	/* small e, circumflex accent */
    "\xDC",	/* small e, grave accent */
    "\xE6",	/* small eth, Icelandic */
    "\xDF",	/* small e, dieresis or umlaut mark */
    "\x3E",	/* greater than */
    "\xE2",	/* small i, acute accent */
    "\xE4",	/* small i, circumflex accent */
    "\xE0",	/* small i, grave accent */
    "\xE5",	/* small i, dieresis or umlaut mark */
    "\x3C",	/* less than */
    "\xE7",	/* small n, tilde */
    "\xED",	/* small o, acute accent */
    "\xEE",	/* small o, circumflex accent */
    "\xEC",	/* small o, grave accent */
    "\xF9",	/* small o, slash */
    "\xEF",	/* small o, tilde */
    "\xF0",	/* small o, dieresis or umlaut mark */
    "\x22",	/* double quote sign - June 94 */
    "\xFB",	/* small sharp s, German (sz ligature) */
    "\xFC",	/* small thorn, Icelandic */
    "\xF3",	/* small u, acute accent */
    "\xF4",	/* small u, circumflex accent */
    "\xF2",	/* small u, grave accent */
    "\xF6",	/* small u, dieresis or umlaut mark */
    "\xF7",	/* small y, acute accent */
    "\xFD",	/* small y, dieresis or umlaut mark */
};
280:
1.2 timbl 281: /* Entity values -- for IBM/PC Code Page 850 (International)
282: **
283: ** This MUST match exactly the table referred to in the DTD!
284: **
285: */
286: /* @@@@@@@@@@@@@@@@@ TBD */
287:
288:
289:
290: /* Set character set
291: ** ----------------
292: */
293:
294: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 295:
1.2 timbl 296: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
297: {
298: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
299: : ISO_Latin1;
300: }
1.1 timbl 301:
302:
303: /* Flattening the style structure
304: ** ------------------------------
305: **
306: On the NeXT, and on any read-only browser, it is simpler for the text to have
307: a sequence of styles, rather than a nested tree of styles. In this
308: case we have to flatten the structure as it arrives from SGML tags into
309: a sequence of styles.
310: */
311:
312: /* If style really needs to be set, call this
313: */
1.4 timbl 314: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 315: {
1.4 timbl 316: if (!me->text) { /* First time through */
317: me->text = HText_new2(me->node_anchor, me->target);
318: HText_beginAppend(me->text);
319: HText_setStyle(me->text, me->new_style);
320: me->in_word = NO;
1.1 timbl 321: } else {
1.4 timbl 322: HText_setStyle(me->text, me->new_style);
1.1 timbl 323: }
1.4 timbl 324: me->old_style = me->new_style;
325: me->style_change = NO;
1.1 timbl 326: }
327:
328: /* If you THINK you need to change style, call this
329: */
330:
1.11 timbl 331: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 332: {
1.4 timbl 333: if (me->new_style!=style) {
334: me->style_change = YES;
335: me->new_style = style;
1.1 timbl 336: }
1.11 timbl 337: me->in_word = NO;
1.1 timbl 338: }
339:
1.2 timbl 340: /*_________________________________________________________________________
341: **
342: ** A C T I O N R O U T I N E S
343: */
344:
345: /* Character handling
346: ** ------------------
1.1 timbl 347: */
1.4 timbl 348: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 349: {
1.2 timbl 350:
1.4 timbl 351: switch (me->sp[0].tag_number) {
1.2 timbl 352: case HTML_COMMENT:
353: break; /* Do Nothing */
354:
355: case HTML_TITLE:
1.4 timbl 356: HTChunkPutc(&me->title, c);
1.2 timbl 357: break;
358:
359:
360: case HTML_LISTING: /* Litteral text */
361: case HTML_XMP:
362: case HTML_PLAINTEXT:
363: case HTML_PRE:
364: /* We guarrantee that the style is up-to-date in begin_litteral
365: */
1.4 timbl 366: HText_appendCharacter(me->text, c);
1.2 timbl 367: break;
368:
369: default: /* Free format text */
1.4 timbl 370: if (me->style_change) {
1.2 timbl 371: if ((c=='\n') || (c==' ')) return; /* Ignore it */
372: UPDATE_STYLE;
373: }
374: if (c=='\n') {
1.4 timbl 375: if (me->in_word) {
376: HText_appendCharacter(me->text, ' ');
377: me->in_word = NO;
1.2 timbl 378: }
379: } else {
1.4 timbl 380: HText_appendCharacter(me->text, c);
381: me->in_word = YES;
1.2 timbl 382: }
383: } /* end switch */
1.1 timbl 384: }
385:
1.2 timbl 386:
387:
388: /* String handling
389: ** ---------------
390: **
391: ** This is written separately from put_character because the loop can
1.11 timbl 392: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 393: */
1.4 timbl 394: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 395: {
1.2 timbl 396:
1.4 timbl 397: switch (me->sp[0].tag_number) {
1.2 timbl 398: case HTML_COMMENT:
399: break; /* Do Nothing */
400:
401: case HTML_TITLE:
1.4 timbl 402: HTChunkPuts(&me->title, s);
1.2 timbl 403: break;
404:
405:
406: case HTML_LISTING: /* Litteral text */
407: case HTML_XMP:
408: case HTML_PLAINTEXT:
409: case HTML_PRE:
410:
411: /* We guarrantee that the style is up-to-date in begin_litteral
412: */
1.4 timbl 413: HText_appendText(me->text, s);
1.2 timbl 414: break;
415:
416: default: /* Free format text */
417: {
418: CONST char *p = s;
1.4 timbl 419: if (me->style_change) {
1.2 timbl 420: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
421: if (!*p) return;
422: UPDATE_STYLE;
423: }
424: for(; *p; p++) {
1.4 timbl 425: if (me->style_change) {
1.2 timbl 426: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
427: UPDATE_STYLE;
428: }
429: if (*p=='\n') {
1.4 timbl 430: if (me->in_word) {
431: HText_appendCharacter(me->text, ' ');
432: me->in_word = NO;
1.2 timbl 433: }
434: } else {
1.4 timbl 435: HText_appendCharacter(me->text, *p);
436: me->in_word = YES;
1.2 timbl 437: }
438: } /* for */
439: }
440: } /* end switch */
1.1 timbl 441: }
442:
443:
1.2 timbl 444: /* Buffer write
1.3 timbl 445: ** ------------
1.1 timbl 446: */
1.4 timbl 447: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 448: {
1.38 frystyk 449: while (l-- > 0)
450: HTML_put_character(me, *s++);
1.1 timbl 451: }
1.2 timbl 452:
453:
454: /* Start Element
455: ** -------------
456: */
457: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 458: HTStructured *, me,
1.16 timbl 459: int, element_number,
1.3 timbl 460: CONST BOOL*, present,
1.16 timbl 461: CONST char **, value)
1.2 timbl 462: {
463: switch (element_number) {
464: case HTML_A:
465: {
1.8 timbl 466: HTChildAnchor * source;
1.9 timbl 467: char * href = NULL;
468: if (present[HTML_A_HREF]) {
469: StrAllocCopy(href, value[HTML_A_HREF]);
1.36 frystyk 470: #ifdef OLD_CODE
1.9 timbl 471: HTSimplify(href);
1.36 frystyk 472: #endif
1.9 timbl 473: }
1.8 timbl 474: source = HTAnchor_findChildAndLink(
1.4 timbl 475: me->node_anchor, /* parent */
1.2 timbl 476: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 477: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 478: present[HTML_A_REL] && value[HTML_A_REL] ?
479: (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 480: : 0);
481:
482: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
483: HTParentAnchor * dest =
484: HTAnchor_parent(
485: HTAnchor_followMainLink((HTAnchor*)source)
486: );
487: if (!HTAnchor_title(dest))
488: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
489: }
490: UPDATE_STYLE;
1.4 timbl 491: HText_beginAnchor(me->text, source);
1.18 frystyk 492: free(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 493: }
494: break;
495:
496: case HTML_TITLE:
1.4 timbl 497: HTChunkClear(&me->title);
1.2 timbl 498: break;
499:
500: case HTML_NEXTID:
501: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 502: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 503: break;
504:
505: case HTML_ISINDEX:
1.4 timbl 506: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 507: break;
508:
1.15 timbl 509: case HTML_BR:
510: UPDATE_STYLE;
511: HText_appendCharacter(me->text, '\n');
512: me->in_word = NO;
513: break;
514:
515: case HTML_HR:
516: UPDATE_STYLE;
517: HText_appendCharacter(me->text, '\n');
1.16 timbl 518: HText_appendText(me->text, "___________________________________");
1.15 timbl 519: HText_appendCharacter(me->text, '\n');
520: me->in_word = NO;
521: break;
522:
1.2 timbl 523: case HTML_P:
524: UPDATE_STYLE;
1.4 timbl 525: HText_appendParagraph(me->text);
526: me->in_word = NO;
1.2 timbl 527: break;
528:
529: case HTML_DL:
1.11 timbl 530: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 531: ? styles[HTML_DL]
1.2 timbl 532: : styles[HTML_DL]);
533: break;
534:
535: case HTML_DT:
1.4 timbl 536: if (!me->style_change) {
537: HText_appendParagraph(me->text);
538: me->in_word = NO;
1.2 timbl 539: }
540: break;
541:
542: case HTML_DD:
543: UPDATE_STYLE;
1.4 timbl 544: HTML_put_character(me, '\t'); /* Just tab out one stop */
545: me->in_word = NO;
546: break;
1.2 timbl 547:
548: case HTML_UL:
549: case HTML_OL:
550: case HTML_MENU:
551: case HTML_DIR:
1.11 timbl 552: change_paragraph_style(me, styles[element_number]);
1.2 timbl 553: break;
554:
555: case HTML_LI:
556: UPDATE_STYLE;
1.7 timbl 557: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 558: HText_appendParagraph(me->text);
1.2 timbl 559: else
1.4 timbl 560: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
561: me->in_word = NO;
1.2 timbl 562: break;
563:
564: case HTML_LISTING: /* Litteral text */
565: case HTML_XMP:
566: case HTML_PLAINTEXT:
567: case HTML_PRE:
1.11 timbl 568: change_paragraph_style(me, styles[element_number]);
1.2 timbl 569: UPDATE_STYLE;
1.4 timbl 570: if (me->comment_end)
571: HText_appendText(me->text, me->comment_end);
1.2 timbl 572: break;
1.11 timbl 573:
1.23 frystyk 574: case HTML_IMG: /* Images */
575: {
576: HTChildAnchor *source;
577: char *src = NULL;
578: if (present[HTML_IMG_SRC]) {
579: StrAllocCopy(src, value[HTML_IMG_SRC]);
1.36 frystyk 580: #ifdef OLD_CODE
1.23 frystyk 581: HTSimplify(src);
1.36 frystyk 582: #endif
1.23 frystyk 583: }
584: source = HTAnchor_findChildAndLink(
585: me->node_anchor, /* parent */
586: 0, /* Tag */
587: src ? src : 0, /* Addresss */
588: 0);
589: UPDATE_STYLE;
590: HText_appendImage(me->text, source,
1.24 frystyk 591: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
592: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
593: present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 594: free(src);
1.24 frystyk 595: }
596: break;
597:
598: case HTML_HTML: /* Ignore these altogether */
599: case HTML_HEAD:
600: case HTML_BODY:
601:
1.10 timbl 602: case HTML_TT: /* Physical character highlighting */
603: case HTML_B: /* Currently ignored */
604: case HTML_I:
605: case HTML_U:
606:
607: case HTML_EM: /* Logical character highlighting */
608: case HTML_STRONG: /* Currently ignored */
609: case HTML_CODE:
610: case HTML_SAMP:
611: case HTML_KBD:
612: case HTML_VAR:
613: case HTML_DFN:
614: case HTML_CITE:
615: break;
616:
1.11 timbl 617: case HTML_H1: /* paragraph styles */
618: case HTML_H2:
619: case HTML_H3:
620: case HTML_H4:
621: case HTML_H5:
622: case HTML_H6:
623: case HTML_H7:
624: case HTML_ADDRESS:
625: case HTML_BLOCKQUOTE:
626: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 627: break;
628:
629: } /* end switch */
630:
1.16 timbl 631: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 632: if (me->sp == me->stack) {
1.12 timbl 633: fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
634: MAX_NESTING);
635: return;
636: }
1.4 timbl 637: --(me->sp);
638: me->sp[0].style = me->new_style; /* Stack new style */
639: me->sp[0].tag_number = element_number;
1.10 timbl 640: }
1.1 timbl 641: }
1.10 timbl 642:
1.2 timbl 643:
1.1 timbl 644: /* End Element
1.2 timbl 645: ** -----------
1.1 timbl 646: **
1.2 timbl 647: */
648: /* When we end an element, the style must be returned to that
1.1 timbl 649: ** in effect before that element. Note that anchors (etc?)
650: ** don't have an associated style, so that we must scan down the
651: ** stack for an element with a defined style. (In fact, the styles
652: ** should be linked to the whole stack not just the top one.)
653: ** TBL 921119
1.6 timbl 654: **
655: ** We don't turn on "CAREFUL" check because the parser produces
656: ** (internal code errors apart) good nesting. The parser checks
657: ** incoming code errors, not this module.
1.1 timbl 658: */
1.4 timbl 659: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 660: {
1.2 timbl 661: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 662: if (element_number != me->sp[0].tag_number) {
1.2 timbl 663: fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 664: me->dtd->tags[element_number].name,
665: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 666: /* panic */
1.1 timbl 667: }
1.2 timbl 668: #endif
669:
1.4 timbl 670: me->sp++; /* Pop state off stack */
1.2 timbl 671:
672: switch(element_number) {
673:
674: case HTML_A:
675: UPDATE_STYLE;
1.4 timbl 676: HText_endAnchor(me->text);
1.2 timbl 677: break;
678:
679: case HTML_TITLE:
1.4 timbl 680: HTChunkTerminate(&me->title);
681: HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 682: break;
683:
684: case HTML_LISTING: /* Litteral text */
685: case HTML_XMP:
686: case HTML_PLAINTEXT:
687: case HTML_PRE:
1.4 timbl 688: if (me->comment_start)
689: HText_appendText(me->text, me->comment_start);
1.2 timbl 690: /* Fall through */
691:
692: default:
693:
1.11 timbl 694: change_paragraph_style(me, me->sp->style); /* Often won't really change */
1.2 timbl 695: break;
696:
697: } /* switch */
1.1 timbl 698: }
699:
1.2 timbl 700:
701: /* Expanding entities
702: ** ------------------
703: */
704: /* (In fact, they all shrink!)
1.1 timbl 705: */
1.2 timbl 706:
1.4 timbl 707: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 708: {
1.4 timbl 709: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 710: }
1.2 timbl 711:
712:
713: /* Free an HTML object
714: ** -------------------
715: **
1.4 timbl 716: ** If the document is empty, the text object will not yet exist.
717: So we could in fact abandon creating the document and return
718: an error code. In fact an empty document is an important type
719: of document, so we don't.
720: **
721: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 722: ** Otherwise, the interactive object is left.
723: */
1.37 frystyk 724: PUBLIC int HTML_free ARGS1(HTStructured *, me)
1.1 timbl 725: {
1.4 timbl 726: UPDATE_STYLE; /* Creates empty document here! */
727: if (me->comment_end)
728: HTML_put_string(me,me->comment_end);
729: HText_endAppend(me->text);
730:
731: if (me->target) {
1.35 duns 732: (*me->targetClass._free)(me->target);
1.2 timbl 733: }
1.19 frystyk 734: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 735: free(me);
1.37 frystyk 736: return 0;
1.1 timbl 737: }
738:
739:
1.37 frystyk 740: PRIVATE int HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 741:
1.14 timbl 742: {
743: if (me->target) {
744: (*me->targetClass.abort)(me->target, e);
745: }
1.19 frystyk 746: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 747: free(me);
1.37 frystyk 748: return EOF;
1.1 timbl 749: }
750:
1.2 timbl 751:
752: /* Get Styles from style sheet
753: ** ---------------------------
754: */
755: PRIVATE void get_styles NOARGS
1.1 timbl 756: {
1.2 timbl 757: got_styles = YES;
758:
759: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 760:
1.2 timbl 761: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
762: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
763: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
764: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
765: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
766: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
767: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
768:
769: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
770: styles[HTML_UL] =
771: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
772: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
773: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 774: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 775: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
776: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
777: styles[HTML_PLAINTEXT] =
778: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
779: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
780: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
781: }
782: /* P U B L I C
783: */
784:
785: /* Structured Object Class
786: ** -----------------------
787: */
788: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
789: {
790: "text/html",
791: HTML_free,
1.14 timbl 792: HTML_abort,
1.2 timbl 793: HTML_put_character, HTML_put_string, HTML_write,
794: HTML_start_element, HTML_end_element,
795: HTML_put_entity
796: };
1.1 timbl 797:
1.4 timbl 798:
1.2 timbl 799: /* New Structured Text object
800: ** --------------------------
801: **
1.16 timbl 802: ** The structured stream can generate either presentation,
1.4 timbl 803: ** or plain text, or HTML.
1.1 timbl 804: */
1.16 timbl 805: PUBLIC HTStructured* HTML_new ARGS5(
806: HTRequest *, request,
807: void *, param,
808: HTFormat, input_format,
809: HTFormat, output_format,
810: HTStream *, output_stream)
1.1 timbl 811: {
812:
1.4 timbl 813: HTStructured * me;
814:
1.16 timbl 815: if (output_format != WWW_PLAINTEXT
816: && output_format != WWW_PRESENT
817: && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 818: HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
819: output_stream, request, NO);
1.6 timbl 820: if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 821: fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
1.16 timbl 822: HTAtom_name(output_format));
1.4 timbl 823: exit (-99);
824: }
825:
826: me = (HTStructured*) malloc(sizeof(*me));
827: if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 828:
829: if (!got_styles) get_styles();
830:
1.4 timbl 831: me->isa = &HTMLPresentation;
1.16 timbl 832: me->dtd = &DTD;
833: me->node_anchor = request->anchor;
1.4 timbl 834: me->title.size = 0;
835: me->title.growby = 128;
836: me->title.allocated = 0;
837: me->title.data = 0;
838: me->text = 0;
839: me->style_change = YES; /* Force check leading to text creation */
840: me->new_style = default_style;
841: me->old_style = 0;
842: me->sp = me->stack + MAX_NESTING - 1;
843: me->sp->tag_number = -1; /* INVALID */
844: me->sp->style = default_style; /* INVALID */
1.1 timbl 845:
1.4 timbl 846: me->comment_start = NULL;
847: me->comment_end = NULL;
1.16 timbl 848: me->target = output_stream;
849: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 850:
1.4 timbl 851: return (HTStructured*) me;
1.1 timbl 852: }
853:
854:
1.2 timbl 855: /* HTConverter for HTML to plain text
856: ** ----------------------------------
1.1 timbl 857: **
1.2 timbl 858: ** This will convert from HTML to presentation or plain text.
1.1 timbl 859: */
1.16 timbl 860: PUBLIC HTStream* HTMLToPlain ARGS5(
861: HTRequest *, request,
862: void *, param,
863: HTFormat, input_format,
864: HTFormat, output_format,
865: HTStream *, output_stream)
1.1 timbl 866: {
1.16 timbl 867: return SGML_new(&DTD, HTML_new(
868: request, NULL, input_format, output_format, output_stream));
1.1 timbl 869: }
870:
871:
1.2 timbl 872: /* HTConverter for HTML to C code
873: ** ------------------------------
874: **
1.36 frystyk 875: ** C code is like plain text but all non-preformatted code
1.2 timbl 876: ** is commented out.
877: ** This will convert from HTML to presentation or plain text.
878: */
1.16 timbl 879: PUBLIC HTStream* HTMLToC ARGS5(
880: HTRequest *, request,
881: void *, param,
882: HTFormat, input_format,
883: HTFormat, output_format,
884: HTStream *, output_stream)
1.1 timbl 885: {
1.4 timbl 886:
887: HTStructured * html;
888:
1.36 frystyk 889: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 890: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2 timbl 891: html->comment_start = "/* ";
1.16 timbl 892: html->dtd = &DTD;
1.2 timbl 893: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.16 timbl 894: return SGML_new(&DTD, html);
1.1 timbl 895: }
896:
897:
1.2 timbl 898: /* Presenter for HTML
899: ** ------------------
900: **
901: ** This will convert from HTML to presentation or plain text.
902: **
903: ** Override this if you have a windows version
1.1 timbl 904: */
1.2 timbl 905: #ifndef GUI
1.16 timbl 906: PUBLIC HTStream* HTMLPresent ARGS5(
907: HTRequest *, request,
908: void *, param,
909: HTFormat, input_format,
910: HTFormat, output_format,
911: HTStream *, output_stream)
1.1 timbl 912: {
1.16 timbl 913: return SGML_new(&DTD, HTML_new(
914: request, NULL, input_format, output_format, output_stream));
1.1 timbl 915: }
1.2 timbl 916: #endif
1.1 timbl 917:
918:
1.2 timbl 919: /* Record error message as a hypertext object
920: ** ------------------------------------------
921: **
922: ** The error message should be marked as an error so that
923: ** it can be reloaded later.
924: ** This implementation just throws up an error message
925: ** and leaves the document unloaded.
1.9 timbl 926: ** A smarter implementation would load an error document,
927: ** marking it as such so that it is retried on reload.
1.1 timbl 928: **
1.2 timbl 929: ** On entry,
930: ** sink is a stream to the output device if any
931: ** number is the HTTP error number
932: ** message is the human readable message.
1.9 timbl 933: **
934: ** On exit,
935: ** returns a negative number to indicate lack of success in the load.
1.1 timbl 936: */
1.2 timbl 937:
938: PUBLIC int HTLoadError ARGS3(
1.17 luotonen 939: HTRequest *, req,
1.2 timbl 940: int, number,
941: CONST char *, message)
942: {
1.20 frystyk 943: char *err = "Oh I screwed up!"; /* Dummy pointer not used (I hope) */
1.2 timbl 944: HTAlert(message); /* @@@@@@@@@@@@@@@@@@@ */
1.20 frystyk 945: /* Clean up! Henrik 04/03-94 */
946: if (req && req->output_stream)
947: (*req->output_stream->isa->abort)(req->output_stream, err);
1.33 frystyk 948: #if OLD_CODE
1.25 luotonen 949: HTClearErrors(req);
1.33 frystyk 950: #endif
1.2 timbl 951: return -number;
952: }
1.29 frystyk 953:
Webmaster