Annotation of libwww/Library/src/HTML.c, revision 1.28
1.2 timbl 1: /* Structured stream to Rich hypertext converter
2: ** ============================================
1.1 timbl 3: **
1.2 timbl 4: ** This generates a hypertext object. It converts from the
5: ** structured stream interface for HTML events into the style-
6: ** oriented interface of the HText.h interface. This module is
7: ** only used in clients and should not be linked into servers.
1.1 timbl 8: **
1.6 timbl 9: ** Override this module if making a new GUI browser.
1.1 timbl 10: **
11: */
1.16 timbl 12:
1.1 timbl 13: #include "HTML.h"
14:
1.16 timbl 15: /* #define CAREFUL Check nesting here not really necessary */
1.2 timbl 16:
1.1 timbl 17: #include <ctype.h>
18: #include <stdio.h>
19:
20: #include "HTAtom.h"
21: #include "HTChunk.h"
22: #include "HText.h"
23: #include "HTStyle.h"
24:
1.3 timbl 25: #include "HTAlert.h"
1.4 timbl 26: #include "HTMLGen.h"
1.8 timbl 27: #include "HTParse.h"
1.28 ! frystyk 28: #include "HTError.h" /* Because of HTErrorMsg */
1.1 timbl 29:
30: extern HTStyleSheet * styleSheet; /* Application-wide */
31:
32: /* Module-wide style cache
33: */
34: PRIVATE int got_styles = 0;
1.16 timbl 35: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 36: PRIVATE HTStyle *default_style;
1.1 timbl 37:
38:
39: /* HTML Object
40: ** -----------
41: */
1.2 timbl 42: #define MAX_NESTING 20 /* Should be checked by parser */
43:
44: typedef struct _stack_element {
45: HTStyle * style;
46: int tag_number;
47: } stack_element;
48:
49: struct _HTStructured {
50: CONST HTStructuredClass * isa;
51: HTParentAnchor * node_anchor;
52: HText * text;
53:
54: HTStream* target; /* Output stream */
55: HTStreamClass targetClass; /* Output routines */
56:
57: HTChunk title; /* Grow by 128 */
58:
59: char * comment_start; /* for literate programming */
60: char * comment_end;
1.16 timbl 61:
62: CONST SGML_dtd* dtd;
63:
1.2 timbl 64: HTTag * current_tag;
65: BOOL style_change;
66: HTStyle * new_style;
67: HTStyle * old_style;
68: BOOL in_word; /* Have just had a non-white char */
69: stack_element stack[MAX_NESTING];
70: stack_element *sp; /* Style stack pointer */
1.1 timbl 71: };
72:
1.2 timbl 73: struct _HTStream {
74: CONST HTStreamClass * isa;
75: /* .... */
76: };
1.1 timbl 77:
78: /* Forward declarations of routines
79: */
80: PRIVATE void get_styles NOPARAMS;
81:
82:
1.4 timbl 83: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 84: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 85:
/*      Style buffering avoids dummy paragraph begin/ends.
**
**  Applies a pending style change, if there is one, by calling
**  actually_set_style().  Expects a variable `me' (HTStructured *) to be
**  in scope at the point of use.
**
**  The body is wrapped in do { } while (0) so that `UPDATE_STYLE;' is
**  exactly one statement and cannot capture a following `else'.
*/
#define UPDATE_STYLE \
    do { if (me->style_change) { actually_set_style(me); } } while (0)
1.1 timbl 89:
90:
1.2 timbl 91: #ifdef OLD_CODE
1.1 timbl 92: /* The following accented characters are from peter Flynn, curia project */
93:
94: /* these ifdefs don't solve the problem of a simple terminal emulator
95: ** with a different character set to the client machine. But nothing does,
96: ** except looking at the TERM setting */
97:
1.2 timbl 98:
1.1 timbl 99: { "ocus" , "&" }, /* for CURIA */
100: #ifdef IBMPC
101: { "aacute" , "\240" }, /* For PC display */
102: { "eacute" , "\202" },
103: { "iacute" , "\241" },
104: { "oacute" , "\242" },
105: { "uacute" , "\243" },
106: { "Aacute" , "\101" },
107: { "Eacute" , "\220" },
108: { "Iacute" , "\111" },
109: { "Oacute" , "\117" },
110: { "Uacute" , "\125" },
111: #else
112: { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
113: { "eacute" , "\351" },
114: { "iacute" , "\355" },
115: { "oacute" , "\363" },
116: { "uacute" , "\372" },
117: { "Aacute" , "\301" },
118: { "Eacute" , "\310" },
119: { "Iacute" , "\315" },
120: { "Oacute" , "\323" },
121: { "Uacute" , "\332" },
122: #endif
123: { 0, 0 } /* Terminate list */
124: };
1.2 timbl 125: #endif
1.1 timbl 126:
127:
/*      Entity values -- for ISO Latin 1 local representation
**
**      This MUST match exactly the table referred to in the DTD!
**
**  Each entry is a one-character string; the number in the left-hand
**  comment is the entry's index in the table.
*/
static char * ISO_Latin1[] = {
    /*  0 */ "\306",    /* capital AE diphthong (ligature) */
    /*  1 */ "\301",    /* capital A, acute accent */
    /*  2 */ "\302",    /* capital A, circumflex accent */
    /*  3 */ "\300",    /* capital A, grave accent */
    /*  4 */ "\305",    /* capital A, ring */
    /*  5 */ "\303",    /* capital A, tilde */
    /*  6 */ "\304",    /* capital A, dieresis or umlaut mark */
    /*  7 */ "\307",    /* capital C, cedilla */
    /*  8 */ "\320",    /* capital Eth, Icelandic */
    /*  9 */ "\311",    /* capital E, acute accent */
    /* 10 */ "\312",    /* capital E, circumflex accent */
    /* 11 */ "\310",    /* capital E, grave accent */
    /* 12 */ "\313",    /* capital E, dieresis or umlaut mark */
    /* 13 */ "\315",    /* capital I, acute accent */
    /* 14 */ "\316",    /* capital I, circumflex accent */
    /* 15 */ "\314",    /* capital I, grave accent */
    /* 16 */ "\317",    /* capital I, dieresis or umlaut mark */
    /* 17 */ "\321",    /* capital N, tilde */
    /* 18 */ "\323",    /* capital O, acute accent */
    /* 19 */ "\324",    /* capital O, circumflex accent */
    /* 20 */ "\322",    /* capital O, grave accent */
    /* 21 */ "\330",    /* capital O, slash */
    /* 22 */ "\325",    /* capital O, tilde */
    /* 23 */ "\326",    /* capital O, dieresis or umlaut mark */
    /* 24 */ "\336",    /* capital THORN, Icelandic */
    /* 25 */ "\332",    /* capital U, acute accent */
    /* 26 */ "\333",    /* capital U, circumflex accent */
    /* 27 */ "\331",    /* capital U, grave accent */
    /* 28 */ "\334",    /* capital U, dieresis or umlaut mark */
    /* 29 */ "\335",    /* capital Y, acute accent */
    /* 30 */ "\341",    /* small a, acute accent */
    /* 31 */ "\342",    /* small a, circumflex accent */
    /* 32 */ "\346",    /* small ae diphthong (ligature) */
    /* 33 */ "\340",    /* small a, grave accent */
    /* 34 */ "\046",    /* ampersand */
    /* 35 */ "\345",    /* small a, ring */
    /* 36 */ "\343",    /* small a, tilde */
    /* 37 */ "\344",    /* small a, dieresis or umlaut mark */
    /* 38 */ "\347",    /* small c, cedilla */
    /* 39 */ "\351",    /* small e, acute accent */
    /* 40 */ "\352",    /* small e, circumflex accent */
    /* 41 */ "\350",    /* small e, grave accent */
    /* 42 */ "\360",    /* small eth, Icelandic */
    /* 43 */ "\353",    /* small e, dieresis or umlaut mark */
    /* 44 */ "\076",    /* greater than */
    /* 45 */ "\355",    /* small i, acute accent */
    /* 46 */ "\356",    /* small i, circumflex accent */
    /* 47 */ "\354",    /* small i, grave accent */
    /* 48 */ "\357",    /* small i, dieresis or umlaut mark */
    /* 49 */ "\074",    /* less than */
    /* 50 */ "\361",    /* small n, tilde */
    /* 51 */ "\363",    /* small o, acute accent */
    /* 52 */ "\364",    /* small o, circumflex accent */
    /* 53 */ "\362",    /* small o, grave accent */
    /* 54 */ "\370",    /* small o, slash */
    /* 55 */ "\365",    /* small o, tilde */
    /* 56 */ "\366",    /* small o, dieresis or umlaut mark */
    /* 57 */ "\337",    /* small sharp s, German (sz ligature) */
    /* 58 */ "\376",    /* small thorn, Icelandic */
    /* 59 */ "\372",    /* small u, acute accent */
    /* 60 */ "\373",    /* small u, circumflex accent */
    /* 61 */ "\371",    /* small u, grave accent */
    /* 62 */ "\374",    /* small u, dieresis or umlaut mark */
    /* 63 */ "\375",    /* small y, acute accent */
    /* 64 */ "\377",    /* small y, dieresis or umlaut mark */
};
199:
1.2 timbl 200:
/*      Entity values -- for NeXT local representation
**
**      This MUST match exactly the table referred to in the DTD!
**
**  Same entry order as the ISO Latin 1 table; the number in the left-hand
**  comment is the entry's index.  The original author's margin note
**  described the capital-letter codes as "ISO -100 hex", with capital
**  O slash marked as an exception ("'cept this").
*/
static char * NeXTCharacters[] = {
    /*  0 */ "\341",    /* capital AE diphthong (ligature) */
    /*  1 */ "\202",    /* capital A, acute accent */
    /*  2 */ "\203",    /* capital A, circumflex accent */
    /*  3 */ "\201",    /* capital A, grave accent */
    /*  4 */ "\206",    /* capital A, ring */
    /*  5 */ "\204",    /* capital A, tilde */
    /*  6 */ "\205",    /* capital A, dieresis or umlaut mark */
    /*  7 */ "\207",    /* capital C, cedilla */
    /*  8 */ "\220",    /* capital Eth, Icelandic */
    /*  9 */ "\211",    /* capital E, acute accent */
    /* 10 */ "\212",    /* capital E, circumflex accent */
    /* 11 */ "\210",    /* capital E, grave accent */
    /* 12 */ "\213",    /* capital E, dieresis or umlaut mark */
    /* 13 */ "\215",    /* capital I, acute accent */
    /* 14 */ "\216",    /* capital I, circumflex accent */
    /* 15 */ "\214",    /* capital I, grave accent */
    /* 16 */ "\217",    /* capital I, dieresis or umlaut mark */
    /* 17 */ "\221",    /* capital N, tilde */
    /* 18 */ "\223",    /* capital O, acute accent */
    /* 19 */ "\224",    /* capital O, circumflex accent */
    /* 20 */ "\222",    /* capital O, grave accent */
    /* 21 */ "\351",    /* capital O, slash */
    /* 22 */ "\225",    /* capital O, tilde */
    /* 23 */ "\226",    /* capital O, dieresis or umlaut mark */
    /* 24 */ "\234",    /* capital THORN, Icelandic */
    /* 25 */ "\230",    /* capital U, acute accent */
    /* 26 */ "\231",    /* capital U, circumflex accent */
    /* 27 */ "\227",    /* capital U, grave accent */
    /* 28 */ "\232",    /* capital U, dieresis or umlaut mark */
    /* 29 */ "\233",    /* capital Y, acute accent */
    /* 30 */ "\326",    /* small a, acute accent */
    /* 31 */ "\327",    /* small a, circumflex accent */
    /* 32 */ "\361",    /* small ae diphthong (ligature) */
    /* 33 */ "\325",    /* small a, grave accent */
    /* 34 */ "\046",    /* ampersand */
    /* 35 */ "\332",    /* small a, ring */
    /* 36 */ "\330",    /* small a, tilde */
    /* 37 */ "\331",    /* small a, dieresis or umlaut mark */
    /* 38 */ "\333",    /* small c, cedilla */
    /* 39 */ "\335",    /* small e, acute accent */
    /* 40 */ "\336",    /* small e, circumflex accent */
    /* 41 */ "\334",    /* small e, grave accent */
    /* 42 */ "\346",    /* small eth, Icelandic */
    /* 43 */ "\337",    /* small e, dieresis or umlaut mark */
    /* 44 */ "\076",    /* greater than */
    /* 45 */ "\342",    /* small i, acute accent */
    /* 46 */ "\344",    /* small i, circumflex accent */
    /* 47 */ "\340",    /* small i, grave accent */
    /* 48 */ "\345",    /* small i, dieresis or umlaut mark */
    /* 49 */ "\074",    /* less than */
    /* 50 */ "\347",    /* small n, tilde */
    /* 51 */ "\355",    /* small o, acute accent */
    /* 52 */ "\356",    /* small o, circumflex accent */
    /* 53 */ "\354",    /* small o, grave accent */
    /* 54 */ "\371",    /* small o, slash */
    /* 55 */ "\357",    /* small o, tilde */
    /* 56 */ "\360",    /* small o, dieresis or umlaut mark */
    /* 57 */ "\373",    /* small sharp s, German (sz ligature) */
    /* 58 */ "\374",    /* small thorn, Icelandic */
    /* 59 */ "\363",    /* small u, acute accent */
    /* 60 */ "\364",    /* small u, circumflex accent */
    /* 61 */ "\362",    /* small u, grave accent */
    /* 62 */ "\366",    /* small u, dieresis or umlaut mark */
    /* 63 */ "\367",    /* small y, acute accent */
    /* 64 */ "\375",    /* small y, dieresis or umlaut mark */
};
273:
1.2 timbl 274: /* Entity values -- for IBM/PC Code Page 850 (International)
275: **
276: ** This MUST match exactly the table referred to in the DTD!
277: **
278: */
279: /* @@@@@@@@@@@@@@@@@ TBD */
280:
281:
282:
283: /* Set character set
284: ** ----------------
285: */
286:
287: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 288:
1.2 timbl 289: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
290: {
291: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
292: : ISO_Latin1;
293: }
1.1 timbl 294:
295:
296: /* Flattening the style structure
297: ** ------------------------------
298: **
299: On the NeXT, and on any read-only browser, it is simpler for the text to have
300: a sequence of styles, rather than a nested tree of styles. In this
301: case we have to flatten the structure as it arrives from SGML tags into
302: a sequence of styles.
303: */
304:
305: /* If style really needs to be set, call this
306: */
1.4 timbl 307: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 308: {
1.4 timbl 309: if (!me->text) { /* First time through */
310: me->text = HText_new2(me->node_anchor, me->target);
311: HText_beginAppend(me->text);
312: HText_setStyle(me->text, me->new_style);
313: me->in_word = NO;
1.1 timbl 314: } else {
1.4 timbl 315: HText_setStyle(me->text, me->new_style);
1.1 timbl 316: }
1.4 timbl 317: me->old_style = me->new_style;
318: me->style_change = NO;
1.1 timbl 319: }
320:
321: /* If you THINK you need to change style, call this
322: */
323:
1.11 timbl 324: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 325: {
1.4 timbl 326: if (me->new_style!=style) {
327: me->style_change = YES;
328: me->new_style = style;
1.1 timbl 329: }
1.11 timbl 330: me->in_word = NO;
1.1 timbl 331: }
332:
1.2 timbl 333: /*_________________________________________________________________________
334: **
335: ** A C T I O N R O U T I N E S
336: */
337:
338: /* Character handling
339: ** ------------------
1.1 timbl 340: */
1.4 timbl 341: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 342: {
1.2 timbl 343:
1.4 timbl 344: switch (me->sp[0].tag_number) {
1.2 timbl 345: case HTML_COMMENT:
346: break; /* Do Nothing */
347:
348: case HTML_TITLE:
1.4 timbl 349: HTChunkPutc(&me->title, c);
1.2 timbl 350: break;
351:
352:
353: case HTML_LISTING: /* Litteral text */
354: case HTML_XMP:
355: case HTML_PLAINTEXT:
356: case HTML_PRE:
357: /* We guarrantee that the style is up-to-date in begin_litteral
358: */
1.4 timbl 359: HText_appendCharacter(me->text, c);
1.2 timbl 360: break;
361:
362: default: /* Free format text */
1.4 timbl 363: if (me->style_change) {
1.2 timbl 364: if ((c=='\n') || (c==' ')) return; /* Ignore it */
365: UPDATE_STYLE;
366: }
367: if (c=='\n') {
1.4 timbl 368: if (me->in_word) {
369: HText_appendCharacter(me->text, ' ');
370: me->in_word = NO;
1.2 timbl 371: }
372: } else {
1.4 timbl 373: HText_appendCharacter(me->text, c);
374: me->in_word = YES;
1.2 timbl 375: }
376: } /* end switch */
1.1 timbl 377: }
378:
1.2 timbl 379:
380:
381: /* String handling
382: ** ---------------
383: **
384: ** This is written separately from put_character becuase the loop can
1.11 timbl 385: ** in some cases be promoted to a higher function call level for speed.
1.2 timbl 386: */
1.4 timbl 387: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 388: {
1.2 timbl 389:
1.4 timbl 390: switch (me->sp[0].tag_number) {
1.2 timbl 391: case HTML_COMMENT:
392: break; /* Do Nothing */
393:
394: case HTML_TITLE:
1.4 timbl 395: HTChunkPuts(&me->title, s);
1.2 timbl 396: break;
397:
398:
399: case HTML_LISTING: /* Litteral text */
400: case HTML_XMP:
401: case HTML_PLAINTEXT:
402: case HTML_PRE:
403:
404: /* We guarrantee that the style is up-to-date in begin_litteral
405: */
1.4 timbl 406: HText_appendText(me->text, s);
1.2 timbl 407: break;
408:
409: default: /* Free format text */
410: {
411: CONST char *p = s;
1.4 timbl 412: if (me->style_change) {
1.2 timbl 413: for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
414: if (!*p) return;
415: UPDATE_STYLE;
416: }
417: for(; *p; p++) {
1.4 timbl 418: if (me->style_change) {
1.2 timbl 419: if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
420: UPDATE_STYLE;
421: }
422: if (*p=='\n') {
1.4 timbl 423: if (me->in_word) {
424: HText_appendCharacter(me->text, ' ');
425: me->in_word = NO;
1.2 timbl 426: }
427: } else {
1.4 timbl 428: HText_appendCharacter(me->text, *p);
429: me->in_word = YES;
1.2 timbl 430: }
431: } /* for */
432: }
433: } /* end switch */
1.1 timbl 434: }
435:
436:
1.2 timbl 437: /* Buffer write
1.3 timbl 438: ** ------------
1.1 timbl 439: */
1.4 timbl 440: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 441: {
1.2 timbl 442: CONST char* p;
443: CONST char* e = s+l;
1.4 timbl 444: for (p=s; s<e; p++) HTML_put_character(me, *p);
1.1 timbl 445: }
1.2 timbl 446:
447:
448: /* Start Element
449: ** -------------
450: */
451: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 452: HTStructured *, me,
1.16 timbl 453: int, element_number,
1.3 timbl 454: CONST BOOL*, present,
1.16 timbl 455: CONST char **, value)
1.2 timbl 456: {
457: switch (element_number) {
458: case HTML_A:
459: {
1.8 timbl 460: HTChildAnchor * source;
1.9 timbl 461: char * href = NULL;
462: if (present[HTML_A_HREF]) {
463: StrAllocCopy(href, value[HTML_A_HREF]);
464: HTSimplify(href);
465: }
1.8 timbl 466: source = HTAnchor_findChildAndLink(
1.4 timbl 467: me->node_anchor, /* parent */
1.2 timbl 468: present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 469: present[HTML_A_HREF] ? href : 0, /* Addresss */
1.16 timbl 470: present[HTML_A_REL] && value[HTML_A_REL] ?
471: (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 472: : 0);
473:
474: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
475: HTParentAnchor * dest =
476: HTAnchor_parent(
477: HTAnchor_followMainLink((HTAnchor*)source)
478: );
479: if (!HTAnchor_title(dest))
480: HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
481: }
482: UPDATE_STYLE;
1.4 timbl 483: HText_beginAnchor(me->text, source);
1.18 frystyk 484: free(href); /* Leak fix Henrik 17/02-94 */
1.2 timbl 485: }
486: break;
487:
488: case HTML_TITLE:
1.4 timbl 489: HTChunkClear(&me->title);
1.2 timbl 490: break;
491:
492: case HTML_NEXTID:
493: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 494: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 495: break;
496:
497: case HTML_ISINDEX:
1.4 timbl 498: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 499: break;
500:
1.15 timbl 501: case HTML_BR:
502: UPDATE_STYLE;
503: HText_appendCharacter(me->text, '\n');
504: me->in_word = NO;
505: break;
506:
507: case HTML_HR:
508: UPDATE_STYLE;
509: HText_appendCharacter(me->text, '\n');
1.16 timbl 510: HText_appendText(me->text, "___________________________________");
1.15 timbl 511: HText_appendCharacter(me->text, '\n');
512: me->in_word = NO;
513: break;
514:
1.2 timbl 515: case HTML_P:
516: UPDATE_STYLE;
1.4 timbl 517: HText_appendParagraph(me->text);
518: me->in_word = NO;
1.2 timbl 519: break;
520:
521: case HTML_DL:
1.11 timbl 522: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 523: ? styles[HTML_DL]
1.2 timbl 524: : styles[HTML_DL]);
525: break;
526:
527: case HTML_DT:
1.4 timbl 528: if (!me->style_change) {
529: HText_appendParagraph(me->text);
530: me->in_word = NO;
1.2 timbl 531: }
532: break;
533:
534: case HTML_DD:
535: UPDATE_STYLE;
1.4 timbl 536: HTML_put_character(me, '\t'); /* Just tab out one stop */
537: me->in_word = NO;
538: break;
1.2 timbl 539:
540: case HTML_UL:
541: case HTML_OL:
542: case HTML_MENU:
543: case HTML_DIR:
1.11 timbl 544: change_paragraph_style(me, styles[element_number]);
1.2 timbl 545: break;
546:
547: case HTML_LI:
548: UPDATE_STYLE;
1.7 timbl 549: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 550: HText_appendParagraph(me->text);
1.2 timbl 551: else
1.4 timbl 552: HText_appendCharacter(me->text, '\t'); /* Tab @@ nl for UL? */
553: me->in_word = NO;
1.2 timbl 554: break;
555:
556: case HTML_LISTING: /* Litteral text */
557: case HTML_XMP:
558: case HTML_PLAINTEXT:
559: case HTML_PRE:
1.11 timbl 560: change_paragraph_style(me, styles[element_number]);
1.2 timbl 561: UPDATE_STYLE;
1.4 timbl 562: if (me->comment_end)
563: HText_appendText(me->text, me->comment_end);
1.2 timbl 564: break;
1.11 timbl 565:
1.23 frystyk 566: case HTML_IMG: /* Images */
567: {
568: HTChildAnchor *source;
569: char *src = NULL;
570: if (present[HTML_IMG_SRC]) {
571: StrAllocCopy(src, value[HTML_IMG_SRC]);
572: HTSimplify(src);
573: }
574: source = HTAnchor_findChildAndLink(
575: me->node_anchor, /* parent */
576: 0, /* Tag */
577: src ? src : 0, /* Addresss */
578: 0);
579: UPDATE_STYLE;
580: HText_appendImage(me->text, source,
1.24 frystyk 581: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
582: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
583: present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 584: free(src);
1.24 frystyk 585: }
586: break;
587:
588: case HTML_HTML: /* Ignore these altogether */
589: case HTML_HEAD:
590: case HTML_BODY:
591:
1.10 timbl 592: case HTML_TT: /* Physical character highlighting */
593: case HTML_B: /* Currently ignored */
594: case HTML_I:
595: case HTML_U:
596:
597: case HTML_EM: /* Logical character highlighting */
598: case HTML_STRONG: /* Currently ignored */
599: case HTML_CODE:
600: case HTML_SAMP:
601: case HTML_KBD:
602: case HTML_VAR:
603: case HTML_DFN:
604: case HTML_CITE:
605: break;
606:
1.11 timbl 607: case HTML_H1: /* paragraph styles */
608: case HTML_H2:
609: case HTML_H3:
610: case HTML_H4:
611: case HTML_H5:
612: case HTML_H6:
613: case HTML_H7:
614: case HTML_ADDRESS:
615: case HTML_BLOCKQUOTE:
616: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 617: break;
618:
619: } /* end switch */
620:
1.16 timbl 621: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 622: if (me->sp == me->stack) {
1.12 timbl 623: fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
624: MAX_NESTING);
625: return;
626: }
1.4 timbl 627: --(me->sp);
628: me->sp[0].style = me->new_style; /* Stack new style */
629: me->sp[0].tag_number = element_number;
1.10 timbl 630: }
1.1 timbl 631: }
1.10 timbl 632:
1.2 timbl 633:
1.1 timbl 634: /* End Element
1.2 timbl 635: ** -----------
1.1 timbl 636: **
1.2 timbl 637: */
638: /* When we end an element, the style must be returned to that
1.1 timbl 639: ** in effect before that element. Note that anchors (etc?)
640: ** don't have an associated style, so that we must scan down the
641: ** stack for an element with a defined style. (In fact, the styles
642: ** should be linked to the whole stack not just the top one.)
643: ** TBL 921119
1.6 timbl 644: **
645: ** We don't turn on "CAREFUL" check because the parser produces
646: ** (internal code errors apart) good nesting. The parser checks
647: ** incoming code errors, not this module.
1.1 timbl 648: */
1.4 timbl 649: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 650: {
1.2 timbl 651: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 652: if (element_number != me->sp[0].tag_number) {
1.2 timbl 653: fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 654: me->dtd->tags[element_number].name,
655: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 656: /* panic */
1.1 timbl 657: }
1.2 timbl 658: #endif
659:
1.4 timbl 660: me->sp++; /* Pop state off stack */
1.2 timbl 661:
662: switch(element_number) {
663:
664: case HTML_A:
665: UPDATE_STYLE;
1.4 timbl 666: HText_endAnchor(me->text);
1.2 timbl 667: break;
668:
669: case HTML_TITLE:
1.4 timbl 670: HTChunkTerminate(&me->title);
671: HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 672: break;
673:
674: case HTML_LISTING: /* Litteral text */
675: case HTML_XMP:
676: case HTML_PLAINTEXT:
677: case HTML_PRE:
1.4 timbl 678: if (me->comment_start)
679: HText_appendText(me->text, me->comment_start);
1.2 timbl 680: /* Fall through */
681:
682: default:
683:
1.11 timbl 684: change_paragraph_style(me, me->sp->style); /* Often won't really change */
1.2 timbl 685: break;
686:
687: } /* switch */
1.1 timbl 688: }
689:
1.2 timbl 690:
691: /* Expanding entities
692: ** ------------------
693: */
694: /* (In fact, they all shrink!)
1.1 timbl 695: */
1.2 timbl 696:
1.4 timbl 697: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 698: {
1.4 timbl 699: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 700: }
1.2 timbl 701:
702:
703: /* Free an HTML object
704: ** -------------------
705: **
1.4 timbl 706: ** If the document is empty, the text object will not yet exist.
707: So we could in fact abandon creating the document and return
708: an error code. In fact an empty document is an important type
709: of document, so we don't.
710: **
711: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 712: ** Otherwise, the interactive object is left.
713: */
1.4 timbl 714: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1 timbl 715: {
1.4 timbl 716: UPDATE_STYLE; /* Creates empty document here! */
717: if (me->comment_end)
718: HTML_put_string(me,me->comment_end);
719: HText_endAppend(me->text);
720:
721: if (me->target) {
722: (*me->targetClass.free)(me->target);
1.2 timbl 723: }
1.19 frystyk 724: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 725: free(me);
1.1 timbl 726: }
727:
728:
1.14 timbl 729: PRIVATE void HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 730:
1.14 timbl 731: {
732: if (me->target) {
733: (*me->targetClass.abort)(me->target, e);
734: }
1.19 frystyk 735: HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 736: free(me);
1.1 timbl 737: }
738:
1.2 timbl 739:
740: /* Get Styles from style sheet
741: ** ---------------------------
742: */
743: PRIVATE void get_styles NOARGS
1.1 timbl 744: {
1.2 timbl 745: got_styles = YES;
746:
747: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 748:
1.2 timbl 749: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
750: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
751: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
752: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
753: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
754: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
755: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
756:
757: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
758: styles[HTML_UL] =
759: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
760: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
761: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 762: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 763: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
764: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
765: styles[HTML_PLAINTEXT] =
766: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
767: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
768: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
769: }
770: /* P U B L I C
771: */
772:
773: /* Structured Object Class
774: ** -----------------------
775: */
776: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
777: {
778: "text/html",
779: HTML_free,
1.14 timbl 780: HTML_abort,
1.2 timbl 781: HTML_put_character, HTML_put_string, HTML_write,
782: HTML_start_element, HTML_end_element,
783: HTML_put_entity
784: };
1.1 timbl 785:
1.4 timbl 786:
1.2 timbl 787: /* New Structured Text object
788: ** --------------------------
789: **
1.16 timbl 790: ** The structured stream can generate either presentation,
1.4 timbl 791: ** or plain text, or HTML.
1.1 timbl 792: */
1.16 timbl 793: PUBLIC HTStructured* HTML_new ARGS5(
794: HTRequest *, request,
795: void *, param,
796: HTFormat, input_format,
797: HTFormat, output_format,
798: HTStream *, output_stream)
1.1 timbl 799: {
800:
1.4 timbl 801: HTStructured * me;
802:
1.16 timbl 803: if (output_format != WWW_PLAINTEXT
804: && output_format != WWW_PRESENT
805: && output_format != HTAtom_for("text/x-c")) {
1.21 luotonen 806: HTStream * intermediate = HTStreamStack(WWW_HTML, request, NO);
1.6 timbl 807: if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 808: fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
1.16 timbl 809: HTAtom_name(output_format));
1.4 timbl 810: exit (-99);
811: }
812:
813: me = (HTStructured*) malloc(sizeof(*me));
814: if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 815:
816: if (!got_styles) get_styles();
817:
1.4 timbl 818: me->isa = &HTMLPresentation;
1.16 timbl 819: me->dtd = &DTD;
820: me->node_anchor = request->anchor;
1.4 timbl 821: me->title.size = 0;
822: me->title.growby = 128;
823: me->title.allocated = 0;
824: me->title.data = 0;
825: me->text = 0;
826: me->style_change = YES; /* Force check leading to text creation */
827: me->new_style = default_style;
828: me->old_style = 0;
829: me->sp = me->stack + MAX_NESTING - 1;
830: me->sp->tag_number = -1; /* INVALID */
831: me->sp->style = default_style; /* INVALID */
1.1 timbl 832:
1.4 timbl 833: me->comment_start = NULL;
834: me->comment_end = NULL;
1.16 timbl 835: me->target = output_stream;
836: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 837:
1.4 timbl 838: return (HTStructured*) me;
1.1 timbl 839: }
840:
841:
1.2 timbl 842: /* HTConverter for HTML to plain text
843: ** ----------------------------------
1.1 timbl 844: **
1.2 timbl 845: ** This will convert from HTML to presentation or plain text.
1.1 timbl 846: */
1.16 timbl 847: PUBLIC HTStream* HTMLToPlain ARGS5(
848: HTRequest *, request,
849: void *, param,
850: HTFormat, input_format,
851: HTFormat, output_format,
852: HTStream *, output_stream)
1.1 timbl 853: {
1.16 timbl 854: return SGML_new(&DTD, HTML_new(
855: request, NULL, input_format, output_format, output_stream));
1.1 timbl 856: }
857:
858:
1.2 timbl 859: /* HTConverter for HTML to C code
860: ** ------------------------------
861: **
862: ** C copde is like plain text but all non-preformatted code
863: ** is commented out.
864: ** This will convert from HTML to presentation or plain text.
865: */
1.16 timbl 866: PUBLIC HTStream* HTMLToC ARGS5(
867: HTRequest *, request,
868: void *, param,
869: HTFormat, input_format,
870: HTFormat, output_format,
871: HTStream *, output_stream)
1.1 timbl 872: {
1.4 timbl 873:
874: HTStructured * html;
875:
1.16 timbl 876: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before even title */
877: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2 timbl 878: html->comment_start = "/* ";
1.16 timbl 879: html->dtd = &DTD;
1.2 timbl 880: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.4 timbl 881: /* HTML_put_string(html,html->comment_start); */
1.16 timbl 882: return SGML_new(&DTD, html);
1.1 timbl 883: }
884:
885:
1.2 timbl 886: /* Presenter for HTML
887: ** ------------------
888: **
889: ** This will convert from HTML to presentation or plain text.
890: **
891: ** Override this if you have a windows version
1.1 timbl 892: */
1.2 timbl 893: #ifndef GUI
1.16 timbl 894: PUBLIC HTStream* HTMLPresent ARGS5(
895: HTRequest *, request,
896: void *, param,
897: HTFormat, input_format,
898: HTFormat, output_format,
899: HTStream *, output_stream)
1.1 timbl 900: {
1.16 timbl 901: return SGML_new(&DTD, HTML_new(
902: request, NULL, input_format, output_format, output_stream));
1.1 timbl 903: }
1.2 timbl 904: #endif
1.1 timbl 905:
906:
1.2 timbl 907: /* Record error message as a hypertext object
908: ** ------------------------------------------
909: **
910: ** The error message should be marked as an error so that
911: ** it can be reloaded later.
912: ** This implementation just throws up an error message
913: ** and leaves the document unloaded.
1.9 timbl 914: ** A smarter implementation would load an error document,
915: ** marking at such so that it is retried on reload.
1.1 timbl 916: **
1.2 timbl 917: ** On entry,
918: ** sink is a stream to the output device if any
919: ** number is the HTTP error number
920: ** message is the human readable message.
1.9 timbl 921: **
922: ** On exit,
923: ** returns a negative number to indicate lack of success in the load.
1.1 timbl 924: */
1.2 timbl 925:
926: PUBLIC int HTLoadError ARGS3(
1.17 luotonen 927: HTRequest *, req,
1.2 timbl 928: int, number,
929: CONST char *, message)
930: {
1.20 frystyk 931: char *err = "Oh I screwed up!"; /* Dummy pointer not used (I hope) */
1.2 timbl 932: HTAlert(message); /* @@@@@@@@@@@@@@@@@@@ */
1.20 frystyk 933: /* Clean up! Henrik 04/03-94 */
934: if (req && req->output_stream)
935: (*req->output_stream->isa->abort)(req->output_stream, err);
1.25 luotonen 936: HTClearErrors(req);
1.2 timbl 937: return -number;
938: }
939:
1.28 ! frystyk 940:
! 941: /* ------------------------------------------------------------------------- */
! 942: /* NOTE: THIS FUNCTION IS PLACED HEER AS THE HTML.C MODULE NORMALLY GETS
! 943: ** OVERWRITTEN BY THE CLIENT OR SERVER
! 944: ** HTErrorMsg
! 945: **
! 946: ** Creates an error message on standard output containing the
! 947: ** error_stack messages. The HTErr
! 948: ** Only if the global variable HTErrorInfoPath != NULL, an anchor
! 949: ** will be created to an message help file. It is garanteed that
! 950: ** NO STREAM has been put up or taken down in the library at this point.
! 951: ** This function might be overwritten by a smart server or client.
! 952: */
! 953: PUBLIC void HTErrorMsg ARGS1(HTRequest *, request)
! 954: {
! 955: HTList *cur = request->error_stack;
! 956: BOOL highest = YES;
! 957: HTErrorInfo *pres;
! 958: if (!request) {
! 959: if (TRACE) fprintf(stderr, "HTErrorMsg.. Bad argument!\n");
! 960: return;
! 961: }
! 962: if (request->error_block) {
! 963: if (TRACE) fprintf(stderr, "HTErrorMsg.. Errors are not printed as no stream is available.\n");
! 964: return;
! 965: }
! 966:
! 967: /* Output messages */
! 968: while ((pres = (HTErrorInfo *) HTList_nextObject(cur))) {
! 969:
! 970: /* Check if we are going to show the message */
! 971: if ((!pres->ignore || HTErrorShowMask & HT_ERR_SHOW_IGNORE) &&
! 972: (HTErrorShowMask & pres->severity)) {
! 973:
! 974: /* Output code number */
! 975: if (highest) { /* If first time through */
! 976: if (TRACE)
! 977: fprintf(stderr,
! 978: "HTError..... Generating error message.\n");
! 979:
! 980: /* Output title */
! 981: fprintf(stderr, "\nError Message:\n");
! 982:
! 983: if (pres->severity == ERR_WARNING)
! 984: fprintf(stderr, "Warning ");
! 985: else if (pres->severity == ERR_NON_FATAL)
! 986: fprintf(stderr, "Non Fatal Error ");
! 987: else if (pres->severity == ERR_FATAL)
! 988: fprintf(stderr, "Fatal Error ");
! 989: else {
! 990: fprintf(stderr, "Unknown Classification of Error...\n");
! 991: return;
! 992: }
! 993:
! 994: /* Only output error code if it is a real HTTP code */
! 995: if (pres->element < HTERR_HTTP_CODES_END)
! 996: fprintf(stderr, "%d ", error_info[pres->element].code);
! 997: highest = NO;
! 998: } else
! 999: fprintf(stderr, "This occurred because: ");
! 1000:
! 1001: /* Output error message */
! 1002: fprintf(stderr, "%s\n", error_info[pres->element].msg);
! 1003:
! 1004: /* Output parameters */
! 1005: if (pres->par && HTErrorShowMask & HT_ERR_SHOW_PARS) {
! 1006: int cnt;
! 1007: char *tstr;
! 1008: char *nptr;
! 1009: if ((tstr = (char *) malloc(pres->par_length+1)) == NULL)
! 1010: outofmem(__FILE__, "HTErrorMsg");
! 1011: nptr = tstr;
! 1012: for (cnt=0; cnt<pres->par_length; cnt++) {
! 1013: if (*((char *)(pres->par)+cnt) < 0x20 ||
! 1014: *((char *)(pres->par)+cnt) >= 0x7F)
! 1015: *nptr++ = '#';
! 1016: else
! 1017: *nptr++ = *((char *)(pres->par)+cnt);
! 1018: }
! 1019: *nptr = '\0';
! 1020: fprintf(stderr, " (%s)\n", tstr);
! 1021: free(tstr);
! 1022: }
! 1023:
! 1024: /* Output location */
! 1025: if (pres->where && HTErrorShowMask & HT_ERR_SHOW_LOCATION) {
! 1026: fprintf(stderr, "This occured in %s\n", pres->where);
! 1027: }
! 1028:
! 1029: /* If we only are going to show the higest entry */
! 1030: if (HTErrorShowMask & HT_ERR_SHOW_FIRST)
! 1031: break;
! 1032: }
! 1033: }
! 1034: return;
! 1035: }
Webmaster