Annotation of libwww/Library/src/HTML.c, revision 1.71
1.39 frystyk 1: /* HTML.c
2: ** STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
3: **
1.43 frystyk 4: ** (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
1.71 ! frystyk 6: ** @(#) $Id: HTML.c,v 1.70 1998/02/25 20:48:31 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: ** This generates a hypertext object. It converts from the
9: ** structured stream interface for HTML events into the style-
1.47 frystyk 10: ** oriented calls of the HText interface. This module is
1.2 timbl 11: ** only used in clients and should not be linked into servers.
1.1 timbl 12: **
1.6 timbl 13: ** Override this module if making a new GUI browser.
1.1 timbl 14: **
1.35 duns 15: ** HISTORY:
16: ** 8 Jul 94 FM Insulate free() from _free structure element.
17: **
1.1 timbl 18: */
1.16 timbl 19:
1.41 frystyk 20: /* Library include files */
1.60 frystyk 21: #include "sysdep.h"
1.63 frystyk 22: #include "WWWUtil.h"
23: #include "WWWCore.h"
24: #include "WWWHTML.h"
1.1 timbl 25: #include "HText.h"
26: #include "HTStyle.h"
1.41 frystyk 27: #include "HTML.h"
1.1 timbl 28:
29: extern HTStyleSheet * styleSheet; /* Application-wide */
30:
31: /* Module-wide style cache
32: */
33: PRIVATE int got_styles = 0;
1.16 timbl 34: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 35: PRIVATE HTStyle *default_style;
1.1 timbl 36:
1.71 ! frystyk 37: #define HTTAB '\0'
1.1 timbl 38:
39: /* HTML Object
40: ** -----------
41: */
1.2 timbl 42: #define MAX_NESTING 20 /* Should be checked by parser */
43:
44: typedef struct _stack_element {
45: HTStyle * style;
46: int tag_number;
47: } stack_element;
48:
49: struct _HTStructured {
1.60 frystyk 50: const HTStructuredClass * isa;
1.54 frystyk 51: HTRequest * request;
1.2 timbl 52: HTParentAnchor * node_anchor;
53: HText * text;
54:
55: HTStream* target; /* Output stream */
56: HTStreamClass targetClass; /* Output routines */
57:
1.56 frystyk 58: HTChunk * title; /* Grow by 128 */
1.2 timbl 59:
60: char * comment_start; /* for literate programming */
61: char * comment_end;
1.16 timbl 62:
1.60 frystyk 63: const SGML_dtd* dtd;
1.16 timbl 64:
1.2 timbl 65: HTTag * current_tag;
66: BOOL style_change;
67: HTStyle * new_style;
68: HTStyle * old_style;
69: BOOL in_word; /* Have just had a non-white char */
1.44 frystyk 70:
71: stack_element stack[MAX_NESTING];
72: stack_element *sp; /* Style stack pointer */
73: int overflow; /* Keep track of overflow nesting */
1.1 timbl 74: };
75:
1.2 timbl 76: struct _HTStream {
1.60 frystyk 77: const HTStreamClass * isa;
1.2 timbl 78: /* .... */
79: };
1.1 timbl 80:
81: /* Forward declarations of routines
82: */
1.52 frystyk 83: PRIVATE void get_styles (void);
1.1 timbl 84:
85:
1.52 frystyk 86: PRIVATE void actually_set_style (HTStructured * me);
87: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1 timbl 88:
89: /* Style buffering avoids dummy paragraph begin/ends.
90: */
1.4 timbl 91: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 92:
/*	Entity values -- for ISO Latin 1 local representation
**	-----------------------------------------------------
**	One single-character string per entity.
**
**	This MUST match exactly the table referred to in the DTD!
*/
static char * ISO_Latin1[] = {
    "\306",		/* capital AE diphthong (ligature) */
    "\301",		/* capital A, acute accent */
    "\302",		/* capital A, circumflex accent */
    "\300",		/* capital A, grave accent */
    "\305",		/* capital A, ring */
    "\303",		/* capital A, tilde */
    "\304",		/* capital A, dieresis or umlaut mark */
    "\307",		/* capital C, cedilla */
    "\320",		/* capital Eth, Icelandic */
    "\311",		/* capital E, acute accent */
    "\312",		/* capital E, circumflex accent */
    "\310",		/* capital E, grave accent */
    "\313",		/* capital E, dieresis or umlaut mark */
    "\315",		/* capital I, acute accent */
    "\316",		/* capital I, circumflex accent */
    "\314",		/* capital I, grave accent */
    "\317",		/* capital I, dieresis or umlaut mark */
    "\321",		/* capital N, tilde */
    "\323",		/* capital O, acute accent */
    "\324",		/* capital O, circumflex accent */
    "\322",		/* capital O, grave accent */
    "\330",		/* capital O, slash */
    "\325",		/* capital O, tilde */
    "\326",		/* capital O, dieresis or umlaut mark */
    "\336",		/* capital THORN, Icelandic */
    "\332",		/* capital U, acute accent */
    "\333",		/* capital U, circumflex accent */
    "\331",		/* capital U, grave accent */
    "\334",		/* capital U, dieresis or umlaut mark */
    "\335",		/* capital Y, acute accent */
    "\341",		/* small a, acute accent */
    "\342",		/* small a, circumflex accent */
    "\346",		/* small ae diphthong (ligature) */
    "\340",		/* small a, grave accent */
    "\046",		/* ampersand */
    "\345",		/* small a, ring */
    "\343",		/* small a, tilde */
    "\344",		/* small a, dieresis or umlaut mark */
    "\347",		/* small c, cedilla */
    "\351",		/* small e, acute accent */
    "\352",		/* small e, circumflex accent */
    "\350",		/* small e, grave accent */
    "\360",		/* small eth, Icelandic */
    "\353",		/* small e, dieresis or umlaut mark */
    "\076",		/* greater than */
    "\355",		/* small i, acute accent */
    "\356",		/* small i, circumflex accent */
    "\354",		/* small i, grave accent */
    "\357",		/* small i, dieresis or umlaut mark */
    "\074",		/* less than */
    "\040",		/* non-breaking space (emitted as a plain space) */
    "\361",		/* small n, tilde */
    "\363",		/* small o, acute accent */
    "\364",		/* small o, circumflex accent */
    "\362",		/* small o, grave accent */
    "\370",		/* small o, slash */
    "\365",		/* small o, tilde */
    "\366",		/* small o, dieresis or umlaut mark */
    "\042",		/* double quote sign - June 94 */
    "\337",		/* small sharp s, German (sz ligature) */
    "\376",		/* small thorn, Icelandic */
    "\372",		/* small u, acute accent */
    "\373",		/* small u, circumflex accent */
    "\371",		/* small u, grave accent */
    "\374",		/* small u, dieresis or umlaut mark */
    "\375",		/* small y, acute accent */
    "\377",		/* small y, dieresis or umlaut mark */
};
166:
1.2 timbl 167:
/*	Entity values -- for NeXT local representation
**	----------------------------------------------
**	Same entity order as ISO_Latin1, one single-character string each.
**
**	This MUST match exactly the table referred to in the DTD!
*/
static char * NeXTCharacters[] = {
    "\341",		/* capital AE diphthong (ligature) */
    "\202",		/* capital A, acute accent */
    "\203",		/* capital A, circumflex accent */
    "\201",		/* capital A, grave accent */
    "\206",		/* capital A, ring */
    "\204",		/* capital A, tilde */
    "\205",		/* capital A, dieresis or umlaut mark */
    "\207",		/* capital C, cedilla */
    "\220",		/* capital Eth, Icelandic */
    "\211",		/* capital E, acute accent */
    "\212",		/* capital E, circumflex accent */
    "\210",		/* capital E, grave accent */
    "\213",		/* capital E, dieresis or umlaut mark */
    "\215",		/* capital I, acute accent */
    "\216",		/* capital I, circumflex accent      these are */
    "\214",		/* capital I, grave accent           ISO -100 hex */
    "\217",		/* capital I, dieresis or umlaut mark */
    "\221",		/* capital N, tilde */
    "\223",		/* capital O, acute accent */
    "\224",		/* capital O, circumflex accent */
    "\222",		/* capital O, grave accent */
    "\351",		/* capital O, slash                  'cept this */
    "\225",		/* capital O, tilde */
    "\226",		/* capital O, dieresis or umlaut mark */
    "\234",		/* capital THORN, Icelandic */
    "\230",		/* capital U, acute accent */
    "\231",		/* capital U, circumflex accent */
    "\227",		/* capital U, grave accent */
    "\232",		/* capital U, dieresis or umlaut mark */
    "\233",		/* capital Y, acute accent */
    "\326",		/* small a, acute accent */
    "\327",		/* small a, circumflex accent */
    "\361",		/* small ae diphthong (ligature) */
    "\325",		/* small a, grave accent */
    "\046",		/* ampersand */
    "\332",		/* small a, ring */
    "\330",		/* small a, tilde */
    "\331",		/* small a, dieresis or umlaut mark */
    "\333",		/* small c, cedilla */
    "\335",		/* small e, acute accent */
    "\336",		/* small e, circumflex accent */
    "\334",		/* small e, grave accent */
    "\346",		/* small eth, Icelandic */
    "\337",		/* small e, dieresis or umlaut mark */
    "\076",		/* greater than */
    "\342",		/* small i, acute accent */
    "\344",		/* small i, circumflex accent */
    "\340",		/* small i, grave accent */
    "\345",		/* small i, dieresis or umlaut mark */
    "\074",		/* less than */
    "\040",		/* non-breaking space (emitted as a plain space) */
    "\347",		/* small n, tilde */
    "\355",		/* small o, acute accent */
    "\356",		/* small o, circumflex accent */
    "\354",		/* small o, grave accent */
    "\371",		/* small o, slash */
    "\357",		/* small o, tilde */
    "\360",		/* small o, dieresis or umlaut mark */
    "\042",		/* double quote sign - June 94 */
    "\373",		/* small sharp s, German (sz ligature) */
    "\374",		/* small thorn, Icelandic */
    "\363",		/* small u, acute accent */
    "\364",		/* small u, circumflex accent */
    "\362",		/* small u, grave accent */
    "\366",		/* small u, dieresis or umlaut mark */
    "\367",		/* small y, acute accent */
    "\375",		/* small y, dieresis or umlaut mark */
};
242:
1.2 timbl 243: /* Entity values -- for IBM/PC Code Page 850 (International)
244: **
245: ** This MUST match exactly the table referred to in the DTD!
246: **
247: */
248: /* @@@@@@@@@@@@@@@@@ TBD */
249:
250:
251:
252: /* Set character set
253: ** ----------------
254: */
255:
256: PRIVATE char** p_entity_values = ISO_Latin1; /* Pointer to translation */
1.1 timbl 257:
1.53 frystyk 258: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2 timbl 259: {
260: p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
261: : ISO_Latin1;
262: }
1.1 timbl 263:
264:
265: /* Flattening the style structure
266: ** ------------------------------
267: **
268: On the NeXT, and on any read-only browser, it is simpler for the text to have
269: a sequence of styles, rather than a nested tree of styles. In this
270: case we have to flatten the structure as it arrives from SGML tags into
271: a sequence of styles.
272: */
273:
274: /* If style really needs to be set, call this
275: */
1.53 frystyk 276: PRIVATE void actually_set_style (HTStructured * me)
1.1 timbl 277: {
1.4 timbl 278: if (!me->text) { /* First time through */
1.54 frystyk 279: me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4 timbl 280: HText_beginAppend(me->text);
281: HText_setStyle(me->text, me->new_style);
282: me->in_word = NO;
1.1 timbl 283: } else {
1.4 timbl 284: HText_setStyle(me->text, me->new_style);
1.1 timbl 285: }
1.4 timbl 286: me->old_style = me->new_style;
287: me->style_change = NO;
1.1 timbl 288: }
289:
290: /* If you THINK you need to change style, call this
291: */
292:
1.53 frystyk 293: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1 timbl 294: {
1.4 timbl 295: if (me->new_style!=style) {
296: me->style_change = YES;
297: me->new_style = style;
1.1 timbl 298: }
1.11 timbl 299: me->in_word = NO;
1.1 timbl 300: }
301:
1.2 timbl 302: /*_________________________________________________________________________
303: **
304: ** A C T I O N R O U T I N E S
305: */
306:
1.71 ! frystyk 307: PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
1.1 timbl 308: {
1.71 ! frystyk 309: while (l-- > 0) {
! 310: const char c = *b++;
! 311: switch (me->sp[0].tag_number) {
! 312: case HTML_COMMENT:
! 313: break; /* Do Nothing */
! 314:
! 315: case HTML_TITLE:
! 316: HTChunk_putb(me->title, &c, 1);
! 317: break;
! 318:
! 319: case HTML_LISTING: /* Litteral text */
! 320: case HTML_XMP:
! 321: case HTML_PLAINTEXT:
! 322: case HTML_PRE:
! 323: /* We guarrantee that the style is up-to-date in begin_litteral */
! 324: HText_appendCharacter(me->text, c);
! 325: break;
1.2 timbl 326:
1.71 ! frystyk 327: default: /* Free format text */
! 328: if (me->style_change) {
! 329: if ((c=='\n') || (c==' ')) return HT_OK; /* Ignore it */
! 330: UPDATE_STYLE;
! 331: }
! 332: if (c == HTTAB)
! 333: HText_appendCharacter(me->text, '\t');
! 334: else if (isspace((int) c)) {
! 335: if (me->in_word) {
! 336: HText_appendCharacter(me->text, ' ');
! 337: me->in_word = NO;
! 338: }
! 339: } else {
! 340: HText_appendCharacter(me->text, c);
! 341: me->in_word = YES;
1.2 timbl 342: }
343: }
1.71 ! frystyk 344: }
1.42 frystyk 345: return HT_OK;
1.1 timbl 346: }
347:
1.71 ! frystyk 348: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 349: {
1.71 ! frystyk 350: return HTML_write(me, &c, sizeof(char));
1.1 timbl 351: }
352:
1.71 ! frystyk 353:
1.64 frystyk 354: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1 timbl 355: {
1.71 ! frystyk 356: return HTML_write(me, s, (int) strlen(s));
1.1 timbl 357: }
1.2 timbl 358:
359: /* Start Element
360: ** -------------
361: */
1.53 frystyk 362: PRIVATE void HTML_start_element (
363: HTStructured * me,
364: int element_number,
1.60 frystyk 365: const BOOL* present,
366: const char ** value)
1.2 timbl 367: {
368: switch (element_number) {
369: case HTML_A:
1.69 frystyk 370: {
371: HTChildAnchor * source = HTAnchor_findChildAndLink(
372: me->node_anchor, /* parent */
373: present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */
374: present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL, /* Addresss */
375: present[HTML_A_REL] && value[HTML_A_REL] ?
376: (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);
1.2 timbl 377:
1.69 frystyk 378: if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
379: HTLink * link = HTAnchor_mainLink((HTAnchor *) source);
380: HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
381: if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
1.2 timbl 382: }
1.69 frystyk 383: UPDATE_STYLE;
384: HText_beginAnchor(me->text, source);
385: }
386: break;
1.2 timbl 387:
1.63 frystyk 388: case HTML_LINK:
1.69 frystyk 389: {
390: if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
391: char * relative_to = HTAnchor_expandedAddress((HTAnchor *) me->node_anchor);
392: char * dest_addr = HTParse(value[HTML_LINK_HREF], relative_to, PARSE_ALL);
393: HTParentAnchor * dest = HTAnchor_parent(HTAnchor_findAddress(dest_addr));
394:
395: /* If forward reference */
396: if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
397: char * strval = NULL;
398: char * ptr = NULL;
399: char * relation = NULL;
400: StrAllocCopy(strval, value[HTML_LINK_REL]);
401: ptr = strval;
402: while ((relation = HTNextLWSToken(&ptr)) != NULL) {
403: HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
404: (HTLinkType) HTAtom_caseFor(relation),
405: METHOD_INVALID);
406: }
407: HT_FREE(strval);
408: }
409:
410: /* If reverse reference */
411: if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
412: char * strval = NULL;
413: char * ptr = NULL;
414: char * relation = NULL;
415: StrAllocCopy(strval, value[HTML_LINK_REV]);
416: ptr = strval;
417: while ((relation = HTNextLWSToken(&ptr)) != NULL) {
418: HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
419: (HTLinkType) HTAtom_caseFor(relation),
420: METHOD_INVALID);
421: }
422: HT_FREE(strval);
423: }
1.63 frystyk 424:
1.69 frystyk 425: /* If we got any type information as well */
426: if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
427: if (HTAnchor_format(dest) == WWW_UNKNOWN)
428: HTAnchor_setFormat(dest,
429: (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
430: }
1.63 frystyk 431:
1.69 frystyk 432: HT_FREE(dest_addr);
433: HT_FREE(relative_to);
1.70 frystyk 434: }
435: }
436: break;
437:
438: case HTML_META:
439: {
440: /*
441: ** We don't handle HTTP-EQUIV here - only "NAME". It shouldn't be
442: ** a problem, though :)
443: */
444: if (present[HTML_META_NAME] && value[HTML_META_NAME]) {
445: HTAnchor_addMeta (me->node_anchor,
446: value[HTML_META_NAME],
447: (present[HTML_META_CONTENT] && value[HTML_META_CONTENT]) ?
448: value[HTML_META_CONTENT] : "");
1.69 frystyk 449: }
450: }
451: break;
1.63 frystyk 452:
1.2 timbl 453: case HTML_TITLE:
1.56 frystyk 454: HTChunk_clear(me->title);
1.2 timbl 455: break;
456:
457: case HTML_NEXTID:
458: /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 459: HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 460: break;
461:
462: case HTML_ISINDEX:
1.4 timbl 463: HTAnchor_setIndex(me->node_anchor);
1.2 timbl 464: break;
465:
1.15 timbl 466: case HTML_BR:
467: UPDATE_STYLE;
468: HText_appendCharacter(me->text, '\n');
469: me->in_word = NO;
470: break;
471:
472: case HTML_HR:
473: UPDATE_STYLE;
474: HText_appendCharacter(me->text, '\n');
1.16 timbl 475: HText_appendText(me->text, "___________________________________");
1.15 timbl 476: HText_appendCharacter(me->text, '\n');
477: me->in_word = NO;
478: break;
479:
1.2 timbl 480: case HTML_P:
481: UPDATE_STYLE;
1.4 timbl 482: HText_appendParagraph(me->text);
483: me->in_word = NO;
1.2 timbl 484: break;
485:
486: case HTML_DL:
1.11 timbl 487: change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 488: ? styles[HTML_DL]
1.2 timbl 489: : styles[HTML_DL]);
490: break;
491:
492: case HTML_DT:
1.4 timbl 493: if (!me->style_change) {
494: HText_appendParagraph(me->text);
495: me->in_word = NO;
1.2 timbl 496: }
497: break;
498:
499: case HTML_DD:
500: UPDATE_STYLE;
1.71 ! frystyk 501: HTML_put_character(me, HTTAB); /* Just tab out one stop */
1.4 timbl 502: me->in_word = NO;
503: break;
1.2 timbl 504:
505: case HTML_UL:
506: case HTML_OL:
507: case HTML_MENU:
508: case HTML_DIR:
1.11 timbl 509: change_paragraph_style(me, styles[element_number]);
1.2 timbl 510: break;
511:
512: case HTML_LI:
513: UPDATE_STYLE;
1.7 timbl 514: if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 515: HText_appendParagraph(me->text);
1.2 timbl 516: else
1.71 ! frystyk 517: HText_appendCharacter(me->text, HTTAB);
1.4 timbl 518: me->in_word = NO;
1.2 timbl 519: break;
520:
521: case HTML_LISTING: /* Litteral text */
522: case HTML_XMP:
523: case HTML_PLAINTEXT:
524: case HTML_PRE:
1.11 timbl 525: change_paragraph_style(me, styles[element_number]);
1.2 timbl 526: UPDATE_STYLE;
1.4 timbl 527: if (me->comment_end)
528: HText_appendText(me->text, me->comment_end);
1.2 timbl 529: break;
1.11 timbl 530:
1.23 frystyk 531: case HTML_IMG: /* Images */
532: {
533: HTChildAnchor *source;
534: char *src = NULL;
1.49 frystyk 535: if (present[HTML_IMG_SRC])
1.23 frystyk 536: StrAllocCopy(src, value[HTML_IMG_SRC]);
537: source = HTAnchor_findChildAndLink(
538: me->node_anchor, /* parent */
539: 0, /* Tag */
540: src ? src : 0, /* Addresss */
541: 0);
542: UPDATE_STYLE;
543: HText_appendImage(me->text, source,
1.24 frystyk 544: present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
545: present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
546: present[HTML_IMG_ISMAP] ? YES : NO);
1.58 frystyk 547: HT_FREE(src);
1.24 frystyk 548: }
549: break;
550:
1.63 frystyk 551: case HTML_BASE: /* Base header */
552: if (present[HTML_BASE_HREF]) {
553: char * base = (char *) value[HTML_BASE_HREF];
554: if (base) {
555: HTAnchor_setBase(me->node_anchor, base);
556: if (SGML_TRACE) HTTrace("HTML Parser. New base `%s\'\n", base);
557: } else {
558: if (SGML_TRACE) HTTrace("HTML Parser. No base found\n");
559: }
560: }
561: break;
562:
1.24 frystyk 563: case HTML_HTML: /* Ignore these altogether */
564: case HTML_HEAD:
565: case HTML_BODY:
1.62 frystyk 566: break;
1.24 frystyk 567:
1.10 timbl 568: case HTML_TT: /* Physical character highlighting */
569: case HTML_B: /* Currently ignored */
570: case HTML_I:
571: case HTML_U:
1.62 frystyk 572: UPDATE_STYLE;
1.65 frystyk 573: #if 0
1.62 frystyk 574: HText_appendCharacter(me->text, '_');
1.65 frystyk 575: #endif
1.62 frystyk 576: me->in_word = NO;
577: break;
1.10 timbl 578:
579: case HTML_EM: /* Logical character highlighting */
580: case HTML_STRONG: /* Currently ignored */
581: case HTML_CODE:
582: case HTML_SAMP:
583: case HTML_KBD:
584: case HTML_VAR:
585: case HTML_DFN:
586: case HTML_CITE:
587: break;
588:
1.11 timbl 589: case HTML_H1: /* paragraph styles */
590: case HTML_H2:
591: case HTML_H3:
592: case HTML_H4:
593: case HTML_H5:
594: case HTML_H6:
595: case HTML_H7:
596: case HTML_ADDRESS:
597: case HTML_BLOCKQUOTE:
598: change_paragraph_style(me, styles[element_number]); /* May be postponed */
1.2 timbl 599: break;
600:
601: } /* end switch */
602:
1.16 timbl 603: if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 604: if (me->sp == me->stack) {
1.44 frystyk 605: if (SGML_TRACE)
1.63 frystyk 606: HTTrace("HTML Parser. Maximum nesting of %d exceded!\n",
1.44 frystyk 607: MAX_NESTING);
608: me->overflow++;
1.12 timbl 609: return;
610: }
1.4 timbl 611: --(me->sp);
612: me->sp[0].style = me->new_style; /* Stack new style */
613: me->sp[0].tag_number = element_number;
1.10 timbl 614: }
1.1 timbl 615: }
1.10 timbl 616:
1.2 timbl 617:
1.1 timbl 618: /* End Element
1.2 timbl 619: ** -----------
1.1 timbl 620: **
1.2 timbl 621: */
622: /* When we end an element, the style must be returned to that
1.1 timbl 623: ** in effect before that element. Note that anchors (etc?)
624: ** don't have an associated style, so that we must scan down the
625: ** stack for an element with a defined style. (In fact, the styles
626: ** should be linked to the whole stack not just the top one.)
627: ** TBL 921119
1.6 timbl 628: **
629: ** We don't turn on "CAREFUL" check because the parser produces
630: ** (internal code errors apart) good nesting. The parser checks
631: ** incoming code errors, not this module.
1.1 timbl 632: */
1.53 frystyk 633: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 634: {
1.2 timbl 635: #ifdef CAREFUL /* parser assumed to produce good nesting */
1.4 timbl 636: if (element_number != me->sp[0].tag_number) {
1.59 eric 637: HTTrace("HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 638: me->dtd->tags[element_number].name,
639: me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 640: /* panic */
1.1 timbl 641: }
1.2 timbl 642: #endif
1.44 frystyk 643:
644: /* HFN, If overflow of nestings, we need to get back to reality */
645: if (me->overflow > 0) {
646: me->overflow--;
647: return;
648: }
649:
1.4 timbl 650: me->sp++; /* Pop state off stack */
1.67 frystyk 651: if (me->sp > me->stack + MAX_NESTING - 1) {
652: if (SGML_TRACE) HTTrace("HTML Parser. Bottom of style stack reached\n");
653: me->sp = me->stack + MAX_NESTING - 1;
654: }
1.44 frystyk 655:
1.2 timbl 656: switch(element_number) {
657:
658: case HTML_A:
659: UPDATE_STYLE;
1.4 timbl 660: HText_endAnchor(me->text);
1.2 timbl 661: break;
662:
663: case HTML_TITLE:
1.56 frystyk 664: HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 665: break;
666:
1.62 frystyk 667: case HTML_TT: /* Physical character highlighting */
668: case HTML_B: /* Currently ignored */
669: case HTML_I:
670: case HTML_U:
671: UPDATE_STYLE;
1.66 frystyk 672: #if 0
1.62 frystyk 673: HText_appendCharacter(me->text, '_');
1.66 frystyk 674: #endif
1.62 frystyk 675: break;
1.66 frystyk 676:
677: case HTML_EM: /* Logical character highlighting */
678: case HTML_STRONG: /* Currently ignored */
679: case HTML_CODE:
680: case HTML_SAMP:
681: case HTML_KBD:
682: case HTML_VAR:
683: case HTML_DFN:
684: case HTML_CITE:
685: break;
1.62 frystyk 686:
1.2 timbl 687: case HTML_LISTING: /* Litteral text */
688: case HTML_XMP:
689: case HTML_PLAINTEXT:
690: case HTML_PRE:
1.4 timbl 691: if (me->comment_start)
692: HText_appendText(me->text, me->comment_start);
1.2 timbl 693: /* Fall through */
694:
695: default:
1.44 frystyk 696:
697: /* Often won't really change */
698: change_paragraph_style(me, me->sp->style);
1.2 timbl 699: break;
700:
701: } /* switch */
1.1 timbl 702: }
703:
1.2 timbl 704:
705: /* Expanding entities
706: ** ------------------
707: */
708: /* (In fact, they all shrink!)
1.1 timbl 709: */
1.2 timbl 710:
1.53 frystyk 711: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 712: {
1.4 timbl 713: HTML_put_string(me, ISO_Latin1[entity_number]); /* @@ Other representations */
1.1 timbl 714: }
1.2 timbl 715:
1.42 frystyk 716: /* Flush an HTML object
717: ** --------------------
718: */
1.53 frystyk 719: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 720: {
721: UPDATE_STYLE; /* Creates empty document here! */
1.57 frystyk 722: if (me->comment_end) HTML_put_string(me,me->comment_end);
723: return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42 frystyk 724: }
1.2 timbl 725:
726: /* Free an HTML object
727: ** -------------------
728: **
1.4 timbl 729: ** If the document is empty, the text object will not yet exist.
730: So we could in fact abandon creating the document and return
731: an error code. In fact an empty document is an important type
732: of document, so we don't.
733: **
734: ** If non-interactive, everything is freed off. No: crashes -listrefs
1.2 timbl 735: ** Otherwise, the interactive object is left.
736: */
1.53 frystyk 737: PUBLIC int HTML_free (HTStructured * me)
1.1 timbl 738: {
1.4 timbl 739: UPDATE_STYLE; /* Creates empty document here! */
740: if (me->comment_end)
741: HTML_put_string(me,me->comment_end);
742: HText_endAppend(me->text);
743:
744: if (me->target) {
1.35 duns 745: (*me->targetClass._free)(me->target);
1.2 timbl 746: }
1.56 frystyk 747: HTChunk_delete(me->title);
1.58 frystyk 748: HT_FREE(me);
1.42 frystyk 749: return HT_OK;
1.1 timbl 750: }
751:
752:
1.53 frystyk 753: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1 timbl 754:
1.14 timbl 755: {
756: if (me->target) {
757: (*me->targetClass.abort)(me->target, e);
758: }
1.56 frystyk 759: HTChunk_delete(me->title);
1.58 frystyk 760: HT_FREE(me);
1.42 frystyk 761: return HT_ERROR;
1.1 timbl 762: }
763:
1.2 timbl 764:
765: /* Get Styles from style sheet
766: ** ---------------------------
767: */
1.53 frystyk 768: PRIVATE void get_styles (void)
1.1 timbl 769: {
1.2 timbl 770: got_styles = YES;
771:
772: default_style = HTStyleNamed(styleSheet, "Normal");
1.1 timbl 773:
1.2 timbl 774: styles[HTML_H1] = HTStyleNamed(styleSheet, "Heading1");
775: styles[HTML_H2] = HTStyleNamed(styleSheet, "Heading2");
776: styles[HTML_H3] = HTStyleNamed(styleSheet, "Heading3");
777: styles[HTML_H4] = HTStyleNamed(styleSheet, "Heading4");
778: styles[HTML_H5] = HTStyleNamed(styleSheet, "Heading5");
779: styles[HTML_H6] = HTStyleNamed(styleSheet, "Heading6");
780: styles[HTML_H7] = HTStyleNamed(styleSheet, "Heading7");
781:
782: styles[HTML_DL] = HTStyleNamed(styleSheet, "Glossary");
783: styles[HTML_UL] =
784: styles[HTML_OL] = HTStyleNamed(styleSheet, "List");
785: styles[HTML_MENU] = HTStyleNamed(styleSheet, "Menu");
786: styles[HTML_DIR] = HTStyleNamed(styleSheet, "Dir");
1.16 timbl 787: /* styles[HTML_DLC] = HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 788: styles[HTML_ADDRESS]= HTStyleNamed(styleSheet, "Address");
789: styles[HTML_BLOCKQUOTE]= HTStyleNamed(styleSheet, "BlockQuote");
790: styles[HTML_PLAINTEXT] =
791: styles[HTML_XMP] = HTStyleNamed(styleSheet, "Example");
792: styles[HTML_PRE] = HTStyleNamed(styleSheet, "Preformatted");
793: styles[HTML_LISTING] = HTStyleNamed(styleSheet, "Listing");
794: }
795: /* P U B L I C
796: */
797:
798: /* Structured Object Class
799: ** -----------------------
800: */
1.60 frystyk 801: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 802: {
803: "text/html",
1.42 frystyk 804: HTML_flush,
1.2 timbl 805: HTML_free,
1.14 timbl 806: HTML_abort,
1.2 timbl 807: HTML_put_character, HTML_put_string, HTML_write,
808: HTML_start_element, HTML_end_element,
809: HTML_put_entity
810: };
1.1 timbl 811:
1.4 timbl 812:
1.2 timbl 813: /* New Structured Text object
814: ** --------------------------
815: **
1.16 timbl 816: ** The structured stream can generate either presentation,
1.4 timbl 817: ** or plain text, or HTML.
1.1 timbl 818: */
1.53 frystyk 819: PRIVATE HTStructured* HTML_new (HTRequest * request,
820: void * param,
821: HTFormat input_format,
822: HTFormat output_format,
823: HTStream * output_stream)
1.1 timbl 824: {
825:
1.4 timbl 826: HTStructured * me;
827:
1.47 frystyk 828: #if 0
1.16 timbl 829: if (output_format != WWW_PLAINTEXT
830: && output_format != WWW_PRESENT
831: && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 832: HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
833: output_stream, request, NO);
1.6 timbl 834: if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 835: if (SGML_TRACE)
1.63 frystyk 836: HTTrace("HTML Parser. Can't parse HTML to %s\n",
1.44 frystyk 837: HTAtom_name(output_format));
1.4 timbl 838: exit (-99);
839: }
1.47 frystyk 840: #endif
1.4 timbl 841:
1.58 frystyk 842: if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
843: HT_OUTOFMEM("HTML_new");
1.1 timbl 844:
845: if (!got_styles) get_styles();
846:
1.4 timbl 847: me->isa = &HTMLPresentation;
1.47 frystyk 848: me->dtd = &HTMLP_dtd;
1.54 frystyk 849: me->request = request;
1.48 frystyk 850: me->node_anchor = HTRequest_anchor(request);
1.56 frystyk 851: me->title = HTChunk_new(128);
1.4 timbl 852: me->text = 0;
853: me->style_change = YES; /* Force check leading to text creation */
854: me->new_style = default_style;
855: me->old_style = 0;
856: me->sp = me->stack + MAX_NESTING - 1;
857: me->sp->tag_number = -1; /* INVALID */
858: me->sp->style = default_style; /* INVALID */
1.1 timbl 859:
1.4 timbl 860: me->comment_start = NULL;
861: me->comment_end = NULL;
1.16 timbl 862: me->target = output_stream;
863: if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 864:
1.4 timbl 865: return (HTStructured*) me;
1.1 timbl 866: }
867:
868:
1.2 timbl 869: /* HTConverter for HTML to plain text
870: ** ----------------------------------
1.1 timbl 871: **
1.2 timbl 872: ** This will convert from HTML to presentation or plain text.
1.1 timbl 873: */
1.53 frystyk 874: PUBLIC HTStream* HTMLToPlain (
875: HTRequest * request,
876: void * param,
877: HTFormat input_format,
878: HTFormat output_format,
879: HTStream * output_stream)
1.1 timbl 880: {
1.47 frystyk 881: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 882: request, NULL, input_format, output_format, output_stream));
1.1 timbl 883: }
884:
885:
1.2 timbl 886: /* HTConverter for HTML to C code
887: ** ------------------------------
888: **
1.36 frystyk 889: ** C code is like plain text but all non-preformatted code
1.2 timbl 890: ** is commented out.
891: ** This will convert from HTML to presentation or plain text.
892: */
1.53 frystyk 893: PUBLIC HTStream* HTMLToC (
894: HTRequest * request,
895: void * param,
896: HTFormat input_format,
897: HTFormat output_format,
898: HTStream * output_stream)
1.1 timbl 899: {
1.4 timbl 900:
901: HTStructured * html;
902:
1.36 frystyk 903: (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 904: html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 905: html->comment_start = "\n/* ";
1.47 frystyk 906: html->dtd = &HTMLP_dtd;
1.2 timbl 907: html->comment_end = " */\n"; /* Must start in col 1 for cpp */
1.47 frystyk 908: return SGML_new(&HTMLP_dtd, html);
1.1 timbl 909: }
910:
911:
1.2 timbl 912: /* Presenter for HTML
913: ** ------------------
914: **
915: ** This will convert from HTML to presentation or plain text.
916: **
917: ** Override this if you have a windows version
1.1 timbl 918: */
1.2 timbl 919: #ifndef GUI
1.53 frystyk 920: PUBLIC HTStream* HTMLPresent (
921: HTRequest * request,
922: void * param,
923: HTFormat input_format,
924: HTFormat output_format,
925: HTStream * output_stream)
1.1 timbl 926: {
1.47 frystyk 927: return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 928: request, NULL, input_format, output_format, output_stream));
1.1 timbl 929: }
1.2 timbl 930: #endif
1.29 frystyk 931:
Webmaster