Annotation of libwww/Library/src/HTML.c, revision 1.69

1.39      frystyk     1: /*                                                                      HTML.c
                      2: **     STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
                      3: **
1.43      frystyk     4: **     (c) COPYRIGHT MIT 1995.
1.39      frystyk     5: **     Please first read the full copyright statement in the file COPYRIGH.
1.69    ! frystyk     6: **     @(#) $Id: HTML.c,v 1.68 1998/02/01 19:04:14 frystyk Exp $
1.1       timbl       7: **
1.2       timbl       8: **     This generates a hypertext object.  It converts from the
                      9: **     structured stream interface for HTML events into the style-
1.47      frystyk    10: **     oriented interface of the HText module.  This module is
1.2       timbl      11: **     only used in clients and should not be linked into servers.
1.1       timbl      12: **
1.6       timbl      13: **     Override this module if making a new GUI browser.
1.1       timbl      14: **
1.35      duns       15: ** HISTORY:
                     16: **      8 Jul 94  FM   Insulate free() from _free structure element.
                     17: **
1.1       timbl      18: */
1.16      timbl      19: 
1.41      frystyk    20: /* Library include files */
1.60      frystyk    21: #include "sysdep.h"
1.63      frystyk    22: #include "WWWUtil.h"
                     23: #include "WWWCore.h"
                     24: #include "WWWHTML.h"
1.1       timbl      25: #include "HText.h"
                     26: #include "HTStyle.h"
1.41      frystyk    27: #include "HTML.h"
1.1       timbl      28: 
                     29: extern HTStyleSheet * styleSheet;      /* Application-wide */
                     30: 
                     31: /*     Module-wide style cache
                         **
                         **     styles[] holds one HTStyle pointer per element number and is
                         **     indexed with the HTML_xxx element numbers (for example
                         **     styles[HTML_DL] and styles[element_number] in
                         **     HTML_start_element).
                         **     NOTE(review): got_styles looks like a "cache has been loaded"
                         **     flag maintained by get_styles() -- confirm against that routine.
                     32: */
                     33: PRIVATE int            got_styles = 0;
1.16      timbl      34: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2       timbl      35: PRIVATE HTStyle *default_style;
1.1       timbl      36: 
                         /*     TAB is an internal marker (NUL), not the ASCII tab character.
                         **     When HTML_put_character() is handed TAB in free-format text it
                         **     appends a real '\t' to the text object, whereas a '\t' coming
                         **     from the input is white space there and is collapsed.
                         */
1.62      frystyk    37: #define TAB    '\0'
1.1       timbl      38: 
                     39: /*             HTML Object
                     40: **             -----------
                         **
                         **     One stack_element describes one level of the element stack
                         **     kept in struct _HTStructured.  tag_number is the element
                         **     number that HTML_put_character() switches on (through
                         **     sp[0].tag_number) to decide how character data is handled;
                         **     style is an HTStyle pointer stored alongside it.
                     41: */
1.2       timbl      42: #define MAX_NESTING 20         /* Size of stack[]; should be checked by parser */
                     43: 
                     44: typedef struct _stack_element {
                     45:         HTStyle *      style;
                     46:        int             tag_number;
                     47: } stack_element;
                     48: 
                     49: struct _HTStructured {
                             /*  State of one structured-stream to hypertext conversion.
                             **  The style_change / new_style / old_style trio implements the
                             **  style buffering done by change_paragraph_style() and
                             **  actually_set_style().
                             */
1.60      frystyk    50:     const HTStructuredClass *  isa;
1.54      frystyk    51:     HTRequest *                        request;        /* Handed to HText_new2() */
1.2       timbl      52:     HTParentAnchor *           node_anchor;    /* Parent of child anchors; gets base/index */
                     53:     HText *                    text;           /* Created on first actually_set_style() */
                     54: 
                     55:     HTStream*                  target;                 /* Output stream */
                     56:     HTStreamClass              targetClass;            /* Output routines */
                     57: 
1.56      frystyk    58:     HTChunk *                  title;          /* Collects TITLE characters; grow by 128 */
1.2       timbl      59:     
                     60:     char *                     comment_start;  /* for literate programming */
                     61:     char *                     comment_end;    /* If set, appended when PRE etc. starts */
1.16      timbl      62:     
1.60      frystyk    63:     const SGML_dtd*            dtd;
1.16      timbl      64:     
1.2       timbl      65:     HTTag *                    current_tag;
                     66:     BOOL                       style_change;   /* YES: new_style not yet applied */
                     67:     HTStyle *                  new_style;      /* Style requested most recently */
                     68:     HTStyle *                  old_style;      /* Style last applied to text */
                     69:     BOOL                       in_word;  /* Have just had a non-white char */
1.44      frystyk    70: 
                     71:     stack_element              stack[MAX_NESTING];
                     72:     stack_element              *sp;                  /* Style stack pointer; sp[0] is current */
                     73:     int                                overflow;  /* Keep track of overflow nesting */
1.1       timbl      74: };
                     75: 
1.2       timbl      76: struct _HTStream {
1.60      frystyk    77:     const HTStreamClass *      isa;
1.2       timbl      78:     /* .... only the isa member is declared in this module */
                     79: };
1.1       timbl      80: 
                     81: /*             Forward declarations of routines
                     82: */
1.52      frystyk    83: PRIVATE void get_styles (void);
1.1       timbl      84: 
                     85: 
1.52      frystyk    86: PRIVATE void actually_set_style (HTStructured * me);
                     87: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1       timbl      88: 
                     89: /*     Style buffering avoids dummy paragraph begin/ends.
                         **
                         **     change_paragraph_style() only records the requested style and
                         **     raises me->style_change; UPDATE_STYLE applies it through
                         **     actually_set_style() when that flag is set.
                         **     The macro relies on an HTStructured pointer named "me" being in
                         **     scope.  It expands to a bare "if" statement, so take care when
                         **     placing it directly in front of an "else".
                     90: */
1.4       timbl      91: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1       timbl      92: 
1.2       timbl      93: /*     Entity values -- for ISO Latin 1 local representation
                     94: **
                     95: **     This MUST match exactly the table referred to in the DTD!
                         **
                         **     Each entry is a one-character string written as an octal escape.
                         **     The "non-breaking space" entry is a plain space (\040), and the
                         **     ampersand, greater than, less than and double quote entries are
                         **     their ASCII codes.
                         **     NOTE(review): entries are presumably selected by position
                         **     (entity number), which is why the order must not change --
                         **     confirm against the DTD entity name table.
                     96: */
                     97: static char * ISO_Latin1[] = {
                     98:        "\306", /* capital AE diphthong (ligature) */ 
                     99:        "\301", /* capital A, acute accent */ 
                    100:        "\302", /* capital A, circumflex accent */ 
                    101:        "\300", /* capital A, grave accent */ 
                    102:        "\305", /* capital A, ring */ 
                    103:        "\303", /* capital A, tilde */ 
                    104:        "\304", /* capital A, dieresis or umlaut mark */ 
                    105:        "\307", /* capital C, cedilla */ 
                    106:        "\320", /* capital Eth, Icelandic */ 
                    107:        "\311", /* capital E, acute accent */ 
                    108:        "\312", /* capital E, circumflex accent */ 
                    109:        "\310", /* capital E, grave accent */ 
                    110:        "\313", /* capital E, dieresis or umlaut mark */ 
                    111:        "\315", /* capital I, acute accent */ 
                    112:        "\316", /* capital I, circumflex accent */ 
                    113:        "\314", /* capital I, grave accent */ 
                    114:        "\317", /* capital I, dieresis or umlaut mark */ 
                    115:        "\321", /* capital N, tilde */ 
                    116:        "\323", /* capital O, acute accent */ 
                    117:        "\324", /* capital O, circumflex accent */ 
                    118:        "\322", /* capital O, grave accent */ 
                    119:        "\330", /* capital O, slash */ 
                    120:        "\325", /* capital O, tilde */ 
                    121:        "\326", /* capital O, dieresis or umlaut mark */ 
                    122:        "\336", /* capital THORN, Icelandic */ 
                    123:        "\332", /* capital U, acute accent */ 
                    124:        "\333", /* capital U, circumflex accent */ 
                    125:        "\331", /* capital U, grave accent */ 
                    126:        "\334", /* capital U, dieresis or umlaut mark */ 
                    127:        "\335", /* capital Y, acute accent */ 
                    128:        "\341", /* small a, acute accent */ 
                    129:        "\342", /* small a, circumflex accent */ 
                    130:        "\346", /* small ae diphthong (ligature) */ 
                    131:        "\340", /* small a, grave accent */ 
                    132:        "\046", /* ampersand */ 
                    133:        "\345", /* small a, ring */ 
                    134:        "\343", /* small a, tilde */ 
                    135:        "\344", /* small a, dieresis or umlaut mark */ 
                    136:        "\347", /* small c, cedilla */ 
                    137:        "\351", /* small e, acute accent */ 
                    138:        "\352", /* small e, circumflex accent */ 
                    139:        "\350", /* small e, grave accent */ 
                    140:        "\360", /* small eth, Icelandic */ 
                    141:        "\353", /* small e, dieresis or umlaut mark */ 
                    142:        "\076", /* greater than */ 
                    143:        "\355", /* small i, acute accent */ 
                    144:        "\356", /* small i, circumflex accent */ 
                    145:        "\354", /* small i, grave accent */ 
                    146:        "\357", /* small i, dieresis or umlaut mark */ 
                    147:        "\074", /* less than */ 
1.62      frystyk   148:        "\040", /* non-breaking space */
1.2       timbl     149:        "\361", /* small n, tilde */ 
                    150:        "\363", /* small o, acute accent */ 
                    151:        "\364", /* small o, circumflex accent */ 
                    152:        "\362", /* small o, grave accent */ 
                    153:        "\370", /* small o, slash */ 
                    154:        "\365", /* small o, tilde */ 
                    155:        "\366", /* small o, dieresis or umlaut mark */ 
1.36      frystyk   156:         "\042", /* double quote sign - June 94 */
1.2       timbl     157:        "\337", /* small sharp s, German (sz ligature) */ 
                    158:        "\376", /* small thorn, Icelandic */ 
                    159:        "\372", /* small u, acute accent */ 
                    160:        "\373", /* small u, circumflex accent */ 
                    161:        "\371", /* small u, grave accent */ 
                    162:        "\374", /* small u, dieresis or umlaut mark */ 
                    163:        "\375", /* small y, acute accent */ 
                    164:        "\377", /* small y, dieresis or umlaut mark */ 
1.1       timbl     165: };
                    166: 
1.2       timbl     167: 
                    168: /*     Entity values -- for NeXT local representation
                    169: **
                    170: **     This MUST match exactly the table referred to in the DTD!
                    171: **
                         **     Same entries, in the same order, as ISO_Latin1 above, but with
                         **     the codes used by the NeXT character set.  The four ASCII
                         **     entries (ampersand, greater than, less than, double quote) and
                         **     the plain-space "non-breaking space" are identical in both
                         **     tables.  For the capital letters most codes equal the ISO
                         **     Latin 1 code minus 0100 octal (for example \311 becomes \211);
                         **     the capital A forms differ by 077 octal instead, and a few
                         **     entries such as capital AE and capital O slash follow no such
                         **     rule.
                    172: */
                    173: static char * NeXTCharacters[] = {
                    174:        "\341", /* capital AE diphthong (ligature)      */ 
                    175:        "\202", /* capital A, acute accent              */ 
                    176:        "\203", /* capital A, circumflex accent         */ 
                    177:        "\201", /* capital A, grave accent              */ 
                    178:        "\206", /* capital A, ring                      */ 
                    179:        "\204", /* capital A, tilde                     */ 
                    180:        "\205", /* capital A, dieresis or umlaut mark   */ 
                    181:        "\207", /* capital C, cedilla                   */ 
                    182:        "\220", /* capital Eth, Icelandic               */ 
                    183:        "\211", /* capital E, acute accent                              */ 
                    184:        "\212", /* capital E, circumflex accent                         */ 
                    185:        "\210", /* capital E, grave accent                              */ 
                    186:        "\213", /* capital E, dieresis or umlaut mark                   */ 
                    187:        "\215", /* capital I, acute accent                              */ 
                    188:        "\216", /* capital I, circumflex accent         these are mostly */ 
                    189:        "\214", /* capital I, grave accent              ISO - 0100 octal */ 
                    190:        "\217", /* capital I, dieresis or umlaut mark                   */ 
                    191:        "\221", /* capital N, tilde                                     */ 
                    192:        "\223", /* capital O, acute accent                              */ 
                    193:        "\224", /* capital O, circumflex accent                         */ 
                    194:        "\222", /* capital O, grave accent                              */ 
                    195:        "\351", /* capital O, slash             'cept this */ 
                    196:        "\225", /* capital O, tilde                                     */ 
                    197:        "\226", /* capital O, dieresis or umlaut mark                   */ 
                    198:        "\234", /* capital THORN, Icelandic */ 
                    199:        "\230", /* capital U, acute accent */ 
                    200:        "\231", /* capital U, circumflex accent */ 
                    201:        "\227", /* capital U, grave accent */ 
                    202:        "\232", /* capital U, dieresis or umlaut mark */ 
                    203:        "\233", /* capital Y, acute accent */ 
                    204:        "\326", /* small a, acute accent */ 
                    205:        "\327", /* small a, circumflex accent */ 
                    206:        "\361", /* small ae diphthong (ligature) */ 
                    207:        "\325", /* small a, grave accent */ 
                    208:        "\046", /* ampersand */ 
                    209:        "\332", /* small a, ring */ 
                    210:        "\330", /* small a, tilde */ 
                    211:        "\331", /* small a, dieresis or umlaut mark */ 
                    212:        "\333", /* small c, cedilla */ 
                    213:        "\335", /* small e, acute accent */ 
                    214:        "\336", /* small e, circumflex accent */ 
                    215:        "\334", /* small e, grave accent */ 
                    216:        "\346", /* small eth, Icelandic         */ 
                    217:        "\337", /* small e, dieresis or umlaut mark */ 
                    218:        "\076", /* greater than */ 
                    219:        "\342", /* small i, acute accent */ 
                    220:        "\344", /* small i, circumflex accent */ 
                    221:        "\340", /* small i, grave accent */ 
                    222:        "\345", /* small i, dieresis or umlaut mark */ 
                    223:        "\074", /* less than */ 
1.62      frystyk   224:        "\040", /* non-breaking space */
1.2       timbl     225:        "\347", /* small n, tilde */ 
                    226:        "\355", /* small o, acute accent */ 
                    227:        "\356", /* small o, circumflex accent */ 
                    228:        "\354", /* small o, grave accent */ 
                    229:        "\371", /* small o, slash */ 
                    230:        "\357", /* small o, tilde */ 
                    231:        "\360", /* small o, dieresis or umlaut mark */ 
1.36      frystyk   232:         "\042", /* double quote sign - June 94 */
1.2       timbl     233:        "\373", /* small sharp s, German (sz ligature) */ 
                    234:        "\374", /* small thorn, Icelandic */ 
                    235:        "\363", /* small u, acute accent */ 
                    236:        "\364", /* small u, circumflex accent */ 
                    237:        "\362", /* small u, grave accent */ 
                    238:        "\366", /* small u, dieresis or umlaut mark */ 
                    239:        "\367", /* small y, acute accent */ 
                    240:        "\375", /* small y, dieresis or umlaut mark */ 
1.1       timbl     241: };
                    242: 
1.2       timbl     243: /*     Entity values -- for IBM/PC Code Page 850 (International)
                    244: **
                    245: **     This MUST match exactly the table referred to in the DTD!
                    246: **
                    247: */
                    248: /* @@@@@@@@@@@@@@@@@ TBD */
                    249: 
                    250: 
                    251: 
                    252: /*             Set character set
                    253: **             ----------------
                         **
                         **     Selects the entity value table that p_entity_values points to.
                         **     HTML_NEXT_CHARS selects NeXTCharacters; every other value of i
                         **     selects ISO_Latin1, which is also the initial setting.
                         **     The choice is stored in a file-scope variable, so it is shared
                         **     by all users of this module.
                    254: */
                    255: 
                    256: PRIVATE char** p_entity_values = ISO_Latin1;   /* Pointer to translation */
1.1       timbl     257: 
1.53      frystyk   258: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2       timbl     259: {
                    260:     p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
                    261:                                             : ISO_Latin1;
                    262: }
1.1       timbl     263: 
                    264: 
                    265: /*             Flattening the style structure
                    266: **             ------------------------------
                    267: **
                    268: On the NeXT, and on any read-only browser, it is simpler for the text to have
                    269: a sequence of styles, rather than a nested tree of styles. In this
                    270: case we have to flatten the structure as it arrives from SGML tags into
                    271: a sequence of styles.
                    272: */
                    273: 
                    274: /*             If style really needs to be set, call this
                         **
                         **     Applies me->new_style to the text object.  On the first call,
                         **     when me->text does not exist yet, the text object is created
                         **     with HText_new2() from the request, the node anchor and the
                         **     target stream, and HText_beginAppend() is called before the
                         **     style is set.  Afterwards old_style records the style just
                         **     applied and the pending style_change flag is cleared.
                    275: */
1.53      frystyk   276: PRIVATE void actually_set_style (HTStructured * me)
1.1       timbl     277: {
1.4       timbl     278:     if (!me->text) {                   /* First time through */
1.54      frystyk   279:            me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4       timbl     280:            HText_beginAppend(me->text);
                    281:            HText_setStyle(me->text, me->new_style);
                    282:            me->in_word = NO;           /* Nothing appended yet */
1.1       timbl     283:     } else {
1.4       timbl     284:            HText_setStyle(me->text, me->new_style);
1.1       timbl     285:     }
1.4       timbl     286:     me->old_style = me->new_style;
                    287:     me->style_change = NO;
1.1       timbl     288: }
                    289: 
                    290: /*      If you THINK you need to change style, call this
                         **
                         **      Records style as the requested style without touching the text
                         **      object.  style_change is raised only when style differs from
                         **      the style already requested, so repeated requests for the same
                         **      style do nothing.  in_word is reset on every call, whether or
                         **      not the style changed.
                    291: */
                    292: 
1.53      frystyk   293: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1       timbl     294: {
1.4       timbl     295:     if (me->new_style!=style) {
                    296:        me->style_change = YES;
                    297:        me->new_style = style;
1.1       timbl     298:     }
1.11      timbl     299:     me->in_word = NO;
1.1       timbl     300: }
                    301: 
1.2       timbl     302: /*_________________________________________________________________________
                    303: **
                    304: **                     A C T I O N     R O U T I N E S
                    305: */
                    306: 
1.64      frystyk   307: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1       timbl     308: {
                             /*  Handle one character of document content according to the
                             **  element on top of the stack (me->sp[0].tag_number):
                             **    COMMENT                       the character is dropped
                             **    TITLE                         appended to me->title
                             **    LISTING, XMP, PLAINTEXT, PRE  appended to me->text unchanged
                             **    anything else                 free format text, see below
                             **  Always returns HT_OK.
                             */
1.4       timbl     309:     switch (me->sp[0].tag_number) {
1.2       timbl     310:     case HTML_COMMENT:
                    311:        break;                                  /* Do Nothing */
                    312:        
                    313:     case HTML_TITLE:   
1.56      frystyk   314:        HTChunk_putb(me->title, &c, 1);
1.2       timbl     315:        break;
                    316: 
                    317:        
                    318:     case HTML_LISTING:                         /* Literal text */
                    319:     case HTML_XMP:
                    320:     case HTML_PLAINTEXT:
                    321:     case HTML_PRE:
                    322: /*     The style is guaranteed to be up to date here: HTML_start_element
                         **     calls UPDATE_STYLE when one of these elements starts.
                    323: */
1.4       timbl     324:        HText_appendCharacter(me->text, c);
1.2       timbl     325:        break;
                    326:        
                    327:     default:                                   /* Free format text */
                                /* While a style change is pending, newlines and spaces are
                                ** skipped; the first other character applies the new style.
                                */
1.4       timbl     328:        if (me->style_change) {
1.42      frystyk   329:            if ((c=='\n') || (c==' ')) return HT_OK;    /* Ignore it */
1.2       timbl     330:            UPDATE_STYLE;
                    331:        }
                                /* TAB is the internal marker and comes out as a real tab */
1.62      frystyk   332:        if (c == TAB)
                    333:            HText_appendCharacter(me->text, '\t');
                                /* The argument of isspace() must be representable as an
                                ** unsigned char (or be EOF).  Characters above 0x7F are
                                ** negative where plain char is signed, so convert through
                                ** unsigned char instead of casting straight to int.
                                ** A run of white space is reduced to one space, and none is
                                ** emitted unless a word character came before it.
                                */
1.68      frystyk   334:        else if (isspace((unsigned char) c)) {
1.4       timbl     335:            if (me->in_word) {
                    336:                HText_appendCharacter(me->text, ' ');
                    337:                me->in_word = NO;
1.2       timbl     338:            }
                    339:        } else {
1.4       timbl     340:            HText_appendCharacter(me->text, c);
                    341:            me->in_word = YES;
1.2       timbl     342:        }
                    343:     } /* end switch */
1.42      frystyk   344:     return HT_OK;
1.1       timbl     345: }
                    346: 
1.64      frystyk   347: 
                    348: PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
1.1       timbl     349: {
                             /*  Feed the first l bytes of buffer b, one at a time, through
                             **  HTML_put_character().  A count of zero or less does nothing.
                             **  The per-character return values are not inspected; the
                             **  result is always HT_OK.
                             */
1.64      frystyk   350:     while (l-- > 0) HTML_put_character(me, *b++);
                    351:     return HT_OK;
1.1       timbl     352: }
                    353: 
1.64      frystyk   354: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1       timbl     355: {
                             /*  Feed the NUL-terminated string s, one character at a time,
                             **  through HTML_put_character().  The loop stops at the
                             **  terminating NUL, so the TAB marker (also NUL) can never be
                             **  passed on from here.  The per-character return values are
                             **  not inspected; the result is always HT_OK.
                             */
1.64      frystyk   356:     while (*s) HTML_put_character(me, *s++);
                    357:     return HT_OK;
1.1       timbl     358: }
1.2       timbl     359: 
                    360: /*     Start Element
                    361: **     -------------
                    362: */
1.53      frystyk   363: PRIVATE void HTML_start_element (
                    364:        HTStructured *  me,
                    365:        int                     element_number,
1.60      frystyk   366:        const BOOL*             present,
                    367:        const char **           value)
1.2       timbl     368: {
                    369:     switch (element_number) {
                    370:     case HTML_A:
1.69    ! frystyk   371:     {
        !           372:        HTChildAnchor * source = HTAnchor_findChildAndLink(
        !           373:            me->node_anchor,                                    /* parent */
        !           374:            present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,   /* Tag */
        !           375:            present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL,   /* Addresss */
        !           376:            present[HTML_A_REL] && value[HTML_A_REL] ? 
        !           377:            (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);
1.2       timbl     378:            
1.69    ! frystyk   379:        if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
        !           380:            HTLink * link = HTAnchor_mainLink((HTAnchor *) source);
        !           381:            HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
        !           382:            if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
1.2       timbl     383:        }
1.69    ! frystyk   384:        UPDATE_STYLE;
        !           385:        HText_beginAnchor(me->text, source);
        !           386:     }
        !           387:     break;
1.2       timbl     388:        
1.63      frystyk   389:     case HTML_LINK:
1.69    ! frystyk   390:     {
        !           391:        if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
        !           392:            char * relative_to = HTAnchor_expandedAddress((HTAnchor *) me->node_anchor);
        !           393:            char * dest_addr = HTParse(value[HTML_LINK_HREF], relative_to, PARSE_ALL);
        !           394:            HTParentAnchor * dest = HTAnchor_parent(HTAnchor_findAddress(dest_addr));
        !           395: 
        !           396:            /* If forward reference */
        !           397:            if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
        !           398:                char * strval = NULL;
        !           399:                char * ptr = NULL;
        !           400:                char * relation = NULL;
        !           401:                StrAllocCopy(strval, value[HTML_LINK_REL]);
        !           402:                ptr = strval;
        !           403:                while ((relation = HTNextLWSToken(&ptr)) != NULL) {
        !           404:                    HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
        !           405:                               (HTLinkType) HTAtom_caseFor(relation),
        !           406:                               METHOD_INVALID);
        !           407:                }
        !           408:                HT_FREE(strval);
        !           409:            }
        !           410: 
        !           411:            /* If reverse reference */
        !           412:            if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
        !           413:                char * strval = NULL;
        !           414:                char * ptr = NULL;
        !           415:                char * relation = NULL;
        !           416:                StrAllocCopy(strval, value[HTML_LINK_REV]);
        !           417:                ptr = strval;
        !           418:                while ((relation = HTNextLWSToken(&ptr)) != NULL) {
        !           419:                    HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
        !           420:                               (HTLinkType) HTAtom_caseFor(relation),
        !           421:                               METHOD_INVALID);
        !           422:                }
        !           423:                HT_FREE(strval);
        !           424:            }
1.63      frystyk   425: 
1.69    ! frystyk   426:            /* If we got any type information as well */
        !           427:            if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
        !           428:                if (HTAnchor_format(dest) == WWW_UNKNOWN)
        !           429:                    HTAnchor_setFormat(dest,
        !           430:                                       (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
        !           431:            }
1.63      frystyk   432: 
1.69    ! frystyk   433:            HT_FREE(dest_addr);
        !           434:            HT_FREE(relative_to);
        !           435:        }
        !           436:     }
        !           437:     break;
1.63      frystyk   438: 
1.2       timbl     439:     case HTML_TITLE:
1.56      frystyk   440:         HTChunk_clear(me->title);
1.2       timbl     441:        break;
                    442:        
                    443:     case HTML_NEXTID:
                    444:        /* if (present[NEXTID_N] && value[NEXTID_N])
1.4       timbl     445:                HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2       timbl     446:        break;
                    447:        
                    448:     case HTML_ISINDEX:
1.4       timbl     449:        HTAnchor_setIndex(me->node_anchor);
1.2       timbl     450:        break;
                    451:        
1.15      timbl     452:     case HTML_BR: 
                    453:        UPDATE_STYLE;
                    454:        HText_appendCharacter(me->text, '\n');
                    455:        me->in_word = NO;
                    456:        break;
                    457:        
                    458:     case HTML_HR: 
                    459:        UPDATE_STYLE;
                    460:        HText_appendCharacter(me->text, '\n');
1.16      timbl     461:        HText_appendText(me->text, "___________________________________");
1.15      timbl     462:        HText_appendCharacter(me->text, '\n');
                    463:        me->in_word = NO;
                    464:        break;
                    465:        
1.2       timbl     466:     case HTML_P:
                    467:        UPDATE_STYLE;
1.4       timbl     468:        HText_appendParagraph(me->text);
                    469:        me->in_word = NO;
1.2       timbl     470:        break;
                    471: 
                    472:     case HTML_DL:
1.11      timbl     473:         change_paragraph_style(me, present && present[DL_COMPACT]
1.16      timbl     474:                ? styles[HTML_DL]
1.2       timbl     475:                : styles[HTML_DL]);
                    476:        break;
                    477:        
                    478:     case HTML_DT:
1.4       timbl     479:         if (!me->style_change) {
                    480:            HText_appendParagraph(me->text);
                    481:            me->in_word = NO;
1.2       timbl     482:        }
                    483:        break;
                    484:        
                    485:     case HTML_DD:
                    486:         UPDATE_STYLE;
1.62      frystyk   487:        HTML_put_character(me, TAB);    /* Just tab out one stop */
1.4       timbl     488:        me->in_word = NO;
                    489:        break;
1.2       timbl     490: 
                    491:     case HTML_UL:
                    492:     case HTML_OL:
                    493:     case HTML_MENU:
                    494:     case HTML_DIR:
1.11      timbl     495:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     496:        break;
                    497:        
                    498:     case HTML_LI:
                    499:         UPDATE_STYLE;
1.7       timbl     500:        if (me->sp[0].tag_number != HTML_DIR)
1.4       timbl     501:            HText_appendParagraph(me->text);
1.2       timbl     502:        else
1.62      frystyk   503:            HText_appendCharacter(me->text, TAB);
1.4       timbl     504:        me->in_word = NO;
1.2       timbl     505:        break;
                    506:        
                    507:     case HTML_LISTING:                         /* Litteral text */
                    508:     case HTML_XMP:
                    509:     case HTML_PLAINTEXT:
                    510:     case HTML_PRE:
1.11      timbl     511:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     512:        UPDATE_STYLE;
1.4       timbl     513:        if (me->comment_end)
                    514:            HText_appendText(me->text, me->comment_end);
1.2       timbl     515:        break;
1.11      timbl     516: 
1.23      frystyk   517:     case HTML_IMG:                     /* Images */
                    518:        {
                    519:            HTChildAnchor *source;
                    520:            char *src = NULL;
1.49      frystyk   521:            if (present[HTML_IMG_SRC])
1.23      frystyk   522:                StrAllocCopy(src, value[HTML_IMG_SRC]);
                    523:            source = HTAnchor_findChildAndLink(
                    524:                                               me->node_anchor,    /* parent */
                    525:                                               0,                     /* Tag */
                    526:                                               src ? src : 0,    /* Addresss */
                    527:                                               0);
                    528:            UPDATE_STYLE;
                    529:            HText_appendImage(me->text, source,
1.24      frystyk   530:                      present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
                    531:                      present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
                    532:                      present[HTML_IMG_ISMAP] ? YES : NO);
1.58      frystyk   533:            HT_FREE(src);
1.24      frystyk   534:        }       
                    535:        break;
                    536: 
1.63      frystyk   537:     case HTML_BASE:                    /* Base header */
                    538:       if (present[HTML_BASE_HREF]) {
                    539:          char * base = (char *) value[HTML_BASE_HREF];
                    540:          if (base) {
                    541:              HTAnchor_setBase(me->node_anchor, base);
                    542:              if (SGML_TRACE) HTTrace("HTML Parser. New base `%s\'\n", base);
                    543:          } else {
                    544:              if (SGML_TRACE) HTTrace("HTML Parser. No base found\n");
                    545:          }
                    546:       }
                    547:       break;
                    548: 
1.24      frystyk   549:     case HTML_HTML:                    /* Ignore these altogether */
                    550:     case HTML_HEAD:
                    551:     case HTML_BODY:
1.62      frystyk   552:        break;
1.24      frystyk   553:     
1.10      timbl     554:     case HTML_TT:                      /* Physical character highlighting */
                    555:     case HTML_B:                       /* Currently ignored */
                    556:     case HTML_I:
                    557:     case HTML_U:
1.62      frystyk   558:        UPDATE_STYLE;
1.65      frystyk   559: #if 0
1.62      frystyk   560:        HText_appendCharacter(me->text, '_');
1.65      frystyk   561: #endif
1.62      frystyk   562:        me->in_word = NO;
                    563:        break;
1.10      timbl     564:     
                    565:     case HTML_EM:                      /* Logical character highlighting */
                    566:     case HTML_STRONG:                  /* Currently ignored */
                    567:     case HTML_CODE:
                    568:     case HTML_SAMP:
                    569:     case HTML_KBD:
                    570:     case HTML_VAR:
                    571:     case HTML_DFN:
                    572:     case HTML_CITE:
                    573:        break;
                    574:        
1.11      timbl     575:     case HTML_H1:                      /* paragraph styles */
                    576:     case HTML_H2:
                    577:     case HTML_H3:
                    578:     case HTML_H4:
                    579:     case HTML_H5:
                    580:     case HTML_H6:
                    581:     case HTML_H7:
                    582:     case HTML_ADDRESS:
                    583:     case HTML_BLOCKQUOTE:
                    584:        change_paragraph_style(me, styles[element_number]);     /* May be postponed */
1.2       timbl     585:        break;
                    586: 
                    587:     } /* end switch */
                    588: 
1.16      timbl     589:     if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13      timbl     590:         if (me->sp == me->stack) {
1.44      frystyk   591:            if (SGML_TRACE)
1.63      frystyk   592:                HTTrace("HTML Parser. Maximum nesting of %d exceded!\n",
1.44      frystyk   593:                        MAX_NESTING); 
                    594:            me->overflow++;
1.12      timbl     595:            return;
                    596:        }
1.4       timbl     597:        --(me->sp);
                    598:        me->sp[0].style = me->new_style;        /* Stack new style */
                    599:        me->sp[0].tag_number = element_number;
1.10      timbl     600:     }  
1.1       timbl     601: }
1.10      timbl     602: 
1.2       timbl     603: 
1.1       timbl     604: /*             End Element
1.2       timbl     605: **             -----------
1.1       timbl     606: **
1.2       timbl     607: */
                    608: /*     When we end an element, the style must be returned to that
1.1       timbl     609: **     in effect before that element.  Note that anchors (etc?)
                    610: **     don't have an associated style, so that we must scan down the
                    611: **     stack for an element with a defined style. (In fact, the styles
                    612: **     should be linked to the whole stack not just the top one.)
                    613: **     TBL 921119
1.6       timbl     614: **
                    615: **     We don't turn on "CAREFUL" check because the parser produces
                    616: **     (internal code errors apart) good nesting. The parser checks
                    617: **     incoming code errors, not this module.
1.1       timbl     618: */
1.53      frystyk   619: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1       timbl     620: {
1.2       timbl     621: #ifdef CAREFUL                 /* parser assumed to produce good nesting */
1.4       timbl     622:     if (element_number != me->sp[0].tag_number) {
1.59      eric      623:         HTTrace("HTMLText: end of element %s when expecting end of %s\n",
1.16      timbl     624:                me->dtd->tags[element_number].name,
                    625:                me->dtd->tags[me->sp->tag_number].name);
1.6       timbl     626:                /* panic */
1.1       timbl     627:     }
1.2       timbl     628: #endif
1.44      frystyk   629: 
                    630:     /* HFN, If overflow of nestings, we need to get back to reality */
                    631:     if (me->overflow > 0) {
                    632:        me->overflow--;
                    633:        return;
                    634:     }
                    635: 
1.4       timbl     636:     me->sp++;                          /* Pop state off stack */
1.67      frystyk   637:     if (me->sp > me->stack + MAX_NESTING - 1) {
                    638:        if (SGML_TRACE) HTTrace("HTML Parser. Bottom of style stack reached\n");
                    639:        me->sp = me->stack + MAX_NESTING - 1;
                    640:     }
1.44      frystyk   641: 
1.2       timbl     642:     switch(element_number) {
                    643: 
                    644:     case HTML_A:
                    645:        UPDATE_STYLE;
1.4       timbl     646:        HText_endAnchor(me->text);
1.2       timbl     647:        break;
                    648: 
                    649:     case HTML_TITLE:
1.56      frystyk   650:        HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2       timbl     651:        break;
                    652:        
1.62      frystyk   653:     case HTML_TT:                      /* Physical character highlighting */
                    654:     case HTML_B:                       /* Currently ignored */
                    655:     case HTML_I:
                    656:     case HTML_U:
                    657:        UPDATE_STYLE;
1.66      frystyk   658: #if 0
1.62      frystyk   659:        HText_appendCharacter(me->text, '_');
1.66      frystyk   660: #endif
1.62      frystyk   661:        break;
1.66      frystyk   662: 
                    663:     case HTML_EM:                      /* Logical character highlighting */
                    664:     case HTML_STRONG:                  /* Currently ignored */
                    665:     case HTML_CODE:
                    666:     case HTML_SAMP:
                    667:     case HTML_KBD:
                    668:     case HTML_VAR:
                    669:     case HTML_DFN:
                    670:     case HTML_CITE:
                    671:        break;
1.62      frystyk   672:     
1.2       timbl     673:     case HTML_LISTING:                         /* Litteral text */
                    674:     case HTML_XMP:
                    675:     case HTML_PLAINTEXT:
                    676:     case HTML_PRE:
1.4       timbl     677:        if (me->comment_start)
                    678:            HText_appendText(me->text, me->comment_start);
1.2       timbl     679:        /* Fall through */
                    680:        
                    681:     default:
1.44      frystyk   682: 
                    683:        /* Often won't really change */
                    684:        change_paragraph_style(me, me->sp->style);
1.2       timbl     685:        break;
                    686:        
                    687:     } /* switch */
1.1       timbl     688: }
                    689: 
1.2       timbl     690: 
                    691: /*             Expanding entities
                    692: **             ------------------
                    693: */
                    694: /*     (In fact, they all shrink!)
1.1       timbl     695: */
1.2       timbl     696: 
1.53      frystyk   697: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1       timbl     698: {
1.4       timbl     699:     HTML_put_string(me, ISO_Latin1[entity_number]);    /* @@ Other representations */
1.1       timbl     700: }
1.2       timbl     701: 
1.42      frystyk   702: /*     Flush an HTML object
                    703: **     --------------------
                    704: */
1.53      frystyk   705: PUBLIC int HTML_flush (HTStructured * me)
1.42      frystyk   706: {
                    707:     UPDATE_STYLE;                           /* Creates empty document here! */
1.57      frystyk   708:     if (me->comment_end) HTML_put_string(me,me->comment_end);
                    709:     return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42      frystyk   710: }
1.2       timbl     711: 
                    712: /*     Free an HTML object
                    713: **     -------------------
                    714: **
1.4       timbl     715: ** If the document is empty, the text object will not yet exist.
                    716:    So we could in fact abandon creating the document and return
                    717:    an error code.  In fact an empty document is an important type
                    718:    of document, so we don't.
                    719: **
                    720: **     If non-interactive, everything is freed off.   No: crashes -listrefs
1.2       timbl     721: **     Otherwise, the interactive object is left.      
                    722: */
1.53      frystyk   723: PUBLIC int HTML_free (HTStructured * me)
1.1       timbl     724: {
1.4       timbl     725:     UPDATE_STYLE;              /* Creates empty document here! */
                    726:     if (me->comment_end)
                    727:                HTML_put_string(me,me->comment_end);
                    728:     HText_endAppend(me->text);
                    729: 
                    730:     if (me->target) {
1.35      duns      731:         (*me->targetClass._free)(me->target);
1.2       timbl     732:     }
1.56      frystyk   733:     HTChunk_delete(me->title);
1.58      frystyk   734:     HT_FREE(me);
1.42      frystyk   735:     return HT_OK;
1.1       timbl     736: }
                    737: 
                    738: 
1.53      frystyk   739: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1       timbl     740: 
1.14      timbl     741: {
                    742:     if (me->target) {
                    743:         (*me->targetClass.abort)(me->target, e);
                    744:     }
1.56      frystyk   745:     HTChunk_delete(me->title);
1.58      frystyk   746:     HT_FREE(me);
1.42      frystyk   747:     return HT_ERROR;
1.1       timbl     748: }
                    749: 
1.2       timbl     750: 
                    751: /*     Get Styles from style sheet
                    752: **     ---------------------------
                    753: */
1.53      frystyk   754: PRIVATE void get_styles (void)
1.1       timbl     755: {
1.2       timbl     756:     got_styles = YES;
                    757:     
                    758:     default_style =            HTStyleNamed(styleSheet, "Normal");
1.1       timbl     759: 
1.2       timbl     760:     styles[HTML_H1] =          HTStyleNamed(styleSheet, "Heading1");
                    761:     styles[HTML_H2] =          HTStyleNamed(styleSheet, "Heading2");
                    762:     styles[HTML_H3] =          HTStyleNamed(styleSheet, "Heading3");
                    763:     styles[HTML_H4] =          HTStyleNamed(styleSheet, "Heading4");
                    764:     styles[HTML_H5] =          HTStyleNamed(styleSheet, "Heading5");
                    765:     styles[HTML_H6] =          HTStyleNamed(styleSheet, "Heading6");
                    766:     styles[HTML_H7] =          HTStyleNamed(styleSheet, "Heading7");
                    767: 
                    768:     styles[HTML_DL] =          HTStyleNamed(styleSheet, "Glossary");
                    769:     styles[HTML_UL] =
                    770:     styles[HTML_OL] =          HTStyleNamed(styleSheet, "List");
                    771:     styles[HTML_MENU] =                HTStyleNamed(styleSheet, "Menu");
                    772:     styles[HTML_DIR] =         HTStyleNamed(styleSheet, "Dir");    
1.16      timbl     773: /*  styles[HTML_DLC] =         HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2       timbl     774:     styles[HTML_ADDRESS]=      HTStyleNamed(styleSheet, "Address");
                    775:     styles[HTML_BLOCKQUOTE]=   HTStyleNamed(styleSheet, "BlockQuote");
                    776:     styles[HTML_PLAINTEXT] =
                    777:     styles[HTML_XMP] =         HTStyleNamed(styleSheet, "Example");
                    778:     styles[HTML_PRE] =         HTStyleNamed(styleSheet, "Preformatted");
                    779:     styles[HTML_LISTING] =     HTStyleNamed(styleSheet, "Listing");
                    780: }
                    781: /*                             P U B L I C
                    782: */
                    783: 
                    784: /*     Structured Object Class
                    785: **     -----------------------
                    786: */
1.60      frystyk   787: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2       timbl     788: {              
                    789:        "text/html",
1.42      frystyk   790:        HTML_flush,
1.2       timbl     791:        HTML_free,
1.14      timbl     792:        HTML_abort,
1.2       timbl     793:        HTML_put_character,     HTML_put_string,  HTML_write,
                    794:        HTML_start_element,     HTML_end_element,
                    795:        HTML_put_entity
                    796: }; 
1.1       timbl     797: 
1.4       timbl     798: 
1.2       timbl     799: /*             New Structured Text object
                    800: **             --------------------------
                    801: **
1.16      timbl     802: **     The structured stream can generate either presentation,
1.4       timbl     803: **     or plain text, or HTML.
1.1       timbl     804: */
1.53      frystyk   805: PRIVATE HTStructured* HTML_new (HTRequest *    request,
                    806:                                     void *             param,
                    807:                                     HTFormat           input_format,
                    808:                                     HTFormat           output_format,
                    809:                                     HTStream * output_stream)
1.1       timbl     810: {
                    811: 
1.4       timbl     812:     HTStructured * me;
                    813:     
1.47      frystyk   814: #if 0
1.16      timbl     815:     if (output_format != WWW_PLAINTEXT
                    816:        && output_format != WWW_PRESENT
                    817:        && output_format != HTAtom_for("text/x-c")) {
1.37      frystyk   818:         HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
                    819:                                                output_stream, request, NO);
1.6       timbl     820:        if (intermediate) return HTMLGenerator(intermediate);
1.44      frystyk   821:        if (SGML_TRACE)
1.63      frystyk   822:            HTTrace("HTML Parser. Can't parse HTML to %s\n",
1.44      frystyk   823:                    HTAtom_name(output_format));
1.4       timbl     824:        exit (-99);
                    825:     }
1.47      frystyk   826: #endif
1.4       timbl     827: 
1.58      frystyk   828:     if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
                    829:         HT_OUTOFMEM("HTML_new");
1.1       timbl     830: 
                    831:     if (!got_styles) get_styles();
                    832: 
1.4       timbl     833:     me->isa = &HTMLPresentation;
1.47      frystyk   834:     me->dtd = &HTMLP_dtd;
1.54      frystyk   835:     me->request = request;
1.48      frystyk   836:     me->node_anchor =  HTRequest_anchor(request);
1.56      frystyk   837:     me->title = HTChunk_new(128);
1.4       timbl     838:     me->text = 0;
                    839:     me->style_change = YES; /* Force check leading to text creation */
                    840:     me->new_style = default_style;
                    841:     me->old_style = 0;
                    842:     me->sp = me->stack + MAX_NESTING - 1;
                    843:     me->sp->tag_number = -1;                           /* INVALID */
                    844:     me->sp->style = default_style;                     /* INVALID */
1.1       timbl     845:     
1.4       timbl     846:     me->comment_start = NULL;
                    847:     me->comment_end = NULL;
1.16      timbl     848:     me->target = output_stream;
                    849:     if (output_stream) me->targetClass = *output_stream->isa;  /* Copy pointers */
1.1       timbl     850:     
1.4       timbl     851:     return (HTStructured*) me;
1.1       timbl     852: }
                    853: 
                    854: 
1.2       timbl     855: /*     HTConverter for HTML to plain text
                    856: **     ----------------------------------
1.1       timbl     857: **
1.2       timbl     858: **     This will convert from HTML to presentation or plain text.
1.1       timbl     859: */
1.53      frystyk   860: PUBLIC HTStream* HTMLToPlain (
                    861:        HTRequest *             request,
                    862:        void *                  param,
                    863:        HTFormat                input_format,
                    864:        HTFormat                output_format,
                    865:        HTStream *              output_stream)
1.1       timbl     866: {
1.47      frystyk   867:     return SGML_new(&HTMLP_dtd, HTML_new(
1.16      timbl     868:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     869: }
                    870: 
                    871: 
1.2       timbl     872: /*     HTConverter for HTML to C code
                    873: **     ------------------------------
                    874: **
1.36      frystyk   875: **     C code is like plain text but all non-preformatted code
1.2       timbl     876: **     is commented out.
                    877: **     This will convert from HTML to presentation or plain text.
                    878: */
1.53      frystyk   879: PUBLIC HTStream* HTMLToC (
                    880:        HTRequest *             request,
                    881:        void *                  param,
                    882:        HTFormat                input_format,
                    883:        HTFormat                output_format,
                    884:        HTStream *              output_stream)
1.1       timbl     885: {
1.4       timbl     886:     
                    887:     HTStructured * html;
                    888:     
1.36      frystyk   889:     (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16      timbl     890:     html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45      frystyk   891:     html->comment_start = "\n/* ";
1.47      frystyk   892:     html->dtd = &HTMLP_dtd;
1.2       timbl     893:     html->comment_end = " */\n";       /* Must start in col 1 for cpp */
1.47      frystyk   894:     return SGML_new(&HTMLP_dtd, html);
1.1       timbl     895: }
                    896: 
                    897: 
1.2       timbl     898: /*     Presenter for HTML
                    899: **     ------------------
                    900: **
                    901: **     This will convert from HTML to presentation or plain text.
                    902: **
                    903: **     Override this if you have a windows version
1.1       timbl     904: */
1.2       timbl     905: #ifndef GUI
1.53      frystyk   906: PUBLIC HTStream* HTMLPresent (
                    907:        HTRequest *             request,
                    908:        void *                  param,
                    909:        HTFormat                input_format,
                    910:        HTFormat                output_format,
                    911:        HTStream *              output_stream)
1.1       timbl     912: {
1.47      frystyk   913:     return SGML_new(&HTMLP_dtd, HTML_new(
1.16      timbl     914:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     915: }
1.2       timbl     916: #endif
1.29      frystyk   917: 

Webmaster