Annotation of libwww/Library/src/HTML.c, revision 1.3

1.2       timbl       1: /*             Structured stream to Rich hypertext converter
                      2: **             ============================================
1.1       timbl       3: **
1.2       timbl       4: **     This generates a hypertext object.  It converts from the
                      5: **     structured stream interface for HTML events into the style-
                      6: **     oriented interface declared in HText.h.  This module is
                      7: **     only used in clients and should not be linked into servers.
1.1       timbl       8: **
1.2       timbl       9: **     Override this module if making a new GUI browser.
1.1       timbl      10: **
                     11: */
                     12: #include "HTML.h"
                     13: 
1.2       timbl      14: #define CAREFUL                /* Check nesting here; not really necessary */
                     15: 
1.1       timbl      16: #include <ctype.h>
                     17: #include <stdio.h>
                     18: 
                     19: #include "HTAtom.h"
                     20: #include "HTChunk.h"
                     21: #include "HText.h"
                     22: #include "HTStyle.h"
                     23: 
1.3     ! timbl      24: #include "HTAlert.h"
1.1       timbl      25: 
                     26: extern HTStyleSheet * styleSheet;      /* Application-wide; defined outside this file */
                     27: 
                     28: /*     Module-wide style cache
                     29: */
                     30: PRIVATE int            got_styles = 0;         /* Starts at 0; presumably set once the cache is filled -- TODO confirm */
1.2       timbl      31: PRIVATE HTStyle *styles[HTML_ELEMENTS];        /* Looked up by element number, e.g. styles[HTML_DL] */
                     32: PRIVATE HTStyle *default_style;                /* NOTE(review): presumably the fallback style; confirm where it is assigned */
1.1       timbl      33: 
                     34: 
                     35: /*             HTML Object
                     36: **             -----------
                     37: */
1.2       timbl      38: #define MAX_NESTING 20         /* Size of the style stack; should be checked by parser */
                     39: /*     One entry of the style stack: an open element and its style */
                     40: typedef struct _stack_element {
                     41:         HTStyle *      style;          /* Value of new_style when the element was pushed */
                     42:        int             tag_number;     /* Element number of the open element */
                     43: } stack_element;
                     44: 
                     45: struct _HTStructured {                 /* State of one HTML-to-HText conversion */
                     46:     CONST HTStructuredClass *  isa;
                     47:     HTParentAnchor *           node_anchor;    /* Passed to HText_new2 and to anchor lookups */
                     48:     HText *                    text;           /* Created on first call of actually_set_style */
                     49: 
                     50:     HTStream*                  target;                 /* Output stream */
                     51:     HTStreamClass              targetClass;            /* Output routines */
                     52: 
                     53:     HTChunk                    title;          /* TITLE text is collected here; grow by 128 */
                     54:     
                     55:     char *                     comment_start;  /* for literate programming */
                     56:     char *                     comment_end;    /* If set, appended when a literal element starts */
                     57: 
                     58:     HTTag *                    current_tag;
                     59:     BOOL                       style_change;   /* YES while new_style is still pending */
                     60:     HTStyle *                  new_style;      /* Style last requested via change_style */
                     61:     HTStyle *                  old_style;      /* Style last given to HText_setStyle */
                     62:     BOOL                       in_word;  /* Have just had a non-white char */
                     63:     stack_element      stack[MAX_NESTING];     /* Open elements; push does no overflow check */
                     64:     stack_element      *sp;            /* Style stack pointer; push is --sp, pop is sp++ */
1.1       timbl      65: };
                     66: 
1.2       timbl      67: struct _HTStream {                     /* Only the class pointer is declared here */
                     68:     CONST HTStreamClass *      isa;
                     69:     /* .... */
                     70: };
1.1       timbl      71: 
                     72: /*             Forward declarations of routines
                     73: */
                     74: PRIVATE void get_styles NOPARAMS;
                     75: 
                     76: 
1.2       timbl      77: PRIVATE void actually_set_style PARAMS((HTStructured * this));        /* Applies new_style to the text */
                     78: PRIVATE void change_style PARAMS((HTStructured * this, HTStyle * style));     /* Records a pending style */
1.1       timbl      79: 
                     80: /*     Style buffering avoids dummy paragraph begin/ends.  UPDATE_STYLE applies
                     81: **     a pending style; it uses "this" and expands to a bare if statement. */
1.2       timbl      82: #define UPDATE_STYLE if (this->style_change) { actually_set_style(this); }
1.1       timbl      83: 
                     84: 
1.2       timbl      85: #ifdef OLD_CODE                /* Table fragment kept for reference; its declaration line is absent */
1.1       timbl      86: /* The following accented characters are from Peter Flynn, CURIA project */
                     87: 
                     88: /* These ifdefs don't solve the problem of a simple terminal emulator
                     89: ** with a different character set to the client machine. But nothing does,
                     90: ** except looking at the TERM setting */
                     91: 
1.2       timbl      92: 
1.1       timbl      93:         { "ocus" , "&" },       /* for CURIA */
                     94: #ifdef IBMPC
                     95:         { "aacute" , "\240" }, /* For PC display */
                     96:         { "eacute" , "\202" },
                     97:         { "iacute" , "\241" },
                     98:         { "oacute" , "\242" },
                     99:         { "uacute" , "\243" },
                    100:         { "Aacute" , "\101" }, /* \101 is the unaccented letter 'A' */
                    101:         { "Eacute" , "\220" },
                    102:         { "Iacute" , "\111" }, /* \111 is the unaccented letter 'I' */
                    103:         { "Oacute" , "\117" }, /* \117 is the unaccented letter 'O' */
                    104:         { "Uacute" , "\125" }, /* \125 is the unaccented letter 'U' */
                    105: #else
                    106:         { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
                    107:         { "eacute" , "\351" },
                    108:         { "iacute" , "\355" },
                    109:         { "oacute" , "\363" },
                    110:         { "uacute" , "\372" },
                    111:         { "Aacute" , "\301" },
                    112:         { "Eacute" , "\310" }, /* NOTE(review): ISO_Latin1 in this file has \310 as E grave and \311 as E acute */
                    113:         { "Iacute" , "\315" },
                    114:         { "Oacute" , "\323" },
                    115:         { "Uacute" , "\332" }, 
                    116: #endif
                    117:        { 0,    0 }  /* Terminate list */
                    118: };
1.2       timbl     119: #endif
1.1       timbl     120: 
                    121: 
1.2       timbl     122: /*     Entity values -- for ISO Latin 1 local representation
                    123: **     Each entry is a one-character string, written as an octal escape.
                    124: **     Entry order MUST match exactly the table referred to in the DTD!
                    125: */
                    126: static char * ISO_Latin1[] = {
                    127:        "\306", /* capital AE diphthong (ligature) */ 
                    128:        "\301", /* capital A, acute accent */ 
                    129:        "\302", /* capital A, circumflex accent */ 
                    130:        "\300", /* capital A, grave accent */ 
                    131:        "\305", /* capital A, ring */ 
                    132:        "\303", /* capital A, tilde */ 
                    133:        "\304", /* capital A, dieresis or umlaut mark */ 
                    134:        "\307", /* capital C, cedilla */ 
                    135:        "\320", /* capital Eth, Icelandic */ 
                    136:        "\311", /* capital E, acute accent */ 
                    137:        "\312", /* capital E, circumflex accent */ 
                    138:        "\310", /* capital E, grave accent */ 
                    139:        "\313", /* capital E, dieresis or umlaut mark */ 
                    140:        "\315", /* capital I, acute accent */ 
                    141:        "\316", /* capital I, circumflex accent */ 
                    142:        "\314", /* capital I, grave accent */ 
                    143:        "\317", /* capital I, dieresis or umlaut mark */ 
                    144:        "\321", /* capital N, tilde */ 
                    145:        "\323", /* capital O, acute accent */ 
                    146:        "\324", /* capital O, circumflex accent */ 
                    147:        "\322", /* capital O, grave accent */ 
                    148:        "\330", /* capital O, slash */ 
                    149:        "\325", /* capital O, tilde */ 
                    150:        "\326", /* capital O, dieresis or umlaut mark */ 
                    151:        "\336", /* capital THORN, Icelandic */ 
                    152:        "\332", /* capital U, acute accent */ 
                    153:        "\333", /* capital U, circumflex accent */ 
                    154:        "\331", /* capital U, grave accent */ 
                    155:        "\334", /* capital U, dieresis or umlaut mark */ 
                    156:        "\335", /* capital Y, acute accent */ 
                    157:        "\341", /* small a, acute accent */ 
                    158:        "\342", /* small a, circumflex accent */ 
                    159:        "\346", /* small ae diphthong (ligature) */ 
                    160:        "\340", /* small a, grave accent */ 
                    161:        "\046", /* ampersand */ 
                    162:        "\345", /* small a, ring */ 
                    163:        "\343", /* small a, tilde */ 
                    164:        "\344", /* small a, dieresis or umlaut mark */ 
                    165:        "\347", /* small c, cedilla */ 
                    166:        "\351", /* small e, acute accent */ 
                    167:        "\352", /* small e, circumflex accent */ 
                    168:        "\350", /* small e, grave accent */ 
                    169:        "\360", /* small eth, Icelandic */ 
                    170:        "\353", /* small e, dieresis or umlaut mark */ 
                    171:        "\076", /* greater than */ 
                    172:        "\355", /* small i, acute accent */ 
                    173:        "\356", /* small i, circumflex accent */ 
                    174:        "\354", /* small i, grave accent */ 
                    175:        "\357", /* small i, dieresis or umlaut mark */ 
                    176:        "\074", /* less than */ 
                    177:        "\361", /* small n, tilde */ 
                    178:        "\363", /* small o, acute accent */ 
                    179:        "\364", /* small o, circumflex accent */ 
                    180:        "\362", /* small o, grave accent */ 
                    181:        "\370", /* small o, slash */ 
                    182:        "\365", /* small o, tilde */ 
                    183:        "\366", /* small o, dieresis or umlaut mark */ 
                    184:        "\337", /* small sharp s, German (sz ligature) */ 
                    185:        "\376", /* small thorn, Icelandic */ 
                    186:        "\372", /* small u, acute accent */ 
                    187:        "\373", /* small u, circumflex accent */ 
                    188:        "\371", /* small u, grave accent */ 
                    189:        "\374", /* small u, dieresis or umlaut mark */ 
                    190:        "\375", /* small y, acute accent */ 
                    191:        "\377", /* small y, dieresis or umlaut mark */ 
1.1       timbl     192: };
                    193: 
1.2       timbl     194: 
                    195: /*     Entity values -- for NeXT local representation
                    196: **     Selected by HTMLUseCharacterSet when it is given HTML_NEXT_CHARS.
                    197: **     Entry order MUST match exactly the table referred to in the DTD!
                    198: **     Entries are in the same order as ISO_Latin1, one per entity.
                    199: */
                    200: static char * NeXTCharacters[] = {
                    201:        "\341", /* capital AE diphthong (ligature)      */ 
                    202:        "\202", /* capital A, acute accent              */ 
                    203:        "\203", /* capital A, circumflex accent         */ 
                    204:        "\201", /* capital A, grave accent              */ 
                    205:        "\206", /* capital A, ring                      */ 
                    206:        "\204", /* capital A, tilde                     */ 
                    207:        "\205", /* capital A, dieresis or umlaut mark   */ 
                    208:        "\207", /* capital C, cedilla                   */ 
                    209:        "\220", /* capital Eth, Icelandic               */ 
                    210:        "\211", /* capital E, acute accent                              */ 
                    211:        "\212", /* capital E, circumflex accent                         */ 
                    212:        "\210", /* capital E, grave accent                              */ 
                    213:        "\213", /* capital E, dieresis or umlaut mark                   */ 
                    214:        "\215", /* capital I, acute accent                              */ 
                    215:        "\216", /* capital I, circumflex accent         these are       */ 
                    216:        "\214", /* capital I, grave accent              ISO -100 octal  */ 
                    217:        "\217", /* capital I, dieresis or umlaut mark                   */ 
                    218:        "\221", /* capital N, tilde                                     */ 
                    219:        "\223", /* capital O, acute accent                              */ 
                    220:        "\224", /* capital O, circumflex accent                         */ 
                    221:        "\222", /* capital O, grave accent                              */ 
                    222:        "\351", /* capital O, slash             'cept this */ 
                    223:        "\225", /* capital O, tilde                                     */ 
                    224:        "\226", /* capital O, dieresis or umlaut mark                   */ 
                    225:        "\234", /* capital THORN, Icelandic */ 
                    226:        "\230", /* capital U, acute accent */ 
                    227:        "\231", /* capital U, circumflex accent */ 
                    228:        "\227", /* capital U, grave accent */ 
                    229:        "\232", /* capital U, dieresis or umlaut mark */ 
                    230:        "\233", /* capital Y, acute accent */ 
                    231:        "\326", /* small a, acute accent */ 
                    232:        "\327", /* small a, circumflex accent */ 
                    233:        "\361", /* small ae diphthong (ligature) */ 
                    234:        "\325", /* small a, grave accent */ 
                    235:        "\046", /* ampersand */ 
                    236:        "\332", /* small a, ring */ 
                    237:        "\330", /* small a, tilde */ 
                    238:        "\331", /* small a, dieresis or umlaut mark */ 
                    239:        "\333", /* small c, cedilla */ 
                    240:        "\335", /* small e, acute accent */ 
                    241:        "\336", /* small e, circumflex accent */ 
                    242:        "\334", /* small e, grave accent */ 
                    243:        "\346", /* small eth, Icelandic         */ 
                    244:        "\337", /* small e, dieresis or umlaut mark */ 
                    245:        "\076", /* greater than */ 
                    246:        "\342", /* small i, acute accent */ 
                    247:        "\344", /* small i, circumflex accent */ 
                    248:        "\340", /* small i, grave accent */ 
                    249:        "\345", /* small i, dieresis or umlaut mark */ 
                    250:        "\074", /* less than */ 
                    251:        "\347", /* small n, tilde */ 
                    252:        "\355", /* small o, acute accent */ 
                    253:        "\356", /* small o, circumflex accent */ 
                    254:        "\354", /* small o, grave accent */ 
                    255:        "\371", /* small o, slash */ 
                    256:        "\357", /* small o, tilde */ 
                    257:        "\360", /* small o, dieresis or umlaut mark */ 
                    258:        "\373", /* small sharp s, German (sz ligature) */ 
                    259:        "\374", /* small thorn, Icelandic */ 
                    260:        "\363", /* small u, acute accent */ 
                    261:        "\364", /* small u, circumflex accent */ 
                    262:        "\362", /* small u, grave accent */ 
                    263:        "\366", /* small u, dieresis or umlaut mark */ 
                    264:        "\367", /* small y, acute accent */ 
                    265:        "\375", /* small y, dieresis or umlaut mark */ 
1.1       timbl     266: };
                    267: 
1.2       timbl     268: /*     Entity values -- for IBM/PC Code Page 850 (International)
                    269: **
                    270: **     This MUST match exactly the table referred to in the DTD!
                    271: **
                    272: */
                    273: /* @@@@@@@@@@@@@@@@@ TBD */
                    274: 
                    275: 
                    276: 
                    277: /*             Set character set
                    278: **             ----------------
                    279: **     Any value of i other than HTML_NEXT_CHARS selects ISO_Latin1. */
                    280: 
                    281: PRIVATE char** p_entity_values = ISO_Latin1;   /* Pointer to translation; ISO Latin 1 until changed */
1.1       timbl     282: 
1.2       timbl     283: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
                    284: {
                    285:     p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
                    286:                                             : ISO_Latin1;
                    287: }
1.1       timbl     288: 
                    289: 
                    290: /*             Flattening the style structure
                    291: **             ------------------------------
                    292: **
                    293: On the NeXT, and on any read-only browser, it is simpler for the text to have
                    294: a sequence of styles, rather than a nested tree of styles. In this
                    295: case we have to flatten the structure as it arrives from SGML tags into
                    296: a sequence of styles.
                    297: */
                    298: 
                    299: /*             If style really needs to be set, call this.
                    300: **     Creates the HText on first use, applies new_style, clears style_change. */
1.2       timbl     301: PRIVATE void actually_set_style ARGS1(HTStructured *, this)
1.1       timbl     302: {
1.2       timbl     303:     if (!this->text) {                 /* First time through */
                    304:            this->text = HText_new2(this->node_anchor, this->target);
                    305:            HText_beginAppend(this->text);
                    306:            HText_setStyle(this->text, this->new_style);
                    307:            this->in_word = NO;                 /* Nothing has been appended yet */
1.1       timbl     308:     } else {
1.2       timbl     309:            HText_setStyle(this->text, this->new_style);
1.1       timbl     310:     }
1.2       timbl     311:     this->old_style = this->new_style; /* Remember the style now in effect */
                    312:     this->style_change = NO;           /* No style is pending any more */
1.1       timbl     313: }
                    314: 
                    315: /*      If you THINK you need to change style, call this.
                    316: **     It only records the request; actually_set_style applies it later. */
                    317: 
1.2       timbl     318: PRIVATE void change_style ARGS2(HTStructured *, this, HTStyle *,style)
1.1       timbl     319: {
1.2       timbl     320:     if (this->new_style!=style) {      /* Same style requested again: nothing to do */
                    321:        this->style_change = YES;
                    322:        this->new_style = style;
1.1       timbl     323:     }
                    324: }
                    325: 
1.2       timbl     326: /*_________________________________________________________________________
                    327: **
                    328: **                     A C T I O N     R O U T I N E S
                    329: */
                    330: 
                    331: /*     Character handling
                    332: **     ------------------
1.1       timbl     333: **     Handles c according to the element on top of the stack (sp[0]). */
1.2       timbl     334: PRIVATE void HTML_put_character ARGS2(HTStructured *, this, char, c)
1.1       timbl     335: {
1.2       timbl     336: 
                    337:     switch (this->sp[0].tag_number) {
                    338:     case HTML_COMMENT:
                    339:        break;                                  /* Do Nothing */
                    340:        
                    341:     case HTML_TITLE:   
                    342:        HTChunkPutc(&this->title, c);           /* Title text goes to the chunk, not the HText */
                    343:        break;
                    344: 
                    345:        
                    346:     case HTML_LISTING:                         /* Literal text */
                    347:     case HTML_XMP:
                    348:     case HTML_PLAINTEXT:
                    349:     case HTML_PRE:
                    350: /*     HTML_start_element has already applied the style for these elements
                    351: */
                    352:        HText_appendCharacter(this->text, c);
                    353:        break;
                    354:        
                    355:     default:                                   /* Free format text */
                    356:        if (this->style_change) {
                    357:            if ((c=='\n') || (c==' ')) return;  /* Ignore it */
                    358:            UPDATE_STYLE;
                    359:        }
                    360:        if (c=='\n') {                          /* Newline becomes one space, only after a word */
                    361:            if (this->in_word) {
                    362:                HText_appendCharacter(this->text, ' ');
                    363:                this->in_word = NO;
                    364:            }
                    365:        } else {                                /* Any other char, a space included, is appended */
                    366:            HText_appendCharacter(this->text, c);
                    367:            this->in_word = YES;
                    368:        }
                    369:     } /* end switch */
1.1       timbl     370: }
                    371: 
1.2       timbl     372: 
                    373: 
                    374: /*     String handling
                    375: **     ---------------
                    376: **
                    377: **     This is written separately from put_character because the loop can
                    378: **     in some cases be postponed to a lower level for speed.
                    379: */
                    380: PRIVATE void HTML_put_string ARGS2(HTStructured *, this, CONST char*, s)
1.1       timbl     381: {
1.2       timbl     382: 
                    383:     switch (this->sp[0].tag_number) {
                    384:     case HTML_COMMENT:
                    385:        break;                                  /* Do Nothing */
                    386:        
                    387:     case HTML_TITLE:   
                    388:        HTChunkPuts(&this->title, s);           /* Title text goes to the chunk, not the HText */
                    389:        break;
                    390: 
                    391:        
                    392:     case HTML_LISTING:                         /* Literal text */
                    393:     case HTML_XMP:
                    394:     case HTML_PLAINTEXT:
                    395:     case HTML_PRE:
                    396: 
                    397: /*     HTML_start_element has already applied the style for these elements
                    398: */
                    399:        HText_appendText(this->text, s);
                    400:        break;
                    401:        
                    402:     default:                                   /* Free format text */
                    403:         {
                    404:            CONST char *p = s;
                    405:            if (this->style_change) {
                    406:                for (; *p && ((*p=='\n') || (*p==' ')); p++)  ;  /* Ignore leaders */
                    407:                if (!*p) return;                /* Only white space: style stays pending */
                    408:                UPDATE_STYLE;
                    409:            }
                    410:            for(; *p; p++) {
                    411:                if (this->style_change) {       /* NOTE(review): looks redundant after the check above -- confirm */
                    412:                    if ((*p=='\n') || (*p==' ')) continue;  /* Ignore it */
                    413:                    UPDATE_STYLE;
                    414:                }
                    415:                if (*p=='\n') {                 /* Newline becomes one space, only after a word */
                    416:                    if (this->in_word) {
                    417:                        HText_appendCharacter(this->text, ' ');
                    418:                        this->in_word = NO;
                    419:                    }
                    420:                } else {
                    421:                    HText_appendCharacter(this->text, *p);
                    422:                    this->in_word = YES;
                    423:                }
                    424:            } /* for */
                    425:        }
                    426:     } /* end switch */
1.1       timbl     427: }
                    428: 
                    429: 
1.2       timbl     430: /*     Buffer write
1.3     ! timbl     431: **     ------------
1.1       timbl     432: **     Feeds the l bytes starting at s, one at a time, to HTML_put_character. */
1.2       timbl     433: PRIVATE void HTML_write ARGS3(HTStructured *, this, CONST char*, s, int, l)
1.1       timbl     434: {
1.2       timbl     435:     CONST char* p;
                    436:     CONST char* e = s+l;               /* One past the last byte to write */
                    437:     for (p=s; p<e; p++) HTML_put_character(this, *p);  /* Test p, not s: s never moves */
1.1       timbl     438: }
1.2       timbl     439: 
                    440: 
                    441: /*     Start Element
                    442: **     -------------
                    443: **     Acts on one start tag, then stacks the element unless its contents are SGML_EMPTY. */
                    444: PRIVATE void HTML_start_element ARGS4(
                    445:        HTStructured *,         this,
                    446:        int,            element_number, /* Selects the case below and the styles[] entry */
1.3     ! timbl     447:        CONST BOOL*,            present,        /* Tested per attribute before value[] is used */
        !           448:        CONST char **,  value)                  /* Attribute values; an entry may be null */
1.2       timbl     449: {
                    450:     switch (element_number) {
                    451:     case HTML_A:
                    452:        {
                    453:            HTChildAnchor * source = HTAnchor_findChildAndLink(
                    454:                this->node_anchor,                              /* parent */
                    455:                present[HTML_A_NAME] ? value[HTML_A_NAME] : 0,  /* Tag */
                    456:                present[HTML_A_HREF] ? value[HTML_A_HREF] : 0,  /* Address */
                    457:                present[HTML_A_TYPE] && value[HTML_A_TYPE] ? 
                    458:                        (HTLinkType*)HTAtom_for(value[HTML_A_TYPE])
                    459:                                                : 0);
                    460:            
                    461:            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
                    462:                HTParentAnchor * dest = 
                    463:                    HTAnchor_parent(
                    464:                        HTAnchor_followMainLink((HTAnchor*)source)
                    465:                                    );
                    466:                if (!HTAnchor_title(dest))              /* Keep a title the destination already has */
                    467:                        HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
                    468:            }
                    469:            UPDATE_STYLE;
                    470:            HText_beginAnchor(this->text, source);
                    471:        }
                    472:        break;
                    473:        
                    474:     case HTML_TITLE:
                    475:         HTChunkClear(&this->title);
                    476:        break;
                    477:        
                    478:     case HTML_NEXTID:
                    479:        /* if (present[NEXTID_N] && value[NEXTID_N])
                    480:                HText_setNextId(this->text, atoi(value[NEXTID_N])); */
                    481:        break;
                    482:        
                    483:     case HTML_ISINDEX:
                    484:        HTAnchor_setIndex(this->node_anchor);
                    485:        break;
                    486:        
                    487:     case HTML_P:
                    488:        UPDATE_STYLE;
                    489:        HText_appendParagraph(this->text);
                    490:        this->in_word = NO;
                    491:        break;
                    492: 
                    493:     case HTML_DL:
                    494:         change_style(this, present[DL_COMPACT]
                    495:                ? styles[HTML_DLC]
                    496:                : styles[HTML_DL]);
                    497:        this->in_word = NO;
                    498:        break;
                    499:        
                    500:     case HTML_DT:
                    501:         if (!this->style_change) {             /* Skipped while a style is still pending */
                    502:            HText_appendParagraph(this->text);
                    503:            this->in_word = NO;
                    504:        }
                    505:        break;
                    506:        
                    507:     case HTML_DD:
                    508:         UPDATE_STYLE;
                    509:        HTML_put_character(this, '\t'); /* Just tab out one stop */
                    510:        this->in_word = NO;
                    511:        break;                          /* Was missing: DD fell through and changed the style */
                    512:     case HTML_UL:
                    513:     case HTML_OL:
                    514:     case HTML_MENU:
                    515:     case HTML_DIR:
                    516:        change_style(this, styles[element_number]);
                    517:        this->in_word = NO;
                    518:        break;
                    519:        
                    520:     case HTML_LI:
                    521:         UPDATE_STYLE;
                    522:        if (this->sp[0].tag_number != HTML_DIR) /* sp[0] is the enclosing element: the push comes after this switch */
                    523:            HText_appendParagraph(this->text);
                    524:        else
                    525:            HText_appendCharacter(this->text, '\t');    /* Tab @@ nl for UL? */
                    526:        this->in_word = NO;
                    527:        break;
                    528:        
                    529:     case HTML_LISTING:                         /* Literal text */
                    530:     case HTML_XMP:
                    531:     case HTML_PLAINTEXT:
                    532:     case HTML_PRE:
                    533:        change_style(this, styles[element_number]);
                    534:        UPDATE_STYLE;                           /* Apply now: the put routines append literal text directly */
                    535:        if (this->comment_end)
                    536:            HText_appendText(this->text, this->comment_end);
                    537:        break;
                    538:        
                    539:     default:
                    540:        change_style(this, styles[element_number]);     /* May be postponed */
                    541:        break;
                    542: 
                    543:     } /* end switch */
                    544: 
                    545:     if (HTML_dtd.tags[element_number].contents!= SGML_EMPTY) {
                    546:        --(this->sp);                   /* NOTE(review): no check against MAX_NESTING before this push */
                    547:        this->sp[0].style = this->new_style;    /* Stack new style */
                    548:        this->sp[0].tag_number = element_number;
                    549:     }
                    550:        
1.1       timbl     551: }
1.2       timbl     552: 
1.1       timbl     553: /*             End Element
1.2       timbl     554: **             -----------
1.1       timbl     555: **
1.2       timbl     556: */
                    557: /*     When we end an element, the style must be returned to that
1.1       timbl     558: **     in effect before that element.  Note that anchors (etc?)
                    559: **     don't have an associated style, so that we must scan down the
                    560: **     stack for an element with a defined style. (In fact, the styles
                    561: **     should be linked to the whole stack not just the top one.)
                    562: **     TBL 921119
                    563: */
1.2       timbl     564: PRIVATE void HTML_end_element ARGS2(HTStructured *, this, int , element_number)
1.1       timbl     565: {
1.2       timbl     566: #ifdef CAREFUL                 /* parser assumed to produce good nesting */
                    567:     if (element_number != this->sp[0].tag_number) {
                    568:         fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
                    569:                HTML_dtd.tags[element_number].name,
                    570:                HTML_dtd.tags[this->sp->tag_number].name);
                    571:                exit(-20);
1.1       timbl     572:     }
1.2       timbl     573: #endif
                    574:     
                    575:     this->sp++;                                /* Pop state off stack */
                    576:     
                    577:     switch(element_number) {
                    578: 
                    579:     case HTML_A:
                    580:        UPDATE_STYLE;
                    581:        HText_endAnchor(this->text);
                    582:        break;
                    583: 
                    584:     case HTML_TITLE:
                    585:         HTChunkTerminate(&this->title);
                    586:        HTAnchor_setTitle(this->node_anchor, this->title.data);
                    587:        break;
                    588:        
                    589:     case HTML_LISTING:                         /* Litteral text */
                    590:     case HTML_XMP:
                    591:     case HTML_PLAINTEXT:
                    592:     case HTML_PRE:
                    593:        if (this->comment_start)
                    594:            HText_appendText(this->text, this->comment_start);
                    595:        /* Fall through */
                    596:        
                    597:     default:
                    598:     
                    599:        change_style(this, this->sp->style);    /* Often won't really change */
                    600:        this->in_word = NO;             /* Paragraph styles only @@ */
                    601:        break;
                    602:        
                    603:     } /* switch */
1.1       timbl     604: }
                    605: 
1.2       timbl     606: 
                    607: /*             Expanding entities
                    608: **             ------------------
                    609: */
                    610: /*     (In fact, they all shrink!)
1.1       timbl     611: */
1.2       timbl     612: 
                    613: PRIVATE void HTML_put_entity ARGS2(HTStructured *, this, int, entity_number)
1.1       timbl     614: {
1.2       timbl     615:     HTML_put_string(this, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1       timbl     616: }
                    617: 
1.2       timbl     618: 
                    619: 
                    620: /*     Free an HTML object
                    621: **     -------------------
                    622: **
                    623: **     If non-interactive, everything is freed off.
                    624: **     Otherwise, the interactive object is left.      
                    625: */
                    626: PUBLIC void HTML_free ARGS1(HTStructured *, this)
1.1       timbl     627: {
1.2       timbl     628:     if (this->target) {
                    629:         (*this->targetClass.free)(this->target);
                    630:        HText_free(this->text);
                    631:     }
                    632:     free(this);
1.1       timbl     633: }
                    634: 
                    635: 
1.2       timbl     636: PRIVATE void HTML_end_document ARGS1(HTStructured *, this)
1.1       timbl     637: 
1.2       timbl     638: /* If the document is empty, the text object will not yet exist.
                    639:    So we could in fact abandon creating the document and return
                    640:    an error code.  In fact an empty document is an important type
                    641:    of document, so we don't.
                    642: */
1.1       timbl     643: {
1.2       timbl     644:     UPDATE_STYLE;              /* Creates empty document here! */
                    645:     if (this->comment_end)
                    646:                HTML_put_string(this,this->comment_end);
                    647:     HText_endAppend(this->text);
                    648:     if (this->target)
                    649:         (*this->targetClass.end_document)(this->target);
1.1       timbl     650: }
                    651: 
1.2       timbl     652: 
                    653: /*     Get Styles from style sheet
                    654: **     ---------------------------
                    655: */
                    656: PRIVATE void get_styles NOARGS
1.1       timbl     657: {
1.2       timbl     658:     got_styles = YES;
                    659:     
                    660:     default_style =            HTStyleNamed(styleSheet, "Normal");
1.1       timbl     661: 
1.2       timbl     662:     styles[HTML_H1] =          HTStyleNamed(styleSheet, "Heading1");
                    663:     styles[HTML_H2] =          HTStyleNamed(styleSheet, "Heading2");
                    664:     styles[HTML_H3] =          HTStyleNamed(styleSheet, "Heading3");
                    665:     styles[HTML_H4] =          HTStyleNamed(styleSheet, "Heading4");
                    666:     styles[HTML_H5] =          HTStyleNamed(styleSheet, "Heading5");
                    667:     styles[HTML_H6] =          HTStyleNamed(styleSheet, "Heading6");
                    668:     styles[HTML_H7] =          HTStyleNamed(styleSheet, "Heading7");
                    669: 
                    670:     styles[HTML_DL] =          HTStyleNamed(styleSheet, "Glossary");
                    671:     styles[HTML_UL] =
                    672:     styles[HTML_OL] =          HTStyleNamed(styleSheet, "List");
                    673:     styles[HTML_MENU] =                HTStyleNamed(styleSheet, "Menu");
                    674:     styles[HTML_DIR] =         HTStyleNamed(styleSheet, "Dir");    
                    675:     styles[HTML_DLC] =         HTStyleNamed(styleSheet, "GlossaryCompact");
                    676:     styles[HTML_ADDRESS]=      HTStyleNamed(styleSheet, "Address");
                    677:     styles[HTML_BLOCKQUOTE]=   HTStyleNamed(styleSheet, "BlockQuote");
                    678:     styles[HTML_PLAINTEXT] =
                    679:     styles[HTML_XMP] =         HTStyleNamed(styleSheet, "Example");
                    680:     styles[HTML_PRE] =         HTStyleNamed(styleSheet, "Preformatted");
                    681:     styles[HTML_LISTING] =     HTStyleNamed(styleSheet, "Listing");
                    682: }
                    683: /*                             P U B L I C
                    684: */
                    685: 
                    686: /*     Structured Object Class
                    687: **     -----------------------
                    688: */
                    689: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
                    690: {              
                    691:        "text/html",
                    692:        HTML_free,
                    693:        HTML_end_document,
                    694:        HTML_put_character,     HTML_put_string,  HTML_write,
                    695:        HTML_start_element,     HTML_end_element,
                    696:        HTML_put_entity
                    697: }; 
1.1       timbl     698: 
1.2       timbl     699: /*             New Structured Text object
                    700: **             --------------------------
                    701: **
                    702: **     If the stream is NULL then an interactive object is produced
1.1       timbl     703: */
1.2       timbl     704: PUBLIC HTStructured* HTML_new ARGS2(
                    705:        HTParentAnchor *,       anchor,
                    706:        HTStream*,              stream)
1.1       timbl     707: {
                    708: 
1.2       timbl     709:     HTStructured * this = malloc(sizeof(*this));
1.1       timbl     710: 
                    711:     if (!got_styles) get_styles();
                    712: 
1.2       timbl     713:     this->isa = &HTMLPresentation;
                    714:     this->node_anchor =  anchor;
1.1       timbl     715:     this->title.size = 0;
                    716:     this->title.growby = 128;
                    717:     this->title.allocated = 0;
                    718:     this->title.data = 0;
                    719:     this->text = 0;
                    720:     this->style_change = YES; /* Force check leading to text creation */
1.2       timbl     721:     this->new_style = default_style;
                    722:     this->old_style = 0;
                    723:     this->sp = this->stack + MAX_NESTING - 1;
                    724:     this->sp->tag_number = -1;                         /* INVALID */
                    725:     this->sp->style = default_style;                   /* INVALID */
1.1       timbl     726:     
1.2       timbl     727:     this->comment_start = NULL;
                    728:     this->comment_end = NULL;
                    729:     this->target = stream;
                    730:     if (stream) this->targetClass = *stream->isa;      /* Copy pointers */
1.1       timbl     731:     
1.2       timbl     732:     return (HTStructured*) this;
1.1       timbl     733: }
                    734: 
                    735: 
1.2       timbl     736: /*     HTConverter for HTML to plain text
                    737: **     ----------------------------------
1.1       timbl     738: **
1.2       timbl     739: **     This will convert from HTML to presentation or plain text.
1.1       timbl     740: */
1.2       timbl     741: PUBLIC HTStream* HTMLToPlain ARGS3(
                    742:        HTPresentation *,       pres,
                    743:        HTParentAnchor *,       anchor, 
                    744:        HTStream *,             sink)
1.1       timbl     745: {
1.2       timbl     746:     return SGML_new(&HTML_dtd, HTML_new(anchor, sink));
1.1       timbl     747: }
                    748: 
                    749: 
1.2       timbl     750: /*     HTConverter for HTML to C code
                    751: **     ------------------------------
                    752: **
                    753: **     C copde is like plain text but all non-preformatted code
                    754: **     is commented out.
                    755: **     This will convert from HTML to presentation or plain text.
                    756: */
                    757: PUBLIC HTStream* HTMLToC ARGS3(
                    758:        HTPresentation *,       pres,
                    759:        HTParentAnchor *,       anchor, 
                    760:        HTStream *,             sink)
1.1       timbl     761: {
1.2       timbl     762:     HTStructured * html = HTML_new(anchor, sink);
                    763:     html->comment_start = "/* ";
                    764:     html->comment_end = " */\n";       /* Must start in col 1 for cpp */
                    765:     HTML_put_string(html,html->comment_start);
                    766:     return SGML_new(&HTML_dtd, html);
1.1       timbl     767: }
                    768: 
                    769: 
1.2       timbl     770: /*     Presenter for HTML
                    771: **     ------------------
                    772: **
                    773: **     This will convert from HTML to presentation or plain text.
                    774: **
                    775: **     Override this if you have a windows version
1.1       timbl     776: */
1.2       timbl     777: #ifndef GUI
                    778: PUBLIC HTStream* HTMLPresent ARGS3(
                    779:        HTPresentation *,       pres,
                    780:        HTParentAnchor *,       anchor, 
                    781:        HTStream *,             sink)
1.1       timbl     782: {
1.2       timbl     783:     return SGML_new(&HTML_dtd, HTML_new(anchor, NULL));
1.1       timbl     784: }
1.2       timbl     785: #endif
1.1       timbl     786: 
                    787: 
1.2       timbl     788: /*     Record error message as a hypertext object
                    789: **     ------------------------------------------
                    790: **
                    791: **     The error message should be marked as an error so that
                    792: **     it can be reloaded later.
                    793: **     This implementation just throws up an error message
                    794: **     and leaves the document unloaded.
1.1       timbl     795: **
1.2       timbl     796: ** On entry,
                    797: **     sink    is a stream to the output device if any
                    798: **     number  is the HTTP error number
                    799: **     message is the human readable message.
1.1       timbl     800: */
1.2       timbl     801: 
                    802: PUBLIC int HTLoadError ARGS3(
                    803:        HTStream *,     sink,
                    804:        int,            number,
                    805:        CONST char *,   message)
                    806: {
                    807:     HTAlert(message);          /* @@@@@@@@@@@@@@@@@@@ */
                    808:     return -number;
                    809: } 
                    810: 

Webmaster