Annotation of libwww/Library/src/HTML.c, revision 1.2

1.2     ! timbl       1: /*             Structured stream to Rich hypertext converter
        !             2: **             ============================================
1.1       timbl       3: **
1.2     ! timbl       4: **     This generates a hypertext object.  It converts from the
        !             5: **     structured stream interface for HTML events into the style-
        !             6: **     oriented interface of HText.h.  This module is
        !             7: **     only used in clients and should not be linked into servers.
1.1       timbl       8: **
1.2     ! timbl       9: **     Override this module if making a new GUI browser.
1.1       timbl      10: **
                     11: */
                     12: #include "HTML.h"
                     13: 
1.2     ! timbl      14: #define CAREFUL                /* Check nesting here notreally necessary */
        !            15: 
1.1       timbl      16: #include <ctype.h>
                     17: #include <stdio.h>
                     18: 
                     19: #include "HTAtom.h"
                     20: #include "HTChunk.h"
                     21: #include "HText.h"
                     22: #include "HTStyle.h"
                     23: 
                     24: 
                     25: extern HTStyleSheet * styleSheet;      /* Application-wide */
                     26: 
                     27: /*     Module-wide style cache
                     28: */
                     29: PRIVATE int            got_styles = 0;
1.2     ! timbl      30: PRIVATE HTStyle *styles[HTML_ELEMENTS];
        !            31: PRIVATE HTStyle *default_style;
1.1       timbl      32: 
                     33: 
                     34: /*             HTML Object
                     35: **             -----------
                     36: */
1.2     ! timbl      37: #define MAX_NESTING 20         /* Should be checked by parser */
        !            38: 
        !            39: typedef struct _stack_element {
        !            40:         HTStyle *      style;
        !            41:        int             tag_number;
        !            42: } stack_element;
        !            43: 
        !            44: struct _HTStructured {
        !            45:     CONST HTStructuredClass *  isa;
        !            46:     HTParentAnchor *           node_anchor;
        !            47:     HText *                    text;
        !            48: 
        !            49:     HTStream*                  target;                 /* Output stream */
        !            50:     HTStreamClass              targetClass;            /* Output routines */
        !            51: 
        !            52:     HTChunk                    title;          /* Grow by 128 */
        !            53:     
        !            54:     char *                     comment_start;  /* for literate programming */
        !            55:     char *                     comment_end;
        !            56: 
        !            57:     HTTag *                    current_tag;
        !            58:     BOOL                       style_change;
        !            59:     HTStyle *                  new_style;
        !            60:     HTStyle *                  old_style;
        !            61:     BOOL                       in_word;  /* Have just had a non-white char */
        !            62:     stack_element      stack[MAX_NESTING];
        !            63:     stack_element      *sp;            /* Style stack pointer */
1.1       timbl      64: };
                     65: 
1.2     ! timbl      66: struct _HTStream {
        !            67:     CONST HTStreamClass *      isa;
        !            68:     /* .... */
        !            69: };
1.1       timbl      70: 
                     71: /*             Forward declarations of routines
                     72: */
                     73: PRIVATE void get_styles NOPARAMS;
                     74: 
                     75: 
1.2     ! timbl      76: PRIVATE void actually_set_style PARAMS((HTStructured * this));
        !            77: PRIVATE void change_style PARAMS((HTStructured * this, HTStyle * style));
1.1       timbl      78: 
                     79: /*     Style buffering avoids dummy paragraph begin/ends.
                     80: */
1.2     ! timbl      81: #define UPDATE_STYLE if (this->style_change) { actually_set_style(this); }
1.1       timbl      82: 
                     83: 
1.2     ! timbl      84: #ifdef OLD_CODE
1.1       timbl      85: /* The following accented characters are from peter Flynn, curia project */
                     86: 
                     87: /* these ifdefs don't solve the problem of a simple terminal emulator
                     88: ** with a different character set to the client machine. But nothing does,
                     89: ** except looking at the TERM setting */
                     90: 
1.2     ! timbl      91: 
1.1       timbl      92:         { "ocus" , "&" },       /* for CURIA */
                     93: #ifdef IBMPC
                     94:         { "aacute" , "\240" }, /* For PC display */
                     95:         { "eacute" , "\202" },
                     96:         { "iacute" , "\241" },
                     97:         { "oacute" , "\242" },
                     98:         { "uacute" , "\243" },
                     99:         { "Aacute" , "\101" },
                    100:         { "Eacute" , "\220" },
                    101:         { "Iacute" , "\111" },
                    102:         { "Oacute" , "\117" },
                    103:         { "Uacute" , "\125" },
                    104: #else
                    105:         { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
                    106:         { "eacute" , "\351" },
                    107:         { "iacute" , "\355" },
                    108:         { "oacute" , "\363" },
                    109:         { "uacute" , "\372" },
                    110:         { "Aacute" , "\301" },
                    111:         { "Eacute" , "\310" },
                    112:         { "Iacute" , "\315" },
                    113:         { "Oacute" , "\323" },
                    114:         { "Uacute" , "\332" }, 
                    115: #endif
                    116:        { 0,    0 }  /* Terminate list */
                    117: };
1.2     ! timbl     118: #endif
1.1       timbl     119: 
                    120: 
/*      Entity values -- for ISO Latin 1 local representation
**
**      One single-character string per entity.  The position in the
**      array is what identifies the entity, so the order here
**      MUST match exactly the table referred to in the DTD!
*/
static char * ISO_Latin1[] = {
    "\306",     /*  0: capital AE diphthong (ligature) */
    "\301",     /*  1: capital A, acute accent */
    "\302",     /*  2: capital A, circumflex accent */
    "\300",     /*  3: capital A, grave accent */
    "\305",     /*  4: capital A, ring */
    "\303",     /*  5: capital A, tilde */
    "\304",     /*  6: capital A, dieresis or umlaut mark */
    "\307",     /*  7: capital C, cedilla */
    "\320",     /*  8: capital Eth, Icelandic */
    "\311",     /*  9: capital E, acute accent */
    "\312",     /* 10: capital E, circumflex accent */
    "\310",     /* 11: capital E, grave accent */
    "\313",     /* 12: capital E, dieresis or umlaut mark */
    "\315",     /* 13: capital I, acute accent */
    "\316",     /* 14: capital I, circumflex accent */
    "\314",     /* 15: capital I, grave accent */
    "\317",     /* 16: capital I, dieresis or umlaut mark */
    "\321",     /* 17: capital N, tilde */
    "\323",     /* 18: capital O, acute accent */
    "\324",     /* 19: capital O, circumflex accent */
    "\322",     /* 20: capital O, grave accent */
    "\330",     /* 21: capital O, slash */
    "\325",     /* 22: capital O, tilde */
    "\326",     /* 23: capital O, dieresis or umlaut mark */
    "\336",     /* 24: capital THORN, Icelandic */
    "\332",     /* 25: capital U, acute accent */
    "\333",     /* 26: capital U, circumflex accent */
    "\331",     /* 27: capital U, grave accent */
    "\334",     /* 28: capital U, dieresis or umlaut mark */
    "\335",     /* 29: capital Y, acute accent */
    "\341",     /* 30: small a, acute accent */
    "\342",     /* 31: small a, circumflex accent */
    "\346",     /* 32: small ae diphthong (ligature) */
    "\340",     /* 33: small a, grave accent */
    "\046",     /* 34: ampersand */
    "\345",     /* 35: small a, ring */
    "\343",     /* 36: small a, tilde */
    "\344",     /* 37: small a, dieresis or umlaut mark */
    "\347",     /* 38: small c, cedilla */
    "\351",     /* 39: small e, acute accent */
    "\352",     /* 40: small e, circumflex accent */
    "\350",     /* 41: small e, grave accent */
    "\360",     /* 42: small eth, Icelandic */
    "\353",     /* 43: small e, dieresis or umlaut mark */
    "\076",     /* 44: greater than */
    "\355",     /* 45: small i, acute accent */
    "\356",     /* 46: small i, circumflex accent */
    "\354",     /* 47: small i, grave accent */
    "\357",     /* 48: small i, dieresis or umlaut mark */
    "\074",     /* 49: less than */
    "\361",     /* 50: small n, tilde */
    "\363",     /* 51: small o, acute accent */
    "\364",     /* 52: small o, circumflex accent */
    "\362",     /* 53: small o, grave accent */
    "\370",     /* 54: small o, slash */
    "\365",     /* 55: small o, tilde */
    "\366",     /* 56: small o, dieresis or umlaut mark */
    "\337",     /* 57: small sharp s, German (sz ligature) */
    "\376",     /* 58: small thorn, Icelandic */
    "\372",     /* 59: small u, acute accent */
    "\373",     /* 60: small u, circumflex accent */
    "\371",     /* 61: small u, grave accent */
    "\374",     /* 62: small u, dieresis or umlaut mark */
    "\375",     /* 63: small y, acute accent */
    "\377",     /* 64: small y, dieresis or umlaut mark */
};
                    192: 
1.2     ! timbl     193: 
        !           194: /*     Entity values -- for NeXT local representation
        !           195: **
        !           196: **     This MUST match exactly the table referred to in the DTD!
        !           197: **
        !           198: */
        !           199: static char * NeXTCharacters[] = {
        !           200:        "\341", /* capital AE diphthong (ligature)      */ 
        !           201:        "\202", /* capital A, acute accent              */ 
        !           202:        "\203", /* capital A, circumflex accent         */ 
        !           203:        "\201", /* capital A, grave accent              */ 
        !           204:        "\206", /* capital A, ring                      */ 
        !           205:        "\204", /* capital A, tilde                     */ 
        !           206:        "\205", /* capital A, dieresis or umlaut mark   */ 
        !           207:        "\207", /* capital C, cedilla                   */ 
        !           208:        "\220", /* capital Eth, Icelandic               */ 
        !           209:        "\211", /* capital E, acute accent                              */ 
        !           210:        "\212", /* capital E, circumflex accent                         */ 
        !           211:        "\210", /* capital E, grave accent                              */ 
        !           212:        "\213", /* capital E, dieresis or umlaut mark                   */ 
        !           213:        "\215", /* capital I, acute accent                              */ 
        !           214:        "\216", /* capital I, circumflex accent         these are       */ 
        !           215:        "\214", /* capital I, grave accent              ISO -100 hex    */ 
        !           216:        "\217", /* capital I, dieresis or umlaut mark                   */ 
        !           217:        "\221", /* capital N, tilde                                     */ 
        !           218:        "\223", /* capital O, acute accent                              */ 
        !           219:        "\224", /* capital O, circumflex accent                         */ 
        !           220:        "\222", /* capital O, grave accent                              */ 
        !           221:        "\351", /* capital O, slash             'cept this */ 
        !           222:        "\225", /* capital O, tilde                                     */ 
        !           223:        "\226", /* capital O, dieresis or umlaut mark                   */ 
        !           224:        "\234", /* capital THORN, Icelandic */ 
        !           225:        "\230", /* capital U, acute accent */ 
        !           226:        "\231", /* capital U, circumflex accent */ 
        !           227:        "\227", /* capital U, grave accent */ 
        !           228:        "\232", /* capital U, dieresis or umlaut mark */ 
        !           229:        "\233", /* capital Y, acute accent */ 
        !           230:        "\326", /* small a, acute accent */ 
        !           231:        "\327", /* small a, circumflex accent */ 
        !           232:        "\361", /* small ae diphthong (ligature) */ 
        !           233:        "\325", /* small a, grave accent */ 
        !           234:        "\046", /* ampersand */ 
        !           235:        "\332", /* small a, ring */ 
        !           236:        "\330", /* small a, tilde */ 
        !           237:        "\331", /* small a, dieresis or umlaut mark */ 
        !           238:        "\333", /* small c, cedilla */ 
        !           239:        "\335", /* small e, acute accent */ 
        !           240:        "\336", /* small e, circumflex accent */ 
        !           241:        "\334", /* small e, grave accent */ 
        !           242:        "\346", /* small eth, Icelandic         */ 
        !           243:        "\337", /* small e, dieresis or umlaut mark */ 
        !           244:        "\076", /* greater than */ 
        !           245:        "\342", /* small i, acute accent */ 
        !           246:        "\344", /* small i, circumflex accent */ 
        !           247:        "\340", /* small i, grave accent */ 
        !           248:        "\345", /* small i, dieresis or umlaut mark */ 
        !           249:        "\074", /* less than */ 
        !           250:        "\347", /* small n, tilde */ 
        !           251:        "\355", /* small o, acute accent */ 
        !           252:        "\356", /* small o, circumflex accent */ 
        !           253:        "\354", /* small o, grave accent */ 
        !           254:        "\371", /* small o, slash */ 
        !           255:        "\357", /* small o, tilde */ 
        !           256:        "\360", /* small o, dieresis or umlaut mark */ 
        !           257:        "\373", /* small sharp s, German (sz ligature) */ 
        !           258:        "\374", /* small thorn, Icelandic */ 
        !           259:        "\363", /* small u, acute accent */ 
        !           260:        "\364", /* small u, circumflex accent */ 
        !           261:        "\362", /* small u, grave accent */ 
        !           262:        "\366", /* small u, dieresis or umlaut mark */ 
        !           263:        "\367", /* small y, acute accent */ 
        !           264:        "\375", /* small y, dieresis or umlaut mark */ 
1.1       timbl     265: };
                    266: 
1.2     ! timbl     267: /*     Entity values -- for IBM/PC Code Page 850 (International)
        !           268: **
        !           269: **     This MUST match exactly the table referred to in the DTD!
        !           270: **
        !           271: */
        !           272: /* @@@@@@@@@@@@@@@@@ TBD */
        !           273: 
        !           274: 
        !           275: 
        !           276: /*             Set character set
        !           277: **             ----------------
        !           278: */
        !           279: 
        !           280: PRIVATE char** p_entity_values = ISO_Latin1;   /* Pointer to translation */
1.1       timbl     281: 
1.2     ! timbl     282: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
        !           283: {
        !           284:     p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
        !           285:                                             : ISO_Latin1;
        !           286: }
1.1       timbl     287: 
                    288: 
                    289: /*             Flattening the style structure
                    290: **             ------------------------------
                    291: **
                    292: On the NeXT, and on any read-only browser, it is simpler for the text to have
                    293: a sequence of styles, rather than a nested tree of styles. In this
                    294: case we have to flatten the structure as it arrives from SGML tags into
                    295: a sequence of styles.
                    296: */
                    297: 
                    298: /*             If style really needs to be set, call this
                    299: */
1.2     ! timbl     300: PRIVATE void actually_set_style ARGS1(HTStructured *, this)
1.1       timbl     301: {
1.2     ! timbl     302:     if (!this->text) {                 /* First time through */
        !           303:            this->text = HText_new2(this->node_anchor, this->target);
        !           304:            HText_beginAppend(this->text);
        !           305:            HText_setStyle(this->text, this->new_style);
        !           306:            this->in_word = NO;
1.1       timbl     307:     } else {
1.2     ! timbl     308:            HText_setStyle(this->text, this->new_style);
1.1       timbl     309:     }
1.2     ! timbl     310:     this->old_style = this->new_style;
        !           311:     this->style_change = NO;
1.1       timbl     312: }
                    313: 
                    314: /*      If you THINK you need to change style, call this
                    315: */
                    316: 
1.2     ! timbl     317: PRIVATE void change_style ARGS2(HTStructured *, this, HTStyle *,style)
1.1       timbl     318: {
1.2     ! timbl     319:     if (this->new_style!=style) {
        !           320:        this->style_change = YES;
        !           321:        this->new_style = style;
1.1       timbl     322:     }
                    323: }
                    324: 
1.2     ! timbl     325: /*_________________________________________________________________________
        !           326: **
        !           327: **                     A C T I O N     R O U T I N E S
        !           328: */
        !           329: 
        !           330: /*     Character handling
        !           331: **     ------------------
1.1       timbl     332: */
1.2     ! timbl     333: PRIVATE void HTML_put_character ARGS2(HTStructured *, this, char, c)
1.1       timbl     334: {
1.2     ! timbl     335: 
        !           336:     switch (this->sp[0].tag_number) {
        !           337:     case HTML_COMMENT:
        !           338:        break;                                  /* Do Nothing */
        !           339:        
        !           340:     case HTML_TITLE:   
        !           341:        HTChunkPutc(&this->title, c);
        !           342:        break;
        !           343: 
        !           344:        
        !           345:     case HTML_LISTING:                         /* Litteral text */
        !           346:     case HTML_XMP:
        !           347:     case HTML_PLAINTEXT:
        !           348:     case HTML_PRE:
        !           349: /*     We guarrantee that the style is up-to-date in begin_litteral
        !           350: */
        !           351:        HText_appendCharacter(this->text, c);
        !           352:        break;
        !           353:        
        !           354:     default:                                   /* Free format text */
        !           355:        if (this->style_change) {
        !           356:            if ((c=='\n') || (c==' ')) return;  /* Ignore it */
        !           357:            UPDATE_STYLE;
        !           358:        }
        !           359:        if (c=='\n') {
        !           360:            if (this->in_word) {
        !           361:                HText_appendCharacter(this->text, ' ');
        !           362:                this->in_word = NO;
        !           363:            }
        !           364:        } else {
        !           365:            HText_appendCharacter(this->text, c);
        !           366:            this->in_word = YES;
        !           367:        }
        !           368:     } /* end switch */
1.1       timbl     369: }
                    370: 
1.2     ! timbl     371: 
        !           372: 
        !           373: /*     String handling
        !           374: **     ---------------
        !           375: **
        !           376: **     This is written separately from put_character becuase the loop can
        !           377: **     in some cases be postponed to a lower level for speed.
        !           378: */
        !           379: PRIVATE void HTML_put_string ARGS2(HTStructured *, this, CONST char*, s)
1.1       timbl     380: {
1.2     ! timbl     381: 
        !           382:     switch (this->sp[0].tag_number) {
        !           383:     case HTML_COMMENT:
        !           384:        break;                                  /* Do Nothing */
        !           385:        
        !           386:     case HTML_TITLE:   
        !           387:        HTChunkPuts(&this->title, s);
        !           388:        break;
        !           389: 
        !           390:        
        !           391:     case HTML_LISTING:                         /* Litteral text */
        !           392:     case HTML_XMP:
        !           393:     case HTML_PLAINTEXT:
        !           394:     case HTML_PRE:
        !           395: 
        !           396: /*     We guarrantee that the style is up-to-date in begin_litteral
        !           397: */
        !           398:        HText_appendText(this->text, s);
        !           399:        break;
        !           400:        
        !           401:     default:                                   /* Free format text */
        !           402:         {
        !           403:            CONST char *p = s;
        !           404:            if (this->style_change) {
        !           405:                for (; *p && ((*p=='\n') || (*p==' ')); p++)  ;  /* Ignore leaders */
        !           406:                if (!*p) return;
        !           407:                UPDATE_STYLE;
        !           408:            }
        !           409:            for(; *p; p++) {
        !           410:                if (this->style_change) {
        !           411:                    if ((*p=='\n') || (*p==' ')) continue;  /* Ignore it */
        !           412:                    UPDATE_STYLE;
        !           413:                }
        !           414:                if (*p=='\n') {
        !           415:                    if (this->in_word) {
        !           416:                        HText_appendCharacter(this->text, ' ');
        !           417:                        this->in_word = NO;
        !           418:                    }
        !           419:                } else {
        !           420:                    HText_appendCharacter(this->text, *p);
        !           421:                    this->in_word = YES;
        !           422:                }
        !           423:            } /* for */
        !           424:        }
        !           425:     } /* end switch */
1.1       timbl     426: }
                    427: 
                    428: 
1.2     ! timbl     429: /*     Buffer write
        !           430: **
1.1       timbl     431: */
1.2     ! timbl     432: PRIVATE void HTML_write ARGS3(HTStructured *, this, CONST char*, s, int, l)
1.1       timbl     433: {
1.2     ! timbl     434:     CONST char* p;
        !           435:     CONST char* e = s+l;
        !           436:     for (p=s; s<e; p++) HTML_put_character(this, *p);
1.1       timbl     437: }
1.2     ! timbl     438: 
        !           439: 
        !           440: /*     Start Element
        !           441: **     -------------
        !           442: */
        !           443: PRIVATE void HTML_start_element ARGS4(
        !           444:        HTStructured *,         this,
        !           445:        int,            element_number,
        !           446:        BOOL*,          present,
        !           447:        char **,        value)
        !           448: {
        !           449:     switch (element_number) {
        !           450:     case HTML_A:
        !           451:        {
        !           452:            HTChildAnchor * source = HTAnchor_findChildAndLink(
        !           453:                this->node_anchor,                              /* parent */
        !           454:                present[HTML_A_NAME] ? value[HTML_A_NAME] : 0,  /* Tag */
        !           455:                present[HTML_A_HREF] ? value[HTML_A_HREF] : 0,  /* Addresss */
        !           456:                present[HTML_A_TYPE] && value[HTML_A_TYPE] ? 
        !           457:                        (HTLinkType*)HTAtom_for(value[HTML_A_TYPE])
        !           458:                                                : 0);
        !           459:            
        !           460:            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
        !           461:                HTParentAnchor * dest = 
        !           462:                    HTAnchor_parent(
        !           463:                        HTAnchor_followMainLink((HTAnchor*)source)
        !           464:                                    );
        !           465:                if (!HTAnchor_title(dest))
        !           466:                        HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
        !           467:            }
        !           468:            UPDATE_STYLE;
        !           469:            HText_beginAnchor(this->text, source);
        !           470:        }
        !           471:        break;
        !           472:        
        !           473:     case HTML_TITLE:
        !           474:         HTChunkClear(&this->title);
        !           475:        break;
        !           476:        
        !           477:     case HTML_NEXTID:
        !           478:        /* if (present[NEXTID_N] && value[NEXTID_N])
        !           479:                HText_setNextId(this->text, atoi(value[NEXTID_N])); */
        !           480:        break;
        !           481:        
        !           482:     case HTML_ISINDEX:
        !           483:        HTAnchor_setIndex(this->node_anchor);
        !           484:        break;
        !           485:        
        !           486:     case HTML_P:
        !           487:        UPDATE_STYLE;
        !           488:        HText_appendParagraph(this->text);
        !           489:        this->in_word = NO;
        !           490:        break;
        !           491: 
        !           492:     case HTML_DL:
        !           493:         change_style(this, present[DL_COMPACT]
        !           494:                ? styles[HTML_DLC]
        !           495:                : styles[HTML_DL]);
        !           496:        this->in_word = NO;
        !           497:        break;
        !           498:        
        !           499:     case HTML_DT:
        !           500:         if (!this->style_change) {
        !           501:            HText_appendParagraph(this->text);
        !           502:            this->in_word = NO;
        !           503:        }
        !           504:        break;
        !           505:        
        !           506:     case HTML_DD:
        !           507:         UPDATE_STYLE;
        !           508:        HTML_put_character(this, '\t'); /* Just tab out one stop */
        !           509:        this->in_word = NO;
        !           510: 
        !           511:     case HTML_UL:
        !           512:     case HTML_OL:
        !           513:     case HTML_MENU:
        !           514:     case HTML_DIR:
        !           515:        change_style(this, styles[element_number]);
        !           516:        this->in_word = NO;
        !           517:        break;
        !           518:        
        !           519:     case HTML_LI:
        !           520:         UPDATE_STYLE;
        !           521:        if (this->sp[1].tag_number != HTML_DIR)
        !           522:            HText_appendParagraph(this->text);
        !           523:        else
        !           524:            HText_appendCharacter(this->text, '\t');    /* Tab @@ nl for UL? */
        !           525:        this->in_word = NO;
        !           526:        break;
        !           527:        
        !           528:     case HTML_LISTING:                         /* Litteral text */
        !           529:     case HTML_XMP:
        !           530:     case HTML_PLAINTEXT:
        !           531:     case HTML_PRE:
        !           532:        change_style(this, styles[element_number]);
        !           533:        UPDATE_STYLE;
        !           534:        if (this->comment_end)
        !           535:            HText_appendText(this->text, this->comment_end);
        !           536:        break;
        !           537:        
        !           538:     default:
        !           539:        change_style(this, styles[element_number]);     /* May be postponed */
        !           540:        break;
        !           541: 
        !           542:     } /* end switch */
        !           543: 
        !           544:     if (HTML_dtd.tags[element_number].contents!= SGML_EMPTY) {
        !           545:        --(this->sp);
        !           546:        this->sp[0].style = this->new_style;    /* Stack new style */
        !           547:        this->sp[0].tag_number = element_number;
        !           548:     }
        !           549:        
1.1       timbl     550: }
1.2     ! timbl     551: 
1.1       timbl     552: /*             End Element
1.2     ! timbl     553: **             -----------
1.1       timbl     554: **
1.2     ! timbl     555: */
        !           556: /*     When we end an element, the style must be returned to that
1.1       timbl     557: **     in effect before that element.  Note that anchors (etc?)
                    558: **     don't have an associated style, so that we must scan down the
                    559: **     stack for an element with a defined style. (In fact, the styles
                    560: **     should be linked to the whole stack not just the top one.)
                    561: **     TBL 921119
                    562: */
1.2     ! timbl     563: PRIVATE void HTML_end_element ARGS2(HTStructured *, this, int , element_number)
1.1       timbl     564: {
1.2     ! timbl     565: #ifdef CAREFUL                 /* parser assumed to produce good nesting */
        !           566:     if (element_number != this->sp[0].tag_number) {
        !           567:         fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
        !           568:                HTML_dtd.tags[element_number].name,
        !           569:                HTML_dtd.tags[this->sp->tag_number].name);
        !           570:                exit(-20);
1.1       timbl     571:     }
1.2     ! timbl     572: #endif
        !           573:     
        !           574:     this->sp++;                                /* Pop state off stack */
        !           575:     
        !           576:     switch(element_number) {
        !           577: 
        !           578:     case HTML_A:
        !           579:        UPDATE_STYLE;
        !           580:        HText_endAnchor(this->text);
        !           581:        break;
        !           582: 
        !           583:     case HTML_TITLE:
        !           584:         HTChunkTerminate(&this->title);
        !           585:        HTAnchor_setTitle(this->node_anchor, this->title.data);
        !           586:        break;
        !           587:        
        !           588:     case HTML_LISTING:                         /* Litteral text */
        !           589:     case HTML_XMP:
        !           590:     case HTML_PLAINTEXT:
        !           591:     case HTML_PRE:
        !           592:        if (this->comment_start)
        !           593:            HText_appendText(this->text, this->comment_start);
        !           594:        /* Fall through */
        !           595:        
        !           596:     default:
        !           597:     
        !           598:        change_style(this, this->sp->style);    /* Often won't really change */
        !           599:        this->in_word = NO;             /* Paragraph styles only @@ */
        !           600:        break;
        !           601:        
        !           602:     } /* switch */
1.1       timbl     603: }
                    604: 
1.2     ! timbl     605: 
        !           606: /*             Expanding entities
        !           607: **             ------------------
        !           608: */
        !           609: /*     (In fact, they all shrink!)
1.1       timbl     610: */
1.2     ! timbl     611: 
        !           612: PRIVATE void HTML_put_entity ARGS2(HTStructured *, this, int, entity_number)
1.1       timbl     613: {
1.2     ! timbl     614:     HTML_put_string(this, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1       timbl     615: }
                    616: 
1.2     ! timbl     617: 
        !           618: 
        !           619: /*     Free an HTML object
        !           620: **     -------------------
        !           621: **
        !           622: **     If non-interactive, everything is freed off.
        !           623: **     Otherwise, the interactive object is left.      
        !           624: */
        !           625: PUBLIC void HTML_free ARGS1(HTStructured *, this)
1.1       timbl     626: {
1.2     ! timbl     627:     if (this->target) {
        !           628:         (*this->targetClass.free)(this->target);
        !           629:        HText_free(this->text);
        !           630:     }
        !           631:     free(this);
1.1       timbl     632: }
                    633: 
                    634: 
1.2     ! timbl     635: PRIVATE void HTML_end_document ARGS1(HTStructured *, this)
1.1       timbl     636: 
1.2     ! timbl     637: /* If the document is empty, the text object will not yet exist.
        !           638:    So we could in fact abandon creating the document and return
        !           639:    an error code.  In fact an empty document is an important type
        !           640:    of document, so we don't.
        !           641: */
1.1       timbl     642: {
1.2     ! timbl     643:     UPDATE_STYLE;              /* Creates empty document here! */
        !           644:     if (this->comment_end)
        !           645:                HTML_put_string(this,this->comment_end);
        !           646:     HText_endAppend(this->text);
        !           647:     if (this->target)
        !           648:         (*this->targetClass.end_document)(this->target);
1.1       timbl     649: }
                    650: 
1.2     ! timbl     651: 
        !           652: /*     Get Styles from style sheet
        !           653: **     ---------------------------
        !           654: */
        !           655: PRIVATE void get_styles NOARGS
1.1       timbl     656: {
1.2     ! timbl     657:     got_styles = YES;
        !           658:     
        !           659:     default_style =            HTStyleNamed(styleSheet, "Normal");
1.1       timbl     660: 
1.2     ! timbl     661:     styles[HTML_H1] =          HTStyleNamed(styleSheet, "Heading1");
        !           662:     styles[HTML_H2] =          HTStyleNamed(styleSheet, "Heading2");
        !           663:     styles[HTML_H3] =          HTStyleNamed(styleSheet, "Heading3");
        !           664:     styles[HTML_H4] =          HTStyleNamed(styleSheet, "Heading4");
        !           665:     styles[HTML_H5] =          HTStyleNamed(styleSheet, "Heading5");
        !           666:     styles[HTML_H6] =          HTStyleNamed(styleSheet, "Heading6");
        !           667:     styles[HTML_H7] =          HTStyleNamed(styleSheet, "Heading7");
        !           668: 
        !           669:     styles[HTML_DL] =          HTStyleNamed(styleSheet, "Glossary");
        !           670:     styles[HTML_UL] =
        !           671:     styles[HTML_OL] =          HTStyleNamed(styleSheet, "List");
        !           672:     styles[HTML_MENU] =                HTStyleNamed(styleSheet, "Menu");
        !           673:     styles[HTML_DIR] =         HTStyleNamed(styleSheet, "Dir");    
        !           674:     styles[HTML_DLC] =         HTStyleNamed(styleSheet, "GlossaryCompact");
        !           675:     styles[HTML_ADDRESS]=      HTStyleNamed(styleSheet, "Address");
        !           676:     styles[HTML_BLOCKQUOTE]=   HTStyleNamed(styleSheet, "BlockQuote");
        !           677:     styles[HTML_PLAINTEXT] =
        !           678:     styles[HTML_XMP] =         HTStyleNamed(styleSheet, "Example");
        !           679:     styles[HTML_PRE] =         HTStyleNamed(styleSheet, "Preformatted");
        !           680:     styles[HTML_LISTING] =     HTStyleNamed(styleSheet, "Listing");
        !           681: }
        !           682: /*                             P U B L I C
        !           683: */
        !           684: 
        !           685: /*     Structured Object Class
        !           686: **     -----------------------
        !           687: */
        !           688: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
        !           689: {              
        !           690:        "text/html",
        !           691:        HTML_free,
        !           692:        HTML_end_document,
        !           693:        HTML_put_character,     HTML_put_string,  HTML_write,
        !           694:        HTML_start_element,     HTML_end_element,
        !           695:        HTML_put_entity
        !           696: }; 
1.1       timbl     697: 
1.2     ! timbl     698: /*             New Structured Text object
        !           699: **             --------------------------
        !           700: **
        !           701: **     If the stream is NULL then an interactive object is produced
1.1       timbl     702: */
1.2     ! timbl     703: PUBLIC HTStructured* HTML_new ARGS2(
        !           704:        HTParentAnchor *,       anchor,
        !           705:        HTStream*,              stream)
1.1       timbl     706: {
                    707: 
1.2     ! timbl     708:     HTStructured * this = malloc(sizeof(*this));
1.1       timbl     709: 
                    710:     if (!got_styles) get_styles();
                    711: 
1.2     ! timbl     712:     this->isa = &HTMLPresentation;
        !           713:     this->node_anchor =  anchor;
1.1       timbl     714:     this->title.size = 0;
                    715:     this->title.growby = 128;
                    716:     this->title.allocated = 0;
                    717:     this->title.data = 0;
                    718:     this->text = 0;
                    719:     this->style_change = YES; /* Force check leading to text creation */
1.2     ! timbl     720:     this->new_style = default_style;
        !           721:     this->old_style = 0;
        !           722:     this->sp = this->stack + MAX_NESTING - 1;
        !           723:     this->sp->tag_number = -1;                         /* INVALID */
        !           724:     this->sp->style = default_style;                   /* INVALID */
1.1       timbl     725:     
1.2     ! timbl     726:     this->comment_start = NULL;
        !           727:     this->comment_end = NULL;
        !           728:     this->target = stream;
        !           729:     if (stream) this->targetClass = *stream->isa;      /* Copy pointers */
1.1       timbl     730:     
1.2     ! timbl     731:     return (HTStructured*) this;
1.1       timbl     732: }
                    733: 
                    734: 
1.2     ! timbl     735: /*     HTConverter for HTML to plain text
        !           736: **     ----------------------------------
1.1       timbl     737: **
1.2     ! timbl     738: **     This will convert from HTML to presentation or plain text.
1.1       timbl     739: */
1.2     ! timbl     740: PUBLIC HTStream* HTMLToPlain ARGS3(
        !           741:        HTPresentation *,       pres,
        !           742:        HTParentAnchor *,       anchor, 
        !           743:        HTStream *,             sink)
1.1       timbl     744: {
1.2     ! timbl     745:     return SGML_new(&HTML_dtd, HTML_new(anchor, sink));
1.1       timbl     746: }
                    747: 
                    748: 
1.2     ! timbl     749: /*     HTConverter for HTML to C code
        !           750: **     ------------------------------
        !           751: **
        !           752: **     C copde is like plain text but all non-preformatted code
        !           753: **     is commented out.
        !           754: **     This will convert from HTML to presentation or plain text.
        !           755: */
        !           756: PUBLIC HTStream* HTMLToC ARGS3(
        !           757:        HTPresentation *,       pres,
        !           758:        HTParentAnchor *,       anchor, 
        !           759:        HTStream *,             sink)
1.1       timbl     760: {
1.2     ! timbl     761:     HTStructured * html = HTML_new(anchor, sink);
        !           762:     html->comment_start = "/* ";
        !           763:     html->comment_end = " */\n";       /* Must start in col 1 for cpp */
        !           764:     HTML_put_string(html,html->comment_start);
        !           765:     return SGML_new(&HTML_dtd, html);
1.1       timbl     766: }
                    767: 
                    768: 
1.2     ! timbl     769: /*     Presenter for HTML
        !           770: **     ------------------
        !           771: **
        !           772: **     This will convert from HTML to presentation or plain text.
        !           773: **
        !           774: **     Override this if you have a windows version
1.1       timbl     775: */
1.2     ! timbl     776: #ifndef GUI
        !           777: PUBLIC HTStream* HTMLPresent ARGS3(
        !           778:        HTPresentation *,       pres,
        !           779:        HTParentAnchor *,       anchor, 
        !           780:        HTStream *,             sink)
1.1       timbl     781: {
1.2     ! timbl     782:     return SGML_new(&HTML_dtd, HTML_new(anchor, NULL));
1.1       timbl     783: }
1.2     ! timbl     784: #endif
1.1       timbl     785: 
                    786: 
1.2     ! timbl     787: /*     Record error message as a hypertext object
        !           788: **     ------------------------------------------
        !           789: **
        !           790: **     The error message should be marked as an error so that
        !           791: **     it can be reloaded later.
        !           792: **     This implementation just throws up an error message
        !           793: **     and leaves the document unloaded.
1.1       timbl     794: **
1.2     ! timbl     795: ** On entry,
        !           796: **     sink    is a stream to the output device if any
        !           797: **     number  is the HTTP error number
        !           798: **     message is the human readable message.
1.1       timbl     799: */
1.2     ! timbl     800: 
        !           801: PUBLIC int HTLoadError ARGS3(
        !           802:        HTStream *,     sink,
        !           803:        int,            number,
        !           804:        CONST char *,   message)
        !           805: {
        !           806:     HTAlert(message);          /* @@@@@@@@@@@@@@@@@@@ */
        !           807:     return -number;
        !           808: } 
        !           809: 

Webmaster