Annotation of libwww/Library/src/HTML.c, revision 1.15

1.2       timbl       1: /*             Structured stream to Rich hypertext converter
                      2: **             ============================================
1.1       timbl       3: **
1.2       timbl       4: **     This generates a hypertext object.  It converts from the
                      5: **     structured stream interface for HTML events into the style-
                      6: **     oriented interface of HText.h.  This module is
                      7: **     only used in clients and should not be linked into servers.
1.1       timbl       8: **
1.6       timbl       9: **     Override this module if making a new GUI browser.
1.1       timbl      10: **
                     11: */
                     12: #include "HTML.h"
                     13: 
1.6       timbl      14: /* #define CAREFUL              Check nesting here not really necessary */
1.2       timbl      15: 
1.1       timbl      16: #include <ctype.h>
                     17: #include <stdio.h>
                     18: 
                     19: #include "HTAtom.h"
                     20: #include "HTChunk.h"
                     21: #include "HText.h"
                     22: #include "HTStyle.h"
                     23: 
1.3       timbl      24: #include "HTAlert.h"
1.4       timbl      25: #include "HTMLGen.h"
1.8       timbl      26: #include "HTParse.h"
1.1       timbl      27: 
                     28: extern HTStyleSheet * styleSheet;      /* Application-wide; only declared (extern) here */
                     29: 
                     30: /*     Module-wide style cache
                     31: */
                     32: PRIVATE int            got_styles = 0;  /* Starts at 0; presumably set once the cache is filled by get_styles -- TODO confirm */
1.2       timbl      33: PRIVATE HTStyle *styles[HTML_ELEMENTS];  /* Indexed by HTML element number, e.g. styles[element_number] */
                     34: PRIVATE HTStyle *default_style;  /* Presumably the style for elements without their own -- TODO confirm */
1.1       timbl      35: 
                     36: 
                     37: /*             HTML Object
                     38: **             -----------
                     39: */
1.2       timbl      40: #define MAX_NESTING 20         /* Size of the style stack in struct _HTStructured.  Should be checked by parser */
                     41: 
                     42: typedef struct _stack_element {  /* One entry of the style stack */
                     43:         HTStyle *      style;  /* Style associated with this entry */
                     44:        int             tag_number;  /* HTML element number; compared against HTML_xxx constants */
                     45: } stack_element;
                     46: 
                     47: struct _HTStructured {  /* State of one HTML-to-HText conversion */
                     48:     CONST HTStructuredClass *  isa;
                     49:     HTParentAnchor *           node_anchor;  /* Anchor handed to HText_new2; parent of anchors made for <A> */
                     50:     HText *                    text;  /* Created by actually_set_style the first time a style is applied */
                     51: 
                     52:     HTStream*                  target;                 /* Output stream */
                     53:     HTStreamClass              targetClass;            /* Output routines */
                     54: 
                     55:     HTChunk                    title;          /* Grow by 128.  Collects the text seen inside TITLE */
                     56:     
                     57:     char *                     comment_start;  /* for literate programming */
                     58:     char *                     comment_end;  /* If set, appended to the text when a literal element (LISTING, XMP, PLAINTEXT, PRE) starts */
                     59: 
                     60:     HTTag *                    current_tag;
                     61:     BOOL                       style_change;  /* YES while new_style has been requested but not yet applied */
                     62:     HTStyle *                  new_style;  /* Style most recently requested by change_paragraph_style */
                     63:     HTStyle *                  old_style;  /* Style most recently applied by actually_set_style */
                     64:     BOOL                       in_word;  /* Have just had a non-white char */
                     65:     stack_element      stack[MAX_NESTING];
                     66:     stack_element      *sp;            /* Style stack pointer; sp[0].tag_number selects how characters are handled */
1.1       timbl      67: };
                     68: 
1.2       timbl      69: struct _HTStream {  /* Local view of a stream object: only the leading isa member is declared */
                     70:     CONST HTStreamClass *      isa;
                     71:     /* .... (no further members are declared here) */
                     72: };
1.1       timbl      73: 
                     74: /*             Forward declarations of routines
                     75: */
                     76: PRIVATE void get_styles NOPARAMS;
                     77: 
                     78: 
1.4       timbl      79: PRIVATE void actually_set_style PARAMS((HTStructured * me));  /* Applies me->new_style to the text */
1.11      timbl      80: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));  /* Requests a style; applied later */
1.1       timbl      81: 
                     82: /*     Style buffering avoids dummy paragraph begin/ends.  UPDATE_STYLE applies a
                     83: **     pending style change, if any; it expects a variable named me to be in scope. */
1.4       timbl      84: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1       timbl      85: 
                     86: 
1.2       timbl      87: #ifdef OLD_CODE  /* Disabled fragment: only initializer rows remain, the array declaration is absent */
1.1       timbl      88: /* The following accented characters are from Peter Flynn, CURIA project */
                     89:
                     90: /* These ifdefs don't solve the problem of a simple terminal emulator
                     91: ** with a different character set to the client machine. But nothing does,
                     92: ** except looking at the TERM setting */
                     93:
1.2       timbl      94:
1.1       timbl      95:         { "ocus" , "&" },       /* for CURIA */
                     96: #ifdef IBMPC
                     97:         { "aacute" , "\240" }, /* For PC display */
                     98:         { "eacute" , "\202" },
                     99:         { "iacute" , "\241" },
                    100:         { "oacute" , "\242" },
                    101:         { "uacute" , "\243" },
                    102:         { "Aacute" , "\101" },
                    103:         { "Eacute" , "\220" },
                    104:         { "Iacute" , "\111" },
                    105:         { "Oacute" , "\117" },
                    106:         { "Uacute" , "\125" },
                    107: #else
                    108:         { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
                    109:         { "eacute" , "\351" },
                    110:         { "iacute" , "\355" },
                    111:         { "oacute" , "\363" },
                    112:         { "uacute" , "\372" },
                    113:         { "Aacute" , "\301" },
                    114:         { "Eacute" , "\310" }, /* NOTE(review): ISO_Latin1[] in this file has \310 as E grave and \311 as E acute */
                    115:         { "Iacute" , "\315" },
                    116:         { "Oacute" , "\323" },
                    117:         { "Uacute" , "\332" },
                    118: #endif
                    119:        { 0,    0 }  /* Terminate list */
                    120: };
1.2       timbl     121: #endif
1.1       timbl     122: 
                    123: 
1.2       timbl     124: /*     Entity values -- for ISO Latin 1 local representation
                    125: **     Each entry is a one-character string written as an octal escape.
                    126: **     This MUST match exactly the table referred to in the DTD!
                    127: */
                    128: static char * ISO_Latin1[] = {
                    129:        "\306", /* capital AE diphthong (ligature) */
                    130:        "\301", /* capital A, acute accent */
                    131:        "\302", /* capital A, circumflex accent */
                    132:        "\300", /* capital A, grave accent */
                    133:        "\305", /* capital A, ring */
                    134:        "\303", /* capital A, tilde */
                    135:        "\304", /* capital A, dieresis or umlaut mark */
                    136:        "\307", /* capital C, cedilla */
                    137:        "\320", /* capital Eth, Icelandic */
                    138:        "\311", /* capital E, acute accent */
                    139:        "\312", /* capital E, circumflex accent */
                    140:        "\310", /* capital E, grave accent */
                    141:        "\313", /* capital E, dieresis or umlaut mark */
                    142:        "\315", /* capital I, acute accent */
                    143:        "\316", /* capital I, circumflex accent */
                    144:        "\314", /* capital I, grave accent */
                    145:        "\317", /* capital I, dieresis or umlaut mark */
                    146:        "\321", /* capital N, tilde */
                    147:        "\323", /* capital O, acute accent */
                    148:        "\324", /* capital O, circumflex accent */
                    149:        "\322", /* capital O, grave accent */
                    150:        "\330", /* capital O, slash */
                    151:        "\325", /* capital O, tilde */
                    152:        "\326", /* capital O, dieresis or umlaut mark */
                    153:        "\336", /* capital THORN, Icelandic */
                    154:        "\332", /* capital U, acute accent */
                    155:        "\333", /* capital U, circumflex accent */
                    156:        "\331", /* capital U, grave accent */
                    157:        "\334", /* capital U, dieresis or umlaut mark */
                    158:        "\335", /* capital Y, acute accent */
                    159:        "\341", /* small a, acute accent */
                    160:        "\342", /* small a, circumflex accent */
                    161:        "\346", /* small ae diphthong (ligature) */
                    162:        "\340", /* small a, grave accent */
                    163:        "\046", /* ampersand */
                    164:        "\345", /* small a, ring */
                    165:        "\343", /* small a, tilde */
                    166:        "\344", /* small a, dieresis or umlaut mark */
                    167:        "\347", /* small c, cedilla */
                    168:        "\351", /* small e, acute accent */
                    169:        "\352", /* small e, circumflex accent */
                    170:        "\350", /* small e, grave accent */
                    171:        "\360", /* small eth, Icelandic */
                    172:        "\353", /* small e, dieresis or umlaut mark */
                    173:        "\076", /* greater than */
                    174:        "\355", /* small i, acute accent */
                    175:        "\356", /* small i, circumflex accent */
                    176:        "\354", /* small i, grave accent */
                    177:        "\357", /* small i, dieresis or umlaut mark */
                    178:        "\074", /* less than */
                    179:        "\361", /* small n, tilde */
                    180:        "\363", /* small o, acute accent */
                    181:        "\364", /* small o, circumflex accent */
                    182:        "\362", /* small o, grave accent */
                    183:        "\370", /* small o, slash */
                    184:        "\365", /* small o, tilde */
                    185:        "\366", /* small o, dieresis or umlaut mark */
                    186:        "\337", /* small sharp s, German (sz ligature) */
                    187:        "\376", /* small thorn, Icelandic */
                    188:        "\372", /* small u, acute accent */
                    189:        "\373", /* small u, circumflex accent */
                    190:        "\371", /* small u, grave accent */
                    191:        "\374", /* small u, dieresis or umlaut mark */
                    192:        "\375", /* small y, acute accent */
                    193:        "\377", /* small y, dieresis or umlaut mark */
1.1       timbl     194: };
                    195: 
1.2       timbl     196: 
                    197: /*     Entity values -- for NeXT local representation
                    198: **
                    199: **     This MUST match exactly the table referred to in the DTD!
                    200: **     The entries are listed in the same entity order as ISO_Latin1[].
                    201: */
                    202: static char * NeXTCharacters[] = {
                    203:        "\341", /* capital AE diphthong (ligature)      */
                    204:        "\202", /* capital A, acute accent              */
                    205:        "\203", /* capital A, circumflex accent         */
                    206:        "\201", /* capital A, grave accent              */
                    207:        "\206", /* capital A, ring                      */
                    208:        "\204", /* capital A, tilde                     */
                    209:        "\205", /* capital A, dieresis or umlaut mark   */
                    210:        "\207", /* capital C, cedilla                   */
                    211:        "\220", /* capital Eth, Icelandic               */
                    212:        "\211", /* capital E, acute accent                              */
                    213:        "\212", /* capital E, circumflex accent                         */
                    214:        "\210", /* capital E, grave accent                              */
                    215:        "\213", /* capital E, dieresis or umlaut mark                   */
                    216:        "\215", /* capital I, acute accent                              */
                    217:        "\216", /* capital I, circumflex accent         these are       */
                    218:        "\214", /* capital I, grave accent              ISO -100 octal  */
                    219:        "\217", /* capital I, dieresis or umlaut mark                   */
                    220:        "\221", /* capital N, tilde                                     */
                    221:        "\223", /* capital O, acute accent                              */
                    222:        "\224", /* capital O, circumflex accent                         */
                    223:        "\222", /* capital O, grave accent                              */
                    224:        "\351", /* capital O, slash             'cept this */
                    225:        "\225", /* capital O, tilde                                     */
                    226:        "\226", /* capital O, dieresis or umlaut mark                   */
                    227:        "\234", /* capital THORN, Icelandic */
                    228:        "\230", /* capital U, acute accent */
                    229:        "\231", /* capital U, circumflex accent */
                    230:        "\227", /* capital U, grave accent */
                    231:        "\232", /* capital U, dieresis or umlaut mark */
                    232:        "\233", /* capital Y, acute accent */
                    233:        "\326", /* small a, acute accent */
                    234:        "\327", /* small a, circumflex accent */
                    235:        "\361", /* small ae diphthong (ligature) */
                    236:        "\325", /* small a, grave accent */
                    237:        "\046", /* ampersand */
                    238:        "\332", /* small a, ring */
                    239:        "\330", /* small a, tilde */
                    240:        "\331", /* small a, dieresis or umlaut mark */
                    241:        "\333", /* small c, cedilla */
                    242:        "\335", /* small e, acute accent */
                    243:        "\336", /* small e, circumflex accent */
                    244:        "\334", /* small e, grave accent */
                    245:        "\346", /* small eth, Icelandic         */
                    246:        "\337", /* small e, dieresis or umlaut mark */
                    247:        "\076", /* greater than */
                    248:        "\342", /* small i, acute accent */
                    249:        "\344", /* small i, circumflex accent */
                    250:        "\340", /* small i, grave accent */
                    251:        "\345", /* small i, dieresis or umlaut mark */
                    252:        "\074", /* less than */
                    253:        "\347", /* small n, tilde */
                    254:        "\355", /* small o, acute accent */
                    255:        "\356", /* small o, circumflex accent */
                    256:        "\354", /* small o, grave accent */
                    257:        "\371", /* small o, slash */
                    258:        "\357", /* small o, tilde */
                    259:        "\360", /* small o, dieresis or umlaut mark */
                    260:        "\373", /* small sharp s, German (sz ligature) */
                    261:        "\374", /* small thorn, Icelandic */
                    262:        "\363", /* small u, acute accent */
                    263:        "\364", /* small u, circumflex accent */
                    264:        "\362", /* small u, grave accent */
                    265:        "\366", /* small u, dieresis or umlaut mark */
                    266:        "\367", /* small y, acute accent */
                    267:        "\375", /* small y, dieresis or umlaut mark */
1.1       timbl     268: };
                    269: 
1.2       timbl     270: /*     Entity values -- for IBM/PC Code Page 850 (International)
                    271: **
                    272: **     This MUST match exactly the table referred to in the DTD!
                    273: **
                    274: */
                    275: /* @@@@@@@@@@@@@@@@@ TBD */
                    276: 
                    277: 
                    278: 
                    279: /*             Set character set
                    280: **             ----------------
                    281: **     HTML_NEXT_CHARS selects NeXTCharacters; any other value selects ISO_Latin1. */
                    282:
                    283: PRIVATE char** p_entity_values = ISO_Latin1;   /* Pointer to translation; ISO Latin 1 until changed */
1.1       timbl     284:
1.2       timbl     285: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
                    286: {
                    287:     p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
                    288:                                             : ISO_Latin1;
                    289: }
1.1       timbl     290: 
                    291: 
                    292: /*             Flattening the style structure
                    293: **             ------------------------------
                    294: **
                    295: On the NeXT, and on any read-only browser, it is simpler for the text to have
                    296: a sequence of styles, rather than a nested tree of styles. In this
                    297: case we have to flatten the structure as it arrives from SGML tags into
                    298: a sequence of styles.
                    299: */
                    300: 
                    301: /*             If style really needs to be set, call this.  It applies me->new_style
                    302: **     to the text, creating the HText first if me->text is not yet set. */
1.4       timbl     303: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1       timbl     304: {
1.4       timbl     305:     if (!me->text) {                   /* First time through */
                    306:            me->text = HText_new2(me->node_anchor, me->target);
                    307:            HText_beginAppend(me->text);
                    308:            HText_setStyle(me->text, me->new_style);
                    309:            me->in_word = NO;
1.1       timbl     310:     } else {
1.4       timbl     311:            HText_setStyle(me->text, me->new_style);
1.1       timbl     312:     }
1.4       timbl     313:     me->old_style = me->new_style;  /* Remember what is now in force */
                    314:     me->style_change = NO;  /* Nothing pending any more */
1.1       timbl     315: }
                    316: 
                    317: /*      If you THINK you need to change style, call this.  The style is only
                    318: **      recorded as pending here; UPDATE_STYLE applies it later.  in_word is always reset. */
                    319:
1.11      timbl     320: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1       timbl     321: {
1.4       timbl     322:     if (me->new_style!=style) {  /* Asking again for the same style is not a change */
                    323:        me->style_change = YES;
                    324:        me->new_style = style;
1.1       timbl     325:     }
1.11      timbl     326:     me->in_word = NO;
1.1       timbl     327: }
                    328: 
1.2       timbl     329: /*_________________________________________________________________________
                    330: **
                    331: **                     A C T I O N     R O U T I N E S
                    332: */
                    333: 
                    334: /*     Character handling
                    335: **     ------------------
1.1       timbl     336: **     Handles one character c according to me->sp[0].tag_number. */
1.4       timbl     337: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1       timbl     338: {
1.2       timbl     339:
1.4       timbl     340:     switch (me->sp[0].tag_number) {
1.2       timbl     341:     case HTML_COMMENT:
                    342:        break;                                  /* Do Nothing */
                    343:
                    344:     case HTML_TITLE:                            /* Collected in me->title, not sent to the text */
1.4       timbl     345:        HTChunkPutc(&me->title, c);
1.2       timbl     346:        break;
                    347:
                    348:
                    349:     case HTML_LISTING:                         /* Literal text */
                    350:     case HTML_XMP:
                    351:     case HTML_PLAINTEXT:
                    352:     case HTML_PRE:
                    353: /*     We guarantee that the style is up-to-date in begin_litteral
                    354: */
1.4       timbl     355:        HText_appendCharacter(me->text, c);     /* Passed through unchanged */
1.2       timbl     356:        break;
                    357:
                    358:     default:                                   /* Free format text */
1.4       timbl     359:        if (me->style_change) {
1.2       timbl     360:            if ((c=='\n') || (c==' ')) return;  /* Ignore it: no leading white space in a new style */
                    361:            UPDATE_STYLE;
                    362:        }
                    363:        if (c=='\n') {                          /* Newline becomes one space, and only after a word */
1.4       timbl     364:            if (me->in_word) {
                    365:                HText_appendCharacter(me->text, ' ');
                    366:                me->in_word = NO;
1.2       timbl     367:            }
                    368:        } else {
1.4       timbl     369:            HText_appendCharacter(me->text, c);
                    370:            me->in_word = YES;
1.2       timbl     371:        }
                    372:     } /* end switch */
1.1       timbl     373: }
                    374: 
1.2       timbl     375: 
                    376: 
                    377: /*     String handling
                    378: **     ---------------
                    379: **
                    380: **     This is written separately from put_character because the loop can
1.11      timbl     381: **     in some cases be promoted to a higher function call level for speed.
1.2       timbl     382: */
1.4       timbl     383: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1       timbl     384: {
1.2       timbl     385:
1.4       timbl     386:     switch (me->sp[0].tag_number) {
1.2       timbl     387:     case HTML_COMMENT:
                    388:        break;                                  /* Do Nothing */
                    389:
                    390:     case HTML_TITLE:                            /* Collected in me->title, not sent to the text */
1.4       timbl     391:        HTChunkPuts(&me->title, s);
1.2       timbl     392:        break;
                    393:
                    394:
                    395:     case HTML_LISTING:                         /* Literal text */
                    396:     case HTML_XMP:
                    397:     case HTML_PLAINTEXT:
                    398:     case HTML_PRE:
                    399:
                    400: /*     We guarantee that the style is up-to-date in begin_litteral
                    401: */
1.4       timbl     402:        HText_appendText(me->text, s);          /* Passed through unchanged */
1.2       timbl     403:        break;
                    404:
                    405:     default:                                   /* Free format text */
                    406:         {
                    407:            CONST char *p = s;
1.4       timbl     408:            if (me->style_change) {
1.2       timbl     409:                for (; *p && ((*p=='\n') || (*p==' ')); p++)  ;  /* Ignore leaders */
                    410:                if (!*p) return;                /* Only white space: leave the style pending */
                    411:                UPDATE_STYLE;
                    412:            }
                    413:            for(; *p; p++) {
1.4       timbl     414:                if (me->style_change) {         /* NOTE(review): looks redundant -- actually_set_style clears style_change */
1.2       timbl     415:                    if ((*p=='\n') || (*p==' ')) continue;  /* Ignore it */
                    416:                    UPDATE_STYLE;
                    417:                }
                    418:                if (*p=='\n') {                 /* Newline becomes one space, and only after a word */
1.4       timbl     419:                    if (me->in_word) {
                    420:                        HText_appendCharacter(me->text, ' ');
                    421:                        me->in_word = NO;
1.2       timbl     422:                    }
                    423:                } else {
1.4       timbl     424:                    HText_appendCharacter(me->text, *p);
                    425:                    me->in_word = YES;
1.2       timbl     426:                }
                    427:            } /* for */
                    428:        }
                    429:     } /* end switch */
1.1       timbl     430: }
                    431: 
                    432: 
1.2       timbl     433: /*     Buffer write
1.3       timbl     434: **     ------------
1.1       timbl     435: **     Passes the l characters starting at s, one by one, to HTML_put_character. */
1.4       timbl     436: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1       timbl     437: {
1.2       timbl     438:     CONST char* p;
                    439:     CONST char* e = s+l;                /* One past the last character to write */
1.4       timbl     440:     for (p=s; p<e; p++) HTML_put_character(me, *p);  /* Test p, not s: s never moves, so s<e never ended */
1.1       timbl     441: }
1.2       timbl     442: 
                    443: 
                    444: /*     Start Element
                    445: **     -------------
                    446: */
                    447: PRIVATE void HTML_start_element ARGS4(
1.4       timbl     448:        HTStructured *,         me,
1.2       timbl     449:        int,            element_number,
1.3       timbl     450:        CONST BOOL*,            present,
                    451:        CONST char **,  value)
1.2       timbl     452: {
                    453:     switch (element_number) {
                    454:     case HTML_A:
                    455:        {
1.8       timbl     456:            HTChildAnchor * source;
1.9       timbl     457:            char * href = NULL;
                    458:            if (present[HTML_A_HREF]) {
                    459:                StrAllocCopy(href, value[HTML_A_HREF]);
                    460:                HTSimplify(href);
                    461:            }
1.8       timbl     462:            source = HTAnchor_findChildAndLink(
1.4       timbl     463:                me->node_anchor,                                /* parent */
1.2       timbl     464:                present[HTML_A_NAME] ? value[HTML_A_NAME] : 0,  /* Tag */
1.9       timbl     465:                present[HTML_A_HREF] ? href : 0,                /* Addresss */
1.2       timbl     466:                present[HTML_A_TYPE] && value[HTML_A_TYPE] ? 
                    467:                        (HTLinkType*)HTAtom_for(value[HTML_A_TYPE])
                    468:                                                : 0);
                    469:            
                    470:            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
                    471:                HTParentAnchor * dest = 
                    472:                    HTAnchor_parent(
                    473:                        HTAnchor_followMainLink((HTAnchor*)source)
                    474:                                    );
                    475:                if (!HTAnchor_title(dest))
                    476:                        HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
                    477:            }
                    478:            UPDATE_STYLE;
1.4       timbl     479:            HText_beginAnchor(me->text, source);
1.2       timbl     480:        }
                    481:        break;
                    482:        
                    483:     case HTML_TITLE:
1.4       timbl     484:         HTChunkClear(&me->title);
1.2       timbl     485:        break;
                    486:        
                    487:     case HTML_NEXTID:
                    488:        /* if (present[NEXTID_N] && value[NEXTID_N])
1.4       timbl     489:                HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2       timbl     490:        break;
                    491:        
                    492:     case HTML_ISINDEX:
1.4       timbl     493:        HTAnchor_setIndex(me->node_anchor);
1.2       timbl     494:        break;
                    495:        
1.15    ! timbl     496:     case HTML_BR: 
        !           497:        UPDATE_STYLE;
        !           498:        HText_appendCharacter(me->text, '\n');
        !           499:        me->in_word = NO;
        !           500:        break;
        !           501:        
        !           502:     case HTML_HR: 
        !           503:        UPDATE_STYLE;
        !           504:        HText_appendCharacter(me->text, '\n');
        !           505:        HText_appendCharacter(me->text, "___________________________________");
        !           506:        HText_appendCharacter(me->text, '\n');
        !           507:        me->in_word = NO;
        !           508:        break;
        !           509:        
1.2       timbl     510:     case HTML_P:
                    511:        UPDATE_STYLE;
1.4       timbl     512:        HText_appendParagraph(me->text);
                    513:        me->in_word = NO;
1.2       timbl     514:        break;
                    515: 
                    516:     case HTML_DL:
1.11      timbl     517:         change_paragraph_style(me, present && present[DL_COMPACT]
1.2       timbl     518:                ? styles[HTML_DLC]
                    519:                : styles[HTML_DL]);
                    520:        break;
                    521:        
                    522:     case HTML_DT:
1.4       timbl     523:         if (!me->style_change) {
                    524:            HText_appendParagraph(me->text);
                    525:            me->in_word = NO;
1.2       timbl     526:        }
                    527:        break;
                    528:        
                    529:     case HTML_DD:
                    530:         UPDATE_STYLE;
1.4       timbl     531:        HTML_put_character(me, '\t');   /* Just tab out one stop */
                    532:        me->in_word = NO;
                    533:        break;
1.2       timbl     534: 
                    535:     case HTML_UL:
                    536:     case HTML_OL:
                    537:     case HTML_MENU:
                    538:     case HTML_DIR:
1.11      timbl     539:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     540:        break;
                    541:        
                    542:     case HTML_LI:
                    543:         UPDATE_STYLE;
1.7       timbl     544:        if (me->sp[0].tag_number != HTML_DIR)
1.4       timbl     545:            HText_appendParagraph(me->text);
1.2       timbl     546:        else
1.4       timbl     547:            HText_appendCharacter(me->text, '\t');      /* Tab @@ nl for UL? */
                    548:        me->in_word = NO;
1.2       timbl     549:        break;
                    550:        
                    551:     case HTML_LISTING:                         /* Litteral text */
                    552:     case HTML_XMP:
                    553:     case HTML_PLAINTEXT:
                    554:     case HTML_PRE:
1.11      timbl     555:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     556:        UPDATE_STYLE;
1.4       timbl     557:        if (me->comment_end)
                    558:            HText_appendText(me->text, me->comment_end);
1.2       timbl     559:        break;
1.11      timbl     560: 
                    561:     case HTML_HTML:                    /* Ignore these altogether */
                    562:     case HTML_HEAD:
                    563:     case HTML_BODY:
                    564:     
1.10      timbl     565:     case HTML_IMG:                     /* Images -- ignore */
                    566:     
                    567:     case HTML_TT:                      /* Physical character highlighting */
                    568:     case HTML_B:                       /* Currently ignored */
                    569:     case HTML_I:
                    570:     case HTML_U:
                    571:     
                    572:     case HTML_EM:                      /* Logical character highlighting */
                    573:     case HTML_STRONG:                  /* Currently ignored */
                    574:     case HTML_CODE:
                    575:     case HTML_SAMP:
                    576:     case HTML_KBD:
                    577:     case HTML_VAR:
                    578:     case HTML_DFN:
                    579:     case HTML_CITE:
                    580:        break;
                    581:        
1.11      timbl     582:     case HTML_H1:                      /* paragraph styles */
                    583:     case HTML_H2:
                    584:     case HTML_H3:
                    585:     case HTML_H4:
                    586:     case HTML_H5:
                    587:     case HTML_H6:
                    588:     case HTML_H7:
                    589:     case HTML_ADDRESS:
                    590:     case HTML_BLOCKQUOTE:
                    591:        change_paragraph_style(me, styles[element_number]);     /* May be postponed */
1.2       timbl     592:        break;
                    593: 
                    594:     } /* end switch */
                    595: 
                    596:     if (HTML_dtd.tags[element_number].contents!= SGML_EMPTY) {
1.13      timbl     597:         if (me->sp == me->stack) {
1.12      timbl     598:            fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
                    599:            MAX_NESTING); 
                    600:            return;
                    601:        }
1.4       timbl     602:        --(me->sp);
                    603:        me->sp[0].style = me->new_style;        /* Stack new style */
                    604:        me->sp[0].tag_number = element_number;
1.10      timbl     605:     }  
1.1       timbl     606: }
1.10      timbl     607: 
1.2       timbl     608: 
1.1       timbl     609: /*             End Element
1.2       timbl     610: **             -----------
1.1       timbl     611: **
1.2       timbl     612: */
                    613: /*     When we end an element, the style must be returned to that
1.1       timbl     614: **     in effect before that element.  Note that anchors (etc?)
                    615: **     don't have an associated style, so that we must scan down the
                    616: **     stack for an element with a defined style. (In fact, the styles
                    617: **     should be linked to the whole stack not just the top one.)
                    618: **     TBL 921119
1.6       timbl     619: **
                    620: **     We don't turn on "CAREFUL" check because the parser produces
                    621: **     (internal code errors apart) good nesting. The parser checks
                    622: **     incoming code errors, not this module.
1.1       timbl     623: */
1.4       timbl     624: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1       timbl     625: {
1.2       timbl     626: #ifdef CAREFUL                 /* parser assumed to produce good nesting */
1.4       timbl     627:     if (element_number != me->sp[0].tag_number) {
1.2       timbl     628:         fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
                    629:                HTML_dtd.tags[element_number].name,
1.4       timbl     630:                HTML_dtd.tags[me->sp->tag_number].name);
1.6       timbl     631:                /* panic */
1.1       timbl     632:     }
1.2       timbl     633: #endif
                    634:     
1.4       timbl     635:     me->sp++;                          /* Pop state off stack */
1.2       timbl     636:     
                    637:     switch(element_number) {
                    638: 
                    639:     case HTML_A:
                    640:        UPDATE_STYLE;
1.4       timbl     641:        HText_endAnchor(me->text);
1.2       timbl     642:        break;
                    643: 
                    644:     case HTML_TITLE:
1.4       timbl     645:         HTChunkTerminate(&me->title);
                    646:        HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2       timbl     647:        break;
                    648:        
                    649:     case HTML_LISTING:                         /* Litteral text */
                    650:     case HTML_XMP:
                    651:     case HTML_PLAINTEXT:
                    652:     case HTML_PRE:
1.4       timbl     653:        if (me->comment_start)
                    654:            HText_appendText(me->text, me->comment_start);
1.2       timbl     655:        /* Fall through */
                    656:        
                    657:     default:
                    658:     
1.11      timbl     659:        change_paragraph_style(me, me->sp->style);      /* Often won't really change */
1.2       timbl     660:        break;
                    661:        
                    662:     } /* switch */
1.1       timbl     663: }
                    664: 
1.2       timbl     665: 
                    666: /*             Expanding entities
                    667: **             ------------------
                    668: */
                    669: /*     (In fact, they all shrink!)
1.1       timbl     670: */
1.2       timbl     671: 
1.4       timbl     672: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1       timbl     673: {
1.4       timbl     674:     HTML_put_string(me, ISO_Latin1[entity_number]);    /* @@ Other representations */
1.1       timbl     675: }
1.2       timbl     676: 
                    677: 
                    678: /*     Free an HTML object
                    679: **     -------------------
                    680: **
1.4       timbl     681: ** If the document is empty, the text object will not yet exist.
                    682:    So we could in fact abandon creating the document and return
                    683:    an error code.  In fact an empty document is an important type
                    684:    of document, so we don't.
                    685: **
                    686: **     If non-interactive, everything is freed off.   No: crashes -listrefs
1.2       timbl     687: **     Otherwise, the interactive object is left.      
                    688: */
1.4       timbl     689: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1       timbl     690: {
1.4       timbl     691:     UPDATE_STYLE;              /* Creates empty document here! */
                    692:     if (me->comment_end)
                    693:                HTML_put_string(me,me->comment_end);
                    694:     HText_endAppend(me->text);
                    695: 
                    696:     if (me->target) {
                    697:         (*me->targetClass.free)(me->target);
1.2       timbl     698:     }
1.4       timbl     699:     free(me);
1.1       timbl     700: }
                    701: 
                    702: 
1.14      timbl     703: PRIVATE void HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1       timbl     704: 
1.14      timbl     705: {
                    706:     if (me->target) {
                    707:         (*me->targetClass.abort)(me->target, e);
                    708:     }
                    709:     free(me);
                    710: 
1.1       timbl     711: }
                    712: 
1.2       timbl     713: 
                    714: /*     Get Styles from style sheet
                    715: **     ---------------------------
                    716: */
                    717: PRIVATE void get_styles NOARGS
1.1       timbl     718: {
1.2       timbl     719:     got_styles = YES;
                    720:     
                    721:     default_style =            HTStyleNamed(styleSheet, "Normal");
1.1       timbl     722: 
1.2       timbl     723:     styles[HTML_H1] =          HTStyleNamed(styleSheet, "Heading1");
                    724:     styles[HTML_H2] =          HTStyleNamed(styleSheet, "Heading2");
                    725:     styles[HTML_H3] =          HTStyleNamed(styleSheet, "Heading3");
                    726:     styles[HTML_H4] =          HTStyleNamed(styleSheet, "Heading4");
                    727:     styles[HTML_H5] =          HTStyleNamed(styleSheet, "Heading5");
                    728:     styles[HTML_H6] =          HTStyleNamed(styleSheet, "Heading6");
                    729:     styles[HTML_H7] =          HTStyleNamed(styleSheet, "Heading7");
                    730: 
                    731:     styles[HTML_DL] =          HTStyleNamed(styleSheet, "Glossary");
                    732:     styles[HTML_UL] =
                    733:     styles[HTML_OL] =          HTStyleNamed(styleSheet, "List");
                    734:     styles[HTML_MENU] =                HTStyleNamed(styleSheet, "Menu");
                    735:     styles[HTML_DIR] =         HTStyleNamed(styleSheet, "Dir");    
                    736:     styles[HTML_DLC] =         HTStyleNamed(styleSheet, "GlossaryCompact");
                    737:     styles[HTML_ADDRESS]=      HTStyleNamed(styleSheet, "Address");
                    738:     styles[HTML_BLOCKQUOTE]=   HTStyleNamed(styleSheet, "BlockQuote");
                    739:     styles[HTML_PLAINTEXT] =
                    740:     styles[HTML_XMP] =         HTStyleNamed(styleSheet, "Example");
                    741:     styles[HTML_PRE] =         HTStyleNamed(styleSheet, "Preformatted");
                    742:     styles[HTML_LISTING] =     HTStyleNamed(styleSheet, "Listing");
                    743: }
                    744: /*                             P U B L I C
                    745: */
                    746: 
                    747: /*     Structured Object Class
                    748: **     -----------------------
                    749: */
                    750: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
                    751: {              
                    752:        "text/html",
                    753:        HTML_free,
1.14      timbl     754:        HTML_abort,
1.2       timbl     755:        HTML_put_character,     HTML_put_string,  HTML_write,
                    756:        HTML_start_element,     HTML_end_element,
                    757:        HTML_put_entity
                    758: }; 
1.1       timbl     759: 
1.4       timbl     760: 
1.2       timbl     761: /*             New Structured Text object
                    762: **             --------------------------
                    763: **
1.4       timbl     764: **     The strutcured stream can generate either presentation,
                    765: **     or plain text, or HTML.
1.1       timbl     766: */
1.4       timbl     767: PUBLIC HTStructured* HTML_new ARGS3(
1.2       timbl     768:        HTParentAnchor *,       anchor,
1.4       timbl     769:        HTFormat,               format_out,
1.2       timbl     770:        HTStream*,              stream)
1.1       timbl     771: {
                    772: 
1.4       timbl     773:     HTStructured * me;
                    774:     
                    775:     if (format_out != WWW_PLAINTEXT && format_out != WWW_PRESENT) {
1.6       timbl     776:         HTStream * intermediate = HTStreamStack(WWW_HTML, format_out,
                    777:                stream, anchor);
                    778:        if (intermediate) return HTMLGenerator(intermediate);
1.4       timbl     779:         fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
                    780:                        HTAtom_name(format_out));
                    781:        exit (-99);
                    782:     }
                    783: 
                    784:     me = (HTStructured*) malloc(sizeof(*me));
                    785:     if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1       timbl     786: 
                    787:     if (!got_styles) get_styles();
                    788: 
1.4       timbl     789:     me->isa = &HTMLPresentation;
                    790:     me->node_anchor =  anchor;
                    791:     me->title.size = 0;
                    792:     me->title.growby = 128;
                    793:     me->title.allocated = 0;
                    794:     me->title.data = 0;
                    795:     me->text = 0;
                    796:     me->style_change = YES; /* Force check leading to text creation */
                    797:     me->new_style = default_style;
                    798:     me->old_style = 0;
                    799:     me->sp = me->stack + MAX_NESTING - 1;
                    800:     me->sp->tag_number = -1;                           /* INVALID */
                    801:     me->sp->style = default_style;                     /* INVALID */
1.1       timbl     802:     
1.4       timbl     803:     me->comment_start = NULL;
                    804:     me->comment_end = NULL;
                    805:     me->target = stream;
                    806:     if (stream) me->targetClass = *stream->isa;        /* Copy pointers */
1.1       timbl     807:     
1.4       timbl     808:     return (HTStructured*) me;
1.1       timbl     809: }
                    810: 
                    811: 
1.2       timbl     812: /*     HTConverter for HTML to plain text
                    813: **     ----------------------------------
1.1       timbl     814: **
1.2       timbl     815: **     This will convert from HTML to presentation or plain text.
1.1       timbl     816: */
1.2       timbl     817: PUBLIC HTStream* HTMLToPlain ARGS3(
                    818:        HTPresentation *,       pres,
                    819:        HTParentAnchor *,       anchor, 
                    820:        HTStream *,             sink)
1.1       timbl     821: {
1.4       timbl     822:     return SGML_new(&HTML_dtd, HTML_new(anchor, pres->rep_out, sink));
1.1       timbl     823: }
                    824: 
                    825: 
1.2       timbl     826: /*     HTConverter for HTML to C code
                    827: **     ------------------------------
                    828: **
                    829: **     C copde is like plain text but all non-preformatted code
                    830: **     is commented out.
                    831: **     This will convert from HTML to presentation or plain text.
                    832: */
                    833: PUBLIC HTStream* HTMLToC ARGS3(
                    834:        HTPresentation *,       pres,
                    835:        HTParentAnchor *,       anchor, 
                    836:        HTStream *,             sink)
1.1       timbl     837: {
1.4       timbl     838:     
                    839:     HTStructured * html;
                    840:     
                    841:     (*sink->isa->put_string)(sink, "/* ");     /* Before even title */
                    842:     html = HTML_new(anchor, WWW_PLAINTEXT, sink);
1.2       timbl     843:     html->comment_start = "/* ";
                    844:     html->comment_end = " */\n";       /* Must start in col 1 for cpp */
1.4       timbl     845: /*    HTML_put_string(html,html->comment_start); */
1.2       timbl     846:     return SGML_new(&HTML_dtd, html);
1.1       timbl     847: }
                    848: 
                    849: 
1.2       timbl     850: /*     Presenter for HTML
                    851: **     ------------------
                    852: **
                    853: **     This will convert from HTML to presentation or plain text.
                    854: **
                    855: **     Override this if you have a windows version
1.1       timbl     856: */
1.2       timbl     857: #ifndef GUI
                    858: PUBLIC HTStream* HTMLPresent ARGS3(
                    859:        HTPresentation *,       pres,
                    860:        HTParentAnchor *,       anchor, 
                    861:        HTStream *,             sink)
1.1       timbl     862: {
1.14      timbl     863:     return SGML_new(&HTML_dtd, HTML_new(anchor, WWW_PRESENT, sink));
1.1       timbl     864: }
1.2       timbl     865: #endif
1.1       timbl     866: 
                    867: 
1.2       timbl     868: /*     Record error message as a hypertext object
                    869: **     ------------------------------------------
                    870: **
                    871: **     The error message should be marked as an error so that
                    872: **     it can be reloaded later.
                    873: **     This implementation just throws up an error message
                    874: **     and leaves the document unloaded.
1.9       timbl     875: **     A smarter implementation would load an error document,
                    876: **     marking at such so that it is retried on reload.
1.1       timbl     877: **
1.2       timbl     878: ** On entry,
                    879: **     sink    is a stream to the output device if any
                    880: **     number  is the HTTP error number
                    881: **     message is the human readable message.
1.9       timbl     882: **
                    883: ** On exit,
                    884: **     returns a negative number to indicate lack of success in the load.
1.1       timbl     885: */
1.2       timbl     886: 
                    887: PUBLIC int HTLoadError ARGS3(
                    888:        HTStream *,     sink,
                    889:        int,            number,
                    890:        CONST char *,   message)
                    891: {
                    892:     HTAlert(message);          /* @@@@@@@@@@@@@@@@@@@ */
                    893:     return -number;
                    894: } 
                    895: 

Webmaster