Annotation of libwww/Library/src/HTML.c, revision 1.44

1.39      frystyk     1: /*                                                                      HTML.c
                      2: **     STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
                      3: **
1.43      frystyk     4: **     (c) COPYRIGHT MIT 1995.
1.39      frystyk     5: **     Please first read the full copyright statement in the file COPYRIGH.
1.1       timbl       6: **
1.2       timbl       7: **     This generates a hypertext object.  It converts from the
                      8: **     structured stream interface for HTML events into the style-
                      9: **     oriented interface of the HText.h interface.  This module is
                     10: **     only used in clients and should not be linked into servers.
1.1       timbl      11: **
1.6       timbl      12: **     Override this module if making a new GUI browser.
1.1       timbl      13: **
1.35      duns       14: ** HISTORY:
                     15: **      8 Jul 94  FM   Insulate free() from _free structure element.
                     16: **
1.1       timbl      17: */
1.16      timbl      18: 
1.41      frystyk    19: /* Library include files */
                     20: #include "tcp.h"
                     21: #include "HTUtils.h"
                     22: #include "HTString.h"
1.1       timbl      23: #include "HTAtom.h"
                     24: #include "HTChunk.h"
                     25: #include "HText.h"
                     26: #include "HTStyle.h"
1.3       timbl      27: #include "HTAlert.h"
1.4       timbl      28: #include "HTMLGen.h"
1.8       timbl      29: #include "HTParse.h"
1.41      frystyk    30: #include "HTML.h"
1.1       timbl      31: 
                     32: extern HTStyleSheet * styleSheet;      /* Application-wide */
                     33: 
                     34: /*     Module-wide style cache
                     35: */
                     36: PRIVATE int            got_styles = 0;
1.16      timbl      37: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2       timbl      38: PRIVATE HTStyle *default_style;
1.1       timbl      39: 
                     40: 
                     41: /*             HTML Object
                     42: **             -----------
                     43: */
1.2       timbl      44: #define MAX_NESTING 20         /* Should be checked by parser */
                     45: 
                     46: typedef struct _stack_element {   /* One entry of the nesting stack in struct _HTStructured */
                     47:         HTStyle *      style;         /* presumably the style belonging to this nesting level -- TODO confirm */
                     48:        int             tag_number;    /* Element number; the character handlers switch on sp[0].tag_number */
                     49: } stack_element;
                     50: 
                     51: struct _HTStructured {
                     52:     CONST HTStructuredClass *  isa;
                     53:     HTParentAnchor *           node_anchor;    /* Passed to HText_new2() and used as parent for child anchors */
                     54:     HText *                    text;           /* Created lazily by actually_set_style() on first use */
                     55: 
                     56:     HTStream*                  target;                 /* Output stream */
                     57:     HTStreamClass              targetClass;            /* Output routines */
                     58: 
                     59:     HTChunk                    title;          /* Grow by 128; receives character data while inside TITLE */
                     60:     
                     61:     char *                     comment_start;  /* for literate programming */
                     62:     char *                     comment_end;    /* if set, appended to the text when a literal element starts */
1.16      timbl      63:     
                     64:     CONST SGML_dtd*            dtd;
                     65:     
1.2       timbl      66:     HTTag *                    current_tag;
                     67:     BOOL                       style_change;   /* YES while new_style has not yet been applied to text */
                     68:     HTStyle *                  new_style;      /* Style requested through change_paragraph_style() */
                     69:     HTStyle *                  old_style;      /* Style most recently applied by actually_set_style() */
                     70:     BOOL                       in_word;  /* Have just had a non-white char */
1.44    ! frystyk    71: 
        !            72:     stack_element              stack[MAX_NESTING];
        !            73:     stack_element              *sp;                  /* Style stack pointer; sp[0] is the current element */
        !            74:     int                                overflow;  /* Keep track of overflow nesting */
1.1       timbl      75: };
                     76: 
1.2       timbl      77: struct _HTStream {
                     78:     CONST HTStreamClass *      isa;    /* Only the class pointer is declared in this module */
                     79:     /* .... remaining members are not declared here */
                     80: };
1.1       timbl      81: 
                     82: /*             Forward declarations of routines
                     83: */
                     84: PRIVATE void get_styles NOPARAMS;
                     85: 
                     86: 
1.4       timbl      87: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11      timbl      88: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1       timbl      89: 
                     90: /*     Style buffering avoids dummy paragraph begin/ends.
                     91: */
1.4       timbl      92: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1       timbl      93: 
                     94: 
1.2       timbl      95: #ifdef OLD_CODE
1.1       timbl      96: /* The following accented characters are from peter Flynn, curia project */
                     97: 
                     98: /* these ifdefs don't solve the problem of a simple terminal emulator
                     99: ** with a different character set to the client machine. But nothing does,
                    100: ** except looking at the TERM setting */
                    101: 
1.2       timbl     102: 
1.1       timbl     103:         { "ocus" , "&" },       /* for CURIA */
                    104: #ifdef IBMPC
                    105:         { "aacute" , "\240" }, /* For PC display */
                    106:         { "eacute" , "\202" },
                    107:         { "iacute" , "\241" },
                    108:         { "oacute" , "\242" },
                    109:         { "uacute" , "\243" },
                    110:         { "Aacute" , "\101" },
                    111:         { "Eacute" , "\220" },
                    112:         { "Iacute" , "\111" },
                    113:         { "Oacute" , "\117" },
                    114:         { "Uacute" , "\125" },
                    115: #else
                    116:         { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
                    117:         { "eacute" , "\351" },
                    118:         { "iacute" , "\355" },
                    119:         { "oacute" , "\363" },
                    120:         { "uacute" , "\372" },
                    121:         { "Aacute" , "\301" },
                    122:         { "Eacute" , "\310" },
                    123:         { "Iacute" , "\315" },
                    124:         { "Oacute" , "\323" },
                    125:         { "Uacute" , "\332" }, 
                    126: #endif
                    127:        { 0,    0 }  /* Terminate list */
                    128: };
1.2       timbl     129: #endif
1.1       timbl     130: 
                    131: 
1.2       timbl     132: /*     Entity values -- for ISO Latin 1 local representation
                    133: **     66 one-character strings; NeXTCharacters must keep the same count and order.
                    134: **     This MUST match exactly the table referred to in the DTD!
                    135: */
                    136: static char * ISO_Latin1[] = {
                    137:        "\306", /* capital AE diphthong (ligature) */ 
                    138:        "\301", /* capital A, acute accent */ 
                    139:        "\302", /* capital A, circumflex accent */ 
                    140:        "\300", /* capital A, grave accent */ 
                    141:        "\305", /* capital A, ring */ 
                    142:        "\303", /* capital A, tilde */ 
                    143:        "\304", /* capital A, dieresis or umlaut mark */ 
                    144:        "\307", /* capital C, cedilla */ 
                    145:        "\320", /* capital Eth, Icelandic */ 
                    146:        "\311", /* capital E, acute accent */ 
                    147:        "\312", /* capital E, circumflex accent */ 
                    148:        "\310", /* capital E, grave accent */ 
                    149:        "\313", /* capital E, dieresis or umlaut mark */ 
                    150:        "\315", /* capital I, acute accent */ 
                    151:        "\316", /* capital I, circumflex accent */ 
                    152:        "\314", /* capital I, grave accent */ 
                    153:        "\317", /* capital I, dieresis or umlaut mark */ 
                    154:        "\321", /* capital N, tilde */ 
                    155:        "\323", /* capital O, acute accent */ 
                    156:        "\324", /* capital O, circumflex accent */ 
                    157:        "\322", /* capital O, grave accent */ 
                    158:        "\330", /* capital O, slash */ 
                    159:        "\325", /* capital O, tilde */ 
                    160:        "\326", /* capital O, dieresis or umlaut mark */ 
                    161:        "\336", /* capital THORN, Icelandic */ 
                    162:        "\332", /* capital U, acute accent */ 
                    163:        "\333", /* capital U, circumflex accent */ 
                    164:        "\331", /* capital U, grave accent */ 
                    165:        "\334", /* capital U, dieresis or umlaut mark */ 
                    166:        "\335", /* capital Y, acute accent */ 
                    167:        "\341", /* small a, acute accent */ 
                    168:        "\342", /* small a, circumflex accent */ 
                    169:        "\346", /* small ae diphthong (ligature) */ 
                    170:        "\340", /* small a, grave accent */ 
                    171:        "\046", /* ampersand */ 
                    172:        "\345", /* small a, ring */ 
                    173:        "\343", /* small a, tilde */ 
                    174:        "\344", /* small a, dieresis or umlaut mark */ 
                    175:        "\347", /* small c, cedilla */ 
                    176:        "\351", /* small e, acute accent */ 
                    177:        "\352", /* small e, circumflex accent */ 
                    178:        "\350", /* small e, grave accent */ 
                    179:        "\360", /* small eth, Icelandic */ 
                    180:        "\353", /* small e, dieresis or umlaut mark */ 
                    181:        "\076", /* greater than */ 
                    182:        "\355", /* small i, acute accent */ 
                    183:        "\356", /* small i, circumflex accent */ 
                    184:        "\354", /* small i, grave accent */ 
                    185:        "\357", /* small i, dieresis or umlaut mark */ 
                    186:        "\074", /* less than */ 
                    187:        "\361", /* small n, tilde */ 
                    188:        "\363", /* small o, acute accent */ 
                    189:        "\364", /* small o, circumflex accent */ 
                    190:        "\362", /* small o, grave accent */ 
                    191:        "\370", /* small o, slash */ 
                    192:        "\365", /* small o, tilde */ 
                    193:        "\366", /* small o, dieresis or umlaut mark */ 
1.36      frystyk   194:         "\042", /* double quote sign - June 94 */
1.2       timbl     195:        "\337", /* small sharp s, German (sz ligature) */ 
                    196:        "\376", /* small thorn, Icelandic */ 
                    197:        "\372", /* small u, acute accent */ 
                    198:        "\373", /* small u, circumflex accent */ 
                    199:        "\371", /* small u, grave accent */ 
                    200:        "\374", /* small u, dieresis or umlaut mark */ 
                    201:        "\375", /* small y, acute accent */ 
                    202:        "\377", /* small y, dieresis or umlaut mark */ 
1.1       timbl     203: };
                    204: 
1.2       timbl     205: 
                    206: /*     Entity values -- for NeXT local representation
                    207: **     66 one-character strings, in the same entity order as ISO_Latin1.
                    208: **     This MUST match exactly the table referred to in the DTD!
                    209: **     Selected by HTMLUseCharacterSet(HTML_NEXT_CHARS).
                    210: */
                    211: static char * NeXTCharacters[] = {
                    212:        "\341", /* capital AE diphthong (ligature)      */ 
                    213:        "\202", /* capital A, acute accent              */ 
                    214:        "\203", /* capital A, circumflex accent         */ 
                    215:        "\201", /* capital A, grave accent              */ 
                    216:        "\206", /* capital A, ring                      */ 
                    217:        "\204", /* capital A, tilde                     */ 
                    218:        "\205", /* capital A, dieresis or umlaut mark   */ 
                    219:        "\207", /* capital C, cedilla                   */ 
                    220:        "\220", /* capital Eth, Icelandic               */ 
                    221:        "\211", /* capital E, acute accent                              */ 
                    222:        "\212", /* capital E, circumflex accent                         */ 
                    223:        "\210", /* capital E, grave accent                              */ 
                    224:        "\213", /* capital E, dieresis or umlaut mark                   */ 
                    225:        "\215", /* capital I, acute accent                              */ 
                    226:        "\216", /* capital I, circumflex accent         these are       */ 
                    227:        "\214", /* capital I, grave accent              ISO - octal 100 */ 
                    228:        "\217", /* capital I, dieresis or umlaut mark                   */ 
                    229:        "\221", /* capital N, tilde                                     */ 
                    230:        "\223", /* capital O, acute accent                              */ 
                    231:        "\224", /* capital O, circumflex accent                         */ 
                    232:        "\222", /* capital O, grave accent                              */ 
                    233:        "\351", /* capital O, slash             except this one         */ 
                    234:        "\225", /* capital O, tilde                                     */ 
                    235:        "\226", /* capital O, dieresis or umlaut mark                   */ 
                    236:        "\234", /* capital THORN, Icelandic */ 
                    237:        "\230", /* capital U, acute accent */ 
                    238:        "\231", /* capital U, circumflex accent */ 
                    239:        "\227", /* capital U, grave accent */ 
                    240:        "\232", /* capital U, dieresis or umlaut mark */ 
                    241:        "\233", /* capital Y, acute accent */ 
                    242:        "\326", /* small a, acute accent */ 
                    243:        "\327", /* small a, circumflex accent */ 
                    244:        "\361", /* small ae diphthong (ligature) */ 
                    245:        "\325", /* small a, grave accent */ 
                    246:        "\046", /* ampersand */ 
                    247:        "\332", /* small a, ring */ 
                    248:        "\330", /* small a, tilde */ 
                    249:        "\331", /* small a, dieresis or umlaut mark */ 
                    250:        "\333", /* small c, cedilla */ 
                    251:        "\335", /* small e, acute accent */ 
                    252:        "\336", /* small e, circumflex accent */ 
                    253:        "\334", /* small e, grave accent */ 
                    254:        "\346", /* small eth, Icelandic         */ 
                    255:        "\337", /* small e, dieresis or umlaut mark */ 
                    256:        "\076", /* greater than */ 
                    257:        "\342", /* small i, acute accent */ 
                    258:        "\344", /* small i, circumflex accent */ 
                    259:        "\340", /* small i, grave accent */ 
                    260:        "\345", /* small i, dieresis or umlaut mark */ 
                    261:        "\074", /* less than */ 
                    262:        "\347", /* small n, tilde */ 
                    263:        "\355", /* small o, acute accent */ 
                    264:        "\356", /* small o, circumflex accent */ 
                    265:        "\354", /* small o, grave accent */ 
                    266:        "\371", /* small o, slash */ 
                    267:        "\357", /* small o, tilde */ 
                    268:        "\360", /* small o, dieresis or umlaut mark */ 
1.36      frystyk   269:         "\042", /* double quote sign - June 94 */
1.2       timbl     270:        "\373", /* small sharp s, German (sz ligature) */ 
                    271:        "\374", /* small thorn, Icelandic */ 
                    272:        "\363", /* small u, acute accent */ 
                    273:        "\364", /* small u, circumflex accent */ 
                    274:        "\362", /* small u, grave accent */ 
                    275:        "\366", /* small u, dieresis or umlaut mark */ 
                    276:        "\367", /* small y, acute accent */ 
                    277:        "\375", /* small y, dieresis or umlaut mark */ 
1.1       timbl     278: };
                    279: 
1.2       timbl     280: /*     Entity values -- for IBM/PC Code Page 850 (International)
                    281: **
                    282: **     This MUST match exactly the table referred to in the DTD!
                    283: **
                    284: */
                    285: /* @@@@@@@@@@@@@@@@@ TBD */
                    286: 
                    287: 
                    288: 
                    289: /*             Set character set
                    290: **             ----------------
                    291: */
                    292: 
                    293: PRIVATE char** p_entity_values = ISO_Latin1;   /* Pointer to translation */
1.1       timbl     294: 
1.2       timbl     295: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
                    296: {
                    297:     p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
                    298:                                             : ISO_Latin1;
                    299: }
1.1       timbl     300: 
                    301: 
                    302: /*             Flattening the style structure
                    303: **             ------------------------------
                    304: **
                    305: On the NeXT, and on any read-only browser, it is simpler for the text to have
                    306: a sequence of styles, rather than a nested tree of styles. In this
                    307: case we have to flatten the structure as it arrives from SGML tags into
                    308: a sequence of styles.
                    309: */
                    310: 
                    311: /*             If style really needs to be set, call this
                    312: */
1.4       timbl     313: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1       timbl     314: {
1.4       timbl     315:     if (!me->text) {                   /* First time through */
                    316:            me->text = HText_new2(me->node_anchor, me->target);
                    317:            HText_beginAppend(me->text);
                    318:            HText_setStyle(me->text, me->new_style);
                    319:            me->in_word = NO;
1.1       timbl     320:     } else {
1.4       timbl     321:            HText_setStyle(me->text, me->new_style);
1.1       timbl     322:     }
1.4       timbl     323:     me->old_style = me->new_style;
                    324:     me->style_change = NO;
1.1       timbl     325: }
                    326: 
                    327: /*      If you THINK you need to change style, call this
                    328: */
                    329: 
1.11      timbl     330: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1       timbl     331: {
1.4       timbl     332:     if (me->new_style!=style) {
                    333:        me->style_change = YES;
                    334:        me->new_style = style;
1.1       timbl     335:     }
1.11      timbl     336:     me->in_word = NO;
1.1       timbl     337: }
                    338: 
1.2       timbl     339: /*_________________________________________________________________________
                    340: **
                    341: **                     A C T I O N     R O U T I N E S
                    342: */
                    343: 
                    344: /*     Character handling
                    345: **     ------------------
1.1       timbl     346: */
1.42      frystyk   347: PRIVATE int HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1       timbl     348: {
1.2       timbl     349: 
1.4       timbl     350:     switch (me->sp[0].tag_number) {
1.2       timbl     351:     case HTML_COMMENT:
                    352:        break;                                  /* Do Nothing */
                    353:        
                    354:     case HTML_TITLE:   
1.4       timbl     355:        HTChunkPutc(&me->title, c);
1.2       timbl     356:        break;
                    357: 
                    358:        
                    359:     case HTML_LISTING:                         /* Litteral text */
                    360:     case HTML_XMP:
                    361:     case HTML_PLAINTEXT:
                    362:     case HTML_PRE:
                    363: /*     We guarrantee that the style is up-to-date in begin_litteral
                    364: */
1.4       timbl     365:        HText_appendCharacter(me->text, c);
1.2       timbl     366:        break;
                    367:        
                    368:     default:                                   /* Free format text */
1.4       timbl     369:        if (me->style_change) {
1.42      frystyk   370:            if ((c=='\n') || (c==' ')) return HT_OK;    /* Ignore it */
1.2       timbl     371:            UPDATE_STYLE;
                    372:        }
                    373:        if (c=='\n') {
1.4       timbl     374:            if (me->in_word) {
                    375:                HText_appendCharacter(me->text, ' ');
                    376:                me->in_word = NO;
1.2       timbl     377:            }
                    378:        } else {
1.4       timbl     379:            HText_appendCharacter(me->text, c);
                    380:            me->in_word = YES;
1.2       timbl     381:        }
                    382:     } /* end switch */
1.42      frystyk   383:     return HT_OK;
1.1       timbl     384: }
                    385: 
1.2       timbl     386: 
                    387: 
                    388: /*     String handling
                    389: **     ---------------
                    390: **
                    391: **     This is written separately from put_character because the loop can
1.11      timbl     392: **     in some cases be promoted to a higher function call level for speed.
1.2       timbl     393: */
1.42      frystyk   394: PRIVATE int HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1       timbl     395: {
1.2       timbl     396: 
1.4       timbl     397:     switch (me->sp[0].tag_number) {
1.2       timbl     398:     case HTML_COMMENT:
                    399:        break;                                  /* Do Nothing */
                    400:        
                    401:     case HTML_TITLE:   
1.4       timbl     402:        HTChunkPuts(&me->title, s);
1.2       timbl     403:        break;
                    404: 
                    405:        
                    406:     case HTML_LISTING:                         /* Litteral text */
                    407:     case HTML_XMP:
                    408:     case HTML_PLAINTEXT:
                    409:     case HTML_PRE:
                    410: 
                    411: /*     We guarrantee that the style is up-to-date in begin_litteral
                    412: */
1.4       timbl     413:        HText_appendText(me->text, s);
1.2       timbl     414:        break;
                    415:        
                    416:     default:                                   /* Free format text */
                    417:         {
                    418:            CONST char *p = s;
1.4       timbl     419:            if (me->style_change) {
1.2       timbl     420:                for (; *p && ((*p=='\n') || (*p==' ')); p++)  ;  /* Ignore leaders */
1.42      frystyk   421:                if (!*p) return HT_OK;
1.2       timbl     422:                UPDATE_STYLE;
                    423:            }
                    424:            for(; *p; p++) {
1.4       timbl     425:                if (me->style_change) {
1.2       timbl     426:                    if ((*p=='\n') || (*p==' ')) continue;  /* Ignore it */
                    427:                    UPDATE_STYLE;
                    428:                }
                    429:                if (*p=='\n') {
1.4       timbl     430:                    if (me->in_word) {
                    431:                        HText_appendCharacter(me->text, ' ');
                    432:                        me->in_word = NO;
1.2       timbl     433:                    }
                    434:                } else {
1.4       timbl     435:                    HText_appendCharacter(me->text, *p);
                    436:                    me->in_word = YES;
1.2       timbl     437:                }
                    438:            } /* for */
                    439:        }
                    440:     } /* end switch */
1.42      frystyk   441:     return HT_OK;
1.1       timbl     442: }
                    443: 
                    444: 
1.2       timbl     445: /*     Buffer write
1.3       timbl     446: **     ------------
1.1       timbl     447: */
1.42      frystyk   448: PRIVATE int HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1       timbl     449: {
1.38      frystyk   450:     while (l-- > 0)
                    451:        HTML_put_character(me, *s++);
1.42      frystyk   452:     return HT_OK;
1.1       timbl     453: }
1.2       timbl     454: 
                    455: 
                    456: /*     Start Element
                    457: **     -------------
                    458: */
                    459: PRIVATE void HTML_start_element ARGS4(
1.4       timbl     460:        HTStructured *,         me,
1.16      timbl     461:        int,                    element_number,
1.3       timbl     462:        CONST BOOL*,            present,
1.16      timbl     463:        CONST char **,          value)
1.2       timbl     464: {
                    465:     switch (element_number) {
                    466:     case HTML_A:
                    467:        {
1.8       timbl     468:            HTChildAnchor * source;
1.9       timbl     469:            char * href = NULL;
1.42      frystyk   470:            if (present[HTML_A_HREF])
1.9       timbl     471:                StrAllocCopy(href, value[HTML_A_HREF]);
1.8       timbl     472:            source = HTAnchor_findChildAndLink(
1.4       timbl     473:                me->node_anchor,                                /* parent */
1.2       timbl     474:                present[HTML_A_NAME] ? value[HTML_A_NAME] : 0,  /* Tag */
1.9       timbl     475:                present[HTML_A_HREF] ? href : 0,                /* Addresss */
1.16      timbl     476:                present[HTML_A_REL] && value[HTML_A_REL] ? 
                    477:                        (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2       timbl     478:                                                : 0);
                    479:            
                    480:            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
                    481:                HTParentAnchor * dest = 
                    482:                    HTAnchor_parent(
                    483:                        HTAnchor_followMainLink((HTAnchor*)source)
                    484:                                    );
                    485:                if (!HTAnchor_title(dest))
                    486:                        HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
                    487:            }
                    488:            UPDATE_STYLE;
1.4       timbl     489:            HText_beginAnchor(me->text, source);
1.42      frystyk   490:            FREE(href);                          /* Leak fix Henrik 17/02-94 */
1.2       timbl     491:        }
                    492:        break;
                    493:        
                    494:     case HTML_TITLE:
1.4       timbl     495:         HTChunkClear(&me->title);
1.2       timbl     496:        break;
                    497:        
                    498:     case HTML_NEXTID:
                    499:        /* if (present[NEXTID_N] && value[NEXTID_N])
1.4       timbl     500:                HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2       timbl     501:        break;
                    502:        
                    503:     case HTML_ISINDEX:
1.4       timbl     504:        HTAnchor_setIndex(me->node_anchor);
1.2       timbl     505:        break;
                    506:        
1.15      timbl     507:     case HTML_BR: 
                    508:        UPDATE_STYLE;
                    509:        HText_appendCharacter(me->text, '\n');
                    510:        me->in_word = NO;
                    511:        break;
                    512:        
                    513:     case HTML_HR: 
                    514:        UPDATE_STYLE;
                    515:        HText_appendCharacter(me->text, '\n');
1.16      timbl     516:        HText_appendText(me->text, "___________________________________");
1.15      timbl     517:        HText_appendCharacter(me->text, '\n');
                    518:        me->in_word = NO;
                    519:        break;
                    520:        
1.2       timbl     521:     case HTML_P:
                    522:        UPDATE_STYLE;
1.4       timbl     523:        HText_appendParagraph(me->text);
                    524:        me->in_word = NO;
1.2       timbl     525:        break;
                    526: 
                    527:     case HTML_DL:
1.11      timbl     528:         change_paragraph_style(me, present && present[DL_COMPACT]
1.16      timbl     529:                ? styles[HTML_DL]
1.2       timbl     530:                : styles[HTML_DL]);
                    531:        break;
                    532:        
                    533:     case HTML_DT:
1.4       timbl     534:         if (!me->style_change) {
                    535:            HText_appendParagraph(me->text);
                    536:            me->in_word = NO;
1.2       timbl     537:        }
                    538:        break;
                    539:        
                    540:     case HTML_DD:
                    541:         UPDATE_STYLE;
1.4       timbl     542:        HTML_put_character(me, '\t');   /* Just tab out one stop */
                    543:        me->in_word = NO;
                    544:        break;
1.2       timbl     545: 
                    546:     case HTML_UL:
                    547:     case HTML_OL:
                    548:     case HTML_MENU:
                    549:     case HTML_DIR:
1.11      timbl     550:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     551:        break;
                    552:        
                    553:     case HTML_LI:
                    554:         UPDATE_STYLE;
1.7       timbl     555:        if (me->sp[0].tag_number != HTML_DIR)
1.4       timbl     556:            HText_appendParagraph(me->text);
1.2       timbl     557:        else
1.4       timbl     558:            HText_appendCharacter(me->text, '\t');      /* Tab @@ nl for UL? */
                    559:        me->in_word = NO;
1.2       timbl     560:        break;
                    561:        
                    562:     case HTML_LISTING:                         /* Litteral text */
                    563:     case HTML_XMP:
                    564:     case HTML_PLAINTEXT:
                    565:     case HTML_PRE:
1.11      timbl     566:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     567:        UPDATE_STYLE;
1.4       timbl     568:        if (me->comment_end)
                    569:            HText_appendText(me->text, me->comment_end);
1.2       timbl     570:        break;
1.11      timbl     571: 
1.23      frystyk   572:     case HTML_IMG:                     /* Images */
                    573:        {
                    574:            HTChildAnchor *source;
                    575:            char *src = NULL;
                    576:            if (present[HTML_IMG_SRC]) {
                    577:                StrAllocCopy(src, value[HTML_IMG_SRC]);
1.36      frystyk   578: #ifdef OLD_CODE
1.23      frystyk   579:                HTSimplify(src);
1.36      frystyk   580: #endif
1.23      frystyk   581:            }
                    582:            source = HTAnchor_findChildAndLink(
                    583:                                               me->node_anchor,    /* parent */
                    584:                                               0,                     /* Tag */
                    585:                                               src ? src : 0,    /* Addresss */
                    586:                                               0);
                    587:            UPDATE_STYLE;
                    588:            HText_appendImage(me->text, source,
1.24      frystyk   589:                      present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
                    590:                      present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
                    591:                      present[HTML_IMG_ISMAP] ? YES : NO);
1.23      frystyk   592:            free(src);
1.24      frystyk   593:        }       
                    594:        break;
                    595: 
                    596:     case HTML_HTML:                    /* Ignore these altogether */
                    597:     case HTML_HEAD:
                    598:     case HTML_BODY:
                    599:     
1.10      timbl     600:     case HTML_TT:                      /* Physical character highlighting */
                    601:     case HTML_B:                       /* Currently ignored */
                    602:     case HTML_I:
                    603:     case HTML_U:
                    604:     
                    605:     case HTML_EM:                      /* Logical character highlighting */
                    606:     case HTML_STRONG:                  /* Currently ignored */
                    607:     case HTML_CODE:
                    608:     case HTML_SAMP:
                    609:     case HTML_KBD:
                    610:     case HTML_VAR:
                    611:     case HTML_DFN:
                    612:     case HTML_CITE:
                    613:        break;
                    614:        
1.11      timbl     615:     case HTML_H1:                      /* paragraph styles */
                    616:     case HTML_H2:
                    617:     case HTML_H3:
                    618:     case HTML_H4:
                    619:     case HTML_H5:
                    620:     case HTML_H6:
                    621:     case HTML_H7:
                    622:     case HTML_ADDRESS:
                    623:     case HTML_BLOCKQUOTE:
                    624:        change_paragraph_style(me, styles[element_number]);     /* May be postponed */
1.2       timbl     625:        break;
                    626: 
                    627:     } /* end switch */
                    628: 
1.16      timbl     629:     if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13      timbl     630:         if (me->sp == me->stack) {
1.44    ! frystyk   631:            if (SGML_TRACE)
        !           632:                fprintf(TDEST, "HTML........ Maximum nesting of %d exceded!\n",
        !           633:                        MAX_NESTING); 
        !           634:            me->overflow++;
1.12      timbl     635:            return;
                    636:        }
1.4       timbl     637:        --(me->sp);
                    638:        me->sp[0].style = me->new_style;        /* Stack new style */
                    639:        me->sp[0].tag_number = element_number;
1.10      timbl     640:     }  
1.1       timbl     641: }
1.10      timbl     642: 
1.2       timbl     643: 
1.1       timbl     644: /*             End Element
1.2       timbl     645: **             -----------
1.1       timbl     646: **
1.2       timbl     647: */
                    648: /*     When we end an element, the style must be returned to that
1.1       timbl     649: **     in effect before that element.  Note that anchors (etc?)
                    650: **     don't have an associated style, so that we must scan down the
                    651: **     stack for an element with a defined style. (In fact, the styles
                    652: **     should be linked to the whole stack not just the top one.)
                    653: **     TBL 921119
1.6       timbl     654: **
                    655: **     We don't turn on "CAREFUL" check because the parser produces
                    656: **     (internal code errors apart) good nesting. The parser checks
                    657: **     incoming code errors, not this module.
1.1       timbl     658: */
1.4       timbl     659: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1       timbl     660: {
1.2       timbl     661: #ifdef CAREFUL                 /* parser assumed to produce good nesting */
1.4       timbl     662:     if (element_number != me->sp[0].tag_number) {
1.41      frystyk   663:         fprintf(TDEST, "HTMLText: end of element %s when expecting end of %s\n",
1.16      timbl     664:                me->dtd->tags[element_number].name,
                    665:                me->dtd->tags[me->sp->tag_number].name);
1.6       timbl     666:                /* panic */
1.1       timbl     667:     }
1.2       timbl     668: #endif
1.44    ! frystyk   669: 
        !           670:     /* HFN, If overflow of nestings, we need to get back to reality */
        !           671:     if (me->overflow > 0) {
        !           672:        me->overflow--;
        !           673:        return;
        !           674:     }
        !           675: 
1.4       timbl     676:     me->sp++;                          /* Pop state off stack */
1.44    ! frystyk   677: 
1.2       timbl     678:     switch(element_number) {
                    679: 
                    680:     case HTML_A:
                    681:        UPDATE_STYLE;
1.4       timbl     682:        HText_endAnchor(me->text);
1.2       timbl     683:        break;
                    684: 
                    685:     case HTML_TITLE:
1.4       timbl     686:         HTChunkTerminate(&me->title);
                    687:        HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2       timbl     688:        break;
                    689:        
                    690:     case HTML_LISTING:                         /* Litteral text */
                    691:     case HTML_XMP:
                    692:     case HTML_PLAINTEXT:
                    693:     case HTML_PRE:
1.4       timbl     694:        if (me->comment_start)
                    695:            HText_appendText(me->text, me->comment_start);
1.2       timbl     696:        /* Fall through */
                    697:        
                    698:     default:
1.44    ! frystyk   699: 
        !           700:        /* Often won't really change */
        !           701:        change_paragraph_style(me, me->sp->style);
1.2       timbl     702:        break;
                    703:        
                    704:     } /* switch */
1.1       timbl     705: }
                    706: 
1.2       timbl     707: 
                    708: /*             Expanding entities
                    709: **             ------------------
                    710: */
                    711: /*     (In fact, they all shrink!)
1.1       timbl     712: */
1.2       timbl     713: 
1.4       timbl     714: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1       timbl     715: {
1.4       timbl     716:     HTML_put_string(me, ISO_Latin1[entity_number]);    /* @@ Other representations */
1.1       timbl     717: }
1.2       timbl     718: 
1.42      frystyk   719: /*     Flush an HTML object
                    720: **     --------------------
                    721: */
                    722: PUBLIC int HTML_flush ARGS1(HTStructured *, me)
                    723: {
                    724:     UPDATE_STYLE;                           /* Creates empty document here! */
                    725:     if (me->comment_end)
                    726:                HTML_put_string(me,me->comment_end);
                    727:     HText_endAppend(me->text);
                    728:     return (*me->targetClass.flush)(me->target);
                    729: }
1.2       timbl     730: 
                    731: /*     Free an HTML object
                    732: **     -------------------
                    733: **
1.4       timbl     734: ** If the document is empty, the text object will not yet exist.
                    735:    So we could in fact abandon creating the document and return
                    736:    an error code.  In fact an empty document is an important type
                    737:    of document, so we don't.
                    738: **
                    739: **     If non-interactive, everything is freed off.   No: crashes -listrefs
1.2       timbl     740: **     Otherwise, the interactive object is left.      
                    741: */
1.37      frystyk   742: PUBLIC int HTML_free ARGS1(HTStructured *, me)
1.1       timbl     743: {
1.4       timbl     744:     UPDATE_STYLE;              /* Creates empty document here! */
                    745:     if (me->comment_end)
                    746:                HTML_put_string(me,me->comment_end);
                    747:     HText_endAppend(me->text);
                    748: 
                    749:     if (me->target) {
1.35      duns      750:         (*me->targetClass._free)(me->target);
1.2       timbl     751:     }
1.19      frystyk   752:     HTChunkClear(&me->title);  /* Henrik 18/02-94 */
1.4       timbl     753:     free(me);
1.42      frystyk   754:     return HT_OK;
1.1       timbl     755: }
                    756: 
                    757: 
1.37      frystyk   758: PRIVATE int HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1       timbl     759: 
1.14      timbl     760: {
                    761:     if (me->target) {
                    762:         (*me->targetClass.abort)(me->target, e);
                    763:     }
1.19      frystyk   764:     HTChunkClear(&me->title);  /* Henrik 18/02-94 */
1.14      timbl     765:     free(me);
1.42      frystyk   766:     return HT_ERROR;
1.1       timbl     767: }
                    768: 
1.2       timbl     769: 
                    770: /*     Get Styles from style sheet
                    771: **     ---------------------------
                    772: */
                    773: PRIVATE void get_styles NOARGS
1.1       timbl     774: {
1.2       timbl     775:     got_styles = YES;
                    776:     
                    777:     default_style =            HTStyleNamed(styleSheet, "Normal");
1.1       timbl     778: 
1.2       timbl     779:     styles[HTML_H1] =          HTStyleNamed(styleSheet, "Heading1");
                    780:     styles[HTML_H2] =          HTStyleNamed(styleSheet, "Heading2");
                    781:     styles[HTML_H3] =          HTStyleNamed(styleSheet, "Heading3");
                    782:     styles[HTML_H4] =          HTStyleNamed(styleSheet, "Heading4");
                    783:     styles[HTML_H5] =          HTStyleNamed(styleSheet, "Heading5");
                    784:     styles[HTML_H6] =          HTStyleNamed(styleSheet, "Heading6");
                    785:     styles[HTML_H7] =          HTStyleNamed(styleSheet, "Heading7");
                    786: 
                    787:     styles[HTML_DL] =          HTStyleNamed(styleSheet, "Glossary");
                    788:     styles[HTML_UL] =
                    789:     styles[HTML_OL] =          HTStyleNamed(styleSheet, "List");
                    790:     styles[HTML_MENU] =                HTStyleNamed(styleSheet, "Menu");
                    791:     styles[HTML_DIR] =         HTStyleNamed(styleSheet, "Dir");    
1.16      timbl     792: /*  styles[HTML_DLC] =         HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2       timbl     793:     styles[HTML_ADDRESS]=      HTStyleNamed(styleSheet, "Address");
                    794:     styles[HTML_BLOCKQUOTE]=   HTStyleNamed(styleSheet, "BlockQuote");
                    795:     styles[HTML_PLAINTEXT] =
                    796:     styles[HTML_XMP] =         HTStyleNamed(styleSheet, "Example");
                    797:     styles[HTML_PRE] =         HTStyleNamed(styleSheet, "Preformatted");
                    798:     styles[HTML_LISTING] =     HTStyleNamed(styleSheet, "Listing");
                    799: }
                    800: /*                             P U B L I C
                    801: */
                    802: 
                    803: /*     Structured Object Class
                    804: **     -----------------------
                    805: */
                    806: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
                    807: {              
                    808:        "text/html",
1.42      frystyk   809:        HTML_flush,
1.2       timbl     810:        HTML_free,
1.14      timbl     811:        HTML_abort,
1.2       timbl     812:        HTML_put_character,     HTML_put_string,  HTML_write,
                    813:        HTML_start_element,     HTML_end_element,
                    814:        HTML_put_entity
                    815: }; 
1.1       timbl     816: 
1.4       timbl     817: 
1.2       timbl     818: /*             New Structured Text object
                    819: **             --------------------------
                    820: **
1.16      timbl     821: **     The structured stream can generate either presentation,
1.4       timbl     822: **     or plain text, or HTML.
1.1       timbl     823: */
1.16      timbl     824: PUBLIC HTStructured* HTML_new ARGS5(
                    825:        HTRequest *,            request,
                    826:        void *,                 param,
                    827:        HTFormat,               input_format,
                    828:        HTFormat,               output_format,
                    829:        HTStream *,             output_stream)
1.1       timbl     830: {
                    831: 
1.4       timbl     832:     HTStructured * me;
                    833:     
1.16      timbl     834:     if (output_format != WWW_PLAINTEXT
                    835:        && output_format != WWW_PRESENT
                    836:        && output_format != HTAtom_for("text/x-c")) {
1.37      frystyk   837:         HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
                    838:                                                output_stream, request, NO);
1.6       timbl     839:        if (intermediate) return HTMLGenerator(intermediate);
1.44    ! frystyk   840:        if (SGML_TRACE)
        !           841:            fprintf(TDEST, "HTML........ Can't parse HTML to %s\n",
        !           842:                    HTAtom_name(output_format));
1.4       timbl     843:        exit (-99);
                    844:     }
                    845: 
1.44    ! frystyk   846:     if ((me = (HTStructured*) calloc(1, sizeof(*me))) == NULL)
        !           847:        outofmem(__FILE__, "HTML_new");
1.1       timbl     848: 
                    849:     if (!got_styles) get_styles();
                    850: 
1.4       timbl     851:     me->isa = &HTMLPresentation;
1.16      timbl     852:     me->dtd = &DTD;
                    853:     me->node_anchor =  request->anchor;
1.4       timbl     854:     me->title.size = 0;
                    855:     me->title.growby = 128;
                    856:     me->title.allocated = 0;
                    857:     me->title.data = 0;
                    858:     me->text = 0;
                    859:     me->style_change = YES; /* Force check leading to text creation */
                    860:     me->new_style = default_style;
                    861:     me->old_style = 0;
                    862:     me->sp = me->stack + MAX_NESTING - 1;
                    863:     me->sp->tag_number = -1;                           /* INVALID */
                    864:     me->sp->style = default_style;                     /* INVALID */
1.1       timbl     865:     
1.4       timbl     866:     me->comment_start = NULL;
                    867:     me->comment_end = NULL;
1.16      timbl     868:     me->target = output_stream;
                    869:     if (output_stream) me->targetClass = *output_stream->isa;  /* Copy pointers */
1.1       timbl     870:     
1.4       timbl     871:     return (HTStructured*) me;
1.1       timbl     872: }
                    873: 
                    874: 
1.2       timbl     875: /*     HTConverter for HTML to plain text
                    876: **     ----------------------------------
1.1       timbl     877: **
1.2       timbl     878: **     This will convert from HTML to presentation or plain text.
1.1       timbl     879: */
1.16      timbl     880: PUBLIC HTStream* HTMLToPlain ARGS5(
                    881:        HTRequest *,            request,
                    882:        void *,                 param,
                    883:        HTFormat,               input_format,
                    884:        HTFormat,               output_format,
                    885:        HTStream *,             output_stream)
1.1       timbl     886: {
1.16      timbl     887:     return SGML_new(&DTD, HTML_new(
                    888:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     889: }
                    890: 
                    891: 
1.2       timbl     892: /*     HTConverter for HTML to C code
                    893: **     ------------------------------
                    894: **
1.36      frystyk   895: **     C code is like plain text but all non-preformatted code
1.2       timbl     896: **     is commented out.
                    897: **     This will convert from HTML to presentation or plain text.
                    898: */
1.16      timbl     899: PUBLIC HTStream* HTMLToC ARGS5(
                    900:        HTRequest *,            request,
                    901:        void *,                 param,
                    902:        HTFormat,               input_format,
                    903:        HTFormat,               output_format,
                    904:        HTStream *,             output_stream)
1.1       timbl     905: {
1.4       timbl     906:     
                    907:     HTStructured * html;
                    908:     
1.36      frystyk   909:     (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16      timbl     910:     html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2       timbl     911:     html->comment_start = "/* ";
1.16      timbl     912:     html->dtd = &DTD;
1.2       timbl     913:     html->comment_end = " */\n";       /* Must start in col 1 for cpp */
1.16      timbl     914:     return SGML_new(&DTD, html);
1.1       timbl     915: }
                    916: 
                    917: 
1.2       timbl     918: /*     Presenter for HTML
                    919: **     ------------------
                    920: **
                    921: **     This will convert from HTML to presentation or plain text.
                    922: **
                    923: **     Override this if you have a windows version
1.1       timbl     924: */
1.2       timbl     925: #ifndef GUI
1.16      timbl     926: PUBLIC HTStream* HTMLPresent ARGS5(
                    927:        HTRequest *,            request,
                    928:        void *,                 param,
                    929:        HTFormat,               input_format,
                    930:        HTFormat,               output_format,
                    931:        HTStream *,             output_stream)
1.1       timbl     932: {
1.16      timbl     933:     return SGML_new(&DTD, HTML_new(
                    934:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     935: }
1.2       timbl     936: #endif
1.1       timbl     937: 
                    938: 
1.2       timbl     939: /*     Record error message as a hypertext object
                    940: **     ------------------------------------------
                    941: **
                    942: **     The error message should be marked as an error so that
                    943: **     it can be reloaded later.
                    944: **     This implementation just throws up an error message
                    945: **     and leaves the document unloaded.
1.9       timbl     946: **     A smarter implementation would load an error document,
                    947: **     marking it as such so that it is retried on reload.
1.1       timbl     948: **
1.2       timbl     949: ** On entry,
                    950: **     sink    is a stream to the output device if any
                    951: **     number  is the HTTP error number
                    952: **     message is the human readable message.
1.9       timbl     953: **
                    954: ** On exit,
                    955: **     returns a negative number to indicate lack of success in the load.
1.1       timbl     956: */
1.2       timbl     957: 
                    958: PUBLIC int HTLoadError ARGS3(
1.17      luotonen  959:        HTRequest *,    req,
1.2       timbl     960:        int,            number,
                    961:        CONST char *,   message)
                    962: {
1.20      frystyk   963:     char *err = "Oh I screwed up!";            /* Dummy pointer not used (I hope) */
1.2       timbl     964:     HTAlert(message);          /* @@@@@@@@@@@@@@@@@@@ */
1.20      frystyk   965:     /* Clean up! Henrik 04/03-94 */
                    966:     if (req && req->output_stream)
                    967:        (*req->output_stream->isa->abort)(req->output_stream, err);
1.33      frystyk   968: #if OLD_CODE
1.25      luotonen  969:     HTClearErrors(req);
1.33      frystyk   970: #endif
1.2       timbl     971:     return -number;
                    972: } 
1.29      frystyk   973: 

Webmaster