Annotation of libwww/Library/src/HTML.c, revision 1.62

1.39      frystyk     1: /*                                                                      HTML.c
                      2: **     STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
                      3: **
1.43      frystyk     4: **     (c) COPYRIGHT MIT 1995.
1.39      frystyk     5: **     Please first read the full copyright statement in the file COPYRIGH.
1.62    ! frystyk     6: **     @(#) $Id: HTML.c,v 1.61 1996/04/12 17:47:44 frystyk Exp $
1.1       timbl       7: **
1.2       timbl       8: **     This generates a hypertext object.  It converts from the
                      9: **     structured stream interface for HTML events into the style-
1.47      frystyk    10: **     oriented interface of the HText interface.  This module is
1.2       timbl      11: **     only used in clients and should not be linked into servers.
1.1       timbl      12: **
1.6       timbl      13: **     Override this module if making a new GUI browser.
1.1       timbl      14: **
1.35      duns       15: ** HISTORY:
                     16: **      8 Jul 94  FM   Insulate free() from _free structure element.
                     17: **
1.1       timbl      18: */
1.16      timbl      19: 
1.41      frystyk    20: /* Library include files */
1.60      frystyk    21: #include "sysdep.h"
1.41      frystyk    22: #include "HTUtils.h"
                     23: #include "HTString.h"
1.1       timbl      24: #include "HTAtom.h"
                     25: #include "HTChunk.h"
                     26: #include "HText.h"
                     27: #include "HTStyle.h"
1.3       timbl      28: #include "HTAlert.h"
1.4       timbl      29: #include "HTMLGen.h"
1.8       timbl      30: #include "HTParse.h"
1.41      frystyk    31: #include "HTML.h"
1.1       timbl      32: 
                     33: extern HTStyleSheet * styleSheet;      /* Application-wide */
                     34: 
                     35: /*     Module-wide style cache
                     36: */
                     37: PRIVATE int            got_styles = 0;
1.16      timbl      38: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2       timbl      39: PRIVATE HTStyle *default_style;
1.1       timbl      40: 
1.62    ! frystyk    41: #define TAB    '\0'
1.1       timbl      42: 
                     43: /*             HTML Object
                     44: **             -----------
                     45: */
1.2       timbl      46: #define MAX_NESTING 20         /* Should be checked by parser */
                     47: 
                     48: typedef struct _stack_element {
                     49:         HTStyle *      style;
                     50:        int             tag_number;
                     51: } stack_element;
                     52: 
                     53: struct _HTStructured {
1.60      frystyk    54:     const HTStructuredClass *  isa;
1.54      frystyk    55:     HTRequest *                        request;
1.2       timbl      56:     HTParentAnchor *           node_anchor;
                     57:     HText *                    text;
                     58: 
                     59:     HTStream*                  target;                 /* Output stream */
                     60:     HTStreamClass              targetClass;            /* Output routines */
                     61: 
1.56      frystyk    62:     HTChunk *                  title;          /* Grow by 128 */
1.2       timbl      63:     
                     64:     char *                     comment_start;  /* for literate programming */
                     65:     char *                     comment_end;
1.16      timbl      66:     
1.60      frystyk    67:     const SGML_dtd*            dtd;
1.16      timbl      68:     
1.2       timbl      69:     HTTag *                    current_tag;
                     70:     BOOL                       style_change;
                     71:     HTStyle *                  new_style;
                     72:     HTStyle *                  old_style;
                     73:     BOOL                       in_word;  /* Have just had a non-white char */
1.44      frystyk    74: 
                     75:     stack_element              stack[MAX_NESTING];
                     76:     stack_element              *sp;                  /* Style stack pointer */
                     77:     int                                overflow;  /* Keep track of overflow nesting */
1.1       timbl      78: };
                     79: 
1.2       timbl      80: struct _HTStream {
1.60      frystyk    81:     const HTStreamClass *      isa;
1.2       timbl      82:     /* .... */
                     83: };
1.1       timbl      84: 
                     85: /*             Forward declarations of routines
                     86: */
1.52      frystyk    87: PRIVATE void get_styles (void);
1.1       timbl      88: 
                     89: 
1.52      frystyk    90: PRIVATE void actually_set_style (HTStructured * me);
                     91: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1       timbl      92: 
                     93: /*     Style buffering avoids dummy paragraph begin/ends.
                     94: */
1.4       timbl      95: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1       timbl      96: 
                     97: 
1.2       timbl      98: #ifdef OLD_CODE
1.1       timbl      99: /* The following accented characters are from peter Flynn, curia project */
                    100: 
                    101: /* these ifdefs don't solve the problem of a simple terminal emulator
                    102: ** with a different character set to the client machine. But nothing does,
                    103: ** except looking at the TERM setting */
                    104: 
1.2       timbl     105: 
1.1       timbl     106:         { "ocus" , "&" },       /* for CURIA */
                    107: #ifdef IBMPC
                    108:         { "aacute" , "\240" }, /* For PC display */
                    109:         { "eacute" , "\202" },
                    110:         { "iacute" , "\241" },
                    111:         { "oacute" , "\242" },
                    112:         { "uacute" , "\243" },
                    113:         { "Aacute" , "\101" },
                    114:         { "Eacute" , "\220" },
                    115:         { "Iacute" , "\111" },
                    116:         { "Oacute" , "\117" },
                    117:         { "Uacute" , "\125" },
                    118: #else
                    119:         { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
                    120:         { "eacute" , "\351" },
                    121:         { "iacute" , "\355" },
                    122:         { "oacute" , "\363" },
                    123:         { "uacute" , "\372" },
                    124:         { "Aacute" , "\301" },
                    125:         { "Eacute" , "\310" },
                    126:         { "Iacute" , "\315" },
                    127:         { "Oacute" , "\323" },
                    128:         { "Uacute" , "\332" }, 
                    129: #endif
                    130:        { 0,    0 }  /* Terminate list */
                    131: };
1.2       timbl     132: #endif
1.1       timbl     133: 
                    134: 
1.2       timbl     135: /*     Entity values -- for ISO Latin 1 local representation
                    136: **
                    137: **     This MUST match exactly the table referred to in the DTD!
                    138: */
                    139: static char * ISO_Latin1[] = {
                    140:        "\306", /* capital AE diphthong (ligature) */ 
                    141:        "\301", /* capital A, acute accent */ 
                    142:        "\302", /* capital A, circumflex accent */ 
                    143:        "\300", /* capital A, grave accent */ 
                    144:        "\305", /* capital A, ring */ 
                    145:        "\303", /* capital A, tilde */ 
                    146:        "\304", /* capital A, dieresis or umlaut mark */ 
                    147:        "\307", /* capital C, cedilla */ 
                    148:        "\320", /* capital Eth, Icelandic */ 
                    149:        "\311", /* capital E, acute accent */ 
                    150:        "\312", /* capital E, circumflex accent */ 
                    151:        "\310", /* capital E, grave accent */ 
                    152:        "\313", /* capital E, dieresis or umlaut mark */ 
                    153:        "\315", /* capital I, acute accent */ 
                    154:        "\316", /* capital I, circumflex accent */ 
                    155:        "\314", /* capital I, grave accent */ 
                    156:        "\317", /* capital I, dieresis or umlaut mark */ 
                    157:        "\321", /* capital N, tilde */ 
                    158:        "\323", /* capital O, acute accent */ 
                    159:        "\324", /* capital O, circumflex accent */ 
                    160:        "\322", /* capital O, grave accent */ 
                    161:        "\330", /* capital O, slash */ 
                    162:        "\325", /* capital O, tilde */ 
                    163:        "\326", /* capital O, dieresis or umlaut mark */ 
                    164:        "\336", /* capital THORN, Icelandic */ 
                    165:        "\332", /* capital U, acute accent */ 
                    166:        "\333", /* capital U, circumflex accent */ 
                    167:        "\331", /* capital U, grave accent */ 
                    168:        "\334", /* capital U, dieresis or umlaut mark */ 
                    169:        "\335", /* capital Y, acute accent */ 
                    170:        "\341", /* small a, acute accent */ 
                    171:        "\342", /* small a, circumflex accent */ 
                    172:        "\346", /* small ae diphthong (ligature) */ 
                    173:        "\340", /* small a, grave accent */ 
                    174:        "\046", /* ampersand */ 
                    175:        "\345", /* small a, ring */ 
                    176:        "\343", /* small a, tilde */ 
                    177:        "\344", /* small a, dieresis or umlaut mark */ 
                    178:        "\347", /* small c, cedilla */ 
                    179:        "\351", /* small e, acute accent */ 
                    180:        "\352", /* small e, circumflex accent */ 
                    181:        "\350", /* small e, grave accent */ 
                    182:        "\360", /* small eth, Icelandic */ 
                    183:        "\353", /* small e, dieresis or umlaut mark */ 
                    184:        "\076", /* greater than */ 
                    185:        "\355", /* small i, acute accent */ 
                    186:        "\356", /* small i, circumflex accent */ 
                    187:        "\354", /* small i, grave accent */ 
                    188:        "\357", /* small i, dieresis or umlaut mark */ 
                    189:        "\074", /* less than */ 
1.62    ! frystyk   190:        "\040", /* non-breaking space */
1.2       timbl     191:        "\361", /* small n, tilde */ 
                    192:        "\363", /* small o, acute accent */ 
                    193:        "\364", /* small o, circumflex accent */ 
                    194:        "\362", /* small o, grave accent */ 
                    195:        "\370", /* small o, slash */ 
                    196:        "\365", /* small o, tilde */ 
                    197:        "\366", /* small o, dieresis or umlaut mark */ 
1.36      frystyk   198:         "\042", /* double quote sign - June 94 */
1.2       timbl     199:        "\337", /* small sharp s, German (sz ligature) */ 
                    200:        "\376", /* small thorn, Icelandic */ 
                    201:        "\372", /* small u, acute accent */ 
                    202:        "\373", /* small u, circumflex accent */ 
                    203:        "\371", /* small u, grave accent */ 
                    204:        "\374", /* small u, dieresis or umlaut mark */ 
                    205:        "\375", /* small y, acute accent */ 
                    206:        "\377", /* small y, dieresis or umlaut mark */ 
1.1       timbl     207: };
                    208: 
1.2       timbl     209: 
                    210: /*     Entity values -- for NeXT local representation
                    211: **
                    212: **     This MUST match exactly the table referred to in the DTD!
                    213: **
                    214: */
                    215: static char * NeXTCharacters[] = {
                    216:        "\341", /* capital AE diphthong (ligature)      */ 
                    217:        "\202", /* capital A, acute accent              */ 
                    218:        "\203", /* capital A, circumflex accent         */ 
                    219:        "\201", /* capital A, grave accent              */ 
                    220:        "\206", /* capital A, ring                      */ 
                    221:        "\204", /* capital A, tilde                     */ 
                    222:        "\205", /* capital A, dieresis or umlaut mark   */ 
                    223:        "\207", /* capital C, cedilla                   */ 
                    224:        "\220", /* capital Eth, Icelandic               */ 
                    225:        "\211", /* capital E, acute accent                              */ 
                    226:        "\212", /* capital E, circumflex accent                         */ 
                    227:        "\210", /* capital E, grave accent                              */ 
                    228:        "\213", /* capital E, dieresis or umlaut mark                   */ 
                    229:        "\215", /* capital I, acute accent                              */ 
                    230:        "\216", /* capital I, circumflex accent         these are       */ 
                    231:        "\214", /* capital I, grave accent              ISO -100 hex    */ 
                    232:        "\217", /* capital I, dieresis or umlaut mark                   */ 
                    233:        "\221", /* capital N, tilde                                     */ 
                    234:        "\223", /* capital O, acute accent                              */ 
                    235:        "\224", /* capital O, circumflex accent                         */ 
                    236:        "\222", /* capital O, grave accent                              */ 
                    237:        "\351", /* capital O, slash             'cept this */ 
                    238:        "\225", /* capital O, tilde                                     */ 
                    239:        "\226", /* capital O, dieresis or umlaut mark                   */ 
                    240:        "\234", /* capital THORN, Icelandic */ 
                    241:        "\230", /* capital U, acute accent */ 
                    242:        "\231", /* capital U, circumflex accent */ 
                    243:        "\227", /* capital U, grave accent */ 
                    244:        "\232", /* capital U, dieresis or umlaut mark */ 
                    245:        "\233", /* capital Y, acute accent */ 
                    246:        "\326", /* small a, acute accent */ 
                    247:        "\327", /* small a, circumflex accent */ 
                    248:        "\361", /* small ae diphthong (ligature) */ 
                    249:        "\325", /* small a, grave accent */ 
                    250:        "\046", /* ampersand */ 
                    251:        "\332", /* small a, ring */ 
                    252:        "\330", /* small a, tilde */ 
                    253:        "\331", /* small a, dieresis or umlaut mark */ 
                    254:        "\333", /* small c, cedilla */ 
                    255:        "\335", /* small e, acute accent */ 
                    256:        "\336", /* small e, circumflex accent */ 
                    257:        "\334", /* small e, grave accent */ 
                    258:        "\346", /* small eth, Icelandic         */ 
                    259:        "\337", /* small e, dieresis or umlaut mark */ 
                    260:        "\076", /* greater than */ 
                    261:        "\342", /* small i, acute accent */ 
                    262:        "\344", /* small i, circumflex accent */ 
                    263:        "\340", /* small i, grave accent */ 
                    264:        "\345", /* small i, dieresis or umlaut mark */ 
                    265:        "\074", /* less than */ 
1.62    ! frystyk   266:        "\040", /* non-breaking space */
1.2       timbl     267:        "\347", /* small n, tilde */ 
                    268:        "\355", /* small o, acute accent */ 
                    269:        "\356", /* small o, circumflex accent */ 
                    270:        "\354", /* small o, grave accent */ 
                    271:        "\371", /* small o, slash */ 
                    272:        "\357", /* small o, tilde */ 
                    273:        "\360", /* small o, dieresis or umlaut mark */ 
1.36      frystyk   274:         "\042", /* double quote sign - June 94 */
1.2       timbl     275:        "\373", /* small sharp s, German (sz ligature) */ 
                    276:        "\374", /* small thorn, Icelandic */ 
                    277:        "\363", /* small u, acute accent */ 
                    278:        "\364", /* small u, circumflex accent */ 
                    279:        "\362", /* small u, grave accent */ 
                    280:        "\366", /* small u, dieresis or umlaut mark */ 
                    281:        "\367", /* small y, acute accent */ 
                    282:        "\375", /* small y, dieresis or umlaut mark */ 
1.1       timbl     283: };
                    284: 
1.2       timbl     285: /*     Entity values -- for IBM/PC Code Page 850 (International)
                    286: **
                    287: **     This MUST match exactly the table referred to in the DTD!
                    288: **
                    289: */
                    290: /* @@@@@@@@@@@@@@@@@ TBD */
                    291: 
                    292: 
                    293: 
                    294: /*             Set character set
                    295: **             ----------------
                    296: */
                    297: 
                    298: PRIVATE char** p_entity_values = ISO_Latin1;   /* Pointer to translation */
1.1       timbl     299: 
1.53      frystyk   300: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2       timbl     301: {
                    302:     p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
                    303:                                             : ISO_Latin1;
                    304: }
1.1       timbl     305: 
                    306: 
                    307: /*             Flattening the style structure
                    308: **             ------------------------------
                    309: **
                    310: On the NeXT, and on any read-only browser, it is simpler for the text to have
                    311: a sequence of styles, rather than a nested tree of styles. In this
                    312: case we have to flatten the structure as it arrives from SGML tags into
                    313: a sequence of styles.
                    314: */
                    315: 
                    316: /*             If style really needs to be set, call this
                    317: */
1.53      frystyk   318: PRIVATE void actually_set_style (HTStructured * me)
1.1       timbl     319: {
1.4       timbl     320:     if (!me->text) {                   /* First time through */
1.54      frystyk   321:            me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4       timbl     322:            HText_beginAppend(me->text);
                    323:            HText_setStyle(me->text, me->new_style);
                    324:            me->in_word = NO;
1.1       timbl     325:     } else {
1.4       timbl     326:            HText_setStyle(me->text, me->new_style);
1.1       timbl     327:     }
1.4       timbl     328:     me->old_style = me->new_style;
                    329:     me->style_change = NO;
1.1       timbl     330: }
                    331: 
                    332: /*      If you THINK you need to change style, call this
                    333: */
                    334: 
1.53      frystyk   335: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1       timbl     336: {
1.4       timbl     337:     if (me->new_style!=style) {
                    338:        me->style_change = YES;
                    339:        me->new_style = style;
1.1       timbl     340:     }
1.11      timbl     341:     me->in_word = NO;
1.1       timbl     342: }
                    343: 
1.2       timbl     344: /*_________________________________________________________________________
                    345: **
                    346: **                     A C T I O N     R O U T I N E S
                    347: */
                    348: 
                    349: /*     Character handling
                    350: **     ------------------
1.1       timbl     351: */
1.53      frystyk   352: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1       timbl     353: {
1.2       timbl     354: 
1.4       timbl     355:     switch (me->sp[0].tag_number) {
1.2       timbl     356:     case HTML_COMMENT:
                    357:        break;                                  /* Do Nothing */
                    358:        
                    359:     case HTML_TITLE:   
1.56      frystyk   360:        HTChunk_putb(me->title, &c, 1);
1.2       timbl     361:        break;
                    362: 
                    363:        
                    364:     case HTML_LISTING:                         /* Litteral text */
                    365:     case HTML_XMP:
                    366:     case HTML_PLAINTEXT:
                    367:     case HTML_PRE:
                    368: /*     We guarrantee that the style is up-to-date in begin_litteral
                    369: */
1.4       timbl     370:        HText_appendCharacter(me->text, c);
1.2       timbl     371:        break;
                    372:        
                    373:     default:                                   /* Free format text */
1.4       timbl     374:        if (me->style_change) {
1.42      frystyk   375:            if ((c=='\n') || (c==' ')) return HT_OK;    /* Ignore it */
1.2       timbl     376:            UPDATE_STYLE;
                    377:        }
1.62    ! frystyk   378:        if (c == TAB)
        !           379:            HText_appendCharacter(me->text, '\t');
        !           380:        else if (WHITE(c)) {
1.4       timbl     381:            if (me->in_word) {
                    382:                HText_appendCharacter(me->text, ' ');
                    383:                me->in_word = NO;
1.2       timbl     384:            }
                    385:        } else {
1.4       timbl     386:            HText_appendCharacter(me->text, c);
                    387:            me->in_word = YES;
1.2       timbl     388:        }
                    389:     } /* end switch */
1.42      frystyk   390:     return HT_OK;
1.1       timbl     391: }
                    392: 
1.2       timbl     393: 
                    394: 
                    395: /*     String handling
                    396: **     ---------------
                    397: **
                    398: **     This is written separately from put_character becuase the loop can
1.11      timbl     399: **     in some cases be promoted to a higher function call level for speed.
1.2       timbl     400: */
1.60      frystyk   401: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1       timbl     402: {
1.2       timbl     403: 
1.4       timbl     404:     switch (me->sp[0].tag_number) {
1.2       timbl     405:     case HTML_COMMENT:
                    406:        break;                                  /* Do Nothing */
                    407:        
                    408:     case HTML_TITLE:   
1.56      frystyk   409:        HTChunk_putb(me->title, s, strlen(s));
1.2       timbl     410:        break;
                    411: 
                    412:        
                    413:     case HTML_LISTING:                         /* Litteral text */
                    414:     case HTML_XMP:
                    415:     case HTML_PLAINTEXT:
                    416:     case HTML_PRE:
                    417: 
                    418: /*     We guarrantee that the style is up-to-date in begin_litteral
                    419: */
1.4       timbl     420:        HText_appendText(me->text, s);
1.2       timbl     421:        break;
                    422:        
                    423:     default:                                   /* Free format text */
                    424:         {
1.60      frystyk   425:            const char *p = s;
1.4       timbl     426:            if (me->style_change) {
1.2       timbl     427:                for (; *p && ((*p=='\n') || (*p==' ')); p++)  ;  /* Ignore leaders */
1.42      frystyk   428:                if (!*p) return HT_OK;
1.2       timbl     429:                UPDATE_STYLE;
                    430:            }
                    431:            for(; *p; p++) {
1.4       timbl     432:                if (me->style_change) {
1.2       timbl     433:                    if ((*p=='\n') || (*p==' ')) continue;  /* Ignore it */
                    434:                    UPDATE_STYLE;
                    435:                }
                    436:                if (*p=='\n') {
1.4       timbl     437:                    if (me->in_word) {
                    438:                        HText_appendCharacter(me->text, ' ');
                    439:                        me->in_word = NO;
1.2       timbl     440:                    }
                    441:                } else {
1.4       timbl     442:                    HText_appendCharacter(me->text, *p);
                    443:                    me->in_word = YES;
1.2       timbl     444:                }
                    445:            } /* for */
                    446:        }
                    447:     } /* end switch */
1.42      frystyk   448:     return HT_OK;
1.1       timbl     449: }
                    450: 
                    451: 
1.2       timbl     452: /*     Buffer write
1.3       timbl     453: **     ------------
1.1       timbl     454: */
1.60      frystyk   455: PRIVATE int HTML_write (HTStructured * me, const char* s, int l)
1.1       timbl     456: {
1.38      frystyk   457:     while (l-- > 0)
                    458:        HTML_put_character(me, *s++);
1.42      frystyk   459:     return HT_OK;
1.1       timbl     460: }
1.2       timbl     461: 
                    462: 
                    463: /*     Start Element
                    464: **     -------------
                    465: */
1.53      frystyk   466: PRIVATE void HTML_start_element (
                    467:        HTStructured *  me,
                    468:        int                     element_number,
1.60      frystyk   469:        const BOOL*             present,
                    470:        const char **           value)
1.2       timbl     471: {
                    472:     switch (element_number) {
                    473:     case HTML_A:
                    474:        {
1.8       timbl     475:            HTChildAnchor * source;
1.9       timbl     476:            char * href = NULL;
1.42      frystyk   477:            if (present[HTML_A_HREF])
1.9       timbl     478:                StrAllocCopy(href, value[HTML_A_HREF]);
1.8       timbl     479:            source = HTAnchor_findChildAndLink(
1.4       timbl     480:                me->node_anchor,                                /* parent */
1.2       timbl     481:                present[HTML_A_NAME] ? value[HTML_A_NAME] : 0,  /* Tag */
1.9       timbl     482:                present[HTML_A_HREF] ? href : 0,                /* Addresss */
1.16      timbl     483:                present[HTML_A_REL] && value[HTML_A_REL] ? 
1.54      frystyk   484:                        (HTLinkType) HTAtom_for(value[HTML_A_REL])
                    485:                                               : 0);
1.2       timbl     486:            
                    487:            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
                    488:                HTParentAnchor * dest = 
                    489:                    HTAnchor_parent(
                    490:                        HTAnchor_followMainLink((HTAnchor*)source)
                    491:                                    );
                    492:                if (!HTAnchor_title(dest))
                    493:                        HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
                    494:            }
                    495:            UPDATE_STYLE;
1.4       timbl     496:            HText_beginAnchor(me->text, source);
1.58      frystyk   497:            HT_FREE(href);                      /* Leak fix Henrik 17/02-94 */
1.2       timbl     498:        }
                    499:        break;
                    500:        
                    501:     case HTML_TITLE:
1.56      frystyk   502:         HTChunk_clear(me->title);
1.2       timbl     503:        break;
                    504:        
                    505:     case HTML_NEXTID:
                    506:        /* if (present[NEXTID_N] && value[NEXTID_N])
1.4       timbl     507:                HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2       timbl     508:        break;
                    509:        
                    510:     case HTML_ISINDEX:
1.4       timbl     511:        HTAnchor_setIndex(me->node_anchor);
1.2       timbl     512:        break;
                    513:        
1.15      timbl     514:     case HTML_BR: 
                    515:        UPDATE_STYLE;
                    516:        HText_appendCharacter(me->text, '\n');
                    517:        me->in_word = NO;
                    518:        break;
                    519:        
                    520:     case HTML_HR: 
                    521:        UPDATE_STYLE;
                    522:        HText_appendCharacter(me->text, '\n');
1.16      timbl     523:        HText_appendText(me->text, "___________________________________");
1.15      timbl     524:        HText_appendCharacter(me->text, '\n');
                    525:        me->in_word = NO;
                    526:        break;
                    527:        
1.2       timbl     528:     case HTML_P:
                    529:        UPDATE_STYLE;
1.4       timbl     530:        HText_appendParagraph(me->text);
                    531:        me->in_word = NO;
1.2       timbl     532:        break;
                    533: 
                    534:     case HTML_DL:
1.11      timbl     535:         change_paragraph_style(me, present && present[DL_COMPACT]
1.16      timbl     536:                ? styles[HTML_DL]
1.2       timbl     537:                : styles[HTML_DL]);
                    538:        break;
                    539:        
                    540:     case HTML_DT:
1.4       timbl     541:         if (!me->style_change) {
                    542:            HText_appendParagraph(me->text);
                    543:            me->in_word = NO;
1.2       timbl     544:        }
                    545:        break;
                    546:        
                    547:     case HTML_DD:
                    548:         UPDATE_STYLE;
1.62    ! frystyk   549:        HTML_put_character(me, TAB);    /* Just tab out one stop */
1.4       timbl     550:        me->in_word = NO;
                    551:        break;
1.2       timbl     552: 
                    553:     case HTML_UL:
                    554:     case HTML_OL:
                    555:     case HTML_MENU:
                    556:     case HTML_DIR:
1.11      timbl     557:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     558:        break;
                    559:        
                    560:     case HTML_LI:
                    561:         UPDATE_STYLE;
1.7       timbl     562:        if (me->sp[0].tag_number != HTML_DIR)
1.4       timbl     563:            HText_appendParagraph(me->text);
1.2       timbl     564:        else
1.62    ! frystyk   565:            HText_appendCharacter(me->text, TAB);
1.4       timbl     566:        me->in_word = NO;
1.2       timbl     567:        break;
                    568:        
                    569:     case HTML_LISTING:                         /* Litteral text */
                    570:     case HTML_XMP:
                    571:     case HTML_PLAINTEXT:
                    572:     case HTML_PRE:
1.11      timbl     573:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     574:        UPDATE_STYLE;
1.4       timbl     575:        if (me->comment_end)
                    576:            HText_appendText(me->text, me->comment_end);
1.2       timbl     577:        break;
1.11      timbl     578: 
1.23      frystyk   579:     case HTML_IMG:                     /* Images */
                    580:        {
                    581:            HTChildAnchor *source;
                    582:            char *src = NULL;
1.49      frystyk   583:            if (present[HTML_IMG_SRC])
1.23      frystyk   584:                StrAllocCopy(src, value[HTML_IMG_SRC]);
                    585:            source = HTAnchor_findChildAndLink(
                    586:                                               me->node_anchor,    /* parent */
                    587:                                               0,                     /* Tag */
                    588:                                               src ? src : 0,    /* Addresss */
                    589:                                               0);
                    590:            UPDATE_STYLE;
                    591:            HText_appendImage(me->text, source,
1.24      frystyk   592:                      present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
                    593:                      present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
                    594:                      present[HTML_IMG_ISMAP] ? YES : NO);
1.58      frystyk   595:            HT_FREE(src);
1.24      frystyk   596:        }       
                    597:        break;
                    598: 
                    599:     case HTML_HTML:                    /* Ignore these altogether */
                    600:     case HTML_HEAD:
                    601:     case HTML_BODY:
1.62    ! frystyk   602:        break;
1.24      frystyk   603:     
1.10      timbl     604:     case HTML_TT:                      /* Physical character highlighting */
                    605:     case HTML_B:                       /* Currently ignored */
                    606:     case HTML_I:
                    607:     case HTML_U:
1.62    ! frystyk   608:        UPDATE_STYLE;
        !           609:        HText_appendCharacter(me->text, '_');
        !           610:        me->in_word = NO;
        !           611:        break;
1.10      timbl     612:     
                    613:     case HTML_EM:                      /* Logical character highlighting */
                    614:     case HTML_STRONG:                  /* Currently ignored */
                    615:     case HTML_CODE:
                    616:     case HTML_SAMP:
                    617:     case HTML_KBD:
                    618:     case HTML_VAR:
                    619:     case HTML_DFN:
                    620:     case HTML_CITE:
                    621:        break;
                    622:        
1.11      timbl     623:     case HTML_H1:                      /* paragraph styles */
                    624:     case HTML_H2:
                    625:     case HTML_H3:
                    626:     case HTML_H4:
                    627:     case HTML_H5:
                    628:     case HTML_H6:
                    629:     case HTML_H7:
                    630:     case HTML_ADDRESS:
                    631:     case HTML_BLOCKQUOTE:
                    632:        change_paragraph_style(me, styles[element_number]);     /* May be postponed */
1.2       timbl     633:        break;
                    634: 
                    635:     } /* end switch */
                    636: 
1.16      timbl     637:     if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13      timbl     638:         if (me->sp == me->stack) {
1.44      frystyk   639:            if (SGML_TRACE)
1.59      eric      640:                HTTrace("HTML........ Maximum nesting of %d exceded!\n",
1.44      frystyk   641:                        MAX_NESTING); 
                    642:            me->overflow++;
1.12      timbl     643:            return;
                    644:        }
1.4       timbl     645:        --(me->sp);
                    646:        me->sp[0].style = me->new_style;        /* Stack new style */
                    647:        me->sp[0].tag_number = element_number;
1.10      timbl     648:     }  
1.1       timbl     649: }
1.10      timbl     650: 
1.2       timbl     651: 
1.1       timbl     652: /*             End Element
1.2       timbl     653: **             -----------
1.1       timbl     654: **
1.2       timbl     655: */
                    656: /*     When we end an element, the style must be returned to that
1.1       timbl     657: **     in effect before that element.  Note that anchors (etc?)
                    658: **     don't have an associated style, so that we must scan down the
                    659: **     stack for an element with a defined style. (In fact, the styles
                    660: **     should be linked to the whole stack not just the top one.)
                    661: **     TBL 921119
1.6       timbl     662: **
                    663: **     We don't turn on "CAREFUL" check because the parser produces
                    664: **     (internal code errors apart) good nesting. The parser checks
                    665: **     incoming code errors, not this module.
1.1       timbl     666: */
1.53      frystyk   667: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1       timbl     668: {
1.2       timbl     669: #ifdef CAREFUL                 /* parser assumed to produce good nesting */
1.4       timbl     670:     if (element_number != me->sp[0].tag_number) {
1.59      eric      671:         HTTrace("HTMLText: end of element %s when expecting end of %s\n",
1.16      timbl     672:                me->dtd->tags[element_number].name,
                    673:                me->dtd->tags[me->sp->tag_number].name);
1.6       timbl     674:                /* panic */
1.1       timbl     675:     }
1.2       timbl     676: #endif
1.44      frystyk   677: 
                    678:     /* HFN, If overflow of nestings, we need to get back to reality */
                    679:     if (me->overflow > 0) {
                    680:        me->overflow--;
                    681:        return;
                    682:     }
                    683: 
1.4       timbl     684:     me->sp++;                          /* Pop state off stack */
1.44      frystyk   685: 
1.2       timbl     686:     switch(element_number) {
                    687: 
                    688:     case HTML_A:
                    689:        UPDATE_STYLE;
1.4       timbl     690:        HText_endAnchor(me->text);
1.2       timbl     691:        break;
                    692: 
                    693:     case HTML_TITLE:
1.56      frystyk   694:        HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2       timbl     695:        break;
                    696:        
1.62    ! frystyk   697:     case HTML_TT:                      /* Physical character highlighting */
        !           698:     case HTML_B:                       /* Currently ignored */
        !           699:     case HTML_I:
        !           700:     case HTML_U:
        !           701:        UPDATE_STYLE;
        !           702:        HText_appendCharacter(me->text, '_');
        !           703:        break;
        !           704:     
1.2       timbl     705:     case HTML_LISTING:                         /* Litteral text */
                    706:     case HTML_XMP:
                    707:     case HTML_PLAINTEXT:
                    708:     case HTML_PRE:
1.4       timbl     709:        if (me->comment_start)
                    710:            HText_appendText(me->text, me->comment_start);
1.2       timbl     711:        /* Fall through */
                    712:        
                    713:     default:
1.44      frystyk   714: 
                    715:        /* Often won't really change */
                    716:        change_paragraph_style(me, me->sp->style);
1.2       timbl     717:        break;
                    718:        
                    719:     } /* switch */
1.1       timbl     720: }
                    721: 
1.2       timbl     722: 
                    723: /*             Expanding entities
                    724: **             ------------------
                    725: */
                    726: /*     (In fact, they all shrink!)
1.1       timbl     727: */
1.2       timbl     728: 
1.53      frystyk   729: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1       timbl     730: {
1.4       timbl     731:     HTML_put_string(me, ISO_Latin1[entity_number]);    /* @@ Other representations */
1.1       timbl     732: }
1.2       timbl     733: 
1.42      frystyk   734: /*     Flush an HTML object
                    735: **     --------------------
                    736: */
1.53      frystyk   737: PUBLIC int HTML_flush (HTStructured * me)
1.42      frystyk   738: {
                    739:     UPDATE_STYLE;                           /* Creates empty document here! */
1.57      frystyk   740:     if (me->comment_end) HTML_put_string(me,me->comment_end);
                    741:     return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42      frystyk   742: }
1.2       timbl     743: 
                    744: /*     Free an HTML object
                    745: **     -------------------
                    746: **
1.4       timbl     747: ** If the document is empty, the text object will not yet exist.
                    748:    So we could in fact abandon creating the document and return
                    749:    an error code.  In fact an empty document is an important type
                    750:    of document, so we don't.
                    751: **
                    752: **     If non-interactive, everything is freed off.   No: crashes -listrefs
1.2       timbl     753: **     Otherwise, the interactive object is left.      
                    754: */
1.53      frystyk   755: PUBLIC int HTML_free (HTStructured * me)
1.1       timbl     756: {
1.4       timbl     757:     UPDATE_STYLE;              /* Creates empty document here! */
                    758:     if (me->comment_end)
                    759:                HTML_put_string(me,me->comment_end);
                    760:     HText_endAppend(me->text);
                    761: 
                    762:     if (me->target) {
1.35      duns      763:         (*me->targetClass._free)(me->target);
1.2       timbl     764:     }
1.56      frystyk   765:     HTChunk_delete(me->title);
1.58      frystyk   766:     HT_FREE(me);
1.42      frystyk   767:     return HT_OK;
1.1       timbl     768: }
                    769: 
                    770: 
1.53      frystyk   771: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1       timbl     772: 
1.14      timbl     773: {
                    774:     if (me->target) {
                    775:         (*me->targetClass.abort)(me->target, e);
                    776:     }
1.56      frystyk   777:     HTChunk_delete(me->title);
1.58      frystyk   778:     HT_FREE(me);
1.42      frystyk   779:     return HT_ERROR;
1.1       timbl     780: }
                    781: 
1.2       timbl     782: 
                    783: /*     Get Styles from style sheet
                    784: **     ---------------------------
                    785: */
1.53      frystyk   786: PRIVATE void get_styles (void)
1.1       timbl     787: {
1.2       timbl     788:     got_styles = YES;
                    789:     
                    790:     default_style =            HTStyleNamed(styleSheet, "Normal");
1.1       timbl     791: 
1.2       timbl     792:     styles[HTML_H1] =          HTStyleNamed(styleSheet, "Heading1");
                    793:     styles[HTML_H2] =          HTStyleNamed(styleSheet, "Heading2");
                    794:     styles[HTML_H3] =          HTStyleNamed(styleSheet, "Heading3");
                    795:     styles[HTML_H4] =          HTStyleNamed(styleSheet, "Heading4");
                    796:     styles[HTML_H5] =          HTStyleNamed(styleSheet, "Heading5");
                    797:     styles[HTML_H6] =          HTStyleNamed(styleSheet, "Heading6");
                    798:     styles[HTML_H7] =          HTStyleNamed(styleSheet, "Heading7");
                    799: 
                    800:     styles[HTML_DL] =          HTStyleNamed(styleSheet, "Glossary");
                    801:     styles[HTML_UL] =
                    802:     styles[HTML_OL] =          HTStyleNamed(styleSheet, "List");
                    803:     styles[HTML_MENU] =                HTStyleNamed(styleSheet, "Menu");
                    804:     styles[HTML_DIR] =         HTStyleNamed(styleSheet, "Dir");    
1.16      timbl     805: /*  styles[HTML_DLC] =         HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2       timbl     806:     styles[HTML_ADDRESS]=      HTStyleNamed(styleSheet, "Address");
                    807:     styles[HTML_BLOCKQUOTE]=   HTStyleNamed(styleSheet, "BlockQuote");
                    808:     styles[HTML_PLAINTEXT] =
                    809:     styles[HTML_XMP] =         HTStyleNamed(styleSheet, "Example");
                    810:     styles[HTML_PRE] =         HTStyleNamed(styleSheet, "Preformatted");
                    811:     styles[HTML_LISTING] =     HTStyleNamed(styleSheet, "Listing");
                    812: }
                    813: /*                             P U B L I C
                    814: */
                    815: 
                    816: /*     Structured Object Class
                    817: **     -----------------------
                    818: */
1.60      frystyk   819: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2       timbl     820: {              
                    821:        "text/html",
1.42      frystyk   822:        HTML_flush,
1.2       timbl     823:        HTML_free,
1.14      timbl     824:        HTML_abort,
1.2       timbl     825:        HTML_put_character,     HTML_put_string,  HTML_write,
                    826:        HTML_start_element,     HTML_end_element,
                    827:        HTML_put_entity
                    828: }; 
1.1       timbl     829: 
1.4       timbl     830: 
1.2       timbl     831: /*             New Structured Text object
                    832: **             --------------------------
                    833: **
1.16      timbl     834: **     The structured stream can generate either presentation,
1.4       timbl     835: **     or plain text, or HTML.
1.1       timbl     836: */
1.53      frystyk   837: PRIVATE HTStructured* HTML_new (HTRequest *    request,
                    838:                                     void *             param,
                    839:                                     HTFormat           input_format,
                    840:                                     HTFormat           output_format,
                    841:                                     HTStream * output_stream)
1.1       timbl     842: {
                    843: 
1.4       timbl     844:     HTStructured * me;
                    845:     
1.47      frystyk   846: #if 0
1.16      timbl     847:     if (output_format != WWW_PLAINTEXT
                    848:        && output_format != WWW_PRESENT
                    849:        && output_format != HTAtom_for("text/x-c")) {
1.37      frystyk   850:         HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
                    851:                                                output_stream, request, NO);
1.6       timbl     852:        if (intermediate) return HTMLGenerator(intermediate);
1.44      frystyk   853:        if (SGML_TRACE)
1.59      eric      854:            HTTrace("HTML........ Can't parse HTML to %s\n",
1.44      frystyk   855:                    HTAtom_name(output_format));
1.4       timbl     856:        exit (-99);
                    857:     }
1.47      frystyk   858: #endif
1.4       timbl     859: 
1.58      frystyk   860:     if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
                    861:         HT_OUTOFMEM("HTML_new");
1.1       timbl     862: 
                    863:     if (!got_styles) get_styles();
                    864: 
1.4       timbl     865:     me->isa = &HTMLPresentation;
1.47      frystyk   866:     me->dtd = &HTMLP_dtd;
1.54      frystyk   867:     me->request = request;
1.48      frystyk   868:     me->node_anchor =  HTRequest_anchor(request);
1.56      frystyk   869:     me->title = HTChunk_new(128);
1.4       timbl     870:     me->text = 0;
                    871:     me->style_change = YES; /* Force check leading to text creation */
                    872:     me->new_style = default_style;
                    873:     me->old_style = 0;
                    874:     me->sp = me->stack + MAX_NESTING - 1;
                    875:     me->sp->tag_number = -1;                           /* INVALID */
                    876:     me->sp->style = default_style;                     /* INVALID */
1.1       timbl     877:     
1.4       timbl     878:     me->comment_start = NULL;
                    879:     me->comment_end = NULL;
1.16      timbl     880:     me->target = output_stream;
                    881:     if (output_stream) me->targetClass = *output_stream->isa;  /* Copy pointers */
1.1       timbl     882:     
1.4       timbl     883:     return (HTStructured*) me;
1.1       timbl     884: }
                    885: 
                    886: 
1.2       timbl     887: /*     HTConverter for HTML to plain text
                    888: **     ----------------------------------
1.1       timbl     889: **
1.2       timbl     890: **     This will convert from HTML to presentation or plain text.
1.1       timbl     891: */
1.53      frystyk   892: PUBLIC HTStream* HTMLToPlain (
                    893:        HTRequest *             request,
                    894:        void *                  param,
                    895:        HTFormat                input_format,
                    896:        HTFormat                output_format,
                    897:        HTStream *              output_stream)
1.1       timbl     898: {
1.47      frystyk   899:     return SGML_new(&HTMLP_dtd, HTML_new(
1.16      timbl     900:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     901: }
                    902: 
                    903: 
1.2       timbl     904: /*     HTConverter for HTML to C code
                    905: **     ------------------------------
                    906: **
1.36      frystyk   907: **     C code is like plain text but all non-preformatted code
1.2       timbl     908: **     is commented out.
                    909: **     This will convert from HTML to presentation or plain text.
                    910: */
1.53      frystyk   911: PUBLIC HTStream* HTMLToC (
                    912:        HTRequest *             request,
                    913:        void *                  param,
                    914:        HTFormat                input_format,
                    915:        HTFormat                output_format,
                    916:        HTStream *              output_stream)
1.1       timbl     917: {
1.4       timbl     918:     
                    919:     HTStructured * html;
                    920:     
1.36      frystyk   921:     (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16      timbl     922:     html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45      frystyk   923:     html->comment_start = "\n/* ";
1.47      frystyk   924:     html->dtd = &HTMLP_dtd;
1.2       timbl     925:     html->comment_end = " */\n";       /* Must start in col 1 for cpp */
1.47      frystyk   926:     return SGML_new(&HTMLP_dtd, html);
1.1       timbl     927: }
                    928: 
                    929: 
1.2       timbl     930: /*     Presenter for HTML
                    931: **     ------------------
                    932: **
                    933: **     This will convert from HTML to presentation or plain text.
                    934: **
                    935: **     Override this if you have a windows version
1.1       timbl     936: */
1.2       timbl     937: #ifndef GUI
1.53      frystyk   938: PUBLIC HTStream* HTMLPresent (
                    939:        HTRequest *             request,
                    940:        void *                  param,
                    941:        HTFormat                input_format,
                    942:        HTFormat                output_format,
                    943:        HTStream *              output_stream)
1.1       timbl     944: {
1.47      frystyk   945:     return SGML_new(&HTMLP_dtd, HTML_new(
1.16      timbl     946:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     947: }
1.2       timbl     948: #endif
1.29      frystyk   949: 

Webmaster