Annotation of libwww/Library/src/HTML.c, revision 1.32

1.2       timbl       1: /*             Structured stream to Rich hypertext converter
                      2: **             ============================================
1.1       timbl       3: **
1.2       timbl       4: **     This generates a hypertext object.  It converts from the
                      5: **     structured stream interface for HTML events into the style-
                      6: **     oriented interface of HText.h.  This module is
                      7: **     only used in clients and should not be linked into servers.
1.1       timbl       8: **
1.6       timbl       9: **     Override this module if making a new GUI browser.
1.1       timbl      10: **
                     11: */
1.16      timbl      12: 
1.1       timbl      13: #include "HTML.h"
                     14: 
1.16      timbl      15: /* #define CAREFUL              Check nesting here not really necessary */
1.2       timbl      16: 
1.1       timbl      17: #include <ctype.h>
                     18: #include <stdio.h>
                     19: 
                     20: #include "HTAtom.h"
                     21: #include "HTChunk.h"
                     22: #include "HText.h"
                     23: #include "HTStyle.h"
                     24: 
1.3       timbl      25: #include "HTAlert.h"
1.4       timbl      26: #include "HTMLGen.h"
1.8       timbl      27: #include "HTParse.h"
1.28      frystyk    28: #include "HTError.h"                               /* Because of HTErrorMsg */
1.1       timbl      29: 
                     30: extern HTStyleSheet * styleSheet;      /* Application-wide */
                     31: 
                     32: /*     Module-wide style cache (file-private state used by the routines below)
                     33: */
                     34: PRIVATE int            got_styles = 0;        /* NOTE(review): presumably set once get_styles() has filled the cache -- confirm */
1.16      timbl      35: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];      /* Indexed by element number, e.g. styles[HTML_DL] */
1.2       timbl      36: PRIVATE HTStyle *default_style;               /* NOTE(review): not referenced in the visible part of the file */
1.1       timbl      37: 
                     38: 
                     39: /*             HTML Object
                     40: **             -----------
                     41: */
1.2       timbl      42: #define MAX_NESTING 20         /* Size of the per-object element stack; should be checked by parser */
                     43: /* One entry of the element stack kept in struct _HTStructured. */
                     44: typedef struct _stack_element {
                     45:         HTStyle *      style;          /* NOTE(review): presumably the style in force at this nesting level -- confirm */
                     46:        int             tag_number;     /* Element number; compared against HTML_* constants by the put routines */
                     47: } stack_element;
                     48: 
                     49: struct _HTStructured {                 /* State of one HTML-to-HText conversion */
                     50:     CONST HTStructuredClass *  isa;
                     51:     HTParentAnchor *           node_anchor;    /* Anchor passed to HText_new2() and used as parent of child anchors */
                     52:     HText *                    text;           /* NULL until actually_set_style() creates it */
                     53: 
                     54:     HTStream*                  target;                 /* Output stream */
                     55:     HTStreamClass              targetClass;            /* Output routines */
                     56: 
                     57:     HTChunk                    title;          /* Grow by 128; receives characters while inside TITLE */
                     58:     
                     59:     char *                     comment_start;  /* for literate programming */
                     60:     char *                     comment_end;    /* If set, appended to the text when a literal element starts */
1.16      timbl      61:     
                     62:     CONST SGML_dtd*            dtd;
                     63:     
1.2       timbl      64:     HTTag *                    current_tag;
                     65:     BOOL                       style_change;   /* YES while new_style has been requested but not yet applied */
                     66:     HTStyle *                  new_style;      /* Style requested by change_paragraph_style() */
                     67:     HTStyle *                  old_style;      /* Style last applied by actually_set_style() */
                     68:     BOOL                       in_word;  /* Have just had a non-white char */
                     69:     stack_element      stack[MAX_NESTING];
                     70:     stack_element      *sp;            /* Style stack pointer; sp[0].tag_number selects how character data is handled */
1.1       timbl      71: };
                     72: 
1.2       timbl      73: struct _HTStream {                     /* Only the leading class pointer is declared in this file */
                     74:     CONST HTStreamClass *      isa;
                     75:     /* .... NOTE(review): remaining members are presumably private to each stream implementation -- confirm */
                     76: };
1.1       timbl      77: 
                     78: /*             Forward declarations of routines
                     79: */
                     80: PRIVATE void get_styles NOPARAMS;
                     81: 
                     82: 
1.4       timbl      83: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11      timbl      84: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1       timbl      85: 
                     86: /*     Style buffering avoids dummy paragraph begin/ends: a requested style is
                     87: **     only applied when UPDATE_STYLE runs.  The macro expands to a bare "if", so it needs a variable named me in scope and must not be directly followed by "else". */
1.4       timbl      88: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1       timbl      89: 
                     90: 
1.2       timbl      91: #ifdef OLD_CODE
1.1       timbl      92: /* The following accented characters are from Peter Flynn, CURIA project */
                     93: 
                     94: /* These ifdefs don't solve the problem of a simple terminal emulator
                     95: ** with a different character set to the client machine. But nothing does,
                     96: ** except looking at the TERM setting */
                     97: 
1.2       timbl      98: /* NOTE(review): the declaration that should open this initializer list is missing, so this fragment cannot compile if OLD_CODE is defined. */
1.1       timbl      99:         { "ocus" , "&" },       /* for CURIA */
                    100: #ifdef IBMPC
                    101:         { "aacute" , "\240" }, /* For PC display */
                    102:         { "eacute" , "\202" },
                    103:         { "iacute" , "\241" },
                    104:         { "oacute" , "\242" },
                    105:         { "uacute" , "\243" },
                    106:         { "Aacute" , "\101" },
                    107:         { "Eacute" , "\220" },
                    108:         { "Iacute" , "\111" },
                    109:         { "Oacute" , "\117" },
                    110:         { "Uacute" , "\125" },
                    111: #else
                    112:         { "aacute" , "\341" }, /* Works for openwindows -- Peter Flynn */
                    113:         { "eacute" , "\351" },
                    114:         { "iacute" , "\355" },
                    115:         { "oacute" , "\363" },
                    116:         { "uacute" , "\372" },
                    117:         { "Aacute" , "\301" },
                    118:         { "Eacute" , "\310" }, /* NOTE(review): the ISO_Latin1 table in this file has \311 for E acute and \310 for E grave */
                    119:         { "Iacute" , "\315" },
                    120:         { "Oacute" , "\323" },
                    121:         { "Uacute" , "\332" }, 
                    122: #endif
                    123:        { 0,    0 }  /* Terminate list */
                    124: };
1.2       timbl     125: #endif
1.1       timbl     126: 
                    127: 
1.2       timbl     128: /*     Entity values -- for ISO Latin 1 local representation
                    129: **     One single-character string per entity, written as an octal escape.
                    130: **     This MUST match exactly the table referred to in the DTD!
                    131: */
                    132: static char * ISO_Latin1[] = {         /* Initial target of p_entity_values */
                    133:        "\306", /* capital AE diphthong (ligature) */ 
                    134:        "\301", /* capital A, acute accent */ 
                    135:        "\302", /* capital A, circumflex accent */ 
                    136:        "\300", /* capital A, grave accent */ 
                    137:        "\305", /* capital A, ring */ 
                    138:        "\303", /* capital A, tilde */ 
                    139:        "\304", /* capital A, dieresis or umlaut mark */ 
                    140:        "\307", /* capital C, cedilla */ 
                    141:        "\320", /* capital Eth, Icelandic */ 
                    142:        "\311", /* capital E, acute accent */ 
                    143:        "\312", /* capital E, circumflex accent */ 
                    144:        "\310", /* capital E, grave accent */ 
                    145:        "\313", /* capital E, dieresis or umlaut mark */ 
                    146:        "\315", /* capital I, acute accent */ 
                    147:        "\316", /* capital I, circumflex accent */ 
                    148:        "\314", /* capital I, grave accent */ 
                    149:        "\317", /* capital I, dieresis or umlaut mark */ 
                    150:        "\321", /* capital N, tilde */ 
                    151:        "\323", /* capital O, acute accent */ 
                    152:        "\324", /* capital O, circumflex accent */ 
                    153:        "\322", /* capital O, grave accent */ 
                    154:        "\330", /* capital O, slash */ 
                    155:        "\325", /* capital O, tilde */ 
                    156:        "\326", /* capital O, dieresis or umlaut mark */ 
                    157:        "\336", /* capital THORN, Icelandic */ 
                    158:        "\332", /* capital U, acute accent */ 
                    159:        "\333", /* capital U, circumflex accent */ 
                    160:        "\331", /* capital U, grave accent */ 
                    161:        "\334", /* capital U, dieresis or umlaut mark */ 
                    162:        "\335", /* capital Y, acute accent */ 
                    163:        "\341", /* small a, acute accent */ 
                    164:        "\342", /* small a, circumflex accent */ 
                    165:        "\346", /* small ae diphthong (ligature) */ 
                    166:        "\340", /* small a, grave accent */ 
                    167:        "\046", /* ampersand */ 
                    168:        "\345", /* small a, ring */ 
                    169:        "\343", /* small a, tilde */ 
                    170:        "\344", /* small a, dieresis or umlaut mark */ 
                    171:        "\347", /* small c, cedilla */ 
                    172:        "\351", /* small e, acute accent */ 
                    173:        "\352", /* small e, circumflex accent */ 
                    174:        "\350", /* small e, grave accent */ 
                    175:        "\360", /* small eth, Icelandic */ 
                    176:        "\353", /* small e, dieresis or umlaut mark */ 
                    177:        "\076", /* greater than */ 
                    178:        "\355", /* small i, acute accent */ 
                    179:        "\356", /* small i, circumflex accent */ 
                    180:        "\354", /* small i, grave accent */ 
                    181:        "\357", /* small i, dieresis or umlaut mark */ 
                    182:        "\074", /* less than */ 
                    183:        "\361", /* small n, tilde */ 
                    184:        "\363", /* small o, acute accent */ 
                    185:        "\364", /* small o, circumflex accent */ 
                    186:        "\362", /* small o, grave accent */ 
                    187:        "\370", /* small o, slash */ 
                    188:        "\365", /* small o, tilde */ 
                    189:        "\366", /* small o, dieresis or umlaut mark */ 
                    190:        "\337", /* small sharp s, German (sz ligature) */ 
                    191:        "\376", /* small thorn, Icelandic */ 
                    192:        "\372", /* small u, acute accent */ 
                    193:        "\373", /* small u, circumflex accent */ 
                    194:        "\371", /* small u, grave accent */ 
                    195:        "\374", /* small u, dieresis or umlaut mark */ 
                    196:        "\375", /* small y, acute accent */ 
                    197:        "\377", /* small y, dieresis or umlaut mark */ 
1.1       timbl     198: };
                    199: 
1.2       timbl     200: 
                    201: /*     Entity values -- for NeXT local representation
                    202: **
                    203: **     This MUST match exactly the table referred to in the DTD!
                    204: **     Entries are in the same order as ISO_Latin1 above; HTMLUseCharacterSet() selects between the two tables.
                    205: */
                    206: static char * NeXTCharacters[] = {
                    207:        "\341", /* capital AE diphthong (ligature)      */ 
                    208:        "\202", /* capital A, acute accent              */ 
                    209:        "\203", /* capital A, circumflex accent         */ 
                    210:        "\201", /* capital A, grave accent              */ 
                    211:        "\206", /* capital A, ring                      */ 
                    212:        "\204", /* capital A, tilde                     */ 
                    213:        "\205", /* capital A, dieresis or umlaut mark   */ 
                    214:        "\207", /* capital C, cedilla                   */ 
                    215:        "\220", /* capital Eth, Icelandic               */ 
                    216:        "\211", /* capital E, acute accent                              */ 
                    217:        "\212", /* capital E, circumflex accent                         */ 
                    218:        "\210", /* capital E, grave accent                              */ 
                    219:        "\213", /* capital E, dieresis or umlaut mark                   */ 
                    220:        "\215", /* capital I, acute accent                              */ 
                    221:        "\216", /* capital I, circumflex accent         these are       */ 
                    222:        "\214", /* capital I, grave accent              ISO -100 hex    */ 
                    223:        "\217", /* capital I, dieresis or umlaut mark                   */ 
                    224:        "\221", /* capital N, tilde                                     */ 
                    225:        "\223", /* capital O, acute accent                              */ 
                    226:        "\224", /* capital O, circumflex accent                         */ 
                    227:        "\222", /* capital O, grave accent                              */ 
                    228:        "\351", /* capital O, slash             'cept this */ 
                    229:        "\225", /* capital O, tilde                                     */ 
                    230:        "\226", /* capital O, dieresis or umlaut mark                   */ 
                    231:        "\234", /* capital THORN, Icelandic */ 
                    232:        "\230", /* capital U, acute accent */ 
                    233:        "\231", /* capital U, circumflex accent */ 
                    234:        "\227", /* capital U, grave accent */ 
                    235:        "\232", /* capital U, dieresis or umlaut mark */ 
                    236:        "\233", /* capital Y, acute accent */ 
                    237:        "\326", /* small a, acute accent */ 
                    238:        "\327", /* small a, circumflex accent */ 
                    239:        "\361", /* small ae diphthong (ligature) */ 
                    240:        "\325", /* small a, grave accent */ 
                    241:        "\046", /* ampersand */ 
                    242:        "\332", /* small a, ring */ 
                    243:        "\330", /* small a, tilde */ 
                    244:        "\331", /* small a, dieresis or umlaut mark */ 
                    245:        "\333", /* small c, cedilla */ 
                    246:        "\335", /* small e, acute accent */ 
                    247:        "\336", /* small e, circumflex accent */ 
                    248:        "\334", /* small e, grave accent */ 
                    249:        "\346", /* small eth, Icelandic         */ 
                    250:        "\337", /* small e, dieresis or umlaut mark */ 
                    251:        "\076", /* greater than */ 
                    252:        "\342", /* small i, acute accent */ 
                    253:        "\344", /* small i, circumflex accent */ 
                    254:        "\340", /* small i, grave accent */ 
                    255:        "\345", /* small i, dieresis or umlaut mark */ 
                    256:        "\074", /* less than */ 
                    257:        "\347", /* small n, tilde */ 
                    258:        "\355", /* small o, acute accent */ 
                    259:        "\356", /* small o, circumflex accent */ 
                    260:        "\354", /* small o, grave accent */ 
                    261:        "\371", /* small o, slash */ 
                    262:        "\357", /* small o, tilde */ 
                    263:        "\360", /* small o, dieresis or umlaut mark */ 
                    264:        "\373", /* small sharp s, German (sz ligature) */ 
                    265:        "\374", /* small thorn, Icelandic */ 
                    266:        "\363", /* small u, acute accent */ 
                    267:        "\364", /* small u, circumflex accent */ 
                    268:        "\362", /* small u, grave accent */ 
                    269:        "\366", /* small u, dieresis or umlaut mark */ 
                    270:        "\367", /* small y, acute accent */ 
                    271:        "\375", /* small y, dieresis or umlaut mark */ 
1.1       timbl     272: };
                    273: 
1.2       timbl     274: /*     Entity values -- for IBM/PC Code Page 850 (International)
                    275: **
                    276: **     This MUST match exactly the table referred to in the DTD!
                    277: **
                    278: */
                    279: /* @@@@@@@@@@@@@@@@@ TBD */
                    280: 
                    281: 
                    282: 
                    283: /*             Set character set
                    284: **             ----------------
                    285: **     Selects the entity translation table: NeXTCharacters when i is HTML_NEXT_CHARS, ISO_Latin1 for any other value. */
                    286: 
                    287: PRIVATE char** p_entity_values = ISO_Latin1;   /* Pointer to translation; ISO Latin 1 until told otherwise */
1.1       timbl     288: 
1.2       timbl     289: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
                    290: {
                    291:     p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
                    292:                                             : ISO_Latin1;
                    293: }
1.1       timbl     294: 
                    295: 
                    296: /*             Flattening the style structure
                    297: **             ------------------------------
                    298: **
                    299: On the NeXT, and on any read-only browser, it is simpler for the text to have
                    300: a sequence of styles, rather than a nested tree of styles. In this
                    301: case we have to flatten the structure as it arrives from SGML tags into
                    302: a sequence of styles.
                    303: */
                    304: 
                    305: /*             If style really needs to be set, call this
                    306: **     Applies me->new_style to the text, creating the HText object on first use, then clears the pending-change flag. */
1.4       timbl     307: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1       timbl     308: {
1.4       timbl     309:     if (!me->text) {                   /* First time through */
                    310:            me->text = HText_new2(me->node_anchor, me->target);
                    311:            HText_beginAppend(me->text);
                    312:            HText_setStyle(me->text, me->new_style);
                    313:            me->in_word = NO;
1.1       timbl     314:     } else {
1.4       timbl     315:            HText_setStyle(me->text, me->new_style);
1.1       timbl     316:     }
1.4       timbl     317:     me->old_style = me->new_style;     /* Remember what is now in force */
                    318:     me->style_change = NO;             /* Nothing pending any more */
1.1       timbl     319: }
                    320: 
                    321: /*      If you THINK you need to change style, call this
                    322: **     Records style as the pending style without touching the text; it is applied later by actually_set_style(). */
                    323: 
1.11      timbl     324: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1       timbl     325: {
1.4       timbl     326:     if (me->new_style!=style) {        /* Only flag a change if the requested style differs */
                    327:        me->style_change = YES;
                    328:        me->new_style = style;
1.1       timbl     329:     }
1.11      timbl     330:     me->in_word = NO;                  /* Reset even when the style is unchanged */
1.1       timbl     331: }
                    332: 
1.2       timbl     333: /*_________________________________________________________________________
                    334: **
                    335: **                     A C T I O N     R O U T I N E S
                    336: */
                    337: 
                    338: /*     Character handling
                    339: **     Routes one character according to the element on top of the stack (me->sp[0].tag_number).
1.1       timbl     340: */
1.4       timbl     341: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1       timbl     342: {
1.2       timbl     343: 
1.4       timbl     344:     switch (me->sp[0].tag_number) {
1.2       timbl     345:     case HTML_COMMENT:
                    346:        break;                                  /* Do Nothing */
                    347:        
                    348:     case HTML_TITLE:   
1.4       timbl     349:        HTChunkPutc(&me->title, c);             /* Title text is collected, not displayed */
1.2       timbl     350:        break;
                    351: 
                    352:        
                    353:     case HTML_LISTING:                         /* Literal text */
                    354:     case HTML_XMP:
                    355:     case HTML_PLAINTEXT:
                    356:     case HTML_PRE:
                    357: /*     The style is already up to date here: HTML_start_element applies it
                    358: **     when one of these literal elements starts, so c is appended unchanged. */
1.4       timbl     359:        HText_appendCharacter(me->text, c);
1.2       timbl     360:        break;
                    361:        
                    362:     default:                                   /* Free format text */
1.4       timbl     363:        if (me->style_change) {
1.2       timbl     364:            if ((c=='\n') || (c==' ')) return;  /* Ignore it: no leading white space before a pending style is applied */
                    365:            UPDATE_STYLE;
                    366:        }
                    367:        if (c=='\n') {                          /* Newline becomes one space, and only directly after a non-newline character */
1.4       timbl     368:            if (me->in_word) {
                    369:                HText_appendCharacter(me->text, ' ');
                    370:                me->in_word = NO;
1.2       timbl     371:            }
                    372:        } else {
1.4       timbl     373:            HText_appendCharacter(me->text, c);
                    374:            me->in_word = YES;
1.2       timbl     375:        }
                    376:     } /* end switch */
1.1       timbl     377: }
                    378: 
1.2       timbl     379: 
                    380: 
                    381: /*     String handling
                    382: **     ---------------
                    383: **     Routes the NUL-terminated string s according to the element on top of the stack.
                    384: **     This is written separately from put_character because the loop can
1.11      timbl     385: **     in some cases be promoted to a higher function call level for speed.
1.2       timbl     386: */
1.4       timbl     387: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1       timbl     388: {
1.2       timbl     389: 
1.4       timbl     390:     switch (me->sp[0].tag_number) {
1.2       timbl     391:     case HTML_COMMENT:
                    392:        break;                                  /* Do Nothing */
                    393:        
                    394:     case HTML_TITLE:   
1.4       timbl     395:        HTChunkPuts(&me->title, s);             /* Title text is collected, not displayed */
1.2       timbl     396:        break;
                    397: 
                    398:        
                    399:     case HTML_LISTING:                         /* Literal text */
                    400:     case HTML_XMP:
                    401:     case HTML_PLAINTEXT:
                    402:     case HTML_PRE:
                    403: 
                    404: /*     The style is already up to date here: HTML_start_element applies it
                    405: **     when one of these literal elements starts, so s is appended unchanged. */
1.4       timbl     406:        HText_appendText(me->text, s);
1.2       timbl     407:        break;
                    408:        
                    409:     default:                                   /* Free format text */
                    410:         {
                    411:            CONST char *p = s;
1.4       timbl     412:            if (me->style_change) {
1.2       timbl     413:                for (; *p && ((*p=='\n') || (*p==' ')); p++)  ;  /* Ignore leaders */
                    414:                if (!*p) return;                /* Nothing but white space: leave the style pending */
                    415:                UPDATE_STYLE;
                    416:            }
                    417:            for(; *p; p++) {
1.4       timbl     418:                if (me->style_change) {         /* NOTE(review): looks redundant after the leader-skipping pass above -- confirm */
1.2       timbl     419:                    if ((*p=='\n') || (*p==' ')) continue;  /* Ignore it */
                    420:                    UPDATE_STYLE;
                    421:                }
                    422:                if (*p=='\n') {                 /* Newline becomes one space, and only directly after a non-newline character */
1.4       timbl     423:                    if (me->in_word) {
                    424:                        HText_appendCharacter(me->text, ' ');
                    425:                        me->in_word = NO;
1.2       timbl     426:                    }
                    427:                } else {
1.4       timbl     428:                    HText_appendCharacter(me->text, *p);
                    429:                    me->in_word = YES;
1.2       timbl     430:                }
                    431:            } /* for */
                    432:        }
                    433:     } /* end switch */
1.1       timbl     434: }
                    435: 
                    436: 
1.2       timbl     437: /*     Buffer write: pass the l bytes starting at s, one at a time, to
1.3       timbl     438: **     HTML_put_character().  The loop is bounded by l, not by a NUL byte.
1.1       timbl     439: */
1.4       timbl     440: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1       timbl     441: {
1.2       timbl     442:     CONST char* p;
                    443:     CONST char* e = s+l;               /* One past the last byte to send */
1.4       timbl     444:     for (p=s; p<e; p++) HTML_put_character(me, *p);    /* Test the moving cursor p: the old test s<e never became false for l>0 */
1.1       timbl     445: }
1.2       timbl     446: 
                    447: 
                    448: /*     Start Element
                    449: **     -------------
                    450: */
                    451: PRIVATE void HTML_start_element ARGS4(
1.4       timbl     452:        HTStructured *,         me,
1.16      timbl     453:        int,                    element_number,
1.3       timbl     454:        CONST BOOL*,            present,
1.16      timbl     455:        CONST char **,          value)
1.2       timbl     456: {
                    457:     switch (element_number) {
                    458:     case HTML_A:
                    459:        {
1.8       timbl     460:            HTChildAnchor * source;
1.9       timbl     461:            char * href = NULL;
                    462:            if (present[HTML_A_HREF]) {
                    463:                StrAllocCopy(href, value[HTML_A_HREF]);
                    464:                HTSimplify(href);
                    465:            }
1.8       timbl     466:            source = HTAnchor_findChildAndLink(
1.4       timbl     467:                me->node_anchor,                                /* parent */
1.2       timbl     468:                present[HTML_A_NAME] ? value[HTML_A_NAME] : 0,  /* Tag */
1.9       timbl     469:                present[HTML_A_HREF] ? href : 0,                /* Addresss */
1.16      timbl     470:                present[HTML_A_REL] && value[HTML_A_REL] ? 
                    471:                        (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2       timbl     472:                                                : 0);
                    473:            
                    474:            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
                    475:                HTParentAnchor * dest = 
                    476:                    HTAnchor_parent(
                    477:                        HTAnchor_followMainLink((HTAnchor*)source)
                    478:                                    );
                    479:                if (!HTAnchor_title(dest))
                    480:                        HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
                    481:            }
                    482:            UPDATE_STYLE;
1.4       timbl     483:            HText_beginAnchor(me->text, source);
1.18      frystyk   484:            free(href);                 /* Leak fix Henrik 17/02-94 */
1.2       timbl     485:        }
                    486:        break;
                    487:        
                    488:     case HTML_TITLE:
1.4       timbl     489:         HTChunkClear(&me->title);
1.2       timbl     490:        break;
                    491:        
                    492:     case HTML_NEXTID:
                    493:        /* if (present[NEXTID_N] && value[NEXTID_N])
1.4       timbl     494:                HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2       timbl     495:        break;
                    496:        
                    497:     case HTML_ISINDEX:
1.4       timbl     498:        HTAnchor_setIndex(me->node_anchor);
1.2       timbl     499:        break;
                    500:        
1.15      timbl     501:     case HTML_BR: 
                    502:        UPDATE_STYLE;
                    503:        HText_appendCharacter(me->text, '\n');
                    504:        me->in_word = NO;
                    505:        break;
                    506:        
                    507:     case HTML_HR: 
                    508:        UPDATE_STYLE;
                    509:        HText_appendCharacter(me->text, '\n');
1.16      timbl     510:        HText_appendText(me->text, "___________________________________");
1.15      timbl     511:        HText_appendCharacter(me->text, '\n');
                    512:        me->in_word = NO;
                    513:        break;
                    514:        
1.2       timbl     515:     case HTML_P:
                    516:        UPDATE_STYLE;
1.4       timbl     517:        HText_appendParagraph(me->text);
                    518:        me->in_word = NO;
1.2       timbl     519:        break;
                    520: 
                    521:     case HTML_DL:
1.11      timbl     522:         change_paragraph_style(me, present && present[DL_COMPACT]
1.16      timbl     523:                ? styles[HTML_DL]
1.2       timbl     524:                : styles[HTML_DL]);
                    525:        break;
                    526:        
                    527:     case HTML_DT:
1.4       timbl     528:         if (!me->style_change) {
                    529:            HText_appendParagraph(me->text);
                    530:            me->in_word = NO;
1.2       timbl     531:        }
                    532:        break;
                    533:        
                    534:     case HTML_DD:
                    535:         UPDATE_STYLE;
1.4       timbl     536:        HTML_put_character(me, '\t');   /* Just tab out one stop */
                    537:        me->in_word = NO;
                    538:        break;
1.2       timbl     539: 
                    540:     case HTML_UL:
                    541:     case HTML_OL:
                    542:     case HTML_MENU:
                    543:     case HTML_DIR:
1.11      timbl     544:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     545:        break;
                    546:        
                    547:     case HTML_LI:
                    548:         UPDATE_STYLE;
1.7       timbl     549:        if (me->sp[0].tag_number != HTML_DIR)
1.4       timbl     550:            HText_appendParagraph(me->text);
1.2       timbl     551:        else
1.4       timbl     552:            HText_appendCharacter(me->text, '\t');      /* Tab @@ nl for UL? */
                    553:        me->in_word = NO;
1.2       timbl     554:        break;
                    555:        
                    556:     case HTML_LISTING:                         /* Litteral text */
                    557:     case HTML_XMP:
                    558:     case HTML_PLAINTEXT:
                    559:     case HTML_PRE:
1.11      timbl     560:        change_paragraph_style(me, styles[element_number]);
1.2       timbl     561:        UPDATE_STYLE;
1.4       timbl     562:        if (me->comment_end)
                    563:            HText_appendText(me->text, me->comment_end);
1.2       timbl     564:        break;
1.11      timbl     565: 
1.23      frystyk   566:     case HTML_IMG:                     /* Images */
                    567:        {
                    568:            HTChildAnchor *source;
                    569:            char *src = NULL;
                    570:            if (present[HTML_IMG_SRC]) {
                    571:                StrAllocCopy(src, value[HTML_IMG_SRC]);
                    572:                HTSimplify(src);
                    573:            }
                    574:            source = HTAnchor_findChildAndLink(
                    575:                                               me->node_anchor,    /* parent */
                    576:                                               0,                     /* Tag */
                    577:                                               src ? src : 0,    /* Addresss */
                    578:                                               0);
                    579:            UPDATE_STYLE;
                    580:            HText_appendImage(me->text, source,
1.24      frystyk   581:                      present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
                    582:                      present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
                    583:                      present[HTML_IMG_ISMAP] ? YES : NO);
1.23      frystyk   584:            free(src);
1.24      frystyk   585:        }       
                    586:        break;
                    587: 
                    588:     case HTML_HTML:                    /* Ignore these altogether */
                    589:     case HTML_HEAD:
                    590:     case HTML_BODY:
                    591:     
1.10      timbl     592:     case HTML_TT:                      /* Physical character highlighting */
                    593:     case HTML_B:                       /* Currently ignored */
                    594:     case HTML_I:
                    595:     case HTML_U:
                    596:     
                    597:     case HTML_EM:                      /* Logical character highlighting */
                    598:     case HTML_STRONG:                  /* Currently ignored */
                    599:     case HTML_CODE:
                    600:     case HTML_SAMP:
                    601:     case HTML_KBD:
                    602:     case HTML_VAR:
                    603:     case HTML_DFN:
                    604:     case HTML_CITE:
                    605:        break;
                    606:        
1.11      timbl     607:     case HTML_H1:                      /* paragraph styles */
                    608:     case HTML_H2:
                    609:     case HTML_H3:
                    610:     case HTML_H4:
                    611:     case HTML_H5:
                    612:     case HTML_H6:
                    613:     case HTML_H7:
                    614:     case HTML_ADDRESS:
                    615:     case HTML_BLOCKQUOTE:
                    616:        change_paragraph_style(me, styles[element_number]);     /* May be postponed */
1.2       timbl     617:        break;
                    618: 
                    619:     } /* end switch */
                    620: 
1.16      timbl     621:     if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13      timbl     622:         if (me->sp == me->stack) {
1.12      timbl     623:            fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
                    624:            MAX_NESTING); 
                    625:            return;
                    626:        }
1.4       timbl     627:        --(me->sp);
                    628:        me->sp[0].style = me->new_style;        /* Stack new style */
                    629:        me->sp[0].tag_number = element_number;
1.10      timbl     630:     }  
1.1       timbl     631: }
1.10      timbl     632: 
1.2       timbl     633: 
1.1       timbl     634: /*             End Element
1.2       timbl     635: **             -----------
1.1       timbl     636: **
1.2       timbl     637: */
                    638: /*     When we end an element, the style must be returned to that
1.1       timbl     639: **     in effect before that element.  Note that anchors (etc?)
                    640: **     don't have an associated style, so that we must scan down the
                    641: **     stack for an element with a defined style. (In fact, the styles
                    642: **     should be linked to the whole stack not just the top one.)
                    643: **     TBL 921119
1.6       timbl     644: **
                    645: **     We don't turn on "CAREFUL" check because the parser produces
                    646: **     (internal code errors apart) good nesting. The parser checks
                    647: **     incoming code errors, not this module.
1.1       timbl     648: */
1.4       timbl     649: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1       timbl     650: {
1.2       timbl     651: #ifdef CAREFUL                 /* parser assumed to produce good nesting */
1.4       timbl     652:     if (element_number != me->sp[0].tag_number) {
1.2       timbl     653:         fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
1.16      timbl     654:                me->dtd->tags[element_number].name,
                    655:                me->dtd->tags[me->sp->tag_number].name);
1.6       timbl     656:                /* panic */
1.1       timbl     657:     }
1.2       timbl     658: #endif
                    659:     
1.4       timbl     660:     me->sp++;                          /* Pop state off stack */
1.2       timbl     661:     
                    662:     switch(element_number) {
                    663: 
                    664:     case HTML_A:
                    665:        UPDATE_STYLE;
1.4       timbl     666:        HText_endAnchor(me->text);
1.2       timbl     667:        break;
                    668: 
                    669:     case HTML_TITLE:
1.4       timbl     670:         HTChunkTerminate(&me->title);
                    671:        HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2       timbl     672:        break;
                    673:        
                    674:     case HTML_LISTING:                         /* Litteral text */
                    675:     case HTML_XMP:
                    676:     case HTML_PLAINTEXT:
                    677:     case HTML_PRE:
1.4       timbl     678:        if (me->comment_start)
                    679:            HText_appendText(me->text, me->comment_start);
1.2       timbl     680:        /* Fall through */
                    681:        
                    682:     default:
                    683:     
1.11      timbl     684:        change_paragraph_style(me, me->sp->style);      /* Often won't really change */
1.2       timbl     685:        break;
                    686:        
                    687:     } /* switch */
1.1       timbl     688: }
                    689: 
1.2       timbl     690: 
                    691: /*             Expanding entities
                    692: **             ------------------
                    693: */
                    694: /*     (In fact, they all shrink!)
1.1       timbl     695: */
1.2       timbl     696: 
1.4       timbl     697: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1       timbl     698: {
1.4       timbl     699:     HTML_put_string(me, ISO_Latin1[entity_number]);    /* @@ Other representations */
1.1       timbl     700: }
1.2       timbl     701: 
                    702: 
                    703: /*     Free an HTML object
                    704: **     -------------------
                    705: **
1.4       timbl     706: ** If the document is empty, the text object will not yet exist.
                    707:    So we could in fact abandon creating the document and return
                    708:    an error code.  In fact an empty document is an important type
                    709:    of document, so we don't.
                    710: **
                    711: **     If non-interactive, everything is freed off.   No: crashes -listrefs
1.2       timbl     712: **     Otherwise, the interactive object is left.      
                    713: */
1.4       timbl     714: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1       timbl     715: {
1.4       timbl     716:     UPDATE_STYLE;              /* Creates empty document here! */
                    717:     if (me->comment_end)
                    718:                HTML_put_string(me,me->comment_end);
                    719:     HText_endAppend(me->text);
                    720: 
                    721:     if (me->target) {
                    722:         (*me->targetClass.free)(me->target);
1.2       timbl     723:     }
1.19      frystyk   724:     HTChunkClear(&me->title);  /* Henrik 18/02-94 */
1.4       timbl     725:     free(me);
1.1       timbl     726: }
                    727: 
                    728: 
1.14      timbl     729: PRIVATE void HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1       timbl     730: 
1.14      timbl     731: {
                    732:     if (me->target) {
                    733:         (*me->targetClass.abort)(me->target, e);
                    734:     }
1.19      frystyk   735:     HTChunkClear(&me->title);  /* Henrik 18/02-94 */
1.14      timbl     736:     free(me);
1.1       timbl     737: }
                    738: 
1.2       timbl     739: 
                    740: /*     Get Styles from style sheet
                    741: **     ---------------------------
                    742: */
                    743: PRIVATE void get_styles NOARGS
1.1       timbl     744: {
1.2       timbl     745:     got_styles = YES;
                    746:     
                    747:     default_style =            HTStyleNamed(styleSheet, "Normal");
1.1       timbl     748: 
1.2       timbl     749:     styles[HTML_H1] =          HTStyleNamed(styleSheet, "Heading1");
                    750:     styles[HTML_H2] =          HTStyleNamed(styleSheet, "Heading2");
                    751:     styles[HTML_H3] =          HTStyleNamed(styleSheet, "Heading3");
                    752:     styles[HTML_H4] =          HTStyleNamed(styleSheet, "Heading4");
                    753:     styles[HTML_H5] =          HTStyleNamed(styleSheet, "Heading5");
                    754:     styles[HTML_H6] =          HTStyleNamed(styleSheet, "Heading6");
                    755:     styles[HTML_H7] =          HTStyleNamed(styleSheet, "Heading7");
                    756: 
                    757:     styles[HTML_DL] =          HTStyleNamed(styleSheet, "Glossary");
                    758:     styles[HTML_UL] =
                    759:     styles[HTML_OL] =          HTStyleNamed(styleSheet, "List");
                    760:     styles[HTML_MENU] =                HTStyleNamed(styleSheet, "Menu");
                    761:     styles[HTML_DIR] =         HTStyleNamed(styleSheet, "Dir");    
1.16      timbl     762: /*  styles[HTML_DLC] =         HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2       timbl     763:     styles[HTML_ADDRESS]=      HTStyleNamed(styleSheet, "Address");
                    764:     styles[HTML_BLOCKQUOTE]=   HTStyleNamed(styleSheet, "BlockQuote");
                    765:     styles[HTML_PLAINTEXT] =
                    766:     styles[HTML_XMP] =         HTStyleNamed(styleSheet, "Example");
                    767:     styles[HTML_PRE] =         HTStyleNamed(styleSheet, "Preformatted");
                    768:     styles[HTML_LISTING] =     HTStyleNamed(styleSheet, "Listing");
                    769: }
                    770: /*                             P U B L I C
                    771: */
                    772: 
                    773: /*     Structured Object Class
                    774: **     -----------------------
                    775: */
                    776: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
                    777: {              
                    778:        "text/html",
                    779:        HTML_free,
1.14      timbl     780:        HTML_abort,
1.2       timbl     781:        HTML_put_character,     HTML_put_string,  HTML_write,
                    782:        HTML_start_element,     HTML_end_element,
                    783:        HTML_put_entity
                    784: }; 
1.1       timbl     785: 
1.4       timbl     786: 
1.2       timbl     787: /*             New Structured Text object
                    788: **             --------------------------
                    789: **
1.16      timbl     790: **     The structured stream can generate either presentation,
1.4       timbl     791: **     or plain text, or HTML.
1.1       timbl     792: */
1.16      timbl     793: PUBLIC HTStructured* HTML_new ARGS5(
                    794:        HTRequest *,            request,
                    795:        void *,                 param,
                    796:        HTFormat,               input_format,
                    797:        HTFormat,               output_format,
                    798:        HTStream *,             output_stream)
1.1       timbl     799: {
                    800: 
1.4       timbl     801:     HTStructured * me;
                    802:     
1.16      timbl     803:     if (output_format != WWW_PLAINTEXT
                    804:        && output_format != WWW_PRESENT
                    805:        && output_format != HTAtom_for("text/x-c")) {
1.21      luotonen  806:         HTStream * intermediate = HTStreamStack(WWW_HTML, request, NO);
1.6       timbl     807:        if (intermediate) return HTMLGenerator(intermediate);
1.4       timbl     808:         fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
1.16      timbl     809:                        HTAtom_name(output_format));
1.4       timbl     810:        exit (-99);
                    811:     }
                    812: 
                    813:     me = (HTStructured*) malloc(sizeof(*me));
                    814:     if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1       timbl     815: 
                    816:     if (!got_styles) get_styles();
                    817: 
1.4       timbl     818:     me->isa = &HTMLPresentation;
1.16      timbl     819:     me->dtd = &DTD;
                    820:     me->node_anchor =  request->anchor;
1.4       timbl     821:     me->title.size = 0;
                    822:     me->title.growby = 128;
                    823:     me->title.allocated = 0;
                    824:     me->title.data = 0;
                    825:     me->text = 0;
                    826:     me->style_change = YES; /* Force check leading to text creation */
                    827:     me->new_style = default_style;
                    828:     me->old_style = 0;
                    829:     me->sp = me->stack + MAX_NESTING - 1;
                    830:     me->sp->tag_number = -1;                           /* INVALID */
                    831:     me->sp->style = default_style;                     /* INVALID */
1.1       timbl     832:     
1.4       timbl     833:     me->comment_start = NULL;
                    834:     me->comment_end = NULL;
1.16      timbl     835:     me->target = output_stream;
                    836:     if (output_stream) me->targetClass = *output_stream->isa;  /* Copy pointers */
1.1       timbl     837:     
1.4       timbl     838:     return (HTStructured*) me;
1.1       timbl     839: }
                    840: 
                    841: 
1.2       timbl     842: /*     HTConverter for HTML to plain text
                    843: **     ----------------------------------
1.1       timbl     844: **
1.2       timbl     845: **     This will convert from HTML to presentation or plain text.
1.1       timbl     846: */
1.16      timbl     847: PUBLIC HTStream* HTMLToPlain ARGS5(
                    848:        HTRequest *,            request,
                    849:        void *,                 param,
                    850:        HTFormat,               input_format,
                    851:        HTFormat,               output_format,
                    852:        HTStream *,             output_stream)
1.1       timbl     853: {
1.16      timbl     854:     return SGML_new(&DTD, HTML_new(
                    855:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     856: }
                    857: 
                    858: 
1.2       timbl     859: /*     HTConverter for HTML to C code
                    860: **     ------------------------------
                    861: **
                    862: **     C copde is like plain text but all non-preformatted code
                    863: **     is commented out.
                    864: **     This will convert from HTML to presentation or plain text.
                    865: */
1.16      timbl     866: PUBLIC HTStream* HTMLToC ARGS5(
                    867:        HTRequest *,            request,
                    868:        void *,                 param,
                    869:        HTFormat,               input_format,
                    870:        HTFormat,               output_format,
                    871:        HTStream *,             output_stream)
1.1       timbl     872: {
1.4       timbl     873:     
                    874:     HTStructured * html;
                    875:     
1.16      timbl     876:     (*output_stream->isa->put_string)(output_stream, "/* "); /* Before even title */
                    877:     html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2       timbl     878:     html->comment_start = "/* ";
1.16      timbl     879:     html->dtd = &DTD;
1.2       timbl     880:     html->comment_end = " */\n";       /* Must start in col 1 for cpp */
1.4       timbl     881: /*    HTML_put_string(html,html->comment_start); */
1.16      timbl     882:     return SGML_new(&DTD, html);
1.1       timbl     883: }
                    884: 
                    885: 
1.2       timbl     886: /*     Presenter for HTML
                    887: **     ------------------
                    888: **
                    889: **     This will convert from HTML to presentation or plain text.
                    890: **
                    891: **     Override this if you have a windows version
1.1       timbl     892: */
1.2       timbl     893: #ifndef GUI
1.16      timbl     894: PUBLIC HTStream* HTMLPresent ARGS5(
                    895:        HTRequest *,            request,
                    896:        void *,                 param,
                    897:        HTFormat,               input_format,
                    898:        HTFormat,               output_format,
                    899:        HTStream *,             output_stream)
1.1       timbl     900: {
1.16      timbl     901:     return SGML_new(&DTD, HTML_new(
                    902:        request, NULL, input_format, output_format, output_stream));
1.1       timbl     903: }
1.2       timbl     904: #endif
1.1       timbl     905: 
                    906: 
1.2       timbl     907: /*     Record error message as a hypertext object
                    908: **     ------------------------------------------
                    909: **
                    910: **     The error message should be marked as an error so that
                    911: **     it can be reloaded later.
                    912: **     This implementation just throws up an error message
                    913: **     and leaves the document unloaded.
1.9       timbl     914: **     A smarter implementation would load an error document,
                    915: **     marking at such so that it is retried on reload.
1.1       timbl     916: **
1.2       timbl     917: ** On entry,
                    918: **     sink    is a stream to the output device if any
                    919: **     number  is the HTTP error number
                    920: **     message is the human readable message.
1.9       timbl     921: **
                    922: ** On exit,
                    923: **     returns a negative number to indicate lack of success in the load.
1.1       timbl     924: */
1.2       timbl     925: 
                    926: PUBLIC int HTLoadError ARGS3(
1.17      luotonen  927:        HTRequest *,    req,
1.2       timbl     928:        int,            number,
                    929:        CONST char *,   message)
                    930: {
1.20      frystyk   931:     char *err = "Oh I screwed up!";            /* Dummy pointer not used (I hope) */
1.2       timbl     932:     HTAlert(message);          /* @@@@@@@@@@@@@@@@@@@ */
1.20      frystyk   933:     /* Clean up! Henrik 04/03-94 */
                    934:     if (req && req->output_stream)
                    935:        (*req->output_stream->isa->abort)(req->output_stream, err);
1.25      luotonen  936:     HTClearErrors(req);
1.2       timbl     937:     return -number;
                    938: } 
                    939: 
1.28      frystyk   940: 
                    941: /* ------------------------------------------------------------------------- */
                    942: /* NOTE: THIS FUNCTION IS PLACED HEER AS THE HTML.C MODULE NORMALLY GETS
1.30      luotonen  943: **       OVERRIDDEN BY THE CLIENT OR SERVER
1.28      frystyk   944: **                                                             HTErrorMsg
                    945: **
1.32    ! frystyk   946: **     Default function that creates an error message using HTAlert() to
        !           947: **     put out the contents of the error_stack messages. Furthermore, the
        !           948: **     error_info structure contains a name of a help file that might be put
        !           949: **     up as a link. This file can then be multi-linguistic.
        !           950: **
1.28      frystyk   951: **     This function might be overwritten by a smart server or client.
                    952: */
                    953: PUBLIC void HTErrorMsg ARGS1(HTRequest *, request)
                    954: {
                    955:     HTList *cur = request->error_stack;
                    956:     BOOL highest = YES;
1.32    ! frystyk   957:     HTChunk *chunk;
1.28      frystyk   958:     HTErrorInfo *pres;
                    959:     if (!request) {
                    960:        if (TRACE) fprintf(stderr, "HTErrorMsg.. Bad argument!\n");
                    961:        return;
                    962:     }
1.32    ! frystyk   963: 
        !           964:     /* This check is only necessary if the error message is put down the
        !           965:        stream, because we have to know if a stream has been put up and/or
        !           966:        taken down again. Here it is only put as an example */
1.28      frystyk   967:     if (request->error_block) {
                    968:        if (TRACE) fprintf(stderr, "HTErrorMsg.. Errors are not printed as no stream is available.\n");
                    969:        return;
                    970:     }
                    971: 
                    972:     /* Output messages */
1.32    ! frystyk   973:     chunk = HTChunkCreate(128);
1.28      frystyk   974:     while ((pres = (HTErrorInfo *) HTList_nextObject(cur))) {
                    975: 
                    976:        /* Check if we are going to show the message */
                    977:        if ((!pres->ignore || HTErrorShowMask & HT_ERR_SHOW_IGNORE) && 
                    978:            (HTErrorShowMask & pres->severity)) {
                    979: 
                    980:            /* Output code number */
                    981:            if (highest) {                          /* If first time through */
                    982:                if (TRACE)
                    983:                    fprintf(stderr,
                    984:                            "HTError..... Generating error message.\n");
                    985:                
                    986:                /* Output title */
                    987:                if (pres->severity == ERR_WARNING)
1.32    ! frystyk   988:                    HTChunkPuts(chunk, "Warning ");
1.28      frystyk   989:                else if (pres->severity == ERR_NON_FATAL)
1.32    ! frystyk   990:                    HTChunkPuts(chunk, "Non Fatal Error ");
1.28      frystyk   991:                else if (pres->severity == ERR_FATAL)
1.32    ! frystyk   992:                    HTChunkPuts(chunk, "Fatal Error ");
1.28      frystyk   993:                else {
1.32    ! frystyk   994:                    if (TRACE)
        !           995:                        fprintf(stderr, "HTError..... Unknown Classification of Error (%d)...\n", pres->severity);
        !           996:                    HTChunkFree(chunk);
1.28      frystyk   997:                    return;
                    998:                }
                    999: 
                   1000:                /* Only output error code if it is a real HTTP code */
1.32    ! frystyk  1001:                if (pres->element < HTERR_HTTP_CODES_END) {
        !          1002:                    char codestr[10];
        !          1003:                    sprintf(codestr, "%d ", error_info[pres->element].code);
        !          1004:                    HTChunkPuts(chunk, codestr);
        !          1005:                }
1.28      frystyk  1006:                highest = NO;
                   1007:            } else
1.32    ! frystyk  1008:                HTChunkPuts(chunk, "\nReason: ");
1.28      frystyk  1009: 
                   1010:            /* Output error message */
1.32    ! frystyk  1011:            if (pres->element != HTERR_SYSTEM) {
        !          1012:                HTChunkPuts(chunk, error_info[pres->element].msg);
        !          1013:                HTChunkPutc(chunk, ' ');
        !          1014:            }
1.28      frystyk  1015: 
                   1016:            /* Output parameters */
                   1017:            if (pres->par && HTErrorShowMask & HT_ERR_SHOW_PARS) {
                   1018:                int cnt;
1.32    ! frystyk  1019:                char ch;
1.28      frystyk  1020:                for (cnt=0; cnt<pres->par_length; cnt++) {
1.32    ! frystyk  1021:                    ch = *((char *)(pres->par)+cnt);
        !          1022:                    if (ch < 0x20 || ch >= 0x7F)
        !          1023:                        HTChunkPutc(chunk, '#'); /* Can't print real content */
1.28      frystyk  1024:                    else
1.32    ! frystyk  1025:                        HTChunkPutc(chunk, ch);
1.28      frystyk  1026:                }
                   1027:            }
                   1028: 
                   1029:            /* Output location */
                   1030:            if (pres->where && HTErrorShowMask & HT_ERR_SHOW_LOCATION) {
1.32    ! frystyk  1031:                HTChunkPuts(chunk, "This occured in ");
        !          1032:                HTChunkPuts(chunk, pres->where);
        !          1033:                HTChunkPutc(chunk, '\n');
1.28      frystyk  1034:            }
                   1035:            
                   1036:            /* If we only are going to show the higest entry */
                   1037:            if (HTErrorShowMask & HT_ERR_SHOW_FIRST)
                   1038:                break;
                   1039:        }
                   1040:     }
1.32    ! frystyk  1041:     HTChunkPutc(chunk,  '\n');
        !          1042:     HTChunkTerminate(chunk);
        !          1043:     HTAlert(chunk->data);
        !          1044:     HTChunkFree(chunk);
1.28      frystyk  1045:     return;
                   1046: }
1.29      frystyk  1047: 
                   1048: 
                   1049: 

Webmaster