Annotation of libwww/Library/src/HTMLGen.c, revision 2.27.2.1

2.25      frystyk     1: /*                                                                   HTMLGen.c
                      2: **     HTML GENERATOR
                      3: **
                      4: **     (c) COPYRIGHT CERN 1994.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.1       timbl       6: **
                      7: **     This version of the HTML object sends HTML markup to the output stream.
                      8: **
                      9: ** Bugs:       Line wrapping is not done at all.
                     10: **             All data handled as PCDATA.
                     11: **             Should convert old XMP, LISTING and PLAINTEXT to PRE.
                     12: **
                     13: **     It is not obvious to me right now whether the HEAD should be generated
2.7       timbl      14: **     from the incoming data or the anchor.  Currently it is from the former
2.17      timbl      15: **     which is cleanest. TBL
2.22      duns       16: **
                     17: ** HISTORY:
                     18: **      8 Jul 94  FM   Insulate free() from _free structure element.
                     19: **
2.1       timbl      20: */
                     21: 
2.27      frystyk    22: /* Library include files */
                     23: #include "tcp.h"
                     24: #include "HTUtils.h"
2.12      timbl      25: #include "HTMLPDTD.h"
2.1       timbl      26: #include "HTStream.h"
                     27: #include "SGML.h"
                     28: #include "HTFormat.h"
2.23      frystyk    29: #include "HTMLGen.h"                                    /* Implemented here */
                     30: 
                     31: #define BUFFER_SIZE    80      /* Line buffer attempts to make neat breaks */
2.1       timbl      32: 
2.3       timbl      33: #define PUTC(c) (*me->targetClass.put_character)(me->target, c)
2.7       timbl      34: /* #define PUTS(s) (*me->targetClass.put_string)(me->target, s) */
2.4       timbl      35: #define PUTB(s,l) (*me->targetClass.put_block)(me->target, s, l)
2.1       timbl      36: 
                     37: /*             HTML Object
                     38: **             -----------
                     39: */
                     40: 
                     41: struct _HTStream {     /* view of the target stream; this file reads only target->isa */
                     42:        CONST HTStreamClass *           isa;    /* method table; copied into targetClass by the constructors */
                     43:        HTStream *                      target; /* not accessed through this struct in this file */
                     44:        HTStreamClass                   targetClass;    /* COPY for speed */
                     45: };
                     46: 
2.21      timbl      47: #define MAX_CLEANNESS 10       /* highest break weight; the per-weight arrays have MAX_CLEANNESS+1 slots */
2.1       timbl      48: struct _HTStructured { /* state of one HTML generator: target stream plus one buffered output line */
                     49:        CONST HTStructuredClass *       isa;    /* HTMLGeneration or PlainToHTMLConversion */
                     50:        HTStream *                      target; /* stream that receives the generated markup */
                     51:        HTStreamClass                   targetClass;    /* COPY for speed */
2.12      timbl      52:        CONST SGML_dtd *                dtd;    /* source of tag, attribute and entity names */
2.17      timbl      53:        BOOL                            seven_bit;      /* restrict output*/
2.7       timbl      54:        
2.14      frystyk    55:        char                            buffer[BUFFER_SIZE+1];  /* the line being assembled */
2.7       timbl      56:        char *                          write_pointer;  /* next free position in buffer */
2.21      timbl      57:        char *                          line_break [MAX_CLEANNESS+1];   /* break position recorded per weight, NULL if none */
2.7       timbl      58:        int                             cleanness;      /* highest weight with a recorded break; 0 means no break available */
2.21      timbl      59:        BOOL                            overflowed;     /* set when a full buffer had to be emitted without a newline */
                     60:        BOOL                            delete_line_break_char
                     61:                                                [MAX_CLEANNESS+1];      /* YES: the character at the break is dropped when breaking */
2.14      frystyk    62:        char                            preformatted;   /* PRE nesting depth; also forced to 1 while a start tag is written */
2.1       timbl      63: };
                     64: 
2.17      timbl      65: /*                     OUTPUT FUNCTIONS
                     66: **
                     67: **     These functions output the finished SGML stream, doing the
                     68: **     line wrap
                     69: */
                     70: 
2.7       timbl      71: /*     Flush Buffer
                     72: **     ------------
                     73: */
2.21      timbl      74: 
                     75: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
                     76: {
                     77:     int i;
                     78:     for (i=0; i<= MAX_CLEANNESS; i++) {
                     79:         me->line_break[i] = NULL;
                     80:     }
                     81: }
                     82: 
                     83: 
2.27.2.1! frystyk    84: PRIVATE int HTMLGen_flush ARGS1(HTStructured *, me)
2.7       timbl      85: {
                     86:     (*me->targetClass.put_block)(me->target, 
                     87:                                me->buffer,
                     88:                                me->write_pointer - me->buffer);
                     89:     me->write_pointer = me->buffer;
2.21      timbl      90:     flush_breaks(me);
2.7       timbl      91:     me->cleanness = 0;
2.27.2.1! frystyk    92:     return HT_OK;
2.21      timbl      93: }
                     94: 
                     95: 
                     96: /*     Weighted optional line break
                     97: **
                     98: **     We keep track of all the breaks for when we chop the line
                     99: */
                    100: 
                    101: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
                    102:                BOOL, dlbc)
                    103: {
                    104:     me->line_break[new_cleanness] = 
                    105:                         dlbc ? me->write_pointer - 1 /* Point to space */
                    106:                              : me->write_pointer ;   /* point to gap */
                    107:     me->delete_line_break_char[new_cleanness] = dlbc;
                    108:     if (new_cleanness >= me->cleanness)
                    109:        me->cleanness = new_cleanness;
2.7       timbl     110: }
                    111: 
                    112: 
2.1       timbl     113: /*     Character handling
                    114: **     ------------------
2.8       timbl     115: **
                    116: **     The tricky bits are the line break handling.  This attempts
                    117: **     to synchronise line breaks on sentence or phrase ends. This
                    118: **     is important if one stores SGML files in a line-oriented code
                    119: **     repository, so that if a small change is made, line ends don't
                    120: **     shift in a ripple-through to apparently change a large part of the
                    121: **     file. We give extra "cleanness" to spaces appearing directly
                    122: **     after periods (full stops), [semi]colons and commas.
                    123: **        This should make the source files easier to read and modify
2.17      timbl     124: **     by hand, too, though this is not a primary design consideration. TBL
                         **
                         **     HTMLGen_output_character appends c to the line buffer without any
                         **     escaping and always returns HT_OK.  Outside preformatted text a
                         **     newline is stored as a space and every space is recorded as a
                         **     possible break.  When the buffer is nearly full (or a previous
                         **     line overflowed and a break is now known) the line is cut at the
                         **     best recorded break and the remainder is moved to the buffer start.
2.1       timbl     125: */
2.21      timbl     126: PRIVATE char delims[] = ",;:.";                /* @@ english bias */
2.27.2.1! frystyk   127: PRIVATE int HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1       timbl     128: {
2.7       timbl     129: 
                    130:     *me->write_pointer++ = c;  /* store first, decide about breaking afterwards */
                    131:     
2.21      timbl     132:     if (c=='\n') {             /* Newlines */
                    133:         if (me->preformatted) {
                    134:            HTMLGen_flush(me);  /* newline is significant: emit the line exactly as buffered */
2.27.2.1! frystyk   135:            return HT_OK;
2.21      timbl     136:        } else {
                    137:            me->write_pointer[-1] = c = ' ';    /* Treat same as space */
                    138:        }
2.7       timbl     139:     }
                    140:     
2.21      timbl     141:     /* Figure out whether we can break at this point
                    142:     */
2.7       timbl     143:     if ((!me->preformatted  && c==' ')) {
2.8       timbl     144:         int new_cleanness = 1;
                    145:        if (me->write_pointer > (me->buffer + 1)) {     /* a character precedes the space */
2.9       luotonen  146:            char * p;
2.11      timbl     147:            p = strchr(delims, me->write_pointer[-2]);
2.21      timbl     148:            if (p) new_cleanness = p - delims + 4;      /* ',' 4  ';' 5  ':' 6  '.' 7 */
2.8       timbl     149:        }
2.21      timbl     150:        allow_break(me, new_cleanness, YES);    /* YES: the space is dropped if we break here */
2.7       timbl     151:     }
                    152:     
2.21      timbl     153:     /* Flush buffer out when full, or whenever the line is over
                    154:        the nominal maximum and we can break at all
                    155:     */
                    156:     if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
                    157:         ||  (me->overflowed && me->cleanness)) {
                    158:        if (me->cleanness) {
                    159:            char line_break_char = me->line_break[me->cleanness][0];    /* remembered so it can be restored below */
                    160:            char * saved = me->line_break[me->cleanness];       /* becomes the start of the next line */
2.8       timbl     161:            
2.21      timbl     162:            if (me->delete_line_break_char[me->cleanness]) saved++;     /* skip the break character */
                    163:            me->line_break[me->cleanness][0] = '\n';    /* temporarily put the newline into the buffer */
2.7       timbl     164:            (*me->targetClass.put_block)(me->target,
                    165:                                        me->buffer,
2.21      timbl     166:                                        me->line_break[me->cleanness] - me->buffer + 1);
                    167:            me->line_break[me->cleanness][0] = line_break_char; /* undo the temporary newline */
2.7       timbl     168:            {  /* move next line in */
2.8       timbl     169:                char * p=saved;
                    170:                char *q;
                    171:                for(q=me->buffer; p < me->write_pointer; )
2.7       timbl     172:                        *q++ = *p++;
                    173:            }
                    174:            me->cleanness = 0;
2.21      timbl     175:            /* Now we have to check whether there are any perfectly good breaks
                    176:            ** which weren't good enough for the last line but may be
                    177:            **  good enough for the next
                    178:            */
                    179:            {
                    180:                int i;
                    181:                for(i=0; i <= MAX_CLEANNESS; i++) {
                    182:                    if (me->line_break[i] > saved) {    /* break lies in the part that was kept */
                    183:                        me->line_break[i] = me->line_break[i] -
                    184:                                                (saved-me->buffer);
                    185:                        me->cleanness = i;      /* ascending loop: ends up as the highest surviving weight */
                    186:                    } else {
                    187:                        me->line_break[i] = NULL;
                    188:                    }
                    189:                }
                    190:            }
                    191: 
2.8       timbl     192:            me->write_pointer = me->write_pointer - (saved-me->buffer); /* same shift as applied to the text */
2.21      timbl     193:            me->overflowed = NO;
                    194:        } else {   /* No break- just output with no newline */
2.7       timbl     195:            (*me->targetClass.put_block)(me->target,
2.14      frystyk   196:                                         me->buffer,
2.15      luotonen  197:                                         me->write_pointer - me->buffer);
2.8       timbl     198:            me->write_pointer = me->buffer;
2.21      timbl     199:            flush_breaks(me);
                    200:            me->overflowed = YES;       /* break as soon as a break point turns up */
2.7       timbl     201:        }
                    202:     }
2.27.2.1! frystyk   203:     return HT_OK;
2.1       timbl     204: }
                    205: 
                    206: 
                    207: /*     String handling
                    208: **     ---------------
                    209: */
2.27.2.1! frystyk   210: PRIVATE int HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
2.17      timbl     211: {
2.27.2.1! frystyk   212:     while (*s)
        !           213:        HTMLGen_output_character(me, *s++);
        !           214:     return HT_OK;
2.17      timbl     215: }
                    216: 
                    217: 
                    218: /*                     INPUT FUNCTIONS
                    219: **
                    220: **     These take data from the structured stream.  In the input
                    221: **     stream, entities are in raw form.  The seven_bit flag controls
                    222: **     whether the ISO Latin-1 characters are represented in SGML entity
                    223: **     form.  This is only recommended for viewing on older non-latin-1
                    224: **     capable equipment, or for mailing for example. 
                    225: **
                    226: ** Bug: assumes local encoding is ISO!
                    227: */     
2.27.2.1! frystyk   228: PRIVATE int HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
2.17      timbl     229: {
                    230:     if (c=='&') HTMLGen_output_string(me, "&amp;");
                    231:     else if (c=='<') HTMLGen_output_string(me, "&lt;");
                    232:     else if (me->seven_bit && ((unsigned char)c > 127)) {
                    233:         char temp[8];
                    234:        sprintf(temp, "&%d;", c);
                    235:        HTMLGen_output_string(me, temp);
2.27.2.1! frystyk   236:     } else
        !           237:        HTMLGen_output_character(me, c);
        !           238:     return HT_OK;
2.17      timbl     239: }
                    240: 
2.27.2.1! frystyk   241: PRIVATE int HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1       timbl     242: {
2.27.2.1! frystyk   243:     while (*s)
        !           244:        HTMLGen_put_character(me, *s++);
        !           245:     return HT_OK;
2.1       timbl     246: }
                    247: 
2.27.2.1! frystyk   248: PRIVATE int HTMLGen_write ARGS3(HTStructured *, me, CONST char*, b, int, l)
2.1       timbl     249: {
2.27.2.1! frystyk   250:     while (l-- > 0)
        !           251:        HTMLGen_put_character(me, *b++);
        !           252:     return HT_OK;
2.1       timbl     253: }
                    254: 
                    255: 
                    256: /*     Start Element
                    257: **     -------------
2.7       timbl     258: **
                    259: **     Within the opening tag, there may be spaces
                    260: **     and the line may be broken at these spaces.
2.1       timbl     261: */
                    262: PRIVATE void HTMLGen_start_element ARGS4(
2.3       timbl     263:        HTStructured *,         me,
2.2       timbl     264:        int,                    element_number,
                    265:        CONST BOOL*,            present,
                    266:        CONST char **,          value)
2.1       timbl     267: {
                    268:     int i;
2.12      timbl     269:     HTTag * tag = &me->dtd->tags[element_number];
2.1       timbl     270: 
2.20      timbl     271:     /* Control line breaks allowed within tag! */
                    272:     int was_preformatted = me->preformatted;   /* save state */
                    273:     me->preformatted = 1;      /* Can break between attributes */
                    274: 
2.17      timbl     275:     HTMLGen_output_character(me, '<');
                    276:     HTMLGen_output_string(me, tag->name);
2.1       timbl     277:     if (present) for (i=0; i< tag->number_of_attributes; i++) {
                    278:         if (present[i]) {
2.17      timbl     279:            HTMLGen_output_character(me, ' ');
2.21      timbl     280:            allow_break(me, 1, YES);
2.17      timbl     281:            HTMLGen_output_string(me, tag->attributes[i].name);
2.1       timbl     282:            if (value[i]) {
2.17      timbl     283:                HTMLGen_output_string(me, "=\"");
                    284:                HTMLGen_output_string(me, value[i]);
                    285:                HTMLGen_output_character(me, '"');
2.1       timbl     286:            }
                    287:        }
                    288:     }
2.20      timbl     289:     me->preformatted = was_preformatted;       /* Restore state */
                    290: 
2.14      frystyk   291:     /* Nested PRE is no more a problem! */
                    292:     if (element_number == HTML_PRE)
                    293:        me->preformatted++;
2.19      timbl     294: 
                    295:     HTMLGen_output_character(me, '>');
2.7       timbl     296:     
2.20      timbl     297:     /* Here is a funny one.  In PRE, newlines are significant, except of
                    298:     course for one after the <PRE> which is ignored. This means that
                    299:     we MUST put in a dummy one after the <PRE> to protect any real newline
                    300:     within the pre section.
                    301:     
                    302:     However, *within* a PRE section, although we can break after
                    303:     (for example) emphasis start tags, it will probably confuse some
                    304:     parsers so we won't.*/
                    305:     
                    306:     if (element_number == HTML_PRE) {
                    307:         HTMLGen_output_character(me, '\n');
                    308:     } else  if (!me->preformatted && 
                    309:         tag->contents != SGML_EMPTY) {  /* can break after element start */ 
2.21      timbl     310:        allow_break(me, 3, NO);
2.8       timbl     311:     }
2.1       timbl     312: }
                    313: 
                    314: 
2.17      timbl     315: /*     End Element
                    316: **     -----------
2.1       timbl     317: **
2.16      timbl     318: **      The rules for inserting CR LF into SGML are weird, strict, and
                    319: **     nonintuitive.
2.20      timbl     320: **     See comment also about PRE above.
2.1       timbl     321: */
2.3       timbl     322: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.24      frystyk   323:                                      int , element_number)
2.1       timbl     324: {
2.20      timbl     325:     if (element_number == HTML_PRE) {
                    326:         HTMLGen_output_character(me, '\n');
                    327:     } else  if (!me->preformatted) { /* can break before element end */ 
2.21      timbl     328:        allow_break(me, 1, NO);
2.8       timbl     329:     }
2.17      timbl     330:     HTMLGen_output_string(me, "</");
                    331:     HTMLGen_output_string(me, me->dtd->tags[element_number].name);
                    332:     HTMLGen_output_character(me, '>');    /* NO break after. TBL 940501 */
2.14      frystyk   333:     if (element_number == HTML_PRE && me->preformatted)
                    334:        me->preformatted--;
2.1       timbl     335: }
                    336: 
                    337: 
2.17      timbl     338: /*     Expanding entities
                    339: **     ------------------
2.1       timbl     340: **
                    341: */
                    342: 
2.3       timbl     343: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1       timbl     344: {
2.17      timbl     345:     HTMLGen_output_character(me, '&');
                    346:     HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
                    347:     HTMLGen_output_character(me, ';');
2.1       timbl     348: }
                    349: 
2.17      timbl     350: /*     Free an object
                    351: **     --------------
2.1       timbl     352: **
                    353: */
2.24      frystyk   354: PRIVATE int HTMLGen_free ARGS1(HTStructured *, me)
2.1       timbl     355: {
2.21      timbl     356:     HTMLGen_flush(me);
2.7       timbl     357:     (*me->targetClass.put_character)(me->target, '\n');
2.22      duns      358:     (*me->targetClass._free)(me->target);      /* ripple through */
2.3       timbl     359:     free(me);
2.27.2.1! frystyk   360:     return HT_OK;
2.1       timbl     361: }
                    362: 
                    363: 
2.24      frystyk   364: PRIVATE int PlainToHTML_free ARGS1(HTStructured *, me)
2.7       timbl     365: {
                    366:     HTMLGen_end_element(me, HTML_PRE);
                    367:     HTMLGen_end_element(me, HTML_BODY);
                    368:     HTMLGen_end_element(me, HTML_HTML);
                    369:     HTMLGen_free(me);
2.27.2.1! frystyk   370:     return HT_OK;
2.7       timbl     371: }
                    372: 
                    373: 
2.1       timbl     374: 
2.24      frystyk   375: PRIVATE int HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     376: {
2.6       timbl     377:     HTMLGen_free(me);
2.27.2.1! frystyk   378:     return HT_ERROR;
2.1       timbl     379: }
                    380: 
                    381: 
2.24      frystyk   382: PRIVATE int PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     383: {
2.7       timbl     384:     PlainToHTML_free(me);
2.27.2.1! frystyk   385:     return HT_ERROR;
2.1       timbl     386: }
                    387: 
                    388: 
                    389: 
                    390: /*     Structured Object Class
                    391: **     -----------------------
                    392: */
2.5       timbl     393: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1       timbl     394: {              
                    395:        "text/html",    /* presumably the class name slot -- confirm against HTStructuredClass */
2.27.2.1! frystyk   396:        HTMLGen_flush,  /* positional initialiser: order must follow the HTStructuredClass declaration (not visible here) */
2.1       timbl     397:        HTMLGen_free,
2.6       timbl     398:        HTMLGen_abort,
2.1       timbl     399:        HTMLGen_put_character,  HTMLGen_put_string, HTMLGen_write,      /* escaped text input */
2.13      frystyk   400:        HTMLGen_start_element,  HTMLGen_end_element,    /* markup input */
2.1       timbl     401:        HTMLGen_put_entity
                    402: }; 
                    403: 
                    404: 
                    405: /*     Subclass-specific Methods
                    406: **     -------------------------
                    407: */
                    408: 
                    409: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
                    410: {
2.18      luotonen  411:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     412:     if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
                    413:     me->isa = &HTMLGeneration;       
2.12      timbl     414:     me->dtd = &HTMLP_dtd;
2.1       timbl     415: 
2.3       timbl     416:     me->target = output;
                    417:     me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7       timbl     418:     
                    419:     me->write_pointer = me->buffer;
2.21      timbl     420:     flush_breaks(me);
2.3       timbl     421:     return me;
2.1       timbl     422: }
                    423: 
                    424: /*     Stream Object Class
                    425: **     -------------------
                    426: **
2.2       timbl     427: **     This object just converts a plain text stream into HTML
2.12      timbl     428: **     It is officially a structured stream but only the stream bits exist.
2.2       timbl     429: **     This is just the easiest way of typecasting all the routines.
2.1       timbl     430: */
2.2       timbl     431: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1       timbl     432: {              
                    433:        "plaintexttoHTML",      /* presumably the class name slot -- confirm against HTStructuredClass */
2.27.2.1! frystyk   434:        HTMLGen_flush,
2.13      frystyk   435:        PlainToHTML_free,       /* HTMLGen_free,  Henrik 03/03-94 */
2.6       timbl     436:        PlainToHTML_abort,      /* both also close the PRE, BODY and HTML elements */
2.1       timbl     437:        HTMLGen_put_character,
                    438:        HTMLGen_put_string,
                    439:        HTMLGen_write,
2.2       timbl     440:        NULL,           /* Structured stuff */
                    441:        NULL,   /* these three slots hold start_element, end_element, put_entity in HTMLGeneration */
                    442:        NULL
2.1       timbl     443: }; 
                    444: 
                    445: 
                    446: /*     HTConverter from plain text to HTML Stream
                    447: **     ------------------------------------------
2.13      frystyk   448: **
                    449: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1       timbl     450: */
                    451: 
2.12      timbl     452: PUBLIC HTStream* HTPlainToHTML ARGS5(
                    453:        HTRequest *,            request,
                    454:        void *,                 param,
                    455:        HTFormat,               input_format,
                    456:        HTFormat,               output_format,
                    457:        HTStream *,             output_stream)
2.1       timbl     458: {
2.13      frystyk   459:     BOOL present[MAX_ATTRIBUTES];      /* Flags: attribute is present? */
                    460:     CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18      luotonen  461:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     462:     if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13      frystyk   463:     
                    464:     memset(present, '\0', MAX_ATTRIBUTES);
2.27      frystyk   465:     memset((char *) value, '\0', MAX_ATTRIBUTES*sizeof(char *));
2.13      frystyk   466:     
                    467:     me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12      timbl     468:     me->dtd = &HTMLP_dtd;
                    469:     me->target = output_stream;
2.13      frystyk   470:     me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
                    471:     me->write_pointer = me->buffer;
2.21      timbl     472:     flush_breaks(me);
2.13      frystyk   473:     
                    474:     HTMLGen_start_element(me, HTML_HTML, present, value);
                    475:     HTMLGen_start_element(me, HTML_BODY, present, value);
                    476:     HTMLGen_start_element(me, HTML_PRE, present, value);
                    477: 
2.7       timbl     478:     return (HTStream*) me;
2.1       timbl     479: }
2.13      frystyk   480: 
                    481: 
2.17      timbl     482: /*     A safe version for making 7-bit restricted HTML
                    483: **     Beware that this makes it horrible for the Scandinavians
                    484: **     to actually read it.
                    485: */
                    486: 
                    487: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
                    488:        HTRequest *,            request,
                    489:        void *,                 param,
                    490:        HTFormat,               input_format,
                    491:        HTFormat,               output_format,
                    492:        HTStream *,             output_stream)
2.13      frystyk   493: 
2.17      timbl     494: {
                    495:     HTStream* me = HTPlainToHTML(request,param,input_format,
                    496:                output_format, output_stream);
                    497:     ((HTStructured*)me)->seven_bit = YES;
                    498:     return me;
                    499: }
2.1       timbl     500: 

Webmaster