Annotation of libwww/Library/src/HTMLGen.c, revision 2.31

2.25      frystyk     1: /*                                                                   HTMLGen.c
                      2: **     HTML GENERATOR
                      3: **
2.29      frystyk     4: **     (c) COPYRIGHT MIT 1995.
2.25      frystyk     5: **     Please first read the full copyright statement in the file COPYRIGH.
2.1       timbl       6: **
                      7: **     This version of the HTML object sends HTML markup to the output stream.
                      8: **
                      9: ** Bugs:       Line wrapping is not done at all.
                     10: **             All data handled as PCDATA.
                     11: **             Should convert old XMP, LISTING and PLAINTEXT to PRE.
                     12: **
                     13: **     It is not obvious to me right now whether the HEAD should be generated
2.7       timbl      14: **     from the incoming data or the anchor.  Currently it is from the former
2.17      timbl      15: **     which is cleanest. TBL
2.22      duns       16: **
                     17: ** HISTORY:
                     18: **      8 Jul 94  FM   Insulate free() from _free structure element.
                     19: **
2.1       timbl      20: */
                     21: 
2.27      frystyk    22: /* Library include files */
                     23: #include "tcp.h"
                     24: #include "HTUtils.h"
2.12      timbl      25: #include "HTMLPDTD.h"
2.31    ! frystyk    26: #include "HTStruct.h"
2.1       timbl      27: #include "HTFormat.h"
2.31    ! frystyk    28: #include "HTFWrite.h"
2.23      frystyk    29: #include "HTMLGen.h"                                    /* Implemented here */
                     30: 
                     31: #define BUFFER_SIZE    80      /* Line buffer attempts to make neat breaks */
2.31    ! frystyk    32: #define MAX_CLEANNESS  10
2.1       timbl      33: 
2.31    ! frystyk    34: #define PUT_CHAR(c)    (*me->target->isa->put_character)(me->target, c)
        !            35: #define PUT_STR(s)     (*me->target->isa->put_string)(me->target, s)
        !            36: #define PUT_BLOCK(s,l) (*me->target->isa->put_block)(me->target, s, l)
2.1       timbl      37: 
2.31    ! frystyk    38: /* HTML Generator Object */
2.1       timbl      39: struct _HTStream {
2.31    ! frystyk    40:     CONST HTStreamClass *      isa;
        !            41:     HTStream *                 target;
2.1       timbl      42: };
                     43: 
                     44: struct _HTStructured {
2.31    ! frystyk    45:     CONST HTStructuredClass *  isa;
        !            46:     HTStream *                         target;
        !            47:     CONST SGML_dtd *           dtd;
        !            48:     BOOL                       seven_bit;                /* restrict output */
2.7       timbl      49:        
2.31    ! frystyk    50:     char                       buffer[BUFFER_SIZE+1];
        !            51:     char *                     write_pointer;
        !            52:     char *                     line_break [MAX_CLEANNESS+1];
        !            53:     int                                cleanness;
        !            54:     BOOL                       overflowed;
        !            55:     BOOL                       delete_line_break_char[MAX_CLEANNESS+1];
        !            56:     char                       preformatted;
2.1       timbl      57: };
                     58: 
2.17      timbl      59: /*                     OUTPUT FUNCTIONS
                     60: **
                     61: **     These functions output the finished SGML stream doing the
                     62: **     line wrap
                     63: */
                     64: 
2.7       timbl      65: /*     Flush Buffer
                     66: **     ------------
                     67: */
2.21      timbl      68: 
                     69: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
                     70: {
                     71:     int i;
                     72:     for (i=0; i<= MAX_CLEANNESS; i++) {
                     73:         me->line_break[i] = NULL;
                     74:     }
                     75: }
                     76: 
                     77: 
2.28      frystyk    78: PRIVATE int HTMLGen_flush ARGS1(HTStructured *, me)
2.7       timbl      79: {
2.31    ! frystyk    80:     PUT_BLOCK(me->buffer, me->write_pointer - me->buffer);
2.7       timbl      81:     me->write_pointer = me->buffer;
2.21      timbl      82:     flush_breaks(me);
2.7       timbl      83:     me->cleanness = 0;
2.28      frystyk    84:     return HT_OK;
2.21      timbl      85: }
                     86: 
                     87: 
                     88: /*     Weighted optional line break
                     89: **
                     90: **     We keep track of all the breaks for when we chop the line
                     91: */
                     92: 
                     93: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
                     94:                BOOL, dlbc)
                     95: {
                     96:     me->line_break[new_cleanness] = 
                     97:                         dlbc ? me->write_pointer - 1 /* Point to space */
                     98:                              : me->write_pointer ;   /* point to gap */
                     99:     me->delete_line_break_char[new_cleanness] = dlbc;
                    100:     if (new_cleanness >= me->cleanness)
                    101:        me->cleanness = new_cleanness;
2.7       timbl     102: }
                    103: 
                    104: 
2.1       timbl     105: /*     Character handling
                    106: **     ------------------
2.8       timbl     107: **
                    108: **     The tricky bits are the line break handling.  This attempts
                    109: **     to synchrononise line breaks on sentence or phrase ends. This
                    110: **     is important if one stores SGML files in a line-oriented code
                    111: **     repository, so that if a small change is made, line ends don't
                    112: **     shift in a ripple-through to apparently change a large part of the
                    113: **     file. We give extra "cleanness" to spaces appearing directly
                    114: **     after periods (full stops), [semi]colons and commas.
                    115: **        This should make the source files easier to read and modify
2.17      timbl     116: **     by hand, too, though this is not a primary design consideration. TBL
2.1       timbl     117: */
2.21      timbl     118: PRIVATE char delims[] = ",;:.";                /* @@ english bias */
2.28      frystyk   119: PRIVATE int HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1       timbl     120: {
2.7       timbl     121: 
                    122:     *me->write_pointer++ = c;
                    123:     
2.21      timbl     124:     if (c=='\n') {             /* Newlines */
                    125:         if (me->preformatted) {
                    126:            HTMLGen_flush(me);
2.28      frystyk   127:            return HT_OK;
2.21      timbl     128:        } else {
                    129:            me->write_pointer[-1] = c = ' ';    /* Treat same as space */
                    130:        }
2.7       timbl     131:     }
                    132:     
2.21      timbl     133:     /* Figure our whether we can break at this point
                    134:     */
2.7       timbl     135:     if ((!me->preformatted  && c==' ')) {
2.8       timbl     136:         int new_cleanness = 1;
                    137:        if (me->write_pointer > (me->buffer + 1)) {
2.9       luotonen  138:            char * p;
2.11      timbl     139:            p = strchr(delims, me->write_pointer[-2]);
2.21      timbl     140:            if (p) new_cleanness = p - delims + 4;
2.8       timbl     141:        }
2.21      timbl     142:        allow_break(me, new_cleanness, YES);
2.7       timbl     143:     }
                    144:     
2.21      timbl     145:     /* Flush buffer out when full, or whenever the line is over
                    146:        the nominal maximum and we can break at all
                    147:     */
                    148:     if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
                    149:         ||  (me->overflowed && me->cleanness)) {
                    150:        if (me->cleanness) {
                    151:            char line_break_char = me->line_break[me->cleanness][0];
                    152:            char * saved = me->line_break[me->cleanness];
2.8       timbl     153:            
2.21      timbl     154:            if (me->delete_line_break_char[me->cleanness]) saved++; 
                    155:            me->line_break[me->cleanness][0] = '\n';
2.31    ! frystyk   156:            PUT_BLOCK(me->buffer, me->line_break[me->cleanness]-me->buffer+1);
2.21      timbl     157:            me->line_break[me->cleanness][0] = line_break_char;
2.7       timbl     158:            {  /* move next line in */
2.8       timbl     159:                char * p=saved;
                    160:                char *q;
                    161:                for(q=me->buffer; p < me->write_pointer; )
2.7       timbl     162:                        *q++ = *p++;
                    163:            }
                    164:            me->cleanness = 0;
2.21      timbl     165:            /* Now we have to check whether ther are any perfectly good breaks
                    166:            ** which weren't good enough for the last line but may be
                    167:            **  good enough for the next
                    168:            */
                    169:            {
                    170:                int i;
                    171:                for(i=0; i <= MAX_CLEANNESS; i++) {
                    172:                    if (me->line_break[i] > saved) {
                    173:                        me->line_break[i] = me->line_break[i] -
                    174:                                                (saved-me->buffer);
                    175:                        me->cleanness = i;
                    176:                    } else {
                    177:                        me->line_break[i] = NULL;
                    178:                    }
                    179:                }
                    180:            }
                    181: 
2.8       timbl     182:            me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21      timbl     183:            me->overflowed = NO;
                    184:        } else {   /* No break- just output with no newline */
2.31    ! frystyk   185:            PUT_BLOCK(me->buffer, me->write_pointer - me->buffer);
2.8       timbl     186:            me->write_pointer = me->buffer;
2.21      timbl     187:            flush_breaks(me);
                    188:            me->overflowed = YES;
2.7       timbl     189:        }
                    190:     }
2.28      frystyk   191:     return HT_OK;
2.1       timbl     192: }
                    193: 
                    194: 
                    195: /*     String handling
                    196: **     ---------------
                    197: */
2.28      frystyk   198: PRIVATE int HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
2.17      timbl     199: {
2.28      frystyk   200:     while (*s)
                    201:        HTMLGen_output_character(me, *s++);
                    202:     return HT_OK;
2.17      timbl     203: }
                    204: 
                    205: 
                    206: /*                     INPUT FUNCTIONS
                    207: **
                    208: **     These take data from the structured stream.  In the input
                    209: **     stream, entities are in raw form.  The seven_bit flag controls
                    210: **     whether the ISO Latin-1 characters are represented in SGML entity
                    211: **     form.  This is only recommended for viewing on older non-latin-1
                    212: **     capable equipment, or for mailing for example. 
                    213: **
                    214: ** Bug: assumes local encoding is ISO!
                    215: */     
2.28      frystyk   216: PRIVATE int HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
2.17      timbl     217: {
                    218:     if (c=='&') HTMLGen_output_string(me, "&amp;");
                    219:     else if (c=='<') HTMLGen_output_string(me, "&lt;");
                    220:     else if (me->seven_bit && ((unsigned char)c > 127)) {
                    221:         char temp[8];
                    222:        sprintf(temp, "&%d;", c);
                    223:        HTMLGen_output_string(me, temp);
2.28      frystyk   224:     } else
                    225:        HTMLGen_output_character(me, c);
                    226:     return HT_OK;
2.17      timbl     227: }
                    228: 
2.28      frystyk   229: PRIVATE int HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1       timbl     230: {
2.28      frystyk   231:     while (*s)
                    232:        HTMLGen_put_character(me, *s++);
                    233:     return HT_OK;
2.1       timbl     234: }
                    235: 
2.28      frystyk   236: PRIVATE int HTMLGen_write ARGS3(HTStructured *, me, CONST char*, b, int, l)
2.1       timbl     237: {
2.28      frystyk   238:     while (l-- > 0)
                    239:        HTMLGen_put_character(me, *b++);
                    240:     return HT_OK;
2.1       timbl     241: }
                    242: 
                    243: 
                    244: /*     Start Element
                    245: **     -------------
2.7       timbl     246: **
                    247: **     Within the opening tag, there may be spaces
                    248: **     and the line may be broken at these spaces.
2.1       timbl     249: */
                    250: PRIVATE void HTMLGen_start_element ARGS4(
2.3       timbl     251:        HTStructured *,         me,
2.2       timbl     252:        int,                    element_number,
                    253:        CONST BOOL*,            present,
                    254:        CONST char **,          value)
2.1       timbl     255: {
                    256:     int i;
2.12      timbl     257:     HTTag * tag = &me->dtd->tags[element_number];
2.1       timbl     258: 
2.20      timbl     259:     /* Control line breaks allowed within tag! */
                    260:     int was_preformatted = me->preformatted;   /* save state */
                    261:     me->preformatted = 1;      /* Can break between attributes */
                    262: 
2.17      timbl     263:     HTMLGen_output_character(me, '<');
                    264:     HTMLGen_output_string(me, tag->name);
2.1       timbl     265:     if (present) for (i=0; i< tag->number_of_attributes; i++) {
                    266:         if (present[i]) {
2.17      timbl     267:            HTMLGen_output_character(me, ' ');
2.21      timbl     268:            allow_break(me, 1, YES);
2.17      timbl     269:            HTMLGen_output_string(me, tag->attributes[i].name);
2.1       timbl     270:            if (value[i]) {
2.17      timbl     271:                HTMLGen_output_string(me, "=\"");
                    272:                HTMLGen_output_string(me, value[i]);
                    273:                HTMLGen_output_character(me, '"');
2.1       timbl     274:            }
                    275:        }
                    276:     }
2.20      timbl     277:     me->preformatted = was_preformatted;       /* Restore state */
                    278: 
2.14      frystyk   279:     /* Nested PRE is no more a problem! */
                    280:     if (element_number == HTML_PRE)
                    281:        me->preformatted++;
2.19      timbl     282: 
                    283:     HTMLGen_output_character(me, '>');
2.7       timbl     284:     
2.20      timbl     285:     /* Here is a funny one.  In PRE, newlines are significant, except of
                    286:     course for one after the <PRE> which is ignored. This means that
                    287:     we MUST put in a dummy one after the <PRE> to protect any real newline
                    288:     within the pre section.
                    289:     
                    290:     However, *within* a PRE section, although we can break after
                    291:     (for example) emphasis start tags, it will probably confuse some
                    292:     parsers so we won't.*/
                    293:     
                    294:     if (element_number == HTML_PRE) {
                    295:         HTMLGen_output_character(me, '\n');
                    296:     } else  if (!me->preformatted && 
                    297:         tag->contents != SGML_EMPTY) {  /* can break after element start */ 
2.21      timbl     298:        allow_break(me, 3, NO);
2.8       timbl     299:     }
2.1       timbl     300: }
                    301: 
                    302: 
2.17      timbl     303: /*     End Element
                    304: **     -----------
2.1       timbl     305: **
2.16      timbl     306: **      The rules for insertring CR LF into SGML are weird, strict, and
                    307: **     nonintitive.
2.20      timbl     308: **     See comment also about PRE above.
2.1       timbl     309: */
2.3       timbl     310: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.24      frystyk   311:                                      int , element_number)
2.1       timbl     312: {
2.20      timbl     313:     if (element_number == HTML_PRE) {
                    314:         HTMLGen_output_character(me, '\n');
                    315:     } else  if (!me->preformatted) { /* can break before element end */ 
2.21      timbl     316:        allow_break(me, 1, NO);
2.8       timbl     317:     }
2.17      timbl     318:     HTMLGen_output_string(me, "</");
                    319:     HTMLGen_output_string(me, me->dtd->tags[element_number].name);
                    320:     HTMLGen_output_character(me, '>');    /* NO break after. TBL 940501 */
2.14      frystyk   321:     if (element_number == HTML_PRE && me->preformatted)
                    322:        me->preformatted--;
2.1       timbl     323: }
                    324: 
                    325: 
2.17      timbl     326: /*     Expanding entities
                    327: **     ------------------
2.1       timbl     328: **
                    329: */
                    330: 
2.3       timbl     331: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1       timbl     332: {
2.17      timbl     333:     HTMLGen_output_character(me, '&');
                    334:     HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
                    335:     HTMLGen_output_character(me, ';');
2.1       timbl     336: }
                    337: 
2.17      timbl     338: /*     Free an object
                    339: **     --------------
2.1       timbl     340: **
                    341: */
2.24      frystyk   342: PRIVATE int HTMLGen_free ARGS1(HTStructured *, me)
2.1       timbl     343: {
2.21      timbl     344:     HTMLGen_flush(me);
2.31    ! frystyk   345:     PUT_CHAR('\n');
        !           346:     (*me->target->isa->_free)(me->target);
2.3       timbl     347:     free(me);
2.28      frystyk   348:     return HT_OK;
2.1       timbl     349: }
                    350: 
                    351: 
2.24      frystyk   352: PRIVATE int PlainToHTML_free ARGS1(HTStructured *, me)
2.7       timbl     353: {
                    354:     HTMLGen_end_element(me, HTML_PRE);
                    355:     HTMLGen_end_element(me, HTML_BODY);
                    356:     HTMLGen_end_element(me, HTML_HTML);
                    357:     HTMLGen_free(me);
2.28      frystyk   358:     return HT_OK;
2.7       timbl     359: }
                    360: 
                    361: 
2.1       timbl     362: 
2.24      frystyk   363: PRIVATE int HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     364: {
2.6       timbl     365:     HTMLGen_free(me);
2.28      frystyk   366:     return HT_ERROR;
2.1       timbl     367: }
                    368: 
                    369: 
2.24      frystyk   370: PRIVATE int PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     371: {
2.7       timbl     372:     PlainToHTML_free(me);
2.28      frystyk   373:     return HT_ERROR;
2.1       timbl     374: }
                    375: 
                    376: 
                    377: 
                    378: /*     Structured Object Class
                    379: **     -----------------------
                    380: */
2.5       timbl     381: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1       timbl     382: {              
                    383:        "text/html",
2.28      frystyk   384:        HTMLGen_flush,
2.1       timbl     385:        HTMLGen_free,
2.6       timbl     386:        HTMLGen_abort,
2.1       timbl     387:        HTMLGen_put_character,  HTMLGen_put_string, HTMLGen_write,
2.13      frystyk   388:        HTMLGen_start_element,  HTMLGen_end_element,
2.1       timbl     389:        HTMLGen_put_entity
                    390: }; 
                    391: 
                    392: 
                    393: /*     Subclass-specific Methods
                    394: **     -------------------------
                    395: */
2.31    ! frystyk   396: PUBLIC HTStructured* HTMLGenerator ARGS5(HTRequest *,  request,
        !           397:                                         void *,        param,
        !           398:                                         HTFormat,      input_format,
        !           399:                                         HTFormat,      output_format,
        !           400:                                         HTStream *,    output_stream)
2.1       timbl     401: {
2.31    ! frystyk   402:     HTStructured* me = (HTStructured*)calloc(1, sizeof(*me));
2.3       timbl     403:     if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
                    404:     me->isa = &HTMLGeneration;       
2.12      timbl     405:     me->dtd = &HTMLP_dtd;
2.31    ! frystyk   406:     if ((me->target = HTStreamStack(WWW_HTML, output_format, output_stream,
        !           407:                                    request, YES)) == NULL) {
        !           408:        if (STREAM_TRACE)
        !           409:            fprintf(TDEST, "HTMLGen..... Can't convert to media type\n");
        !           410:        me->target = HTBlackHole();
        !           411:     }
2.7       timbl     412:     me->write_pointer = me->buffer;
2.21      timbl     413:     flush_breaks(me);
2.3       timbl     414:     return me;
2.1       timbl     415: }
                    416: 
                    417: /*     Stream Object Class
                    418: **     -------------------
                    419: **
2.2       timbl     420: **     This object just converts a plain text stream into HTML
2.12      timbl     421: **     It is officially a structured stream but only the stream bits exist.
2.2       timbl     422: **     This is just the easiest way of typecasting all the routines.
2.1       timbl     423: */
2.2       timbl     424: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1       timbl     425: {              
                    426:        "plaintexttoHTML",
2.28      frystyk   427:        HTMLGen_flush,
2.13      frystyk   428:        PlainToHTML_free,       /* HTMLGen_free,  Henrik 03/03-94 */
2.6       timbl     429:        PlainToHTML_abort,      
2.1       timbl     430:        HTMLGen_put_character,
                    431:        HTMLGen_put_string,
                    432:        HTMLGen_write,
2.2       timbl     433:        NULL,           /* Structured stuff */
                    434:        NULL,
                    435:        NULL
2.1       timbl     436: }; 
                    437: 
                    438: 
                    439: /*     HTConverter from plain text to HTML Stream
                    440: **     ------------------------------------------
2.13      frystyk   441: **
                    442: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1       timbl     443: */
                    444: 
2.12      timbl     445: PUBLIC HTStream* HTPlainToHTML ARGS5(
                    446:        HTRequest *,            request,
                    447:        void *,                 param,
                    448:        HTFormat,               input_format,
                    449:        HTFormat,               output_format,
                    450:        HTStream *,             output_stream)
2.1       timbl     451: {
2.13      frystyk   452:     BOOL present[MAX_ATTRIBUTES];      /* Flags: attribute is present? */
                    453:     CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18      luotonen  454:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     455:     if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13      frystyk   456:     
                    457:     memset(present, '\0', MAX_ATTRIBUTES);
2.27      frystyk   458:     memset((char *) value, '\0', MAX_ATTRIBUTES*sizeof(char *));
2.13      frystyk   459:     
                    460:     me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12      timbl     461:     me->dtd = &HTMLP_dtd;
                    462:     me->target = output_stream;
2.13      frystyk   463:     me->write_pointer = me->buffer;
2.21      timbl     464:     flush_breaks(me);
2.13      frystyk   465:     
                    466:     HTMLGen_start_element(me, HTML_HTML, present, value);
                    467:     HTMLGen_start_element(me, HTML_BODY, present, value);
                    468:     HTMLGen_start_element(me, HTML_PRE, present, value);
                    469: 
2.7       timbl     470:     return (HTStream*) me;
2.1       timbl     471: }
2.13      frystyk   472: 
                    473: 
2.17      timbl     474: /*     A safe version for making 7-bit restricted HTML
                    475: **     Beware that thsi makes it horrible for the Scandinavians
                    476: **     to actually read it.
2.30      frystyk   477: **     ehh - not horrible - THIS REALLY PISSES THEM OFF - Henrik ;-)
2.17      timbl     478: */
                    479: 
                    480: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
                    481:        HTRequest *,            request,
                    482:        void *,                 param,
                    483:        HTFormat,               input_format,
                    484:        HTFormat,               output_format,
                    485:        HTStream *,             output_stream)
2.13      frystyk   486: 
2.17      timbl     487: {
                    488:     HTStream* me = HTPlainToHTML(request,param,input_format,
                    489:                output_format, output_stream);
                    490:     ((HTStructured*)me)->seven_bit = YES;
                    491:     return me;
                    492: }
2.1       timbl     493: 

Webmaster