Annotation of libwww/Library/src/HTMLGen.c, revision 2.25.2.1

2.25      frystyk     1: /*                                                                   HTMLGen.c
                      2: **     HTML GENERATOR
                      3: **
                      4: **     (c) COPYRIGHT CERN 1994.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.1       timbl       6: **
                      7: **     This version of the HTML object sends HTML markup to the output stream.
                      8: **
                      9: ** Bugs:       Line wrapping is not done at all.
                     10: **             All data handled as PCDATA.
                     11: **             Should convert old XMP, LISTING and PLAINTEXT to PRE.
                     12: **
                     13: **     It is not obvious to me right now whether the HEAD should be generated
2.7       timbl      14: **     from the incoming data or the anchor.  Currently it is from the former
2.17      timbl      15: **     which is cleanest. TBL
2.22      duns       16: **
                     17: ** HISTORY:
                     18: **      8 Jul 94  FM   Insulate free() from _free structure element.
                     19: **
2.1       timbl      20: */
                     21: 
2.25.2.1! frystyk    22: /* Library include files */
        !            23: #include "tcp.h"
        !            24: #include "HTUtils.h"
2.12      timbl      25: #include "HTMLPDTD.h"
2.1       timbl      26: #include "HTStream.h"
                     27: #include "SGML.h"
                     28: #include "HTFormat.h"
2.23      frystyk    29: #include "HTMLGen.h"                                    /* Implemented here */
                     30: 
                     31: #define BUFFER_SIZE    80      /* Line buffer attempts to make neat breaks */
2.1       timbl      32: 
2.3       timbl      33: #define PUTC(c) (*me->targetClass.put_character)(me->target, c)
2.7       timbl      34: /* #define PUTS(s) (*me->targetClass.put_string)(me->target, s) */
2.4       timbl      35: #define PUTB(s,l) (*me->targetClass.put_block)(me->target, s, l)
2.1       timbl      36: 
                     37: /*             HTML Object
                     38: **             -----------
                     39: */
                     40: 
                     41: struct _HTStream {
                     42:        CONST HTStreamClass *           isa;    
                     43:        HTStream *                      target;
                     44:        HTStreamClass                   targetClass;    /* COPY for speed */
                     45: };
                     46: 
2.21      timbl      47: #define MAX_CLEANNESS 10
2.1       timbl      48: struct _HTStructured {
                     49:        CONST HTStructuredClass *       isa;
                     50:        HTStream *                      target;
                     51:        HTStreamClass                   targetClass;    /* COPY for speed */
2.12      timbl      52:        CONST SGML_dtd *                dtd;
2.17      timbl      53:        BOOL                            seven_bit;      /* restrict output*/
2.7       timbl      54:        
2.14      frystyk    55:        char                            buffer[BUFFER_SIZE+1];
2.7       timbl      56:        char *                          write_pointer;
2.21      timbl      57:        char *                          line_break [MAX_CLEANNESS+1];
2.7       timbl      58:        int                             cleanness;
2.21      timbl      59:        BOOL                            overflowed;
                     60:        BOOL                            delete_line_break_char
                     61:                                                [MAX_CLEANNESS+1];
2.14      frystyk    62:        char                            preformatted;
2.1       timbl      63: };
                     64: 
2.17      timbl      65: /*                     OUTPUT FUNCTIONS
                     66: **
                     67: **     These functions output the finished SGML stream doing the
                     68: **     line wrap
                     69: */
                     70: 
2.7       timbl      71: /*     Flush Buffer
                     72: **     ------------
                     73: */
2.21      timbl      74: 
                     75: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
                     76: {
                     77:     int i;
                     78:     for (i=0; i<= MAX_CLEANNESS; i++) {
                     79:         me->line_break[i] = NULL;
                     80:     }
                     81: }
                     82: 
                     83: 
2.7       timbl      84: PRIVATE void HTMLGen_flush ARGS1(HTStructured *, me)
                     85: {
                     86:     (*me->targetClass.put_block)(me->target, 
                     87:                                me->buffer,
                     88:                                me->write_pointer - me->buffer);
                     89:     me->write_pointer = me->buffer;
2.21      timbl      90:     flush_breaks(me);
2.7       timbl      91:     me->cleanness = 0;
2.21      timbl      92: }
                     93: 
                     94: 
                     95: /*     Weighted optional line break
                     96: **
                     97: **     We keep track of all the breaks for when we chop the line
                     98: */
                     99: 
                    100: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
                    101:                BOOL, dlbc)
                    102: {
                    103:     me->line_break[new_cleanness] = 
                    104:                         dlbc ? me->write_pointer - 1 /* Point to space */
                    105:                              : me->write_pointer ;   /* point to gap */
                    106:     me->delete_line_break_char[new_cleanness] = dlbc;
                    107:     if (new_cleanness >= me->cleanness)
                    108:        me->cleanness = new_cleanness;
2.7       timbl     109: }
                    110: 
                    111: 
2.1       timbl     112: /*     Character handling
                    113: **     ------------------
2.8       timbl     114: **
                    115: **     The tricky bits are the line break handling.  This attempts
                    116: **     to synchrononise line breaks on sentence or phrase ends. This
                    117: **     is important if one stores SGML files in a line-oriented code
                    118: **     repository, so that if a small change is made, line ends don't
                    119: **     shift in a ripple-through to apparently change a large part of the
                    120: **     file. We give extra "cleanness" to spaces appearing directly
                    121: **     after periods (full stops), [semi]colons and commas.
                    122: **        This should make the source files easier to read and modify
2.17      timbl     123: **     by hand, too, though this is not a primary design consideration. TBL
2.1       timbl     124: */
2.21      timbl     125: PRIVATE char delims[] = ",;:.";                /* @@ english bias */
2.17      timbl     126: PRIVATE void HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1       timbl     127: {
2.7       timbl     128: 
                    129:     *me->write_pointer++ = c;
                    130:     
2.21      timbl     131:     if (c=='\n') {             /* Newlines */
                    132:         if (me->preformatted) {
                    133:            HTMLGen_flush(me);
                    134:            return;
                    135:        } else {
                    136:            me->write_pointer[-1] = c = ' ';    /* Treat same as space */
                    137:        }
2.7       timbl     138:     }
                    139:     
2.21      timbl     140:     /* Figure our whether we can break at this point
                    141:     */
2.7       timbl     142:     if ((!me->preformatted  && c==' ')) {
2.8       timbl     143:         int new_cleanness = 1;
                    144:        if (me->write_pointer > (me->buffer + 1)) {
2.9       luotonen  145:            char * p;
2.11      timbl     146:            p = strchr(delims, me->write_pointer[-2]);
2.21      timbl     147:            if (p) new_cleanness = p - delims + 4;
2.8       timbl     148:        }
2.21      timbl     149:        allow_break(me, new_cleanness, YES);
2.7       timbl     150:     }
                    151:     
2.21      timbl     152:     /* Flush buffer out when full, or whenever the line is over
                    153:        the nominal maximum and we can break at all
                    154:     */
                    155:     if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
                    156:         ||  (me->overflowed && me->cleanness)) {
                    157:        if (me->cleanness) {
                    158:            char line_break_char = me->line_break[me->cleanness][0];
                    159:            char * saved = me->line_break[me->cleanness];
2.8       timbl     160:            
2.21      timbl     161:            if (me->delete_line_break_char[me->cleanness]) saved++; 
                    162:            me->line_break[me->cleanness][0] = '\n';
2.7       timbl     163:            (*me->targetClass.put_block)(me->target,
                    164:                                        me->buffer,
2.21      timbl     165:                                        me->line_break[me->cleanness] - me->buffer + 1);
                    166:            me->line_break[me->cleanness][0] = line_break_char;
2.7       timbl     167:            {  /* move next line in */
2.8       timbl     168:                char * p=saved;
                    169:                char *q;
                    170:                for(q=me->buffer; p < me->write_pointer; )
2.7       timbl     171:                        *q++ = *p++;
                    172:            }
                    173:            me->cleanness = 0;
2.21      timbl     174:            /* Now we have to check whether ther are any perfectly good breaks
                    175:            ** which weren't good enough for the last line but may be
                    176:            **  good enough for the next
                    177:            */
                    178:            {
                    179:                int i;
                    180:                for(i=0; i <= MAX_CLEANNESS; i++) {
                    181:                    if (me->line_break[i] > saved) {
                    182:                        me->line_break[i] = me->line_break[i] -
                    183:                                                (saved-me->buffer);
                    184:                        me->cleanness = i;
                    185:                    } else {
                    186:                        me->line_break[i] = NULL;
                    187:                    }
                    188:                }
                    189:            }
                    190: 
2.8       timbl     191:            me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21      timbl     192:            me->overflowed = NO;
                    193:        } else {   /* No break- just output with no newline */
2.7       timbl     194:            (*me->targetClass.put_block)(me->target,
2.14      frystyk   195:                                         me->buffer,
2.15      luotonen  196:                                         me->write_pointer - me->buffer);
2.8       timbl     197:            me->write_pointer = me->buffer;
2.21      timbl     198:            flush_breaks(me);
                    199:            me->overflowed = YES;
2.7       timbl     200:        }
                    201:     }
2.1       timbl     202: }
                    203: 
                    204: 
                    205: 
                    206: /*     String handling
                    207: **     ---------------
                    208: */
2.17      timbl     209: PRIVATE void HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
                    210: {
2.24      frystyk   211:     while (*s) HTMLGen_output_character(me, *s++);
2.17      timbl     212: }
                    213: 
                    214: 
                    215: /*                     INPUT FUNCTIONS
                    216: **
                    217: **     These take data from the structured stream.  In the input
                    218: **     stream, entities are in raw form.  The seven_bit flag controls
                    219: **     whether the ISO Latin-1 characters are represented in SGML entity
                    220: **     form.  This is only recommended for viewing on older non-latin-1
                    221: **     capable equipment, or for mailing for example. 
                    222: **
                    223: ** Bug: assumes local encoding is ISO!
                    224: */     
                    225: PRIVATE void HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
                    226: {
                    227:     if (c=='&') HTMLGen_output_string(me, "&amp;");
                    228:     else if (c=='<') HTMLGen_output_string(me, "&lt;");
                    229:     else if (me->seven_bit && ((unsigned char)c > 127)) {
                    230:         char temp[8];
                    231:        sprintf(temp, "&%d;", c);
                    232:        HTMLGen_output_string(me, temp);
                    233:     }
                    234:     else HTMLGen_output_character(me, c);
                    235: }
                    236: 
2.3       timbl     237: PRIVATE void HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1       timbl     238: {
2.24      frystyk   239:     while (*s) HTMLGen_put_character(me, *s++);
2.1       timbl     240: }
                    241: 
2.3       timbl     242: PRIVATE void HTMLGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
2.1       timbl     243: {
2.24      frystyk   244:     while (l-- > 0) HTMLGen_put_character(me, *s++);
2.1       timbl     245: }
                    246: 
                    247: 
                    248: /*     Start Element
                    249: **     -------------
2.7       timbl     250: **
                    251: **     Within the opening tag, there may be spaces
                    252: **     and the line may be broken at these spaces.
2.1       timbl     253: */
                    254: PRIVATE void HTMLGen_start_element ARGS4(
2.3       timbl     255:        HTStructured *,         me,
2.2       timbl     256:        int,                    element_number,
                    257:        CONST BOOL*,            present,
                    258:        CONST char **,          value)
2.1       timbl     259: {
                    260:     int i;
2.12      timbl     261:     HTTag * tag = &me->dtd->tags[element_number];
2.1       timbl     262: 
2.20      timbl     263:     /* Control line breaks allowed within tag! */
                    264:     int was_preformatted = me->preformatted;   /* save state */
                    265:     me->preformatted = 1;      /* Can break between attributes */
                    266: 
2.17      timbl     267:     HTMLGen_output_character(me, '<');
                    268:     HTMLGen_output_string(me, tag->name);
2.1       timbl     269:     if (present) for (i=0; i< tag->number_of_attributes; i++) {
                    270:         if (present[i]) {
2.17      timbl     271:            HTMLGen_output_character(me, ' ');
2.21      timbl     272:            allow_break(me, 1, YES);
2.17      timbl     273:            HTMLGen_output_string(me, tag->attributes[i].name);
2.1       timbl     274:            if (value[i]) {
2.17      timbl     275:                HTMLGen_output_string(me, "=\"");
                    276:                HTMLGen_output_string(me, value[i]);
                    277:                HTMLGen_output_character(me, '"');
2.1       timbl     278:            }
                    279:        }
                    280:     }
2.20      timbl     281:     me->preformatted = was_preformatted;       /* Restore state */
                    282: 
2.14      frystyk   283:     /* Nested PRE is no more a problem! */
                    284:     if (element_number == HTML_PRE)
                    285:        me->preformatted++;
2.19      timbl     286: 
                    287:     HTMLGen_output_character(me, '>');
2.7       timbl     288:     
2.20      timbl     289:     /* Here is a funny one.  In PRE, newlines are significant, except of
                    290:     course for one after the <PRE> which is ignored. This means that
                    291:     we MUST put in a dummy one after the <PRE> to protect any real newline
                    292:     within the pre section.
                    293:     
                    294:     However, *within* a PRE section, although we can break after
                    295:     (for example) emphasis start tags, it will probably confuse some
                    296:     parsers so we won't.*/
                    297:     
                    298:     if (element_number == HTML_PRE) {
                    299:         HTMLGen_output_character(me, '\n');
                    300:     } else  if (!me->preformatted && 
                    301:         tag->contents != SGML_EMPTY) {  /* can break after element start */ 
2.21      timbl     302:        allow_break(me, 3, NO);
2.8       timbl     303:     }
2.1       timbl     304: }
                    305: 
                    306: 
2.17      timbl     307: /*     End Element
                    308: **     -----------
2.1       timbl     309: **
2.16      timbl     310: **      The rules for insertring CR LF into SGML are weird, strict, and
                    311: **     nonintitive.
2.20      timbl     312: **     See comment also about PRE above.
2.1       timbl     313: */
2.3       timbl     314: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.24      frystyk   315:                                      int , element_number)
2.1       timbl     316: {
2.20      timbl     317:     if (element_number == HTML_PRE) {
                    318:         HTMLGen_output_character(me, '\n');
                    319:     } else  if (!me->preformatted) { /* can break before element end */ 
2.21      timbl     320:        allow_break(me, 1, NO);
2.8       timbl     321:     }
2.17      timbl     322:     HTMLGen_output_string(me, "</");
                    323:     HTMLGen_output_string(me, me->dtd->tags[element_number].name);
                    324:     HTMLGen_output_character(me, '>');    /* NO break after. TBL 940501 */
2.14      frystyk   325:     if (element_number == HTML_PRE && me->preformatted)
                    326:        me->preformatted--;
2.1       timbl     327: }
                    328: 
                    329: 
2.17      timbl     330: /*     Expanding entities
                    331: **     ------------------
2.1       timbl     332: **
                    333: */
                    334: 
2.3       timbl     335: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1       timbl     336: {
2.17      timbl     337:     HTMLGen_output_character(me, '&');
                    338:     HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
                    339:     HTMLGen_output_character(me, ';');
2.1       timbl     340: }
                    341: 
                    342: 
                    343: 
2.17      timbl     344: /*     Free an object
                    345: **     --------------
2.1       timbl     346: **
                    347: */
2.24      frystyk   348: PRIVATE int HTMLGen_free ARGS1(HTStructured *, me)
2.1       timbl     349: {
2.21      timbl     350:     HTMLGen_flush(me);
2.7       timbl     351:     (*me->targetClass.put_character)(me->target, '\n');
2.22      duns      352:     (*me->targetClass._free)(me->target);      /* ripple through */
2.3       timbl     353:     free(me);
2.24      frystyk   354:     return 0;
2.1       timbl     355: }
                    356: 
                    357: 
2.24      frystyk   358: PRIVATE int PlainToHTML_free ARGS1(HTStructured *, me)
2.7       timbl     359: {
                    360:     HTMLGen_end_element(me, HTML_PRE);
                    361:     HTMLGen_end_element(me, HTML_BODY);
                    362:     HTMLGen_end_element(me, HTML_HTML);
                    363:     HTMLGen_free(me);
2.24      frystyk   364:     return 0;
2.7       timbl     365: }
                    366: 
                    367: 
2.1       timbl     368: 
2.24      frystyk   369: PRIVATE int HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     370: {
2.6       timbl     371:     HTMLGen_free(me);
2.24      frystyk   372:     return EOF;
2.1       timbl     373: }
                    374: 
                    375: 
2.24      frystyk   376: PRIVATE int PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     377: {
2.7       timbl     378:     PlainToHTML_free(me);
2.24      frystyk   379:     return EOF;
2.1       timbl     380: }
                    381: 
                    382: 
                    383: 
                    384: /*     Structured Object Class
                    385: **     -----------------------
                    386: */
2.5       timbl     387: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1       timbl     388: {              
                    389:        "text/html",
                    390:        HTMLGen_free,
2.6       timbl     391:        HTMLGen_abort,
2.1       timbl     392:        HTMLGen_put_character,  HTMLGen_put_string, HTMLGen_write,
2.13      frystyk   393:        HTMLGen_start_element,  HTMLGen_end_element,
2.1       timbl     394:        HTMLGen_put_entity
                    395: }; 
                    396: 
                    397: 
                    398: /*     Subclass-specific Methods
                    399: **     -------------------------
                    400: */
                    401: 
                    402: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
                    403: {
2.18      luotonen  404:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     405:     if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
                    406:     me->isa = &HTMLGeneration;       
2.12      timbl     407:     me->dtd = &HTMLP_dtd;
2.1       timbl     408: 
2.3       timbl     409:     me->target = output;
                    410:     me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7       timbl     411:     
                    412:     me->write_pointer = me->buffer;
2.21      timbl     413:     flush_breaks(me);
2.3       timbl     414:     return me;
2.1       timbl     415: }
                    416: 
                    417: /*     Stream Object Class
                    418: **     -------------------
                    419: **
2.2       timbl     420: **     This object just converts a plain text stream into HTML
2.12      timbl     421: **     It is officially a structured stream but only the stream bits exist.
2.2       timbl     422: **     This is just the easiest way of typecasting all the routines.
2.1       timbl     423: */
2.2       timbl     424: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1       timbl     425: {              
                    426:        "plaintexttoHTML",
2.13      frystyk   427:        PlainToHTML_free,       /* HTMLGen_free,  Henrik 03/03-94 */
2.6       timbl     428:        PlainToHTML_abort,      
2.1       timbl     429:        HTMLGen_put_character,
                    430:        HTMLGen_put_string,
                    431:        HTMLGen_write,
2.2       timbl     432:        NULL,           /* Structured stuff */
                    433:        NULL,
                    434:        NULL
2.1       timbl     435: }; 
                    436: 
                    437: 
                    438: /*     HTConverter from plain text to HTML Stream
                    439: **     ------------------------------------------
2.13      frystyk   440: **
                    441: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1       timbl     442: */
                    443: 
2.12      timbl     444: PUBLIC HTStream* HTPlainToHTML ARGS5(
                    445:        HTRequest *,            request,
                    446:        void *,                 param,
                    447:        HTFormat,               input_format,
                    448:        HTFormat,               output_format,
                    449:        HTStream *,             output_stream)
2.1       timbl     450: {
2.13      frystyk   451:     BOOL present[MAX_ATTRIBUTES];      /* Flags: attribute is present? */
                    452:     CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18      luotonen  453:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     454:     if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13      frystyk   455:     
                    456:     memset(present, '\0', MAX_ATTRIBUTES);
2.25.2.1! frystyk   457:     memset((char *) value, '\0', MAX_ATTRIBUTES*sizeof(char *));
2.13      frystyk   458:     
                    459:     me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12      timbl     460:     me->dtd = &HTMLP_dtd;
                    461:     me->target = output_stream;
2.13      frystyk   462:     me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
                    463:     me->write_pointer = me->buffer;
2.21      timbl     464:     flush_breaks(me);
2.13      frystyk   465:     
                    466:     HTMLGen_start_element(me, HTML_HTML, present, value);
                    467:     HTMLGen_start_element(me, HTML_BODY, present, value);
                    468:     HTMLGen_start_element(me, HTML_PRE, present, value);
                    469: 
2.7       timbl     470:     return (HTStream*) me;
2.1       timbl     471: }
2.13      frystyk   472: 
                    473: 
2.17      timbl     474: /*     A safe version for making 7-bit restricted HTML
                    475: **     Beware that thsi makes it horrible for the Scandinavians
                    476: **     to actually read it.
                    477: */
                    478: 
                    479: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
                    480:        HTRequest *,            request,
                    481:        void *,                 param,
                    482:        HTFormat,               input_format,
                    483:        HTFormat,               output_format,
                    484:        HTStream *,             output_stream)
2.13      frystyk   485: 
2.17      timbl     486: {
                    487:     HTStream* me = HTPlainToHTML(request,param,input_format,
                    488:                output_format, output_stream);
                    489:     ((HTStructured*)me)->seven_bit = YES;
                    490:     return me;
                    491: }
2.1       timbl     492: 

Webmaster