Annotation of libwww/Library/src/HTMLGen.c, revision 2.20

2.1       timbl       1: /*             HTML Generator
                      2: **             ==============
                      3: **
                      4: **     This version of the HTML object sends HTML markup to the output stream.
                      5: **
                      6: ** Bugs:       Line wrapping is not done at all.
                      7: **             All data handled as PCDATA.
                      8: **             Should convert old XMP, LISTING and PLAINTEXT to PRE.
                      9: **
                     10: **     It is not obvious to me right now whether the HEAD should be generated
2.7       timbl      11: **     from the incoming data or the anchor.  Currently it is from the former
2.17      timbl      12: **     which is cleanest. TBL
2.1       timbl      13: */
                     14: 
2.7       timbl      15: #define BUFFER_SIZE    80      /* Line buffer attempts to make neat breaks */
                     16: 
2.1       timbl      17: /* Implements:
                     18: */
                     19: #include "HTMLGen.h"
                     20: 
                     21: #include <stdio.h>
2.12      timbl      22: #include "HTMLPDTD.h"
2.1       timbl      23: #include "HTStream.h"
                     24: #include "SGML.h"
                     25: #include "HTFormat.h"
2.11      timbl      26: #include "tcp.h"
2.1       timbl      27: 
2.3       timbl      28: #define PUTC(c) (*me->targetClass.put_character)(me->target, c) /* one char straight to the target, not via the line buffer; needs a variable me in scope */
2.7       timbl      29: /* #define PUTS(s) (*me->targetClass.put_string)(me->target, s) */
2.4       timbl      30: #define PUTB(s,l) (*me->targetClass.put_block)(me->target, s, l) /* l bytes at s straight to the target; needs a variable me in scope */
2.1       timbl      31: 
                     32: /*             HTML Object
                     33: **             -----------
                     34: **     Layout of a plain stream as this file sees it; the code below only reads target->isa. */
                     35: 
                     36: struct _HTStream {
                     37:        CONST HTStreamClass *           isa;    /* routine table; copied by value into targetClass by the constructors */
                     38:        HTStream *                      target;
                     39:        HTStreamClass                   targetClass;    /* COPY for speed */
                     40: };
                     41: 
                     42: struct _HTStructured {
                     43:        CONST HTStructuredClass *       isa;    /* &HTMLGeneration or &PlainToHTMLConversion */
                     44:        HTStream *                      target; /* stream that receives the generated text */
                     45:        HTStreamClass                   targetClass;    /* COPY for speed */
2.12      timbl      46:        CONST SGML_dtd *                dtd;    /* source of tag, attribute and entity names */
2.17      timbl      47:        BOOL                            seven_bit;      /* restrict output: chars above 127 are written as references */
2.7       timbl      48:        
2.14      frystyk    49:        char                            buffer[BUFFER_SIZE+1];  /* the output line being assembled */
2.7       timbl      50:        char *                          write_pointer;  /* next free byte in buffer */
                     51:        char *                          line_break;     /* current break candidate within buffer */
                     52:        int                             cleanness;      /* rank of that candidate; 0 means none */
2.8       timbl      53:        BOOL                            delete_line_break_char; /* YES: the char at line_break is dropped when the line is broken there */
2.14      frystyk    54:        char                            preformatted;   /* PRE nesting depth; nonzero disables wrapping */
2.1       timbl      55: };
                     56: 
2.17      timbl      57: /*                     OUTPUT FUNCTIONS
                     58: **
                     59: **     These functions output the finished SGML stream, doing the
                     60: **     line wrap
                     61: */
                     62: 
2.7       timbl      63: /*     Flush Buffer
                     64: **     ------------
                     65: **     Sends the buffered bytes to the target with put_block and resets all line-break state. */
                     66: PRIVATE void HTMLGen_flush ARGS1(HTStructured *, me)
                     67: {
                     68:     (*me->targetClass.put_block)(me->target, 
                     69:                                me->buffer,
                     70:                                me->write_pointer - me->buffer);        /* number of bytes buffered so far */
                     71:     me->write_pointer = me->buffer;    /* buffer is empty again */
                     72:     me->line_break = me->buffer;
                     73:     me->cleanness = 0;                 /* no break candidate */
2.8       timbl      74:     me->delete_line_break_char = NO;
2.7       timbl      75: }
                     76: 
                     77: 
2.1       timbl      78: /*     Character handling
                     79: **     ------------------
2.8       timbl      80: **     Appends c to the line buffer and emits the buffer once a line is complete.
                     81: **     The tricky bits are the line break handling.  This attempts
                     82: **     to synchronise line breaks on sentence or phrase ends. This
                     83: **     is important if one stores SGML files in a line-oriented code
                     84: **     repository, so that if a small change is made, line ends don't
                     85: **     shift in a ripple-through to apparently change a large part of the
                     86: **     file. We give extra "cleanness" to spaces appearing directly
                     87: **     after periods (full stops), [semi]colons and commas.
                     88: **        This should make the source files easier to read and modify
2.17      timbl      89: **     by hand, too, though this is not a primary design consideration. TBL
2.1       timbl      90: **     Space ranks: plain 1, after ',' 2, ';' 3, ':' 4, '.' 5; among spaces the highest rank wins, ties go to the later one. */
2.17      timbl      91: PRIVATE void HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1       timbl      92: {
2.7       timbl      93: 
                     94:     *me->write_pointer++ = c;
                     95:     
                     96:     if (c=='\n') {             /* explicit newline: emit the line as it stands */
                     97:         HTMLGen_flush(me);
                     98:        return;
                     99:     }
                    100:     
                    101:     if ((!me->preformatted  && c==' ')) {      /* a space is a possible break point */
2.8       timbl     102:         int new_cleanness = 1;
                    103:        if (me->write_pointer > (me->buffer + 1)) {     /* is there a char before the space? */
2.9       luotonen  104:            char delims[5];
                    105:            char * p;
                    106:            strcpy(delims, ",;:.");             /* @@ english bias */
2.11      timbl     107:            p = strchr(delims, me->write_pointer[-2]);  /* look up the char preceding the space */
2.9       luotonen  108:            if (p) new_cleanness = p - delims + 2;      /* ',' 2  ';' 3  ':' 4  '.' 5 */
2.8       timbl     109:        }
                    110:        if (new_cleanness >= me->cleanness) {           /* on a tie the later space wins */
                    111:            me->line_break = me->write_pointer - 1;  /* Point to space */
                    112:            me->cleanness = new_cleanness;
                    113:            me->delete_line_break_char = YES;
                    114:        }
2.7       timbl     115:     }
                    116:     
2.19      timbl     117:     /* Flush buffer out when full. If preformatted then don't wrap! */
2.15      luotonen  118:     if (me->write_pointer >= me->buffer + BUFFER_SIZE-1) {
2.14      frystyk   119:        if (!me->preformatted && me->cleanness) {
2.8       timbl     120:            char line_break_char = me->line_break[0];
                    121:            char * saved = me->line_break;      /* start of the text carried over to the next line */
                    122:            
                    123:            if (me->delete_line_break_char) saved++;    /* skip the char that the newline replaces */
                    124:            me->line_break[0] = '\n';                   /* only for the duration of the write */
2.7       timbl     125:            (*me->targetClass.put_block)(me->target,
                    126:                                        me->buffer,
2.8       timbl     127:                                        me->line_break - me->buffer + 1);
                    128:            me->line_break[0] = line_break_char;
2.7       timbl     129:            {  /* move next line in */
2.8       timbl     130:                char * p=saved;
                    131:                char *q;
                    132:                for(q=me->buffer; p < me->write_pointer; )
2.7       timbl     133:                        *q++ = *p++;
                    134:            }
                    135:            me->cleanness = 0;
2.8       timbl     136:            me->delete_line_break_char = 0;
                    137:            me->write_pointer = me->write_pointer - (saved-me->buffer);
                    138: 
2.7       timbl     139:        } else {
                    140:            /* No usable break: emit everything unbroken.  The flush routine
2.14      frystyk   141:            ** also clears the break state; a candidate kept from before would
2.15      luotonen  142:            ** refer to text already sent, and a later wrap at the buffer start
2.8       timbl     143:            ** could insert a stray newline or lose a character. */ HTMLGen_flush(me);
2.7       timbl     144:        }
                    145:        me->line_break = me->buffer;
                    146:     }
2.1       timbl     147: }
                    148: 
                    149: 
                    150: 
                    151: /*     String handling
                    152: **     ---------------
                    153: **     Buffers the NUL-terminated string s one character at a time, with no escaping. */
2.17      timbl     154: PRIVATE void HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
                    155: {
                    156:     CONST char * p;
                    157:     for(p=s; *p; p++) HTMLGen_output_character(me, *p);        /* stops at the terminating NUL */
                    158: }
                    159: 
                    160: 
                    161: 
                    162: 
                    163: /*                     INPUT FUNCTIONS
                    164: **
                    165: **     These take data from the structured stream.  In the input
                    166: **     stream, entities are in raw form.  The seven_bit flag controls
                    167: **     whether the ISO Latin-1 characters are represented in SGML entity
                    168: **     form.  This is only recommended for viewing on older non-latin-1
                    169: **     capable equipment, or for mailing for example. 
                    170: **
                    171: ** Bug: assumes local encoding is ISO!
                    172: */     
                    173: PRIVATE void HTMLGen_put_character ARGS2(HTStructured *, me, char, c)  /* escape one raw data character and buffer it */
                    174: {
                    175:     if (c=='&') HTMLGen_output_string(me, "&amp;");    /* markup-significant, so written as an entity */
                    176:     else if (c=='<') HTMLGen_output_string(me, "&lt;");
                    177:     else if (me->seven_bit && ((unsigned char)c > 127)) {      /* 8-bit char on a 7-bit-only stream */
                    178:         char temp[8];  /* the longest result, "&#255;" plus NUL, needs 7 bytes */
                    179:        sprintf(temp, "&#%d;", (unsigned char)c);       /* numeric character reference; the cast stops a signed char printing as a negative number */
                    180:        HTMLGen_output_string(me, temp);
                    181:     }
                    182:     else HTMLGen_output_character(me, c);      /* everything else goes out unchanged */
                    183: }
                    184: 
2.3       timbl     185: PRIVATE void HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)      /* escape and buffer a NUL-terminated data string */
2.1       timbl     186: {
2.7       timbl     187:     CONST char * p;
                    188:     for(p=s; *p; p++) HTMLGen_put_character(me, *p);   /* each char goes through the escaping in HTMLGen_put_character */
2.1       timbl     189: }
                    190: 
2.3       timbl     191: PRIVATE void HTMLGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)   /* escape and buffer exactly l bytes starting at s */
2.1       timbl     192: {
2.7       timbl     193:     CONST char * p;
                    194:     for(p=s; p<s+l; p++) HTMLGen_put_character(me, *p);        /* length-counted: a NUL byte does not end the loop */
2.1       timbl     195: }
                    196: 
                    197: 
                    198: /*     Start Element
                    199: **     -------------
2.7       timbl     200: **     Writes the opening tag of element_number, taking tag and attribute names from me->dtd.
                    201: **     Within the opening tag, there may be spaces
                    202: **     and the line may be broken at these spaces.
2.1       timbl     203: **     present[i] selects attribute i; value[i], if non-NULL, is written unescaped between double quotes. */
                    204: PRIVATE void HTMLGen_start_element ARGS4(
2.3       timbl     205:        HTStructured *,         me,
2.2       timbl     206:        int,                    element_number,
                    207:        CONST BOOL*,            present,
                    208:        CONST char **,          value)
2.1       timbl     209: {
                    210:     int i;
2.12      timbl     211:     HTTag * tag = &me->dtd->tags[element_number];
2.1       timbl     212: 
2.20    ! timbl     213:     /* Control line breaks allowed within tag! */
        !           214:     int was_preformatted = me->preformatted;   /* save state */
        !           215:     me->preformatted = 1;      /* No automatic break handling inside the tag; candidates are set by hand below */
        !           216: 
2.17      timbl     217:     HTMLGen_output_character(me, '<');
                    218:     HTMLGen_output_string(me, tag->name);
2.1       timbl     219:     if (present) for (i=0; i< tag->number_of_attributes; i++) {        /* present may be NULL: no attributes at all */
                    220:         if (present[i]) {
2.19      timbl     221:            me->line_break = me->write_pointer; /* Don't you hate SGML?  */
                    222:            me->cleanness = 1;  /* Can break between attributes */
                    223:            me->delete_line_break_char = YES;   /* the space written next is what a break would replace */
2.17      timbl     224:            HTMLGen_output_character(me, ' ');
                    225:            HTMLGen_output_string(me, tag->attributes[i].name);
2.1       timbl     226:            if (value[i]) {     /* attribute without a value: name only */
2.17      timbl     227:                HTMLGen_output_string(me, "=\"");
                    228:                HTMLGen_output_string(me, value[i]);
                    229:                HTMLGen_output_character(me, '"');
2.1       timbl     230:            }
                    231:        }
                    232:     }
2.20    ! timbl     233:     me->preformatted = was_preformatted;       /* Restore state */
        !           234: 
2.14      frystyk   235:     /* Nested PRE is no more a problem! */
                    236:     if (element_number == HTML_PRE)
                    237:        me->preformatted++;     /* counted before the '>' is written, so the '>' is already in PRE mode */
2.19      timbl     238: 
                    239:     HTMLGen_output_character(me, '>');
2.7       timbl     240:     
2.20    ! timbl     241:     /* Here is a funny one.  In PRE, newlines are significant, except of
        !           242:     course for one after the <PRE> which is ignored. This means that
        !           243:     we MUST put in a dummy one after the <PRE> to protect any real newline
        !           244:     within the pre section.
        !           245:     
        !           246:     However, *within* a PRE section, although we can break after
        !           247:     (for example) emphasis start tags, it will probably confuse some
        !           248:     parsers so we won't.*/
        !           249:     
        !           250:     if (element_number == HTML_PRE) {
        !           251:         HTMLGen_output_character(me, '\n');
        !           252:     } else  if (!me->preformatted && 
        !           253:         tag->contents != SGML_EMPTY) {  /* can break after element start */ 
2.8       timbl     254:        me->line_break = me->write_pointer;     /* Don't you hate SGML?  */
2.19      timbl     255:        me->cleanness = 3;      /* set unconditionally, replacing any earlier candidate */
2.8       timbl     256:        me->delete_line_break_char = NO;        /* the char at the break position is kept */
                    257:     }
2.1       timbl     258: }
                    259: 
                    260: 
2.17      timbl     261: /*     End Element
                    262: **     -----------
2.1       timbl     263: **     Writes the closing tag of element_number, marking a possible line break in front of it.
2.16      timbl     264: **      The rules for inserting CR LF into SGML are weird, strict, and
                    265: **     nonintuitive.
2.20    ! timbl     266: **     See comment also about PRE above.
2.1       timbl     267: */
2.3       timbl     268: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.1       timbl     269:                        int , element_number)
                    270: {
2.20    ! timbl     271:     if (element_number == HTML_PRE) {
        !           272:         HTMLGen_output_character(me, '\n');    /* newline before the PRE end tag; this also empties the buffer */
        !           273:     } else  if (!me->preformatted) { /* can break before element end */ 
2.8       timbl     274:        me->line_break = me->write_pointer;     /* Don't you hate SGML?  */
                    275:        me->cleanness = 1;      /* set unconditionally, replacing any earlier candidate */
                    276:        me->delete_line_break_char = NO;        /* the '<' written next is kept if the line breaks here */
                    277:     }
2.17      timbl     278:     HTMLGen_output_string(me, "</");
                    279:     HTMLGen_output_string(me, me->dtd->tags[element_number].name);
                    280:     HTMLGen_output_character(me, '>');    /* NO break after. TBL 940501 */
2.14      frystyk   281:     if (element_number == HTML_PRE && me->preformatted)
                    282:        me->preformatted--;     /* leave one PRE level, never going below zero */
2.1       timbl     283: }
                    284: 
                    285: 
2.17      timbl     286: /*     Expanding entities
                    287: **     ------------------
2.1       timbl     288: **     Writes entity number entity_number as an ampersand, its name from me->dtd->entity_names, and a semicolon.
                    289: */
                    290: 
2.3       timbl     291: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1       timbl     292: {
2.17      timbl     293:     HTMLGen_output_character(me, '&');         /* written raw: not turned into &amp; */
                    294:     HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);   /* entity_number is not range-checked here */
                    295:     HTMLGen_output_character(me, ';');
2.1       timbl     296: }
                    297: 
                    298: 
                    299: 
2.17      timbl     300: /*     Free an object
                    301: **     --------------
2.1       timbl     302: **     Emits what is still buffered, ends the output with a newline, frees the target and then me.
                    303: */
2.3       timbl     304: PRIVATE void HTMLGen_free ARGS1(HTStructured *, me)
2.1       timbl     305: {
2.7       timbl     306:     HTMLGen_flush(me);         /* pending text must go out first ... */
                    307:     (*me->targetClass.put_character)(me->target, '\n');        /* ... so that the newline really is the last byte sent */
2.3       timbl     308:     (*me->targetClass.free)(me->target);       /* ripple through */
                    309:     free(me);
2.1       timbl     310: }
                    311: 
                    312: 
2.7       timbl     313: PRIVATE void PlainToHTML_free ARGS1(HTStructured *, me)        /* close the PRE, BODY and HTML elements, then free as usual */
                    314: {
                    315:     HTMLGen_end_element(me, HTML_PRE); /* innermost element first */
                    316:     HTMLGen_end_element(me, HTML_BODY);
                    317:     HTMLGen_end_element(me, HTML_HTML);
                    318:     HTMLGen_free(me);                  /* me is no longer valid after this call */
                    319: }
                    320: 
                    321: 
2.1       timbl     322: 
2.6       timbl     323: PRIVATE void HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)       /* abort is handled exactly like a normal free; e is not used */
2.1       timbl     324: {
2.6       timbl     325:     HTMLGen_free(me);  /* buffered output is still written out */
2.1       timbl     326: }
                    327: 
                    328: 
2.6       timbl     329: PRIVATE void PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)   /* abort is handled exactly like a normal free; e is not used */
2.1       timbl     330: {
2.7       timbl     331:     PlainToHTML_free(me);      /* the closing tags are still written */
2.1       timbl     332: }
                    333: 
                    334: 
                    335: 
                    336: /*     Structured Object Class
                    337: **     -----------------------
                    338: **     Class record installed by HTMLGenerator. NOTE(review): HTStructuredClass is declared elsewhere; slot meanings are read off the initialisers. */
2.5       timbl     339: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1       timbl     340: {              
                    341:        "text/html",            /* presumably the class name slot -- confirm against HTStructuredClass */
                    342:        HTMLGen_free,
2.6       timbl     343:        HTMLGen_abort,
2.1       timbl     344:        HTMLGen_put_character,  HTMLGen_put_string, HTMLGen_write,
2.13      frystyk   345:        HTMLGen_start_element,  HTMLGen_end_element,
2.1       timbl     346:        HTMLGen_put_entity
                    347: }; 
                    348: 
                    349: 
                    350: /*     Subclass-specific Methods
                    351: **     -------------------------
                    352: **     HTMLGenerator: allocates a zeroed generator object that writes its markup to output, and returns it. */
                    353: 
                    354: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
                    355: {
2.18      luotonen  356:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));   /* zeroed: seven_bit, cleanness and preformatted all start at 0 */
2.3       timbl     357:     if (me == NULL) outofmem(__FILE__, "HTMLGenerator");       /* presumably does not return -- confirm; me is dereferenced right after */
                    358:     me->isa = &HTMLGeneration;       
2.12      timbl     359:     me->dtd = &HTMLP_dtd;
2.1       timbl     360: 
2.3       timbl     361:     me->target = output;
                    362:     me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7       timbl     363:     
                    364:     me->write_pointer = me->buffer;    /* empty line buffer */
                    365:     me->line_break =   me->buffer;
2.3       timbl     366:     return me;
2.1       timbl     367: }
                    368: 
                    369: /*     Stream Object Class
                    370: **     -------------------
                    371: **     Class record installed by HTPlainToHTML; the three structured slots are NULL and so must never be called through it.
2.2       timbl     372: **     This object just converts a plain text stream into HTML
2.12      timbl     373: **     It is officially a structured stream but only the stream bits exist.
2.2       timbl     374: **     This is just the easiest way of typecasting all the routines.
2.1       timbl     375: */
2.2       timbl     376: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1       timbl     377: {              
                    378:        "plaintexttoHTML",
2.13      frystyk   379:        PlainToHTML_free,       /* HTMLGen_free,  Henrik 03/03-94 */
2.6       timbl     380:        PlainToHTML_abort,      
2.1       timbl     381:        HTMLGen_put_character,  /* the data routines are shared with HTMLGeneration */
                    382:        HTMLGen_put_string,
                    383:        HTMLGen_write,
2.2       timbl     384:        NULL,           /* Structured stuff */
                    385:        NULL,
                    386:        NULL
2.1       timbl     387: }; 
                    388: 
                    389: 
                    390: /*     HTConverter from plain text to HTML Stream
                    391: **     ------------------------------------------
2.13      frystyk   392: **     Returns a stream that has already written the HTML, BODY and PRE start tags to output_stream and escapes all data it is fed.
                    393: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1       timbl     394: */
                    395: 
2.12      timbl     396: PUBLIC HTStream* HTPlainToHTML ARGS5(
                    397:        HTRequest *,            request,
                    398:        void *,                 param,
                    399:        HTFormat,               input_format,
                    400:        HTFormat,               output_format,
                    401:        HTStream *,             output_stream)  /* only output_stream is used in the body */
2.1       timbl     402: {
2.13      frystyk   403:     BOOL present[MAX_ATTRIBUTES];      /* Flags: attribute is present? */
                    404:     CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18      luotonen  405:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     406:     if (me == NULL) outofmem(__FILE__, "PlainToHTML"); /* presumably does not return -- confirm; me is dereferenced below */
2.13      frystyk   407:     
                    408:     memset(present, '\0', MAX_ATTRIBUTES);     /* NOTE(review): clears MAX_ATTRIBUTES bytes, the whole array only if sizeof(BOOL) is 1 -- confirm */
                    409:     memset(value, '\0', MAX_ATTRIBUTES*sizeof(char *));
                    410:     
                    411:     me->isa = (HTStructuredClass*) &PlainToHTMLConversion;     /* the cast drops the CONST of the class record */
2.12      timbl     412:     me->dtd = &HTMLP_dtd;
                    413:     me->target = output_stream;
2.13      frystyk   414:     me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
                    415:     me->write_pointer = me->buffer;
                    416:     me->line_break =   me->buffer;
                    417:     
                    418:     HTMLGen_start_element(me, HTML_HTML, present, value);      /* present is cleared, so the tags carry no attributes */
                    419:     HTMLGen_start_element(me, HTML_BODY, present, value);
                    420:     HTMLGen_start_element(me, HTML_PRE, present, value);       /* PRE mode stays on until PlainToHTML_free closes it */
                    421: 
2.7       timbl     422:     return (HTStream*) me;     /* an HTStructured handed out under the HTStream type */
2.1       timbl     423: }
2.13      frystyk   424: 
                    425: 
2.17      timbl     426: /*     A safe version for making 7-bit restricted HTML
                    427: **     Beware that this makes it horrible for the Scandinavians
                    428: **     to actually read it.
                    429: */
                    430: 
                    431: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
                    432:        HTRequest *,            request,
                    433:        void *,                 param,
                    434:        HTFormat,               input_format,
                    435:        HTFormat,               output_format,
                    436:        HTStream *,             output_stream)
2.13      frystyk   437: 
2.17      timbl     438: {
                    439:     HTStream* me = HTPlainToHTML(request,param,input_format,
                    440:                output_format, output_stream);  /* same converter, all arguments passed through */
                    441:     ((HTStructured*)me)->seven_bit = YES;      /* the returned object is really an HTStructured; turn on the 7-bit restriction */
                    442:     return me;
                    443: }
2.1       timbl     444: 

Webmaster