Annotation of libwww/Library/src/HTMLGen.c, revision 2.23

2.1       timbl       1: /*             HTML Generator
                      2: **             ==============
                      3: **
                      4: **     This version of the HTML object sends HTML markup to the output stream.
                      5: **
                      6: ** Bugs:       Line wrapping is not done at all.
                      7: **             All data handled as PCDATA.
                      8: **             Should convert old XMP, LISTING and PLAINTEXT to PRE.
                      9: **
                     10: **     It is not obvious to me right now whether the HEAD should be generated
2.7       timbl      11: **     from the incoming data or the anchor.  Currently it is from the former
2.17      timbl      12: **     which is cleanest. TBL
2.22      duns       13: **
                     14: ** HISTORY:
                     15: **      8 Jul 94  FM   Insulate free() from _free structure element.
                     16: **
2.1       timbl      17: */
                     18: 
2.12      timbl      19: #include "HTMLPDTD.h"
2.1       timbl      20: #include "HTStream.h"
                     21: #include "SGML.h"
                     22: #include "HTFormat.h"
2.23    ! frystyk    23: #include "HTMLGen.h"                                    /* Implemented here */
        !            24: 
        !            25: #define BUFFER_SIZE    80      /* Line buffer attempts to make neat breaks */
2.1       timbl      26: 
/* Shorthands for writing straight to the target stream.  Both expect a
** variable named `me` in scope that carries `target` and `targetClass`.
*/
#define PUTC(c) ((*me->targetClass.put_character)(me->target, (c)))
/* #define PUTS(s) (*me->targetClass.put_string)(me->target, s) */
#define PUTB(s,l) ((*me->targetClass.put_block)(me->target, (s), (l)))
2.1       timbl      30: 
                     31: /*             HTML Object
                     32: **             -----------
                     33: */
                     34: 
                     35: struct _HTStream {
                     36:        CONST HTStreamClass *           isa;    
                     37:        HTStream *                      target;
                     38:        HTStreamClass                   targetClass;    /* COPY for speed */
                     39: };
                     40: 
2.21      timbl      41: #define MAX_CLEANNESS 10
2.1       timbl      42: struct _HTStructured {
                     43:        CONST HTStructuredClass *       isa;
                     44:        HTStream *                      target;
                     45:        HTStreamClass                   targetClass;    /* COPY for speed */
2.12      timbl      46:        CONST SGML_dtd *                dtd;
2.17      timbl      47:        BOOL                            seven_bit;      /* restrict output*/
2.7       timbl      48:        
2.14      frystyk    49:        char                            buffer[BUFFER_SIZE+1];
2.7       timbl      50:        char *                          write_pointer;
2.21      timbl      51:        char *                          line_break [MAX_CLEANNESS+1];
2.7       timbl      52:        int                             cleanness;
2.21      timbl      53:        BOOL                            overflowed;
                     54:        BOOL                            delete_line_break_char
                     55:                                                [MAX_CLEANNESS+1];
2.14      frystyk    56:        char                            preformatted;
2.1       timbl      57: };
                     58: 
2.17      timbl      59: /*                     OUTPUT FUNCTIONS
                     60: **
                     61: **     These functions output the finished SGML stream doing the
                     62: **     line wrap
                     63: */
                     64: 
2.7       timbl      65: /*     Flush Buffer
                     66: **     ------------
                     67: */
2.21      timbl      68: 
                     69: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
                     70: {
                     71:     int i;
                     72:     for (i=0; i<= MAX_CLEANNESS; i++) {
                     73:         me->line_break[i] = NULL;
                     74:     }
                     75: }
                     76: 
                     77: 
2.7       timbl      78: PRIVATE void HTMLGen_flush ARGS1(HTStructured *, me)
                     79: {
                     80:     (*me->targetClass.put_block)(me->target, 
                     81:                                me->buffer,
                     82:                                me->write_pointer - me->buffer);
                     83:     me->write_pointer = me->buffer;
2.21      timbl      84:     flush_breaks(me);
2.7       timbl      85:     me->cleanness = 0;
2.21      timbl      86: }
                     87: 
                     88: 
                     89: /*     Weighted optional line break
                     90: **
                     91: **     We keep track of all the breaks for when we chop the line
                     92: */
                     93: 
                     94: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
                     95:                BOOL, dlbc)
                     96: {
                     97:     me->line_break[new_cleanness] = 
                     98:                         dlbc ? me->write_pointer - 1 /* Point to space */
                     99:                              : me->write_pointer ;   /* point to gap */
                    100:     me->delete_line_break_char[new_cleanness] = dlbc;
                    101:     if (new_cleanness >= me->cleanness)
                    102:        me->cleanness = new_cleanness;
2.7       timbl     103: }
                    104: 
                    105: 
2.1       timbl     106: /*     Character handling
                    107: **     ------------------
2.8       timbl     108: **
                    109: **     The tricky bits are the line break handling.  This attempts
                     110: **     to synchronise line breaks on sentence or phrase ends. This
                    111: **     is important if one stores SGML files in a line-oriented code
                    112: **     repository, so that if a small change is made, line ends don't
                    113: **     shift in a ripple-through to apparently change a large part of the
                    114: **     file. We give extra "cleanness" to spaces appearing directly
                    115: **     after periods (full stops), [semi]colons and commas.
                    116: **        This should make the source files easier to read and modify
2.17      timbl     117: **     by hand, too, though this is not a primary design consideration. TBL
2.1       timbl     118: */
2.21      timbl     119: PRIVATE char delims[] = ",;:.";                /* @@ english bias */
2.17      timbl     120: PRIVATE void HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1       timbl     121: {
2.7       timbl     122: 
                    123:     *me->write_pointer++ = c;
                    124:     
2.21      timbl     125:     if (c=='\n') {             /* Newlines */
                    126:         if (me->preformatted) {
                    127:            HTMLGen_flush(me);
                    128:            return;
                    129:        } else {
                    130:            me->write_pointer[-1] = c = ' ';    /* Treat same as space */
                    131:        }
2.7       timbl     132:     }
                    133:     
2.21      timbl     134:     /* Figure our whether we can break at this point
                    135:     */
2.7       timbl     136:     if ((!me->preformatted  && c==' ')) {
2.8       timbl     137:         int new_cleanness = 1;
                    138:        if (me->write_pointer > (me->buffer + 1)) {
2.9       luotonen  139:            char * p;
2.11      timbl     140:            p = strchr(delims, me->write_pointer[-2]);
2.21      timbl     141:            if (p) new_cleanness = p - delims + 4;
2.8       timbl     142:        }
2.21      timbl     143:        allow_break(me, new_cleanness, YES);
2.7       timbl     144:     }
                    145:     
2.21      timbl     146:     /* Flush buffer out when full, or whenever the line is over
                    147:        the nominal maximum and we can break at all
                    148:     */
                    149:     if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
                    150:         ||  (me->overflowed && me->cleanness)) {
                    151:        if (me->cleanness) {
                    152:            char line_break_char = me->line_break[me->cleanness][0];
                    153:            char * saved = me->line_break[me->cleanness];
2.8       timbl     154:            
2.21      timbl     155:            if (me->delete_line_break_char[me->cleanness]) saved++; 
                    156:            me->line_break[me->cleanness][0] = '\n';
2.7       timbl     157:            (*me->targetClass.put_block)(me->target,
                    158:                                        me->buffer,
2.21      timbl     159:                                        me->line_break[me->cleanness] - me->buffer + 1);
                    160:            me->line_break[me->cleanness][0] = line_break_char;
2.7       timbl     161:            {  /* move next line in */
2.8       timbl     162:                char * p=saved;
                    163:                char *q;
                    164:                for(q=me->buffer; p < me->write_pointer; )
2.7       timbl     165:                        *q++ = *p++;
                    166:            }
                    167:            me->cleanness = 0;
2.21      timbl     168:            /* Now we have to check whether ther are any perfectly good breaks
                    169:            ** which weren't good enough for the last line but may be
                    170:            **  good enough for the next
                    171:            */
                    172:            {
                    173:                int i;
                    174:                for(i=0; i <= MAX_CLEANNESS; i++) {
                    175:                    if (me->line_break[i] > saved) {
                    176:                        me->line_break[i] = me->line_break[i] -
                    177:                                                (saved-me->buffer);
                    178:                        me->cleanness = i;
                    179:                    } else {
                    180:                        me->line_break[i] = NULL;
                    181:                    }
                    182:                }
                    183:            }
                    184: 
2.8       timbl     185:            me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21      timbl     186:            me->overflowed = NO;
                    187:        } else {   /* No break- just output with no newline */
2.7       timbl     188:            (*me->targetClass.put_block)(me->target,
2.14      frystyk   189:                                         me->buffer,
2.15      luotonen  190:                                         me->write_pointer - me->buffer);
2.8       timbl     191:            me->write_pointer = me->buffer;
2.21      timbl     192:            flush_breaks(me);
                    193:            me->overflowed = YES;
2.7       timbl     194:        }
                    195:     }
2.1       timbl     196: }
                    197: 
                    198: 
                    199: 
                    200: /*     String handling
                    201: **     ---------------
                    202: */
2.17      timbl     203: PRIVATE void HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
                    204: {
                    205:     CONST char * p;
                    206:     for(p=s; *p; p++) HTMLGen_output_character(me, *p);
                    207: }
                    208: 
                    209: 
                    210: 
                    211: 
                    212: /*                     INPUT FUNCTIONS
                    213: **
                    214: **     These take data from the structured stream.  In the input
                    215: **     stream, entities are in raw form.  The seven_bit flag controls
                     216: **     whether the ISO Latin-1 characters are represented in SGML entity
                    217: **     form.  This is only recommended for viewing on older non-latin-1
                    218: **     capable equipment, or for mailing for example. 
                    219: **
                    220: ** Bug: assumes local encoding is ISO!
                    221: */     
                    222: PRIVATE void HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
                    223: {
                    224:     if (c=='&') HTMLGen_output_string(me, "&amp;");
                    225:     else if (c=='<') HTMLGen_output_string(me, "&lt;");
                    226:     else if (me->seven_bit && ((unsigned char)c > 127)) {
                    227:         char temp[8];
                    228:        sprintf(temp, "&%d;", c);
                    229:        HTMLGen_output_string(me, temp);
                    230:     }
                    231:     else HTMLGen_output_character(me, c);
                    232: }
                    233: 
2.3       timbl     234: PRIVATE void HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1       timbl     235: {
2.7       timbl     236:     CONST char * p;
                    237:     for(p=s; *p; p++) HTMLGen_put_character(me, *p);
2.1       timbl     238: }
                    239: 
2.3       timbl     240: PRIVATE void HTMLGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
2.1       timbl     241: {
2.7       timbl     242:     CONST char * p;
                    243:     for(p=s; p<s+l; p++) HTMLGen_put_character(me, *p);
2.1       timbl     244: }
                    245: 
                    246: 
                    247: /*     Start Element
                    248: **     -------------
2.7       timbl     249: **
                    250: **     Within the opening tag, there may be spaces
                    251: **     and the line may be broken at these spaces.
2.1       timbl     252: */
                    253: PRIVATE void HTMLGen_start_element ARGS4(
2.3       timbl     254:        HTStructured *,         me,
2.2       timbl     255:        int,                    element_number,
                    256:        CONST BOOL*,            present,
                    257:        CONST char **,          value)
2.1       timbl     258: {
                    259:     int i;
2.12      timbl     260:     HTTag * tag = &me->dtd->tags[element_number];
2.1       timbl     261: 
2.20      timbl     262:     /* Control line breaks allowed within tag! */
                    263:     int was_preformatted = me->preformatted;   /* save state */
                    264:     me->preformatted = 1;      /* Can break between attributes */
                    265: 
2.17      timbl     266:     HTMLGen_output_character(me, '<');
                    267:     HTMLGen_output_string(me, tag->name);
2.1       timbl     268:     if (present) for (i=0; i< tag->number_of_attributes; i++) {
                    269:         if (present[i]) {
2.17      timbl     270:            HTMLGen_output_character(me, ' ');
2.21      timbl     271:            allow_break(me, 1, YES);
2.17      timbl     272:            HTMLGen_output_string(me, tag->attributes[i].name);
2.1       timbl     273:            if (value[i]) {
2.17      timbl     274:                HTMLGen_output_string(me, "=\"");
                    275:                HTMLGen_output_string(me, value[i]);
                    276:                HTMLGen_output_character(me, '"');
2.1       timbl     277:            }
                    278:        }
                    279:     }
2.20      timbl     280:     me->preformatted = was_preformatted;       /* Restore state */
                    281: 
2.14      frystyk   282:     /* Nested PRE is no more a problem! */
                    283:     if (element_number == HTML_PRE)
                    284:        me->preformatted++;
2.19      timbl     285: 
                    286:     HTMLGen_output_character(me, '>');
2.7       timbl     287:     
2.20      timbl     288:     /* Here is a funny one.  In PRE, newlines are significant, except of
                    289:     course for one after the <PRE> which is ignored. This means that
                    290:     we MUST put in a dummy one after the <PRE> to protect any real newline
                    291:     within the pre section.
                    292:     
                    293:     However, *within* a PRE section, although we can break after
                    294:     (for example) emphasis start tags, it will probably confuse some
                    295:     parsers so we won't.*/
                    296:     
                    297:     if (element_number == HTML_PRE) {
                    298:         HTMLGen_output_character(me, '\n');
                    299:     } else  if (!me->preformatted && 
                    300:         tag->contents != SGML_EMPTY) {  /* can break after element start */ 
2.21      timbl     301:        allow_break(me, 3, NO);
2.8       timbl     302:     }
2.1       timbl     303: }
                    304: 
                    305: 
2.17      timbl     306: /*     End Element
                    307: **     -----------
2.1       timbl     308: **
2.16      timbl     309: **      The rules for inserting CR LF into SGML are weird, strict, and
                     310: **     nonintuitive.
2.20      timbl     311: **     See comment also about PRE above.
2.1       timbl     312: */
2.3       timbl     313: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.1       timbl     314:                        int , element_number)
                    315: {
2.20      timbl     316:     if (element_number == HTML_PRE) {
                    317:         HTMLGen_output_character(me, '\n');
                    318:     } else  if (!me->preformatted) { /* can break before element end */ 
2.21      timbl     319:        allow_break(me, 1, NO);
2.8       timbl     320:     }
2.17      timbl     321:     HTMLGen_output_string(me, "</");
                    322:     HTMLGen_output_string(me, me->dtd->tags[element_number].name);
                    323:     HTMLGen_output_character(me, '>');    /* NO break after. TBL 940501 */
2.14      frystyk   324:     if (element_number == HTML_PRE && me->preformatted)
                    325:        me->preformatted--;
2.1       timbl     326: }
                    327: 
                    328: 
2.17      timbl     329: /*     Expanding entities
                    330: **     ------------------
2.1       timbl     331: **
                    332: */
                    333: 
2.3       timbl     334: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1       timbl     335: {
2.17      timbl     336:     HTMLGen_output_character(me, '&');
                    337:     HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
                    338:     HTMLGen_output_character(me, ';');
2.1       timbl     339: }
                    340: 
                    341: 
                    342: 
2.17      timbl     343: /*     Free an object
                    344: **     --------------
2.1       timbl     345: **
                    346: */
2.3       timbl     347: PRIVATE void HTMLGen_free ARGS1(HTStructured *, me)
2.1       timbl     348: {
2.21      timbl     349:     HTMLGen_flush(me);
2.7       timbl     350:     (*me->targetClass.put_character)(me->target, '\n');
2.22      duns      351:     (*me->targetClass._free)(me->target);      /* ripple through */
2.3       timbl     352:     free(me);
2.1       timbl     353: }
                    354: 
                    355: 
2.7       timbl     356: PRIVATE void PlainToHTML_free ARGS1(HTStructured *, me)
                    357: {
                    358:     HTMLGen_end_element(me, HTML_PRE);
                    359:     HTMLGen_end_element(me, HTML_BODY);
                    360:     HTMLGen_end_element(me, HTML_HTML);
                    361:     HTMLGen_free(me);
                    362: }
                    363: 
                    364: 
2.1       timbl     365: 
2.6       timbl     366: PRIVATE void HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     367: {
2.6       timbl     368:     HTMLGen_free(me);
2.1       timbl     369: }
                    370: 
                    371: 
2.6       timbl     372: PRIVATE void PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1       timbl     373: {
2.7       timbl     374:     PlainToHTML_free(me);
2.1       timbl     375: }
                    376: 
                    377: 
                    378: 
                    379: /*     Structured Object Class
                    380: **     -----------------------
                    381: */
2.5       timbl     382: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1       timbl     383: {              
                    384:        "text/html",
                    385:        HTMLGen_free,
2.6       timbl     386:        HTMLGen_abort,
2.1       timbl     387:        HTMLGen_put_character,  HTMLGen_put_string, HTMLGen_write,
2.13      frystyk   388:        HTMLGen_start_element,  HTMLGen_end_element,
2.1       timbl     389:        HTMLGen_put_entity
                    390: }; 
                    391: 
                    392: 
                    393: /*     Subclass-specific Methods
                    394: **     -------------------------
                    395: */
                    396: 
                    397: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
                    398: {
2.18      luotonen  399:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     400:     if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
                    401:     me->isa = &HTMLGeneration;       
2.12      timbl     402:     me->dtd = &HTMLP_dtd;
2.1       timbl     403: 
2.3       timbl     404:     me->target = output;
                    405:     me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7       timbl     406:     
                    407:     me->write_pointer = me->buffer;
2.21      timbl     408:     flush_breaks(me);
2.3       timbl     409:     return me;
2.1       timbl     410: }
                    411: 
                    412: /*     Stream Object Class
                    413: **     -------------------
                    414: **
2.2       timbl     415: **     This object just converts a plain text stream into HTML
2.12      timbl     416: **     It is officially a structured stream but only the stream bits exist.
2.2       timbl     417: **     This is just the easiest way of typecasting all the routines.
2.1       timbl     418: */
2.2       timbl     419: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1       timbl     420: {              
                    421:        "plaintexttoHTML",
2.13      frystyk   422:        PlainToHTML_free,       /* HTMLGen_free,  Henrik 03/03-94 */
2.6       timbl     423:        PlainToHTML_abort,      
2.1       timbl     424:        HTMLGen_put_character,
                    425:        HTMLGen_put_string,
                    426:        HTMLGen_write,
2.2       timbl     427:        NULL,           /* Structured stuff */
                    428:        NULL,
                    429:        NULL
2.1       timbl     430: }; 
                    431: 
                    432: 
                    433: /*     HTConverter from plain text to HTML Stream
                    434: **     ------------------------------------------
2.13      frystyk   435: **
                    436: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1       timbl     437: */
                    438: 
2.12      timbl     439: PUBLIC HTStream* HTPlainToHTML ARGS5(
                    440:        HTRequest *,            request,
                    441:        void *,                 param,
                    442:        HTFormat,               input_format,
                    443:        HTFormat,               output_format,
                    444:        HTStream *,             output_stream)
2.1       timbl     445: {
2.13      frystyk   446:     BOOL present[MAX_ATTRIBUTES];      /* Flags: attribute is present? */
                    447:     CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18      luotonen  448:     HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3       timbl     449:     if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13      frystyk   450:     
                    451:     memset(present, '\0', MAX_ATTRIBUTES);
                    452:     memset(value, '\0', MAX_ATTRIBUTES*sizeof(char *));
                    453:     
                    454:     me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12      timbl     455:     me->dtd = &HTMLP_dtd;
                    456:     me->target = output_stream;
2.13      frystyk   457:     me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
                    458:     me->write_pointer = me->buffer;
2.21      timbl     459:     flush_breaks(me);
2.13      frystyk   460:     
                    461:     HTMLGen_start_element(me, HTML_HTML, present, value);
                    462:     HTMLGen_start_element(me, HTML_BODY, present, value);
                    463:     HTMLGen_start_element(me, HTML_PRE, present, value);
                    464: 
2.7       timbl     465:     return (HTStream*) me;
2.1       timbl     466: }
2.13      frystyk   467: 
                    468: 
2.17      timbl     469: /*     A safe version for making 7-bit restricted HTML
                    470: **     Beware that thsi makes it horrible for the Scandinavians
                    471: **     to actually read it.
                    472: */
                    473: 
                    474: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
                    475:        HTRequest *,            request,
                    476:        void *,                 param,
                    477:        HTFormat,               input_format,
                    478:        HTFormat,               output_format,
                    479:        HTStream *,             output_stream)
2.13      frystyk   480: 
2.17      timbl     481: {
                    482:     HTStream* me = HTPlainToHTML(request,param,input_format,
                    483:                output_format, output_stream);
                    484:     ((HTStructured*)me)->seven_bit = YES;
                    485:     return me;
                    486: }
2.1       timbl     487: 

Webmaster