Annotation of libwww/Library/src/HTMLGen.c, revision 2.31
2.25 frystyk 1: /* HTMLGen.c
2: ** HTML GENERATOR
3: **
2.29 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.25 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This version of the HTML object sends HTML markup to the output stream.
8: **
9: ** Bugs: Line wrapping is not done at all.
10: ** All data handled as PCDATA.
11: ** Should convert old XMP, LISTING and PLAINTEXT to PRE.
12: **
13: ** It is not obvious to me right now whether the HEAD should be generated
2.7 timbl 14: ** from the incoming data or the anchor. Currently it is from the former
2.17 timbl 15: ** which is cleanest. TBL
2.22 duns 16: **
17: ** HISTORY:
18: ** 8 Jul 94 FM Insulate free() from _free structure element.
19: **
2.1 timbl 20: */
21:
2.27 frystyk 22: /* Library include files */
23: #include "tcp.h"
24: #include "HTUtils.h"
2.12 timbl 25: #include "HTMLPDTD.h"
2.31 ! frystyk 26: #include "HTStruct.h"
2.1 timbl 27: #include "HTFormat.h"
2.31 ! frystyk 28: #include "HTFWrite.h"
2.23 frystyk 29: #include "HTMLGen.h" /* Implemented here */
30:
#define BUFFER_SIZE     80      /* Line buffer attempts to make neat breaks */
#define MAX_CLEANNESS   10      /* Break tables have MAX_CLEANNESS+1 slots */

/*
**  Shorthands for calling the methods of the target stream.  They expect
**  a variable `me' with a `target' member to be in scope at the call site.
*/
#define PUT_CHAR(c)     (*me->target->isa->put_character)(me->target, (c))
#define PUT_STR(s)      (*me->target->isa->put_string)(me->target, (s))
#define PUT_BLOCK(s,l)  (*me->target->isa->put_block)(me->target, (s), (l))
2.1 timbl 37:
2.31 ! frystyk 38: /* HTML Generator Object */
2.1 timbl 39: struct _HTStream {
2.31 ! frystyk 40: CONST HTStreamClass * isa;
! 41: HTStream * target;
2.1 timbl 42: };
43:
44: struct _HTStructured {
2.31 ! frystyk 45: CONST HTStructuredClass * isa;
! 46: HTStream * target;
! 47: CONST SGML_dtd * dtd;
! 48: BOOL seven_bit; /* restrict output */
2.7 timbl 49:
2.31 ! frystyk 50: char buffer[BUFFER_SIZE+1];
! 51: char * write_pointer;
! 52: char * line_break [MAX_CLEANNESS+1];
! 53: int cleanness;
! 54: BOOL overflowed;
! 55: BOOL delete_line_break_char[MAX_CLEANNESS+1];
! 56: char preformatted;
2.1 timbl 57: };
58:
2.17 timbl 59: /* OUTPUT FUNCTIONS
60: **
61: ** These functions output the finished SGML stream doing the
62: ** line wrap
63: */
64:
2.7 timbl 65: /* Flush Buffer
66: ** ------------
67: */
2.21 timbl 68:
69: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
70: {
71: int i;
72: for (i=0; i<= MAX_CLEANNESS; i++) {
73: me->line_break[i] = NULL;
74: }
75: }
76:
77:
2.28 frystyk 78: PRIVATE int HTMLGen_flush ARGS1(HTStructured *, me)
2.7 timbl 79: {
2.31 ! frystyk 80: PUT_BLOCK(me->buffer, me->write_pointer - me->buffer);
2.7 timbl 81: me->write_pointer = me->buffer;
2.21 timbl 82: flush_breaks(me);
2.7 timbl 83: me->cleanness = 0;
2.28 frystyk 84: return HT_OK;
2.21 timbl 85: }
86:
87:
88: /* Weighted optional line break
89: **
90: ** We keep track of all the breaks for when we chop the line
91: */
92:
93: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
94: BOOL, dlbc)
95: {
96: me->line_break[new_cleanness] =
97: dlbc ? me->write_pointer - 1 /* Point to space */
98: : me->write_pointer ; /* point to gap */
99: me->delete_line_break_char[new_cleanness] = dlbc;
100: if (new_cleanness >= me->cleanness)
101: me->cleanness = new_cleanness;
2.7 timbl 102: }
103:
104:
2.1 timbl 105: /* Character handling
106: ** ------------------
2.8 timbl 107: **
108: ** The tricky bits are the line break handling. This attempts
109: ** to synchrononise line breaks on sentence or phrase ends. This
110: ** is important if one stores SGML files in a line-oriented code
111: ** repository, so that if a small change is made, line ends don't
112: ** shift in a ripple-through to apparently change a large part of the
113: ** file. We give extra "cleanness" to spaces appearing directly
114: ** after periods (full stops), [semi]colons and commas.
115: ** This should make the source files easier to read and modify
2.17 timbl 116: ** by hand, too, though this is not a primary design consideration. TBL
2.1 timbl 117: */
2.21 timbl 118: PRIVATE char delims[] = ",;:."; /* @@ english bias */
2.28 frystyk 119: PRIVATE int HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1 timbl 120: {
2.7 timbl 121:
122: *me->write_pointer++ = c;
123:
2.21 timbl 124: if (c=='\n') { /* Newlines */
125: if (me->preformatted) {
126: HTMLGen_flush(me);
2.28 frystyk 127: return HT_OK;
2.21 timbl 128: } else {
129: me->write_pointer[-1] = c = ' '; /* Treat same as space */
130: }
2.7 timbl 131: }
132:
2.21 timbl 133: /* Figure our whether we can break at this point
134: */
2.7 timbl 135: if ((!me->preformatted && c==' ')) {
2.8 timbl 136: int new_cleanness = 1;
137: if (me->write_pointer > (me->buffer + 1)) {
2.9 luotonen 138: char * p;
2.11 timbl 139: p = strchr(delims, me->write_pointer[-2]);
2.21 timbl 140: if (p) new_cleanness = p - delims + 4;
2.8 timbl 141: }
2.21 timbl 142: allow_break(me, new_cleanness, YES);
2.7 timbl 143: }
144:
2.21 timbl 145: /* Flush buffer out when full, or whenever the line is over
146: the nominal maximum and we can break at all
147: */
148: if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
149: || (me->overflowed && me->cleanness)) {
150: if (me->cleanness) {
151: char line_break_char = me->line_break[me->cleanness][0];
152: char * saved = me->line_break[me->cleanness];
2.8 timbl 153:
2.21 timbl 154: if (me->delete_line_break_char[me->cleanness]) saved++;
155: me->line_break[me->cleanness][0] = '\n';
2.31 ! frystyk 156: PUT_BLOCK(me->buffer, me->line_break[me->cleanness]-me->buffer+1);
2.21 timbl 157: me->line_break[me->cleanness][0] = line_break_char;
2.7 timbl 158: { /* move next line in */
2.8 timbl 159: char * p=saved;
160: char *q;
161: for(q=me->buffer; p < me->write_pointer; )
2.7 timbl 162: *q++ = *p++;
163: }
164: me->cleanness = 0;
2.21 timbl 165: /* Now we have to check whether ther are any perfectly good breaks
166: ** which weren't good enough for the last line but may be
167: ** good enough for the next
168: */
169: {
170: int i;
171: for(i=0; i <= MAX_CLEANNESS; i++) {
172: if (me->line_break[i] > saved) {
173: me->line_break[i] = me->line_break[i] -
174: (saved-me->buffer);
175: me->cleanness = i;
176: } else {
177: me->line_break[i] = NULL;
178: }
179: }
180: }
181:
2.8 timbl 182: me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21 timbl 183: me->overflowed = NO;
184: } else { /* No break- just output with no newline */
2.31 ! frystyk 185: PUT_BLOCK(me->buffer, me->write_pointer - me->buffer);
2.8 timbl 186: me->write_pointer = me->buffer;
2.21 timbl 187: flush_breaks(me);
188: me->overflowed = YES;
2.7 timbl 189: }
190: }
2.28 frystyk 191: return HT_OK;
2.1 timbl 192: }
193:
194:
195: /* String handling
196: ** ---------------
197: */
2.28 frystyk 198: PRIVATE int HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
2.17 timbl 199: {
2.28 frystyk 200: while (*s)
201: HTMLGen_output_character(me, *s++);
202: return HT_OK;
2.17 timbl 203: }
204:
205:
206: /* INPUT FUNCTIONS
207: **
208: ** These take data from the structured stream. In the input
209: ** stream, entities are in raw form. The seven_bit flag controls
210: ** whether the ISO Latin-1 characters are represented in SGML entity
211: ** form. This is only recommended for viewing on older non-latin-1
212: ** capable equipment, or for mailing for example.
213: **
214: ** Bug: assumes local encoding is ISO!
215: */
2.28 frystyk 216: PRIVATE int HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
2.17 timbl 217: {
218: if (c=='&') HTMLGen_output_string(me, "&");
219: else if (c=='<') HTMLGen_output_string(me, "<");
220: else if (me->seven_bit && ((unsigned char)c > 127)) {
221: char temp[8];
222: sprintf(temp, "&%d;", c);
223: HTMLGen_output_string(me, temp);
2.28 frystyk 224: } else
225: HTMLGen_output_character(me, c);
226: return HT_OK;
2.17 timbl 227: }
228:
2.28 frystyk 229: PRIVATE int HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1 timbl 230: {
2.28 frystyk 231: while (*s)
232: HTMLGen_put_character(me, *s++);
233: return HT_OK;
2.1 timbl 234: }
235:
2.28 frystyk 236: PRIVATE int HTMLGen_write ARGS3(HTStructured *, me, CONST char*, b, int, l)
2.1 timbl 237: {
2.28 frystyk 238: while (l-- > 0)
239: HTMLGen_put_character(me, *b++);
240: return HT_OK;
2.1 timbl 241: }
242:
243:
244: /* Start Element
245: ** -------------
2.7 timbl 246: **
247: ** Within the opening tag, there may be spaces
248: ** and the line may be broken at these spaces.
2.1 timbl 249: */
250: PRIVATE void HTMLGen_start_element ARGS4(
2.3 timbl 251: HTStructured *, me,
2.2 timbl 252: int, element_number,
253: CONST BOOL*, present,
254: CONST char **, value)
2.1 timbl 255: {
256: int i;
2.12 timbl 257: HTTag * tag = &me->dtd->tags[element_number];
2.1 timbl 258:
2.20 timbl 259: /* Control line breaks allowed within tag! */
260: int was_preformatted = me->preformatted; /* save state */
261: me->preformatted = 1; /* Can break between attributes */
262:
2.17 timbl 263: HTMLGen_output_character(me, '<');
264: HTMLGen_output_string(me, tag->name);
2.1 timbl 265: if (present) for (i=0; i< tag->number_of_attributes; i++) {
266: if (present[i]) {
2.17 timbl 267: HTMLGen_output_character(me, ' ');
2.21 timbl 268: allow_break(me, 1, YES);
2.17 timbl 269: HTMLGen_output_string(me, tag->attributes[i].name);
2.1 timbl 270: if (value[i]) {
2.17 timbl 271: HTMLGen_output_string(me, "=\"");
272: HTMLGen_output_string(me, value[i]);
273: HTMLGen_output_character(me, '"');
2.1 timbl 274: }
275: }
276: }
2.20 timbl 277: me->preformatted = was_preformatted; /* Restore state */
278:
2.14 frystyk 279: /* Nested PRE is no more a problem! */
280: if (element_number == HTML_PRE)
281: me->preformatted++;
2.19 timbl 282:
283: HTMLGen_output_character(me, '>');
2.7 timbl 284:
2.20 timbl 285: /* Here is a funny one. In PRE, newlines are significant, except of
286: course for one after the <PRE> which is ignored. This means that
287: we MUST put in a dummy one after the <PRE> to protect any real newline
288: within the pre section.
289:
290: However, *within* a PRE section, although we can break after
291: (for example) emphasis start tags, it will probably confuse some
292: parsers so we won't.*/
293:
294: if (element_number == HTML_PRE) {
295: HTMLGen_output_character(me, '\n');
296: } else if (!me->preformatted &&
297: tag->contents != SGML_EMPTY) { /* can break after element start */
2.21 timbl 298: allow_break(me, 3, NO);
2.8 timbl 299: }
2.1 timbl 300: }
301:
302:
2.17 timbl 303: /* End Element
304: ** -----------
2.1 timbl 305: **
2.16 timbl 306: ** The rules for insertring CR LF into SGML are weird, strict, and
307: ** nonintitive.
2.20 timbl 308: ** See comment also about PRE above.
2.1 timbl 309: */
2.3 timbl 310: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.24 frystyk 311: int , element_number)
2.1 timbl 312: {
2.20 timbl 313: if (element_number == HTML_PRE) {
314: HTMLGen_output_character(me, '\n');
315: } else if (!me->preformatted) { /* can break before element end */
2.21 timbl 316: allow_break(me, 1, NO);
2.8 timbl 317: }
2.17 timbl 318: HTMLGen_output_string(me, "</");
319: HTMLGen_output_string(me, me->dtd->tags[element_number].name);
320: HTMLGen_output_character(me, '>'); /* NO break after. TBL 940501 */
2.14 frystyk 321: if (element_number == HTML_PRE && me->preformatted)
322: me->preformatted--;
2.1 timbl 323: }
324:
325:
2.17 timbl 326: /* Expanding entities
327: ** ------------------
2.1 timbl 328: **
329: */
330:
2.3 timbl 331: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1 timbl 332: {
2.17 timbl 333: HTMLGen_output_character(me, '&');
334: HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
335: HTMLGen_output_character(me, ';');
2.1 timbl 336: }
337:
2.17 timbl 338: /* Free an object
339: ** --------------
2.1 timbl 340: **
341: */
2.24 frystyk 342: PRIVATE int HTMLGen_free ARGS1(HTStructured *, me)
2.1 timbl 343: {
2.21 timbl 344: HTMLGen_flush(me);
2.31 ! frystyk 345: PUT_CHAR('\n');
! 346: (*me->target->isa->_free)(me->target);
2.3 timbl 347: free(me);
2.28 frystyk 348: return HT_OK;
2.1 timbl 349: }
350:
351:
2.24 frystyk 352: PRIVATE int PlainToHTML_free ARGS1(HTStructured *, me)
2.7 timbl 353: {
354: HTMLGen_end_element(me, HTML_PRE);
355: HTMLGen_end_element(me, HTML_BODY);
356: HTMLGen_end_element(me, HTML_HTML);
357: HTMLGen_free(me);
2.28 frystyk 358: return HT_OK;
2.7 timbl 359: }
360:
361:
2.1 timbl 362:
2.24 frystyk 363: PRIVATE int HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 364: {
2.6 timbl 365: HTMLGen_free(me);
2.28 frystyk 366: return HT_ERROR;
2.1 timbl 367: }
368:
369:
2.24 frystyk 370: PRIVATE int PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 371: {
2.7 timbl 372: PlainToHTML_free(me);
2.28 frystyk 373: return HT_ERROR;
2.1 timbl 374: }
375:
376:
377:
378: /* Structured Object Class
379: ** -----------------------
380: */
2.5 timbl 381: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1 timbl 382: {
383: "text/html",
2.28 frystyk 384: HTMLGen_flush,
2.1 timbl 385: HTMLGen_free,
2.6 timbl 386: HTMLGen_abort,
2.1 timbl 387: HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write,
2.13 frystyk 388: HTMLGen_start_element, HTMLGen_end_element,
2.1 timbl 389: HTMLGen_put_entity
390: };
391:
392:
393: /* Subclass-specific Methods
394: ** -------------------------
395: */
2.31 ! frystyk 396: PUBLIC HTStructured* HTMLGenerator ARGS5(HTRequest *, request,
! 397: void *, param,
! 398: HTFormat, input_format,
! 399: HTFormat, output_format,
! 400: HTStream *, output_stream)
2.1 timbl 401: {
2.31 ! frystyk 402: HTStructured* me = (HTStructured*)calloc(1, sizeof(*me));
2.3 timbl 403: if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
404: me->isa = &HTMLGeneration;
2.12 timbl 405: me->dtd = &HTMLP_dtd;
2.31 ! frystyk 406: if ((me->target = HTStreamStack(WWW_HTML, output_format, output_stream,
! 407: request, YES)) == NULL) {
! 408: if (STREAM_TRACE)
! 409: fprintf(TDEST, "HTMLGen..... Can't convert to media type\n");
! 410: me->target = HTBlackHole();
! 411: }
2.7 timbl 412: me->write_pointer = me->buffer;
2.21 timbl 413: flush_breaks(me);
2.3 timbl 414: return me;
2.1 timbl 415: }
416:
417: /* Stream Object Class
418: ** -------------------
419: **
2.2 timbl 420: ** This object just converts a plain text stream into HTML
2.12 timbl 421: ** It is officially a structured stream but only the stream bits exist.
2.2 timbl 422: ** This is just the easiest way of typecasting all the routines.
2.1 timbl 423: */
2.2 timbl 424: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1 timbl 425: {
426: "plaintexttoHTML",
2.28 frystyk 427: HTMLGen_flush,
2.13 frystyk 428: PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */
2.6 timbl 429: PlainToHTML_abort,
2.1 timbl 430: HTMLGen_put_character,
431: HTMLGen_put_string,
432: HTMLGen_write,
2.2 timbl 433: NULL, /* Structured stuff */
434: NULL,
435: NULL
2.1 timbl 436: };
437:
438:
439: /* HTConverter from plain text to HTML Stream
440: ** ------------------------------------------
2.13 frystyk 441: **
442: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1 timbl 443: */
444:
2.12 timbl 445: PUBLIC HTStream* HTPlainToHTML ARGS5(
446: HTRequest *, request,
447: void *, param,
448: HTFormat, input_format,
449: HTFormat, output_format,
450: HTStream *, output_stream)
2.1 timbl 451: {
2.13 frystyk 452: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
453: CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18 luotonen 454: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 455: if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13 frystyk 456:
457: memset(present, '\0', MAX_ATTRIBUTES);
2.27 frystyk 458: memset((char *) value, '\0', MAX_ATTRIBUTES*sizeof(char *));
2.13 frystyk 459:
460: me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12 timbl 461: me->dtd = &HTMLP_dtd;
462: me->target = output_stream;
2.13 frystyk 463: me->write_pointer = me->buffer;
2.21 timbl 464: flush_breaks(me);
2.13 frystyk 465:
466: HTMLGen_start_element(me, HTML_HTML, present, value);
467: HTMLGen_start_element(me, HTML_BODY, present, value);
468: HTMLGen_start_element(me, HTML_PRE, present, value);
469:
2.7 timbl 470: return (HTStream*) me;
2.1 timbl 471: }
2.13 frystyk 472:
473:
2.17 timbl 474: /* A safe version for making 7-bit restricted HTML
475: ** Beware that thsi makes it horrible for the Scandinavians
476: ** to actually read it.
2.30 frystyk 477: ** ehh - not horrible - THIS REALLY PISSES THEM OFF - Henrik ;-)
2.17 timbl 478: */
479:
480: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
481: HTRequest *, request,
482: void *, param,
483: HTFormat, input_format,
484: HTFormat, output_format,
485: HTStream *, output_stream)
2.13 frystyk 486:
2.17 timbl 487: {
488: HTStream* me = HTPlainToHTML(request,param,input_format,
489: output_format, output_stream);
490: ((HTStructured*)me)->seven_bit = YES;
491: return me;
492: }
2.1 timbl 493:
Webmaster