Annotation of libwww/Library/src/HTMLGen.c, revision 2.43
2.25 frystyk 1: /* HTMLGen.c
2: ** HTML GENERATOR
3: **
2.29 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.25 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.43 ! frystyk 6: ** @(#) $Id: HTMLGen.c,v 2.42 1996/04/12 17:47:48 frystyk Exp $
2.1 timbl 7: **
8: ** This version of the HTML object sends HTML markup to the output stream.
9: **
10: ** Bugs: Line wrapping is not done at all.
11: ** All data handled as PCDATA.
12: ** Should convert old XMP, LISTING and PLAINTEXT to PRE.
13: **
14: ** It is not obvious to me right now whether the HEAD should be generated
2.7 timbl 15: ** from the incoming data or the anchor. Currently it is from the former
2.17 timbl 16: ** which is cleanest. TBL
2.22 duns 17: **
18: ** HISTORY:
19: ** 8 Jul 94 FM Insulate free() from _free structure element.
20: **
2.1 timbl 21: */
22:
2.27 frystyk 23: /* Library include files */
2.43 ! frystyk 24: #include "wwwsys.h"
2.27 frystyk 25: #include "HTUtils.h"
2.12 timbl 26: #include "HTMLPDTD.h"
2.31 frystyk 27: #include "HTStruct.h"
2.1 timbl 28: #include "HTFormat.h"
2.23 frystyk 29: #include "HTMLGen.h" /* Implemented here */
30:
/* Capacity of the line buffer in which output is assembled so that
** neat line breaks can be chosen before the text is passed on.
*/
#define BUFFER_SIZE	80

/* Highest break weight ("cleanness") that can be recorded */
#define MAX_CLEANNESS	10

/* Shorthands for calling the target stream's methods. They expect a
** variable named `me` with a `target` member to be in scope.
*/
#define PUT_CHAR(c)	(*me->target->isa->put_character)(me->target, (c))
#define PUT_STR(s)	(*me->target->isa->put_string)(me->target, (s))
#define PUT_BLOCK(s,l)	(*me->target->isa->put_block)(me->target, (s), (l))
2.1 timbl 37:
2.31 frystyk 38: /* HTML Generator Object */
2.1 timbl 39: struct _HTStream {
2.41 frystyk 40: const HTStreamClass * isa;
2.31 frystyk 41: HTStream * target;
2.1 timbl 42: };
43:
44: struct _HTStructured {
2.41 frystyk 45: const HTStructuredClass * isa;
2.31 frystyk 46: HTStream * target;
2.41 frystyk 47: const SGML_dtd * dtd;
2.31 frystyk 48: BOOL seven_bit; /* restrict output */
2.7 timbl 49:
2.31 frystyk 50: char buffer[BUFFER_SIZE+1];
51: char * write_pointer;
52: char * line_break [MAX_CLEANNESS+1];
53: int cleanness;
54: BOOL overflowed;
55: BOOL delete_line_break_char[MAX_CLEANNESS+1];
56: char preformatted;
2.1 timbl 57: };
58:
2.17 timbl 59: /* OUTPUT FUNCTIONS
60: **
61: ** These function output the finished SGML stream doing the
62: ** line wrap
63: */
64:
2.7 timbl 65: /* Flush Buffer
66: ** ------------
67: */
2.21 timbl 68:
2.37 frystyk 69: PRIVATE void flush_breaks (HTStructured * me)
2.21 timbl 70: {
71: int i;
72: for (i=0; i<= MAX_CLEANNESS; i++) {
73: me->line_break[i] = NULL;
74: }
75: }
76:
77:
2.37 frystyk 78: PRIVATE int HTMLGen_flush (HTStructured * me)
2.7 timbl 79: {
2.31 frystyk 80: PUT_BLOCK(me->buffer, me->write_pointer - me->buffer);
2.7 timbl 81: me->write_pointer = me->buffer;
2.21 timbl 82: flush_breaks(me);
2.7 timbl 83: me->cleanness = 0;
2.28 frystyk 84: return HT_OK;
2.21 timbl 85: }
86:
87:
88: /* Weighted optional line break
89: **
90: ** We keep track of all the breaks for when we chop the line
91: */
92:
2.37 frystyk 93: PRIVATE void allow_break (HTStructured * me, int new_cleanness, BOOL dlbc)
2.21 timbl 94: {
95: me->line_break[new_cleanness] =
96: dlbc ? me->write_pointer - 1 /* Point to space */
97: : me->write_pointer ; /* point to gap */
98: me->delete_line_break_char[new_cleanness] = dlbc;
99: if (new_cleanness >= me->cleanness)
100: me->cleanness = new_cleanness;
2.7 timbl 101: }
102:
103:
2.1 timbl 104: /* Character handling
105: ** ------------------
2.8 timbl 106: **
107: ** The tricky bits are the line break handling. This attempts
108: ** to synchronise line breaks on sentence or phrase ends. This
109: ** is important if one stores SGML files in a line-oriented code
110: ** repository, so that if a small change is made, line ends don't
111: ** shift in a ripple-through to apparently change a large part of the
112: ** file. We give extra "cleanness" to spaces appearing directly
113: ** after periods (full stops), [semi]colons and commas.
114: ** This should make the source files easier to read and modify
2.17 timbl 115: ** by hand, too, though this is not a primary design consideration. TBL
2.1 timbl 116: */
2.21 timbl 117: PRIVATE char delims[] = ",;:."; /* @@ english bias */
2.37 frystyk 118: PRIVATE int HTMLGen_output_character (HTStructured * me, char c)
2.1 timbl 119: {
2.7 timbl 120:
121: *me->write_pointer++ = c;
122:
2.21 timbl 123: if (c=='\n') { /* Newlines */
124: if (me->preformatted) {
125: HTMLGen_flush(me);
2.28 frystyk 126: return HT_OK;
2.21 timbl 127: } else {
128: me->write_pointer[-1] = c = ' '; /* Treat same as space */
129: }
2.7 timbl 130: }
131:
2.21 timbl 132: /* Figure our whether we can break at this point
133: */
2.7 timbl 134: if ((!me->preformatted && c==' ')) {
2.8 timbl 135: int new_cleanness = 1;
136: if (me->write_pointer > (me->buffer + 1)) {
2.9 luotonen 137: char * p;
2.11 timbl 138: p = strchr(delims, me->write_pointer[-2]);
2.21 timbl 139: if (p) new_cleanness = p - delims + 4;
2.8 timbl 140: }
2.21 timbl 141: allow_break(me, new_cleanness, YES);
2.7 timbl 142: }
143:
2.21 timbl 144: /* Flush buffer out when full, or whenever the line is over
145: the nominal maximum and we can break at all
146: */
147: if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
148: || (me->overflowed && me->cleanness)) {
149: if (me->cleanness) {
150: char line_break_char = me->line_break[me->cleanness][0];
151: char * saved = me->line_break[me->cleanness];
2.8 timbl 152:
2.21 timbl 153: if (me->delete_line_break_char[me->cleanness]) saved++;
154: me->line_break[me->cleanness][0] = '\n';
2.31 frystyk 155: PUT_BLOCK(me->buffer, me->line_break[me->cleanness]-me->buffer+1);
2.21 timbl 156: me->line_break[me->cleanness][0] = line_break_char;
2.7 timbl 157: { /* move next line in */
2.8 timbl 158: char * p=saved;
159: char *q;
160: for(q=me->buffer; p < me->write_pointer; )
2.7 timbl 161: *q++ = *p++;
162: }
163: me->cleanness = 0;
2.21 timbl 164: /* Now we have to check whether ther are any perfectly good breaks
165: ** which weren't good enough for the last line but may be
166: ** good enough for the next
167: */
168: {
169: int i;
170: for(i=0; i <= MAX_CLEANNESS; i++) {
171: if (me->line_break[i] > saved) {
172: me->line_break[i] = me->line_break[i] -
173: (saved-me->buffer);
174: me->cleanness = i;
175: } else {
176: me->line_break[i] = NULL;
177: }
178: }
179: }
180:
2.8 timbl 181: me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21 timbl 182: me->overflowed = NO;
183: } else { /* No break- just output with no newline */
2.31 frystyk 184: PUT_BLOCK(me->buffer, me->write_pointer - me->buffer);
2.8 timbl 185: me->write_pointer = me->buffer;
2.21 timbl 186: flush_breaks(me);
187: me->overflowed = YES;
2.7 timbl 188: }
189: }
2.28 frystyk 190: return HT_OK;
2.1 timbl 191: }
192:
193:
194: /* String handling
195: ** ---------------
196: */
2.41 frystyk 197: PRIVATE int HTMLGen_output_string (HTStructured * me, const char* s)
2.17 timbl 198: {
2.28 frystyk 199: while (*s)
200: HTMLGen_output_character(me, *s++);
201: return HT_OK;
2.17 timbl 202: }
203:
204:
205: /* INPUT FUNCTIONS
206: **
207: ** These take data from the structured stream. In the input
208: ** stream, entities are in raw form. The seven_bit flag controls
209: ** whether the ISO Latin-1 characters are represented in SGML entity
210: ** form. This is only recommended for viewing on older non-latin-1
211: ** capable equipment, or for mailing for example.
212: **
213: ** Bug: assumes local encoding is ISO!
214: */
2.37 frystyk 215: PRIVATE int HTMLGen_put_character (HTStructured * me, char c)
2.17 timbl 216: {
217: if (c=='&') HTMLGen_output_string(me, "&");
218: else if (c=='<') HTMLGen_output_string(me, "<");
219: else if (me->seven_bit && ((unsigned char)c > 127)) {
220: char temp[8];
221: sprintf(temp, "&%d;", c);
222: HTMLGen_output_string(me, temp);
2.28 frystyk 223: } else
224: HTMLGen_output_character(me, c);
225: return HT_OK;
2.17 timbl 226: }
227:
2.41 frystyk 228: PRIVATE int HTMLGen_put_string (HTStructured * me, const char* s)
2.1 timbl 229: {
2.28 frystyk 230: while (*s)
231: HTMLGen_put_character(me, *s++);
232: return HT_OK;
2.1 timbl 233: }
234:
2.41 frystyk 235: PRIVATE int HTMLGen_write (HTStructured * me, const char* b, int l)
2.1 timbl 236: {
2.28 frystyk 237: while (l-- > 0)
238: HTMLGen_put_character(me, *b++);
239: return HT_OK;
2.1 timbl 240: }
241:
242:
243: /* Start Element
244: ** -------------
2.7 timbl 245: **
246: ** Within the opening tag, there may be spaces
247: ** and the line may be broken at these spaces.
2.1 timbl 248: */
2.37 frystyk 249: PRIVATE void HTMLGen_start_element (
250: HTStructured * me,
251: int element_number,
2.41 frystyk 252: const BOOL* present,
253: const char ** value)
2.1 timbl 254: {
255: int i;
2.12 timbl 256: HTTag * tag = &me->dtd->tags[element_number];
2.1 timbl 257:
2.20 timbl 258: /* Control line breaks allowed within tag! */
259: int was_preformatted = me->preformatted; /* save state */
260: me->preformatted = 1; /* Can break between attributes */
261:
2.17 timbl 262: HTMLGen_output_character(me, '<');
263: HTMLGen_output_string(me, tag->name);
2.1 timbl 264: if (present) for (i=0; i< tag->number_of_attributes; i++) {
265: if (present[i]) {
2.17 timbl 266: HTMLGen_output_character(me, ' ');
2.21 timbl 267: allow_break(me, 1, YES);
2.17 timbl 268: HTMLGen_output_string(me, tag->attributes[i].name);
2.1 timbl 269: if (value[i]) {
2.17 timbl 270: HTMLGen_output_string(me, "=\"");
271: HTMLGen_output_string(me, value[i]);
272: HTMLGen_output_character(me, '"');
2.1 timbl 273: }
274: }
275: }
2.20 timbl 276: me->preformatted = was_preformatted; /* Restore state */
277:
2.14 frystyk 278: /* Nested PRE is no more a problem! */
279: if (element_number == HTML_PRE)
280: me->preformatted++;
2.19 timbl 281:
282: HTMLGen_output_character(me, '>');
2.7 timbl 283:
2.20 timbl 284: /* Here is a funny one. In PRE, newlines are significant, except of
285: course for one after the <PRE> which is ignored. This means that
286: we MUST put in a dummy one after the <PRE> to protect any real newline
287: within the pre section.
288:
289: However, *within* a PRE section, although we can break after
290: (for example) emphasis start tags, it will probably confuse some
291: parsers so we won't.*/
292:
293: if (element_number == HTML_PRE) {
294: HTMLGen_output_character(me, '\n');
295: } else if (!me->preformatted &&
296: tag->contents != SGML_EMPTY) { /* can break after element start */
2.21 timbl 297: allow_break(me, 3, NO);
2.8 timbl 298: }
2.1 timbl 299: }
300:
301:
2.17 timbl 302: /* End Element
303: ** -----------
2.1 timbl 304: **
2.16 timbl 305: ** The rules for inserting CR LF into SGML are weird, strict, and
306: ** nonintuitive.
2.20 timbl 307: ** See comment also about PRE above.
2.1 timbl 308: */
2.37 frystyk 309: PRIVATE void HTMLGen_end_element (HTStructured * me, int element_number)
2.1 timbl 310: {
2.20 timbl 311: if (element_number == HTML_PRE) {
312: HTMLGen_output_character(me, '\n');
313: } else if (!me->preformatted) { /* can break before element end */
2.21 timbl 314: allow_break(me, 1, NO);
2.8 timbl 315: }
2.17 timbl 316: HTMLGen_output_string(me, "</");
317: HTMLGen_output_string(me, me->dtd->tags[element_number].name);
318: HTMLGen_output_character(me, '>'); /* NO break after. TBL 940501 */
2.14 frystyk 319: if (element_number == HTML_PRE && me->preformatted)
320: me->preformatted--;
2.1 timbl 321: }
322:
323:
2.17 timbl 324: /* Expanding entities
325: ** ------------------
2.1 timbl 326: **
327: */
328:
2.37 frystyk 329: PRIVATE void HTMLGen_put_entity (HTStructured * me, int entity_number)
2.1 timbl 330: {
2.17 timbl 331: HTMLGen_output_character(me, '&');
332: HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
333: HTMLGen_output_character(me, ';');
2.1 timbl 334: }
335:
2.17 timbl 336: /* Free an object
337: ** --------------
2.1 timbl 338: **
339: */
2.37 frystyk 340: PRIVATE int HTMLGen_free (HTStructured * me)
2.1 timbl 341: {
2.21 timbl 342: HTMLGen_flush(me);
2.31 frystyk 343: PUT_CHAR('\n');
344: (*me->target->isa->_free)(me->target);
2.39 frystyk 345: HT_FREE(me);
2.28 frystyk 346: return HT_OK;
2.1 timbl 347: }
348:
349:
2.37 frystyk 350: PRIVATE int PlainToHTML_free (HTStructured * me)
2.7 timbl 351: {
352: HTMLGen_end_element(me, HTML_PRE);
353: HTMLGen_end_element(me, HTML_BODY);
354: HTMLGen_end_element(me, HTML_HTML);
355: HTMLGen_free(me);
2.28 frystyk 356: return HT_OK;
2.7 timbl 357: }
358:
359:
2.1 timbl 360:
2.37 frystyk 361: PRIVATE int HTMLGen_abort (HTStructured * me, HTList * e)
2.1 timbl 362: {
2.6 timbl 363: HTMLGen_free(me);
2.28 frystyk 364: return HT_ERROR;
2.1 timbl 365: }
366:
367:
2.37 frystyk 368: PRIVATE int PlainToHTML_abort (HTStructured * me, HTList * e)
2.1 timbl 369: {
2.7 timbl 370: PlainToHTML_free(me);
2.28 frystyk 371: return HT_ERROR;
2.1 timbl 372: }
373:
374:
375:
376: /* Structured Object Class
377: ** -----------------------
378: */
2.41 frystyk 379: PRIVATE const HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1 timbl 380: {
381: "text/html",
2.28 frystyk 382: HTMLGen_flush,
2.1 timbl 383: HTMLGen_free,
2.6 timbl 384: HTMLGen_abort,
2.1 timbl 385: HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write,
2.13 frystyk 386: HTMLGen_start_element, HTMLGen_end_element,
2.1 timbl 387: HTMLGen_put_entity
388: };
389:
390:
391: /* Subclass-specific Methods
392: ** -------------------------
393: */
2.37 frystyk 394: PUBLIC HTStructured* HTMLGenerator (HTRequest * request,
395: void * param,
396: HTFormat input_format,
397: HTFormat output_format,
398: HTStream * output_stream)
2.1 timbl 399: {
2.39 frystyk 400: HTStructured* me;
401: if ((me = (HTStructured *) HT_CALLOC(1, sizeof(HTStructured))) == NULL)
402: HT_OUTOFMEM("HTMLGenerator");
2.3 timbl 403: me->isa = &HTMLGeneration;
2.12 timbl 404: me->dtd = &HTMLP_dtd;
2.31 frystyk 405: if ((me->target = HTStreamStack(WWW_HTML, output_format, output_stream,
406: request, YES)) == NULL) {
407: if (STREAM_TRACE)
2.40 eric 408: HTTrace("HTMLGen..... Can't convert to media type\n");
2.39 frystyk 409: HT_FREE(me);
2.38 frystyk 410: me->target = HTErrorStream();
2.31 frystyk 411: }
2.7 timbl 412: me->write_pointer = me->buffer;
2.21 timbl 413: flush_breaks(me);
2.3 timbl 414: return me;
2.1 timbl 415: }
416:
417: /* Stream Object Class
418: ** -------------------
419: **
2.2 timbl 420: ** This object just converts a plain text stream into HTML
2.12 timbl 421: ** It is officially a structured stream but only the stream bits exist.
2.2 timbl 422: ** This is just the easiest way of typecasting all the routines.
2.1 timbl 423: */
2.41 frystyk 424: PRIVATE const HTStructuredClass PlainToHTMLConversion =
2.1 timbl 425: {
426: "plaintexttoHTML",
2.28 frystyk 427: HTMLGen_flush,
2.13 frystyk 428: PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */
2.6 timbl 429: PlainToHTML_abort,
2.1 timbl 430: HTMLGen_put_character,
431: HTMLGen_put_string,
432: HTMLGen_write,
2.2 timbl 433: NULL, /* Structured stuff */
434: NULL,
435: NULL
2.1 timbl 436: };
437:
438:
439: /* HTConverter from plain text to HTML Stream
440: ** ------------------------------------------
2.13 frystyk 441: **
442: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1 timbl 443: */
444:
2.37 frystyk 445: PUBLIC HTStream* HTPlainToHTML (HTRequest * request,
446: void * param,
447: HTFormat input_format,
448: HTFormat output_format,
449: HTStream * output_stream)
2.1 timbl 450: {
2.13 frystyk 451: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
2.41 frystyk 452: const char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.39 frystyk 453: HTStructured* me;
454: if ((me = (HTStructured *) HT_CALLOC(1,sizeof(*me))) == NULL)
455: HT_OUTOFMEM("PlainToHTML");
2.13 frystyk 456:
2.32 frystyk 457: memset((void *) present, '\0', MAX_ATTRIBUTES);
458: memset((void *) value, '\0', MAX_ATTRIBUTES*sizeof(char *));
2.13 frystyk 459:
460: me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12 timbl 461: me->dtd = &HTMLP_dtd;
462: me->target = output_stream;
2.13 frystyk 463: me->write_pointer = me->buffer;
2.21 timbl 464: flush_breaks(me);
2.13 frystyk 465:
2.33 frystyk 466: if (me->target) {
467: HTMLGen_start_element(me, HTML_HTML, present, value);
468: HTMLGen_start_element(me, HTML_BODY, present, value);
469: HTMLGen_start_element(me, HTML_PRE, present, value);
470: }
2.7 timbl 471: return (HTStream*) me;
2.1 timbl 472: }
2.13 frystyk 473:
474:
2.17 timbl 475: /* A safe version for making 7-bit restricted HTML
476: ** Beware that this makes it horrible for the Scandinavians
477: ** to actually read it.
2.30 frystyk 478: ** ehh - not horrible - THIS REALLY PISSES THEM OFF - Henrik ;-)
2.17 timbl 479: */
480:
2.37 frystyk 481: PUBLIC HTStream* HTPlainTo7BitHTML (HTRequest * request,
482: void * param,
483: HTFormat input_format,
484: HTFormat output_format,
485: HTStream * output_stream)
2.17 timbl 486: {
487: HTStream* me = HTPlainToHTML(request,param,input_format,
488: output_format, output_stream);
489: ((HTStructured*)me)->seven_bit = YES;
490: return me;
491: }
2.1 timbl 492:
Webmaster