Annotation of libwww/Library/src/HTMLGen.c, revision 2.25
2.25 ! frystyk 1: /* HTMLGen.c
! 2: ** HTML GENERATOR
! 3: **
! 4: ** (c) COPYRIGHT CERN 1994.
! 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This version of the HTML object sends HTML markup to the output stream.
8: **
9: ** Bugs: Line wrapping is not done at all.
10: ** All data handled as PCDATA.
11: ** Should convert old XMP, LISTING and PLAINTEXT to PRE.
12: **
13: ** It is not obvious to me right now whether the HEAD should be generated
2.7 timbl 14: ** from the incoming data or the anchor. Currently it is from the former
2.17 timbl 15: ** which is cleanest. TBL
2.22 duns 16: **
17: ** HISTORY:
18: ** 8 Jul 94 FM Insulate free() from _free structure element.
19: **
2.1 timbl 20: */
21:
2.12 timbl 22: #include "HTMLPDTD.h"
2.1 timbl 23: #include "HTStream.h"
24: #include "SGML.h"
25: #include "HTFormat.h"
2.23 frystyk 26: #include "HTMLGen.h" /* Implemented here */
27:
/* Capacity of the line buffer in which the generator looks for neat breaks */
#define BUFFER_SIZE	80

/*
**	Shorthands for calling the target stream's methods. They expect a
**	variable called `me' with `target' and `targetClass' members to be in
**	scope. Arguments and the whole expansion are parenthesised so that the
**	macros stay safe whatever expression they are given or used in.
*/
#define PUTC(c)		((*me->targetClass.put_character)(me->target, (c)))
/* #define PUTS(s)	((*me->targetClass.put_string)(me->target, (s))) */
#define PUTB(s,l)	((*me->targetClass.put_block)(me->target, (s), (l)))
2.1 timbl 33:
34: /* HTML Object
35: ** -----------
36: */
37:
38: struct _HTStream {
39: CONST HTStreamClass * isa;
40: HTStream * target;
41: HTStreamClass targetClass; /* COPY for speed */
42: };
43:
2.21 timbl 44: #define MAX_CLEANNESS 10
2.1 timbl 45: struct _HTStructured {
46: CONST HTStructuredClass * isa;
47: HTStream * target;
48: HTStreamClass targetClass; /* COPY for speed */
2.12 timbl 49: CONST SGML_dtd * dtd;
2.17 timbl 50: BOOL seven_bit; /* restrict output*/
2.7 timbl 51:
2.14 frystyk 52: char buffer[BUFFER_SIZE+1];
2.7 timbl 53: char * write_pointer;
2.21 timbl 54: char * line_break [MAX_CLEANNESS+1];
2.7 timbl 55: int cleanness;
2.21 timbl 56: BOOL overflowed;
57: BOOL delete_line_break_char
58: [MAX_CLEANNESS+1];
2.14 frystyk 59: char preformatted;
2.1 timbl 60: };
61:
2.17 timbl 62: /* OUTPUT FUNCTIONS
63: **
64: ** These functions output the finished SGML stream, performing the
65: ** line wrapping.
66: */
67:
2.7 timbl 68: /* Flush Buffer
69: ** ------------
70: */
2.21 timbl 71:
72: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
73: {
74: int i;
75: for (i=0; i<= MAX_CLEANNESS; i++) {
76: me->line_break[i] = NULL;
77: }
78: }
79:
80:
2.7 timbl 81: PRIVATE void HTMLGen_flush ARGS1(HTStructured *, me)
82: {
83: (*me->targetClass.put_block)(me->target,
84: me->buffer,
85: me->write_pointer - me->buffer);
86: me->write_pointer = me->buffer;
2.21 timbl 87: flush_breaks(me);
2.7 timbl 88: me->cleanness = 0;
2.21 timbl 89: }
90:
91:
92: /* Weighted optional line break
93: **
94: ** We keep track of all the breaks for when we chop the line
95: */
96:
97: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
98: BOOL, dlbc)
99: {
100: me->line_break[new_cleanness] =
101: dlbc ? me->write_pointer - 1 /* Point to space */
102: : me->write_pointer ; /* point to gap */
103: me->delete_line_break_char[new_cleanness] = dlbc;
104: if (new_cleanness >= me->cleanness)
105: me->cleanness = new_cleanness;
2.7 timbl 106: }
107:
108:
2.1 timbl 109: /* Character handling
110: ** ------------------
2.8 timbl 111: **
112: ** The tricky bits are the line break handling. This attempts
113: ** to synchronise line breaks on sentence or phrase ends. This
114: ** is important if one stores SGML files in a line-oriented code
115: ** repository, so that if a small change is made, line ends don't
116: ** shift in a ripple-through to apparently change a large part of the
117: ** file. We give extra "cleanness" to spaces appearing directly
118: ** after periods (full stops), [semi]colons and commas.
119: ** This should make the source files easier to read and modify
2.17 timbl 120: ** by hand, too, though this is not a primary design consideration. TBL
**
** Summary of what the function does with each character c:
**   1. c is appended to the line buffer.
**   2. A newline flushes the buffer and returns when me->preformatted is
**      non-zero; otherwise it is turned into a space.
**   3. Outside preformatted text, a space is recorded as a break whose
**      character may be deleted. Its weight is 1, or the index of the
**      preceding buffered character in delims plus 4 (',' 4, ';' 5,
**      ':' 6, '.' 7).
**   4. When the buffer holds BUFFER_SIZE-1 characters, or the line has
**      already overflowed and a break is now known, the line is written
**      out: up to the best break with a newline if there is one,
**      otherwise the whole buffer with no newline.
2.1 timbl 121: */
2.21 timbl 122: PRIVATE char delims[] = ",;:."; /* @@ english bias */
2.17 timbl 123: PRIVATE void HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1 timbl 124: {
2.7 timbl 125:
126: *me->write_pointer++ = c;
127:
2.21 timbl 128: if (c=='\n') { /* Newlines */
129: if (me->preformatted) {
130: HTMLGen_flush(me);
131: return;
132: } else {
133: me->write_pointer[-1] = c = ' '; /* Treat same as space */
134: }
2.7 timbl 135: }
136:
2.21 timbl 137: /* Figure out whether we can break at this point
138: */
2.7 timbl 139: if ((!me->preformatted && c==' ')) {
2.8 timbl 140: int new_cleanness = 1;
/* write_pointer[-2] is the character before the space just stored */
141: if (me->write_pointer > (me->buffer + 1)) {
2.9 luotonen 142: char * p;
2.11 timbl 143: p = strchr(delims, me->write_pointer[-2]);
2.21 timbl 144: if (p) new_cleanness = p - delims + 4;
2.8 timbl 145: }
2.21 timbl 146: allow_break(me, new_cleanness, YES);
2.7 timbl 147: }
148:
2.21 timbl 149: /* Flush buffer out when full, or whenever the line is over
150: the nominal maximum and we can break at all
151: */
152: if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
153: || (me->overflowed && me->cleanness)) {
154: if (me->cleanness) {
/* Break at the candidate with the highest recorded weight */
155: char line_break_char = me->line_break[me->cleanness][0];
156: char * saved = me->line_break[me->cleanness];
2.8 timbl 157:
/* saved becomes the first character kept for the next line */
2.21 timbl 158: if (me->delete_line_break_char[me->cleanness]) saved++;
/* Put a newline at the break just for the write, then restore the character */
159: me->line_break[me->cleanness][0] = '\n';
2.7 timbl 160: (*me->targetClass.put_block)(me->target,
161: me->buffer,
2.21 timbl 162: me->line_break[me->cleanness] - me->buffer + 1);
163: me->line_break[me->cleanness][0] = line_break_char;
2.7 timbl 164: { /* move next line in */
2.8 timbl 165: char * p=saved;
166: char *q;
167: for(q=me->buffer; p < me->write_pointer; )
2.7 timbl 168: *q++ = *p++;
169: }
170: me->cleanness = 0;
2.21 timbl 171: /* Now we have to check whether there are any perfectly good breaks
172: ** which weren't good enough for the last line but may be
173: ** good enough for the next
174: */
/* Candidates beyond saved are shifted by the same amount as the text;
   the others are dropped. As i ascends, cleanness ends up as the highest
   surviving weight. */
175: {
176: int i;
177: for(i=0; i <= MAX_CLEANNESS; i++) {
178: if (me->line_break[i] > saved) {
179: me->line_break[i] = me->line_break[i] -
180: (saved-me->buffer);
181: me->cleanness = i;
182: } else {
183: me->line_break[i] = NULL;
184: }
185: }
186: }
187:
2.8 timbl 188: me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21 timbl 189: me->overflowed = NO;
190: } else { /* No break- just output with no newline */
2.7 timbl 191: (*me->targetClass.put_block)(me->target,
2.14 frystyk 192: me->buffer,
2.15 luotonen 193: me->write_pointer - me->buffer);
2.8 timbl 194: me->write_pointer = me->buffer;
2.21 timbl 195: flush_breaks(me);
/* Remember the overflow so that the next break recorded is taken at once */
196: me->overflowed = YES;
2.7 timbl 197: }
198: }
2.1 timbl 199: }
200:
201:
202:
203: /* String handling
204: ** ---------------
205: */
2.17 timbl 206: PRIVATE void HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
207: {
2.24 frystyk 208: while (*s) HTMLGen_output_character(me, *s++);
2.17 timbl 209: }
210:
211:
212: /* INPUT FUNCTIONS
213: **
214: ** These take data from the structured stream. In the input
215: ** stream, entities are in raw form. The seven_bit flag controls
216: ** whether the ISO Latin-1 characters are represented in SGML entity
217: ** form. This is only recommended for viewing on older non-latin-1
218: ** capable equipment, or for mailing for example.
219: **
220: ** Bug: assumes local encoding is ISO!
221: */
222: PRIVATE void HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
223: {
224: if (c=='&') HTMLGen_output_string(me, "&");
225: else if (c=='<') HTMLGen_output_string(me, "<");
226: else if (me->seven_bit && ((unsigned char)c > 127)) {
227: char temp[8];
228: sprintf(temp, "&%d;", c);
229: HTMLGen_output_string(me, temp);
230: }
231: else HTMLGen_output_character(me, c);
232: }
233:
2.3 timbl 234: PRIVATE void HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1 timbl 235: {
2.24 frystyk 236: while (*s) HTMLGen_put_character(me, *s++);
2.1 timbl 237: }
238:
2.3 timbl 239: PRIVATE void HTMLGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
2.1 timbl 240: {
2.24 frystyk 241: while (l-- > 0) HTMLGen_put_character(me, *s++);
2.1 timbl 242: }
243:
244:
245: /* Start Element
246: ** -------------
2.7 timbl 247: **
248: ** Within the opening tag, there may be spaces
249: ** and the line may be broken at these spaces.
2.1 timbl 250: */
251: PRIVATE void HTMLGen_start_element ARGS4(
2.3 timbl 252: HTStructured *, me,
2.2 timbl 253: int, element_number,
254: CONST BOOL*, present,
255: CONST char **, value)
2.1 timbl 256: {
257: int i;
2.12 timbl 258: HTTag * tag = &me->dtd->tags[element_number];
2.1 timbl 259:
2.20 timbl 260: /* Control line breaks allowed within tag! */
261: int was_preformatted = me->preformatted; /* save state */
262: me->preformatted = 1; /* Can break between attributes */
263:
2.17 timbl 264: HTMLGen_output_character(me, '<');
265: HTMLGen_output_string(me, tag->name);
2.1 timbl 266: if (present) for (i=0; i< tag->number_of_attributes; i++) {
267: if (present[i]) {
2.17 timbl 268: HTMLGen_output_character(me, ' ');
2.21 timbl 269: allow_break(me, 1, YES);
2.17 timbl 270: HTMLGen_output_string(me, tag->attributes[i].name);
2.1 timbl 271: if (value[i]) {
2.17 timbl 272: HTMLGen_output_string(me, "=\"");
273: HTMLGen_output_string(me, value[i]);
274: HTMLGen_output_character(me, '"');
2.1 timbl 275: }
276: }
277: }
2.20 timbl 278: me->preformatted = was_preformatted; /* Restore state */
279:
2.14 frystyk 280: /* Nested PRE is no more a problem! */
281: if (element_number == HTML_PRE)
282: me->preformatted++;
2.19 timbl 283:
284: HTMLGen_output_character(me, '>');
2.7 timbl 285:
2.20 timbl 286: /* Here is a funny one. In PRE, newlines are significant, except of
287: course for one after the <PRE> which is ignored. This means that
288: we MUST put in a dummy one after the <PRE> to protect any real newline
289: within the pre section.
290:
291: However, *within* a PRE section, although we can break after
292: (for example) emphasis start tags, it will probably confuse some
293: parsers so we won't.*/
294:
295: if (element_number == HTML_PRE) {
296: HTMLGen_output_character(me, '\n');
297: } else if (!me->preformatted &&
298: tag->contents != SGML_EMPTY) { /* can break after element start */
2.21 timbl 299: allow_break(me, 3, NO);
2.8 timbl 300: }
2.1 timbl 301: }
302:
303:
2.17 timbl 304: /* End Element
305: ** -----------
2.1 timbl 306: **
2.16 timbl 307: ** The rules for insertring CR LF into SGML are weird, strict, and
308: ** nonintitive.
2.20 timbl 309: ** See comment also about PRE above.
2.1 timbl 310: */
2.3 timbl 311: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.24 frystyk 312: int , element_number)
2.1 timbl 313: {
2.20 timbl 314: if (element_number == HTML_PRE) {
315: HTMLGen_output_character(me, '\n');
316: } else if (!me->preformatted) { /* can break before element end */
2.21 timbl 317: allow_break(me, 1, NO);
2.8 timbl 318: }
2.17 timbl 319: HTMLGen_output_string(me, "</");
320: HTMLGen_output_string(me, me->dtd->tags[element_number].name);
321: HTMLGen_output_character(me, '>'); /* NO break after. TBL 940501 */
2.14 frystyk 322: if (element_number == HTML_PRE && me->preformatted)
323: me->preformatted--;
2.1 timbl 324: }
325:
326:
2.17 timbl 327: /* Expanding entities
328: ** ------------------
2.1 timbl 329: **
330: */
331:
2.3 timbl 332: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1 timbl 333: {
2.17 timbl 334: HTMLGen_output_character(me, '&');
335: HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
336: HTMLGen_output_character(me, ';');
2.1 timbl 337: }
338:
339:
340:
2.17 timbl 341: /* Free an object
342: ** --------------
2.1 timbl 343: **
344: */
2.24 frystyk 345: PRIVATE int HTMLGen_free ARGS1(HTStructured *, me)
2.1 timbl 346: {
2.21 timbl 347: HTMLGen_flush(me);
2.7 timbl 348: (*me->targetClass.put_character)(me->target, '\n');
2.22 duns 349: (*me->targetClass._free)(me->target); /* ripple through */
2.3 timbl 350: free(me);
2.24 frystyk 351: return 0;
2.1 timbl 352: }
353:
354:
2.24 frystyk 355: PRIVATE int PlainToHTML_free ARGS1(HTStructured *, me)
2.7 timbl 356: {
357: HTMLGen_end_element(me, HTML_PRE);
358: HTMLGen_end_element(me, HTML_BODY);
359: HTMLGen_end_element(me, HTML_HTML);
360: HTMLGen_free(me);
2.24 frystyk 361: return 0;
2.7 timbl 362: }
363:
364:
2.1 timbl 365:
2.24 frystyk 366: PRIVATE int HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 367: {
2.6 timbl 368: HTMLGen_free(me);
2.24 frystyk 369: return EOF;
2.1 timbl 370: }
371:
372:
2.24 frystyk 373: PRIVATE int PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 374: {
2.7 timbl 375: PlainToHTML_free(me);
2.24 frystyk 376: return EOF;
2.1 timbl 377: }
378:
379:
380:
381: /* Structured Object Class
382: ** -----------------------
383: */
2.5 timbl 384: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1 timbl 385: {
386: "text/html",
387: HTMLGen_free,
2.6 timbl 388: HTMLGen_abort,
2.1 timbl 389: HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write,
2.13 frystyk 390: HTMLGen_start_element, HTMLGen_end_element,
2.1 timbl 391: HTMLGen_put_entity
392: };
393:
394:
395: /* Subclass-specific Methods
396: ** -------------------------
397: */
398:
399: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
400: {
2.18 luotonen 401: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 402: if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
403: me->isa = &HTMLGeneration;
2.12 timbl 404: me->dtd = &HTMLP_dtd;
2.1 timbl 405:
2.3 timbl 406: me->target = output;
407: me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7 timbl 408:
409: me->write_pointer = me->buffer;
2.21 timbl 410: flush_breaks(me);
2.3 timbl 411: return me;
2.1 timbl 412: }
413:
414: /* Stream Object Class
415: ** -------------------
416: **
2.2 timbl 417: ** This object just converts a plain text stream into HTML
2.12 timbl 418: ** It is officially a structured stream but only the stream bits exist.
2.2 timbl 419: ** This is just the easiest way of typecasting all the routines.
2.1 timbl 420: */
2.2 timbl 421: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1 timbl 422: {
423: "plaintexttoHTML",
2.13 frystyk 424: PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */
2.6 timbl 425: PlainToHTML_abort,
2.1 timbl 426: HTMLGen_put_character,
427: HTMLGen_put_string,
428: HTMLGen_write,
2.2 timbl 429: NULL, /* Structured stuff */
430: NULL,
431: NULL
2.1 timbl 432: };
433:
434:
435: /* HTConverter from plain text to HTML Stream
436: ** ------------------------------------------
2.13 frystyk 437: **
438: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1 timbl 439: */
440:
2.12 timbl 441: PUBLIC HTStream* HTPlainToHTML ARGS5(
442: HTRequest *, request,
443: void *, param,
444: HTFormat, input_format,
445: HTFormat, output_format,
446: HTStream *, output_stream)
2.1 timbl 447: {
2.13 frystyk 448: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
449: CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18 luotonen 450: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 451: if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13 frystyk 452:
453: memset(present, '\0', MAX_ATTRIBUTES);
454: memset(value, '\0', MAX_ATTRIBUTES*sizeof(char *));
455:
456: me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12 timbl 457: me->dtd = &HTMLP_dtd;
458: me->target = output_stream;
2.13 frystyk 459: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
460: me->write_pointer = me->buffer;
2.21 timbl 461: flush_breaks(me);
2.13 frystyk 462:
463: HTMLGen_start_element(me, HTML_HTML, present, value);
464: HTMLGen_start_element(me, HTML_BODY, present, value);
465: HTMLGen_start_element(me, HTML_PRE, present, value);
466:
2.7 timbl 467: return (HTStream*) me;
2.1 timbl 468: }
2.13 frystyk 469:
470:
2.17 timbl 471: /* A safe version for making 7-bit restricted HTML
472: ** Beware that thsi makes it horrible for the Scandinavians
473: ** to actually read it.
474: */
475:
476: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
477: HTRequest *, request,
478: void *, param,
479: HTFormat, input_format,
480: HTFormat, output_format,
481: HTStream *, output_stream)
2.13 frystyk 482:
2.17 timbl 483: {
484: HTStream* me = HTPlainToHTML(request,param,input_format,
485: output_format, output_stream);
486: ((HTStructured*)me)->seven_bit = YES;
487: return me;
488: }
2.1 timbl 489:
Webmaster