Annotation of libwww/Library/src/HTMLGen.c, revision 2.23
2.1 timbl 1: /* HTML Generator
2: ** ==============
3: **
4: ** This version of the HTML object sends HTML markup to the output stream.
5: **
6: ** Bugs: Line wrapping is not done at all.
7: ** All data handled as PCDATA.
8: ** Should convert old XMP, LISTING and PLAINTEXT to PRE.
9: **
10: ** It is not obvious to me right now whether the HEAD should be generated
2.7 timbl 11: ** from the incoming data or the anchor. Currently it is from the former
2.17 timbl 12: ** which is cleanest. TBL
2.22 duns 13: **
14: ** HISTORY:
15: ** 8 Jul 94 FM Insulate free() from _free structure element.
16: **
2.1 timbl 17: */
18:
2.12 timbl 19: #include "HTMLPDTD.h"
2.1 timbl 20: #include "HTStream.h"
21: #include "SGML.h"
22: #include "HTFormat.h"
2.23 ! frystyk 23: #include "HTMLGen.h" /* Implemented here */
! 24:
/* Output helpers. PUTC and PUTB expand to calls through the copied target
** method table and therefore require a variable named `me' in scope.
** The functions visible in this file call the method table directly
** rather than through these macros.
*/
! 25: #define BUFFER_SIZE 80 /* Line buffer attempts to make neat breaks */
2.1 timbl 26:
2.3 timbl 27: #define PUTC(c) (*me->targetClass.put_character)(me->target, c) /* one character to the target stream */
2.7 timbl 28: /* #define PUTS(s) (*me->targetClass.put_string)(me->target, s) */
2.4 timbl 29: #define PUTB(s,l) (*me->targetClass.put_block)(me->target, s, l) /* l bytes starting at s to the target stream */
2.1 timbl 30:
31: /* HTML Object
32: ** -----------
33: */
34:
/* Local view of a generic stream object. In this file only the `isa'
** member of a target stream is read, in order to copy its method table.
*/
35: struct _HTStream {
36: CONST HTStreamClass * isa; /* method table of the stream */
37: HTStream * target;
38: HTStreamClass targetClass; /* COPY for speed */
39: };
40:
/* State of one HTML generator: the downstream target plus a line buffer
** and the bookkeeping needed to choose where to break output lines.
*/
2.21 timbl 41: #define MAX_CLEANNESS 10 /* highest break weight; arrays below have MAX_CLEANNESS+1 slots */
2.1 timbl 42: struct _HTStructured {
43: CONST HTStructuredClass * isa; /* &HTMLGeneration or &PlainToHTMLConversion */
44: HTStream * target; /* stream that receives the generated markup */
45: HTStreamClass targetClass; /* COPY for speed */
2.12 timbl 46: CONST SGML_dtd * dtd; /* source of tag, attribute and entity names */
2.17 timbl 47: BOOL seven_bit; /* restrict output*/
2.7 timbl 48:
2.14 frystyk 49: char buffer[BUFFER_SIZE+1]; /* text of the line being assembled */
2.7 timbl 50: char * write_pointer; /* next free position in buffer */
2.21 timbl 51: char * line_break [MAX_CLEANNESS+1]; /* per weight: latest break position in buffer, or NULL */
2.7 timbl 52: int cleanness; /* highest weight with a recorded break; 0 means none */
2.21 timbl 53: BOOL overflowed; /* set when the buffer was flushed without a line break */
54: BOOL delete_line_break_char /* per weight: is the character at the break dropped? */
55: [MAX_CLEANNESS+1];
2.14 frystyk 56: char preformatted; /* PRE nesting count; non-zero disables breaking at spaces */
2.1 timbl 57: };
58:
2.17 timbl 59: /* OUTPUT FUNCTIONS
60: **
61: ** These functions output the finished SGML stream doing the
62: ** line wrap
63: */
64:
2.7 timbl 65: /* Flush Buffer
66: ** ------------
67: */
2.21 timbl 68:
69: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
70: {
71: int i;
72: for (i=0; i<= MAX_CLEANNESS; i++) {
73: me->line_break[i] = NULL;
74: }
75: }
76:
77:
2.7 timbl 78: PRIVATE void HTMLGen_flush ARGS1(HTStructured *, me)
79: {
80: (*me->targetClass.put_block)(me->target,
81: me->buffer,
82: me->write_pointer - me->buffer);
83: me->write_pointer = me->buffer;
2.21 timbl 84: flush_breaks(me);
2.7 timbl 85: me->cleanness = 0;
2.21 timbl 86: }
87:
88:
89: /* Weighted optional line break
90: **
91: ** We keep track of all the breaks for when we chop the line
92: */
93:
94: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
95: BOOL, dlbc)
96: {
97: me->line_break[new_cleanness] =
98: dlbc ? me->write_pointer - 1 /* Point to space */
99: : me->write_pointer ; /* point to gap */
100: me->delete_line_break_char[new_cleanness] = dlbc;
101: if (new_cleanness >= me->cleanness)
102: me->cleanness = new_cleanness;
2.7 timbl 103: }
104:
105:
2.1 timbl 106: /* Character handling
107: ** ------------------
2.8 timbl 108: **
109: ** The tricky bits are the line break handling. This attempts
110: ** to synchronise line breaks on sentence or phrase ends. This
111: ** is important if one stores SGML files in a line-oriented code
112: ** repository, so that if a small change is made, line ends don't
113: ** shift in a ripple-through to apparently change a large part of the
114: ** file. We give extra "cleanness" to spaces appearing directly
115: ** after periods (full stops), [semi]colons and commas.
116: ** This should make the source files easier to read and modify
2.17 timbl 117: ** by hand, too, though this is not a primary design consideration. TBL
2.1 timbl 118: */
2.21 timbl 119: PRIVATE char delims[] = ",;:."; /* @@ english bias */
2.17 timbl 120: PRIVATE void HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1 timbl 121: {
2.7 timbl 122:
123: *me->write_pointer++ = c;
124:
2.21 timbl 125: if (c=='\n') { /* Newlines */
126: if (me->preformatted) {
127: HTMLGen_flush(me);
128: return;
129: } else {
130: me->write_pointer[-1] = c = ' '; /* Treat same as space */
131: }
2.7 timbl 132: }
133:
2.21 timbl 134: /* Figure our whether we can break at this point
135: */
2.7 timbl 136: if ((!me->preformatted && c==' ')) {
2.8 timbl 137: int new_cleanness = 1;
138: if (me->write_pointer > (me->buffer + 1)) {
2.9 luotonen 139: char * p;
2.11 timbl 140: p = strchr(delims, me->write_pointer[-2]);
2.21 timbl 141: if (p) new_cleanness = p - delims + 4;
2.8 timbl 142: }
2.21 timbl 143: allow_break(me, new_cleanness, YES);
2.7 timbl 144: }
145:
2.21 timbl 146: /* Flush buffer out when full, or whenever the line is over
147: the nominal maximum and we can break at all
148: */
149: if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
150: || (me->overflowed && me->cleanness)) {
151: if (me->cleanness) {
152: char line_break_char = me->line_break[me->cleanness][0];
153: char * saved = me->line_break[me->cleanness];
2.8 timbl 154:
2.21 timbl 155: if (me->delete_line_break_char[me->cleanness]) saved++;
156: me->line_break[me->cleanness][0] = '\n';
2.7 timbl 157: (*me->targetClass.put_block)(me->target,
158: me->buffer,
2.21 timbl 159: me->line_break[me->cleanness] - me->buffer + 1);
160: me->line_break[me->cleanness][0] = line_break_char;
2.7 timbl 161: { /* move next line in */
2.8 timbl 162: char * p=saved;
163: char *q;
164: for(q=me->buffer; p < me->write_pointer; )
2.7 timbl 165: *q++ = *p++;
166: }
167: me->cleanness = 0;
2.21 timbl 168: /* Now we have to check whether ther are any perfectly good breaks
169: ** which weren't good enough for the last line but may be
170: ** good enough for the next
171: */
172: {
173: int i;
174: for(i=0; i <= MAX_CLEANNESS; i++) {
175: if (me->line_break[i] > saved) {
176: me->line_break[i] = me->line_break[i] -
177: (saved-me->buffer);
178: me->cleanness = i;
179: } else {
180: me->line_break[i] = NULL;
181: }
182: }
183: }
184:
2.8 timbl 185: me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21 timbl 186: me->overflowed = NO;
187: } else { /* No break- just output with no newline */
2.7 timbl 188: (*me->targetClass.put_block)(me->target,
2.14 frystyk 189: me->buffer,
2.15 luotonen 190: me->write_pointer - me->buffer);
2.8 timbl 191: me->write_pointer = me->buffer;
2.21 timbl 192: flush_breaks(me);
193: me->overflowed = YES;
2.7 timbl 194: }
195: }
2.1 timbl 196: }
197:
198:
199:
200: /* String handling
201: ** ---------------
202: */
2.17 timbl 203: PRIVATE void HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
204: {
205: CONST char * p;
206: for(p=s; *p; p++) HTMLGen_output_character(me, *p);
207: }
208:
209:
210:
211:
212: /* INPUT FUNCTIONS
213: **
214: ** These take data from the structured stream. In the input
215: ** stream, entities are in raw form. The seven_bit flag controls
216: ** whether the ISO Latin-1 characters are represented in SGML entity
217: ** form. This is only recommended for viewing on older non-latin-1
218: ** capable equipment, or for mailing for example.
219: **
220: ** Bug: assumes local encoding is ISO!
221: */
222: PRIVATE void HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
223: {
224: if (c=='&') HTMLGen_output_string(me, "&");
225: else if (c=='<') HTMLGen_output_string(me, "<");
226: else if (me->seven_bit && ((unsigned char)c > 127)) {
227: char temp[8];
228: sprintf(temp, "&%d;", c);
229: HTMLGen_output_string(me, temp);
230: }
231: else HTMLGen_output_character(me, c);
232: }
233:
2.3 timbl 234: PRIVATE void HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1 timbl 235: {
2.7 timbl 236: CONST char * p;
237: for(p=s; *p; p++) HTMLGen_put_character(me, *p);
2.1 timbl 238: }
239:
2.3 timbl 240: PRIVATE void HTMLGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
2.1 timbl 241: {
2.7 timbl 242: CONST char * p;
243: for(p=s; p<s+l; p++) HTMLGen_put_character(me, *p);
2.1 timbl 244: }
245:
246:
247: /* Start Element
248: ** -------------
2.7 timbl 249: **
250: ** Within the opening tag, there may be spaces
251: ** and the line may be broken at these spaces.
2.1 timbl 252: */
253: PRIVATE void HTMLGen_start_element ARGS4(
2.3 timbl 254: HTStructured *, me,
2.2 timbl 255: int, element_number,
256: CONST BOOL*, present,
257: CONST char **, value)
2.1 timbl 258: {
259: int i;
2.12 timbl 260: HTTag * tag = &me->dtd->tags[element_number];
2.1 timbl 261:
2.20 timbl 262: /* Control line breaks allowed within tag! */
263: int was_preformatted = me->preformatted; /* save state */
264: me->preformatted = 1; /* Can break between attributes */
265:
2.17 timbl 266: HTMLGen_output_character(me, '<');
267: HTMLGen_output_string(me, tag->name);
2.1 timbl 268: if (present) for (i=0; i< tag->number_of_attributes; i++) {
269: if (present[i]) {
2.17 timbl 270: HTMLGen_output_character(me, ' ');
2.21 timbl 271: allow_break(me, 1, YES);
2.17 timbl 272: HTMLGen_output_string(me, tag->attributes[i].name);
2.1 timbl 273: if (value[i]) {
2.17 timbl 274: HTMLGen_output_string(me, "=\"");
275: HTMLGen_output_string(me, value[i]);
276: HTMLGen_output_character(me, '"');
2.1 timbl 277: }
278: }
279: }
2.20 timbl 280: me->preformatted = was_preformatted; /* Restore state */
281:
2.14 frystyk 282: /* Nested PRE is no more a problem! */
283: if (element_number == HTML_PRE)
284: me->preformatted++;
2.19 timbl 285:
286: HTMLGen_output_character(me, '>');
2.7 timbl 287:
2.20 timbl 288: /* Here is a funny one. In PRE, newlines are significant, except of
289: course for one after the <PRE> which is ignored. This means that
290: we MUST put in a dummy one after the <PRE> to protect any real newline
291: within the pre section.
292:
293: However, *within* a PRE section, although we can break after
294: (for example) emphasis start tags, it will probably confuse some
295: parsers so we won't.*/
296:
297: if (element_number == HTML_PRE) {
298: HTMLGen_output_character(me, '\n');
299: } else if (!me->preformatted &&
300: tag->contents != SGML_EMPTY) { /* can break after element start */
2.21 timbl 301: allow_break(me, 3, NO);
2.8 timbl 302: }
2.1 timbl 303: }
304:
305:
2.17 timbl 306: /* End Element
307: ** -----------
2.1 timbl 308: **
2.16 timbl 309: ** The rules for inserting CR LF into SGML are weird, strict, and
310: ** nonintuitive.
2.20 timbl 311: ** See comment also about PRE above.
2.1 timbl 312: */
2.3 timbl 313: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.1 timbl 314: int , element_number)
315: {
2.20 timbl 316: if (element_number == HTML_PRE) {
317: HTMLGen_output_character(me, '\n');
318: } else if (!me->preformatted) { /* can break before element end */
2.21 timbl 319: allow_break(me, 1, NO);
2.8 timbl 320: }
2.17 timbl 321: HTMLGen_output_string(me, "</");
322: HTMLGen_output_string(me, me->dtd->tags[element_number].name);
323: HTMLGen_output_character(me, '>'); /* NO break after. TBL 940501 */
2.14 frystyk 324: if (element_number == HTML_PRE && me->preformatted)
325: me->preformatted--;
2.1 timbl 326: }
327:
328:
2.17 timbl 329: /* Expanding entities
330: ** ------------------
2.1 timbl 331: **
332: */
333:
2.3 timbl 334: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1 timbl 335: {
2.17 timbl 336: HTMLGen_output_character(me, '&');
337: HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
338: HTMLGen_output_character(me, ';');
2.1 timbl 339: }
340:
341:
342:
2.17 timbl 343: /* Free an object
344: ** --------------
2.1 timbl 345: **
346: */
2.3 timbl 347: PRIVATE void HTMLGen_free ARGS1(HTStructured *, me)
2.1 timbl 348: {
2.21 timbl 349: HTMLGen_flush(me);
2.7 timbl 350: (*me->targetClass.put_character)(me->target, '\n');
2.22 duns 351: (*me->targetClass._free)(me->target); /* ripple through */
2.3 timbl 352: free(me);
2.1 timbl 353: }
354:
355:
2.7 timbl 356: PRIVATE void PlainToHTML_free ARGS1(HTStructured *, me)
357: {
358: HTMLGen_end_element(me, HTML_PRE);
359: HTMLGen_end_element(me, HTML_BODY);
360: HTMLGen_end_element(me, HTML_HTML);
361: HTMLGen_free(me);
362: }
363:
364:
2.1 timbl 365:
2.6 timbl 366: PRIVATE void HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 367: {
2.6 timbl 368: HTMLGen_free(me);
2.1 timbl 369: }
370:
371:
2.6 timbl 372: PRIVATE void PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 373: {
2.7 timbl 374: PlainToHTML_free(me);
2.1 timbl 375: }
376:
377:
378:
379: /* Structured Object Class
380: ** -----------------------
381: */
2.5 timbl 382: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1 timbl 383: {
384: "text/html",
385: HTMLGen_free,
2.6 timbl 386: HTMLGen_abort,
2.1 timbl 387: HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write,
2.13 frystyk 388: HTMLGen_start_element, HTMLGen_end_element,
2.1 timbl 389: HTMLGen_put_entity
390: };
391:
392:
393: /* Subclass-specific Methods
394: ** -------------------------
395: */
396:
397: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
398: {
2.18 luotonen 399: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 400: if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
401: me->isa = &HTMLGeneration;
2.12 timbl 402: me->dtd = &HTMLP_dtd;
2.1 timbl 403:
2.3 timbl 404: me->target = output;
405: me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7 timbl 406:
407: me->write_pointer = me->buffer;
2.21 timbl 408: flush_breaks(me);
2.3 timbl 409: return me;
2.1 timbl 410: }
411:
412: /* Stream Object Class
413: ** -------------------
414: **
2.2 timbl 415: ** This object just converts a plain text stream into HTML
2.12 timbl 416: ** It is officially a structured stream but only the stream bits exist.
2.2 timbl 417: ** This is just the easiest way of typecasting all the routines.
2.1 timbl 418: */
2.2 timbl 419: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1 timbl 420: {
421: "plaintexttoHTML",
2.13 frystyk 422: PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */
2.6 timbl 423: PlainToHTML_abort,
2.1 timbl 424: HTMLGen_put_character,
425: HTMLGen_put_string,
426: HTMLGen_write,
2.2 timbl 427: NULL, /* Structured stuff */
428: NULL,
429: NULL
2.1 timbl 430: };
431:
432:
433: /* HTConverter from plain text to HTML Stream
434: ** ------------------------------------------
2.13 frystyk 435: **
436: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1 timbl 437: */
438:
2.12 timbl 439: PUBLIC HTStream* HTPlainToHTML ARGS5(
440: HTRequest *, request,
441: void *, param,
442: HTFormat, input_format,
443: HTFormat, output_format,
444: HTStream *, output_stream)
2.1 timbl 445: {
2.13 frystyk 446: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
447: CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18 luotonen 448: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 449: if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13 frystyk 450:
451: memset(present, '\0', MAX_ATTRIBUTES);
452: memset(value, '\0', MAX_ATTRIBUTES*sizeof(char *));
453:
454: me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12 timbl 455: me->dtd = &HTMLP_dtd;
456: me->target = output_stream;
2.13 frystyk 457: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
458: me->write_pointer = me->buffer;
2.21 timbl 459: flush_breaks(me);
2.13 frystyk 460:
461: HTMLGen_start_element(me, HTML_HTML, present, value);
462: HTMLGen_start_element(me, HTML_BODY, present, value);
463: HTMLGen_start_element(me, HTML_PRE, present, value);
464:
2.7 timbl 465: return (HTStream*) me;
2.1 timbl 466: }
2.13 frystyk 467:
468:
2.17 timbl 469: /* A safe version for making 7-bit restricted HTML
470: ** Beware that this makes it horrible for the Scandinavians
471: ** to actually read it.
472: */
473:
474: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
475: HTRequest *, request,
476: void *, param,
477: HTFormat, input_format,
478: HTFormat, output_format,
479: HTStream *, output_stream)
2.13 frystyk 480:
2.17 timbl 481: {
482: HTStream* me = HTPlainToHTML(request,param,input_format,
483: output_format, output_stream);
484: ((HTStructured*)me)->seven_bit = YES;
485: return me;
486: }
2.1 timbl 487:
Webmaster