Annotation of libwww/Library/src/HTMLGen.c, revision 2.20
2.1 timbl 1: /* HTML Generator
2: ** ==============
3: **
4: ** This version of the HTML object sends HTML markup to the output stream.
5: **
6: ** Bugs: Line wrapping is not done at all.
7: ** All data handled as PCDATA.
8: ** Should convert old XMP, LISTING and PLAINTEXT to PRE.
9: **
10: ** It is not obvious to me right now whether the HEAD should be generated
2.7 timbl 11: ** from the incoming data or the anchor. Currently it is from the former
2.17 timbl 12: ** which is cleanest. TBL
2.1 timbl 13: */
14:
/* Nominal capacity of the generator's line buffer; text is collected here
** so that line breaks can be placed at tidy positions before it is sent on.
*/
#define BUFFER_SIZE	80
16:
2.1 timbl 17: /* Implements:
18: */
19: #include "HTMLGen.h"
20:
21: #include <stdio.h>
2.12 timbl 22: #include "HTMLPDTD.h"
2.1 timbl 23: #include "HTStream.h"
24: #include "SGML.h"
25: #include "HTFormat.h"
2.11 timbl 26: #include "tcp.h"
2.1 timbl 27:
/* Shorthands for calling the target stream's methods.
** Both expect a variable called `me` (with `target` and `targetClass`
** members) to be in scope where they are expanded.
*/
#define PUTC(ch)	(*me->targetClass.put_character)(me->target, (ch))
/* #define PUTS(str)	(*me->targetClass.put_string)(me->target, (str)) */
#define PUTB(buf,len)	(*me->targetClass.put_block)(me->target, (buf), (len))
2.1 timbl 31:
32: /* HTML Object
33: ** -----------
34: */
35:
36: struct _HTStream {
37: CONST HTStreamClass * isa;
38: HTStream * target;
39: HTStreamClass targetClass; /* COPY for speed */
40: };
41:
42: struct _HTStructured {
43: CONST HTStructuredClass * isa;
44: HTStream * target;
45: HTStreamClass targetClass; /* COPY for speed */
2.12 timbl 46: CONST SGML_dtd * dtd;
2.17 timbl 47: BOOL seven_bit; /* restrict output*/
2.7 timbl 48:
2.14 frystyk 49: char buffer[BUFFER_SIZE+1];
2.7 timbl 50: char * write_pointer;
51: char * line_break;
52: int cleanness;
2.8 timbl 53: BOOL delete_line_break_char;
2.14 frystyk 54: char preformatted;
2.1 timbl 55: };
56:
2.17 timbl 57: /* OUTPUT FUNCTIONS
58: **
59: ** These functions output the finished SGML stream, doing the
60: ** line wrap.
61: */
62:
2.7 timbl 63: /* Flush Buffer
64: ** ------------
65: */
66: PRIVATE void HTMLGen_flush ARGS1(HTStructured *, me)
67: {
68: (*me->targetClass.put_block)(me->target,
69: me->buffer,
70: me->write_pointer - me->buffer);
71: me->write_pointer = me->buffer;
72: me->line_break = me->buffer;
73: me->cleanness = 0;
2.8 timbl 74: me->delete_line_break_char = NO;
2.7 timbl 75: }
76:
77:
2.1 timbl 78: /* Character handling
79: ** ------------------
2.8 timbl 80: **
81: ** The tricky bits are the line break handling. This attempts
82: ** to synchrononise line breaks on sentence or phrase ends. This
83: ** is important if one stores SGML files in a line-oriented code
84: ** repository, so that if a small change is made, line ends don't
85: ** shift in a ripple-through to apparently change a large part of the
86: ** file. We give extra "cleanness" to spaces appearing directly
87: ** after periods (full stops), [semi]colons and commas.
88: ** This should make the source files easier to read and modify
2.17 timbl 89: ** by hand, too, though this is not a primary design consideration. TBL
2.1 timbl 90: */
2.17 timbl 91: PRIVATE void HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1 timbl 92: {
2.7 timbl 93:
94: *me->write_pointer++ = c;
95:
96: if (c=='\n') {
97: HTMLGen_flush(me);
98: return;
99: }
100:
101: if ((!me->preformatted && c==' ')) {
2.8 timbl 102: int new_cleanness = 1;
103: if (me->write_pointer > (me->buffer + 1)) {
2.9 luotonen 104: char delims[5];
105: char * p;
106: strcpy(delims, ",;:."); /* @@ english bias */
2.11 timbl 107: p = strchr(delims, me->write_pointer[-2]);
2.9 luotonen 108: if (p) new_cleanness = p - delims + 2;
2.8 timbl 109: }
110: if (new_cleanness >= me->cleanness) {
111: me->line_break = me->write_pointer - 1; /* Point to space */
112: me->cleanness = new_cleanness;
113: me->delete_line_break_char = YES;
114: }
2.7 timbl 115: }
116:
2.19 timbl 117: /* Flush buffer out when full. If preformatted then don't wrap! */
2.15 luotonen 118: if (me->write_pointer >= me->buffer + BUFFER_SIZE-1) {
2.14 frystyk 119: if (!me->preformatted && me->cleanness) {
2.8 timbl 120: char line_break_char = me->line_break[0];
121: char * saved = me->line_break;
122:
123: if (me->delete_line_break_char) saved++;
124: me->line_break[0] = '\n';
2.7 timbl 125: (*me->targetClass.put_block)(me->target,
126: me->buffer,
2.8 timbl 127: me->line_break - me->buffer + 1);
128: me->line_break[0] = line_break_char;
2.7 timbl 129: { /* move next line in */
2.8 timbl 130: char * p=saved;
131: char *q;
132: for(q=me->buffer; p < me->write_pointer; )
2.7 timbl 133: *q++ = *p++;
134: }
135: me->cleanness = 0;
2.8 timbl 136: me->delete_line_break_char = 0;
137: me->write_pointer = me->write_pointer - (saved-me->buffer);
138:
2.7 timbl 139: } else {
140: (*me->targetClass.put_block)(me->target,
2.14 frystyk 141: me->buffer,
2.15 luotonen 142: me->write_pointer - me->buffer);
2.8 timbl 143: me->write_pointer = me->buffer;
2.7 timbl 144: }
145: me->line_break = me->buffer;
146: }
2.1 timbl 147: }
148:
149:
150:
151: /* String handling
152: ** ---------------
153: */
2.17 timbl 154: PRIVATE void HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
155: {
156: CONST char * p;
157: for(p=s; *p; p++) HTMLGen_output_character(me, *p);
158: }
159:
160:
161:
162:
163: /* INPUT FUNCTIONS
164: **
165: ** These take data from the structured stream. In the input
166: ** stream, entities are in raw form. The seven_bit flag controls
167: ** whether the ISO Latin-1 characters are represented in SGML entity
168: ** form. This is only recommended for viewing on older non-latin-1
169: ** capable equipment, or for mailing for example.
170: **
171: ** Bug: assumes local encoding is ISO!
172: */
173: PRIVATE void HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
174: {
175: if (c=='&') HTMLGen_output_string(me, "&");
176: else if (c=='<') HTMLGen_output_string(me, "<");
177: else if (me->seven_bit && ((unsigned char)c > 127)) {
178: char temp[8];
179: sprintf(temp, "&%d;", c);
180: HTMLGen_output_string(me, temp);
181: }
182: else HTMLGen_output_character(me, c);
183: }
184:
2.3 timbl 185: PRIVATE void HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1 timbl 186: {
2.7 timbl 187: CONST char * p;
188: for(p=s; *p; p++) HTMLGen_put_character(me, *p);
2.1 timbl 189: }
190:
2.3 timbl 191: PRIVATE void HTMLGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
2.1 timbl 192: {
2.7 timbl 193: CONST char * p;
194: for(p=s; p<s+l; p++) HTMLGen_put_character(me, *p);
2.1 timbl 195: }
196:
197:
198: /* Start Element
199: ** -------------
2.7 timbl 200: **
201: ** Within the opening tag, there may be spaces
202: ** and the line may be broken at these spaces.
2.1 timbl 203: */
204: PRIVATE void HTMLGen_start_element ARGS4(
2.3 timbl 205: HTStructured *, me,
2.2 timbl 206: int, element_number,
207: CONST BOOL*, present,
208: CONST char **, value)
2.1 timbl 209: {
210: int i;
2.12 timbl 211: HTTag * tag = &me->dtd->tags[element_number];
2.1 timbl 212:
2.20 ! timbl 213: /* Control line breaks allowed within tag! */
! 214: int was_preformatted = me->preformatted; /* save state */
! 215: me->preformatted = 1; /* Can break between attributes */
! 216:
2.17 timbl 217: HTMLGen_output_character(me, '<');
218: HTMLGen_output_string(me, tag->name);
2.1 timbl 219: if (present) for (i=0; i< tag->number_of_attributes; i++) {
220: if (present[i]) {
2.19 timbl 221: me->line_break = me->write_pointer; /* Don't you hate SGML? */
222: me->cleanness = 1; /* Can break between attributes */
223: me->delete_line_break_char = YES;
2.17 timbl 224: HTMLGen_output_character(me, ' ');
225: HTMLGen_output_string(me, tag->attributes[i].name);
2.1 timbl 226: if (value[i]) {
2.17 timbl 227: HTMLGen_output_string(me, "=\"");
228: HTMLGen_output_string(me, value[i]);
229: HTMLGen_output_character(me, '"');
2.1 timbl 230: }
231: }
232: }
2.20 ! timbl 233: me->preformatted = was_preformatted; /* Restore state */
! 234:
2.14 frystyk 235: /* Nested PRE is no more a problem! */
236: if (element_number == HTML_PRE)
237: me->preformatted++;
2.19 timbl 238:
239: HTMLGen_output_character(me, '>');
2.7 timbl 240:
2.20 ! timbl 241: /* Here is a funny one. In PRE, newlines are significant, except of
! 242: course for one after the <PRE> which is ignored. This means that
! 243: we MUST put in a dummy one after the <PRE> to protect any real newline
! 244: within the pre section.
! 245:
! 246: However, *within* a PRE section, although we can break after
! 247: (for example) emphasis start tags, it will probably confuse some
! 248: parsers so we won't.*/
! 249:
! 250: if (element_number == HTML_PRE) {
! 251: HTMLGen_output_character(me, '\n');
! 252: } else if (!me->preformatted &&
! 253: tag->contents != SGML_EMPTY) { /* can break after element start */
2.8 timbl 254: me->line_break = me->write_pointer; /* Don't you hate SGML? */
2.19 timbl 255: me->cleanness = 3;
2.8 timbl 256: me->delete_line_break_char = NO;
257: }
2.1 timbl 258: }
259:
260:
2.17 timbl 261: /* End Element
262: ** -----------
2.1 timbl 263: **
2.16 timbl 264: ** The rules for insertring CR LF into SGML are weird, strict, and
265: ** nonintitive.
2.20 ! timbl 266: ** See comment also about PRE above.
2.1 timbl 267: */
2.3 timbl 268: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.1 timbl 269: int , element_number)
270: {
2.20 ! timbl 271: if (element_number == HTML_PRE) {
! 272: HTMLGen_output_character(me, '\n');
! 273: } else if (!me->preformatted) { /* can break before element end */
2.8 timbl 274: me->line_break = me->write_pointer; /* Don't you hate SGML? */
275: me->cleanness = 1;
276: me->delete_line_break_char = NO;
277: }
2.17 timbl 278: HTMLGen_output_string(me, "</");
279: HTMLGen_output_string(me, me->dtd->tags[element_number].name);
280: HTMLGen_output_character(me, '>'); /* NO break after. TBL 940501 */
2.14 frystyk 281: if (element_number == HTML_PRE && me->preformatted)
282: me->preformatted--;
2.1 timbl 283: }
284:
285:
2.17 timbl 286: /* Expanding entities
287: ** ------------------
2.1 timbl 288: **
289: */
290:
2.3 timbl 291: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1 timbl 292: {
2.17 timbl 293: HTMLGen_output_character(me, '&');
294: HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
295: HTMLGen_output_character(me, ';');
2.1 timbl 296: }
297:
298:
299:
2.17 timbl 300: /* Free an object
301: ** --------------
2.1 timbl 302: **
303: */
2.3 timbl 304: PRIVATE void HTMLGen_free ARGS1(HTStructured *, me)
2.1 timbl 305: {
2.7 timbl 306: (*me->targetClass.put_character)(me->target, '\n');
307: HTMLGen_flush(me);
2.3 timbl 308: (*me->targetClass.free)(me->target); /* ripple through */
309: free(me);
2.1 timbl 310: }
311:
312:
2.7 timbl 313: PRIVATE void PlainToHTML_free ARGS1(HTStructured *, me)
314: {
315: HTMLGen_end_element(me, HTML_PRE);
316: HTMLGen_end_element(me, HTML_BODY);
317: HTMLGen_end_element(me, HTML_HTML);
318: HTMLGen_free(me);
319: }
320:
321:
2.1 timbl 322:
2.6 timbl 323: PRIVATE void HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 324: {
2.6 timbl 325: HTMLGen_free(me);
2.1 timbl 326: }
327:
328:
2.6 timbl 329: PRIVATE void PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 330: {
2.7 timbl 331: PlainToHTML_free(me);
2.1 timbl 332: }
333:
334:
335:
336: /* Structured Object Class
337: ** -----------------------
338: */
2.5 timbl 339: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1 timbl 340: {
341: "text/html",
342: HTMLGen_free,
2.6 timbl 343: HTMLGen_abort,
2.1 timbl 344: HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write,
2.13 frystyk 345: HTMLGen_start_element, HTMLGen_end_element,
2.1 timbl 346: HTMLGen_put_entity
347: };
348:
349:
350: /* Subclass-specific Methods
351: ** -------------------------
352: */
353:
354: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
355: {
2.18 luotonen 356: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 357: if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
358: me->isa = &HTMLGeneration;
2.12 timbl 359: me->dtd = &HTMLP_dtd;
2.1 timbl 360:
2.3 timbl 361: me->target = output;
362: me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7 timbl 363:
364: me->write_pointer = me->buffer;
365: me->line_break = me->buffer;
2.3 timbl 366: return me;
2.1 timbl 367: }
368:
369: /* Stream Object Class
370: ** -------------------
371: **
2.2 timbl 372: ** This object just converts a plain text stream into HTML
2.12 timbl 373: ** It is officially a structured stream but only the stream bits exist.
2.2 timbl 374: ** This is just the easiest way of typecasting all the routines.
2.1 timbl 375: */
2.2 timbl 376: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1 timbl 377: {
378: "plaintexttoHTML",
2.13 frystyk 379: PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */
2.6 timbl 380: PlainToHTML_abort,
2.1 timbl 381: HTMLGen_put_character,
382: HTMLGen_put_string,
383: HTMLGen_write,
2.2 timbl 384: NULL, /* Structured stuff */
385: NULL,
386: NULL
2.1 timbl 387: };
388:
389:
390: /* HTConverter from plain text to HTML Stream
391: ** ------------------------------------------
2.13 frystyk 392: **
393: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1 timbl 394: */
395:
2.12 timbl 396: PUBLIC HTStream* HTPlainToHTML ARGS5(
397: HTRequest *, request,
398: void *, param,
399: HTFormat, input_format,
400: HTFormat, output_format,
401: HTStream *, output_stream)
2.1 timbl 402: {
2.13 frystyk 403: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
404: CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18 luotonen 405: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 406: if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13 frystyk 407:
408: memset(present, '\0', MAX_ATTRIBUTES);
409: memset(value, '\0', MAX_ATTRIBUTES*sizeof(char *));
410:
411: me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12 timbl 412: me->dtd = &HTMLP_dtd;
413: me->target = output_stream;
2.13 frystyk 414: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
415: me->write_pointer = me->buffer;
416: me->line_break = me->buffer;
417:
418: HTMLGen_start_element(me, HTML_HTML, present, value);
419: HTMLGen_start_element(me, HTML_BODY, present, value);
420: HTMLGen_start_element(me, HTML_PRE, present, value);
421:
2.7 timbl 422: return (HTStream*) me;
2.1 timbl 423: }
2.13 frystyk 424:
425:
2.17 timbl 426: /* A safe version for making 7-bit restricted HTML
427: ** Beware that thsi makes it horrible for the Scandinavians
428: ** to actually read it.
429: */
430:
431: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
432: HTRequest *, request,
433: void *, param,
434: HTFormat, input_format,
435: HTFormat, output_format,
436: HTStream *, output_stream)
2.13 frystyk 437:
2.17 timbl 438: {
439: HTStream* me = HTPlainToHTML(request,param,input_format,
440: output_format, output_stream);
441: ((HTStructured*)me)->seven_bit = YES;
442: return me;
443: }
2.1 timbl 444:
Webmaster