Annotation of libwww/Library/src/HTMLGen.c, revision 2.25.2.1
/*								     HTMLGen.c
**	HTML GENERATOR
**
**	(c) COPYRIGHT CERN 1994.
**	Please first read the full copyright statement in the file COPYRIGH.
**
**	This version of the HTML object sends HTML markup to the output stream.
**
**	Bugs:	Line wrapping is not done at all.
**		All data handled as PCDATA.
**		Should convert old XMP, LISTING and PLAINTEXT to PRE.
**
**	It is not obvious to me right now whether the HEAD should be generated
**	from the incoming data or the anchor.  Currently it is from the former,
**	which is cleanest.	TBL
**
** HISTORY:
**	 8 Jul 94  FM	Insulate free() from _free structure element.
**
*/
21:
2.25.2.1! frystyk 22: /* Library include files */
! 23: #include "tcp.h"
! 24: #include "HTUtils.h"
2.12 timbl 25: #include "HTMLPDTD.h"
2.1 timbl 26: #include "HTStream.h"
27: #include "SGML.h"
28: #include "HTFormat.h"
2.23 frystyk 29: #include "HTMLGen.h" /* Implemented here */
30:
/* Capacity of the output line buffer.  The generator tries to place a
** neat line break before this many characters have accumulated.
*/
#define BUFFER_SIZE	80

/* Shorthands for the target stream's methods.  Each one expects a local
** variable called `me' to be in scope at the point of use.
*/
#define PUTC(c)		(*me->targetClass.put_character)(me->target, (c))
/* #define PUTS(s)	(*me->targetClass.put_string)(me->target, s) */
#define PUTB(s,l)	(*me->targetClass.put_block)(me->target, (s), (l))
2.1 timbl 36:
37: /* HTML Object
38: ** -----------
39: */
40:
41: struct _HTStream {
42: CONST HTStreamClass * isa;
43: HTStream * target;
44: HTStreamClass targetClass; /* COPY for speed */
45: };
46:
2.21 timbl 47: #define MAX_CLEANNESS 10
2.1 timbl 48: struct _HTStructured {
49: CONST HTStructuredClass * isa;
50: HTStream * target;
51: HTStreamClass targetClass; /* COPY for speed */
2.12 timbl 52: CONST SGML_dtd * dtd;
2.17 timbl 53: BOOL seven_bit; /* restrict output*/
2.7 timbl 54:
2.14 frystyk 55: char buffer[BUFFER_SIZE+1];
2.7 timbl 56: char * write_pointer;
2.21 timbl 57: char * line_break [MAX_CLEANNESS+1];
2.7 timbl 58: int cleanness;
2.21 timbl 59: BOOL overflowed;
60: BOOL delete_line_break_char
61: [MAX_CLEANNESS+1];
2.14 frystyk 62: char preformatted;
2.1 timbl 63: };
64:
/*			OUTPUT FUNCTIONS
**
**	These functions output the finished SGML stream, doing the
**	line wrap.
*/
70:
2.7 timbl 71: /* Flush Buffer
72: ** ------------
73: */
2.21 timbl 74:
75: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
76: {
77: int i;
78: for (i=0; i<= MAX_CLEANNESS; i++) {
79: me->line_break[i] = NULL;
80: }
81: }
82:
83:
2.7 timbl 84: PRIVATE void HTMLGen_flush ARGS1(HTStructured *, me)
85: {
86: (*me->targetClass.put_block)(me->target,
87: me->buffer,
88: me->write_pointer - me->buffer);
89: me->write_pointer = me->buffer;
2.21 timbl 90: flush_breaks(me);
2.7 timbl 91: me->cleanness = 0;
2.21 timbl 92: }
93:
94:
95: /* Weighted optional line break
96: **
97: ** We keep track of all the breaks for when we chop the line
98: */
99:
100: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
101: BOOL, dlbc)
102: {
103: me->line_break[new_cleanness] =
104: dlbc ? me->write_pointer - 1 /* Point to space */
105: : me->write_pointer ; /* point to gap */
106: me->delete_line_break_char[new_cleanness] = dlbc;
107: if (new_cleanness >= me->cleanness)
108: me->cleanness = new_cleanness;
2.7 timbl 109: }
110:
111:
2.1 timbl 112: /* Character handling
113: ** ------------------
2.8 timbl 114: **
115: ** The tricky bits are the line break handling. This attempts
116: ** to synchrononise line breaks on sentence or phrase ends. This
117: ** is important if one stores SGML files in a line-oriented code
118: ** repository, so that if a small change is made, line ends don't
119: ** shift in a ripple-through to apparently change a large part of the
120: ** file. We give extra "cleanness" to spaces appearing directly
121: ** after periods (full stops), [semi]colons and commas.
122: ** This should make the source files easier to read and modify
2.17 timbl 123: ** by hand, too, though this is not a primary design consideration. TBL
2.1 timbl 124: */
2.21 timbl 125: PRIVATE char delims[] = ",;:."; /* @@ english bias */
2.17 timbl 126: PRIVATE void HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1 timbl 127: {
2.7 timbl 128:
129: *me->write_pointer++ = c;
130:
2.21 timbl 131: if (c=='\n') { /* Newlines */
132: if (me->preformatted) {
133: HTMLGen_flush(me);
134: return;
135: } else {
136: me->write_pointer[-1] = c = ' '; /* Treat same as space */
137: }
2.7 timbl 138: }
139:
2.21 timbl 140: /* Figure our whether we can break at this point
141: */
2.7 timbl 142: if ((!me->preformatted && c==' ')) {
2.8 timbl 143: int new_cleanness = 1;
144: if (me->write_pointer > (me->buffer + 1)) {
2.9 luotonen 145: char * p;
2.11 timbl 146: p = strchr(delims, me->write_pointer[-2]);
2.21 timbl 147: if (p) new_cleanness = p - delims + 4;
2.8 timbl 148: }
2.21 timbl 149: allow_break(me, new_cleanness, YES);
2.7 timbl 150: }
151:
2.21 timbl 152: /* Flush buffer out when full, or whenever the line is over
153: the nominal maximum and we can break at all
154: */
155: if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
156: || (me->overflowed && me->cleanness)) {
157: if (me->cleanness) {
158: char line_break_char = me->line_break[me->cleanness][0];
159: char * saved = me->line_break[me->cleanness];
2.8 timbl 160:
2.21 timbl 161: if (me->delete_line_break_char[me->cleanness]) saved++;
162: me->line_break[me->cleanness][0] = '\n';
2.7 timbl 163: (*me->targetClass.put_block)(me->target,
164: me->buffer,
2.21 timbl 165: me->line_break[me->cleanness] - me->buffer + 1);
166: me->line_break[me->cleanness][0] = line_break_char;
2.7 timbl 167: { /* move next line in */
2.8 timbl 168: char * p=saved;
169: char *q;
170: for(q=me->buffer; p < me->write_pointer; )
2.7 timbl 171: *q++ = *p++;
172: }
173: me->cleanness = 0;
2.21 timbl 174: /* Now we have to check whether ther are any perfectly good breaks
175: ** which weren't good enough for the last line but may be
176: ** good enough for the next
177: */
178: {
179: int i;
180: for(i=0; i <= MAX_CLEANNESS; i++) {
181: if (me->line_break[i] > saved) {
182: me->line_break[i] = me->line_break[i] -
183: (saved-me->buffer);
184: me->cleanness = i;
185: } else {
186: me->line_break[i] = NULL;
187: }
188: }
189: }
190:
2.8 timbl 191: me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21 timbl 192: me->overflowed = NO;
193: } else { /* No break- just output with no newline */
2.7 timbl 194: (*me->targetClass.put_block)(me->target,
2.14 frystyk 195: me->buffer,
2.15 luotonen 196: me->write_pointer - me->buffer);
2.8 timbl 197: me->write_pointer = me->buffer;
2.21 timbl 198: flush_breaks(me);
199: me->overflowed = YES;
2.7 timbl 200: }
201: }
2.1 timbl 202: }
203:
204:
205:
206: /* String handling
207: ** ---------------
208: */
2.17 timbl 209: PRIVATE void HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
210: {
2.24 frystyk 211: while (*s) HTMLGen_output_character(me, *s++);
2.17 timbl 212: }
213:
214:
/*			INPUT FUNCTIONS
**
**	These take data from the structured stream.  In the input
**	stream, entities are in raw form.  The seven_bit flag controls
**	whether the ISO Latin-1 characters are represented in SGML entity
**	form.  This is only recommended for viewing on older non-Latin-1
**	capable equipment, or for mailing for example.
**
**	Bug: assumes local encoding is ISO!
*/
225: PRIVATE void HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
226: {
227: if (c=='&') HTMLGen_output_string(me, "&");
228: else if (c=='<') HTMLGen_output_string(me, "<");
229: else if (me->seven_bit && ((unsigned char)c > 127)) {
230: char temp[8];
231: sprintf(temp, "&%d;", c);
232: HTMLGen_output_string(me, temp);
233: }
234: else HTMLGen_output_character(me, c);
235: }
236:
2.3 timbl 237: PRIVATE void HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1 timbl 238: {
2.24 frystyk 239: while (*s) HTMLGen_put_character(me, *s++);
2.1 timbl 240: }
241:
2.3 timbl 242: PRIVATE void HTMLGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
2.1 timbl 243: {
2.24 frystyk 244: while (l-- > 0) HTMLGen_put_character(me, *s++);
2.1 timbl 245: }
246:
247:
248: /* Start Element
249: ** -------------
2.7 timbl 250: **
251: ** Within the opening tag, there may be spaces
252: ** and the line may be broken at these spaces.
2.1 timbl 253: */
254: PRIVATE void HTMLGen_start_element ARGS4(
2.3 timbl 255: HTStructured *, me,
2.2 timbl 256: int, element_number,
257: CONST BOOL*, present,
258: CONST char **, value)
2.1 timbl 259: {
260: int i;
2.12 timbl 261: HTTag * tag = &me->dtd->tags[element_number];
2.1 timbl 262:
2.20 timbl 263: /* Control line breaks allowed within tag! */
264: int was_preformatted = me->preformatted; /* save state */
265: me->preformatted = 1; /* Can break between attributes */
266:
2.17 timbl 267: HTMLGen_output_character(me, '<');
268: HTMLGen_output_string(me, tag->name);
2.1 timbl 269: if (present) for (i=0; i< tag->number_of_attributes; i++) {
270: if (present[i]) {
2.17 timbl 271: HTMLGen_output_character(me, ' ');
2.21 timbl 272: allow_break(me, 1, YES);
2.17 timbl 273: HTMLGen_output_string(me, tag->attributes[i].name);
2.1 timbl 274: if (value[i]) {
2.17 timbl 275: HTMLGen_output_string(me, "=\"");
276: HTMLGen_output_string(me, value[i]);
277: HTMLGen_output_character(me, '"');
2.1 timbl 278: }
279: }
280: }
2.20 timbl 281: me->preformatted = was_preformatted; /* Restore state */
282:
2.14 frystyk 283: /* Nested PRE is no more a problem! */
284: if (element_number == HTML_PRE)
285: me->preformatted++;
2.19 timbl 286:
287: HTMLGen_output_character(me, '>');
2.7 timbl 288:
2.20 timbl 289: /* Here is a funny one. In PRE, newlines are significant, except of
290: course for one after the <PRE> which is ignored. This means that
291: we MUST put in a dummy one after the <PRE> to protect any real newline
292: within the pre section.
293:
294: However, *within* a PRE section, although we can break after
295: (for example) emphasis start tags, it will probably confuse some
296: parsers so we won't.*/
297:
298: if (element_number == HTML_PRE) {
299: HTMLGen_output_character(me, '\n');
300: } else if (!me->preformatted &&
301: tag->contents != SGML_EMPTY) { /* can break after element start */
2.21 timbl 302: allow_break(me, 3, NO);
2.8 timbl 303: }
2.1 timbl 304: }
305:
306:
2.17 timbl 307: /* End Element
308: ** -----------
2.1 timbl 309: **
2.16 timbl 310: ** The rules for insertring CR LF into SGML are weird, strict, and
311: ** nonintitive.
2.20 timbl 312: ** See comment also about PRE above.
2.1 timbl 313: */
2.3 timbl 314: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.24 frystyk 315: int , element_number)
2.1 timbl 316: {
2.20 timbl 317: if (element_number == HTML_PRE) {
318: HTMLGen_output_character(me, '\n');
319: } else if (!me->preformatted) { /* can break before element end */
2.21 timbl 320: allow_break(me, 1, NO);
2.8 timbl 321: }
2.17 timbl 322: HTMLGen_output_string(me, "</");
323: HTMLGen_output_string(me, me->dtd->tags[element_number].name);
324: HTMLGen_output_character(me, '>'); /* NO break after. TBL 940501 */
2.14 frystyk 325: if (element_number == HTML_PRE && me->preformatted)
326: me->preformatted--;
2.1 timbl 327: }
328:
329:
2.17 timbl 330: /* Expanding entities
331: ** ------------------
2.1 timbl 332: **
333: */
334:
2.3 timbl 335: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1 timbl 336: {
2.17 timbl 337: HTMLGen_output_character(me, '&');
338: HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
339: HTMLGen_output_character(me, ';');
2.1 timbl 340: }
341:
342:
343:
2.17 timbl 344: /* Free an object
345: ** --------------
2.1 timbl 346: **
347: */
2.24 frystyk 348: PRIVATE int HTMLGen_free ARGS1(HTStructured *, me)
2.1 timbl 349: {
2.21 timbl 350: HTMLGen_flush(me);
2.7 timbl 351: (*me->targetClass.put_character)(me->target, '\n');
2.22 duns 352: (*me->targetClass._free)(me->target); /* ripple through */
2.3 timbl 353: free(me);
2.24 frystyk 354: return 0;
2.1 timbl 355: }
356:
357:
2.24 frystyk 358: PRIVATE int PlainToHTML_free ARGS1(HTStructured *, me)
2.7 timbl 359: {
360: HTMLGen_end_element(me, HTML_PRE);
361: HTMLGen_end_element(me, HTML_BODY);
362: HTMLGen_end_element(me, HTML_HTML);
363: HTMLGen_free(me);
2.24 frystyk 364: return 0;
2.7 timbl 365: }
366:
367:
2.1 timbl 368:
2.24 frystyk 369: PRIVATE int HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 370: {
2.6 timbl 371: HTMLGen_free(me);
2.24 frystyk 372: return EOF;
2.1 timbl 373: }
374:
375:
2.24 frystyk 376: PRIVATE int PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 377: {
2.7 timbl 378: PlainToHTML_free(me);
2.24 frystyk 379: return EOF;
2.1 timbl 380: }
381:
382:
383:
384: /* Structured Object Class
385: ** -----------------------
386: */
2.5 timbl 387: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1 timbl 388: {
389: "text/html",
390: HTMLGen_free,
2.6 timbl 391: HTMLGen_abort,
2.1 timbl 392: HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write,
2.13 frystyk 393: HTMLGen_start_element, HTMLGen_end_element,
2.1 timbl 394: HTMLGen_put_entity
395: };
396:
397:
398: /* Subclass-specific Methods
399: ** -------------------------
400: */
401:
402: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
403: {
2.18 luotonen 404: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 405: if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
406: me->isa = &HTMLGeneration;
2.12 timbl 407: me->dtd = &HTMLP_dtd;
2.1 timbl 408:
2.3 timbl 409: me->target = output;
410: me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7 timbl 411:
412: me->write_pointer = me->buffer;
2.21 timbl 413: flush_breaks(me);
2.3 timbl 414: return me;
2.1 timbl 415: }
416:
417: /* Stream Object Class
418: ** -------------------
419: **
2.2 timbl 420: ** This object just converts a plain text stream into HTML
2.12 timbl 421: ** It is officially a structured stream but only the stream bits exist.
2.2 timbl 422: ** This is just the easiest way of typecasting all the routines.
2.1 timbl 423: */
2.2 timbl 424: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1 timbl 425: {
426: "plaintexttoHTML",
2.13 frystyk 427: PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */
2.6 timbl 428: PlainToHTML_abort,
2.1 timbl 429: HTMLGen_put_character,
430: HTMLGen_put_string,
431: HTMLGen_write,
2.2 timbl 432: NULL, /* Structured stuff */
433: NULL,
434: NULL
2.1 timbl 435: };
436:
437:
438: /* HTConverter from plain text to HTML Stream
439: ** ------------------------------------------
2.13 frystyk 440: **
441: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1 timbl 442: */
443:
2.12 timbl 444: PUBLIC HTStream* HTPlainToHTML ARGS5(
445: HTRequest *, request,
446: void *, param,
447: HTFormat, input_format,
448: HTFormat, output_format,
449: HTStream *, output_stream)
2.1 timbl 450: {
2.13 frystyk 451: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
452: CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18 luotonen 453: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 454: if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13 frystyk 455:
456: memset(present, '\0', MAX_ATTRIBUTES);
2.25.2.1! frystyk 457: memset((char *) value, '\0', MAX_ATTRIBUTES*sizeof(char *));
2.13 frystyk 458:
459: me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12 timbl 460: me->dtd = &HTMLP_dtd;
461: me->target = output_stream;
2.13 frystyk 462: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
463: me->write_pointer = me->buffer;
2.21 timbl 464: flush_breaks(me);
2.13 frystyk 465:
466: HTMLGen_start_element(me, HTML_HTML, present, value);
467: HTMLGen_start_element(me, HTML_BODY, present, value);
468: HTMLGen_start_element(me, HTML_PRE, present, value);
469:
2.7 timbl 470: return (HTStream*) me;
2.1 timbl 471: }
2.13 frystyk 472:
473:
2.17 timbl 474: /* A safe version for making 7-bit restricted HTML
475: ** Beware that thsi makes it horrible for the Scandinavians
476: ** to actually read it.
477: */
478:
479: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
480: HTRequest *, request,
481: void *, param,
482: HTFormat, input_format,
483: HTFormat, output_format,
484: HTStream *, output_stream)
2.13 frystyk 485:
2.17 timbl 486: {
487: HTStream* me = HTPlainToHTML(request,param,input_format,
488: output_format, output_stream);
489: ((HTStructured*)me)->seven_bit = YES;
490: return me;
491: }
2.1 timbl 492:
Webmaster