Annotation of libwww/Library/src/HTMLGen.c, revision 2.27.2.1
2.25 frystyk 1: /* HTMLGen.c
2: ** HTML GENERATOR
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This version of the HTML object sends HTML markup to the output stream.
8: **
9: ** Bugs: Line wrapping is not done at all.
10: ** All data handled as PCDATA.
11: ** Should convert old XMP, LISTING and PLAINTEXT to PRE.
12: **
13: ** It is not obvious to me right now whether the HEAD should be generated
2.7 timbl 14: ** from the incoming data or the anchor. Currently it is from the former
2.17 timbl 15: ** which is cleanest. TBL
2.22 duns 16: **
17: ** HISTORY:
18: ** 8 Jul 94 FM Insulate free() from _free structure element.
19: **
2.1 timbl 20: */
21:
2.27 frystyk 22: /* Library include files */
23: #include "tcp.h"
24: #include "HTUtils.h"
2.12 timbl 25: #include "HTMLPDTD.h"
2.1 timbl 26: #include "HTStream.h"
27: #include "SGML.h"
28: #include "HTFormat.h"
2.23 frystyk 29: #include "HTMLGen.h" /* Implemented here */
30:
#define BUFFER_SIZE	80	/* Line buffer attempts to make neat breaks */

/* Shorthands for calling the target stream's methods.  They expect a
** variable called `me' (with `target' and `targetClass' members) to be
** in scope at the point of use.  Arguments are parenthesised so that any
** expression may be passed.
*/
#define PUTC(c)		(*me->targetClass.put_character)(me->target, (c))
/* #define PUTS(s)	(*me->targetClass.put_string)(me->target, s) */
#define PUTB(s,l)	(*me->targetClass.put_block)(me->target, (s), (l))
2.1 timbl 36:
37: /* HTML Object
38: ** -----------
39: */
40:
41: struct _HTStream {
42: CONST HTStreamClass * isa;
43: HTStream * target;
44: HTStreamClass targetClass; /* COPY for speed */
45: };
46:
2.21 timbl 47: #define MAX_CLEANNESS 10
2.1 timbl 48: struct _HTStructured {
49: CONST HTStructuredClass * isa;
50: HTStream * target;
51: HTStreamClass targetClass; /* COPY for speed */
2.12 timbl 52: CONST SGML_dtd * dtd;
2.17 timbl 53: BOOL seven_bit; /* restrict output*/
2.7 timbl 54:
2.14 frystyk 55: char buffer[BUFFER_SIZE+1];
2.7 timbl 56: char * write_pointer;
2.21 timbl 57: char * line_break [MAX_CLEANNESS+1];
2.7 timbl 58: int cleanness;
2.21 timbl 59: BOOL overflowed;
60: BOOL delete_line_break_char
61: [MAX_CLEANNESS+1];
2.14 frystyk 62: char preformatted;
2.1 timbl 63: };
64:
2.17 timbl 65: /* OUTPUT FUNCTIONS
66: **
67: ** These functions output the finished SGML stream doing the
68: ** line wrap
69: */
70:
2.7 timbl 71: /* Flush Buffer
72: ** ------------
73: */
2.21 timbl 74:
75: PRIVATE void flush_breaks ARGS1(HTStructured *, me)
76: {
77: int i;
78: for (i=0; i<= MAX_CLEANNESS; i++) {
79: me->line_break[i] = NULL;
80: }
81: }
82:
83:
2.27.2.1! frystyk 84: PRIVATE int HTMLGen_flush ARGS1(HTStructured *, me)
2.7 timbl 85: {
86: (*me->targetClass.put_block)(me->target,
87: me->buffer,
88: me->write_pointer - me->buffer);
89: me->write_pointer = me->buffer;
2.21 timbl 90: flush_breaks(me);
2.7 timbl 91: me->cleanness = 0;
2.27.2.1! frystyk 92: return HT_OK;
2.21 timbl 93: }
94:
95:
96: /* Weighted optional line break
97: **
98: ** We keep track of all the breaks for when we chop the line
99: */
100:
101: PRIVATE void allow_break ARGS3(HTStructured *, me, int, new_cleanness,
102: BOOL, dlbc)
103: {
104: me->line_break[new_cleanness] =
105: dlbc ? me->write_pointer - 1 /* Point to space */
106: : me->write_pointer ; /* point to gap */
107: me->delete_line_break_char[new_cleanness] = dlbc;
108: if (new_cleanness >= me->cleanness)
109: me->cleanness = new_cleanness;
2.7 timbl 110: }
111:
112:
2.1 timbl 113: /* Character handling
114: ** ------------------
2.8 timbl 115: **
116: ** The tricky bits are the line break handling. This attempts
117: ** to synchrononise line breaks on sentence or phrase ends. This
118: ** is important if one stores SGML files in a line-oriented code
119: ** repository, so that if a small change is made, line ends don't
120: ** shift in a ripple-through to apparently change a large part of the
121: ** file. We give extra "cleanness" to spaces appearing directly
122: ** after periods (full stops), [semi]colons and commas.
123: ** This should make the source files easier to read and modify
2.17 timbl 124: ** by hand, too, though this is not a primary design consideration. TBL
2.1 timbl 125: */
2.21 timbl 126: PRIVATE char delims[] = ",;:."; /* @@ english bias */
2.27.2.1! frystyk 127: PRIVATE int HTMLGen_output_character ARGS2(HTStructured *, me, char, c)
2.1 timbl 128: {
2.7 timbl 129:
130: *me->write_pointer++ = c;
131:
2.21 timbl 132: if (c=='\n') { /* Newlines */
133: if (me->preformatted) {
134: HTMLGen_flush(me);
2.27.2.1! frystyk 135: return HT_OK;
2.21 timbl 136: } else {
137: me->write_pointer[-1] = c = ' '; /* Treat same as space */
138: }
2.7 timbl 139: }
140:
2.21 timbl 141: /* Figure our whether we can break at this point
142: */
2.7 timbl 143: if ((!me->preformatted && c==' ')) {
2.8 timbl 144: int new_cleanness = 1;
145: if (me->write_pointer > (me->buffer + 1)) {
2.9 luotonen 146: char * p;
2.11 timbl 147: p = strchr(delims, me->write_pointer[-2]);
2.21 timbl 148: if (p) new_cleanness = p - delims + 4;
2.8 timbl 149: }
2.21 timbl 150: allow_break(me, new_cleanness, YES);
2.7 timbl 151: }
152:
2.21 timbl 153: /* Flush buffer out when full, or whenever the line is over
154: the nominal maximum and we can break at all
155: */
156: if (me->write_pointer >= me->buffer + BUFFER_SIZE-1
157: || (me->overflowed && me->cleanness)) {
158: if (me->cleanness) {
159: char line_break_char = me->line_break[me->cleanness][0];
160: char * saved = me->line_break[me->cleanness];
2.8 timbl 161:
2.21 timbl 162: if (me->delete_line_break_char[me->cleanness]) saved++;
163: me->line_break[me->cleanness][0] = '\n';
2.7 timbl 164: (*me->targetClass.put_block)(me->target,
165: me->buffer,
2.21 timbl 166: me->line_break[me->cleanness] - me->buffer + 1);
167: me->line_break[me->cleanness][0] = line_break_char;
2.7 timbl 168: { /* move next line in */
2.8 timbl 169: char * p=saved;
170: char *q;
171: for(q=me->buffer; p < me->write_pointer; )
2.7 timbl 172: *q++ = *p++;
173: }
174: me->cleanness = 0;
2.21 timbl 175: /* Now we have to check whether ther are any perfectly good breaks
176: ** which weren't good enough for the last line but may be
177: ** good enough for the next
178: */
179: {
180: int i;
181: for(i=0; i <= MAX_CLEANNESS; i++) {
182: if (me->line_break[i] > saved) {
183: me->line_break[i] = me->line_break[i] -
184: (saved-me->buffer);
185: me->cleanness = i;
186: } else {
187: me->line_break[i] = NULL;
188: }
189: }
190: }
191:
2.8 timbl 192: me->write_pointer = me->write_pointer - (saved-me->buffer);
2.21 timbl 193: me->overflowed = NO;
194: } else { /* No break- just output with no newline */
2.7 timbl 195: (*me->targetClass.put_block)(me->target,
2.14 frystyk 196: me->buffer,
2.15 luotonen 197: me->write_pointer - me->buffer);
2.8 timbl 198: me->write_pointer = me->buffer;
2.21 timbl 199: flush_breaks(me);
200: me->overflowed = YES;
2.7 timbl 201: }
202: }
2.27.2.1! frystyk 203: return HT_OK;
2.1 timbl 204: }
205:
206:
207: /* String handling
208: ** ---------------
209: */
2.27.2.1! frystyk 210: PRIVATE int HTMLGen_output_string ARGS2(HTStructured *, me, CONST char*, s)
2.17 timbl 211: {
2.27.2.1! frystyk 212: while (*s)
! 213: HTMLGen_output_character(me, *s++);
! 214: return HT_OK;
2.17 timbl 215: }
216:
217:
218: /* INPUT FUNCTIONS
219: **
220: ** These take data from the structured stream. In the input
221: ** stream, entities are in raw form. The seven_bit flag controls
222: ** whether the ISO Latin-1 characters are represented in SGML entity
223: ** form. This is only recommended for viewing on older non-latin-1
224: ** capable equipment, or for mailing for example.
225: **
226: ** Bug: assumes local encoding is ISO!
227: */
2.27.2.1! frystyk 228: PRIVATE int HTMLGen_put_character ARGS2(HTStructured *, me, char, c)
2.17 timbl 229: {
230: if (c=='&') HTMLGen_output_string(me, "&");
231: else if (c=='<') HTMLGen_output_string(me, "<");
232: else if (me->seven_bit && ((unsigned char)c > 127)) {
233: char temp[8];
234: sprintf(temp, "&%d;", c);
235: HTMLGen_output_string(me, temp);
2.27.2.1! frystyk 236: } else
! 237: HTMLGen_output_character(me, c);
! 238: return HT_OK;
2.17 timbl 239: }
240:
2.27.2.1! frystyk 241: PRIVATE int HTMLGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1 timbl 242: {
2.27.2.1! frystyk 243: while (*s)
! 244: HTMLGen_put_character(me, *s++);
! 245: return HT_OK;
2.1 timbl 246: }
247:
2.27.2.1! frystyk 248: PRIVATE int HTMLGen_write ARGS3(HTStructured *, me, CONST char*, b, int, l)
2.1 timbl 249: {
2.27.2.1! frystyk 250: while (l-- > 0)
! 251: HTMLGen_put_character(me, *b++);
! 252: return HT_OK;
2.1 timbl 253: }
254:
255:
256: /* Start Element
257: ** -------------
2.7 timbl 258: **
259: ** Within the opening tag, there may be spaces
260: ** and the line may be broken at these spaces.
2.1 timbl 261: */
262: PRIVATE void HTMLGen_start_element ARGS4(
2.3 timbl 263: HTStructured *, me,
2.2 timbl 264: int, element_number,
265: CONST BOOL*, present,
266: CONST char **, value)
2.1 timbl 267: {
268: int i;
2.12 timbl 269: HTTag * tag = &me->dtd->tags[element_number];
2.1 timbl 270:
2.20 timbl 271: /* Control line breaks allowed within tag! */
272: int was_preformatted = me->preformatted; /* save state */
273: me->preformatted = 1; /* Can break between attributes */
274:
2.17 timbl 275: HTMLGen_output_character(me, '<');
276: HTMLGen_output_string(me, tag->name);
2.1 timbl 277: if (present) for (i=0; i< tag->number_of_attributes; i++) {
278: if (present[i]) {
2.17 timbl 279: HTMLGen_output_character(me, ' ');
2.21 timbl 280: allow_break(me, 1, YES);
2.17 timbl 281: HTMLGen_output_string(me, tag->attributes[i].name);
2.1 timbl 282: if (value[i]) {
2.17 timbl 283: HTMLGen_output_string(me, "=\"");
284: HTMLGen_output_string(me, value[i]);
285: HTMLGen_output_character(me, '"');
2.1 timbl 286: }
287: }
288: }
2.20 timbl 289: me->preformatted = was_preformatted; /* Restore state */
290:
2.14 frystyk 291: /* Nested PRE is no more a problem! */
292: if (element_number == HTML_PRE)
293: me->preformatted++;
2.19 timbl 294:
295: HTMLGen_output_character(me, '>');
2.7 timbl 296:
2.20 timbl 297: /* Here is a funny one. In PRE, newlines are significant, except of
298: course for one after the <PRE> which is ignored. This means that
299: we MUST put in a dummy one after the <PRE> to protect any real newline
300: within the pre section.
301:
302: However, *within* a PRE section, although we can break after
303: (for example) emphasis start tags, it will probably confuse some
304: parsers so we won't.*/
305:
306: if (element_number == HTML_PRE) {
307: HTMLGen_output_character(me, '\n');
308: } else if (!me->preformatted &&
309: tag->contents != SGML_EMPTY) { /* can break after element start */
2.21 timbl 310: allow_break(me, 3, NO);
2.8 timbl 311: }
2.1 timbl 312: }
313:
314:
2.17 timbl 315: /* End Element
316: ** -----------
2.1 timbl 317: **
2.16 timbl 318: ** The rules for insertring CR LF into SGML are weird, strict, and
319: ** nonintitive.
2.20 timbl 320: ** See comment also about PRE above.
2.1 timbl 321: */
2.3 timbl 322: PRIVATE void HTMLGen_end_element ARGS2(HTStructured *, me,
2.24 frystyk 323: int , element_number)
2.1 timbl 324: {
2.20 timbl 325: if (element_number == HTML_PRE) {
326: HTMLGen_output_character(me, '\n');
327: } else if (!me->preformatted) { /* can break before element end */
2.21 timbl 328: allow_break(me, 1, NO);
2.8 timbl 329: }
2.17 timbl 330: HTMLGen_output_string(me, "</");
331: HTMLGen_output_string(me, me->dtd->tags[element_number].name);
332: HTMLGen_output_character(me, '>'); /* NO break after. TBL 940501 */
2.14 frystyk 333: if (element_number == HTML_PRE && me->preformatted)
334: me->preformatted--;
2.1 timbl 335: }
336:
337:
2.17 timbl 338: /* Expanding entities
339: ** ------------------
2.1 timbl 340: **
341: */
342:
2.3 timbl 343: PRIVATE void HTMLGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
2.1 timbl 344: {
2.17 timbl 345: HTMLGen_output_character(me, '&');
346: HTMLGen_output_string(me, me->dtd->entity_names[entity_number]);
347: HTMLGen_output_character(me, ';');
2.1 timbl 348: }
349:
2.17 timbl 350: /* Free an object
351: ** --------------
2.1 timbl 352: **
353: */
2.24 frystyk 354: PRIVATE int HTMLGen_free ARGS1(HTStructured *, me)
2.1 timbl 355: {
2.21 timbl 356: HTMLGen_flush(me);
2.7 timbl 357: (*me->targetClass.put_character)(me->target, '\n');
2.22 duns 358: (*me->targetClass._free)(me->target); /* ripple through */
2.3 timbl 359: free(me);
2.27.2.1! frystyk 360: return HT_OK;
2.1 timbl 361: }
362:
363:
2.24 frystyk 364: PRIVATE int PlainToHTML_free ARGS1(HTStructured *, me)
2.7 timbl 365: {
366: HTMLGen_end_element(me, HTML_PRE);
367: HTMLGen_end_element(me, HTML_BODY);
368: HTMLGen_end_element(me, HTML_HTML);
369: HTMLGen_free(me);
2.27.2.1! frystyk 370: return HT_OK;
2.7 timbl 371: }
372:
373:
2.1 timbl 374:
2.24 frystyk 375: PRIVATE int HTMLGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 376: {
2.6 timbl 377: HTMLGen_free(me);
2.27.2.1! frystyk 378: return HT_ERROR;
2.1 timbl 379: }
380:
381:
2.24 frystyk 382: PRIVATE int PlainToHTML_abort ARGS2(HTStructured *, me, HTError, e)
2.1 timbl 383: {
2.7 timbl 384: PlainToHTML_free(me);
2.27.2.1! frystyk 385: return HT_ERROR;
2.1 timbl 386: }
387:
388:
389:
390: /* Structured Object Class
391: ** -----------------------
392: */
2.5 timbl 393: PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
2.1 timbl 394: {
395: "text/html",
2.27.2.1! frystyk 396: HTMLGen_flush,
2.1 timbl 397: HTMLGen_free,
2.6 timbl 398: HTMLGen_abort,
2.1 timbl 399: HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write,
2.13 frystyk 400: HTMLGen_start_element, HTMLGen_end_element,
2.1 timbl 401: HTMLGen_put_entity
402: };
403:
404:
405: /* Subclass-specific Methods
406: ** -------------------------
407: */
408:
409: PUBLIC HTStructured * HTMLGenerator ARGS1(HTStream *, output)
410: {
2.18 luotonen 411: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 412: if (me == NULL) outofmem(__FILE__, "HTMLGenerator");
413: me->isa = &HTMLGeneration;
2.12 timbl 414: me->dtd = &HTMLP_dtd;
2.1 timbl 415:
2.3 timbl 416: me->target = output;
417: me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/
2.7 timbl 418:
419: me->write_pointer = me->buffer;
2.21 timbl 420: flush_breaks(me);
2.3 timbl 421: return me;
2.1 timbl 422: }
423:
424: /* Stream Object Class
425: ** -------------------
426: **
2.2 timbl 427: ** This object just converts a plain text stream into HTML
2.12 timbl 428: ** It is officially a structured stream but only the stream bits exist.
2.2 timbl 429: ** This is just the easiest way of typecasting all the routines.
2.1 timbl 430: */
2.2 timbl 431: PRIVATE CONST HTStructuredClass PlainToHTMLConversion =
2.1 timbl 432: {
433: "plaintexttoHTML",
2.27.2.1! frystyk 434: HTMLGen_flush,
2.13 frystyk 435: PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */
2.6 timbl 436: PlainToHTML_abort,
2.1 timbl 437: HTMLGen_put_character,
438: HTMLGen_put_string,
439: HTMLGen_write,
2.2 timbl 440: NULL, /* Structured stuff */
441: NULL,
442: NULL
2.1 timbl 443: };
444:
445:
446: /* HTConverter from plain text to HTML Stream
447: ** ------------------------------------------
2.13 frystyk 448: **
449: ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!)
2.1 timbl 450: */
451:
2.12 timbl 452: PUBLIC HTStream* HTPlainToHTML ARGS5(
453: HTRequest *, request,
454: void *, param,
455: HTFormat, input_format,
456: HTFormat, output_format,
457: HTStream *, output_stream)
2.1 timbl 458: {
2.13 frystyk 459: BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
460: CONST char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
2.18 luotonen 461: HTStructured* me = (HTStructured*)calloc(1,sizeof(*me));
2.3 timbl 462: if (me == NULL) outofmem(__FILE__, "PlainToHTML");
2.13 frystyk 463:
464: memset(present, '\0', MAX_ATTRIBUTES);
2.27 frystyk 465: memset((char *) value, '\0', MAX_ATTRIBUTES*sizeof(char *));
2.13 frystyk 466:
467: me->isa = (HTStructuredClass*) &PlainToHTMLConversion;
2.12 timbl 468: me->dtd = &HTMLP_dtd;
469: me->target = output_stream;
2.13 frystyk 470: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
471: me->write_pointer = me->buffer;
2.21 timbl 472: flush_breaks(me);
2.13 frystyk 473:
474: HTMLGen_start_element(me, HTML_HTML, present, value);
475: HTMLGen_start_element(me, HTML_BODY, present, value);
476: HTMLGen_start_element(me, HTML_PRE, present, value);
477:
2.7 timbl 478: return (HTStream*) me;
2.1 timbl 479: }
2.13 frystyk 480:
481:
2.17 timbl 482: /* A safe version for making 7-bit restricted HTML
483: ** Beware that thsi makes it horrible for the Scandinavians
484: ** to actually read it.
485: */
486:
487: PUBLIC HTStream* HTPlainTo7BitHTML ARGS5(
488: HTRequest *, request,
489: void *, param,
490: HTFormat, input_format,
491: HTFormat, output_format,
492: HTStream *, output_stream)
2.13 frystyk 493:
2.17 timbl 494: {
495: HTStream* me = HTPlainToHTML(request,param,input_format,
496: output_format, output_stream);
497: ((HTStructured*)me)->seven_bit = YES;
498: return me;
499: }
2.1 timbl 500:
Webmaster