Annotation of libwww/Library/src/HTTeXGen.c, revision 2.9
2.1 frystyk 1: /* Simple LaTeX Generator that converts in a 1:1 manner from HTML to LaTeX
2: ** =======================================================================
3: **
4: ** This version of the HTML object sends LaTeX to the output stream.
5: ** No attributes are considered in the translation!
6: ** The module uses simple 1:1 table-conversions, but this COULD be
7: ** expanded to a stack-machine. This would then be in start_element and
8: ** end_element...
9: ** Henrik 07/03-94
2.7 duns 10: **
11: ** HISTORY:
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
13: **
2.1 frystyk 14: */
15:
/* Nominal width of an output line.  The line buffer in struct _HTStructured
** is declared with twice this size, and a line is sent to the target shortly
** before BUFFER_SIZE bytes have been collected. */
#define BUFFER_SIZE	80

/* Punctuation characters that are remembered as candidate positions for
** breaking a line that has grown too long. */
#define WORD_DELIMITERS	",;:[]()"
2.1 frystyk 18:
19: /* Implements: */
20: #include "HTTeXGen.h"
21: #include "HTMLPDTD.h"
22: #include "HTStream.h"
23: #include "SGML.h"
24: #include "HTFormat.h"
25:
26: /* HTML Object
27: ** -----------
28: */
29:
30: struct _HTStream {
31: CONST HTStreamClass * isa;
32: HTStream * target;
33: HTStreamClass targetClass; /* COPY for speed */
34: };
35:
36: struct _HTStructured {
37: CONST HTStructuredClass * isa;
38: HTStream * target;
39: HTStreamClass targetClass; /* COPY for speed */
40: CONST SGML_dtd * dtd;
41:
2.5 frystyk 42: char buffer[2*BUFFER_SIZE]; /* See note */
2.1 frystyk 43: char * write_pointer;
44: char * line_break;
45: BOOL sensitive; /* Can we put \n */
46: BOOL preformatted; /* Is it verbatim? */
47: BOOL markup; /* If doing LaTeX markup */
48: BOOL startup; /* To skip MIME header */
49: };
2.5 frystyk 50:
51: /* The buffer has to be bigger than 80 as latex markup might make the line
52: longer before we get to flush it. */
2.1 frystyk 53:
2.4 frystyk 54: PRIVATE char *TeX_names[HTMLP_ELEMENTS][2] = {
2.1 frystyk 55: { "", "" }, /* HTML_A */
56: { "", "" }, /* HTML_ABBREV */
57: { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT */
58: { "", "" }, /* HTML_ACRONYM */
59: { "", "" }, /* HTML_ADDED */
60: { "{\\it ", "}" }, /* HTML_ADDRESS */
61: { "", "" }, /* HTML_ARG */
62: { "{\\bf ", "}" }, /* HTML_B */
63: { "", "" }, /* HTML_BASE */
64: { "{\\sf ", "}" }, /* HTML_BLOCKQUOTE */
65: { "", "" }, /* HTML_BODY */
66: { "", "" }, /* HTML_BOX */
67: { "", "" }, /* HTML_BR */
68: { "", "" }, /* HTML_BYLINE */
69: { "", "" }, /* HTML_CAPTION */
70: { "", "" }, /* HTML_CHANGED */
71: { "\\cite{", "}" }, /* HTML_CITE */
72: { "", "" }, /* HTML_CMD */
73: { "{\\tt ", "}" }, /* HTML_CODE */
74: { "\n\\typeout{", "}\n" }, /* HTML_COMMENT */
75: { "]", "" }, /* HTML_DD */
76: { "", "" }, /* HTML_DFN */
77: { "", "" }, /* HTML_DIR */
78: { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL */
79: { "\n\\item[", "" }, /* HTML_DT */
80: { "{\\em ", "}" }, /* HTML_EM */
81: { "", "" }, /* HTML_FIG */
82: { "\n\\footnote{", "}\n" }, /* HTML_FOOTNOTE */
83: { "", "" }, /* HTML_FORM */
84: { "\n\\chapter{", "}\n" }, /* HTML_H1 */
85: { "\n\\section{", "}\n" }, /* HTML_H2 */
86: { "\n\\subsection{","}\n" }, /* HTML_H3 */
87: { "\n\\subsubsection{","}\n" }, /* HTML_H4 */
88: { "\n\\paragraph{", "}\n" }, /* HTML_H5 */
89: { "\n\\subparagraph{","}\n" }, /* HTML_H6 */
90: { "", "\n" }, /* HTML_H7 */
91: { "", "" }, /* HTML_HEAD */
92: { "", "" }, /* HTML_HR */
93: { "", "" }, /* HTML_HTML */
2.3 duns 94: { "", "" }, /* HTML_HTMLPLUS */
2.1 frystyk 95: { "{\\it ", "}" }, /* HTML_I */
96: { "", "" }, /* HTML_IMAGE */
2.8 frystyk 97: { "_FIGUR_", "" }, /* HTML_IMG */
2.1 frystyk 98: { "", "" }, /* HTML_INPUT */
99: { "", "" }, /* HTML_ISINDEX */
100: { "{\\tt ", "}" }, /* HTML_KBD */
101: { "", "" }, /* HTML_L */
102: { "\n\\item ", "" }, /* HTML_LI */
103: { "", "" }, /* HTML_LINK */
104: { "", "" }, /* HTML_LISTING */
105: { "", "" }, /* HTML_LIT */
106: { "", "" }, /* HTML_MARGIN */
107: { "", "" }, /* HTML_MATH */
108: { "", "" }, /* HTML_MENU */
109: { "", "" }, /* HTML_NEXTID */
110: { "", "" }, /* HTML_NOTE */
111: { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL */
112: { "", "" }, /* HTML_OPTION */
113: { "", "" }, /* HTML_OVER */
114: { "\n\n", "" }, /* HTML_P */
115: { "", "" }, /* HTML_PERSON */
116: { "", "" }, /* HTML_PLAINTEXT */
117: { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE */
118: { "", "" }, /* HTML_Q */
119: { "\\begin{quote}", "\\end{quote}"}, /* HTML_QUOTE */
120: { "", "" }, /* HTML_RENDER */
121: { "", "" }, /* HTML_REMOVED */
122: { "", "" }, /* HTML_S */
123: { "", "" }, /* HTML_SAMP */
124: { "", "" }, /* HTML_SELECT */
125: { "{\\bf ", "}" }, /* HTML_STRONG */
126: { "", "" }, /* HTML_SUB */
127: { "", "" }, /* HTML_SUP */
128: { "", "" }, /* HTML_TAB */
129: { "", "" }, /* HTML_TABLE */
130: { "", "" }, /* HTML_TD */
131: { "", "" }, /* HTML_TEXTAREA */
132: { "", "" }, /* HTML_TH */
133: { "\n\\title{", "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
134: { "", "" }, /* HTML_TR */
135: { "", "" }, /* HTML_TT */
136: { "", "" }, /* HTML_U */
137: { "\n\\begin{itemize}","\n\\end{itemize}\n"}, /* HTML_UL */
138: { "", "" }, /* HTML_VAR */
139: { "{\\sf ", "}" } /* HTML_XMP */
140: };
141:
2.8 frystyk 142: PRIVATE char *TeX_entities[HTML_ENTITIES] = {
2.1 frystyk 143: "\\AE ", /*"AElig", capital AE diphthong (ligature) */
2.2 frystyk 144: "\\\'{A}", /*"Aacute", capital A, acute accent */
145: "\\^{A}", /*"Acirc", capital A, circumflex accent */
146: "\\`{A}", /*"Agrave", capital A, grave accent */
147: "\\AA", /*"Aring", capital A, ring */
148: "\\~{A}", /*"Atilde", capital A, tilde */
149: "\\\"{A}", /*"Auml", capital A, dieresis or umlaut mark */
150: "\\c{C}", /*"Ccedil", capital C, cedilla */
151: "\\OE ", /*"ETH", capital Eth, Icelandic */
152: "\\\'{E}", /*"Eacute", capital E, acute accent */
153: "\\^{E}", /*"Ecirc", capital E, circumflex accent */
154: "\\`{E}", /*"Egrave", capital E, grave accent */
155: "\\\"{E}", /*"Euml", capital E, dieresis or umlaut mark */
156: "\\\'{I}", /*"Iacute", capital I, acute accent */
157: "\\^{I}", /*"Icirc", capital I, circumflex accent */
158: "\\`{I}", /*"Igrave", capital I, grave accent */
159: "\\\"{I}", /*"Iuml", capital I, dieresis or umlaut mark */
160: "\\~{N}", /*"Ntilde", capital N, tilde */
161: "\\\'{O}", /*"Oacute", capital O, acute accent */
162: "\\^{O}", /*"Ocirc", capital O, circumflex accent */
163: "\\`{O}", /*"Ograve", capital O, grave accent */
2.1 frystyk 164: "\\O ", /*"Oslash", capital O, slash */
2.2 frystyk 165: "\\~{O}", /*"Otilde", capital O, tilde */
166: "\\\"{O}", /*"Ouml", capital O, dieresis or umlaut mark */
167: " ", /*"THORN", capital THORN, Icelandic */
168: "\\\'{U}", /*"Uacute", capital U, acute accent */
169: "\\^{U}", /*"Ucirc", capital U, circumflex accent */
170: "\\`{U}", /*"Ugrave", capital U, grave accent */
171: "\\\"{U}", /*"Uuml", capital U, dieresis or umlaut mark */
172: "\\\'{Y}", /*"Yacute", capital Y, acute accent */
173: "\\\'{a}", /*"aacute", small a, acute accent */
174: "\\^{a}", /*"acirc", small a, circumflex accent */
2.1 frystyk 175: "\\ae ", /*"aelig", small ae diphthong (ligature) */
2.2 frystyk 176: "\\`{a}", /*"agrave", small a, grave accent */
2.1 frystyk 177: "&", /*"amp", ampersand */
178: "\\aa ", /*"aring", small a, ring */
2.2 frystyk 179: "\\~{a}", /*"atilde", small a, tilde */
180: "\\\"{a}", /*"auml", small a, dieresis or umlaut mark */
181: "\\c{c}", /*"ccedil", small c, cedilla */
182: "\\\'{e}", /*"eacute", small e, acute accent */
183: "\\^{c}", /*"ecirc", small e, circumflex accent */
184: "\\`{c}", /*"egrave", small e, grave accent */
185: "\\oe ", /*"eth", small eth, Icelandic */
186: "\\\"{e}", /*"euml", small e, dieresis or umlaut mark */
2.1 frystyk 187: ">", /*"gt", greater than */
2.2 frystyk 188: "\\\'{\\i}", /*"iacute", small i, acute accent */
189: "\\^{\\i}", /*"icirc", small i, circumflex accent */
190: "\\`{\\i}", /*"igrave", small i, grave accent */
191: "\\\"{\\i}", /*"iuml", small i, dieresis or umlaut mark */
2.1 frystyk 192: "<", /*"lt", less than */
2.2 frystyk 193: "\\~{n}", /*"ntilde", small n, tilde */
194: "\\\'{o}", /*"oacute", small o, acute accent */
195: "\\~{o}", /*"ocirc", small o, circumflex accent */
196: "\\`{o}", /*"ograve", small o, grave accent */
2.1 frystyk 197: "\\o ", /*"oslash", small o, slash */
2.2 frystyk 198: "\\~{o}", /*"otilde", small o, tilde */
199: "\\\"{o}", /*"ouml", small o, dieresis or umlaut mark */
2.8 frystyk 200: "\"", /*"quot", double quote sign - June 1994 */
2.1 frystyk 201: "\\ss ", /*"szlig", small sharp s, German (sz ligature)*/
2.2 frystyk 202: " ", /*"thorn", small thorn, Icelandic */
203: "\\\'{u}", /*"uacute", small u, acute accent */
204: "\\^{u}", /*"ucirc", small u, circumflex accent */
205: "\\`{u}", /*"ugrave", small u, grave accent */
206: "\\\"{u}", /*"uuml", small u, dieresis or umlaut mark */
207: "\\\'{y}", /*"yacute", small y, acute accent */
208: "\\\"{y}" /*"yuml", small y, dieresis or umlaut mark */
2.1 frystyk 209: };
210:
211:
212: /* Flush Buffer
213: ** ------------
214: */
215: PRIVATE void HTTeXGen_flush ARGS1(HTStructured *, me)
216: {
217: (*me->targetClass.put_block)(me->target,
218: me->buffer,
219: me->write_pointer - me->buffer);
220: me->write_pointer = me->buffer;
221: me->line_break = me->buffer;
222: }
223:
224:
225: /* Character handling
226: ** ------------------
227: **
228: */
229: PRIVATE void HTTeXGen_put_character ARGS2(HTStructured *, me, char, c)
230: {
231: if (!me->startup) /* To skip MIME header */
232: return;
233: if (c=='\n') {
234: if (me->markup || me->preformatted) { /* Put out as is and flush */
235: *me->write_pointer++ = c;
236: HTTeXGen_flush(me);
237: return;
238: } else if (me->sensitive || *(me->write_pointer-1)==' ') {
239: return;
240: } else
241: *me->write_pointer++ = ' '; /* Try to pretty print */
242: } else if (me->markup || me->preformatted) {
243: *me->write_pointer++ = c;
244: } else if (c==' ' || c=='\t') { /* Skip space and tabs */
245: if (*(me->write_pointer-1) != ' ')
246: *me->write_pointer++ = ' ';
247: else
248: return;
249: } else {
250: if (c=='$' || c=='&' || c=='%' || c=='#' || /* Special chars */
251: c=='{' || c=='}' || c=='_') {
252: *me->write_pointer++ = '\\';
253: *me->write_pointer++ = c;
254: } else if (c=='\\') { /* Special names */
255: char *temp = "$\\backslash$";
256: strcpy(me->write_pointer, temp);
257: me->write_pointer += strlen(temp);
258: } else if (c=='^') {
259: char *temp = "$\\hat{ }$";
260: strcpy(me->write_pointer, temp);
261: me->write_pointer += strlen(temp);
262: } else if (c=='~') {
263: char *temp = "$\\tilde{ }$";
264: strcpy(me->write_pointer, temp);
265: me->write_pointer += strlen(temp);
266: } else if (c=='|' || c=='<' || c=='>') { /* Math mode */
267: *me->write_pointer++ = '$';
268: *me->write_pointer++ = c;
269: *me->write_pointer++ = '$';
270: } else
271: *me->write_pointer++ = c; /* Char seems normal */
272: }
273:
2.6 frystyk 274: if (c==' ') /* Find delimiter */
2.1 frystyk 275: me->line_break = me->write_pointer;
276: else if (strchr(WORD_DELIMITERS, c))
277: me->line_break = me->write_pointer-1;
278:
279: /* Flush buffer out when full */
280: if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
2.9 ! frystyk 281: #ifdef OLD_CODE
2.1 frystyk 282: if (me->markup || me->preformatted) {
2.9 ! frystyk 283: #endif /* OLD_CODE */
2.8 frystyk 284: if (me->preformatted) {
2.1 frystyk 285: *me->write_pointer = '\n';
286: (*me->targetClass.put_block)(me->target,
287: me->buffer,
288: me->write_pointer-me->buffer+1);
289: me->write_pointer = me->buffer;
290: } else { /* Use break-point */
291: char line_break_char = *me->line_break;
292: char *saved = me->line_break;
293: *me->line_break = '\n';
294: (*me->targetClass.put_block)(me->target,
295: me->buffer,
296: me->line_break-me->buffer+1);
297: *me->line_break = line_break_char;
298: { /* move next line in */
299: char *p = saved;
300: char *q;
301: for(q=me->buffer; p<me->write_pointer; )
302: *q++ = *p++;
303: }
304: me->write_pointer = me->buffer + (me->write_pointer-saved);
305: }
306: me->line_break = me->buffer;
307: }
2.9 ! frystyk 308: return;
2.1 frystyk 309: }
310:
311:
312:
313: /* String handling
314: ** ---------------
315: */
316: PRIVATE void HTTeXGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
317: {
318: CONST char * p;
319: for (p=s; *p; p++)
320: HTTeXGen_put_character(me, *p);
321: }
322:
323:
324: PRIVATE void HTTeXGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
325: {
326: CONST char * p;
327: for(p=s; p<s+l; p++)
328: HTTeXGen_put_character(me, *p);
329: }
330:
331:
332: /* Start Element
333: ** -------------
334: **
335: ** No attributes are put to the output Henrik 07/03-94
336: ** Does no assumptions of WHAT element is started...
337: */
338: PRIVATE void HTTeXGen_start_element ARGS4(
339: HTStructured *, me,
340: int, element_number,
341: CONST BOOL*, present,
342: CONST char **, value)
343: {
344: me->startup = YES; /* Now, let's get down to it */
2.6 frystyk 345: if (me->preformatted == YES) { /* Don't start markup in here */
346: if (TRACE)
347: fprintf(stderr, "LaTeX....... No Markup in verbatim mode\n");
2.1 frystyk 348: return;
2.6 frystyk 349: }
2.1 frystyk 350: if (element_number == HTML_PRE)
351: me->preformatted = YES;
352: if (element_number == HTML_CITE || /* No \n here, please! */
353: element_number == HTML_DT ||
354: element_number == HTML_H1 ||
355: element_number == HTML_H2 ||
356: element_number == HTML_H3 ||
357: element_number == HTML_H4 ||
358: element_number == HTML_H5 ||
359: element_number == HTML_H6 ||
360: element_number == HTML_H7 ||
361: element_number == HTML_TITLE)
362: me->sensitive = YES;
363: else if (element_number == HTML_DD) /* Only way to turn <DT> off */
364: me->sensitive = NO;
365: me->markup = element_number == HTML_A ? NO : YES;
366: HTTeXGen_put_string(me, *TeX_names[element_number]);
367: me->markup = NO;
368: }
369:
370:
371: /* End Element
372: ** -----------
373: **
374: ** Ends an markup element Henrik 07/03-94
375: ** Does no assumptions of WHAT element is ended...
376: */
377: PRIVATE void HTTeXGen_end_element ARGS2(HTStructured *, me,
2.9 ! frystyk 378: int , element_number)
2.1 frystyk 379: {
2.6 frystyk 380: if (me->preformatted && element_number != HTML_PRE) {
381: if (TRACE)
382: fprintf(stderr, "LaTeX....... No markup in verbatim mode\n");
383: return;
384: }
2.1 frystyk 385: me->preformatted = NO;
386: me->markup = YES;
387: HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
388: me->markup = NO;
389: if (element_number == HTML_CITE ||
390: element_number == HTML_DL ||
391: element_number == HTML_H1 ||
392: element_number == HTML_H2 ||
393: element_number == HTML_H3 ||
394: element_number == HTML_H4 ||
395: element_number == HTML_H5 ||
396: element_number == HTML_H6 ||
397: element_number == HTML_H7 ||
398: element_number == HTML_TITLE)
399: me->sensitive = NO;
400: }
401:
402:
403: /* Expanding entities
404: ** ------------------
405: **
406: */
407: PRIVATE void HTTeXGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
408: {
409: BOOL mark = me->markup;
410: if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
411: *TeX_entities[entity_number] != '<' &&
412: *TeX_entities[entity_number] != '>')
413: me->markup = YES;
414: HTTeXGen_put_string(me, TeX_entities[entity_number]);
415: me->markup = mark;
416: }
417:
418:
419:
420: /* Free an HTML object
421: ** -------------------
422: **
423: */
2.9 ! frystyk 424: PRIVATE int HTTeXGen_free ARGS1(HTStructured *, me)
2.1 frystyk 425: {
426: HTTeXGen_flush(me);
427: (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
428: HTTeXGen_flush(me);
2.7 duns 429: (*me->targetClass._free)(me->target); /* ripple through */
2.1 frystyk 430: free(me);
2.9 ! frystyk 431: return 0;
2.1 frystyk 432: }
433:
434:
2.9 ! frystyk 435: PRIVATE int HTTeXGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 frystyk 436: {
437: HTTeXGen_free(me);
2.9 ! frystyk 438: return EOF;
2.1 frystyk 439: }
440:
441:
442: /* Structured Object Class
443: ** -----------------------
444: */
445: PRIVATE CONST HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
446: {
447: "HTMLToTeX",
448: HTTeXGen_free,
449: HTTeXGen_abort,
450: HTTeXGen_put_character, HTTeXGen_put_string, HTTeXGen_write,
451: HTTeXGen_start_element, HTTeXGen_end_element,
452: HTTeXGen_put_entity
453: };
454:
455:
456: /* HTConverter from HTML to TeX Stream
457: ** ------------------------------------------
458: **
459: */
460: PUBLIC HTStream* HTMLToTeX ARGS5(
461: HTRequest *, request,
462: void *, param,
463: HTFormat, input_format,
464: HTFormat, output_format,
465: HTStream *, output_stream)
466: {
467: HTStructured* me = (HTStructured*) calloc(1, sizeof(*me));
468: if (me == NULL) outofmem(__FILE__, "HTMLToTeX");
469:
470: me->isa = (HTStructuredClass*) &HTTeXGeneration;
471: me->dtd = &HTMLP_dtd;
472: me->target = output_stream;
473: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
474: me->write_pointer = me->buffer;
475: me->line_break = me->buffer;
476: (*me->targetClass.put_string)(me->target,
477: "\\documentstyle[11pt]{report}\n\\begin{document}\n");
478: return SGML_new(&HTMLP_dtd, me);
479: }
480:
481:
482: /* END OF FILE HTTeXGen.c */
483:
484:
485:
486:
487:
488:
489:
490:
Webmaster