Annotation of libwww/Library/src/HTTeXGen.c, revision 2.8
2.1 frystyk 1: /* Simple LaTeX Generator that converts in a 1:1 manner from HTML to LaTeX
2: ** =======================================================================
3: **
4: ** This version of the HTML object sends LaTeX to the output stream.
5: ** No attributes are considered in the translation!
6: ** The module uses simple 1:1 table-conversions, but this COULD be
7: ** expanded to a stack-machine. This would then be in start_element and
8: ** end_element...
9: ** Henrik 07/03-94
2.7 duns 10: **
11: ** HISTORY:
12: ** 8 Jul 94 FM Insulate free() from _free structure element.
13: **
2.1 frystyk 14: */
15:
/* Target width of an output line.  The generator tries to break lines neatly
   shortly before this column; the line buffer itself is declared twice this
   size. */
#define BUFFER_SIZE	80

/* Characters, besides a blank, in front of which an output line may be
   broken. */
#define WORD_DELIMITERS	",;:[]()"
2.1 frystyk 18:
19: /* Implements: */
20: #include "HTTeXGen.h"
21: #include "HTMLPDTD.h"
22: #include "HTStream.h"
23: #include "SGML.h"
24: #include "HTFormat.h"
25:
26: /* HTML Object
27: ** -----------
28: */
29:
30: struct _HTStream {
31: CONST HTStreamClass * isa;
32: HTStream * target;
33: HTStreamClass targetClass; /* COPY for speed */
34: };
35:
36: struct _HTStructured {
37: CONST HTStructuredClass * isa;
38: HTStream * target;
39: HTStreamClass targetClass; /* COPY for speed */
40: CONST SGML_dtd * dtd;
41:
2.5 frystyk 42: char buffer[2*BUFFER_SIZE]; /* See note */
2.1 frystyk 43: char * write_pointer;
44: char * line_break;
45: BOOL sensitive; /* Can we put \n */
46: BOOL preformatted; /* Is it verbatim? */
47: BOOL markup; /* If doing LaTeX markup */
48: BOOL startup; /* To skip MIME header */
49: };
2.5 frystyk 50:
51: /* The buffer has to be bigger than 80 as latex markup might make the line
52: longer before we get to flush it. */
2.1 frystyk 53:
2.4 frystyk 54: PRIVATE char *TeX_names[HTMLP_ELEMENTS][2] = {
2.1 frystyk 55: { "", "" }, /* HTML_A */
56: { "", "" }, /* HTML_ABBREV */
57: { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT */
58: { "", "" }, /* HTML_ACRONYM */
59: { "", "" }, /* HTML_ADDED */
60: { "{\\it ", "}" }, /* HTML_ADDRESS */
61: { "", "" }, /* HTML_ARG */
62: { "{\\bf ", "}" }, /* HTML_B */
63: { "", "" }, /* HTML_BASE */
64: { "{\\sf ", "}" }, /* HTML_BLOCKQUOTE */
65: { "", "" }, /* HTML_BODY */
66: { "", "" }, /* HTML_BOX */
67: { "", "" }, /* HTML_BR */
68: { "", "" }, /* HTML_BYLINE */
69: { "", "" }, /* HTML_CAPTION */
70: { "", "" }, /* HTML_CHANGED */
71: { "\\cite{", "}" }, /* HTML_CITE */
72: { "", "" }, /* HTML_CMD */
73: { "{\\tt ", "}" }, /* HTML_CODE */
74: { "\n\\typeout{", "}\n" }, /* HTML_COMMENT */
75: { "]", "" }, /* HTML_DD */
76: { "", "" }, /* HTML_DFN */
77: { "", "" }, /* HTML_DIR */
78: { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL */
79: { "\n\\item[", "" }, /* HTML_DT */
80: { "{\\em ", "}" }, /* HTML_EM */
81: { "", "" }, /* HTML_FIG */
82: { "\n\\footnote{", "}\n" }, /* HTML_FOOTNOTE */
83: { "", "" }, /* HTML_FORM */
84: { "\n\\chapter{", "}\n" }, /* HTML_H1 */
85: { "\n\\section{", "}\n" }, /* HTML_H2 */
86: { "\n\\subsection{","}\n" }, /* HTML_H3 */
87: { "\n\\subsubsection{","}\n" }, /* HTML_H4 */
88: { "\n\\paragraph{", "}\n" }, /* HTML_H5 */
89: { "\n\\subparagraph{","}\n" }, /* HTML_H6 */
90: { "", "\n" }, /* HTML_H7 */
91: { "", "" }, /* HTML_HEAD */
92: { "", "" }, /* HTML_HR */
93: { "", "" }, /* HTML_HTML */
2.3 duns 94: { "", "" }, /* HTML_HTMLPLUS */
2.1 frystyk 95: { "{\\it ", "}" }, /* HTML_I */
96: { "", "" }, /* HTML_IMAGE */
2.8 ! frystyk 97: { "_FIGUR_", "" }, /* HTML_IMG */
2.1 frystyk 98: { "", "" }, /* HTML_INPUT */
99: { "", "" }, /* HTML_ISINDEX */
100: { "{\\tt ", "}" }, /* HTML_KBD */
101: { "", "" }, /* HTML_L */
102: { "\n\\item ", "" }, /* HTML_LI */
103: { "", "" }, /* HTML_LINK */
104: { "", "" }, /* HTML_LISTING */
105: { "", "" }, /* HTML_LIT */
106: { "", "" }, /* HTML_MARGIN */
107: { "", "" }, /* HTML_MATH */
108: { "", "" }, /* HTML_MENU */
109: { "", "" }, /* HTML_NEXTID */
110: { "", "" }, /* HTML_NOTE */
111: { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL */
112: { "", "" }, /* HTML_OPTION */
113: { "", "" }, /* HTML_OVER */
114: { "\n\n", "" }, /* HTML_P */
115: { "", "" }, /* HTML_PERSON */
116: { "", "" }, /* HTML_PLAINTEXT */
117: { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE */
118: { "", "" }, /* HTML_Q */
119: { "\\begin{quote}", "\\end{quote}"}, /* HTML_QUOTE */
120: { "", "" }, /* HTML_RENDER */
121: { "", "" }, /* HTML_REMOVED */
122: { "", "" }, /* HTML_S */
123: { "", "" }, /* HTML_SAMP */
124: { "", "" }, /* HTML_SELECT */
125: { "{\\bf ", "}" }, /* HTML_STRONG */
126: { "", "" }, /* HTML_SUB */
127: { "", "" }, /* HTML_SUP */
128: { "", "" }, /* HTML_TAB */
129: { "", "" }, /* HTML_TABLE */
130: { "", "" }, /* HTML_TD */
131: { "", "" }, /* HTML_TEXTAREA */
132: { "", "" }, /* HTML_TH */
133: { "\n\\title{", "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
134: { "", "" }, /* HTML_TR */
135: { "", "" }, /* HTML_TT */
136: { "", "" }, /* HTML_U */
137: { "\n\\begin{itemize}","\n\\end{itemize}\n"}, /* HTML_UL */
138: { "", "" }, /* HTML_VAR */
139: { "{\\sf ", "}" } /* HTML_XMP */
140: };
141:
2.8 ! frystyk 142: PRIVATE char *TeX_entities[HTML_ENTITIES] = {
2.1 frystyk 143: "\\AE ", /*"AElig", capital AE diphthong (ligature) */
2.2 frystyk 144: "\\\'{A}", /*"Aacute", capital A, acute accent */
145: "\\^{A}", /*"Acirc", capital A, circumflex accent */
146: "\\`{A}", /*"Agrave", capital A, grave accent */
147: "\\AA", /*"Aring", capital A, ring */
148: "\\~{A}", /*"Atilde", capital A, tilde */
149: "\\\"{A}", /*"Auml", capital A, dieresis or umlaut mark */
150: "\\c{C}", /*"Ccedil", capital C, cedilla */
151: "\\OE ", /*"ETH", capital Eth, Icelandic */
152: "\\\'{E}", /*"Eacute", capital E, acute accent */
153: "\\^{E}", /*"Ecirc", capital E, circumflex accent */
154: "\\`{E}", /*"Egrave", capital E, grave accent */
155: "\\\"{E}", /*"Euml", capital E, dieresis or umlaut mark */
156: "\\\'{I}", /*"Iacute", capital I, acute accent */
157: "\\^{I}", /*"Icirc", capital I, circumflex accent */
158: "\\`{I}", /*"Igrave", capital I, grave accent */
159: "\\\"{I}", /*"Iuml", capital I, dieresis or umlaut mark */
160: "\\~{N}", /*"Ntilde", capital N, tilde */
161: "\\\'{O}", /*"Oacute", capital O, acute accent */
162: "\\^{O}", /*"Ocirc", capital O, circumflex accent */
163: "\\`{O}", /*"Ograve", capital O, grave accent */
2.1 frystyk 164: "\\O ", /*"Oslash", capital O, slash */
2.2 frystyk 165: "\\~{O}", /*"Otilde", capital O, tilde */
166: "\\\"{O}", /*"Ouml", capital O, dieresis or umlaut mark */
167: " ", /*"THORN", capital THORN, Icelandic */
168: "\\\'{U}", /*"Uacute", capital U, acute accent */
169: "\\^{U}", /*"Ucirc", capital U, circumflex accent */
170: "\\`{U}", /*"Ugrave", capital U, grave accent */
171: "\\\"{U}", /*"Uuml", capital U, dieresis or umlaut mark */
172: "\\\'{Y}", /*"Yacute", capital Y, acute accent */
173: "\\\'{a}", /*"aacute", small a, acute accent */
174: "\\^{a}", /*"acirc", small a, circumflex accent */
2.1 frystyk 175: "\\ae ", /*"aelig", small ae diphthong (ligature) */
2.2 frystyk 176: "\\`{a}", /*"agrave", small a, grave accent */
2.1 frystyk 177: "&", /*"amp", ampersand */
178: "\\aa ", /*"aring", small a, ring */
2.2 frystyk 179: "\\~{a}", /*"atilde", small a, tilde */
180: "\\\"{a}", /*"auml", small a, dieresis or umlaut mark */
181: "\\c{c}", /*"ccedil", small c, cedilla */
182: "\\\'{e}", /*"eacute", small e, acute accent */
183: "\\^{c}", /*"ecirc", small e, circumflex accent */
184: "\\`{c}", /*"egrave", small e, grave accent */
185: "\\oe ", /*"eth", small eth, Icelandic */
186: "\\\"{e}", /*"euml", small e, dieresis or umlaut mark */
2.1 frystyk 187: ">", /*"gt", greater than */
2.2 frystyk 188: "\\\'{\\i}", /*"iacute", small i, acute accent */
189: "\\^{\\i}", /*"icirc", small i, circumflex accent */
190: "\\`{\\i}", /*"igrave", small i, grave accent */
191: "\\\"{\\i}", /*"iuml", small i, dieresis or umlaut mark */
2.1 frystyk 192: "<", /*"lt", less than */
2.2 frystyk 193: "\\~{n}", /*"ntilde", small n, tilde */
194: "\\\'{o}", /*"oacute", small o, acute accent */
195: "\\~{o}", /*"ocirc", small o, circumflex accent */
196: "\\`{o}", /*"ograve", small o, grave accent */
2.1 frystyk 197: "\\o ", /*"oslash", small o, slash */
2.2 frystyk 198: "\\~{o}", /*"otilde", small o, tilde */
199: "\\\"{o}", /*"ouml", small o, dieresis or umlaut mark */
2.8 ! frystyk 200: "\"", /*"quot", double quote sign - June 1994 */
2.1 frystyk 201: "\\ss ", /*"szlig", small sharp s, German (sz ligature)*/
2.2 frystyk 202: " ", /*"thorn", small thorn, Icelandic */
203: "\\\'{u}", /*"uacute", small u, acute accent */
204: "\\^{u}", /*"ucirc", small u, circumflex accent */
205: "\\`{u}", /*"ugrave", small u, grave accent */
206: "\\\"{u}", /*"uuml", small u, dieresis or umlaut mark */
207: "\\\'{y}", /*"yacute", small y, acute accent */
208: "\\\"{y}" /*"yuml", small y, dieresis or umlaut mark */
2.1 frystyk 209: };
210:
211:
212: /* Flush Buffer
213: ** ------------
214: */
215: PRIVATE void HTTeXGen_flush ARGS1(HTStructured *, me)
216: {
217: (*me->targetClass.put_block)(me->target,
218: me->buffer,
219: me->write_pointer - me->buffer);
220: me->write_pointer = me->buffer;
221: me->line_break = me->buffer;
222: }
223:
224:
225: /* Character handling
226: ** ------------------
227: **
228: */
229: PRIVATE void HTTeXGen_put_character ARGS2(HTStructured *, me, char, c)
230: {
231: if (!me->startup) /* To skip MIME header */
232: return;
233: if (c=='\n') {
234: if (me->markup || me->preformatted) { /* Put out as is and flush */
235: *me->write_pointer++ = c;
236: HTTeXGen_flush(me);
237: return;
238: } else if (me->sensitive || *(me->write_pointer-1)==' ') {
239: return;
240: } else
241: *me->write_pointer++ = ' '; /* Try to pretty print */
242: } else if (me->markup || me->preformatted) {
243: *me->write_pointer++ = c;
244: } else if (c==' ' || c=='\t') { /* Skip space and tabs */
245: if (*(me->write_pointer-1) != ' ')
246: *me->write_pointer++ = ' ';
247: else
248: return;
249: } else {
250: if (c=='$' || c=='&' || c=='%' || c=='#' || /* Special chars */
251: c=='{' || c=='}' || c=='_') {
252: *me->write_pointer++ = '\\';
253: *me->write_pointer++ = c;
254: } else if (c=='\\') { /* Special names */
255: char *temp = "$\\backslash$";
256: strcpy(me->write_pointer, temp);
257: me->write_pointer += strlen(temp);
258: } else if (c=='^') {
259: char *temp = "$\\hat{ }$";
260: strcpy(me->write_pointer, temp);
261: me->write_pointer += strlen(temp);
262: } else if (c=='~') {
263: char *temp = "$\\tilde{ }$";
264: strcpy(me->write_pointer, temp);
265: me->write_pointer += strlen(temp);
266: } else if (c=='|' || c=='<' || c=='>') { /* Math mode */
267: *me->write_pointer++ = '$';
268: *me->write_pointer++ = c;
269: *me->write_pointer++ = '$';
270: } else
271: *me->write_pointer++ = c; /* Char seems normal */
272: }
273:
2.6 frystyk 274: if (c==' ') /* Find delimiter */
2.1 frystyk 275: me->line_break = me->write_pointer;
276: else if (strchr(WORD_DELIMITERS, c))
277: me->line_break = me->write_pointer-1;
278:
279: /* Flush buffer out when full */
280: if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
2.8 ! frystyk 281: #if 0
2.1 frystyk 282: if (me->markup || me->preformatted) {
2.8 ! frystyk 283: #endif
! 284: if (me->preformatted) {
2.1 frystyk 285: *me->write_pointer = '\n';
286: (*me->targetClass.put_block)(me->target,
287: me->buffer,
288: me->write_pointer-me->buffer+1);
289: me->write_pointer = me->buffer;
290: } else { /* Use break-point */
291: char line_break_char = *me->line_break;
292: char *saved = me->line_break;
293: *me->line_break = '\n';
294: (*me->targetClass.put_block)(me->target,
295: me->buffer,
296: me->line_break-me->buffer+1);
297: *me->line_break = line_break_char;
298: { /* move next line in */
299: char *p = saved;
300: char *q;
301: for(q=me->buffer; p<me->write_pointer; )
302: *q++ = *p++;
303: }
304: me->write_pointer = me->buffer + (me->write_pointer-saved);
305: }
306: me->line_break = me->buffer;
307: }
308: }
309:
310:
311:
312: /* String handling
313: ** ---------------
314: */
315: PRIVATE void HTTeXGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
316: {
317: CONST char * p;
318: for (p=s; *p; p++)
319: HTTeXGen_put_character(me, *p);
320: }
321:
322:
323: PRIVATE void HTTeXGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
324: {
325: CONST char * p;
326: for(p=s; p<s+l; p++)
327: HTTeXGen_put_character(me, *p);
328: }
329:
330:
331: /* Start Element
332: ** -------------
333: **
334: ** No attributes are put to the output Henrik 07/03-94
335: ** Does no assumptions of WHAT element is started...
336: */
337: PRIVATE void HTTeXGen_start_element ARGS4(
338: HTStructured *, me,
339: int, element_number,
340: CONST BOOL*, present,
341: CONST char **, value)
342: {
343: me->startup = YES; /* Now, let's get down to it */
2.6 frystyk 344: if (me->preformatted == YES) { /* Don't start markup in here */
345: if (TRACE)
346: fprintf(stderr, "LaTeX....... No Markup in verbatim mode\n");
2.1 frystyk 347: return;
2.6 frystyk 348: }
2.1 frystyk 349: if (element_number == HTML_PRE)
350: me->preformatted = YES;
351: if (element_number == HTML_CITE || /* No \n here, please! */
352: element_number == HTML_DT ||
353: element_number == HTML_H1 ||
354: element_number == HTML_H2 ||
355: element_number == HTML_H3 ||
356: element_number == HTML_H4 ||
357: element_number == HTML_H5 ||
358: element_number == HTML_H6 ||
359: element_number == HTML_H7 ||
360: element_number == HTML_TITLE)
361: me->sensitive = YES;
362: else if (element_number == HTML_DD) /* Only way to turn <DT> off */
363: me->sensitive = NO;
364: me->markup = element_number == HTML_A ? NO : YES;
365: HTTeXGen_put_string(me, *TeX_names[element_number]);
366: me->markup = NO;
367: }
368:
369:
370: /* End Element
371: ** -----------
372: **
373: ** Ends an markup element Henrik 07/03-94
374: ** Does no assumptions of WHAT element is ended...
375: */
376: PRIVATE void HTTeXGen_end_element ARGS2(HTStructured *, me,
377: int , element_number)
378: {
2.6 frystyk 379: if (me->preformatted && element_number != HTML_PRE) {
380: if (TRACE)
381: fprintf(stderr, "LaTeX....... No markup in verbatim mode\n");
382: return;
383: }
2.1 frystyk 384: me->preformatted = NO;
385: me->markup = YES;
386: HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
387: me->markup = NO;
388: if (element_number == HTML_CITE ||
389: element_number == HTML_DL ||
390: element_number == HTML_H1 ||
391: element_number == HTML_H2 ||
392: element_number == HTML_H3 ||
393: element_number == HTML_H4 ||
394: element_number == HTML_H5 ||
395: element_number == HTML_H6 ||
396: element_number == HTML_H7 ||
397: element_number == HTML_TITLE)
398: me->sensitive = NO;
399: }
400:
401:
402: /* Expanding entities
403: ** ------------------
404: **
405: */
406: PRIVATE void HTTeXGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
407: {
408: BOOL mark = me->markup;
409: if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
410: *TeX_entities[entity_number] != '<' &&
411: *TeX_entities[entity_number] != '>')
412: me->markup = YES;
413: HTTeXGen_put_string(me, TeX_entities[entity_number]);
414: me->markup = mark;
415: }
416:
417:
418:
419: /* Free an HTML object
420: ** -------------------
421: **
422: */
423: PRIVATE void HTTeXGen_free ARGS1(HTStructured *, me)
424: {
425: HTTeXGen_flush(me);
426: (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
427: HTTeXGen_flush(me);
2.7 duns 428: (*me->targetClass._free)(me->target); /* ripple through */
2.1 frystyk 429: free(me);
430: }
431:
432:
433: PRIVATE void HTTeXGen_abort ARGS2(HTStructured *, me, HTError, e)
434: {
435: HTTeXGen_free(me);
436: }
437:
438:
439: /* Structured Object Class
440: ** -----------------------
441: */
442: PRIVATE CONST HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
443: {
444: "HTMLToTeX",
445: HTTeXGen_free,
446: HTTeXGen_abort,
447: HTTeXGen_put_character, HTTeXGen_put_string, HTTeXGen_write,
448: HTTeXGen_start_element, HTTeXGen_end_element,
449: HTTeXGen_put_entity
450: };
451:
452:
453: /* HTConverter from HTML to TeX Stream
454: ** ------------------------------------------
455: **
456: */
457: PUBLIC HTStream* HTMLToTeX ARGS5(
458: HTRequest *, request,
459: void *, param,
460: HTFormat, input_format,
461: HTFormat, output_format,
462: HTStream *, output_stream)
463: {
464: HTStructured* me = (HTStructured*) calloc(1, sizeof(*me));
465: if (me == NULL) outofmem(__FILE__, "HTMLToTeX");
466:
467: me->isa = (HTStructuredClass*) &HTTeXGeneration;
468: me->dtd = &HTMLP_dtd;
469: me->target = output_stream;
470: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
471: me->write_pointer = me->buffer;
472: me->line_break = me->buffer;
473: (*me->targetClass.put_string)(me->target,
474: "\\documentstyle[11pt]{report}\n\\begin{document}\n");
475: return SGML_new(&HTMLP_dtd, me);
476: }
477:
478:
479: /* END OF FILE HTTeXGen.c */
480:
481:
482:
483:
484:
485:
486:
487:
Webmaster