Annotation of libwww/Library/src/HTTeXGen.c, revision 2.22
2.10 frystyk 1: /* HTTeXGen.c
2: ** HTML -> LaTeX CONVERTER
3: **
2.14 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.10 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 frystyk 6: **
7: ** This version of the HTML object sends LaTeX to the output stream.
8: ** No attributes are considered in the translation!
9: ** The module uses simple 1:1 table-conversions, but this COULD be
10: ** expanded to a stack-machine. This would then be in start_element and
11: ** end_element...
12: ** Henrik 07/03-94
2.7 duns 13: **
14: ** HISTORY:
15: ** 8 Jul 94 FM Insulate free() from _free structure element.
16: **
2.1 frystyk 17: */
18:
19: #define BUFFER_SIZE 80 /* Line buffer attempts to make neat breaks */
2.8 frystyk 20: #define WORD_DELIMITERS ",;:[]()"
2.1 frystyk 21:
2.12 frystyk 22: /* Library include files */
2.22 ! frystyk 23: #include "sysdep.h"
2.12 frystyk 24: #include "HTUtils.h"
2.1 frystyk 25: #include "HTTeXGen.h"
26: #include "HTMLPDTD.h"
2.15 frystyk 27: #include "HTStruct.h"
2.1 frystyk 28: #include "HTFormat.h"
29:
30: /* HTML Object
31: ** -----------
32: */
33:
34: struct _HTStream {
2.22 ! frystyk 35: const HTStreamClass * isa;
2.1 frystyk 36: HTStream * target;
37: HTStreamClass targetClass; /* COPY for speed */
38: };
39:
40: struct _HTStructured {
2.22 ! frystyk 41: const HTStructuredClass * isa;
2.1 frystyk 42: HTStream * target;
43: HTStreamClass targetClass; /* COPY for speed */
2.22 ! frystyk 44: const SGML_dtd * dtd;
2.1 frystyk 45:
2.5 frystyk 46: char buffer[2*BUFFER_SIZE]; /* See note */
2.1 frystyk 47: char * write_pointer;
48: char * line_break;
49: BOOL sensitive; /* Can we put \n */
50: BOOL preformatted; /* Is it verbatim? */
51: BOOL markup; /* If doing LaTeX markup */
52: BOOL startup; /* To skip MIME header */
53: };
2.5 frystyk 54:
55: /* The buffer has to be bigger than 80 as latex markup might make the line
56: longer before we get to flush it. */
2.1 frystyk 57:
2.4 frystyk 58: PRIVATE char *TeX_names[HTMLP_ELEMENTS][2] = {
2.1 frystyk 59: { "", "" }, /* HTML_A */
60: { "", "" }, /* HTML_ABBREV */
61: { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT */
62: { "", "" }, /* HTML_ACRONYM */
63: { "", "" }, /* HTML_ADDED */
64: { "{\\it ", "}" }, /* HTML_ADDRESS */
65: { "", "" }, /* HTML_ARG */
66: { "{\\bf ", "}" }, /* HTML_B */
67: { "", "" }, /* HTML_BASE */
68: { "{\\sf ", "}" }, /* HTML_BLOCKQUOTE */
69: { "", "" }, /* HTML_BODY */
70: { "", "" }, /* HTML_BOX */
71: { "", "" }, /* HTML_BR */
72: { "", "" }, /* HTML_BYLINE */
73: { "", "" }, /* HTML_CAPTION */
74: { "", "" }, /* HTML_CHANGED */
75: { "\\cite{", "}" }, /* HTML_CITE */
76: { "", "" }, /* HTML_CMD */
77: { "{\\tt ", "}" }, /* HTML_CODE */
78: { "\n\\typeout{", "}\n" }, /* HTML_COMMENT */
79: { "]", "" }, /* HTML_DD */
80: { "", "" }, /* HTML_DFN */
81: { "", "" }, /* HTML_DIR */
82: { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL */
83: { "\n\\item[", "" }, /* HTML_DT */
84: { "{\\em ", "}" }, /* HTML_EM */
85: { "", "" }, /* HTML_FIG */
86: { "\n\\footnote{", "}\n" }, /* HTML_FOOTNOTE */
87: { "", "" }, /* HTML_FORM */
88: { "\n\\chapter{", "}\n" }, /* HTML_H1 */
89: { "\n\\section{", "}\n" }, /* HTML_H2 */
90: { "\n\\subsection{","}\n" }, /* HTML_H3 */
91: { "\n\\subsubsection{","}\n" }, /* HTML_H4 */
92: { "\n\\paragraph{", "}\n" }, /* HTML_H5 */
93: { "\n\\subparagraph{","}\n" }, /* HTML_H6 */
94: { "", "\n" }, /* HTML_H7 */
95: { "", "" }, /* HTML_HEAD */
96: { "", "" }, /* HTML_HR */
97: { "", "" }, /* HTML_HTML */
2.3 duns 98: { "", "" }, /* HTML_HTMLPLUS */
2.1 frystyk 99: { "{\\it ", "}" }, /* HTML_I */
100: { "", "" }, /* HTML_IMAGE */
2.8 frystyk 101: { "_FIGUR_", "" }, /* HTML_IMG */
2.1 frystyk 102: { "", "" }, /* HTML_INPUT */
103: { "", "" }, /* HTML_ISINDEX */
104: { "{\\tt ", "}" }, /* HTML_KBD */
105: { "", "" }, /* HTML_L */
106: { "\n\\item ", "" }, /* HTML_LI */
107: { "", "" }, /* HTML_LINK */
108: { "", "" }, /* HTML_LISTING */
109: { "", "" }, /* HTML_LIT */
110: { "", "" }, /* HTML_MARGIN */
111: { "", "" }, /* HTML_MATH */
112: { "", "" }, /* HTML_MENU */
113: { "", "" }, /* HTML_NEXTID */
114: { "", "" }, /* HTML_NOTE */
115: { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL */
116: { "", "" }, /* HTML_OPTION */
117: { "", "" }, /* HTML_OVER */
118: { "\n\n", "" }, /* HTML_P */
119: { "", "" }, /* HTML_PERSON */
120: { "", "" }, /* HTML_PLAINTEXT */
121: { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE */
122: { "", "" }, /* HTML_Q */
123: { "\\begin{quote}", "\\end{quote}"}, /* HTML_QUOTE */
124: { "", "" }, /* HTML_RENDER */
125: { "", "" }, /* HTML_REMOVED */
126: { "", "" }, /* HTML_S */
127: { "", "" }, /* HTML_SAMP */
128: { "", "" }, /* HTML_SELECT */
129: { "{\\bf ", "}" }, /* HTML_STRONG */
130: { "", "" }, /* HTML_SUB */
131: { "", "" }, /* HTML_SUP */
132: { "", "" }, /* HTML_TAB */
133: { "", "" }, /* HTML_TABLE */
134: { "", "" }, /* HTML_TD */
135: { "", "" }, /* HTML_TEXTAREA */
136: { "", "" }, /* HTML_TH */
137: { "\n\\title{", "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
138: { "", "" }, /* HTML_TR */
139: { "", "" }, /* HTML_TT */
140: { "", "" }, /* HTML_U */
141: { "\n\\begin{itemize}","\n\\end{itemize}\n"}, /* HTML_UL */
142: { "", "" }, /* HTML_VAR */
143: { "{\\sf ", "}" } /* HTML_XMP */
144: };
145:
2.8 frystyk 146: PRIVATE char *TeX_entities[HTML_ENTITIES] = {
2.1 frystyk 147: "\\AE ", /*"AElig", capital AE diphthong (ligature) */
2.2 frystyk 148: "\\\'{A}", /*"Aacute", capital A, acute accent */
149: "\\^{A}", /*"Acirc", capital A, circumflex accent */
150: "\\`{A}", /*"Agrave", capital A, grave accent */
151: "\\AA", /*"Aring", capital A, ring */
152: "\\~{A}", /*"Atilde", capital A, tilde */
153: "\\\"{A}", /*"Auml", capital A, dieresis or umlaut mark */
154: "\\c{C}", /*"Ccedil", capital C, cedilla */
155: "\\OE ", /*"ETH", capital Eth, Icelandic */
156: "\\\'{E}", /*"Eacute", capital E, acute accent */
157: "\\^{E}", /*"Ecirc", capital E, circumflex accent */
158: "\\`{E}", /*"Egrave", capital E, grave accent */
159: "\\\"{E}", /*"Euml", capital E, dieresis or umlaut mark */
160: "\\\'{I}", /*"Iacute", capital I, acute accent */
161: "\\^{I}", /*"Icirc", capital I, circumflex accent */
162: "\\`{I}", /*"Igrave", capital I, grave accent */
163: "\\\"{I}", /*"Iuml", capital I, dieresis or umlaut mark */
164: "\\~{N}", /*"Ntilde", capital N, tilde */
165: "\\\'{O}", /*"Oacute", capital O, acute accent */
166: "\\^{O}", /*"Ocirc", capital O, circumflex accent */
167: "\\`{O}", /*"Ograve", capital O, grave accent */
2.1 frystyk 168: "\\O ", /*"Oslash", capital O, slash */
2.2 frystyk 169: "\\~{O}", /*"Otilde", capital O, tilde */
170: "\\\"{O}", /*"Ouml", capital O, dieresis or umlaut mark */
171: " ", /*"THORN", capital THORN, Icelandic */
172: "\\\'{U}", /*"Uacute", capital U, acute accent */
173: "\\^{U}", /*"Ucirc", capital U, circumflex accent */
174: "\\`{U}", /*"Ugrave", capital U, grave accent */
175: "\\\"{U}", /*"Uuml", capital U, dieresis or umlaut mark */
176: "\\\'{Y}", /*"Yacute", capital Y, acute accent */
177: "\\\'{a}", /*"aacute", small a, acute accent */
178: "\\^{a}", /*"acirc", small a, circumflex accent */
2.1 frystyk 179: "\\ae ", /*"aelig", small ae diphthong (ligature) */
2.2 frystyk 180: "\\`{a}", /*"agrave", small a, grave accent */
2.1 frystyk 181: "&", /*"amp", ampersand */
182: "\\aa ", /*"aring", small a, ring */
2.2 frystyk 183: "\\~{a}", /*"atilde", small a, tilde */
184: "\\\"{a}", /*"auml", small a, dieresis or umlaut mark */
185: "\\c{c}", /*"ccedil", small c, cedilla */
186: "\\\'{e}", /*"eacute", small e, acute accent */
187: "\\^{c}", /*"ecirc", small e, circumflex accent */
188: "\\`{c}", /*"egrave", small e, grave accent */
189: "\\oe ", /*"eth", small eth, Icelandic */
190: "\\\"{e}", /*"euml", small e, dieresis or umlaut mark */
2.1 frystyk 191: ">", /*"gt", greater than */
2.2 frystyk 192: "\\\'{\\i}", /*"iacute", small i, acute accent */
193: "\\^{\\i}", /*"icirc", small i, circumflex accent */
194: "\\`{\\i}", /*"igrave", small i, grave accent */
195: "\\\"{\\i}", /*"iuml", small i, dieresis or umlaut mark */
2.1 frystyk 196: "<", /*"lt", less than */
2.2 frystyk 197: "\\~{n}", /*"ntilde", small n, tilde */
198: "\\\'{o}", /*"oacute", small o, acute accent */
199: "\\~{o}", /*"ocirc", small o, circumflex accent */
200: "\\`{o}", /*"ograve", small o, grave accent */
2.1 frystyk 201: "\\o ", /*"oslash", small o, slash */
2.2 frystyk 202: "\\~{o}", /*"otilde", small o, tilde */
203: "\\\"{o}", /*"ouml", small o, dieresis or umlaut mark */
2.8 frystyk 204: "\"", /*"quot", double quote sign - June 1994 */
2.1 frystyk 205: "\\ss ", /*"szlig", small sharp s, German (sz ligature)*/
2.2 frystyk 206: " ", /*"thorn", small thorn, Icelandic */
207: "\\\'{u}", /*"uacute", small u, acute accent */
208: "\\^{u}", /*"ucirc", small u, circumflex accent */
209: "\\`{u}", /*"ugrave", small u, grave accent */
210: "\\\"{u}", /*"uuml", small u, dieresis or umlaut mark */
211: "\\\'{y}", /*"yacute", small y, acute accent */
212: "\\\"{y}" /*"yuml", small y, dieresis or umlaut mark */
2.1 frystyk 213: };
214:
215:
216: /* Flush Buffer
217: ** ------------
218: */
2.19 frystyk 219: PRIVATE int HTTeXGen_flush (HTStructured * me)
2.1 frystyk 220: {
2.13 frystyk 221: int status;
222: if ((status =
223: (*me->targetClass.put_block)(me->target, me->buffer,
224: me->write_pointer-me->buffer)) != HT_OK)
225: return status;
2.1 frystyk 226: me->write_pointer = me->buffer;
227: me->line_break = me->buffer;
2.13 frystyk 228: return (*me->targetClass.flush)(me->target);
2.1 frystyk 229: }
230:
231:
232: /* Character handling
233: ** ------------------
234: **
235: */
2.19 frystyk 236: PRIVATE int HTTeXGen_put_character (HTStructured * me, char c)
2.1 frystyk 237: {
238: if (!me->startup) /* To skip MIME header */
2.13 frystyk 239: return HT_OK;
2.1 frystyk 240: if (c=='\n') {
241: if (me->markup || me->preformatted) { /* Put out as is and flush */
242: *me->write_pointer++ = c;
243: HTTeXGen_flush(me);
2.13 frystyk 244: return HT_OK;
2.1 frystyk 245: } else if (me->sensitive || *(me->write_pointer-1)==' ') {
2.13 frystyk 246: return HT_OK;
2.1 frystyk 247: } else
248: *me->write_pointer++ = ' '; /* Try to pretty print */
249: } else if (me->markup || me->preformatted) {
250: *me->write_pointer++ = c;
251: } else if (c==' ' || c=='\t') { /* Skip space and tabs */
252: if (*(me->write_pointer-1) != ' ')
253: *me->write_pointer++ = ' ';
254: else
2.13 frystyk 255: return HT_OK;
2.1 frystyk 256: } else {
257: if (c=='$' || c=='&' || c=='%' || c=='#' || /* Special chars */
258: c=='{' || c=='}' || c=='_') {
259: *me->write_pointer++ = '\\';
260: *me->write_pointer++ = c;
261: } else if (c=='\\') { /* Special names */
262: char *temp = "$\\backslash$";
263: strcpy(me->write_pointer, temp);
264: me->write_pointer += strlen(temp);
265: } else if (c=='^') {
266: char *temp = "$\\hat{ }$";
267: strcpy(me->write_pointer, temp);
268: me->write_pointer += strlen(temp);
269: } else if (c=='~') {
270: char *temp = "$\\tilde{ }$";
271: strcpy(me->write_pointer, temp);
272: me->write_pointer += strlen(temp);
273: } else if (c=='|' || c=='<' || c=='>') { /* Math mode */
274: *me->write_pointer++ = '$';
275: *me->write_pointer++ = c;
276: *me->write_pointer++ = '$';
277: } else
278: *me->write_pointer++ = c; /* Char seems normal */
279: }
280:
2.6 frystyk 281: if (c==' ') /* Find delimiter */
2.1 frystyk 282: me->line_break = me->write_pointer;
283: else if (strchr(WORD_DELIMITERS, c))
284: me->line_break = me->write_pointer-1;
285:
286: /* Flush buffer out when full */
287: if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
2.9 frystyk 288: #ifdef OLD_CODE
2.1 frystyk 289: if (me->markup || me->preformatted) {
2.9 frystyk 290: #endif /* OLD_CODE */
2.8 frystyk 291: if (me->preformatted) {
2.1 frystyk 292: *me->write_pointer = '\n';
293: (*me->targetClass.put_block)(me->target,
294: me->buffer,
295: me->write_pointer-me->buffer+1);
296: me->write_pointer = me->buffer;
297: } else { /* Use break-point */
298: char line_break_char = *me->line_break;
299: char *saved = me->line_break;
300: *me->line_break = '\n';
301: (*me->targetClass.put_block)(me->target,
302: me->buffer,
303: me->line_break-me->buffer+1);
304: *me->line_break = line_break_char;
305: { /* move next line in */
306: char *p = saved;
307: char *q;
308: for(q=me->buffer; p<me->write_pointer; )
309: *q++ = *p++;
310: }
311: me->write_pointer = me->buffer + (me->write_pointer-saved);
312: }
313: me->line_break = me->buffer;
314: }
2.13 frystyk 315: return HT_OK;
2.1 frystyk 316: }
317:
318:
319:
320: /* String handling
321: ** ---------------
322: */
2.22 ! frystyk 323: PRIVATE int HTTeXGen_put_string (HTStructured * me, const char* s)
2.1 frystyk 324: {
2.13 frystyk 325: while (*s)
326: HTTeXGen_put_character(me, *s++);
327: return HT_OK;
2.1 frystyk 328: }
329:
330:
2.22 ! frystyk 331: PRIVATE int HTTeXGen_write (HTStructured * me, const char* b, int l)
2.1 frystyk 332: {
2.13 frystyk 333: while (l-- > 0)
334: HTTeXGen_put_character(me, *b++);
335: return HT_OK;
2.1 frystyk 336: }
337:
338:
339: /* Start Element
340: ** -------------
341: **
342: ** No attributes are put to the output Henrik 07/03-94
343: ** Does no assumptions of WHAT element is started...
344: */
2.19 frystyk 345: PRIVATE void HTTeXGen_start_element (HTStructured * me,
346: int element_number,
2.22 ! frystyk 347: const BOOL * present,
! 348: const char ** value)
2.1 frystyk 349: {
350: me->startup = YES; /* Now, let's get down to it */
2.6 frystyk 351: if (me->preformatted == YES) { /* Don't start markup in here */
2.16 frystyk 352: if (WWWTRACE)
2.21 eric 353: HTTrace("LaTeX....... No Markup in verbatim mode\n");
2.1 frystyk 354: return;
2.6 frystyk 355: }
2.1 frystyk 356: if (element_number == HTML_PRE)
357: me->preformatted = YES;
358: if (element_number == HTML_CITE || /* No \n here, please! */
359: element_number == HTML_DT ||
360: element_number == HTML_H1 ||
361: element_number == HTML_H2 ||
362: element_number == HTML_H3 ||
363: element_number == HTML_H4 ||
364: element_number == HTML_H5 ||
365: element_number == HTML_H6 ||
366: element_number == HTML_H7 ||
367: element_number == HTML_TITLE)
368: me->sensitive = YES;
369: else if (element_number == HTML_DD) /* Only way to turn <DT> off */
370: me->sensitive = NO;
371: me->markup = element_number == HTML_A ? NO : YES;
372: HTTeXGen_put_string(me, *TeX_names[element_number]);
373: me->markup = NO;
374: }
375:
376:
377: /* End Element
378: ** -----------
379: **
380: ** Ends an markup element Henrik 07/03-94
381: ** Does no assumptions of WHAT element is ended...
382: */
2.19 frystyk 383: PRIVATE void HTTeXGen_end_element (HTStructured * me, int element_number)
2.1 frystyk 384: {
2.6 frystyk 385: if (me->preformatted && element_number != HTML_PRE) {
2.16 frystyk 386: if (WWWTRACE)
2.21 eric 387: HTTrace("LaTeX....... No markup in verbatim mode\n");
2.6 frystyk 388: return;
389: }
2.1 frystyk 390: me->preformatted = NO;
391: me->markup = YES;
392: HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
393: me->markup = NO;
394: if (element_number == HTML_CITE ||
395: element_number == HTML_DL ||
396: element_number == HTML_H1 ||
397: element_number == HTML_H2 ||
398: element_number == HTML_H3 ||
399: element_number == HTML_H4 ||
400: element_number == HTML_H5 ||
401: element_number == HTML_H6 ||
402: element_number == HTML_H7 ||
403: element_number == HTML_TITLE)
404: me->sensitive = NO;
405: }
406:
407:
408: /* Expanding entities
409: ** ------------------
410: **
411: */
2.19 frystyk 412: PRIVATE void HTTeXGen_put_entity (HTStructured * me, int entity_number)
2.1 frystyk 413: {
414: BOOL mark = me->markup;
415: if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
416: *TeX_entities[entity_number] != '<' &&
417: *TeX_entities[entity_number] != '>')
418: me->markup = YES;
419: HTTeXGen_put_string(me, TeX_entities[entity_number]);
420: me->markup = mark;
421: }
422:
423:
424:
425: /* Free an HTML object
426: ** -------------------
427: **
428: */
2.19 frystyk 429: PRIVATE int HTTeXGen_free (HTStructured * me)
2.1 frystyk 430: {
431: HTTeXGen_flush(me);
432: (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
433: HTTeXGen_flush(me);
2.7 duns 434: (*me->targetClass._free)(me->target); /* ripple through */
2.20 frystyk 435: HT_FREE(me);
2.13 frystyk 436: return HT_OK;
2.1 frystyk 437: }
438:
439:
2.19 frystyk 440: PRIVATE int HTTeXGen_abort (HTStructured * me, HTList * e)
2.1 frystyk 441: {
442: HTTeXGen_free(me);
2.13 frystyk 443: return HT_ERROR;
2.1 frystyk 444: }
445:
446:
447: /* Structured Object Class
448: ** -----------------------
449: */
2.22 ! frystyk 450: PRIVATE const HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
2.1 frystyk 451: {
452: "HTMLToTeX",
2.13 frystyk 453: HTTeXGen_flush,
2.1 frystyk 454: HTTeXGen_free,
455: HTTeXGen_abort,
456: HTTeXGen_put_character, HTTeXGen_put_string, HTTeXGen_write,
457: HTTeXGen_start_element, HTTeXGen_end_element,
458: HTTeXGen_put_entity
459: };
460:
461:
462: /* HTConverter from HTML to TeX Stream
463: ** ------------------------------------------
464: **
465: */
2.19 frystyk 466: PUBLIC HTStream* HTMLToTeX (HTRequest * request,
467: void * param,
468: HTFormat input_format,
469: HTFormat output_format,
470: HTStream * output_stream)
2.1 frystyk 471: {
2.20 frystyk 472: HTStructured* me;
473: if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
474: HT_OUTOFMEM("HTMLToTeX");
2.1 frystyk 475:
476: me->isa = (HTStructuredClass*) &HTTeXGeneration;
477: me->dtd = &HTMLP_dtd;
478: me->target = output_stream;
479: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
480: me->write_pointer = me->buffer;
481: me->line_break = me->buffer;
482: (*me->targetClass.put_string)(me->target,
483: "\\documentstyle[11pt]{report}\n\\begin{document}\n");
484: return SGML_new(&HTMLP_dtd, me);
485: }
Webmaster