Annotation of libwww/Library/src/HTTeXGen.c, revision 2.13
2.10 frystyk 1: /* HTTeXGen.c
2: ** HTML -> LaTeX CONVERTER
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 frystyk 6: **
7: ** This version of the HTML object sends LaTeX to the output stream.
8: ** No attributes are considered in the translation!
9: ** The module uses simple 1:1 table-conversions, but this COULD be
10: ** expanded to a stack-machine. This would then be in start_element and
11: ** end_element...
12: ** Henrik 07/03-94
2.7 duns 13: **
14: ** HISTORY:
15: ** 8 Jul 94 FM Insulate free() from _free structure element.
16: **
2.1 frystyk 17: */
18:
/* Nominal output line width: the line buffer is drained with a line break
   once it holds about this many characters, so that breaks look neat. */
#define BUFFER_SIZE	80

/* Punctuation characters that are remembered as possible line-break points */
#define WORD_DELIMITERS	",;:[]()"
2.1 frystyk 21:
2.12 frystyk 22: /* Library include files */
23: #include "tcp.h"
24: #include "HTUtils.h"
2.1 frystyk 25: #include "HTTeXGen.h"
26: #include "HTMLPDTD.h"
27: #include "HTStream.h"
28: #include "SGML.h"
29: #include "HTFormat.h"
30:
31: /* HTML Object
32: ** -----------
33: */
34:
35: struct _HTStream {
36: CONST HTStreamClass * isa;
37: HTStream * target;
38: HTStreamClass targetClass; /* COPY for speed */
39: };
40:
41: struct _HTStructured {
42: CONST HTStructuredClass * isa;
43: HTStream * target;
44: HTStreamClass targetClass; /* COPY for speed */
45: CONST SGML_dtd * dtd;
46:
2.5 frystyk 47: char buffer[2*BUFFER_SIZE]; /* See note */
2.1 frystyk 48: char * write_pointer;
49: char * line_break;
50: BOOL sensitive; /* Can we put \n */
51: BOOL preformatted; /* Is it verbatim? */
52: BOOL markup; /* If doing LaTeX markup */
53: BOOL startup; /* To skip MIME header */
54: };
2.5 frystyk 55:
56: /* The buffer has to be bigger than 80 as latex markup might make the line
57: longer before we get to flush it. */
2.1 frystyk 58:
2.4 frystyk 59: PRIVATE char *TeX_names[HTMLP_ELEMENTS][2] = {
2.1 frystyk 60: { "", "" }, /* HTML_A */
61: { "", "" }, /* HTML_ABBREV */
62: { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT */
63: { "", "" }, /* HTML_ACRONYM */
64: { "", "" }, /* HTML_ADDED */
65: { "{\\it ", "}" }, /* HTML_ADDRESS */
66: { "", "" }, /* HTML_ARG */
67: { "{\\bf ", "}" }, /* HTML_B */
68: { "", "" }, /* HTML_BASE */
69: { "{\\sf ", "}" }, /* HTML_BLOCKQUOTE */
70: { "", "" }, /* HTML_BODY */
71: { "", "" }, /* HTML_BOX */
72: { "", "" }, /* HTML_BR */
73: { "", "" }, /* HTML_BYLINE */
74: { "", "" }, /* HTML_CAPTION */
75: { "", "" }, /* HTML_CHANGED */
76: { "\\cite{", "}" }, /* HTML_CITE */
77: { "", "" }, /* HTML_CMD */
78: { "{\\tt ", "}" }, /* HTML_CODE */
79: { "\n\\typeout{", "}\n" }, /* HTML_COMMENT */
80: { "]", "" }, /* HTML_DD */
81: { "", "" }, /* HTML_DFN */
82: { "", "" }, /* HTML_DIR */
83: { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL */
84: { "\n\\item[", "" }, /* HTML_DT */
85: { "{\\em ", "}" }, /* HTML_EM */
86: { "", "" }, /* HTML_FIG */
87: { "\n\\footnote{", "}\n" }, /* HTML_FOOTNOTE */
88: { "", "" }, /* HTML_FORM */
89: { "\n\\chapter{", "}\n" }, /* HTML_H1 */
90: { "\n\\section{", "}\n" }, /* HTML_H2 */
91: { "\n\\subsection{","}\n" }, /* HTML_H3 */
92: { "\n\\subsubsection{","}\n" }, /* HTML_H4 */
93: { "\n\\paragraph{", "}\n" }, /* HTML_H5 */
94: { "\n\\subparagraph{","}\n" }, /* HTML_H6 */
95: { "", "\n" }, /* HTML_H7 */
96: { "", "" }, /* HTML_HEAD */
97: { "", "" }, /* HTML_HR */
98: { "", "" }, /* HTML_HTML */
2.3 duns 99: { "", "" }, /* HTML_HTMLPLUS */
2.1 frystyk 100: { "{\\it ", "}" }, /* HTML_I */
101: { "", "" }, /* HTML_IMAGE */
2.8 frystyk 102: { "_FIGUR_", "" }, /* HTML_IMG */
2.1 frystyk 103: { "", "" }, /* HTML_INPUT */
104: { "", "" }, /* HTML_ISINDEX */
105: { "{\\tt ", "}" }, /* HTML_KBD */
106: { "", "" }, /* HTML_L */
107: { "\n\\item ", "" }, /* HTML_LI */
108: { "", "" }, /* HTML_LINK */
109: { "", "" }, /* HTML_LISTING */
110: { "", "" }, /* HTML_LIT */
111: { "", "" }, /* HTML_MARGIN */
112: { "", "" }, /* HTML_MATH */
113: { "", "" }, /* HTML_MENU */
114: { "", "" }, /* HTML_NEXTID */
115: { "", "" }, /* HTML_NOTE */
116: { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL */
117: { "", "" }, /* HTML_OPTION */
118: { "", "" }, /* HTML_OVER */
119: { "\n\n", "" }, /* HTML_P */
120: { "", "" }, /* HTML_PERSON */
121: { "", "" }, /* HTML_PLAINTEXT */
122: { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE */
123: { "", "" }, /* HTML_Q */
124: { "\\begin{quote}", "\\end{quote}"}, /* HTML_QUOTE */
125: { "", "" }, /* HTML_RENDER */
126: { "", "" }, /* HTML_REMOVED */
127: { "", "" }, /* HTML_S */
128: { "", "" }, /* HTML_SAMP */
129: { "", "" }, /* HTML_SELECT */
130: { "{\\bf ", "}" }, /* HTML_STRONG */
131: { "", "" }, /* HTML_SUB */
132: { "", "" }, /* HTML_SUP */
133: { "", "" }, /* HTML_TAB */
134: { "", "" }, /* HTML_TABLE */
135: { "", "" }, /* HTML_TD */
136: { "", "" }, /* HTML_TEXTAREA */
137: { "", "" }, /* HTML_TH */
138: { "\n\\title{", "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
139: { "", "" }, /* HTML_TR */
140: { "", "" }, /* HTML_TT */
141: { "", "" }, /* HTML_U */
142: { "\n\\begin{itemize}","\n\\end{itemize}\n"}, /* HTML_UL */
143: { "", "" }, /* HTML_VAR */
144: { "{\\sf ", "}" } /* HTML_XMP */
145: };
146:
2.8 frystyk 147: PRIVATE char *TeX_entities[HTML_ENTITIES] = {
2.1 frystyk 148: "\\AE ", /*"AElig", capital AE diphthong (ligature) */
2.2 frystyk 149: "\\\'{A}", /*"Aacute", capital A, acute accent */
150: "\\^{A}", /*"Acirc", capital A, circumflex accent */
151: "\\`{A}", /*"Agrave", capital A, grave accent */
152: "\\AA", /*"Aring", capital A, ring */
153: "\\~{A}", /*"Atilde", capital A, tilde */
154: "\\\"{A}", /*"Auml", capital A, dieresis or umlaut mark */
155: "\\c{C}", /*"Ccedil", capital C, cedilla */
156: "\\OE ", /*"ETH", capital Eth, Icelandic */
157: "\\\'{E}", /*"Eacute", capital E, acute accent */
158: "\\^{E}", /*"Ecirc", capital E, circumflex accent */
159: "\\`{E}", /*"Egrave", capital E, grave accent */
160: "\\\"{E}", /*"Euml", capital E, dieresis or umlaut mark */
161: "\\\'{I}", /*"Iacute", capital I, acute accent */
162: "\\^{I}", /*"Icirc", capital I, circumflex accent */
163: "\\`{I}", /*"Igrave", capital I, grave accent */
164: "\\\"{I}", /*"Iuml", capital I, dieresis or umlaut mark */
165: "\\~{N}", /*"Ntilde", capital N, tilde */
166: "\\\'{O}", /*"Oacute", capital O, acute accent */
167: "\\^{O}", /*"Ocirc", capital O, circumflex accent */
168: "\\`{O}", /*"Ograve", capital O, grave accent */
2.1 frystyk 169: "\\O ", /*"Oslash", capital O, slash */
2.2 frystyk 170: "\\~{O}", /*"Otilde", capital O, tilde */
171: "\\\"{O}", /*"Ouml", capital O, dieresis or umlaut mark */
172: " ", /*"THORN", capital THORN, Icelandic */
173: "\\\'{U}", /*"Uacute", capital U, acute accent */
174: "\\^{U}", /*"Ucirc", capital U, circumflex accent */
175: "\\`{U}", /*"Ugrave", capital U, grave accent */
176: "\\\"{U}", /*"Uuml", capital U, dieresis or umlaut mark */
177: "\\\'{Y}", /*"Yacute", capital Y, acute accent */
178: "\\\'{a}", /*"aacute", small a, acute accent */
179: "\\^{a}", /*"acirc", small a, circumflex accent */
2.1 frystyk 180: "\\ae ", /*"aelig", small ae diphthong (ligature) */
2.2 frystyk 181: "\\`{a}", /*"agrave", small a, grave accent */
2.1 frystyk 182: "&", /*"amp", ampersand */
183: "\\aa ", /*"aring", small a, ring */
2.2 frystyk 184: "\\~{a}", /*"atilde", small a, tilde */
185: "\\\"{a}", /*"auml", small a, dieresis or umlaut mark */
186: "\\c{c}", /*"ccedil", small c, cedilla */
187: "\\\'{e}", /*"eacute", small e, acute accent */
188: "\\^{c}", /*"ecirc", small e, circumflex accent */
189: "\\`{c}", /*"egrave", small e, grave accent */
190: "\\oe ", /*"eth", small eth, Icelandic */
191: "\\\"{e}", /*"euml", small e, dieresis or umlaut mark */
2.1 frystyk 192: ">", /*"gt", greater than */
2.2 frystyk 193: "\\\'{\\i}", /*"iacute", small i, acute accent */
194: "\\^{\\i}", /*"icirc", small i, circumflex accent */
195: "\\`{\\i}", /*"igrave", small i, grave accent */
196: "\\\"{\\i}", /*"iuml", small i, dieresis or umlaut mark */
2.1 frystyk 197: "<", /*"lt", less than */
2.2 frystyk 198: "\\~{n}", /*"ntilde", small n, tilde */
199: "\\\'{o}", /*"oacute", small o, acute accent */
200: "\\~{o}", /*"ocirc", small o, circumflex accent */
201: "\\`{o}", /*"ograve", small o, grave accent */
2.1 frystyk 202: "\\o ", /*"oslash", small o, slash */
2.2 frystyk 203: "\\~{o}", /*"otilde", small o, tilde */
204: "\\\"{o}", /*"ouml", small o, dieresis or umlaut mark */
2.8 frystyk 205: "\"", /*"quot", double quote sign - June 1994 */
2.1 frystyk 206: "\\ss ", /*"szlig", small sharp s, German (sz ligature)*/
2.2 frystyk 207: " ", /*"thorn", small thorn, Icelandic */
208: "\\\'{u}", /*"uacute", small u, acute accent */
209: "\\^{u}", /*"ucirc", small u, circumflex accent */
210: "\\`{u}", /*"ugrave", small u, grave accent */
211: "\\\"{u}", /*"uuml", small u, dieresis or umlaut mark */
212: "\\\'{y}", /*"yacute", small y, acute accent */
213: "\\\"{y}" /*"yuml", small y, dieresis or umlaut mark */
2.1 frystyk 214: };
215:
216:
217: /* Flush Buffer
218: ** ------------
219: */
2.13 ! frystyk 220: PRIVATE int HTTeXGen_flush ARGS1(HTStructured *, me)
2.1 frystyk 221: {
2.13 ! frystyk 222: int status;
! 223: if ((status =
! 224: (*me->targetClass.put_block)(me->target, me->buffer,
! 225: me->write_pointer-me->buffer)) != HT_OK)
! 226: return status;
2.1 frystyk 227: me->write_pointer = me->buffer;
228: me->line_break = me->buffer;
2.13 ! frystyk 229: return (*me->targetClass.flush)(me->target);
2.1 frystyk 230: }
231:
232:
233: /* Character handling
234: ** ------------------
235: **
236: */
2.13 ! frystyk 237: PRIVATE int HTTeXGen_put_character ARGS2(HTStructured *, me, char, c)
2.1 frystyk 238: {
239: if (!me->startup) /* To skip MIME header */
2.13 ! frystyk 240: return HT_OK;
2.1 frystyk 241: if (c=='\n') {
242: if (me->markup || me->preformatted) { /* Put out as is and flush */
243: *me->write_pointer++ = c;
244: HTTeXGen_flush(me);
2.13 ! frystyk 245: return HT_OK;
2.1 frystyk 246: } else if (me->sensitive || *(me->write_pointer-1)==' ') {
2.13 ! frystyk 247: return HT_OK;
2.1 frystyk 248: } else
249: *me->write_pointer++ = ' '; /* Try to pretty print */
250: } else if (me->markup || me->preformatted) {
251: *me->write_pointer++ = c;
252: } else if (c==' ' || c=='\t') { /* Skip space and tabs */
253: if (*(me->write_pointer-1) != ' ')
254: *me->write_pointer++ = ' ';
255: else
2.13 ! frystyk 256: return HT_OK;
2.1 frystyk 257: } else {
258: if (c=='$' || c=='&' || c=='%' || c=='#' || /* Special chars */
259: c=='{' || c=='}' || c=='_') {
260: *me->write_pointer++ = '\\';
261: *me->write_pointer++ = c;
262: } else if (c=='\\') { /* Special names */
263: char *temp = "$\\backslash$";
264: strcpy(me->write_pointer, temp);
265: me->write_pointer += strlen(temp);
266: } else if (c=='^') {
267: char *temp = "$\\hat{ }$";
268: strcpy(me->write_pointer, temp);
269: me->write_pointer += strlen(temp);
270: } else if (c=='~') {
271: char *temp = "$\\tilde{ }$";
272: strcpy(me->write_pointer, temp);
273: me->write_pointer += strlen(temp);
274: } else if (c=='|' || c=='<' || c=='>') { /* Math mode */
275: *me->write_pointer++ = '$';
276: *me->write_pointer++ = c;
277: *me->write_pointer++ = '$';
278: } else
279: *me->write_pointer++ = c; /* Char seems normal */
280: }
281:
2.6 frystyk 282: if (c==' ') /* Find delimiter */
2.1 frystyk 283: me->line_break = me->write_pointer;
284: else if (strchr(WORD_DELIMITERS, c))
285: me->line_break = me->write_pointer-1;
286:
287: /* Flush buffer out when full */
288: if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
2.9 frystyk 289: #ifdef OLD_CODE
2.1 frystyk 290: if (me->markup || me->preformatted) {
2.9 frystyk 291: #endif /* OLD_CODE */
2.8 frystyk 292: if (me->preformatted) {
2.1 frystyk 293: *me->write_pointer = '\n';
294: (*me->targetClass.put_block)(me->target,
295: me->buffer,
296: me->write_pointer-me->buffer+1);
297: me->write_pointer = me->buffer;
298: } else { /* Use break-point */
299: char line_break_char = *me->line_break;
300: char *saved = me->line_break;
301: *me->line_break = '\n';
302: (*me->targetClass.put_block)(me->target,
303: me->buffer,
304: me->line_break-me->buffer+1);
305: *me->line_break = line_break_char;
306: { /* move next line in */
307: char *p = saved;
308: char *q;
309: for(q=me->buffer; p<me->write_pointer; )
310: *q++ = *p++;
311: }
312: me->write_pointer = me->buffer + (me->write_pointer-saved);
313: }
314: me->line_break = me->buffer;
315: }
2.13 ! frystyk 316: return HT_OK;
2.1 frystyk 317: }
318:
319:
320:
321: /* String handling
322: ** ---------------
323: */
2.13 ! frystyk 324: PRIVATE int HTTeXGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
2.1 frystyk 325: {
2.13 ! frystyk 326: while (*s)
! 327: HTTeXGen_put_character(me, *s++);
! 328: return HT_OK;
2.1 frystyk 329: }
330:
331:
2.13 ! frystyk 332: PRIVATE int HTTeXGen_write ARGS3(HTStructured *, me, CONST char*, b, int, l)
2.1 frystyk 333: {
2.13 ! frystyk 334: while (l-- > 0)
! 335: HTTeXGen_put_character(me, *b++);
! 336: return HT_OK;
2.1 frystyk 337: }
338:
339:
340: /* Start Element
341: ** -------------
342: **
343: ** No attributes are put to the output Henrik 07/03-94
344: ** Does no assumptions of WHAT element is started...
345: */
346: PRIVATE void HTTeXGen_start_element ARGS4(
347: HTStructured *, me,
348: int, element_number,
349: CONST BOOL*, present,
350: CONST char **, value)
351: {
352: me->startup = YES; /* Now, let's get down to it */
2.6 frystyk 353: if (me->preformatted == YES) { /* Don't start markup in here */
354: if (TRACE)
2.12 frystyk 355: fprintf(TDEST, "LaTeX....... No Markup in verbatim mode\n");
2.1 frystyk 356: return;
2.6 frystyk 357: }
2.1 frystyk 358: if (element_number == HTML_PRE)
359: me->preformatted = YES;
360: if (element_number == HTML_CITE || /* No \n here, please! */
361: element_number == HTML_DT ||
362: element_number == HTML_H1 ||
363: element_number == HTML_H2 ||
364: element_number == HTML_H3 ||
365: element_number == HTML_H4 ||
366: element_number == HTML_H5 ||
367: element_number == HTML_H6 ||
368: element_number == HTML_H7 ||
369: element_number == HTML_TITLE)
370: me->sensitive = YES;
371: else if (element_number == HTML_DD) /* Only way to turn <DT> off */
372: me->sensitive = NO;
373: me->markup = element_number == HTML_A ? NO : YES;
374: HTTeXGen_put_string(me, *TeX_names[element_number]);
375: me->markup = NO;
376: }
377:
378:
379: /* End Element
380: ** -----------
381: **
382: ** Ends an markup element Henrik 07/03-94
383: ** Does no assumptions of WHAT element is ended...
384: */
385: PRIVATE void HTTeXGen_end_element ARGS2(HTStructured *, me,
2.9 frystyk 386: int , element_number)
2.1 frystyk 387: {
2.6 frystyk 388: if (me->preformatted && element_number != HTML_PRE) {
389: if (TRACE)
2.12 frystyk 390: fprintf(TDEST, "LaTeX....... No markup in verbatim mode\n");
2.6 frystyk 391: return;
392: }
2.1 frystyk 393: me->preformatted = NO;
394: me->markup = YES;
395: HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
396: me->markup = NO;
397: if (element_number == HTML_CITE ||
398: element_number == HTML_DL ||
399: element_number == HTML_H1 ||
400: element_number == HTML_H2 ||
401: element_number == HTML_H3 ||
402: element_number == HTML_H4 ||
403: element_number == HTML_H5 ||
404: element_number == HTML_H6 ||
405: element_number == HTML_H7 ||
406: element_number == HTML_TITLE)
407: me->sensitive = NO;
408: }
409:
410:
411: /* Expanding entities
412: ** ------------------
413: **
414: */
415: PRIVATE void HTTeXGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
416: {
417: BOOL mark = me->markup;
418: if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
419: *TeX_entities[entity_number] != '<' &&
420: *TeX_entities[entity_number] != '>')
421: me->markup = YES;
422: HTTeXGen_put_string(me, TeX_entities[entity_number]);
423: me->markup = mark;
424: }
425:
426:
427:
428: /* Free an HTML object
429: ** -------------------
430: **
431: */
2.9 frystyk 432: PRIVATE int HTTeXGen_free ARGS1(HTStructured *, me)
2.1 frystyk 433: {
434: HTTeXGen_flush(me);
435: (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
436: HTTeXGen_flush(me);
2.7 duns 437: (*me->targetClass._free)(me->target); /* ripple through */
2.1 frystyk 438: free(me);
2.13 ! frystyk 439: return HT_OK;
2.1 frystyk 440: }
441:
442:
2.9 frystyk 443: PRIVATE int HTTeXGen_abort ARGS2(HTStructured *, me, HTError, e)
2.1 frystyk 444: {
445: HTTeXGen_free(me);
2.13 ! frystyk 446: return HT_ERROR;
2.1 frystyk 447: }
448:
449:
450: /* Structured Object Class
451: ** -----------------------
452: */
453: PRIVATE CONST HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
454: {
455: "HTMLToTeX",
2.13 ! frystyk 456: HTTeXGen_flush,
2.1 frystyk 457: HTTeXGen_free,
458: HTTeXGen_abort,
459: HTTeXGen_put_character, HTTeXGen_put_string, HTTeXGen_write,
460: HTTeXGen_start_element, HTTeXGen_end_element,
461: HTTeXGen_put_entity
462: };
463:
464:
465: /* HTConverter from HTML to TeX Stream
466: ** ------------------------------------------
467: **
468: */
469: PUBLIC HTStream* HTMLToTeX ARGS5(
470: HTRequest *, request,
471: void *, param,
472: HTFormat, input_format,
473: HTFormat, output_format,
474: HTStream *, output_stream)
475: {
476: HTStructured* me = (HTStructured*) calloc(1, sizeof(*me));
477: if (me == NULL) outofmem(__FILE__, "HTMLToTeX");
478:
479: me->isa = (HTStructuredClass*) &HTTeXGeneration;
480: me->dtd = &HTMLP_dtd;
481: me->target = output_stream;
482: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
483: me->write_pointer = me->buffer;
484: me->line_break = me->buffer;
485: (*me->targetClass.put_string)(me->target,
486: "\\documentstyle[11pt]{report}\n\\begin{document}\n");
487: return SGML_new(&HTMLP_dtd, me);
488: }
489:
490:
491: /* END OF FILE HTTeXGen.c */
492:
493:
494:
495:
496:
497:
498:
499:
Webmaster