Annotation of libwww/Library/src/HTTeXGen.c, revision 2.7
2.1 frystyk 1: /* Simple LaTeX Generator that converts in a 1:1 manner from HTML to LaTeX
2: ** =======================================================================
3: **
4: ** This version of the HTML object sends LaTeX to the output stream.
5: ** No attributes are considered in the translation!
6: ** The module uses simple 1:1 table-conversions, but this COULD be
7: ** expanded to a stack-machine. This would then be in start_element and
8: ** end_element...
9: ** Henrik 07/03-94
2.7 ! duns 10: **
! 11: ** HISTORY:
! 12: ** 8 Jul 94 FM Insulate free() from _free structure element.
! 13: **
2.1 frystyk 14: */
15:
16: #define BUFFER_SIZE 80 /* Line buffer attempts to make neat breaks */
17: #define WORD_DELIMITERS ",/;:\"[]()"
18:
19: /* Implements: */
20: #include "HTTeXGen.h"
21: #include <stdio.h>
22: #include "HTMLPDTD.h"
23: #include "HTStream.h"
24: #include "SGML.h"
25: #include "HTFormat.h"
26: #include "tcp.h"
27:
28:
29: /* HTML Object
30: ** -----------
31: */
32:
33: struct _HTStream {
34: CONST HTStreamClass * isa;
35: HTStream * target;
36: HTStreamClass targetClass; /* COPY for speed */
37: };
38:
39: struct _HTStructured {
40: CONST HTStructuredClass * isa;
41: HTStream * target;
42: HTStreamClass targetClass; /* COPY for speed */
43: CONST SGML_dtd * dtd;
44:
2.5 frystyk 45: char buffer[2*BUFFER_SIZE]; /* See note */
2.1 frystyk 46: char * write_pointer;
47: char * line_break;
48: BOOL sensitive; /* Can we put \n */
49: BOOL preformatted; /* Is it verbatim? */
50: BOOL markup; /* If doing LaTeX markup */
51: BOOL startup; /* To skip MIME header */
52: };
2.5 frystyk 53:
54: /* The buffer has to be bigger than 80 as latex markup might make the line
55: longer before we get to flush it. */
2.1 frystyk 56:
2.4 frystyk 57: PRIVATE char *TeX_names[HTMLP_ELEMENTS][2] = {
2.1 frystyk 58: { "", "" }, /* HTML_A */
59: { "", "" }, /* HTML_ABBREV */
60: { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT */
61: { "", "" }, /* HTML_ACRONYM */
62: { "", "" }, /* HTML_ADDED */
63: { "{\\it ", "}" }, /* HTML_ADDRESS */
64: { "", "" }, /* HTML_ARG */
65: { "{\\bf ", "}" }, /* HTML_B */
66: { "", "" }, /* HTML_BASE */
67: { "{\\sf ", "}" }, /* HTML_BLOCKQUOTE */
68: { "", "" }, /* HTML_BODY */
69: { "", "" }, /* HTML_BOX */
70: { "", "" }, /* HTML_BR */
71: { "", "" }, /* HTML_BYLINE */
72: { "", "" }, /* HTML_CAPTION */
73: { "", "" }, /* HTML_CHANGED */
74: { "\\cite{", "}" }, /* HTML_CITE */
75: { "", "" }, /* HTML_CMD */
76: { "{\\tt ", "}" }, /* HTML_CODE */
77: { "\n\\typeout{", "}\n" }, /* HTML_COMMENT */
78: { "]", "" }, /* HTML_DD */
79: { "", "" }, /* HTML_DFN */
80: { "", "" }, /* HTML_DIR */
81: { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL */
82: { "\n\\item[", "" }, /* HTML_DT */
83: { "{\\em ", "}" }, /* HTML_EM */
84: { "", "" }, /* HTML_FIG */
85: { "\n\\footnote{", "}\n" }, /* HTML_FOOTNOTE */
86: { "", "" }, /* HTML_FORM */
87: { "\n\\chapter{", "}\n" }, /* HTML_H1 */
88: { "\n\\section{", "}\n" }, /* HTML_H2 */
89: { "\n\\subsection{","}\n" }, /* HTML_H3 */
90: { "\n\\subsubsection{","}\n" }, /* HTML_H4 */
91: { "\n\\paragraph{", "}\n" }, /* HTML_H5 */
92: { "\n\\subparagraph{","}\n" }, /* HTML_H6 */
93: { "", "\n" }, /* HTML_H7 */
94: { "", "" }, /* HTML_HEAD */
95: { "", "" }, /* HTML_HR */
96: { "", "" }, /* HTML_HTML */
2.3 duns 97: { "", "" }, /* HTML_HTMLPLUS */
2.1 frystyk 98: { "{\\it ", "}" }, /* HTML_I */
99: { "", "" }, /* HTML_IMAGE */
100: { "", "" }, /* HTML_IMG */
101: { "", "" }, /* HTML_INPUT */
102: { "", "" }, /* HTML_ISINDEX */
103: { "{\\tt ", "}" }, /* HTML_KBD */
104: { "", "" }, /* HTML_L */
105: { "\n\\item ", "" }, /* HTML_LI */
106: { "", "" }, /* HTML_LINK */
107: { "", "" }, /* HTML_LISTING */
108: { "", "" }, /* HTML_LIT */
109: { "", "" }, /* HTML_MARGIN */
110: { "", "" }, /* HTML_MATH */
111: { "", "" }, /* HTML_MENU */
112: { "", "" }, /* HTML_NEXTID */
113: { "", "" }, /* HTML_NOTE */
114: { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL */
115: { "", "" }, /* HTML_OPTION */
116: { "", "" }, /* HTML_OVER */
117: { "\n\n", "" }, /* HTML_P */
118: { "", "" }, /* HTML_PERSON */
119: { "", "" }, /* HTML_PLAINTEXT */
120: { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE */
121: { "", "" }, /* HTML_Q */
122: { "\\begin{quote}", "\\end{quote}"}, /* HTML_QUOTE */
123: { "", "" }, /* HTML_RENDER */
124: { "", "" }, /* HTML_REMOVED */
125: { "", "" }, /* HTML_S */
126: { "", "" }, /* HTML_SAMP */
127: { "", "" }, /* HTML_SELECT */
128: { "{\\bf ", "}" }, /* HTML_STRONG */
129: { "", "" }, /* HTML_SUB */
130: { "", "" }, /* HTML_SUP */
131: { "", "" }, /* HTML_TAB */
132: { "", "" }, /* HTML_TABLE */
133: { "", "" }, /* HTML_TD */
134: { "", "" }, /* HTML_TEXTAREA */
135: { "", "" }, /* HTML_TH */
136: { "\n\\title{", "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
137: { "", "" }, /* HTML_TR */
138: { "", "" }, /* HTML_TT */
139: { "", "" }, /* HTML_U */
140: { "\n\\begin{itemize}","\n\\end{itemize}\n"}, /* HTML_UL */
141: { "", "" }, /* HTML_VAR */
142: { "{\\sf ", "}" } /* HTML_XMP */
143: };
144:
145: PRIVATE char *TeX_entities[] = {
146: "\\AE ", /*"AElig", capital AE diphthong (ligature) */
2.2 frystyk 147: "\\\'{A}", /*"Aacute", capital A, acute accent */
148: "\\^{A}", /*"Acirc", capital A, circumflex accent */
149: "\\`{A}", /*"Agrave", capital A, grave accent */
150: "\\AA", /*"Aring", capital A, ring */
151: "\\~{A}", /*"Atilde", capital A, tilde */
152: "\\\"{A}", /*"Auml", capital A, dieresis or umlaut mark */
153: "\\c{C}", /*"Ccedil", capital C, cedilla */
154: "\\OE ", /*"ETH", capital Eth, Icelandic */
155: "\\\'{E}", /*"Eacute", capital E, acute accent */
156: "\\^{E}", /*"Ecirc", capital E, circumflex accent */
157: "\\`{E}", /*"Egrave", capital E, grave accent */
158: "\\\"{E}", /*"Euml", capital E, dieresis or umlaut mark */
159: "\\\'{I}", /*"Iacute", capital I, acute accent */
160: "\\^{I}", /*"Icirc", capital I, circumflex accent */
161: "\\`{I}", /*"Igrave", capital I, grave accent */
162: "\\\"{I}", /*"Iuml", capital I, dieresis or umlaut mark */
163: "\\~{N}", /*"Ntilde", capital N, tilde */
164: "\\\'{O}", /*"Oacute", capital O, acute accent */
165: "\\^{O}", /*"Ocirc", capital O, circumflex accent */
166: "\\`{O}", /*"Ograve", capital O, grave accent */
2.1 frystyk 167: "\\O ", /*"Oslash", capital O, slash */
2.2 frystyk 168: "\\~{O}", /*"Otilde", capital O, tilde */
169: "\\\"{O}", /*"Ouml", capital O, dieresis or umlaut mark */
170: " ", /*"THORN", capital THORN, Icelandic */
171: "\\\'{U}", /*"Uacute", capital U, acute accent */
172: "\\^{U}", /*"Ucirc", capital U, circumflex accent */
173: "\\`{U}", /*"Ugrave", capital U, grave accent */
174: "\\\"{U}", /*"Uuml", capital U, dieresis or umlaut mark */
175: "\\\'{Y}", /*"Yacute", capital Y, acute accent */
176: "\\\'{a}", /*"aacute", small a, acute accent */
177: "\\^{a}", /*"acirc", small a, circumflex accent */
2.1 frystyk 178: "\\ae ", /*"aelig", small ae diphthong (ligature) */
2.2 frystyk 179: "\\`{a}", /*"agrave", small a, grave accent */
2.1 frystyk 180: "&", /*"amp", ampersand */
181: "\\aa ", /*"aring", small a, ring */
2.2 frystyk 182: "\\~{a}", /*"atilde", small a, tilde */
183: "\\\"{a}", /*"auml", small a, dieresis or umlaut mark */
184: "\\c{c}", /*"ccedil", small c, cedilla */
185: "\\\'{e}", /*"eacute", small e, acute accent */
186: "\\^{c}", /*"ecirc", small e, circumflex accent */
187: "\\`{c}", /*"egrave", small e, grave accent */
188: "\\oe ", /*"eth", small eth, Icelandic */
189: "\\\"{e}", /*"euml", small e, dieresis or umlaut mark */
2.1 frystyk 190: ">", /*"gt", greater than */
2.2 frystyk 191: "\\\'{\\i}", /*"iacute", small i, acute accent */
192: "\\^{\\i}", /*"icirc", small i, circumflex accent */
193: "\\`{\\i}", /*"igrave", small i, grave accent */
194: "\\\"{\\i}", /*"iuml", small i, dieresis or umlaut mark */
2.1 frystyk 195: "<", /*"lt", less than */
2.2 frystyk 196: "\\~{n}", /*"ntilde", small n, tilde */
197: "\\\'{o}", /*"oacute", small o, acute accent */
198: "\\~{o}", /*"ocirc", small o, circumflex accent */
199: "\\`{o}", /*"ograve", small o, grave accent */
2.1 frystyk 200: "\\o ", /*"oslash", small o, slash */
2.2 frystyk 201: "\\~{o}", /*"otilde", small o, tilde */
202: "\\\"{o}", /*"ouml", small o, dieresis or umlaut mark */
2.1 frystyk 203: "\\ss ", /*"szlig", small sharp s, German (sz ligature)*/
2.2 frystyk 204: " ", /*"thorn", small thorn, Icelandic */
205: "\\\'{u}", /*"uacute", small u, acute accent */
206: "\\^{u}", /*"ucirc", small u, circumflex accent */
207: "\\`{u}", /*"ugrave", small u, grave accent */
208: "\\\"{u}", /*"uuml", small u, dieresis or umlaut mark */
209: "\\\'{y}", /*"yacute", small y, acute accent */
210: "\\\"{y}" /*"yuml", small y, dieresis or umlaut mark */
2.1 frystyk 211: };
212:
213:
214: /* Flush Buffer
215: ** ------------
216: */
217: PRIVATE void HTTeXGen_flush ARGS1(HTStructured *, me)
218: {
219: (*me->targetClass.put_block)(me->target,
220: me->buffer,
221: me->write_pointer - me->buffer);
222: me->write_pointer = me->buffer;
223: me->line_break = me->buffer;
224: }
225:
226:
227: /* Character handling
228: ** ------------------
229: **
230: */
231: PRIVATE void HTTeXGen_put_character ARGS2(HTStructured *, me, char, c)
232: {
233: if (!me->startup) /* To skip MIME header */
234: return;
235: if (c=='\n') {
236: if (me->markup || me->preformatted) { /* Put out as is and flush */
237: *me->write_pointer++ = c;
238: HTTeXGen_flush(me);
239: return;
240: } else if (me->sensitive || *(me->write_pointer-1)==' ') {
241: return;
242: } else
243: *me->write_pointer++ = ' '; /* Try to pretty print */
244: } else if (me->markup || me->preformatted) {
245: *me->write_pointer++ = c;
246: } else if (c==' ' || c=='\t') { /* Skip space and tabs */
247: if (*(me->write_pointer-1) != ' ')
248: *me->write_pointer++ = ' ';
249: else
250: return;
251: } else {
252: if (c=='$' || c=='&' || c=='%' || c=='#' || /* Special chars */
253: c=='{' || c=='}' || c=='_') {
254: *me->write_pointer++ = '\\';
255: *me->write_pointer++ = c;
256: } else if (c=='\\') { /* Special names */
257: char *temp = "$\\backslash$";
258: strcpy(me->write_pointer, temp);
259: me->write_pointer += strlen(temp);
260: } else if (c=='^') {
261: char *temp = "$\\hat{ }$";
262: strcpy(me->write_pointer, temp);
263: me->write_pointer += strlen(temp);
264: } else if (c=='~') {
265: char *temp = "$\\tilde{ }$";
266: strcpy(me->write_pointer, temp);
267: me->write_pointer += strlen(temp);
268: } else if (c=='|' || c=='<' || c=='>') { /* Math mode */
269: *me->write_pointer++ = '$';
270: *me->write_pointer++ = c;
271: *me->write_pointer++ = '$';
272: } else
273: *me->write_pointer++ = c; /* Char seems normal */
274: }
275:
2.6 frystyk 276: if (c==' ') /* Find delimiter */
2.1 frystyk 277: me->line_break = me->write_pointer;
278: else if (strchr(WORD_DELIMITERS, c))
279: me->line_break = me->write_pointer-1;
280:
281: /* Flush buffer out when full */
282: if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
283: if (me->markup || me->preformatted) {
284: *me->write_pointer = '\n';
285: (*me->targetClass.put_block)(me->target,
286: me->buffer,
287: me->write_pointer-me->buffer+1);
288: me->write_pointer = me->buffer;
289: } else { /* Use break-point */
290: char line_break_char = *me->line_break;
291: char *saved = me->line_break;
292: *me->line_break = '\n';
293: (*me->targetClass.put_block)(me->target,
294: me->buffer,
295: me->line_break-me->buffer+1);
296: *me->line_break = line_break_char;
297: { /* move next line in */
298: char *p = saved;
299: char *q;
300: for(q=me->buffer; p<me->write_pointer; )
301: *q++ = *p++;
302: }
303: me->write_pointer = me->buffer + (me->write_pointer-saved);
304: }
305: me->line_break = me->buffer;
306: }
307: }
308:
309:
310:
311: /* String handling
312: ** ---------------
313: */
314: PRIVATE void HTTeXGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
315: {
316: CONST char * p;
317: for (p=s; *p; p++)
318: HTTeXGen_put_character(me, *p);
319: }
320:
321:
322: PRIVATE void HTTeXGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
323: {
324: CONST char * p;
325: for(p=s; p<s+l; p++)
326: HTTeXGen_put_character(me, *p);
327: }
328:
329:
330: /* Start Element
331: ** -------------
332: **
333: ** No attributes are put to the output Henrik 07/03-94
334: ** Does no assumptions of WHAT element is started...
335: */
336: PRIVATE void HTTeXGen_start_element ARGS4(
337: HTStructured *, me,
338: int, element_number,
339: CONST BOOL*, present,
340: CONST char **, value)
341: {
342: me->startup = YES; /* Now, let's get down to it */
2.6 frystyk 343: if (me->preformatted == YES) { /* Don't start markup in here */
344: if (TRACE)
345: fprintf(stderr, "LaTeX....... No Markup in verbatim mode\n");
2.1 frystyk 346: return;
2.6 frystyk 347: }
2.1 frystyk 348: if (element_number == HTML_PRE)
349: me->preformatted = YES;
350: if (element_number == HTML_CITE || /* No \n here, please! */
351: element_number == HTML_DT ||
352: element_number == HTML_H1 ||
353: element_number == HTML_H2 ||
354: element_number == HTML_H3 ||
355: element_number == HTML_H4 ||
356: element_number == HTML_H5 ||
357: element_number == HTML_H6 ||
358: element_number == HTML_H7 ||
359: element_number == HTML_TITLE)
360: me->sensitive = YES;
361: else if (element_number == HTML_DD) /* Only way to turn <DT> off */
362: me->sensitive = NO;
363: me->markup = element_number == HTML_A ? NO : YES;
364: HTTeXGen_put_string(me, *TeX_names[element_number]);
365: me->markup = NO;
366: }
367:
368:
369: /* End Element
370: ** -----------
371: **
372: ** Ends an markup element Henrik 07/03-94
373: ** Does no assumptions of WHAT element is ended...
374: */
375: PRIVATE void HTTeXGen_end_element ARGS2(HTStructured *, me,
376: int , element_number)
377: {
2.6 frystyk 378: if (me->preformatted && element_number != HTML_PRE) {
379: if (TRACE)
380: fprintf(stderr, "LaTeX....... No markup in verbatim mode\n");
381: return;
382: }
2.1 frystyk 383: me->preformatted = NO;
384: me->markup = YES;
385: HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
386: me->markup = NO;
387: if (element_number == HTML_CITE ||
388: element_number == HTML_DL ||
389: element_number == HTML_H1 ||
390: element_number == HTML_H2 ||
391: element_number == HTML_H3 ||
392: element_number == HTML_H4 ||
393: element_number == HTML_H5 ||
394: element_number == HTML_H6 ||
395: element_number == HTML_H7 ||
396: element_number == HTML_TITLE)
397: me->sensitive = NO;
398: }
399:
400:
401: /* Expanding entities
402: ** ------------------
403: **
404: */
405: PRIVATE void HTTeXGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
406: {
407: BOOL mark = me->markup;
408: if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
409: *TeX_entities[entity_number] != '<' &&
410: *TeX_entities[entity_number] != '>')
411: me->markup = YES;
412: HTTeXGen_put_string(me, TeX_entities[entity_number]);
413: me->markup = mark;
414: }
415:
416:
417:
418: /* Free an HTML object
419: ** -------------------
420: **
421: */
422: PRIVATE void HTTeXGen_free ARGS1(HTStructured *, me)
423: {
424: HTTeXGen_flush(me);
425: (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
426: HTTeXGen_flush(me);
2.7 ! duns 427: (*me->targetClass._free)(me->target); /* ripple through */
2.1 frystyk 428: free(me);
429: }
430:
431:
432: PRIVATE void HTTeXGen_abort ARGS2(HTStructured *, me, HTError, e)
433: {
434: HTTeXGen_free(me);
435: }
436:
437:
438: /* Structured Object Class
439: ** -----------------------
440: */
441: PRIVATE CONST HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
442: {
443: "HTMLToTeX",
444: HTTeXGen_free,
445: HTTeXGen_abort,
446: HTTeXGen_put_character, HTTeXGen_put_string, HTTeXGen_write,
447: HTTeXGen_start_element, HTTeXGen_end_element,
448: HTTeXGen_put_entity
449: };
450:
451:
452: /* HTConverter from HTML to TeX Stream
453: ** ------------------------------------------
454: **
455: */
456: PUBLIC HTStream* HTMLToTeX ARGS5(
457: HTRequest *, request,
458: void *, param,
459: HTFormat, input_format,
460: HTFormat, output_format,
461: HTStream *, output_stream)
462: {
463: HTStructured* me = (HTStructured*) calloc(1, sizeof(*me));
464: if (me == NULL) outofmem(__FILE__, "HTMLToTeX");
465:
466: me->isa = (HTStructuredClass*) &HTTeXGeneration;
467: me->dtd = &HTMLP_dtd;
468: me->target = output_stream;
469: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
470: me->write_pointer = me->buffer;
471: me->line_break = me->buffer;
472: (*me->targetClass.put_string)(me->target,
473: "\\documentstyle[11pt]{report}\n\\begin{document}\n");
474: return SGML_new(&HTMLP_dtd, me);
475: }
476:
477:
478: /* END OF FILE HTTeXGen.c */
479:
480:
481:
482:
483:
484:
485:
486:
Webmaster