Return to HTTeXGen.c CVS log | Up to [Public] / libwww / Library / src |
2.1 frystyk 1: /* Simple LaTeX Generator that converts in a 1:1 manner from HTML to LaTeX
2: ** =======================================================================
3: **
4: ** This version of the HTML object sends LaTeX to the output stream.
5: ** No attributes are considered in the translation!
6: ** The module uses simple 1:1 table-conversions, but this COULD be
7: ** expanded to a stack-machine. This would then be in start_element and
8: ** end_element...
9: ** Henrik 07/03-94
10: */
11:
/* Nominal output line width: the line buffer is flushed (at a word break
** where one is known) shortly before this many characters accumulate.
*/
#define BUFFER_SIZE	80

/* Characters, besides space, in front of which a line may be broken */
#define WORD_DELIMITERS	",/;:\"[]()"
14:
15: /* Implements: */
16: #include "HTTeXGen.h"
17: #include <stdio.h>
18: #include "HTMLPDTD.h"
19: #include "HTStream.h"
20: #include "SGML.h"
21: #include "HTFormat.h"
22: #include "tcp.h"
23:
24:
25: /* HTML Object
26: ** -----------
27: */
28:
29: struct _HTStream {
30: CONST HTStreamClass * isa;
31: HTStream * target;
32: HTStreamClass targetClass; /* COPY for speed */
33: };
34:
35: struct _HTStructured {
36: CONST HTStructuredClass * isa;
37: HTStream * target;
38: HTStreamClass targetClass; /* COPY for speed */
39: CONST SGML_dtd * dtd;
40:
41: char buffer[BUFFER_SIZE+20]; /* Needed!! */
42: char * write_pointer;
43: char * line_break;
44: BOOL sensitive; /* Can we put \n */
45: BOOL preformatted; /* Is it verbatim? */
46: BOOL markup; /* If doing LaTeX markup */
47: BOOL startup; /* To skip MIME header */
48: };
49:
50: PRIVATE char *TeX_names[][2] = {
51: { "", "" }, /* HTML_A */
52: { "", "" }, /* HTML_ABBREV */
53: { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT */
54: { "", "" }, /* HTML_ACRONYM */
55: { "", "" }, /* HTML_ADDED */
56: { "{\\it ", "}" }, /* HTML_ADDRESS */
57: { "", "" }, /* HTML_ARG */
58: { "{\\bf ", "}" }, /* HTML_B */
59: { "", "" }, /* HTML_BASE */
60: { "{\\sf ", "}" }, /* HTML_BLOCKQUOTE */
61: { "", "" }, /* HTML_BODY */
62: { "", "" }, /* HTML_BOX */
63: { "", "" }, /* HTML_BR */
64: { "", "" }, /* HTML_BYLINE */
65: { "", "" }, /* HTML_CAPTION */
66: { "", "" }, /* HTML_CHANGED */
67: { "\\cite{", "}" }, /* HTML_CITE */
68: { "", "" }, /* HTML_CMD */
69: { "{\\tt ", "}" }, /* HTML_CODE */
70: { "\n\\typeout{", "}\n" }, /* HTML_COMMENT */
71: { "]", "" }, /* HTML_DD */
72: { "", "" }, /* HTML_DFN */
73: { "", "" }, /* HTML_DIR */
74: { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL */
75: { "\n\\item[", "" }, /* HTML_DT */
76: { "{\\em ", "}" }, /* HTML_EM */
77: { "", "" }, /* HTML_FIG */
78: { "\n\\footnote{", "}\n" }, /* HTML_FOOTNOTE */
79: { "", "" }, /* HTML_FORM */
80: { "\n\\chapter{", "}\n" }, /* HTML_H1 */
81: { "\n\\section{", "}\n" }, /* HTML_H2 */
82: { "\n\\subsection{","}\n" }, /* HTML_H3 */
83: { "\n\\subsubsection{","}\n" }, /* HTML_H4 */
84: { "\n\\paragraph{", "}\n" }, /* HTML_H5 */
85: { "\n\\subparagraph{","}\n" }, /* HTML_H6 */
86: { "", "\n" }, /* HTML_H7 */
87: { "", "" }, /* HTML_HEAD */
88: { "", "" }, /* HTML_HR */
89: { "", "" }, /* HTML_HTML */
2.3 ! duns 90: { "", "" }, /* HTML_HTMLPLUS */
2.1 frystyk 91: { "{\\it ", "}" }, /* HTML_I */
92: { "", "" }, /* HTML_IMAGE */
93: { "", "" }, /* HTML_IMG */
94: { "", "" }, /* HTML_INPUT */
95: { "", "" }, /* HTML_ISINDEX */
96: { "{\\tt ", "}" }, /* HTML_KBD */
97: { "", "" }, /* HTML_L */
98: { "\n\\item ", "" }, /* HTML_LI */
99: { "", "" }, /* HTML_LINK */
100: { "", "" }, /* HTML_LISTING */
101: { "", "" }, /* HTML_LIT */
102: { "", "" }, /* HTML_MARGIN */
103: { "", "" }, /* HTML_MATH */
104: { "", "" }, /* HTML_MENU */
105: { "", "" }, /* HTML_NEXTID */
106: { "", "" }, /* HTML_NOTE */
107: { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL */
108: { "", "" }, /* HTML_OPTION */
109: { "", "" }, /* HTML_OVER */
110: { "\n\n", "" }, /* HTML_P */
111: { "", "" }, /* HTML_PERSON */
112: { "", "" }, /* HTML_PLAINTEXT */
113: { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE */
114: { "", "" }, /* HTML_Q */
115: { "\\begin{quote}", "\\end{quote}"}, /* HTML_QUOTE */
116: { "", "" }, /* HTML_RENDER */
117: { "", "" }, /* HTML_REMOVED */
118: { "", "" }, /* HTML_S */
119: { "", "" }, /* HTML_SAMP */
120: { "", "" }, /* HTML_SELECT */
121: { "{\\bf ", "}" }, /* HTML_STRONG */
122: { "", "" }, /* HTML_SUB */
123: { "", "" }, /* HTML_SUP */
124: { "", "" }, /* HTML_TAB */
125: { "", "" }, /* HTML_TABLE */
126: { "", "" }, /* HTML_TD */
127: { "", "" }, /* HTML_TEXTAREA */
128: { "", "" }, /* HTML_TH */
129: { "\n\\title{", "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
130: { "", "" }, /* HTML_TR */
131: { "", "" }, /* HTML_TT */
132: { "", "" }, /* HTML_U */
133: { "\n\\begin{itemize}","\n\\end{itemize}\n"}, /* HTML_UL */
134: { "", "" }, /* HTML_VAR */
135: { "{\\sf ", "}" } /* HTML_XMP */
136: };
137:
138: PRIVATE char *TeX_entities[] = {
139: "\\AE ", /*"AElig", capital AE diphthong (ligature) */
2.2 frystyk 140: "\\\'{A}", /*"Aacute", capital A, acute accent */
141: "\\^{A}", /*"Acirc", capital A, circumflex accent */
142: "\\`{A}", /*"Agrave", capital A, grave accent */
143: "\\AA", /*"Aring", capital A, ring */
144: "\\~{A}", /*"Atilde", capital A, tilde */
145: "\\\"{A}", /*"Auml", capital A, dieresis or umlaut mark */
146: "\\c{C}", /*"Ccedil", capital C, cedilla */
147: "\\OE ", /*"ETH", capital Eth, Icelandic */
148: "\\\'{E}", /*"Eacute", capital E, acute accent */
149: "\\^{E}", /*"Ecirc", capital E, circumflex accent */
150: "\\`{E}", /*"Egrave", capital E, grave accent */
151: "\\\"{E}", /*"Euml", capital E, dieresis or umlaut mark */
152: "\\\'{I}", /*"Iacute", capital I, acute accent */
153: "\\^{I}", /*"Icirc", capital I, circumflex accent */
154: "\\`{I}", /*"Igrave", capital I, grave accent */
155: "\\\"{I}", /*"Iuml", capital I, dieresis or umlaut mark */
156: "\\~{N}", /*"Ntilde", capital N, tilde */
157: "\\\'{O}", /*"Oacute", capital O, acute accent */
158: "\\^{O}", /*"Ocirc", capital O, circumflex accent */
159: "\\`{O}", /*"Ograve", capital O, grave accent */
2.1 frystyk 160: "\\O ", /*"Oslash", capital O, slash */
2.2 frystyk 161: "\\~{O}", /*"Otilde", capital O, tilde */
162: "\\\"{O}", /*"Ouml", capital O, dieresis or umlaut mark */
163: " ", /*"THORN", capital THORN, Icelandic */
164: "\\\'{U}", /*"Uacute", capital U, acute accent */
165: "\\^{U}", /*"Ucirc", capital U, circumflex accent */
166: "\\`{U}", /*"Ugrave", capital U, grave accent */
167: "\\\"{U}", /*"Uuml", capital U, dieresis or umlaut mark */
168: "\\\'{Y}", /*"Yacute", capital Y, acute accent */
169: "\\\'{a}", /*"aacute", small a, acute accent */
170: "\\^{a}", /*"acirc", small a, circumflex accent */
2.1 frystyk 171: "\\ae ", /*"aelig", small ae diphthong (ligature) */
2.2 frystyk 172: "\\`{a}", /*"agrave", small a, grave accent */
2.1 frystyk 173: "&", /*"amp", ampersand */
174: "\\aa ", /*"aring", small a, ring */
2.2 frystyk 175: "\\~{a}", /*"atilde", small a, tilde */
176: "\\\"{a}", /*"auml", small a, dieresis or umlaut mark */
177: "\\c{c}", /*"ccedil", small c, cedilla */
178: "\\\'{e}", /*"eacute", small e, acute accent */
179: "\\^{c}", /*"ecirc", small e, circumflex accent */
180: "\\`{c}", /*"egrave", small e, grave accent */
181: "\\oe ", /*"eth", small eth, Icelandic */
182: "\\\"{e}", /*"euml", small e, dieresis or umlaut mark */
2.1 frystyk 183: ">", /*"gt", greater than */
2.2 frystyk 184: "\\\'{\\i}", /*"iacute", small i, acute accent */
185: "\\^{\\i}", /*"icirc", small i, circumflex accent */
186: "\\`{\\i}", /*"igrave", small i, grave accent */
187: "\\\"{\\i}", /*"iuml", small i, dieresis or umlaut mark */
2.1 frystyk 188: "<", /*"lt", less than */
2.2 frystyk 189: "\\~{n}", /*"ntilde", small n, tilde */
190: "\\\'{o}", /*"oacute", small o, acute accent */
191: "\\~{o}", /*"ocirc", small o, circumflex accent */
192: "\\`{o}", /*"ograve", small o, grave accent */
2.1 frystyk 193: "\\o ", /*"oslash", small o, slash */
2.2 frystyk 194: "\\~{o}", /*"otilde", small o, tilde */
195: "\\\"{o}", /*"ouml", small o, dieresis or umlaut mark */
2.1 frystyk 196: "\\ss ", /*"szlig", small sharp s, German (sz ligature)*/
2.2 frystyk 197: " ", /*"thorn", small thorn, Icelandic */
198: "\\\'{u}", /*"uacute", small u, acute accent */
199: "\\^{u}", /*"ucirc", small u, circumflex accent */
200: "\\`{u}", /*"ugrave", small u, grave accent */
201: "\\\"{u}", /*"uuml", small u, dieresis or umlaut mark */
202: "\\\'{y}", /*"yacute", small y, acute accent */
203: "\\\"{y}" /*"yuml", small y, dieresis or umlaut mark */
2.1 frystyk 204: };
205:
206:
207: /* Flush Buffer
208: ** ------------
209: */
210: PRIVATE void HTTeXGen_flush ARGS1(HTStructured *, me)
211: {
212: (*me->targetClass.put_block)(me->target,
213: me->buffer,
214: me->write_pointer - me->buffer);
215: me->write_pointer = me->buffer;
216: me->line_break = me->buffer;
217: }
218:
219:
220: /* Character handling
221: ** ------------------
222: **
223: */
224: PRIVATE void HTTeXGen_put_character ARGS2(HTStructured *, me, char, c)
225: {
226: if (!me->startup) /* To skip MIME header */
227: return;
228: if (c=='\n') {
229: if (me->markup || me->preformatted) { /* Put out as is and flush */
230: *me->write_pointer++ = c;
231: HTTeXGen_flush(me);
232: return;
233: } else if (me->sensitive || *(me->write_pointer-1)==' ') {
234: return;
235: } else
236: *me->write_pointer++ = ' '; /* Try to pretty print */
237: } else if (me->markup || me->preformatted) {
238: *me->write_pointer++ = c;
239: } else if (c==' ' || c=='\t') { /* Skip space and tabs */
240: if (*(me->write_pointer-1) != ' ')
241: *me->write_pointer++ = ' ';
242: else
243: return;
244: } else {
245: if (c=='$' || c=='&' || c=='%' || c=='#' || /* Special chars */
246: c=='{' || c=='}' || c=='_') {
247: *me->write_pointer++ = '\\';
248: *me->write_pointer++ = c;
249: } else if (c=='\\') { /* Special names */
250: char *temp = "$\\backslash$";
251: strcpy(me->write_pointer, temp);
252: me->write_pointer += strlen(temp);
253: } else if (c=='^') {
254: char *temp = "$\\hat{ }$";
255: strcpy(me->write_pointer, temp);
256: me->write_pointer += strlen(temp);
257: } else if (c=='~') {
258: char *temp = "$\\tilde{ }$";
259: strcpy(me->write_pointer, temp);
260: me->write_pointer += strlen(temp);
261: } else if (c=='|' || c=='<' || c=='>') { /* Math mode */
262: *me->write_pointer++ = '$';
263: *me->write_pointer++ = c;
264: *me->write_pointer++ = '$';
265: } else
266: *me->write_pointer++ = c; /* Char seems normal */
267: }
268:
269: if (c==' ') /* Find deliniter */
270: me->line_break = me->write_pointer;
271: else if (strchr(WORD_DELIMITERS, c))
272: me->line_break = me->write_pointer-1;
273:
274: /* Flush buffer out when full */
275: if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
276: if (me->markup || me->preformatted) {
277: *me->write_pointer = '\n';
278: (*me->targetClass.put_block)(me->target,
279: me->buffer,
280: me->write_pointer-me->buffer+1);
281: me->write_pointer = me->buffer;
282: } else { /* Use break-point */
283: char line_break_char = *me->line_break;
284: char *saved = me->line_break;
285: *me->line_break = '\n';
286: (*me->targetClass.put_block)(me->target,
287: me->buffer,
288: me->line_break-me->buffer+1);
289: *me->line_break = line_break_char;
290: { /* move next line in */
291: char *p = saved;
292: char *q;
293: for(q=me->buffer; p<me->write_pointer; )
294: *q++ = *p++;
295: }
296: me->write_pointer = me->buffer + (me->write_pointer-saved);
297: }
298: me->line_break = me->buffer;
299: }
300: }
301:
302:
303:
304: /* String handling
305: ** ---------------
306: */
307: PRIVATE void HTTeXGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
308: {
309: CONST char * p;
310: for (p=s; *p; p++)
311: HTTeXGen_put_character(me, *p);
312: }
313:
314:
315: PRIVATE void HTTeXGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
316: {
317: CONST char * p;
318: for(p=s; p<s+l; p++)
319: HTTeXGen_put_character(me, *p);
320: }
321:
322:
323: /* Start Element
324: ** -------------
325: **
326: ** No attributes are put to the output Henrik 07/03-94
327: ** Does no assumptions of WHAT element is started...
328: */
329: PRIVATE void HTTeXGen_start_element ARGS4(
330: HTStructured *, me,
331: int, element_number,
332: CONST BOOL*, present,
333: CONST char **, value)
334: {
335: me->startup = YES; /* Now, let's get down to it */
336: if (me->preformatted == YES) /* Don't start markup in here */
337: return;
338: if (element_number == HTML_PRE)
339: me->preformatted = YES;
340: if (element_number == HTML_CITE || /* No \n here, please! */
341: element_number == HTML_COMMENT ||
342: element_number == HTML_DT ||
343: element_number == HTML_H1 ||
344: element_number == HTML_H2 ||
345: element_number == HTML_H3 ||
346: element_number == HTML_H4 ||
347: element_number == HTML_H5 ||
348: element_number == HTML_H6 ||
349: element_number == HTML_H7 ||
350: element_number == HTML_TITLE)
351: me->sensitive = YES;
352: else if (element_number == HTML_DD) /* Only way to turn <DT> off */
353: me->sensitive = NO;
354: me->markup = element_number == HTML_A ? NO : YES;
355: HTTeXGen_put_string(me, *TeX_names[element_number]);
356: me->markup = NO;
357: }
358:
359:
360: /* End Element
361: ** -----------
362: **
363: ** Ends an markup element Henrik 07/03-94
364: ** Does no assumptions of WHAT element is ended...
365: */
366: PRIVATE void HTTeXGen_end_element ARGS2(HTStructured *, me,
367: int , element_number)
368: {
369: if (me->preformatted && element_number != HTML_PRE)
370: return;
371: me->preformatted = NO;
372: me->markup = YES;
373: HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
374: me->markup = NO;
375: if (element_number == HTML_CITE ||
376: element_number == HTML_COMMENT ||
377: element_number == HTML_DL ||
378: element_number == HTML_H1 ||
379: element_number == HTML_H2 ||
380: element_number == HTML_H3 ||
381: element_number == HTML_H4 ||
382: element_number == HTML_H5 ||
383: element_number == HTML_H6 ||
384: element_number == HTML_H7 ||
385: element_number == HTML_TITLE)
386: me->sensitive = NO;
387: }
388:
389:
390: /* Expanding entities
391: ** ------------------
392: **
393: */
394: PRIVATE void HTTeXGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
395: {
396: BOOL mark = me->markup;
397: if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
398: *TeX_entities[entity_number] != '<' &&
399: *TeX_entities[entity_number] != '>')
400: me->markup = YES;
401: HTTeXGen_put_string(me, TeX_entities[entity_number]);
402: me->markup = mark;
403: }
404:
405:
406:
407: /* Free an HTML object
408: ** -------------------
409: **
410: */
411: PRIVATE void HTTeXGen_free ARGS1(HTStructured *, me)
412: {
413: HTTeXGen_flush(me);
414: (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
415: HTTeXGen_flush(me);
416: (*me->targetClass.free)(me->target); /* ripple through */
417: free(me);
418: }
419:
420:
421: PRIVATE void HTTeXGen_abort ARGS2(HTStructured *, me, HTError, e)
422: {
423: HTTeXGen_free(me);
424: }
425:
426:
427: /* Structured Object Class
428: ** -----------------------
429: */
430: PRIVATE CONST HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
431: {
432: "HTMLToTeX",
433: HTTeXGen_free,
434: HTTeXGen_abort,
435: HTTeXGen_put_character, HTTeXGen_put_string, HTTeXGen_write,
436: HTTeXGen_start_element, HTTeXGen_end_element,
437: HTTeXGen_put_entity
438: };
439:
440:
441: /* HTConverter from HTML to TeX Stream
442: ** ------------------------------------------
443: **
444: */
445: PUBLIC HTStream* HTMLToTeX ARGS5(
446: HTRequest *, request,
447: void *, param,
448: HTFormat, input_format,
449: HTFormat, output_format,
450: HTStream *, output_stream)
451: {
452: HTStructured* me = (HTStructured*) calloc(1, sizeof(*me));
453: if (me == NULL) outofmem(__FILE__, "HTMLToTeX");
454:
455: me->isa = (HTStructuredClass*) &HTTeXGeneration;
456: me->dtd = &HTMLP_dtd;
457: me->target = output_stream;
458: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
459: me->write_pointer = me->buffer;
460: me->line_break = me->buffer;
461: (*me->targetClass.put_string)(me->target,
462: "\\documentstyle[11pt]{report}\n\\begin{document}\n");
463: return SGML_new(&HTMLP_dtd, me);
464: }
465:
466:
467: /* END OF FILE HTTeXGen.c */
468:
469:
470:
471:
472:
473:
474:
475: