Annotation of libwww/Library/src/HTTeXGen.c, revision 2.1
2.1 ! frystyk 1: /* Simple LaTeX Generator that converts in a 1:1 manner from HTML to LaTeX
! 2: ** =======================================================================
! 3: **
! 4: ** This version of the HTML object sends LaTeX to the output stream.
! 5: ** No attributes are considered in the translation!
! 6: ** The module uses simple 1:1 table-conversions, but this COULD be
! 7: ** expanded to a stack-machine. This would then be in start_element and
! 8: ** end_element...
! 9: ** Henrik 07/03-94
! 10: */
! 11:
/* Nominal width of an output line.  The line buffer in the generator object
** is declared with BUFFER_SIZE+20 bytes and is flushed once the write
** position reaches BUFFER_SIZE-3, which is how the code tries to produce
** neat line breaks.
*/
#define BUFFER_SIZE 80

/* Characters, besides a blank, at which an output line may be broken. */
#define WORD_DELIMITERS ",/;:\"[]()"
! 14:
! 15: /* Implements: */
! 16: #include "HTTeXGen.h"
! 17: #include <stdio.h>
! 18: #include "HTMLPDTD.h"
! 19: #include "HTStream.h"
! 20: #include "SGML.h"
! 21: #include "HTFormat.h"
! 22: #include "tcp.h"
! 23:
! 24:
! 25: /* HTML Object
! 26: ** -----------
! 27: */
! 28:
! 29: struct _HTStream {
! 30: CONST HTStreamClass * isa;
! 31: HTStream * target;
! 32: HTStreamClass targetClass; /* COPY for speed */
! 33: };
! 34:
! 35: struct _HTStructured {
! 36: CONST HTStructuredClass * isa;
! 37: HTStream * target;
! 38: HTStreamClass targetClass; /* COPY for speed */
! 39: CONST SGML_dtd * dtd;
! 40:
! 41: char buffer[BUFFER_SIZE+20]; /* Needed!! */
! 42: char * write_pointer;
! 43: char * line_break;
! 44: BOOL sensitive; /* Can we put \n */
! 45: BOOL preformatted; /* Is it verbatim? */
! 46: BOOL markup; /* If doing LaTeX markup */
! 47: BOOL startup; /* To skip MIME header */
! 48: };
! 49:
! 50: PRIVATE char *TeX_names[][2] = {
! 51: { "", "" }, /* HTML_A */
! 52: { "", "" }, /* HTML_ABBREV */
! 53: { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT */
! 54: { "", "" }, /* HTML_ACRONYM */
! 55: { "", "" }, /* HTML_ADDED */
! 56: { "{\\it ", "}" }, /* HTML_ADDRESS */
! 57: { "", "" }, /* HTML_ARG */
! 58: { "{\\bf ", "}" }, /* HTML_B */
! 59: { "", "" }, /* HTML_BASE */
! 60: { "{\\sf ", "}" }, /* HTML_BLOCKQUOTE */
! 61: { "", "" }, /* HTML_BODY */
! 62: { "", "" }, /* HTML_BOX */
! 63: { "", "" }, /* HTML_BR */
! 64: { "", "" }, /* HTML_BYLINE */
! 65: { "", "" }, /* HTML_CAPTION */
! 66: { "", "" }, /* HTML_CHANGED */
! 67: { "\\cite{", "}" }, /* HTML_CITE */
! 68: { "", "" }, /* HTML_CMD */
! 69: { "{\\tt ", "}" }, /* HTML_CODE */
! 70: { "\n\\typeout{", "}\n" }, /* HTML_COMMENT */
! 71: { "]", "" }, /* HTML_DD */
! 72: { "", "" }, /* HTML_DFN */
! 73: { "", "" }, /* HTML_DIR */
! 74: { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL */
! 75: { "\n\\item[", "" }, /* HTML_DT */
! 76: { "{\\em ", "}" }, /* HTML_EM */
! 77: { "", "" }, /* HTML_FIG */
! 78: { "\n\\footnote{", "}\n" }, /* HTML_FOOTNOTE */
! 79: { "", "" }, /* HTML_FORM */
! 80: { "\n\\chapter{", "}\n" }, /* HTML_H1 */
! 81: { "\n\\section{", "}\n" }, /* HTML_H2 */
! 82: { "\n\\subsection{","}\n" }, /* HTML_H3 */
! 83: { "\n\\subsubsection{","}\n" }, /* HTML_H4 */
! 84: { "\n\\paragraph{", "}\n" }, /* HTML_H5 */
! 85: { "\n\\subparagraph{","}\n" }, /* HTML_H6 */
! 86: { "", "\n" }, /* HTML_H7 */
! 87: { "", "" }, /* HTML_HEAD */
! 88: { "", "" }, /* HTML_HR */
! 89: { "", "" }, /* HTML_HTML */
! 90: { "" "" }, /* HTML_HTMLPLUS */
! 91: { "{\\it ", "}" }, /* HTML_I */
! 92: { "", "" }, /* HTML_IMAGE */
! 93: { "", "" }, /* HTML_IMG */
! 94: { "", "" }, /* HTML_INPUT */
! 95: { "", "" }, /* HTML_ISINDEX */
! 96: { "{\\tt ", "}" }, /* HTML_KBD */
! 97: { "", "" }, /* HTML_L */
! 98: { "\n\\item ", "" }, /* HTML_LI */
! 99: { "", "" }, /* HTML_LINK */
! 100: { "", "" }, /* HTML_LISTING */
! 101: { "", "" }, /* HTML_LIT */
! 102: { "", "" }, /* HTML_MARGIN */
! 103: { "", "" }, /* HTML_MATH */
! 104: { "", "" }, /* HTML_MENU */
! 105: { "", "" }, /* HTML_NEXTID */
! 106: { "", "" }, /* HTML_NOTE */
! 107: { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL */
! 108: { "", "" }, /* HTML_OPTION */
! 109: { "", "" }, /* HTML_OVER */
! 110: { "\n\n", "" }, /* HTML_P */
! 111: { "", "" }, /* HTML_PERSON */
! 112: { "", "" }, /* HTML_PLAINTEXT */
! 113: { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE */
! 114: { "", "" }, /* HTML_Q */
! 115: { "\\begin{quote}", "\\end{quote}"}, /* HTML_QUOTE */
! 116: { "", "" }, /* HTML_RENDER */
! 117: { "", "" }, /* HTML_REMOVED */
! 118: { "", "" }, /* HTML_S */
! 119: { "", "" }, /* HTML_SAMP */
! 120: { "", "" }, /* HTML_SELECT */
! 121: { "{\\bf ", "}" }, /* HTML_STRONG */
! 122: { "", "" }, /* HTML_SUB */
! 123: { "", "" }, /* HTML_SUP */
! 124: { "", "" }, /* HTML_TAB */
! 125: { "", "" }, /* HTML_TABLE */
! 126: { "", "" }, /* HTML_TD */
! 127: { "", "" }, /* HTML_TEXTAREA */
! 128: { "", "" }, /* HTML_TH */
! 129: { "\n\\title{", "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
! 130: { "", "" }, /* HTML_TR */
! 131: { "", "" }, /* HTML_TT */
! 132: { "", "" }, /* HTML_U */
! 133: { "\n\\begin{itemize}","\n\\end{itemize}\n"}, /* HTML_UL */
! 134: { "", "" }, /* HTML_VAR */
! 135: { "{\\sf ", "}" } /* HTML_XMP */
! 136: };
! 137:
! 138: PRIVATE char *TeX_entities[] = {
! 139: "\\AE ", /*"AElig", capital AE diphthong (ligature) */
! 140: "\\\'{A} ", /*"Aacute", capital A, acute accent */
! 141: "\\^{A} ", /*"Acirc", capital A, circumflex accent */
! 142: "\\`{A} ", /*"Agrave", capital A, grave accent */
! 143: "\\AA ", /*"Aring", capital A, ring */
! 144: "\\~{A} ", /*"Atilde", capital A, tilde */
! 145: "\\\"{A} ", /*"Auml", capital A, dieresis or umlaut mark */
! 146: "\\c{C} ", /*"Ccedil", capital C, cedilla */
! 147: "\\OE", /*"ETH", capital Eth, Icelandic */
! 148: "\\\'{E} ", /*"Eacute", capital E, acute accent */
! 149: "\\^{E} ", /*"Ecirc", capital E, circumflex accent */
! 150: "\\`{E} ", /*"Egrave", capital E, grave accent */
! 151: "\\\"{E} ", /*"Euml", capital E, dieresis or umlaut mark */
! 152: "\\\'{I} ", /*"Iacute", capital I, acute accent */
! 153: "\\^{I} ", /*"Icirc", capital I, circumflex accent */
! 154: "\\`{I} ", /*"Igrave", capital I, grave accent */
! 155: "\\\"{I} ", /*"Iuml", capital I, dieresis or umlaut mark */
! 156: "\\~{N} ", /*"Ntilde", capital N, tilde */
! 157: "\\\'{O} ", /*"Oacute", capital O, acute accent */
! 158: "\\^{O} ", /*"Ocirc", capital O, circumflex accent */
! 159: "\\`{O} ", /*"Ograve", capital O, grave accent */
! 160: "\\O ", /*"Oslash", capital O, slash */
! 161: "\\~{O} ", /*"Otilde", capital O, tilde */
! 162: "\\\"{O} ", /*"Ouml", capital O, dieresis or umlaut mark */
! 163: "", /*"THORN", capital THORN, Icelandic */
! 164: "\\\'{U} ", /*"Uacute", capital U, acute accent */
! 165: "\\^{U} ", /*"Ucirc", capital U, circumflex accent */
! 166: "\\`{U} ", /*"Ugrave", capital U, grave accent */
! 167: "\\\"{U} ", /*"Uuml", capital U, dieresis or umlaut mark */
! 168: "\\\'{Y} ", /*"Yacute", capital Y, acute accent */
! 169: "\\\'{a} ", /*"aacute", small a, acute accent */
! 170: "\\^{a} ", /*"acirc", small a, circumflex accent */
! 171: "\\ae ", /*"aelig", small ae diphthong (ligature) */
! 172: "\\`{a} ", /*"agrave", small a, grave accent */
! 173: "&", /*"amp", ampersand */
! 174: "\\aa ", /*"aring", small a, ring */
! 175: "\\~{a} ", /*"atilde", small a, tilde */
! 176: "\\\"{a} ", /*"auml", small a, dieresis or umlaut mark */
! 177: "\\c{c} ", /*"ccedil", small c, cedilla */
! 178: "\\\'{e} ", /*"eacute", small e, acute accent */
! 179: "\\^{c} ", /*"ecirc", small e, circumflex accent */
! 180: "\\`{c} ", /*"egrave", small e, grave accent */
! 181: "\\oe", /*"eth", small eth, Icelandic */
! 182: "\\\"{e} ", /*"euml", small e, dieresis or umlaut mark */
! 183: ">", /*"gt", greater than */
! 184: "\\\'{\\i} ", /*"iacute", small i, acute accent */
! 185: "\\^{\\i} ", /*"icirc", small i, circumflex accent */
! 186: "\\`{\\i} ", /*"igrave", small i, grave accent */
! 187: "\\\"{\\i} ", /*"iuml", small i, dieresis or umlaut mark */
! 188: "<", /*"lt", less than */
! 189: "\\~{n} ", /*"ntilde", small n, tilde */
! 190: "\\\'{o} ", /*"oacute", small o, acute accent */
! 191: "\\~{o} ", /*"ocirc", small o, circumflex accent */
! 192: "\\`{o} ", /*"ograve", small o, grave accent */
! 193: "\\o ", /*"oslash", small o, slash */
! 194: "\\~{o} ", /*"otilde", small o, tilde */
! 195: "\\\"{o} ", /*"ouml", small o, dieresis or umlaut mark */
! 196: "\\ss ", /*"szlig", small sharp s, German (sz ligature)*/
! 197: "", /*"thorn", small thorn, Icelandic */
! 198: "\\\'{u} ", /*"uacute", small u, acute accent */
! 199: "\\^{u} ", /*"ucirc", small u, circumflex accent */
! 200: "\\`{u} ", /*"ugrave", small u, grave accent */
! 201: "\\\"{u} ", /*"uuml", small u, dieresis or umlaut mark */
! 202: "\\\'{y} ", /*"yacute", small y, acute accent */
! 203: "\\\"{y} " /*"yuml", small y, dieresis or umlaut mark */
! 204: };
! 205:
! 206:
! 207: /* Flush Buffer
! 208: ** ------------
! 209: */
! 210: PRIVATE void HTTeXGen_flush ARGS1(HTStructured *, me)
! 211: {
! 212: (*me->targetClass.put_block)(me->target,
! 213: me->buffer,
! 214: me->write_pointer - me->buffer);
! 215: me->write_pointer = me->buffer;
! 216: me->line_break = me->buffer;
! 217: }
! 218:
! 219:
! 220: /* Character handling
! 221: ** ------------------
! 222: **
! 223: */
! 224: PRIVATE void HTTeXGen_put_character ARGS2(HTStructured *, me, char, c)
! 225: {
! 226: if (!me->startup) /* To skip MIME header */
! 227: return;
! 228: if (c=='\n') {
! 229: if (me->markup || me->preformatted) { /* Put out as is and flush */
! 230: *me->write_pointer++ = c;
! 231: HTTeXGen_flush(me);
! 232: return;
! 233: } else if (me->sensitive || *(me->write_pointer-1)==' ') {
! 234: return;
! 235: } else
! 236: *me->write_pointer++ = ' '; /* Try to pretty print */
! 237: } else if (me->markup || me->preformatted) {
! 238: *me->write_pointer++ = c;
! 239: } else if (c==' ' || c=='\t') { /* Skip space and tabs */
! 240: if (*(me->write_pointer-1) != ' ')
! 241: *me->write_pointer++ = ' ';
! 242: else
! 243: return;
! 244: } else {
! 245: if (c=='$' || c=='&' || c=='%' || c=='#' || /* Special chars */
! 246: c=='{' || c=='}' || c=='_') {
! 247: *me->write_pointer++ = '\\';
! 248: *me->write_pointer++ = c;
! 249: *me->write_pointer++ = ' ';
! 250: } else if (c=='\\') { /* Special names */
! 251: char *temp = "$\\backslash$";
! 252: strcpy(me->write_pointer, temp);
! 253: me->write_pointer += strlen(temp);
! 254: } else if (c=='^') {
! 255: char *temp = "$\\hat{ }$";
! 256: strcpy(me->write_pointer, temp);
! 257: me->write_pointer += strlen(temp);
! 258: } else if (c=='~') {
! 259: char *temp = "$\\tilde{ }$";
! 260: strcpy(me->write_pointer, temp);
! 261: me->write_pointer += strlen(temp);
! 262: } else if (c=='|' || c=='<' || c=='>') { /* Math mode */
! 263: *me->write_pointer++ = '$';
! 264: *me->write_pointer++ = c;
! 265: *me->write_pointer++ = '$';
! 266: } else
! 267: *me->write_pointer++ = c; /* Char seems normal */
! 268: }
! 269:
! 270: if (c==' ') /* Find deliniter */
! 271: me->line_break = me->write_pointer;
! 272: else if (strchr(WORD_DELIMITERS, c))
! 273: me->line_break = me->write_pointer-1;
! 274:
! 275: /* Flush buffer out when full */
! 276: if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
! 277: if (me->markup || me->preformatted) {
! 278: *me->write_pointer = '\n';
! 279: (*me->targetClass.put_block)(me->target,
! 280: me->buffer,
! 281: me->write_pointer-me->buffer+1);
! 282: me->write_pointer = me->buffer;
! 283: } else { /* Use break-point */
! 284: char line_break_char = *me->line_break;
! 285: char *saved = me->line_break;
! 286: *me->line_break = '\n';
! 287: (*me->targetClass.put_block)(me->target,
! 288: me->buffer,
! 289: me->line_break-me->buffer+1);
! 290: *me->line_break = line_break_char;
! 291: { /* move next line in */
! 292: char *p = saved;
! 293: char *q;
! 294: for(q=me->buffer; p<me->write_pointer; )
! 295: *q++ = *p++;
! 296: }
! 297: me->write_pointer = me->buffer + (me->write_pointer-saved);
! 298: }
! 299: me->line_break = me->buffer;
! 300: }
! 301: }
! 302:
! 303:
! 304:
! 305: /* String handling
! 306: ** ---------------
! 307: */
! 308: PRIVATE void HTTeXGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
! 309: {
! 310: CONST char * p;
! 311: for (p=s; *p; p++)
! 312: HTTeXGen_put_character(me, *p);
! 313: }
! 314:
! 315:
! 316: PRIVATE void HTTeXGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
! 317: {
! 318: CONST char * p;
! 319: for(p=s; p<s+l; p++)
! 320: HTTeXGen_put_character(me, *p);
! 321: }
! 322:
! 323:
! 324: /* Start Element
! 325: ** -------------
! 326: **
! 327: ** No attributes are put to the output Henrik 07/03-94
! 328: ** Does no assumptions of WHAT element is started...
! 329: */
! 330: PRIVATE void HTTeXGen_start_element ARGS4(
! 331: HTStructured *, me,
! 332: int, element_number,
! 333: CONST BOOL*, present,
! 334: CONST char **, value)
! 335: {
! 336: me->startup = YES; /* Now, let's get down to it */
! 337: if (me->preformatted == YES) /* Don't start markup in here */
! 338: return;
! 339: if (element_number == HTML_PRE)
! 340: me->preformatted = YES;
! 341: if (element_number == HTML_CITE || /* No \n here, please! */
! 342: element_number == HTML_COMMENT ||
! 343: element_number == HTML_DT ||
! 344: element_number == HTML_H1 ||
! 345: element_number == HTML_H2 ||
! 346: element_number == HTML_H3 ||
! 347: element_number == HTML_H4 ||
! 348: element_number == HTML_H5 ||
! 349: element_number == HTML_H6 ||
! 350: element_number == HTML_H7 ||
! 351: element_number == HTML_TITLE)
! 352: me->sensitive = YES;
! 353: else if (element_number == HTML_DD) /* Only way to turn <DT> off */
! 354: me->sensitive = NO;
! 355: me->markup = element_number == HTML_A ? NO : YES;
! 356: HTTeXGen_put_string(me, *TeX_names[element_number]);
! 357: me->markup = NO;
! 358: }
! 359:
! 360:
! 361: /* End Element
! 362: ** -----------
! 363: **
! 364: ** Ends an markup element Henrik 07/03-94
! 365: ** Does no assumptions of WHAT element is ended...
! 366: */
! 367: PRIVATE void HTTeXGen_end_element ARGS2(HTStructured *, me,
! 368: int , element_number)
! 369: {
! 370: if (me->preformatted && element_number != HTML_PRE)
! 371: return;
! 372: me->preformatted = NO;
! 373: me->markup = YES;
! 374: HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
! 375: me->markup = NO;
! 376: if (element_number == HTML_CITE ||
! 377: element_number == HTML_COMMENT ||
! 378: element_number == HTML_DL ||
! 379: element_number == HTML_H1 ||
! 380: element_number == HTML_H2 ||
! 381: element_number == HTML_H3 ||
! 382: element_number == HTML_H4 ||
! 383: element_number == HTML_H5 ||
! 384: element_number == HTML_H6 ||
! 385: element_number == HTML_H7 ||
! 386: element_number == HTML_TITLE)
! 387: me->sensitive = NO;
! 388: }
! 389:
! 390:
! 391: /* Expanding entities
! 392: ** ------------------
! 393: **
! 394: */
! 395: PRIVATE void HTTeXGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
! 396: {
! 397: BOOL mark = me->markup;
! 398: if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
! 399: *TeX_entities[entity_number] != '<' &&
! 400: *TeX_entities[entity_number] != '>')
! 401: me->markup = YES;
! 402: HTTeXGen_put_string(me, TeX_entities[entity_number]);
! 403: me->markup = mark;
! 404: }
! 405:
! 406:
! 407:
! 408: /* Free an HTML object
! 409: ** -------------------
! 410: **
! 411: */
! 412: PRIVATE void HTTeXGen_free ARGS1(HTStructured *, me)
! 413: {
! 414: HTTeXGen_flush(me);
! 415: (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
! 416: HTTeXGen_flush(me);
! 417: (*me->targetClass.free)(me->target); /* ripple through */
! 418: free(me);
! 419: }
! 420:
! 421:
! 422: PRIVATE void HTTeXGen_abort ARGS2(HTStructured *, me, HTError, e)
! 423: {
! 424: HTTeXGen_free(me);
! 425: }
! 426:
! 427:
! 428: /* Structured Object Class
! 429: ** -----------------------
! 430: */
! 431: PRIVATE CONST HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
! 432: {
! 433: "HTMLToTeX",
! 434: HTTeXGen_free,
! 435: HTTeXGen_abort,
! 436: HTTeXGen_put_character, HTTeXGen_put_string, HTTeXGen_write,
! 437: HTTeXGen_start_element, HTTeXGen_end_element,
! 438: HTTeXGen_put_entity
! 439: };
! 440:
! 441:
! 442: /* HTConverter from HTML to TeX Stream
! 443: ** ------------------------------------------
! 444: **
! 445: */
! 446: PUBLIC HTStream* HTMLToTeX ARGS5(
! 447: HTRequest *, request,
! 448: void *, param,
! 449: HTFormat, input_format,
! 450: HTFormat, output_format,
! 451: HTStream *, output_stream)
! 452: {
! 453: HTStructured* me = (HTStructured*) calloc(1, sizeof(*me));
! 454: if (me == NULL) outofmem(__FILE__, "HTMLToTeX");
! 455:
! 456: me->isa = (HTStructuredClass*) &HTTeXGeneration;
! 457: me->dtd = &HTMLP_dtd;
! 458: me->target = output_stream;
! 459: me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
! 460: me->write_pointer = me->buffer;
! 461: me->line_break = me->buffer;
! 462: (*me->targetClass.put_string)(me->target,
! 463: "\\documentstyle[11pt]{report}\n\\begin{document}\n");
! 464: return SGML_new(&HTMLP_dtd, me);
! 465: }
! 466:
! 467:
! 468: /* END OF FILE HTTeXGen.c */
! 469:
! 470:
! 471:
! 472:
! 473:
! 474:
! 475:
! 476:
Webmaster