Annotation of Amaya/amaya/XHTMLbuilder.c, revision 1.1
1.1 ! cvs 1: /*
! 2: *
! 3: * (c) COPYRIGHT MIT and INRIA, 1996.
! 4: * Please first read the full copyright statement in file COPYRIGHT.
! 5: *
! 6: */
! 7:
! 8: /*
! 9: *
! 10: * html2thot parses an HTML file and builds the corresponding abstract tree
! 11: * for a Thot document of type HTML.
! 12: *
! 13: * Author: V. Quint
! 14: * L. Carcone
! 15: * R. Guetari (W3C/INRIA): Unicode version
! 16: */
! 17:
! 18: #define THOT_EXPORT extern
! 19: #include "amaya.h"
! 20: #include "css.h"
! 21: #include "zlib.h"
! 22: #include "css_f.h"
! 23: #include "HTMLactions_f.h"
! 24: #include "HTMLedit_f.h"
! 25: #include "HTMLimage_f.h"
! 26: #include "HTMLtable_f.h"
! 27: #include "HTMLimage_f.h"
! 28: #include "UIcss_f.h"
! 29:
! 30: #include "parser.h"
! 31: #include "MathMLbuilder_f.h"
! 32: #ifdef GRAPHML
! 33: #include "GraphMLbuilder_f.h"
! 34: #endif
! 35: #include "fetchHTMLname_f.h"
! 36: #include "fetchXMLname_f.h"
! 37: #include "html2thot_f.h"
! 38: #include "styleparser_f.h"
! 39:
! 40: /* maximum length of a Thot structure schema name */
! 41: #define MAX_SS_NAME_LENGTH 32
! 42:
! 43:
! 44: typedef CHAR_T entityName[10]; /* room for 9 characters plus the terminator; the longest name in XHTMLEntityTable ("thetasym") has 8 */
! 45: typedef struct _XHTMLEntityEntry
! 46: { /* an SGML character entity: maps an entity name to a character code */
! 47: entityName charName; /* entity name, without the leading '&' and trailing ';' */
! 48: int charCode; /* decimal character code; not limited to ISO-Latin1 (the table holds values such as 8225, documented there as Unicode code points) */
! 49: }
! 50: XHTMLEntityEntry;
! 51:
! 52: XHTMLEntityEntry XHTMLEntityTable[] =
! 53: {
! 54: /* This table MUST be kept in alphabetical order; as the entries show, the order is case-sensitive (ASCII): uppercase letters sort before lowercase ones */
! 55: {TEXT("AElig"), 198}, /* latin capital letter AE = */
! 56: /* latin capital ligature AE, U+00C6 ISOlat1 */
! 57: {TEXT("Aacute"), 193}, /* latin capital letter A with acute, U+00C1 ISOlat1 */
! 58: {TEXT("Acirc"), 194}, /* latin capital letter A with circumflex, U+00C2 ISOlat1 */
! 59: {TEXT("Agrave"), 192}, /* latin capital letter A with grave = */
! 60: /* latin capital letter A grave, U+00C0 ISOlat1 */
! 61: {TEXT("Alpha"), 913}, /* greek capital letter alpha, U+0391 */
! 62: {TEXT("Aring"), 197}, /* latin capital letter A with ring above = */
! 63: /* latin capital letter A ring, U+00C5 ISOlat1 */
! 64: {TEXT("Atilde"), 195}, /* latin capital letter A with tilde, U+00C3 ISOlat1 */
! 65: {TEXT("Auml"), 196}, /* latin capital letter A with diaeresis, U+00C4 ISOlat1 */
! 66: {TEXT("Beta"), 914}, /* greek capital letter beta, U+0392 */
! 67: {TEXT("Ccedil"), 199}, /* latin capital letter C with cedilla, U+00C7 ISOlat1 */
! 68: {TEXT("Chi"), 935}, /* greek capital letter chi, U+03A7 */
! 69: {TEXT("Dagger"), 8225}, /* double dagger, U+2021 ISOpub */
! 70: {TEXT("Delta"), 916}, /* greek capital letter delta, U+0394 ISOgrk3 */
! 71: {TEXT("ETH"), 208}, /* latin capital letter ETH, U+00D0 ISOlat1 */
! 72: {TEXT("Eacute"), 201}, /* latin capital letter E with acute, U+00C9 ISOlat1 */
! 73: {TEXT("Ecirc"), 202}, /* latin capital letter E with circumflex, U+00CA ISOlat1 */
! 74: {TEXT("Egrave"), 200}, /* latin capital letter E with grave, U+00C8 ISOlat1 */
! 75: {TEXT("Epsilon"), 917}, /* greek capital letter epsilon, U+0395 */
! 76: {TEXT("Eta"), 919}, /* greek capital letter eta, U+0397 */
! 77: {TEXT("Euml"), 203}, /* latin capital letter E with diaeresis, U+00CB ISOlat1 */
! 78: {TEXT("Gamma"), 915}, /* greek capital letter gamma, U+0393 ISOgrk3 */
! 79: {TEXT("Iacute"), 205}, /* latin capital letter I with acute, U+00CD ISOlat1 */
! 80: {TEXT("Icirc"), 206}, /* latin capital letter I with circumflex, U+00CE ISOlat1 */
! 81: {TEXT("Igrave"), 204}, /* latin capital letter I with grave, U+00CC ISOlat1 */
! 82: {TEXT("Iota"), 921}, /* greek capital letter iota, U+0399 */
! 83: {TEXT("Iuml"), 207}, /* latin capital letter I with diaeresis, U+00CF ISOlat1 */
! 84: {TEXT("Kappa"), 922}, /* greek capital letter kappa, U+039A */
! 85: {TEXT("Lambda"), 923}, /* greek capital letter lambda, U+039B ISOgrk3 */
! 86: {TEXT("Mu"), 924}, /* greek capital letter mu, U+039C */
! 87: {TEXT("Ntilde"), 209}, /* latin capital letter N with tilde, U+00D1 ISOlat1 */
! 88: {TEXT("Nu"), 925}, /* greek capital letter nu, U+039D */
! 89: {TEXT("OElig"), 338}, /* latin capital ligature OE, U+0152 ISOlat2 */
! 90: {TEXT("Oacute"), 211}, /* latin capital letter O with acute, U+00D3 ISOlat1 */
! 91: {TEXT("Ocirc"), 212}, /* latin capital letter O with circumflex, U+00D4 ISOlat1 */
! 92: {TEXT("Ograve"), 210}, /* latin capital letter O with grave, U+00D2 ISOlat1 */
! 93: {TEXT("Omega"), 937}, /* greek capital letter omega, U+03A9 ISOgrk3 */
! 94: {TEXT("Omicron"), 927}, /* greek capital letter omicron, U+039F */
! 95: {TEXT("Oslash"), 216}, /* latin capital letter O with stroke = */
! 96: /* latin capital letter O slash, U+00D8 ISOlat1 */
! 97: {TEXT("Otilde"), 213}, /* latin capital letter O with tilde, U+00D5 ISOlat1 */
! 98: {TEXT("Ouml"), 214}, /* latin capital letter O with diaeresis, U+00D6 ISOlat1 */
! 99: {TEXT("Phi"), 934}, /* greek capital letter phi, U+03A6 ISOgrk3 */
! 100: {TEXT("Pi"), 928}, /* greek capital letter pi, U+03A0 ISOgrk3 */
! 101: {TEXT("Prime"), 8243}, /* double prime = seconds = inches, U+2033 ISOtech */
! 102: {TEXT("Psi"), 936}, /* greek capital letter psi, U+03A8 ISOgrk3 */
! 103: {TEXT("Rho"), 929}, /* greek capital letter rho, U+03A1 */
! 104: {TEXT("Scaron"), 352}, /* latin capital letter S with caron, U+0160 ISOlat2 */
! 105: {TEXT("Sigma"), 931}, /* greek capital letter sigma, U+03A3 ISOgrk3 */
! 106: {TEXT("THORN"), 222}, /* latin capital letter THORN, U+00DE ISOlat1 */
! 107: {TEXT("Tau"), 932}, /* greek capital letter tau, U+03A4 */
! 108: {TEXT("Theta"), 920}, /* greek capital letter theta, U+0398 ISOgrk3 */
! 109: {TEXT("Uacute"), 218}, /* latin capital letter U with acute, U+00DA ISOlat1 */
! 110: {TEXT("Ucirc"), 219}, /* latin capital letter U with circumflex, U+00DB ISOlat1 */
! 111: {TEXT("Ugrave"), 217}, /* latin capital letter U with grave, U+00D9 ISOlat1 */
! 112: {TEXT("Upsilon"), 933}, /* greek capital letter upsilon, U+03A5 ISOgrk3 */
! 113: {TEXT("Uuml"), 220}, /* latin capital letter U with diaeresis, U+00DC ISOlat1 */
! 114: {TEXT("Xi"), 926}, /* greek capital letter xi, U+039E ISOgrk3 */
! 115: {TEXT("Yacute"), 221}, /* latin capital letter Y with acute, U+00DD ISOlat1 */
! 116: {TEXT("Yuml"), 376}, /* latin capital letter Y with diaeresis, U+0178 ISOlat2 */
! 117: {TEXT("Zeta"), 918}, /* greek capital letter zeta, U+0396 */
! 118: {TEXT("aacute"), 225}, /* latin small letter a with acute, U+00E1 ISOlat1 */
! 119: {TEXT("acirc"), 226}, /* latin small letter a with circumflex, U+00E2 ISOlat1 */
! 120: {TEXT("acute"), 180}, /* acute accent = spacing acute, U+00B4 ISOdia */
! 121: {TEXT("aelig"), 230}, /* latin small letter ae = */
! 122: /* latin small ligature ae, U+00E6 ISOlat1 */
! 123: {TEXT("agrave"), 224}, /* latin small letter a with grave = */
! 124: /* latin small letter a grave, U+00E0 ISOlat1 */
! 125: {TEXT("alefsym"), 8501},/* alef symbol = first transfinite cardinal, U+2135 NEW */
! 126: {TEXT("alpha"), 945}, /* greek small letter alpha, U+03B1 ISOgrk3 */
! 127: {TEXT("amp"), 38}, /* ampersand, U+0026 ISOnum */
! 128: {TEXT("and"), 8743}, /* logical and = wedge, U+2227 ISOtech */
! 129: {TEXT("ang"), 8736}, /* angle, U+2220 ISOamso */
! 130: {TEXT("aring"), 229}, /* latin small letter a with ring above = */
! 131: /* latin small letter a ring, U+00E5 ISOlat1 */
! 132: {TEXT("asymp"), 8776}, /* almost equal to = asymptotic to, U+2248 ISOamsr */
! 133: {TEXT("atilde"), 227}, /* latin small letter a with tilde, U+00E3 ISOlat1 */
! 134: {TEXT("auml"), 228}, /* latin small letter a with diaeresis, U+00E4 ISOlat1 */
! 135: {TEXT("bdquo"), 8222}, /* double low-9 quotation mark, U+201E NEW */
! 136: {TEXT("beta"), 946}, /* greek small letter beta, U+03B2 ISOgrk3 */
! 137: {TEXT("brvbar"), 166}, /* broken bar = broken vertical bar, U+00A6 ISOnum */
! 138: {TEXT("bull"), 8226}, /* bullet = black small circle, U+2022 ISOpub */
! 139: {TEXT("cap"), 8745}, /* intersection = cap, U+2229 ISOtech */
! 140: {TEXT("ccedil"), 231}, /* latin small letter c with cedilla, U+00E7 ISOlat1 */
! 141: {TEXT("cedil"), 184}, /* cedilla = spacing cedilla, U+00B8 ISOdia */
! 142: {TEXT("cent"), 162}, /* cent sign, U+00A2 ISOnum */
! 143: {TEXT("chi"), 967}, /* greek small letter chi, U+03C7 ISOgrk3 */
! 144: {TEXT("circ"), 710}, /* modifier letter circumflex accent, U+02C6 ISOpub */
! 145: {TEXT("clubs"), 9827}, /* black club suit = shamrock, U+2663 ISOpub */
! 146: {TEXT("cong"), 8773}, /* approximately equal to, U+2245 ISOtech */
! 147: {TEXT("copy"), 169}, /* copyright sign, U+00A9 ISOnum */
! 148: {TEXT("crarr"), 8629}, /* downwards arrow with corner leftwards = */
! 149: /* carriage return, U+21B5 NEW */
! 150: {TEXT("cup"), 8746}, /* union = cup, U+222A ISOtech */
! 151: {TEXT("curren"), 164}, /* currency sign, U+00A4 ISOnum */
! 152: {TEXT("dArr"), 8659}, /* downwards double arrow, U+21D3 ISOamsa */
! 153: {TEXT("dagger"), 8224}, /* dagger, U+2020 ISOpub */
! 154: {TEXT("darr"), 8595}, /* downwards arrow, U+2193 ISOnum */
! 155: {TEXT("deg"), 176}, /* degree sign, U+00B0 ISOnum */
! 156: {TEXT("delta"), 948}, /* greek small letter delta, U+03B4 ISOgrk3 */
! 157: {TEXT("diams"), 9830}, /* black diamond suit, U+2666 ISOpub */
! 158: {TEXT("divide"), 247}, /* division sign, U+00F7 ISOnum */
! 159: {TEXT("eacute"), 233}, /* latin small letter e with acute, U+00E9 ISOlat1 */
! 160: {TEXT("ecirc"), 234}, /* latin small letter e with circumflex, U+00EA ISOlat1 */
! 161: {TEXT("egrave"), 232}, /* latin small letter e with grave, U+00E8 ISOlat1 */
! 162: {TEXT("empty"), 8709}, /* empty set = null set = diameter, U+2205 ISOamso */
! 163: {TEXT("emsp"), 8195}, /* em space, U+2003 ISOpub */
! 164: {TEXT("ensp"), 8194}, /* en space, U+2002 ISOpub */
! 165: {TEXT("epsilon"), 949}, /* greek small letter epsilon, U+03B5 ISOgrk3 */
! 166: {TEXT("equiv"), 8801}, /* identical to, U+2261 ISOtech */
! 167: {TEXT("eta"), 951}, /* greek small letter eta, U+03B7 ISOgrk3 */
! 168: {TEXT("eth"), 240}, /* latin small letter eth, U+00F0 ISOlat1 */
! 169: {TEXT("euml"), 235}, /* latin small letter e with diaeresis, U+00EB ISOlat1 */
! 170: {TEXT("euro"), 8364}, /* euro sign, U+20AC NEW */
! 171: {TEXT("exist"), 8707}, /* there exists, U+2203 ISOtech */
! 172: {TEXT("fnof"), 402}, /* latin small f with hook = function = */
! 173: /* florin, U+0192 ISOtech */
! 174: {TEXT("forall"), 8704}, /* for all, U+2200 ISOtech */
! 175: {TEXT("frac12"), 189}, /* vulgar fraction one half = */
! 176: /* fraction one half, U+00BD ISOnum */
! 177: {TEXT("frac14"), 188}, /* vulgar fraction one quarter = */
! 178: /* fraction one quarter, U+00BC ISOnum */
! 179: {TEXT("frac34"), 190}, /* vulgar fraction three quarters = */
! 180: /* fraction three quarters, U+00BE ISOnum */
! 181: {TEXT("frasl"), 8260}, /* fraction slash, U+2044 NEW */
! 182: {TEXT("gamma"), 947}, /* greek small letter gamma, U+03B3 ISOgrk3 */
! 183: {TEXT("ge"), 8805}, /* greater-than or equal to, U+2265 ISOtech */
! 184: {TEXT("gt"), 62}, /* greater-than sign, U+003E ISOnum */
! 185: {TEXT("hArr"), 8660}, /* left right double arrow, U+21D4 ISOamsa */
! 186: {TEXT("harr"), 8596}, /* left right arrow, U+2194 ISOamsa */
! 187: {TEXT("hearts"), 9829}, /* black heart suit = valentine, U+2665 ISOpub */
! 188: {TEXT("hellip"), 8230}, /* horizontal ellipsis = three dot leader, U+2026 ISOpub */
! 189: {TEXT("hyphen"), 173}, /* hyphen = discretionary hyphen, U+00AD ISOnum (same code as "shy") */
! 190: {TEXT("iacute"), 237}, /* latin small letter i with acute, U+00ED ISOlat1 */
! 191: {TEXT("icirc"), 238}, /* latin small letter i with circumflex, U+00EE ISOlat1 */
! 192: {TEXT("iexcl"), 161}, /* inverted exclamation mark, U+00A1 ISOnum */
! 193: {TEXT("igrave"), 236}, /* latin small letter i with grave, U+00EC ISOlat1 */
! 194: {TEXT("image"), 8465}, /* blackletter capital I = imaginary part, U+2111 ISOamso */
! 195: {TEXT("infin"), 8734}, /* infinity, U+221E ISOtech */
! 196: {TEXT("int"), 8747}, /* integral, U+222B ISOtech */
! 197: {TEXT("iota"), 953}, /* greek small letter iota, U+03B9 ISOgrk3 */
! 198: {TEXT("iquest"), 191}, /* inverted question mark = */
! 199: /* turned question mark, U+00BF ISOnum */
! 200: {TEXT("isin"), 8712}, /* element of, U+2208 ISOtech */
! 201: {TEXT("iuml"), 239}, /* latin small letter i with diaeresis, U+00EF ISOlat1 */
! 202: {TEXT("kappa"), 954}, /* greek small letter kappa, U+03BA ISOgrk3 */
! 203: {TEXT("lArr"), 8656}, /* leftwards double arrow, U+21D0 ISOtech */
! 204: {TEXT("lambda"), 955}, /* greek small letter lambda, U+03BB ISOgrk3 */
! 205: {TEXT("lang"), 9001}, /* left-pointing angle bracket = bra, U+2329 ISOtech */
! 206: {TEXT("laquo"), 171}, /* left-pointing double angle quotation mark = */
! 207: /* left pointing guillemet, U+00AB ISOnum */
! 208: {TEXT("larr"), 8592}, /* leftwards arrow, U+2190 ISOnum */
! 209: {TEXT("lceil"), 8968}, /* left ceiling = apl upstile, U+2308 ISOamsc */
! 210: {TEXT("ldquo"), 8220}, /* left double quotation mark, U+201C ISOnum */
! 211: {TEXT("le"), 8804}, /* less-than or equal to, U+2264 ISOtech */
! 212: {TEXT("lfloor"), 8970}, /* left floor = apl downstile, U+230A ISOamsc */
! 213: {TEXT("lowast"), 8727}, /* asterisk operator, U+2217 ISOtech */
! 214: {TEXT("loz"), 9674}, /* lozenge, U+25CA ISOpub */
! 215: {TEXT("lrm"), 8206}, /* left-to-right mark, U+200E NEW RFC 2070 */
! 216: {TEXT("lsaquo"), 8249}, /* single left-pointing angle quotation mark, */
! 217: /* U+2039 ISO proposed */
! 218: {TEXT("lsquo"), 8216}, /* left single quotation mark, U+2018 ISOnum */
! 219: {TEXT("lt"), 60}, /* less-than sign, U+003C ISOnum */
! 220: {TEXT("macr"), 175}, /* macron = spacing macron = overline = APL overbar, */
! 221: /* U+00AF ISOdia */
! 222: {TEXT("mdash"), 8212}, /* em dash, U+2014 ISOpub */
! 223: {TEXT("micro"), 181}, /* micro sign, U+00B5 ISOnum */
! 224: {TEXT("middot"), 183}, /* middle dot = Georgian comma = */
! 225: /* Greek middle dot, U+00B7 ISOnum */
! 226: {TEXT("minus"), 8722}, /* minus sign, U+2212 ISOtech */
! 227: {TEXT("mu"), 956}, /* greek small letter mu, U+03BC ISOgrk3 */
! 228: {TEXT("nabla"), 8711}, /* nabla = backward difference, U+2207 ISOtech */
! 229: {TEXT("nbsp"), 160}, /* no-break space = non-breaking space, U+00A0 ISOnum */
! 230: {TEXT("ndash"), 8211}, /* en dash, U+2013 ISOpub */
! 231: {TEXT("ne"), 8800}, /* not equal to, U+2260 ISOtech */
! 232: {TEXT("ni"), 8715}, /* contains as member, U+220B ISOtech */
! 233: {TEXT("not"), 172}, /* not sign, U+00AC ISOnum */
! 234: {TEXT("notin"), 8713}, /* not an element of, U+2209 ISOtech */
! 235: {TEXT("nsub"), 8836}, /* not a subset of, U+2284 ISOamsn */
! 236: {TEXT("ntilde"), 241}, /* latin small letter n with tilde, U+00F1 ISOlat1 */
! 237: {TEXT("nu"), 957}, /* greek small letter nu, U+03BD ISOgrk3 */
! 238: {TEXT("oacute"), 243}, /* latin small letter o with acute, U+00F3 ISOlat1 */
! 239: {TEXT("ocirc"), 244}, /* latin small letter o with circumflex, U+00F4 ISOlat1 */
! 240: {TEXT("oelig"), 339}, /* latin small ligature oe, U+0153 ISOlat2 */
! 241: {TEXT("ograve"), 242}, /* latin small letter o with grave, U+00F2 ISOlat1 */
! 242: {TEXT("oline"), 8254}, /* overline = spacing overscore, U+203E NEW */
! 243: {TEXT("omega"), 969}, /* greek small letter omega, U+03C9 ISOgrk3 */
! 244: {TEXT("omicron"), 959}, /* greek small letter omicron, U+03BF NEW */
! 245: {TEXT("oplus"), 8853}, /* circled plus = direct sum, U+2295 ISOamsb */
! 246: {TEXT("or"), 8744}, /* logical or = vee, U+2228 ISOtech */
! 247: {TEXT("ordf"), 170}, /* feminine ordinal indicator, U+00AA ISOnum */
! 248: {TEXT("ordm"), 186}, /* masculine ordinal indicator, U+00BA ISOnum */
! 249: {TEXT("oslash"), 248}, /* latin small letter o with stroke, = */
! 250: /* latin small letter o slash, U+00F8 ISOlat1 */
! 251: {TEXT("otilde"), 245}, /* latin small letter o with tilde, U+00F5 ISOlat1 */
! 252: {TEXT("otimes"), 8855}, /* circled times = vector product, U+2297 ISOamsb */
! 253: {TEXT("ouml"), 246}, /* latin small letter o with diaeresis, U+00F6 ISOlat1 */
! 254: {TEXT("para"), 182}, /* pilcrow sign = paragraph sign, U+00B6 ISOnum */
! 255: {TEXT("part"), 8706}, /* partial differential, U+2202 ISOtech */
! 256: {TEXT("permil"), 8240}, /* per mille sign, U+2030 ISOtech */
! 257: {TEXT("perp"), 8869}, /* up tack = orthogonal to = perpendicular, U+22A5 ISOtech */
! 258: {TEXT("phi"), 966}, /* greek small letter phi, U+03C6 ISOgrk3 */
! 259: {TEXT("pi"), 960}, /* greek small letter pi, U+03C0 ISOgrk3 */
! 260: {TEXT("piv"), 982}, /* greek pi symbol, U+03D6 ISOgrk3 */
! 261: {TEXT("plusmn"), 177}, /* plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */
! 262: {TEXT("pound"), 163}, /* pound sign, U+00A3 ISOnum */
! 263: {TEXT("prime"), 8242}, /* prime = minutes = feet, U+2032 ISOtech */
! 264: {TEXT("prod"), 8719}, /* n-ary product = product sign, U+220F ISOamsb */
! 265: {TEXT("prop"), 8733}, /* proportional to, U+221D ISOtech */
! 266: {TEXT("psi"), 968}, /* greek small letter psi, U+03C8 ISOgrk3 */
! 267: {TEXT("quot"), 34}, /* quotation mark = APL quote, U+0022 ISOnum */
! 268: {TEXT("rArr"), 8658}, /* rightwards double arrow, U+21D2 ISOtech */
! 269: {TEXT("radic"), 8730}, /* square root = radical sign, U+221A ISOtech */
! 270: {TEXT("rang"), 9002}, /* right-pointing angle bracket = ket, U+232A ISOtech */
! 271: {TEXT("raquo"), 187}, /* right-pointing double angle quotation mark = */
! 272: /* right pointing guillemet, U+00BB ISOnum */
! 273: {TEXT("rarr"), 8594}, /* rightwards arrow, U+2192 ISOnum */
! 274: {TEXT("rceil"), 8969}, /* right ceiling, U+2309 ISOamsc */
! 275: {TEXT("rdquo"), 8221}, /* right double quotation mark, U+201D ISOnum */
! 276: {TEXT("real"), 8476}, /* blackletter capital R = real part symbol, U+211C ISOamso */
! 277: {TEXT("reg"), 174}, /* registered sign = registered trade mark sign, */
! 278: /* U+00AE ISOnum */
! 279: {TEXT("rfloor"), 8971}, /* right floor, U+230B ISOamsc */
! 280: {TEXT("rho"), 961}, /* greek small letter rho, U+03C1 ISOgrk3 */
! 281: {TEXT("rlm"), 8207}, /* right-to-left mark, U+200F NEW RFC 2070 */
! 282: {TEXT("rsaquo"), 8250}, /* single right-pointing angle quotation mark, */
! 283: /* U+203A ISO proposed */
! 284: {TEXT("rsquo"), 8217}, /* right single quotation mark, U+2019 ISOnum */
! 285: {TEXT("sbquo"), 8218}, /* single low-9 quotation mark, U+201A NEW */
! 286: {TEXT("scaron"), 353}, /* latin small letter s with caron, U+0161 ISOlat2 */
! 287: {TEXT("sdot"), 8901}, /* dot operator, U+22C5 ISOamsb */
! 288: {TEXT("sect"), 167}, /* section sign, U+00A7 ISOnum */
! 289: {TEXT("shy"), 173}, /* soft hyphen = discretionary hyphen, U+00AD ISOnum */
! 290: {TEXT("sigma"), 963}, /* greek small letter sigma, U+03C3 ISOgrk3 */
! 291: {TEXT("sigmaf"), 962}, /* greek small letter final sigma, U+03C2 ISOgrk3 */
! 292: {TEXT("sim"), 8764}, /* tilde operator = varies with = similar to, U+223C ISOtech */
! 293: {TEXT("spades"), 9824}, /* black spade suit, U+2660 ISOpub */
! 294: {TEXT("sub"), 8834}, /* subset of, U+2282 ISOtech */
! 295: {TEXT("sube"), 8838}, /* subset of or equal to, U+2286 ISOtech */
! 296: {TEXT("sum"), 8721}, /* n-ary summation, U+2211 ISOamsb */
! 297: {TEXT("sup"), 8835}, /* superset of, U+2283 ISOtech */
! 298: {TEXT("sup1"), 185}, /* superscript one = superscript digit one, U+00B9 ISOnum */
! 299: {TEXT("sup2"), 178}, /* superscript two = superscript digit two = squared, */
! 300: /* U+00B2 ISOnum */
! 301: {TEXT("sup3"), 179}, /* superscript three = superscript digit three = cubed, */
! 302: /* U+00B3 ISOnum */
! 303: {TEXT("supe"), 8839}, /* superset of or equal to, U+2287 ISOtech */
! 304: {TEXT("szlig"), 223}, /* latin small letter sharp s = ess-zed, U+00DF ISOlat1 */
! 305: {TEXT("tau"), 964}, /* greek small letter tau, U+03C4 ISOgrk3 */
! 306: {TEXT("there4"), 8756}, /* therefore, U+2234 ISOtech */
! 307: {TEXT("theta"), 952}, /* greek small letter theta, U+03B8 ISOgrk3 */
! 308: {TEXT("thetasym"), 977},/* greek small letter theta symbol, U+03D1 NEW */
! 309: {TEXT("thinsp"), 8201}, /* thin space, U+2009 ISOpub */
! 310: {TEXT("thorn"), 254}, /* latin small letter thorn, U+00FE ISOlat1 */
! 311: {TEXT("tilde"), 732}, /* small tilde, U+02DC ISOdia */
! 312: {TEXT("times"), 215}, /* multiplication sign, U+00D7 ISOnum */
! 313: {TEXT("trade"), 8482}, /* trade mark sign, U+2122 ISOnum */
! 314: {TEXT("uArr"), 8657}, /* upwards double arrow, U+21D1 ISOamsa */
! 315: {TEXT("uacute"), 250}, /* latin small letter u with acute, U+00FA ISOlat1 */
! 316: {TEXT("uarr"), 8593}, /* upwards arrow, U+2191 ISOnum */
! 317: {TEXT("ucirc"), 251}, /* latin small letter u with circumflex, U+00FB ISOlat1 */
! 318: {TEXT("ugrave"), 249}, /* latin small letter u with grave, U+00F9 ISOlat1 */
! 319: {TEXT("uml"), 168}, /* diaeresis = spacing diaeresis, U+00A8 ISOdia */
! 320: {TEXT("upsih"), 978}, /* greek upsilon with hook symbol, U+03D2 NEW */
! 321: {TEXT("upsilon"), 965}, /* greek small letter upsilon, U+03C5 ISOgrk3 */
! 322: {TEXT("uuml"), 252}, /* latin small letter u with diaeresis, U+00FC ISOlat1 */
! 323: {TEXT("weierp"), 8472}, /* script capital P = power set = Weierstrass p, */
! 324: /* U+2118 ISOamso */
! 325: {TEXT("xi"), 958}, /* greek small letter xi, U+03BE ISOgrk3 */
! 326: {TEXT("yacute"), 253}, /* latin small letter y with acute, U+00FD ISOlat1 */
! 327: {TEXT("yen"), 165}, /* yen sign = yuan sign, U+00A5 ISOnum */
! 328: {TEXT("yuml"), 255}, /* latin small letter y with diaeresis, U+00FF ISOlat1 */
! 329: {TEXT("zeta"), 950}, /* greek small letter zeta, U+03B6 ISOgrk3 */
! 330: {TEXT("zwj"), 8205}, /* zero width joiner, U+200D NEW RFC 2070 */
! 331: {TEXT("zwnj"), 8204}, /* zero width non-joiner, U+200C NEW RFC 2070 */
! 332: {TEXT("zzzz"), 0} /* this last entry is required: it closes the table with code 0 (presumably an end marker for the lookup code - confirm there) */
! 333: };
! 334:
! 335: typedef struct _ElemToBeChecked *PtrElemToBeChecked; /* pointer to a list node, declared first so the node can refer to its own type */
! 336: typedef struct _ElemToBeChecked
! 337: { /* node of a singly linked list of elements to be checked */
! 338: Element Elem; /* the element to be checked */
! 339: PtrElemToBeChecked nextElemToBeChecked; /* link to the following node of the list */
! 340: }
! 341: ElemToBeChecked;
! 342:
! 343:
! 344: /* element types that cannot contain text as immediate children.
! 345: When some text is present in the HTML file it must be surrounded
! 346: by a Thot Paragraph (or Pseudo_paragraph) element */
! 347: static int NoTextChild[] =
! 348: {
! 349: HTML_EL_HTML, HTML_EL_HEAD, HTML_EL_BODY,
! 350: HTML_EL_Definition_List, HTML_EL_Block_Quote, HTML_EL_Directory,
! 351: HTML_EL_Form, HTML_EL_Menu, HTML_EL_FIELDSET,
! 352: HTML_EL_Numbered_List, HTML_EL_Option_Menu,
! 353: HTML_EL_Unnumbered_List, HTML_EL_Definition, HTML_EL_List_Item,
! 354: HTML_EL_MAP, HTML_EL_Applet,
! 355: HTML_EL_Object, HTML_EL_IFRAME, HTML_EL_NOFRAMES,
! 356: HTML_EL_Division, HTML_EL_Center, HTML_EL_NOSCRIPT,
! 357: HTML_EL_Data_cell, HTML_EL_Heading_cell,
! 358: #ifdef GRAPHML
! 359: HTML_EL_XMLGraphics, /* listed only when GRAPHML is defined */
! 360: #endif
! 361: 0}; /* trailing 0 entry; presumably the end-of-list marker - confirm in the code that scans this table */
! 362:
! 363: /* empty elements: list of the element types treated as empty (NOTE(review): presumably those that never have content - confirm against the parser) */
! 364: static int EmptyElement[] =
! 365: {
! 366: HTML_EL_AREA,
! 367: HTML_EL_BASE,
! 368: HTML_EL_BaseFont,
! 369: HTML_EL_BR,
! 370: HTML_EL_COL,
! 371: HTML_EL_FRAME,
! 372: HTML_EL_Horizontal_Rule,
! 373: HTML_EL_Input,
! 374: HTML_EL_ISINDEX,
! 375: HTML_EL_LINK,
! 376: HTML_EL_META,
! 377: HTML_EL_Parameter,
! 378: HTML_EL_PICTURE_UNIT,
! 379: 0}; /* trailing 0 entry; presumably the end-of-list marker - confirm in the code that scans this table */
! 380:
! 381: /* character level elements: list of the element types classified as character level (text, pictures, inline markup and form controls) */
! 382: static int CharLevelElement[] =
! 383: {
! 384: HTML_EL_TEXT_UNIT, HTML_EL_PICTURE_UNIT,
! 385: HTML_EL_Anchor,
! 386: HTML_EL_Teletype_text, HTML_EL_Italic_text, HTML_EL_Bold_text,
! 387: HTML_EL_Underlined_text, HTML_EL_Struck_text, HTML_EL_Big_text,
! 388: HTML_EL_Small_text,
! 389: HTML_EL_Emphasis, HTML_EL_Strong, HTML_EL_Def, HTML_EL_Code, HTML_EL_Sample,
! 390: HTML_EL_Keyboard, HTML_EL_Variable, HTML_EL_Cite, HTML_EL_ABBR,
! 391: HTML_EL_ACRONYM,
! 392: HTML_EL_Font_, HTML_EL_Quotation, HTML_EL_Subscript, HTML_EL_Superscript,
! 393: HTML_EL_Span, HTML_EL_BDO, HTML_EL_INS, HTML_EL_DEL,
! 394: HTML_EL_Math,
! 395: HTML_EL_Input,
! 396: HTML_EL_Option, HTML_EL_OptGroup, HTML_EL_Option_Menu,
! 397: HTML_EL_Text_Input, HTML_EL_Password_Input, HTML_EL_File_Input,
! 398: HTML_EL_Text_With_Frame, HTML_EL_Inserted_Text, HTML_EL_Text_Area,
! 399: HTML_EL_Button_Input, HTML_EL_BUTTON,
! 400: HTML_EL_LABEL,
! 401: HTML_EL_BR,
! 402: 0}; /* trailing 0 entry; presumably the end-of-list marker - confirm in the code that scans this table */
! 403:
! 404: /* block level elements: list of the element types classified as block level (paragraphs, headings, title, term, caption, legend) */
! 405: static int BlockLevelElement[] =
! 406: {
! 407: HTML_EL_Paragraph, HTML_EL_Pseudo_paragraph,
! 408: HTML_EL_H1, HTML_EL_H2, HTML_EL_H3, HTML_EL_H4, HTML_EL_H5, HTML_EL_H6,
! 409: HTML_EL_TITLE, HTML_EL_Term, HTML_EL_CAPTION, HTML_EL_LEGEND,
! 410: 0}; /* trailing 0 entry; presumably the end-of-list marker - confirm in the code that scans this table */
! 411:
! 412: /* start tags that imply the end of a current element */
! 413: /* any tag of each line implies the end of the current element if the type of
! 414: that element is in the same line */
! 415: typedef char oneLine[100]; /* one table line: a space-separated list of tag names, at most 99 characters plus the terminator */
! 416: static oneLine EquivEndingElem[] =
! 417: {
! 418: "dt dd li option",
! 419: "h1 h2 h3 h4 h5 h6",
! 420: "address pre listing xmp",
! 421: "" /* empty line: last entry of the table */
! 422: };
! 423: /* according to the HTML DTD, HR should be added to the 2nd line above, as it */
! 424: /* is not allowed within a H1, H2, H3, etc. But we tolerate that case */
! 425: /* because many documents contain rules in headings... */
! 426:
! 427: /* start tags that imply the end of current element: each line reads "<tag> closes <name> <name> ...", where the first word is the start tag and the names after the word "closes" are the elements it ends */
! 428: static oneLine StartTagEndingElem[] =
! 429: {
! 430: "form closes form p p* hr h1 h2 h3 h4 h5 h6 dl ul ol menu dir address pre listing xmp head",
! 431: "head closes p p*",
! 432: "title closes p p*",
! 433: "body closes head style script title p p*",
! 434: "li closes p p* h1 h2 h3 h4 h5 h6 dl address pre listing xmp head",
! 435: "hr closes p p* head",
! 436: "h1 closes p p* head",
! 437: "h2 closes p p* head",
! 438: "h3 closes p p* head",
! 439: "h4 closes p p* head",
! 440: "h5 closes p p* head",
! 441: "h6 closes p p* head",
! 442: "dir closes p p* head",
! 443: "address closes p p* head ul",
! 444: "pre closes p p* head ul",
! 445: "listing closes p p* head",
! 446: "xmp closes p p* head",
! 447: "blockquote closes p p* head",
! 448: "dl closes p p* dt menu dir address pre listing xmp head",
! 449: "dt closes p p* menu dir address pre listing xmp head",
! 450: "dd closes p p* menu dir address pre listing xmp head",
! 451: "ul closes p p* head ol menu dir address pre listing xmp",
! 452: "ol closes p p* head ul",
! 453: "menu closes p p* head ul",
! 454: "p closes p p* head h1 h2 h3 h4 h5 h6",
! 455: "p* closes p p* head", /* NOTE(review): "p*" is presumably the name used for the Pseudo_paragraph element - confirm in the element mapping table */
! 456: "div closes p p* head",
! 457: "noscript closes p p* head",
! 458: "center closes font b i p p* head",
! 459: "a closes a",
! 460: "caption closes p p*",
! 461: "colgroup closes caption colgroup col p p*",
! 462: "col closes caption col p p*",
! 463: "table closes p p* head h1 h2 h3 h4 h5 h6 pre listing xmp a",
! 464: "th closes th td",
! 465: "td closes th td",
! 466: "tr closes th td tr caption col colgroup",
! 467: "thead closes caption col colgroup",
! 468: "tfoot closes th td tr caption col colgroup thead tbody",
! 469: "tbody closes th td tr caption col colgroup thead tfoot tbody",
! 470: "optgroup closes option",
! 471: "fieldset closes legend p p* head h1 h2 h3 h4 h5 h6 pre listing xmp a",
! 472: "" /* empty line: last entry of the table */
! 473: };
! 474:
! 475:
! 476: /* mapping table of HTML attribute values */
! 477:
! 478: static AttrValueMapping HTMLAttrValueMappingTable[] =
! 479: {
! 480: {HTML_ATTR_dir, TEXT("ltr"), HTML_ATTR_dir_VAL_ltr},
! 481: {HTML_ATTR_dir, TEXT("rtl"), HTML_ATTR_dir_VAL_rtl},
! 482:
! 483: {HTML_ATTR_TextAlign, TEXT("left"), HTML_ATTR_TextAlign_VAL_left_},
! 484: {HTML_ATTR_TextAlign, TEXT("center"), HTML_ATTR_TextAlign_VAL_center_},
! 485: {HTML_ATTR_TextAlign, TEXT("right"), HTML_ATTR_TextAlign_VAL_right_},
! 486: {HTML_ATTR_TextAlign, TEXT("justify"), HTML_ATTR_TextAlign_VAL_justify_},
! 487:
! 488: {HTML_ATTR_Align, TEXT("left"), HTML_ATTR_Align_VAL_left_},
! 489: {HTML_ATTR_Align, TEXT("center"), HTML_ATTR_Align_VAL_center_},
! 490: {HTML_ATTR_Align, TEXT("right"), HTML_ATTR_Align_VAL_right_},
! 491:
! 492: {HTML_ATTR_LAlign, TEXT("top"), HTML_ATTR_LAlign_VAL_Top_},
! 493: {HTML_ATTR_LAlign, TEXT("bottom"), HTML_ATTR_LAlign_VAL_Bottom_},
! 494: {HTML_ATTR_LAlign, TEXT("left"), HTML_ATTR_LAlign_VAL_Left_},
! 495: {HTML_ATTR_LAlign, TEXT("right"), HTML_ATTR_LAlign_VAL_Right_},
! 496:
! 497: {HTML_ATTR_Clear, TEXT("left"), HTML_ATTR_Clear_VAL_Left_},
! 498: {HTML_ATTR_Clear, TEXT("right"), HTML_ATTR_Clear_VAL_Right_},
! 499: {HTML_ATTR_Clear, TEXT("all"), HTML_ATTR_Clear_VAL_All_},
! 500: {HTML_ATTR_Clear, TEXT("none"), HTML_ATTR_Clear_VAL_None_},
! 501:
! 502: {HTML_ATTR_NumberStyle, TEXT("1"), HTML_ATTR_NumberStyle_VAL_Arabic_},
! 503: {HTML_ATTR_NumberStyle, TEXT("a"), HTML_ATTR_NumberStyle_VAL_LowerAlpha},
! 504: {HTML_ATTR_NumberStyle, TEXT("A"), HTML_ATTR_NumberStyle_VAL_UpperAlpha},
! 505: {HTML_ATTR_NumberStyle, TEXT("i"), HTML_ATTR_NumberStyle_VAL_LowerRoman},
! 506: {HTML_ATTR_NumberStyle, TEXT("I"), HTML_ATTR_NumberStyle_VAL_UpperRoman},
! 507:
! 508: {HTML_ATTR_BulletStyle, TEXT("disc"), HTML_ATTR_BulletStyle_VAL_disc},
! 509: {HTML_ATTR_BulletStyle, TEXT("square"), HTML_ATTR_BulletStyle_VAL_square},
! 510: {HTML_ATTR_BulletStyle, TEXT("circle"), HTML_ATTR_BulletStyle_VAL_circle},
! 511:
! 512: {HTML_ATTR_ItemStyle, TEXT("1"), HTML_ATTR_ItemStyle_VAL_Arabic_},
! 513: {HTML_ATTR_ItemStyle, TEXT("a"), HTML_ATTR_ItemStyle_VAL_LowerAlpha},
! 514: {HTML_ATTR_ItemStyle, TEXT("A"), HTML_ATTR_ItemStyle_VAL_UpperAlpha},
! 515: {HTML_ATTR_ItemStyle, TEXT("i"), HTML_ATTR_ItemStyle_VAL_LowerRoman},
! 516: {HTML_ATTR_ItemStyle, TEXT("I"), HTML_ATTR_ItemStyle_VAL_UpperRoman},
! 517: {HTML_ATTR_ItemStyle, TEXT("disc"), HTML_ATTR_ItemStyle_VAL_disc},
! 518: {HTML_ATTR_ItemStyle, TEXT("square"), HTML_ATTR_ItemStyle_VAL_square},
! 519: {HTML_ATTR_ItemStyle, TEXT("circle"), HTML_ATTR_ItemStyle_VAL_circle},
! 520:
! 521: {HTML_ATTR_Button_type, TEXT("button"), HTML_ATTR_Button_type_VAL_button},
! 522: {HTML_ATTR_Button_type, TEXT("submit"), HTML_ATTR_Button_type_VAL_submit},
! 523: {HTML_ATTR_Button_type, TEXT("reset"), HTML_ATTR_Button_type_VAL_reset},
! 524:
! 525: {HTML_ATTR_mode, TEXT("display"), HTML_ATTR_mode_VAL_display},
! 526: {HTML_ATTR_mode, TEXT("inline"), HTML_ATTR_mode_VAL_inline_math},
! 527:
! 528: {HTML_ATTR_frame, TEXT("void"), HTML_ATTR_frame_VAL_void},
! 529: {HTML_ATTR_frame, TEXT("above"), HTML_ATTR_frame_VAL_above},
! 530: {HTML_ATTR_frame, TEXT("below"), HTML_ATTR_frame_VAL_below},
! 531: {HTML_ATTR_frame, TEXT("hsides"), HTML_ATTR_frame_VAL_hsides},
! 532: {HTML_ATTR_frame, TEXT("lhs"), HTML_ATTR_frame_VAL_lhs},
! 533: {HTML_ATTR_frame, TEXT("rhs"), HTML_ATTR_frame_VAL_rhs},
! 534: {HTML_ATTR_frame, TEXT("vsides"), HTML_ATTR_frame_VAL_vsides},
! 535: {HTML_ATTR_frame, TEXT("box"), HTML_ATTR_frame_VAL_box},
! 536: {HTML_ATTR_frame, TEXT("border"), HTML_ATTR_frame_VAL_border},
! 537:
! 538: {HTML_ATTR_frameborder, TEXT("0"), HTML_ATTR_frameborder_VAL_Border0},
! 539: {HTML_ATTR_frameborder, TEXT("1"), HTML_ATTR_frameborder_VAL_Border1},
! 540:
! 541: {HTML_ATTR_scrolling, TEXT("yes"), HTML_ATTR_scrolling_VAL_Yes_},
! 542: {HTML_ATTR_scrolling, TEXT("no"), HTML_ATTR_scrolling_VAL_No_},
! 543: {HTML_ATTR_scrolling, TEXT("auto"), HTML_ATTR_scrolling_VAL_auto_},
! 544:
! 545: {HTML_ATTR_rules_, TEXT("none"), HTML_ATTR_rules__VAL_none_},
! 546: {HTML_ATTR_rules_, TEXT("groups"), HTML_ATTR_rules__VAL_groups},
! 547: {HTML_ATTR_rules_, TEXT("rows"), HTML_ATTR_rules__VAL_rows},
! 548: {HTML_ATTR_rules_, TEXT("cols"), HTML_ATTR_rules__VAL_cols},
! 549: {HTML_ATTR_rules_, TEXT("all"), HTML_ATTR_rules__VAL_all},
! 550:
! 551: {HTML_ATTR_Cell_align, TEXT("left"), HTML_ATTR_Cell_align_VAL_Cell_left},
! 552: {HTML_ATTR_Cell_align, TEXT("center"), HTML_ATTR_Cell_align_VAL_Cell_center},
! 553: {HTML_ATTR_Cell_align, TEXT("right"), HTML_ATTR_Cell_align_VAL_Cell_right},
! 554: {HTML_ATTR_Cell_align, TEXT("justify"), HTML_ATTR_Cell_align_VAL_Cell_justify},
! 555: {HTML_ATTR_Cell_align, TEXT("char"), HTML_ATTR_Cell_align_VAL_Cell_char},
! 556:
! 557: {HTML_ATTR_Alignment, TEXT("top"), HTML_ATTR_Alignment_VAL_Top_},
! 558: {HTML_ATTR_Alignment, TEXT("middle"), HTML_ATTR_Alignment_VAL_Middle_},
! 559: {HTML_ATTR_Alignment, TEXT("bottom"), HTML_ATTR_Alignment_VAL_Bottom_},
! 560: {HTML_ATTR_Alignment, TEXT("left"), HTML_ATTR_Alignment_VAL_Left_},
! 561: {HTML_ATTR_Alignment, TEXT("right"), HTML_ATTR_Alignment_VAL_Right_},
! 562:
! 563: {HTML_ATTR_METHOD, TEXT("get"), HTML_ATTR_METHOD_VAL_Get_},
! 564: {HTML_ATTR_METHOD, TEXT("post"), HTML_ATTR_METHOD_VAL_Post_},
! 565:
! 566: {HTML_ATTR_Position, TEXT("top"), HTML_ATTR_Position_VAL_Position_top},
! 567: {HTML_ATTR_Position, TEXT("bottom"), HTML_ATTR_Position_VAL_Position_bottom},
! 568: {HTML_ATTR_Position, TEXT("left"), HTML_ATTR_Position_VAL_Position_left},
! 569: {HTML_ATTR_Position, TEXT("right"), HTML_ATTR_Position_VAL_Position_right},
! 570:
! 571: {HTML_ATTR_Row_valign, TEXT("top"), HTML_ATTR_Row_valign_VAL_Row_top},
! 572: {HTML_ATTR_Row_valign, TEXT("middle"), HTML_ATTR_Row_valign_VAL_Row_middle},
! 573: {HTML_ATTR_Row_valign, TEXT("bottom"), HTML_ATTR_Row_valign_VAL_Row_bottom},
! 574: {HTML_ATTR_Row_valign, TEXT("baseline"), HTML_ATTR_Row_valign_VAL_Row_baseline},
! 575:
! 576: {HTML_ATTR_Cell_valign, TEXT("top"), HTML_ATTR_Cell_valign_VAL_Cell_top},
! 577: {HTML_ATTR_Cell_valign, TEXT("middle"), HTML_ATTR_Cell_valign_VAL_Cell_middle},
! 578: {HTML_ATTR_Cell_valign, TEXT("bottom"), HTML_ATTR_Cell_valign_VAL_Cell_bottom},
! 579: {HTML_ATTR_Cell_valign, TEXT("baseline"), HTML_ATTR_Cell_valign_VAL_Cell_baseline},
! 580:
! 581: {HTML_ATTR_shape, TEXT("rect"), HTML_ATTR_shape_VAL_rectangle},
! 582: {HTML_ATTR_shape, TEXT("circle"), HTML_ATTR_shape_VAL_circle},
! 583: {HTML_ATTR_shape, TEXT("poly"), HTML_ATTR_shape_VAL_polygon},
! 584:
! 585: {HTML_ATTR_valuetype, TEXT("data"), HTML_ATTR_valuetype_VAL_data_},
! 586: {HTML_ATTR_valuetype, TEXT("ref"), HTML_ATTR_valuetype_VAL_ref},
! 587: {HTML_ATTR_valuetype, TEXT("object"), HTML_ATTR_valuetype_VAL_object_},
! 588:
! 589: /* HTML attribute TYPE generates a Thot element */
! 590: {DummyAttribute, TEXT("button"), HTML_EL_Button_Input},
! 591: {DummyAttribute, TEXT("checkbox"), HTML_EL_Checkbox_Input},
! 592: {DummyAttribute, TEXT("file"), HTML_EL_File_Input},
! 593: {DummyAttribute, TEXT("hidden"), HTML_EL_Hidden_Input},
! 594: {DummyAttribute, TEXT("image"), HTML_EL_PICTURE_UNIT},
! 595: {DummyAttribute, TEXT("password"), HTML_EL_Password_Input},
! 596: {DummyAttribute, TEXT("radio"), HTML_EL_Radio_Input},
! 597: {DummyAttribute, TEXT("reset"), HTML_EL_Reset_Input},
! 598: {DummyAttribute, TEXT("submit"), HTML_EL_Submit_Input},
! 599: {DummyAttribute, TEXT("text"), HTML_EL_Text_Input},
! 600:
! 601: /* The following declarations allow the parser to accept boolean attributes */
! 602: /* written "checked=CHECKED"), for instance */
! 603: {HTML_ATTR_ISMAP, TEXT("ismap"), HTML_ATTR_ISMAP_VAL_Yes_},
! 604: {HTML_ATTR_nohref, TEXT("nohref"), HTML_ATTR_nohref_VAL_Yes_},
! 605: {HTML_ATTR_COMPACT, TEXT("compact"), HTML_ATTR_COMPACT_VAL_Yes_},
! 606: {HTML_ATTR_Multiple, TEXT("multiple"), HTML_ATTR_Multiple_VAL_Yes_},
! 607: {HTML_ATTR_Selected, TEXT("selected"), HTML_ATTR_Selected_VAL_Yes_},
! 608: {HTML_ATTR_Checked, TEXT("checked"), HTML_ATTR_Checked_VAL_Yes_},
! 609: {HTML_ATTR_No_wrap, TEXT("nowrap"), HTML_ATTR_No_wrap_VAL_no_wrap},
! 610: {HTML_ATTR_NoShade, TEXT("noshade"), HTML_ATTR_NoShade_VAL_NoShade_},
! 611: {HTML_ATTR_declare, TEXT("declare"), HTML_ATTR_declare_VAL_Yes_},
! 612: {HTML_ATTR_defer, TEXT("defer"), HTML_ATTR_defer_VAL_Yes_},
! 613: {HTML_ATTR_disabled, TEXT("disabled"), HTML_ATTR_disabled_VAL_Yes_},
! 614: {HTML_ATTR_readonly, TEXT("readonly"), HTML_ATTR_readonly_VAL_Yes_},
! 615: {HTML_ATTR_no_resize, TEXT("noresize"), HTML_ATTR_no_resize_VAL_Yes_},
! 616: {0, TEXT(""), 0} /* Last entry. Mandatory */
! 617: };
! 618:
! 619:
! 620: /* ---------------------- static variables ---------------------- */
! 621:
! 622: /* parser stack */
! 623: /* maximum stack height */
! 624: #define MaxStack 200
! 625: /* entry of GIMappingTable */
! 626: static int GINumberStack[MaxStack];
! 627: /* element in the Thot abstract tree */
! 628: static Element ElementStack[MaxStack];
! 629: /* level of element in the Thot tree */
! 630: static int ThotLevel[MaxStack];
! 631: /* element language */
! 632: static Language LanguageStack[MaxStack];
! 633: /* first free element on the stack */
! 634: static int StackLevel = 0;
! 635:
! 636: /* number of lines read in the file */
! 637: static int NumberOfLinesRead = 0;
! 638: /* number of characters read in the current line */
! 639: static int NumberOfCharRead = 0;
! 640: /* <PRE> has just been read */
! 641: static ThotBool AfterTagPRE = FALSE;
! 642: /* reading the content of a STYLE element */
! 643: static ThotBool ParsingCSS = FALSE;
! 644: /* reading the content of a text area element */
! 645: static ThotBool ParsingTextArea = FALSE;
! 646: /* <TABLE> has been read */
! 647: static int WithinTable = 0;
! 648: /* path or URL of the document */
! 649: static CHAR_T* docURL = NULL;
! 650:
! 651: /* line number in the source file of the beginning
! 652: of the text contained in the buffer */
! 653: static int BufferLineNumber = 0;
! 654:
! 655: /* information about the Thot document under construction */
! 656: /* the Thot document */
! 657: static Document theDocument = 0;
! 658: /* language used in the document */
! 659: static Language currentLanguage;
! 660: /* the HTML structure schema */
! 661: static SSchema DocumentSSchema = NULL;
! 662: /* root element of the document */
! 663: static Element rootElement;
! 664: /* last element created */
! 665: static Element lastElement = NULL;
! 666: /* last element is complete */
! 667: static ThotBool lastElementClosed = FALSE;
! 668: /* index in the GIMappingTable of the
! 669: element being created */
! 670: static int lastElemEntry = 0;
! 671: /* last attribute created */
! 672: static Attribute lastAttribute = NULL;
! 673: /* element with which the last
! 674: attribute has been associated */
! 675: static Attribute lastAttrElement = NULL;
! 676: /* entry in the AttributeMappingTable
! 677: of the attribute being created */
! 678: static AttributeMapping* lastAttrEntry = NULL;
! 679: /* the last attribute encountered is invalid */
! 680: static ThotBool UnknownAttr = FALSE;
! 681: static ThotBool ReadingAnAttrValue = FALSE;
! 682: /* TEXT element of the current Comment element */
! 683: static Element CommentText = NULL;
! 684: /* the last start tag encountered is invalid */
! 685: static ThotBool UnknownTag = FALSE;
! 686: /* character data should be catenated
! 687: with the last Text element */
! 688: static ThotBool MergeText = FALSE;
! 689: static ThotBool HTMLrootClosed = FALSE;
! 690: static CHAR_T* HTMLrootClosingTag = NULL;
! 691: static PtrElemToBeChecked FirstElemToBeChecked = NULL;
! 692: static PtrElemToBeChecked LastElemToBeChecked = NULL;
! 693:
! 694: /* information about an entity being read */
! 695: /* maximum size entity */
! 696: #define MaxEntityLength 50
! 697: /* entry of the entity table that
! 698: matches the entity read so far */
! 699: static int EntityTableEntry = 0;
! 700: /* rank of the last matching
! 701: character in that entry */
! 702: static int CharRank = 0;
! 703:
! 704: /* maximum size of error messages */
! 705: #define MaxMsgLength 200
! 706:
! 707:
! 708: #ifdef __STDC__
! 709: static void ProcessStartGI (CHAR_T* GIname);
! 710: static ThotBool InsertElement (Element * el);
! 711: #else
! 712: static void ProcessStartGI ();
! 713: static ThotBool InsertElement ();
! 714: #endif
! 715:
! 716: static FILE* ErrFile = (FILE*) 0;
! 717: static CHAR_T ErrFileName [80];
! 718:
! 719: extern CHARSET CharEncoding;
! 720: extern ThotBool charset_undefined;
! 721:
! 722:
! 723: /*----------------------------------------------------------------------
! 724: copyCEstring
! 725: Create a copy of the string of elements pointed by first and
! 726: return a pointer on the first element of the copy.
! 727: ----------------------------------------------------------------------*/
! 728: #ifdef __STDC__
! 729: static PtrClosedElement copyCEstring (PtrClosedElement first)
! 730: #else
! 731: static PtrClosedElement copyCEstring (first)
! 732: PtrClosedElement first;
! 733:
! 734: #endif
! 735: {
! 736: PtrClosedElement ret, cur, next, prev;
! 737:
! 738: ret = NULL;
! 739: cur = first;
! 740: prev = NULL;
! 741: while (cur != NULL)
! 742: {
! 743: next = (PtrClosedElement) TtaGetMemory (sizeof (ClosedElement));
! 744: next->nextClosedElem = NULL;
! 745: next->tagNum = cur->tagNum;
! 746: if (ret == NULL)
! 747: ret = next;
! 748: else
! 749: prev->nextClosedElem = next;
! 750: prev = next;
! 751: cur = cur->nextClosedElem;
! 752: }
! 753: return ret;
! 754: }
! 755:
! 756:
! 757: /*----------------------------------------------------------------------
! 758: Within
! 759: Checks if an element of type ThotType is in the stack.
! 760: ----------------------------------------------------------------------*/
! 761: #ifdef __STDC__
! 762: static ThotBool Within (int ThotType,
! 763: SSchema ThotSSchema)
! 764: #else
! 765: static ThotBool Within (ThotType,
! 766: ThotSSchema)
! 767: int ThotType;
! 768: SSchema ThotSSchema;
! 769:
! 770: #endif
! 771: {
! 772: ThotBool ret;
! 773: int i;
! 774: ElementType elType;
! 775:
! 776: ret = FALSE;
! 777: i = StackLevel - 1;
! 778: while (i >= 0 && !ret)
! 779: {
! 780: if (ElementStack[i] != NULL)
! 781: {
! 782: elType = TtaGetElementType (ElementStack[i]);
! 783: if (elType.ElTypeNum == ThotType &&
! 784: elType.ElSSchema == ThotSSchema)
! 785: ret = TRUE;
! 786: }
! 787: i--;
! 788: }
! 789: return ret;
! 790: }
! 791:
! 792:
! 793: /*----------------------------------------------------------------------
! 794: InsertSibling return TRUE if the new element must be inserted
! 795: in the Thot document as a sibling of lastElement;
! 796: return FALSE it it must be inserted as a child.
! 797: ----------------------------------------------------------------------*/
! 798: static ThotBool InsertSibling ()
! 799: {
! 800: if (StackLevel == 0)
! 801: return FALSE;
! 802: else if (lastElementClosed ||
! 803: TtaIsLeaf (TtaGetElementType (lastElement)) ||
! 804: (GINumberStack[StackLevel - 1] >= 0 &&
! 805: pHTMLGIMapping[GINumberStack[StackLevel - 1]].htmlContents == 'E'))
! 806: return TRUE;
! 807: else
! 808: return FALSE;
! 809: }
! 810:
! 811: /*----------------------------------------------------------------------
! 812: IsEmptyElement
! 813: Return TRUE if element el is defined as an empty element.
! 814: ----------------------------------------------------------------------*/
! 815: #ifdef __STDC__
! 816: static ThotBool IsEmptyElement (Element el)
! 817: #else
! 818: static ThotBool IsEmptyElement (el)
! 819: Element el;
! 820:
! 821: #endif
! 822: {
! 823: ElementType elType;
! 824: int i;
! 825: ThotBool ret;
! 826:
! 827: ret = FALSE;
! 828: elType = TtaGetElementType (el);
! 829: i = 0;
! 830: while (EmptyElement[i] > 0 && EmptyElement[i] != elType.ElTypeNum)
! 831: i++;
! 832: if (EmptyElement[i] == elType.ElTypeNum)
! 833: ret = TRUE;
! 834: return ret;
! 835: }
! 836:
! 837: /*----------------------------------------------------------------------
! 838: IsBlockElement
! 839: Return TRUE if element el is a block element.
! 840: ----------------------------------------------------------------------*/
! 841: #ifdef __STDC__
! 842: static ThotBool IsBlockElement (Element el)
! 843: #else
! 844: static ThotBool IsBlockElement (el)
! 845: Element el;
! 846:
! 847: #endif
! 848: {
! 849: ElementType elType;
! 850: int i;
! 851: ThotBool ret;
! 852:
! 853: ret = FALSE;
! 854: elType = TtaGetElementType (el);
! 855: i = 0;
! 856: while (BlockLevelElement[i] > 0 &&
! 857: BlockLevelElement[i] != elType.ElTypeNum)
! 858: i++;
! 859: if (BlockLevelElement[i] == elType.ElTypeNum)
! 860: ret = TRUE;
! 861: return ret;
! 862: }
! 863:
! 864: /*----------------------------------------------------------------------
! 865: CannotContainText
! 866: Return TRUE if element el is a block element.
! 867: ----------------------------------------------------------------------*/
! 868: #ifdef __STDC__
! 869: static ThotBool CannotContainText (ElementType elType)
! 870: #else
! 871: static ThotBool CannotContainText (elType)
! 872: ElementType elType;
! 873:
! 874: #endif
! 875: {
! 876: int i;
! 877: ThotBool ret;
! 878:
! 879: if (ustrcmp (TtaGetSSchemaName (elType.ElSSchema), TEXT("HTML")))
! 880: /* not an HTML element */
! 881: ret = TRUE;
! 882: else
! 883: {
! 884: ret = FALSE;
! 885: i = 0;
! 886: while (NoTextChild[i] > 0 && NoTextChild[i] != elType.ElTypeNum)
! 887: i++;
! 888: if (NoTextChild[i] == elType.ElTypeNum)
! 889: ret = TRUE;
! 890: }
! 891: return ret;
! 892: }
! 893:
! 894: /*----------------------------------------------------------------------
! 895: BlockInCharLevelElem
! 896: Element el is a block-level element. If its parent is a character-level
! 897: element, add a record in the list of block-level elements to be
! 898: checked when the document is complete.
! 899: ----------------------------------------------------------------------*/
! 900: #ifdef __STDC__
! 901: static void BlockInCharLevelElem (Element el)
! 902: #else
! 903: static void BlockInCharLevelElem (el)
! 904: Element el;
! 905:
! 906: #endif
! 907: {
! 908: PtrElemToBeChecked elTBC;
! 909: Element parent;
! 910:
! 911: if (LastElemToBeChecked != NULL)
! 912: if (LastElemToBeChecked->Elem == el)
! 913: /* this element is already in the queue */
! 914: return;
! 915:
! 916: parent = TtaGetParent (el);
! 917: if (parent != NULL)
! 918: if (IsCharacterLevelElement (parent))
! 919: {
! 920: elTBC = (PtrElemToBeChecked) TtaGetMemory(sizeof(ElemToBeChecked));
! 921: elTBC->Elem = el;
! 922: elTBC->nextElemToBeChecked = NULL;
! 923: if (LastElemToBeChecked == NULL)
! 924: FirstElemToBeChecked = elTBC;
! 925: else
! 926: LastElemToBeChecked->nextElemToBeChecked = elTBC;
! 927: LastElemToBeChecked = elTBC;
! 928: }
! 929: }
! 930:
! 931: /*----------------------------------------------------------------------
! 932: CheckSurrounding
! 933: Inserts an element Pseudo_paragraph in the abstract tree of the Thot
! 934: document if el is a leaf and is not allowed to be a child of element parent.
! 935: Return TRUE if element *el has been inserted in the tree.
! 936: ----------------------------------------------------------------------*/
! 937: #ifdef __STDC__
! 938: static ThotBool CheckSurrounding (Element *el,
! 939: Element parent)
! 940: #else
! 941: static ThotBool CheckSurrounding (el,
! 942: parent)
! 943: Element *el;
! 944: Element parent;
! 945:
! 946: #endif
! 947: {
! 948: ElementType parentType, newElType, elType;
! 949: Element newEl, ancestor, prev, prevprev;
! 950: ThotBool ret;
! 951:
! 952: if (parent == NULL)
! 953: return(FALSE);
! 954: ret = FALSE;
! 955: elType = TtaGetElementType (*el);
! 956: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT || elType.ElTypeNum == HTML_EL_BR
! 957: || elType.ElTypeNum == HTML_EL_PICTURE_UNIT
! 958: || elType.ElTypeNum == HTML_EL_Input
! 959: || elType.ElTypeNum == HTML_EL_Text_Area)
! 960: {
! 961: /* the element to be inserted is a character string */
! 962: /* Search the ancestor that is not a character level element */
! 963: ancestor = parent;
! 964: while (ancestor != NULL && IsCharacterLevelElement (ancestor))
! 965: ancestor = TtaGetParent (ancestor);
! 966: if (ancestor != NULL)
! 967: {
! 968: elType = TtaGetElementType (ancestor);
! 969: if (CannotContainText (elType) &&
! 970: !Within (HTML_EL_Option_Menu, DocumentSSchema))
! 971: /* Element ancestor cannot contain text directly. Create a */
! 972: /* Pseudo_paragraph element as the parent of the text element */
! 973: {
! 974: newElType.ElSSchema = DocumentSSchema;
! 975: newElType.ElTypeNum = HTML_EL_Pseudo_paragraph;
! 976: newEl = TtaNewElement (theDocument, newElType);
! 977: TtaSetElementLineNumber (newEl, NumberOfLinesRead);
! 978: /* insert the new Pseudo_paragraph element */
! 979: InsertElement (&newEl);
! 980: if (newEl != NULL)
! 981: {
! 982: /* insert the Text element in the tree */
! 983: TtaInsertFirstChild (el, newEl, theDocument);
! 984: BlockInCharLevelElem (newEl);
! 985: ret = TRUE;
! 986:
! 987: /* if previous siblings of the new Pseudo_paragraph element
! 988: are character level elements, move them within the new
! 989: Pseudo_paragraph element */
! 990: prev = newEl;
! 991: TtaPreviousSibling (&prev);
! 992: while (prev != NULL)
! 993: {
! 994: if (!IsCharacterLevelElement (prev))
! 995: prev = NULL;
! 996: else
! 997: {
! 998: prevprev = prev; TtaPreviousSibling (&prevprev);
! 999: TtaRemoveTree (prev, theDocument);
! 1000: TtaInsertFirstChild (&prev, newEl, theDocument);
! 1001: prev = prevprev;
! 1002: }
! 1003: }
! 1004: }
! 1005: }
! 1006: }
! 1007: }
! 1008:
! 1009: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT ||
! 1010: (elType.ElTypeNum != HTML_EL_Inserted_Text &&
! 1011: IsCharacterLevelElement (*el)))
! 1012: /* it is a character level element */
! 1013: {
! 1014: parentType = TtaGetElementType (parent);
! 1015: if (parentType.ElTypeNum == HTML_EL_Text_Area)
! 1016: /* A basic element cannot be a child of a Text_Area */
! 1017: /* create a Inserted_Text element as a child of Text_Area */
! 1018: {
! 1019: newElType.ElSSchema = DocumentSSchema;
! 1020: newElType.ElTypeNum = HTML_EL_Inserted_Text;
! 1021: newEl = TtaNewElement (theDocument, newElType);
! 1022: TtaSetElementLineNumber (newEl, NumberOfLinesRead);
! 1023: InsertElement (&newEl);
! 1024: if (newEl != NULL)
! 1025: {
! 1026: TtaInsertFirstChild (el, newEl, theDocument);
! 1027: ret = TRUE;
! 1028: }
! 1029: }
! 1030: }
! 1031: return ret;
! 1032: }
! 1033:
! 1034:
! 1035: /*----------------------------------------------------------------------
! 1036: InsertElement
! 1037: Inserts element el in the abstract tree of the Thot document,
! 1038: at the current position.
! 1039: ----------------------------------------------------------------------*/
! 1040: #ifdef __STDC__
! 1041: static ThotBool InsertElement (Element * el)
! 1042: #else
! 1043: static ThotBool InsertElement (el)
! 1044: Element *el;
! 1045:
! 1046: #endif
! 1047: {
! 1048: ThotBool ret;
! 1049: Element parent;
! 1050:
! 1051: if (InsertSibling ())
! 1052: {
! 1053: if (lastElement == NULL)
! 1054: parent = NULL;
! 1055: else
! 1056: parent = TtaGetParent (lastElement);
! 1057: if (!CheckSurrounding (el, parent))
! 1058: if (parent != NULL)
! 1059: TtaInsertSibling (*el, lastElement, FALSE, theDocument);
! 1060: else
! 1061: {
! 1062: TtaDeleteTree (*el, theDocument);
! 1063: *el = NULL;
! 1064: }
! 1065: ret = TRUE;
! 1066: }
! 1067: else
! 1068: {
! 1069: if (!CheckSurrounding (el, lastElement))
! 1070: TtaInsertFirstChild (el, lastElement, theDocument);
! 1071: ret = FALSE;
! 1072: }
! 1073: if (*el != NULL)
! 1074: {
! 1075: lastElement = *el;
! 1076: lastElementClosed = FALSE;
! 1077: }
! 1078: return ret;
! 1079: }
! 1080:
! 1081: /*----------------------------------------------------------------------
! 1082: CreateAttr
! 1083: Create an attribute of type attrType for the element el.
! 1084: ----------------------------------------------------------------------*/
! 1085: #ifdef __STDC__
! 1086: static void CreateAttr (Element el,
! 1087: AttributeType attrType,
! 1088: CHAR_T* text,
! 1089: ThotBool invalid)
! 1090: #else
! 1091: static void CreateAttr (el,
! 1092: attrType,
! 1093: text,
! 1094: invalid)
! 1095: Element el;
! 1096: AttributeType attrType;
! 1097: CHAR_T* text;
! 1098: ThotBool invalid;
! 1099:
! 1100: #endif
! 1101: {
! 1102: int attrKind;
! 1103: int length;
! 1104: CHAR_T* buffer;
! 1105: Attribute attr, oldAttr;
! 1106:
! 1107: if (attrType.AttrTypeNum != 0)
! 1108: {
! 1109: oldAttr = TtaGetAttribute (el, attrType);
! 1110: if (oldAttr != NULL)
! 1111: /* this attribute already exists */
! 1112: attr = oldAttr;
! 1113: else
! 1114: /* create a new attribute and attach it to the element */
! 1115: {
! 1116: attr = TtaNewAttribute (attrType);
! 1117: TtaAttachAttribute (el, attr, theDocument);
! 1118: }
! 1119: lastAttribute = attr;
! 1120: lastAttrElement = el;
! 1121: TtaGiveAttributeType (attr, &attrType, &attrKind);
! 1122: if (attrKind == 0) /* enumerate */
! 1123: TtaSetAttributeValue (attr, 1, el, theDocument);
! 1124: /* attribute BORDER without any value (ThotBool attribute) is */
! 1125: /* considered as BORDER=1 */
! 1126: if (attrType.AttrTypeNum == HTML_ATTR_Border)
! 1127: TtaSetAttributeValue (attr, 1, el, theDocument);
! 1128: if (invalid)
! 1129: /* Copy the name of the invalid attribute as the content */
! 1130: /* of the Invalid_attribute attribute. */
! 1131: {
! 1132: length = ustrlen (text) + 2;
! 1133: length += TtaGetTextAttributeLength (attr);
! 1134: buffer = TtaAllocString (length + 1);
! 1135: TtaGiveTextAttributeValue (attr, buffer, &length);
! 1136: ustrcat (buffer, TEXT(" "));
! 1137: ustrcat (buffer, text);
! 1138: TtaSetAttributeText (attr, buffer, el, theDocument);
! 1139: TtaFreeMemory (buffer);
! 1140: }
! 1141: }
! 1142: }
! 1143:
! 1144: /*----------------------------------------------------------------------
! 1145: ProcessOptionElement
! 1146: If multiple is FALSE, remove the SELECTED attribute from the
! 1147: option element, except if it's element el.
! 1148: If parsing is TRUE, associate a DefaultSelected attribute with
! 1149: element option if it has a SELECTED attribute.
! 1150: ----------------------------------------------------------------------*/
! 1151: #ifdef __STDC__
! 1152: static void ProcessOptionElement (Element option,
! 1153: Element el,
! 1154: Document doc,
! 1155: ThotBool multiple,
! 1156: ThotBool parsing)
! 1157: #else /* __STDC__ */
! 1158: static void ProcessOptionElement (option,
! 1159: el,
! 1160: doc,
! 1161: multiple,
! 1162: parsing)
! 1163: Element option;
! 1164: Element el;
! 1165: Document doc;
! 1166: ThotBool multiple;
! 1167: ThotBool parsing;
! 1168:
! 1169: #endif /* __STDC__ */
! 1170: {
! 1171: ElementType elType;
! 1172: AttributeType attrType;
! 1173: Attribute attr;
! 1174:
! 1175: elType = TtaGetElementType (option);
! 1176: attrType.AttrSSchema = elType.ElSSchema;
! 1177: attrType.AttrTypeNum = HTML_ATTR_Selected;
! 1178: if (!multiple && option != el)
! 1179: {
! 1180: /* Search the SELECTED attribute */
! 1181: attr = TtaGetAttribute (option, attrType);
! 1182: /* remove it if it exists */
! 1183: if (attr != NULL)
! 1184: TtaRemoveAttribute (option, attr, doc);
! 1185: }
! 1186: if (parsing)
! 1187: {
! 1188: attr = TtaGetAttribute (option, attrType);
! 1189: if (attr != NULL)
! 1190: {
! 1191: attrType.AttrTypeNum = HTML_ATTR_DefaultSelected;
! 1192: attr = TtaGetAttribute (option, attrType);
! 1193: if (!attr)
! 1194: {
! 1195: /* create the DefaultSelected attribute */
! 1196: attr = TtaNewAttribute (attrType);
! 1197: TtaAttachAttribute (option, attr, doc);
! 1198: TtaSetAttributeValue (attr, HTML_ATTR_DefaultSelected_VAL_Yes_,
! 1199: option, doc);
! 1200: }
! 1201: }
! 1202: }
! 1203: }
! 1204:
! 1205:
! 1206: /*----------------------------------------------------------------------
! 1207: LastLeafInElement
! 1208: return the last leaf element in element el.
! 1209: ----------------------------------------------------------------------*/
! 1210: #ifdef __STDC__
! 1211: static Element LastLeafInElement (Element el)
! 1212: #else
! 1213: static Element LastLeafInElement (el)
! 1214: Element el;
! 1215:
! 1216: #endif
! 1217: {
! 1218: Element child, lastLeaf;
! 1219:
! 1220: child = el;
! 1221: lastLeaf = NULL;
! 1222: while (child != NULL)
! 1223: {
! 1224: child = TtaGetLastChild (child);
! 1225: if (child != NULL)
! 1226: lastLeaf = child;
! 1227: }
! 1228: return lastLeaf;
! 1229: }
! 1230:
! 1231:
! 1232:
! 1233: /*----------------------------------------------------------------------
! 1234: ElementComplete
! 1235: Element el is complete. Check its attributes and its contents.
! 1236: ----------------------------------------------------------------------*/
! 1237: #ifdef __STDC__
! 1238: static void ElementComplete (Element el)
! 1239: #else
! 1240: static void ElementComplete (el)
! 1241: Element el;
! 1242: #endif
! 1243: {
! 1244: ElementType elType, newElType, childType;
! 1245: Element constElem, child, desc, leaf, prev, next, last,
! 1246: elFrames, lastFrame, lastChild;
! 1247: Attribute attr;
! 1248: AttributeType attrType;
! 1249: Language lang;
! 1250: STRING text;
! 1251: CHAR_T lastChar[2];
! 1252: STRING name1;
! 1253: #ifdef STANDALONE
! 1254: STRING imageName, name2;
! 1255: #endif
! 1256: int length;
! 1257:
! 1258: elType = TtaGetElementType (el);
! 1259: /* is this a block-level element in a character-level element? */
! 1260: if (!IsCharacterLevelElement (el) && elType.ElTypeNum != HTML_EL_Comment_)
! 1261: BlockInCharLevelElem (el);
! 1262:
! 1263: newElType.ElSSchema = elType.ElSSchema;
! 1264: switch (elType.ElTypeNum)
! 1265: {
! 1266: case HTML_EL_Object: /* it's an object */
! 1267: /* create Object_Content */
! 1268: child = TtaGetFirstChild (el);
! 1269: if (child != NULL)
! 1270: elType = TtaGetElementType (child);
! 1271:
! 1272: /* is it the PICTURE element ? */
! 1273: if (child == NULL || elType.ElTypeNum != HTML_EL_PICTURE_UNIT)
! 1274: {
! 1275: desc = child;
! 1276: /* create the PICTURE element */
! 1277: elType.ElTypeNum = HTML_EL_PICTURE_UNIT;
! 1278: child = TtaNewTree (theDocument, elType, "");
! 1279: if (desc == NULL)
! 1280: TtaInsertFirstChild (&child, el, theDocument);
! 1281: else
! 1282: TtaInsertSibling (child, desc, TRUE, theDocument);
! 1283: }
! 1284: /* copy attribute data into SRC attribute of Object_Image */
! 1285: attrType.AttrSSchema = DocumentSSchema;
! 1286: attrType.AttrTypeNum = HTML_ATTR_data;
! 1287: attr = TtaGetAttribute (el, attrType);
! 1288: if (attr != NULL)
! 1289: {
! 1290: length = TtaGetTextAttributeLength (attr);
! 1291: if (length > 0)
! 1292: {
! 1293: name1 = TtaAllocString (length + 1);
! 1294: TtaGiveTextAttributeValue (attr, name1, &length);
! 1295: attrType.AttrTypeNum = HTML_ATTR_SRC;
! 1296: attr = TtaGetAttribute (child, attrType);
! 1297: if (attr == NULL)
! 1298: {
! 1299: attr = TtaNewAttribute (attrType);
! 1300: TtaAttachAttribute (child, attr, theDocument);
! 1301: }
! 1302: TtaSetAttributeText (attr, name1, child, theDocument);
! 1303: TtaFreeMemory (name1);
! 1304: }
! 1305: }
! 1306: /* is the Object_Content element already created ? */
! 1307: desc = child;
! 1308: TtaNextSibling(&desc);
! 1309: if (desc != NULL)
! 1310: elType = TtaGetElementType (desc);
! 1311:
! 1312: /* is it the Object_Content element ? */
! 1313: if (desc == NULL || elType.ElTypeNum != HTML_EL_Object_Content)
! 1314: {
! 1315: /* create Object_Content */
! 1316: elType.ElTypeNum = HTML_EL_Object_Content;
! 1317: desc = TtaNewTree (theDocument, elType, "");
! 1318: TtaInsertSibling (desc, child, FALSE, theDocument);
! 1319: /* move previous existing children into Object_Content */
! 1320: child = TtaGetLastChild(el);
! 1321: while (child != desc)
! 1322: {
! 1323: TtaRemoveTree (child, theDocument);
! 1324: TtaInsertFirstChild (&child, desc, theDocument);
! 1325: child = TtaGetLastChild(el);
! 1326: }
! 1327: }
! 1328: break;
! 1329:
! 1330: case HTML_EL_Unnumbered_List:
! 1331: case HTML_EL_Numbered_List:
! 1332: case HTML_EL_Menu:
! 1333: case HTML_EL_Directory:
! 1334: /* It's a List element. It should only have List_Item children.
! 1335: If it has List element chidren, move these List elements
! 1336: within their previous List_Item sibling. This is to fix
! 1337: a bug in document generated by Mozilla. */
! 1338: prev = NULL;
! 1339: next = NULL;
! 1340: child = TtaGetFirstChild (el);
! 1341: while (child != NULL)
! 1342: {
! 1343: next = child;
! 1344: TtaNextSibling (&next);
! 1345: elType = TtaGetElementType (child);
! 1346: if (elType.ElTypeNum == HTML_EL_Unnumbered_List ||
! 1347: elType.ElTypeNum == HTML_EL_Numbered_List ||
! 1348: elType.ElTypeNum == HTML_EL_Menu ||
! 1349: elType.ElTypeNum == HTML_EL_Directory)
! 1350: /* this list element is a child of another list element */
! 1351: if (prev)
! 1352: {
! 1353: elType = TtaGetElementType (prev);
! 1354: if (elType.ElTypeNum == HTML_EL_List_Item)
! 1355: {
! 1356: /* get the last child of the previous List_Item */
! 1357: desc = TtaGetFirstChild (prev);
! 1358: last = NULL;
! 1359: while (desc)
! 1360: {
! 1361: last = desc;
! 1362: TtaNextSibling (&desc);
! 1363: }
! 1364: /* move the list element after the last child of the
! 1365: previous List_Item */
! 1366: TtaRemoveTree (child, theDocument);
! 1367: if (last)
! 1368: TtaInsertSibling (child, last, FALSE, theDocument);
! 1369: else
! 1370: TtaInsertFirstChild (&child, prev, theDocument);
! 1371: child = prev;
! 1372: }
! 1373: }
! 1374: prev = child;
! 1375: child = next;
! 1376: }
! 1377: break;
! 1378:
! 1379: case HTML_EL_FRAMESET:
! 1380: /* The FRAMESET element is now complete. Gather all its FRAMESET
! 1381: and FRAME children and wrap them up in a Frames element */
! 1382: elFrames = NULL; lastFrame = NULL;
! 1383: lastChild = NULL;
! 1384: child = TtaGetFirstChild (el);
! 1385: while (child != NULL)
! 1386: {
! 1387: next = child;
! 1388: TtaNextSibling (&next);
! 1389: elType = TtaGetElementType (child);
! 1390: if (elType.ElTypeNum == HTML_EL_FRAMESET ||
! 1391: elType.ElTypeNum == HTML_EL_FRAME ||
! 1392: elType.ElTypeNum == HTML_EL_Comment_)
! 1393: {
! 1394: /* create the Frames element if it does not exist */
! 1395: if (elFrames == NULL)
! 1396: {
! 1397: newElType.ElSSchema = DocumentSSchema;
! 1398: newElType.ElTypeNum = HTML_EL_Frames;
! 1399: elFrames = TtaNewElement (theDocument, newElType);
! 1400: TtaSetElementLineNumber (elFrames, NumberOfLinesRead);
! 1401: TtaInsertSibling (elFrames, child, TRUE, theDocument);
! 1402: }
! 1403: /* move the element as the last child of the Frames element */
! 1404: TtaRemoveTree (child, theDocument);
! 1405: if (lastFrame == NULL)
! 1406: TtaInsertFirstChild (&child, elFrames, theDocument);
! 1407: else
! 1408: TtaInsertSibling (child, lastFrame, FALSE, theDocument);
! 1409: lastFrame = child;
! 1410: }
! 1411: child = next;
! 1412: }
! 1413: break;
! 1414:
! 1415: case HTML_EL_Input: /* it's an INPUT without any TYPE attribute */
! 1416: /* Create a child of type Text_Input */
! 1417: elType.ElTypeNum = HTML_EL_Text_Input;
! 1418: child = TtaNewTree (theDocument, elType, "");
! 1419: TtaSetElementLineNumber (child, NumberOfLinesRead);
! 1420: TtaInsertFirstChild (&child, el, theDocument);
! 1421: /* now, process it like a Text_Input element */
! 1422: case HTML_EL_Text_Input:
! 1423: case HTML_EL_Password_Input:
! 1424: case HTML_EL_File_Input:
! 1425: /* get element Inserted_Text */
! 1426: child = TtaGetFirstChild (el);
! 1427: if (child != NULL)
! 1428: {
! 1429: attrType.AttrSSchema = DocumentSSchema;
! 1430: attrType.AttrTypeNum = HTML_ATTR_Value_;
! 1431: attr = TtaGetAttribute (el, attrType);
! 1432: if (attr != NULL)
! 1433: {
! 1434: /* copy the value of attribute "value" into the first text
! 1435: leaf of element */
! 1436: length = TtaGetTextAttributeLength (attr);
! 1437: if (length > 0)
! 1438: {
! 1439: /* get the text leaf */
! 1440: leaf = TtaGetFirstChild (child);
! 1441: if (leaf != NULL)
! 1442: {
! 1443: childType = TtaGetElementType (leaf);
! 1444: if (childType.ElTypeNum == HTML_EL_TEXT_UNIT)
! 1445: {
! 1446: /* copy attribute value into the text leaf */
! 1447: text = TtaAllocString (length + 1);
! 1448: TtaGiveTextAttributeValue (attr, text, &length);
! 1449: TtaSetTextContent (leaf, text, currentLanguage,
! 1450: theDocument);
! 1451: TtaFreeMemory (text);
! 1452: }
! 1453: }
! 1454: }
! 1455: }
! 1456: }
! 1457: break;
! 1458:
! 1459: case HTML_EL_META:
! 1460: ParseCharset (el);
! 1461: break;
! 1462:
! 1463: case HTML_EL_STYLE_: /* it's a STYLE element */
! 1464: case HTML_EL_Preformatted: /* it's a PRE */
! 1465: case HTML_EL_SCRIPT: /* it's a SCRIPT element */
! 1466: /* if the last line of the Preformatted is empty, remove it */
! 1467: leaf = LastLeafInElement (el);
! 1468: if (leaf != NULL)
! 1469: {
! 1470: elType = TtaGetElementType (leaf);
! 1471: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT)
! 1472: /* the last leaf is a TEXT element */
! 1473: {
! 1474: length = TtaGetTextLength (leaf);
! 1475: if (length > 0)
! 1476: {
! 1477: TtaGiveSubString (leaf, lastChar, length, 1);
! 1478: if (lastChar[0] == EOL)
! 1479: /* last character is new line, delete it */
! 1480: {
! 1481: if (length == 1)
! 1482: /* empty TEXT element */
! 1483: TtaDeleteTree (leaf, theDocument);
! 1484: else
! 1485: /* remove the last character */
! 1486: TtaDeleteTextContent (leaf, length, 1,
! 1487: theDocument);
! 1488: }
! 1489: }
! 1490: }
! 1491: }
! 1492: if (ParsingCSS)
! 1493: {
! 1494: #ifndef STANDALONE
! 1495: text = GetStyleContents (el);
! 1496: if (text)
! 1497: {
! 1498: ReadCSSRules (theDocument, NULL, text, FALSE);
! 1499: TtaFreeMemory (text);
! 1500: }
! 1501: #endif /* !STANDALONE */
! 1502: ParsingCSS = FALSE;
! 1503: }
! 1504: /* and continue as if it were a Preformatted or a Script */
! 1505: break;
! 1506:
! 1507: case HTML_EL_Text_Area: /* it's a Text_Area */
! 1508: ParsingTextArea = FALSE;
! 1509: child = TtaGetFirstChild (el);
! 1510: if (child == NULL)
! 1511: /* it's an empty Text_Area */
! 1512: /* insert a Inserted_Text element in the element */
! 1513: {
! 1514: newElType.ElTypeNum = HTML_EL_Inserted_Text;
! 1515: child = TtaNewTree (theDocument, newElType, "");
! 1516: TtaInsertFirstChild (&child, el, theDocument);
! 1517: }
! 1518: else
! 1519: {
! 1520: /* save the text into Default_Value attribute */
! 1521: attrType.AttrSSchema = DocumentSSchema;
! 1522: attrType.AttrTypeNum = HTML_ATTR_Default_Value;
! 1523: if (TtaGetAttribute (el, attrType) == NULL)
! 1524: /* attribute Default_Value is missing */
! 1525: {
! 1526: attr = TtaNewAttribute (attrType);
! 1527: TtaAttachAttribute (el, attr, theDocument);
! 1528: desc = TtaGetFirstChild (child);
! 1529: length = TtaGetTextLength (desc) + 1;
! 1530: text = TtaAllocString (length);
! 1531: TtaGiveTextContent (desc, text, &length, &lang);
! 1532: TtaSetAttributeText (attr, text, el, theDocument);
! 1533: TtaFreeMemory (text);
! 1534: }
! 1535: }
! 1536: /* insert a Frame element */
! 1537: newElType.ElTypeNum = HTML_EL_Frame;
! 1538: constElem = TtaNewTree (theDocument, newElType, "");
! 1539: TtaInsertSibling (constElem, child, FALSE, theDocument);
! 1540: break;
! 1541:
! 1542: case HTML_EL_Radio_Input:
! 1543: case HTML_EL_Checkbox_Input:
! 1544: /* put an attribute Checked if it is missing */
! 1545: attrType.AttrSSchema = DocumentSSchema;
! 1546: attrType.AttrTypeNum = HTML_ATTR_Checked;
! 1547: if (TtaGetAttribute (el, attrType) == NULL)
! 1548: /* attribute Checked is missing */
! 1549: {
! 1550: attr = TtaNewAttribute (attrType);
! 1551: TtaAttachAttribute (el, attr, theDocument);
! 1552: TtaSetAttributeValue (attr, HTML_ATTR_Checked_VAL_No_, el,
! 1553: theDocument);
! 1554: }
! 1555: break;
! 1556:
! 1557: case HTML_EL_Option_Menu:
! 1558: /* Check that at least one option has a SELECTED attribute */
! 1559: OnlyOneOptionSelected (el, theDocument, TRUE);
! 1560: break;
! 1561: case HTML_EL_PICTURE_UNIT:
! 1562: #ifdef STANDALONE
! 1563: /* copy value of attribute SRC into the content of the element */
! 1564: attrType.AttrSSchema = DocumentSSchema;
! 1565: attrType.AttrTypeNum = HTML_ATTR_SRC;
! 1566: attr = TtaGetAttribute (el, attrType);
! 1567: if (attr != NULL)
! 1568: {
! 1569: length = TtaGetTextAttributeLength (attr);
! 1570: name1 = TtaAllocString (length + 1);
! 1571: name2 = TtaAllocString (length + 1);
! 1572: imageName = TtaAllocString (length + 1);
! 1573: TtaGiveTextAttributeValue (attr, name1, &length);
! 1574: /* extract image name from full name */
! 1575: TtaExtractName (name1, name2, imageName);
! 1576: if (ustrlen (imageName) == 0)
! 1577: /* full names ends with ''/ */
! 1578: TtaExtractName (name2, name1, imageName);
! 1579: if (ustrlen (imageName) != 0)
! 1580: TtaSetTextContent (el, imageName, currentLanguage, theDocument);
! 1581: TtaFreeMemory (name1);
! 1582: TtaFreeMemory (name2);
! 1583: TtaFreeMemory (imageName);
! 1584: }
! 1585: #endif /* STANDALONE */
! 1586: break;
! 1587:
! 1588: #ifndef STANDALONE
! 1589: case HTML_EL_LINK:
! 1590: CheckCSSLink (el, theDocument, DocumentSSchema);
! 1591: break;
! 1592: #endif /* STANDALONE */
! 1593:
! 1594: case HTML_EL_Data_cell:
! 1595: case HTML_EL_Heading_cell:
! 1596: /* insert a pseudo paragraph into empty cells */
! 1597: child = TtaGetFirstChild (el);
! 1598: if (child == NULL)
! 1599: {
! 1600: elType.ElTypeNum = HTML_EL_Pseudo_paragraph;
! 1601: child = TtaNewTree (theDocument, elType, "");
! 1602: if (child != NULL)
! 1603: TtaInsertFirstChild (&child, el, theDocument);
! 1604: }
! 1605:
! 1606: #ifndef STANDALONE
! 1607: /* detect whether we're parsing a whole table or just a cell */
! 1608: if (WithinTable == 0)
! 1609: NewCell (el, theDocument, FALSE);
! 1610: #endif /* STANDALONE */
! 1611: break;
! 1612:
! 1613: case HTML_EL_Table:
! 1614: #ifndef STANDALONE
! 1615: CheckTable (el, theDocument);
! 1616: #endif
! 1617: WithinTable--;
! 1618: break;
! 1619:
! 1620: #ifndef STANDALONE
! 1621: case HTML_EL_TITLE:
! 1622: /* show the TITLE in the main window */
! 1623: UpdateTitle (el, theDocument);
! 1624: break;
! 1625: #endif
! 1626:
! 1627: default:
! 1628: break;
! 1629: }
! 1630: }
! 1631:
! 1632: /*----------------------------------------------------------------------
! 1633: RemoveEndingSpaces
! 1634: If element el is a block-level element, remove all spaces contained
! 1635: at the end of that element.
! 1636: Return TRUE if spaces have been removed.
! 1637: ----------------------------------------------------------------------*/
! 1638: #ifdef __STDC__
! 1639: static ThotBool RemoveEndingSpaces (Element el)
! 1640: #else
! 1641: static ThotBool RemoveEndingSpaces (el)
! 1642: Element el;
! 1643:
! 1644: #endif
! 1645: {
! 1646: int length, nbspaces;
! 1647: ElementType elType;
! 1648: Element lastLeaf;
! 1649: CHAR_T lastChar[2];
! 1650: ThotBool endingSpacesDeleted;
! 1651:
! 1652: endingSpacesDeleted = FALSE;
! 1653: if (IsBlockElement (el))
! 1654: /* it's a block element. */
! 1655: {
! 1656: /* Search the last leaf in the element's tree */
! 1657: lastLeaf = LastLeafInElement (el);
! 1658: if (lastLeaf != NULL)
! 1659: {
! 1660: elType = TtaGetElementType (lastLeaf);
! 1661: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT)
! 1662: /* the las leaf is a TEXT element */
! 1663: {
! 1664: length = TtaGetTextLength (lastLeaf);
! 1665: if (length > 0)
! 1666: {
! 1667: /* count ending spaces */
! 1668: nbspaces = 0;
! 1669: do
! 1670: {
! 1671: TtaGiveSubString (lastLeaf, lastChar, length,
! 1672: 1);
! 1673: if (lastChar[0] == SPACE)
! 1674: {
! 1675: length--;
! 1676: nbspaces++;
! 1677: }
! 1678: }
! 1679: while (lastChar[0] == SPACE && length > 0);
! 1680: if (nbspaces > 0)
! 1681: if (length == 0)
! 1682: /* empty TEXT element */
! 1683: TtaDeleteTree (lastLeaf, theDocument);
! 1684: else
! 1685: /* remove the ending spaces */
! 1686: TtaDeleteTextContent (lastLeaf, length + 1,
! 1687: nbspaces, theDocument);
! 1688: }
! 1689: }
! 1690: }
! 1691: endingSpacesDeleted = TRUE;
! 1692: }
! 1693: return endingSpacesDeleted;
! 1694: }
! 1695:
! 1696: /*----------------------------------------------------------------------
! 1697: CloseElement
! 1698: End of HTML element defined in entry entry of pHTMLGIMapping.
! 1699: Terminate all corresponding Thot elements.
! 1700: If start < 0, an explicit end tag has been encountered in the HTML file,
! 1701: else the end of element is implied by the beginning of an element
! 1702: described by entry start of pHTMLGIMapping.
! 1703: ----------------------------------------------------------------------*/
! 1704: #ifdef __STDC__
! 1705: static ThotBool CloseElement (int entry,
! 1706: int start,
! 1707: ThotBool onStartTag)
! 1708: #else
! 1709: static ThotBool CloseElement (entry,
! 1710: start,
! 1711: onStartTag)
! 1712: int entry;
! 1713: int start;
! 1714: ThotBool onStartTag;
! 1715: #endif
! 1716: {
! 1717: int i;
! 1718: ElementType elType, parentType;
! 1719: Element el, parent;
! 1720: ThotBool ret, stop, spacesDeleted;
! 1721:
! 1722: ret = FALSE;
! 1723: /* the closed HTML element corresponds to a Thot element. */
! 1724: stop = FALSE;
! 1725: /* type of the element to be closed */
! 1726: elType.ElSSchema = DocumentSSchema;
! 1727: elType.ElTypeNum = pHTMLGIMapping[entry].ThotType;
! 1728: if (StackLevel > 0)
! 1729: {
! 1730: el = lastElement;
! 1731: if (lastElementClosed)
! 1732: el = TtaGetParent (el);
! 1733: i = StackLevel - 1;
! 1734: if (start < 0)
! 1735: /* Explicit close */
! 1736: {
! 1737: /* If we meet the end tag of a form, font or center
! 1738: looks for that element in the stack, but not at
! 1739: a higher level as a table element */
! 1740: if (!onStartTag &&
! 1741: (!ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("form")) ||
! 1742: !ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("font")) ||
! 1743: !ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("center"))))
! 1744: while (i > 0 && entry != GINumberStack[i] && !stop)
! 1745: if (!ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("tbody")) ||
! 1746: !ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("tr")) ||
! 1747: !ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("th")) ||
! 1748: !ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("td")))
! 1749: {
! 1750: /* ignore this end tag */
! 1751: ret = FALSE;
! 1752: stop = TRUE;
! 1753: i = -1;
! 1754: }
! 1755: else
! 1756: i--;
! 1757: else
! 1758: /* looks in the stack for the element to be closed */
! 1759: while (i >= 0 && entry != GINumberStack[i])
! 1760: i--;
! 1761: }
! 1762: else
! 1763: /* Implicit close */
! 1764: {
! 1765: /* If the element to be closed is a list item (or
! 1766: equivalent), looks for that element in the
! 1767: stack, but not at a higher level as the list (or
! 1768: equivalent) element */
! 1769: if (!ustrcmp (pHTMLGIMapping[start].htmlGI, TEXT("li")))
! 1770: while (i > 0 && entry != GINumberStack[i] && !stop)
! 1771: if (!ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("ol")) ||
! 1772: !ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("ul")) ||
! 1773: !ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("dir")) ||
! 1774: !ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("menu")))
! 1775: stop = TRUE;
! 1776: else
! 1777: i--;
! 1778: else if (!ustrcmp (pHTMLGIMapping[start].htmlGI, TEXT("option")))
! 1779: while (i > 0 && entry != GINumberStack[i] && !stop)
! 1780: if (!ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("select")))
! 1781: stop = TRUE;
! 1782: else
! 1783: i--;
! 1784: else if (!ustrcmp (pHTMLGIMapping[start].htmlGI, TEXT("dd")) ||
! 1785: !ustrcmp (pHTMLGIMapping[start].htmlGI, TEXT("dt")))
! 1786: while (i > 0 && entry != GINumberStack[i] && !stop)
! 1787: if (!ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("dl")))
! 1788: stop = TRUE;
! 1789: else
! 1790: i--;
! 1791: else if (!ustrcmp (pHTMLGIMapping[start].htmlGI, TEXT("tr")) ||
! 1792: !ustrcmp (pHTMLGIMapping[start].htmlGI, TEXT("td")) ||
! 1793: !ustrcmp (pHTMLGIMapping[start].htmlGI, TEXT("th")))
! 1794: while (i > 0 && entry != GINumberStack[i] && !stop)
! 1795: if (!ustrcmp (pHTMLGIMapping[GINumberStack[i]].htmlGI, TEXT("table")))
! 1796: stop = TRUE;
! 1797: else
! 1798: i--;
! 1799: }
! 1800:
! 1801: if (i >= 0 && entry == GINumberStack[i])
! 1802: /* element found in the stack */
! 1803: {
! 1804: /* This element and its whole subtree are closed */
! 1805: StackLevel = i;
! 1806: lastElement = ElementStack[i];
! 1807: lastElementClosed = TRUE;
! 1808: ret = TRUE;
! 1809: }
! 1810: else if (!stop)
! 1811: /* element not found in the stack */
! 1812: if (start >= 0 && lastElement != NULL)
! 1813: {
! 1814: /* implicit close. Check the parent of current element */
! 1815: if (InsertSibling ())
! 1816: parent = TtaGetParent (lastElement);
! 1817: else
! 1818: parent = lastElement;
! 1819: if (parent != NULL)
! 1820: {
! 1821: parentType = TtaGetElementType (parent);
! 1822: if (elType.ElTypeNum == parentType.ElTypeNum)
! 1823: {
! 1824: lastElement = parent;
! 1825: lastElementClosed = TRUE;
! 1826: ret = TRUE;
! 1827: }
! 1828: else if (TtaIsLeaf (TtaGetElementType (lastElement)))
! 1829: {
! 1830: parent = TtaGetParent (parent);
! 1831: if (parent != NULL)
! 1832: {
! 1833: parentType = TtaGetElementType (parent);
! 1834: if (elType.ElTypeNum == parentType.ElTypeNum)
! 1835: {
! 1836: lastElement = parent;
! 1837: lastElementClosed = TRUE;
! 1838: ret = TRUE;
! 1839: }
! 1840: }
! 1841: }
! 1842: }
! 1843: }
! 1844:
! 1845: if (ret)
! 1846: /* successful close */
! 1847: {
! 1848: /* remove closed elements from the stack */
! 1849: while (i > 0)
! 1850: if (ElementStack[i] == lastElement)
! 1851: {
! 1852: StackLevel = i;
! 1853: i = 0;
! 1854: }
! 1855: else
! 1856: {
! 1857: if (TtaIsAncestor (ElementStack[i], lastElement))
! 1858: StackLevel = i;
! 1859: i--;
! 1860: }
! 1861: if (StackLevel > 0)
! 1862: currentLanguage = LanguageStack[StackLevel - 1];
! 1863:
! 1864: /* complete all closed elements */
! 1865: if (el != lastElement)
! 1866: if (!TtaIsAncestor(el, lastElement))
! 1867: el = NULL;
! 1868: spacesDeleted = FALSE;
! 1869: while (el != NULL)
! 1870: {
! 1871: ElementComplete (el);
! 1872: if (!spacesDeleted)
! 1873: /* If the element closed is a block-element, remove */
! 1874: /* spaces contained at the end of that element */
! 1875: spacesDeleted = RemoveEndingSpaces (el);
! 1876: if (el == lastElement)
! 1877: el = NULL;
! 1878: else
! 1879: el = TtaGetParent (el);
! 1880: }
! 1881: }
! 1882: }
! 1883:
! 1884: return ret;
! 1885: }
! 1886:
! 1887: /*-------------------- StartElement (start) ---------------------*/
! 1888:
! 1889: /*----------------------------------------------------------------------
! 1890: ProcessEndGI
! 1891: Function called at the end of a start tag.
! 1892: ----------------------------------------------------------------------*/
! 1893: #ifdef __STDC__
! 1894: static void ProcessEndGI (CHAR_T *name)
! 1895: #else
! 1896: static void ProcessEndGI (name)
! 1897: CHAR_T *name;
! 1898:
! 1899: #endif
! 1900: {
! 1901:
! 1902: ElementType elType;
! 1903: AttributeType attrType;
! 1904: Attribute attr;
! 1905: int length;
! 1906: STRING text;
! 1907: ThotBool math;
! 1908:
! 1909: UnknownTag = FALSE;
! 1910: if ((lastElement != NULL) && (lastElemEntry != -1))
! 1911: {
! 1912: math = FALSE;
! 1913:
! 1914: if (!ustrcmp (pHTMLGIMapping[lastElemEntry].htmlGI,
! 1915: TEXT("math")))
! 1916: /* a <math> tag has been read */
! 1917: math = TRUE;
! 1918: else
! 1919: if (!ustrcmp (pHTMLGIMapping[lastElemEntry].htmlGI,
! 1920: TEXT("mathdisp")))
! 1921: /* a <mathdisp> tag has been read. add an attribute "mode=display"
! 1922: (for compatibility with old MathML version WD-math-970704 */
! 1923: {
! 1924: math = TRUE;
! 1925: elType = TtaGetElementType (lastElement);
! 1926: attrType.AttrSSchema = elType.ElSSchema;
! 1927: attrType.AttrTypeNum = HTML_ATTR_mode;
! 1928: attr = TtaGetAttribute (lastElement, attrType);
! 1929: if (attr == NULL)
! 1930: /* create a new attribute and attach it to the element */
! 1931: {
! 1932: attr = TtaNewAttribute (attrType);
! 1933: TtaAttachAttribute (lastElement, attr, theDocument);
! 1934: }
! 1935: TtaSetAttributeValue (attr, HTML_ATTR_mode_VAL_display,
! 1936: lastElement, theDocument);
! 1937: }
! 1938:
! 1939: if (math)
! 1940: {
! 1941: #ifndef STANDALONE
! 1942: #ifdef LC
! 1943: /* Parse the MathML structure */
! 1944: XMLparse (stream,
! 1945: &CurrentBufChar,
! 1946: TEXT("MathML"),
! 1947: theDocument,
! 1948: lastElement,
! 1949: FALSE,
! 1950: currentLanguage,
! 1951: pHTMLGIMapping[lastElemEntry].htmlGI);
! 1952: #endif /* LC */
! 1953: #endif /* STANDALONE */
! 1954: /* when returning from the XML parser, the end tag has already
! 1955: been read */
! 1956: (void) CloseElement (lastElemEntry, -1, FALSE);
! 1957: }
! 1958:
! 1959: else
! 1960: /* !math */
! 1961: if (!ustrcmp (pHTMLGIMapping[lastElemEntry].htmlGI,
! 1962: TEXT("xmlgraphics")))
! 1963: /* a <XMLGRAPHICS> tag has been read */
! 1964: {
! 1965: /* Parse the GraphML structure */
! 1966: #ifndef STANDALONE
! 1967: #ifdef LC
! 1968: XMLparse (stream,
! 1969: &CurrentBufChar,
! 1970: TEXT("GraphML"),
! 1971: theDocument,
! 1972: lastElement,
! 1973: FALSE,
! 1974: currentLanguage,
! 1975: pHTMLGIMapping[lastElemEntry].htmlGI);
! 1976: #endif /* LC */
! 1977: #endif /* STANDALONE */
! 1978: /* when returning from the XML parser, the end tag has already
! 1979: been read */
! 1980: (void) CloseElement (lastElemEntry, -1, FALSE);
! 1981: }
! 1982: else
! 1983: /* !math and !graph*/
! 1984: if (!ustrcmp (pHTMLGIMapping[lastElemEntry].htmlGI, TEXT("pre")) ||
! 1985: !ustrcmp (pHTMLGIMapping[lastElemEntry].htmlGI, TEXT("style")) ||
! 1986: !ustrcmp (pHTMLGIMapping[lastElemEntry].htmlGI, TEXT("script")))
! 1987: /* a <PRE>, <STYLE> or <SCRIPT> tag has been read */
! 1988: AfterTagPRE = TRUE;
! 1989: else
! 1990: if (!ustrcmp (pHTMLGIMapping[lastElemEntry].htmlGI, TEXT("table")))
! 1991: /* <TABLE> has been read */
! 1992: WithinTable++;
! 1993: else
! 1994: if (pHTMLGIMapping[lastElemEntry].htmlContents == 'E')
! 1995: /* this is an empty element. Do not expect an end tag */
! 1996: {
! 1997: CloseElement (lastElemEntry, -1, TRUE);
! 1998: ElementComplete (lastElement);
! 1999: }
! 2000:
! 2001: /* if it's a LI element, creates its IntItemStyle attribute
! 2002: according to surrounding elements */
! 2003: SetAttrIntItemStyle (lastElement, theDocument);
! 2004: /* if it's an AREA element, computes its position and size */
! 2005: ParseAreaCoords (lastElement, theDocument);
! 2006: /* if it's a STYLE element in CSS notation, activate the CSS */
! 2007: /* parser for parsing the element content */
! 2008: elType = TtaGetElementType (lastElement);
! 2009: if (elType.ElTypeNum == HTML_EL_STYLE_)
! 2010: {
! 2011: /* Search the Notation attribute */
! 2012: attrType.AttrSSchema = elType.ElSSchema;
! 2013: attrType.AttrTypeNum = HTML_ATTR_Notation;
! 2014: attr = TtaGetAttribute (lastElement, attrType);
! 2015: if (attr == NULL)
! 2016: /* No Notation attribute. Assume CSS by default */
! 2017: ParsingCSS = TRUE;
! 2018: else
! 2019: /* the STYLE element has a Notation attribute */
! 2020: /* get its value */
! 2021: {
! 2022: length = TtaGetTextAttributeLength (attr);
! 2023: text = TtaAllocString (length + 1);
! 2024: TtaGiveTextAttributeValue (attr, text, &length);
! 2025: if (!ustrcasecmp (text, TEXT("text/css")))
! 2026: ParsingCSS = TRUE;
! 2027: TtaFreeMemory (text);
! 2028: }
! 2029: }
! 2030: else
! 2031: if (elType.ElTypeNum == HTML_EL_Text_Area)
! 2032: {
! 2033: /* we have to read the content as a simple text unit */
! 2034: ParsingTextArea = TRUE;
! 2035: }
! 2036: }
! 2037: }
! 2038:
! 2039: /*----------------------------------------------------------------------
! 2040: ContextOK
! 2041: Returns TRUE if the element at position entry in the mapping table
! 2042: is allowed to occur in the current structural context.
! 2043: ----------------------------------------------------------------------*/
! 2044: #ifdef __STDC__
! 2045: static ThotBool ContextOK (int entry)
! 2046: #else
! 2047: static ThotBool ContextOK (entry)
! 2048: int entry;
! 2049:
! 2050: #endif
! 2051: {
! 2052: ThotBool ok;
! 2053: int saveLastElemEntry;
! 2054:
! 2055: if (StackLevel == 0 || GINumberStack[StackLevel - 1] < 0)
! 2056: return TRUE;
! 2057: else
! 2058: {
! 2059: ok = TRUE;
! 2060: /* only TH and TD elements are allowed as children of a TR element */
! 2061: if (!ustrcmp (pHTMLGIMapping[GINumberStack[StackLevel - 1]].htmlGI, TEXT("tr")))
! 2062: if (ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("th")) &&
! 2063: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("td")))
! 2064: ok = FALSE;
! 2065: if (ok)
! 2066: /* only CAPTION, THEAD, TFOOT, TBODY, COLGROUP, COL and TR are */
! 2067: /* allowed as children of a TABLE element */
! 2068: if (!ustrcmp (pHTMLGIMapping[GINumberStack[StackLevel - 1]].htmlGI, TEXT("table")))
! 2069: if (ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("caption")) &&
! 2070: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("thead")) &&
! 2071: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("tfoot")) &&
! 2072: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("tbody")) &&
! 2073: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("colgroup")) &&
! 2074: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("col")) &&
! 2075: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("tr")))
! 2076: if (!ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("td")) ||
! 2077: !ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("th")))
! 2078: /* Table cell within a table, without a tr. Assume tr */
! 2079: {
! 2080: /* save the last last GI read from the input file */
! 2081: saveLastElemEntry = lastElemEntry;
! 2082: /* simulate a <TR> tag */
! 2083: ProcessStartGI (TEXT("tr"));
! 2084: /* restore the last tag that has actually been read */
! 2085: lastElemEntry = saveLastElemEntry;
! 2086: }
! 2087: else
! 2088: ok = FALSE;
! 2089: if (ok)
! 2090: /* CAPTION, THEAD, TFOOT, TBODY, COLGROUP are allowed only as
! 2091: children of a TABLE element */
! 2092: if (ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("caption")) == 0 ||
! 2093: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("thead")) == 0 ||
! 2094: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("tfoot")) == 0 ||
! 2095: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("tbody")) == 0 ||
! 2096: ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("colgroup")) == 0)
! 2097: if (ustrcmp (pHTMLGIMapping[GINumberStack[StackLevel - 1]].htmlGI,
! 2098: TEXT("table")) != 0)
! 2099: ok = FALSE;
! 2100: if (ok)
! 2101: /* only TR is allowed as a child of a THEAD, TFOOT or TBODY element */
! 2102: if (!ustrcmp (pHTMLGIMapping[GINumberStack[StackLevel - 1]].htmlGI, TEXT("thead")) ||
! 2103: !ustrcmp (pHTMLGIMapping[GINumberStack[StackLevel - 1]].htmlGI, TEXT("tfoot")) ||
! 2104: !ustrcmp (pHTMLGIMapping[GINumberStack[StackLevel - 1]].htmlGI, TEXT("tbody")))
! 2105: if (ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("tr")))
! 2106: if (!ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("td")) ||
! 2107: !ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("th")))
! 2108: /* Table cell within a thead, tfoot or tbody without a tr. */
! 2109: /* Assume tr */
! 2110: {
! 2111: /* save the last last GI read from the input file */
! 2112: saveLastElemEntry = lastElemEntry;
! 2113: /* simulate a <tr> tag */
! 2114: ProcessStartGI (TEXT("tr"));
! 2115: /* restore the last tag that has actually been read */
! 2116: lastElemEntry = saveLastElemEntry;
! 2117: }
! 2118: else
! 2119: ok = FALSE;
! 2120: if (ok)
! 2121: /* refuse BODY within BODY */
! 2122: if (ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("body")) == 0)
! 2123: if (Within (HTML_EL_BODY, DocumentSSchema))
! 2124: ok = FALSE;
! 2125: if (ok)
! 2126: /* refuse HEAD within HEAD */
! 2127: if (ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("head")) == 0)
! 2128: if (Within (HTML_EL_HEAD, DocumentSSchema))
! 2129: ok = FALSE;
! 2130: if (ok)
! 2131: /* refuse STYLE within STYLE */
! 2132: if (ustrcmp (pHTMLGIMapping[entry].htmlGI, TEXT("style")) == 0)
! 2133: if (Within (HTML_EL_STYLE_, DocumentSSchema))
! 2134: ok = FALSE;
! 2135: return ok;
! 2136: }
! 2137: }
! 2138:
! 2139: /*----------------------------------------------------------------------
! 2140: SpecialImplicitEnd
! 2141: ----------------------------------------------------------------------*/
! 2142: #ifdef __STDC__
! 2143: static void SpecialImplicitEnd (int entry)
! 2144: #else
! 2145: static void SpecialImplicitEnd (entry)
! 2146: int entry;
! 2147:
! 2148: #endif
! 2149: {
! 2150: ElementType elType;
! 2151:
! 2152: /* if current element is DD, Hn closes that DD only when there is */
! 2153: /* no enclosing DL */
! 2154: if (pHTMLGIMapping[entry].htmlGI[0] == TEXT('H') &&
! 2155: pHTMLGIMapping[entry].htmlGI[1] >= TEXT('1') &&
! 2156: pHTMLGIMapping[entry].htmlGI[1] <= TEXT('6') &&
! 2157: pHTMLGIMapping[entry].htmlGI[2] == WC_EOS)
! 2158: /* the new element is a Hn */
! 2159: if (StackLevel > 1)
! 2160: if (ElementStack[StackLevel - 1] != NULL)
! 2161: {
! 2162: elType = TtaGetElementType (ElementStack[StackLevel - 1]);
! 2163: if (elType.ElTypeNum == HTML_EL_Definition)
! 2164: /* the current element is a DD */
! 2165: {
! 2166: elType = TtaGetElementType (ElementStack[StackLevel - 2]);
! 2167: if (elType.ElTypeNum != HTML_EL_Definition_List)
! 2168: /* DD in not within a DL. Close the DD element */
! 2169: CloseElement (GINumberStack[StackLevel - 1], entry, FALSE);
! 2170: }
! 2171: }
! 2172: }
! 2173:
! 2174: /*----------------------------------------------------------------------
! 2175: InsertInvalidEl
! 2176: create an element Invalid_element with the indicated content.
! 2177: position indicate whether the element type is unknown (FALSE) or the
! 2178: tag position is incorrect (TRUE).
! 2179: ----------------------------------------------------------------------*/
! 2180: #ifdef __STDC__
! 2181: static void InsertInvalidEl (CHAR_T* content,
! 2182: ThotBool position)
! 2183: #else
! 2184: static void InsertInvalidEl (content,
! 2185: position)
! 2186: CHAR_T* content;
! 2187: ThotBool position;
! 2188:
! 2189: #endif
! 2190: {
! 2191: ElementType elType;
! 2192: AttributeType attrType;
! 2193: Element elInv, elText;
! 2194: Attribute attr;
! 2195:
! 2196: elType.ElSSchema = DocumentSSchema;
! 2197: elType.ElTypeNum = HTML_EL_Invalid_element;
! 2198: elInv = TtaNewElement (theDocument, elType);
! 2199: TtaSetElementLineNumber (elInv, NumberOfLinesRead);
! 2200: InsertElement (&elInv);
! 2201: if (elInv != NULL)
! 2202: {
! 2203: lastElementClosed = TRUE;
! 2204: elType.ElTypeNum = HTML_EL_TEXT_UNIT;
! 2205: elText = TtaNewElement (theDocument, elType);
! 2206: TtaSetElementLineNumber (elText, NumberOfLinesRead);
! 2207: TtaInsertFirstChild (&elText, elInv, theDocument);
! 2208: TtaSetTextContent (elText, content, currentLanguage, theDocument);
! 2209: TtaSetAccessRight (elText, ReadOnly, theDocument);
! 2210: attrType.AttrSSchema = DocumentSSchema;
! 2211: attrType.AttrTypeNum = HTML_ATTR_Error_type;
! 2212: attr = TtaNewAttribute (attrType);
! 2213: TtaAttachAttribute (elInv, attr, theDocument);
! 2214: if (position)
! 2215: TtaSetAttributeValue (attr, HTML_ATTR_Error_type_VAL_BadPosition,
! 2216: elInv, theDocument);
! 2217: else
! 2218: TtaSetAttributeValue (attr, HTML_ATTR_Error_type_VAL_UnknownTag,
! 2219: elInv, theDocument);
! 2220: }
! 2221: }
! 2222:
! 2223: /*----------------------------------------------------------------------
! 2224: ProcessStartGI
! 2225: An HTML GI has been read in a start tag.
! 2226: Create the corresponding Thot thing (element, attribute,
! 2227: or character), according to the mapping table.
! 2228: ----------------------------------------------------------------------*/
! 2229: #ifdef __STDC__
! 2230: static void ProcessStartGI (CHAR_T* GIname)
! 2231: #else
! 2232: static void ProcessStartGI (GIname)
! 2233: CHAR_T* GIname;
! 2234:
! 2235: #endif
! 2236: {
! 2237: ElementType elType;
! 2238: Element el;
! 2239: int entry, i;
! 2240: CHAR_T msgBuffer[MaxMsgLength];
! 2241: PtrClosedElement pClose;
! 2242: ThotBool sameLevel;
! 2243: SSchema schema;
! 2244:
! 2245: /* ignore tag <P> within PRE */
! 2246: if (Within (HTML_EL_Preformatted, DocumentSSchema))
! 2247: if (ustrcasecmp (GIname, TEXT("p")) == 0)
! 2248: return;
! 2249:
! 2250: /* search the HTML element name in the mapping table */
! 2251: schema = DocumentSSchema;
! 2252: entry = MapGI (GIname, &schema, theDocument);
! 2253: lastElemEntry = entry;
! 2254: if (entry < 0)
! 2255: /* not found in the HTML DTD */
! 2256: {
! 2257: /* check if it's the math or svg tag with a namespace prefix */
! 2258: /* So, look for a colon in the element name */
! 2259: for (i = 0; GIname[i] != TEXT(':') && GIname[i] != WC_EOS; i++);
! 2260: if (GIname[i] == TEXT(':') &&
! 2261: (ustrcasecmp (&GIname[i+1], TEXT("math")) == 0 ||
! 2262: ustrcasecmp (&GIname[i+1], TEXT("xmlgraphics")) == 0))
! 2263: /* it's a math or svg tag with a namespace prefix. OK */
! 2264: {
! 2265: entry = MapGI (&GIname[i+1], &schema, theDocument);
! 2266: lastElemEntry = entry;
! 2267: }
! 2268: else
! 2269: /* unknown tag */
! 2270: {
! 2271: if (ustrlen (GIname) > MaxMsgLength - 20)
! 2272: GIname[MaxMsgLength - 20] = WC_EOS;
! 2273: usprintf (msgBuffer, TEXT("Unknown tag <%s>"), GIname);
! 2274: ParseHTMLError (theDocument, msgBuffer);
! 2275: UnknownTag = TRUE;
! 2276: /* create an Invalid_element */
! 2277: usprintf (msgBuffer, TEXT("<%s"), GIname);
! 2278: InsertInvalidEl (msgBuffer, FALSE);
! 2279: }
! 2280: }
! 2281: if (entry >= 0)
! 2282: {
! 2283: /* does this start tag also imply the end tag of some current elements? */
! 2284: pClose = pHTMLGIMapping[entry].firstClosedElem;
! 2285: while (pClose != NULL)
! 2286: {
! 2287: CloseElement (pClose->tagNum, entry, TRUE);
! 2288: pClose = pClose->nextClosedElem;
! 2289: }
! 2290: /* process some special cases... */
! 2291: SpecialImplicitEnd (entry);
! 2292: if (!ContextOK (entry))
! 2293: /* element not allowed in the current structural context */
! 2294: {
! 2295: usprintf (msgBuffer, TEXT("Tag <%s> is not allowed here"), GIname);
! 2296: ParseHTMLError (theDocument, msgBuffer);
! 2297: UnknownTag = TRUE;
! 2298: /* create an Invalid_element */
! 2299: usprintf (msgBuffer, TEXT("<%s"), GIname);
! 2300: InsertInvalidEl (msgBuffer, TRUE);
! 2301: }
! 2302: else
! 2303: {
! 2304: el = NULL;
! 2305: sameLevel = TRUE;
! 2306: if (pHTMLGIMapping[entry].ThotType > 0)
! 2307: {
! 2308: if (pHTMLGIMapping[entry].ThotType == HTML_EL_HTML)
! 2309: /* the corresponding Thot element is the root of the
! 2310: abstract tree, which has been created at initialization */
! 2311: el = rootElement;
! 2312: else
! 2313: /* create a Thot element */
! 2314: {
! 2315: elType.ElSSchema = DocumentSSchema;
! 2316: elType.ElTypeNum = pHTMLGIMapping[entry].ThotType;
! 2317: if (pHTMLGIMapping[entry].htmlContents == 'E')
! 2318: /* empty HTML element. Create all children specified */
! 2319: /* in the Thot structure schema */
! 2320: el = TtaNewTree (theDocument, elType, "");
! 2321: else
! 2322: /* the HTML element may have children. Create only */
! 2323: /* the corresponding Thot element, without any child */
! 2324: el = TtaNewElement (theDocument, elType);
! 2325: TtaSetElementLineNumber (el, NumberOfLinesRead);
! 2326: sameLevel = InsertElement (&el);
! 2327: if (el != NULL)
! 2328: {
! 2329: if (pHTMLGIMapping[entry].htmlContents == 'E')
! 2330: lastElementClosed = TRUE;
! 2331: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT)
! 2332: /* an empty Text element has been created. The */
! 2333: /* following character data must go to that elem. */
! 2334: MergeText = TRUE;
! 2335: }
! 2336: }
! 2337: }
! 2338: if (pHTMLGIMapping[entry].htmlContents != 'E')
! 2339: {
! 2340: ElementStack[StackLevel] = el;
! 2341: if (sameLevel)
! 2342: ThotLevel[StackLevel] = ThotLevel[StackLevel - 1];
! 2343: else
! 2344: ThotLevel[StackLevel] = ThotLevel[StackLevel - 1] + 1;
! 2345: LanguageStack[StackLevel] = currentLanguage;
! 2346: GINumberStack[StackLevel++] = entry;
! 2347: }
! 2348: }
! 2349: }
! 2350: }
! 2351: /*---------------------- StartElement (end) -----------------------*/
! 2352:
! 2353:
! 2354: /*---------------------- EndElement (start) -----------------------*/
! 2355:
! 2356: /*----------------------------------------------------------------------
! 2357: EndOfElement
! 2358: Terminate all corresponding Thot elements.
! 2359: ----------------------------------------------------------------------*/
! 2360: #ifdef __STDC__
! 2361: static void EndOfElement (CHAR_T *tagName)
! 2362: #else
! 2363: static void EndOfElement (tagName)
! 2364: CHAR_T *tagName;
! 2365:
! 2366: #endif
! 2367: {
! 2368: SSchema schema;
! 2369: CHAR_T msgBuffer[MaxMsgLength];
! 2370: int entry;
! 2371: int i;
! 2372: ThotBool ok;
! 2373:
! 2374:
! 2375: if (ParsingTextArea)
! 2376: {
! 2377: if (ustrcasecmp (tagName, TEXT("textarea")) != 0)
! 2378: /* We are parsing the contents of a textarea element. The end
! 2379: tag is not the one closing the current textarea, consider it
! 2380: as plain text */
! 2381: {
! 2382: return;
! 2383: }
! 2384: }
! 2385:
! 2386: /* is it the end of the current HTML fragment ? */
! 2387: ok = FALSE;
! 2388: if (HTMLrootClosingTag != EOS)
! 2389: {
! 2390: #ifdef LC
! 2391: /* look for a colon in the element name (namespaces) and ignore the
! 2392: prefix if there is one */
! 2393: for (i = 0; i < LgBuffer && inputBuffer[i] != TEXT(':'); i++);
! 2394: if (inputBuffer[i] == TEXT(':'))
! 2395: i++;
! 2396: else
! 2397: i = 0;
! 2398: #endif /* LC */
! 2399: if (ustrcasecmp (tagName, HTMLrootClosingTag) == 0)
! 2400: {
! 2401: HTMLrootClosed = TRUE;
! 2402: ok = TRUE;
! 2403: }
! 2404: }
! 2405:
! 2406: if (!ok)
! 2407: {
! 2408: /* search the HTML tag in the mapping table */
! 2409: schema = DocumentSSchema;
! 2410: entry = MapGI (tagName, &schema, theDocument);
! 2411: if (entry < 0)
! 2412: {
! 2413: if (ustrlen (tagName) > MaxMsgLength - 20)
! 2414: tagName[MaxMsgLength - 20] = WC_EOS;
! 2415: usprintf (msgBuffer, TEXT("Unknown tag </%s>"), tagName);
! 2416: ParseHTMLError (theDocument, msgBuffer);
! 2417: }
! 2418: else
! 2419: {
! 2420: if (!CloseElement (entry, -1, FALSE))
! 2421: /* the end tag does not close any current element */
! 2422: {
! 2423: usprintf (msgBuffer,
! 2424: TEXT("Unexpected end tag </%s>"),
! 2425: tagName);
! 2426: ParseHTMLError (theDocument, msgBuffer);
! 2427: }
! 2428: }
! 2429: }
! 2430:
! 2431: AfterTagPRE = FALSE;
! 2432: }
! 2433: /*--------------------- EndElement (end) --------------------------*/
! 2434:
! 2435:
! 2436: /*---------------------- Data (start) -----------------------------*/
! 2437:
! 2438: /*----------------------------------------------------------------------
! 2439: PutInElement
! 2440: ----------------------------------------------------------------------*/
! 2441: #ifdef __STDC__
! 2442: static void PutInElement (STRING data)
! 2443: #else /* __STDC__ */
! 2444: static void PutInElement (data)
! 2445: STRING data;
! 2446: #endif /* __STDC__ */
! 2447:
! 2448: {
! 2449: ElementType elType;
! 2450: Element elText, parent, ancestor, prev;
! 2451: int i;
! 2452: int length;
! 2453: ThotBool ignoreLeadingSpaces;
! 2454: static ThotBool EmptyLine;
! 2455:
! 2456: length = ustrlen (data);
! 2457: i = 0;
! 2458:
! 2459: /* remove leading spaces for merged text and */
! 2460: /* replace single CR character by space character */
! 2461: /* except for <PRE>, <STYLE> and <SCRIPT> elements */
! 2462: if (!AfterTagPRE)
! 2463: {
! 2464: if (length == 1 &&
! 2465: (data[0] == WC_EOL || data[0] == WC_CR))
! 2466: {
! 2467: data[0] = WC_SPACE;
! 2468: EmptyLine = 1;
! 2469: }
! 2470: else
! 2471: if (EmptyLine)
! 2472: {
! 2473: while ((data[i] == WC_SPACE || data[i] == WC_TAB) &&
! 2474: data[i] != WC_EOS)
! 2475: i++;
! 2476: EmptyLine = 0;
! 2477: }
! 2478: }
! 2479:
! 2480: /* replace single CR character by space character */
! 2481: /* except for <PRE>, <STYLE> and <SCRIPT> elements */
! 2482: /*
! 2483: if (length == 1 && !AfterTagPRE &&
! 2484: (data[0] == WC_EOL || data[0] == WC_CR))
! 2485: {
! 2486: data[0] = WC_SPACE;
! 2487: }
! 2488: */
! 2489:
! 2490: if (lastElement != NULL)
! 2491: {
! 2492: if (InsertSibling ())
! 2493: /* There is a previous sibling (lastElement)
! 2494: for the new Text element */
! 2495: {
! 2496: parent = TtaGetParent (lastElement);
! 2497: if (parent == NULL)
! 2498: parent = lastElement;
! 2499: elType = TtaGetElementType (parent);
! 2500: if (IsCharacterLevelElement (lastElement) &&
! 2501: elType.ElTypeNum != HTML_EL_Option_Menu &&
! 2502: elType.ElTypeNum != HTML_EL_OptGroup)
! 2503: {
! 2504: ignoreLeadingSpaces = FALSE;
! 2505: elType = TtaGetElementType (lastElement);
! 2506: if (elType.ElTypeNum == HTML_EL_BR)
! 2507: ignoreLeadingSpaces = TRUE;
! 2508: }
! 2509: else
! 2510: ignoreLeadingSpaces = TRUE;
! 2511: }
! 2512: else
! 2513: /* the new Text element should be the first child of the latest
! 2514: element encountered */
! 2515: {
! 2516: parent = lastElement;
! 2517: ignoreLeadingSpaces = TRUE;
! 2518: elType = TtaGetElementType (lastElement);
! 2519: if (elType.ElTypeNum != HTML_EL_Option_Menu &&
! 2520: elType.ElTypeNum != HTML_EL_OptGroup)
! 2521: {
! 2522: ancestor = parent;
! 2523: while (ignoreLeadingSpaces &&
! 2524: IsCharacterLevelElement (ancestor))
! 2525: {
! 2526: prev = ancestor;
! 2527: TtaPreviousSibling (&prev);
! 2528: if (prev == NULL)
! 2529: ancestor = TtaGetParent (ancestor);
! 2530: else
! 2531: ignoreLeadingSpaces = FALSE;
! 2532: }
! 2533: }
! 2534: }
! 2535:
! 2536: if (ignoreLeadingSpaces)
! 2537: if (!Within (HTML_EL_Preformatted, DocumentSSchema) &&
! 2538: !Within (HTML_EL_STYLE_, DocumentSSchema) &&
! 2539: !Within (HTML_EL_SCRIPT, DocumentSSchema))
! 2540: /* suppress leading spaces */
! 2541: while (data[i] <= WC_SPACE && data[i] != WC_EOS)
! 2542: i++;
! 2543:
! 2544: if (data[i] != WC_EOS)
! 2545: {
! 2546: elType = TtaGetElementType (lastElement);
! 2547: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT && MergeText)
! 2548: {
! 2549: TtaAppendTextContent (lastElement,
! 2550: &(data[i]),
! 2551: theDocument);
! 2552: }
! 2553: else
! 2554: {
! 2555: /* create a TEXT element */
! 2556: elType.ElSSchema = DocumentSSchema;
! 2557: elType.ElTypeNum = HTML_EL_TEXT_UNIT;
! 2558: elText = TtaNewElement (theDocument, elType);
! 2559: TtaSetElementLineNumber (elText, BufferLineNumber);
! 2560: InsertElement (&elText);
! 2561: lastElementClosed = TRUE;
! 2562: MergeText = TRUE;
! 2563: /* put the content of the input buffer into the TEXT element */
! 2564: if (elText != NULL)
! 2565: {
! 2566: TtaSetTextContent (elText,
! 2567: &(data[i]),
! 2568: currentLanguage,
! 2569: theDocument);
! 2570: }
! 2571: }
! 2572: }
! 2573: }
! 2574: }
! 2575: /*---------------------- Data (end) ---------------------------*/
! 2576:
! 2577:
! 2578: /*-------------------- Attributes (start) ---------------------*/
! 2579:
! 2580: /*----------------------------------------------------------------------
! 2581: PutInContent
! 2582: Put the string ChrString in the leaf of current element.
! 2583: ----------------------------------------------------------------------*/
! 2584: #ifdef __STDC__
! 2585: static Element PutInContent (STRING ChrString)
! 2586: #else
! 2587: static Element PutInContent (ChrString)
! 2588: STRING ChrString;
! 2589:
! 2590: #endif
! 2591: {
! 2592: Element el, child;
! 2593: ElementType elType;
! 2594: int length;
! 2595:
! 2596: el = NULL;
! 2597: if (lastElement != NULL)
! 2598: {
! 2599: /* search first leaf of current element */
! 2600: el = lastElement;
! 2601: do
! 2602: {
! 2603: child = TtaGetFirstChild (el);
! 2604: if (child != NULL)
! 2605: el = child;
! 2606: }
! 2607: while (child != NULL);
! 2608: elType = TtaGetElementType (el);
! 2609: length = 0;
! 2610: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT)
! 2611: length = TtaGetTextLength (el);
! 2612: if (length == 0)
! 2613: TtaSetTextContent (el, ChrString, currentLanguage, theDocument);
! 2614: else
! 2615: TtaAppendTextContent (el, ChrString, theDocument);
! 2616: }
! 2617: return el;
! 2618: }
! 2619:
! 2620: /*----------------------------------------------------------------------
! 2621: TypeAttrValue
! 2622: Value val has been read for the HTML attribute TYPE.
! 2623: Create a child for the current Thot element INPUT accordingly.
! 2624: ----------------------------------------------------------------------*/
! 2625: #ifdef __STDC__
! 2626: static void TypeAttrValue (CHAR_T* val)
! 2627: #else
! 2628: static void TypeAttrValue (val)
! 2629: CHAR_T* val;
! 2630:
! 2631: #endif
! 2632: {
! 2633: ElementType elType;
! 2634: Element newChild;
! 2635: AttributeType attrType;
! 2636: Attribute attr;
! 2637: CHAR_T msgBuffer[MaxMsgLength];
! 2638: int value;
! 2639:
! 2640: value = MapAttrValue (DummyAttribute, val);
! 2641: if (value < 0)
! 2642: {
! 2643: if (ustrlen (val) > MaxMsgLength - 40)
! 2644: val[MaxMsgLength - 40] = WC_EOS;
! 2645: usprintf (msgBuffer, TEXT("Unknown attribute value \"type = %s\""), val);
! 2646: ParseHTMLError (theDocument, msgBuffer);
! 2647: attrType.AttrSSchema = DocumentSSchema;
! 2648: attrType.AttrTypeNum = pHTMLAttributeMapping[0].ThotAttribute;
! 2649: usprintf (msgBuffer, TEXT("type=%s"), val);
! 2650: CreateAttr (lastElement, attrType, msgBuffer, TRUE);
! 2651: }
! 2652: else
! 2653: {
! 2654: elType = TtaGetElementType (lastElement);
! 2655: if (elType.ElTypeNum != HTML_EL_Input)
! 2656: {
! 2657: if (ustrlen (val) > MaxMsgLength - 40)
! 2658: val[MaxMsgLength - 40] = WC_EOS;
! 2659: usprintf (msgBuffer, TEXT("Duplicate attribute \"type = %s\""), val);
! 2660: }
! 2661: else
! 2662: {
! 2663: elType.ElSSchema = DocumentSSchema;
! 2664: elType.ElTypeNum = value;
! 2665: newChild = TtaNewTree (theDocument, elType, "");
! 2666: TtaSetElementLineNumber (newChild, NumberOfLinesRead);
! 2667: TtaInsertFirstChild (&newChild, lastElement, theDocument);
! 2668: if (value == HTML_EL_PICTURE_UNIT)
! 2669: {
! 2670: /* add the attribute IsInput to input pictures */
! 2671: attrType.AttrSSchema = elType.ElSSchema;
! 2672: attrType.AttrTypeNum = HTML_ATTR_IsInput;
! 2673: attr = TtaNewAttribute (attrType);
! 2674: TtaAttachAttribute (newChild, attr, theDocument);
! 2675: }
! 2676: }
! 2677: }
! 2678: }
! 2679:
! 2680: /*----------------------------------------------------------------------
! 2681: EndOfAttrName
! 2682: A XHTML attribute has been read. Create the
! 2683: corresponding Thot attribute.
! 2684: ----------------------------------------------------------------------*/
! 2685: #ifdef __STDC__
! 2686: static void EndOfAttrName (CHAR_T *attrName)
! 2687: #else
! 2688: static void EndOfAttrName (attrName)
! 2689: CHAR_T *attrName;
! 2690:
! 2691: #endif
! 2692: {
! 2693: AttributeMapping* tableEntry;
! 2694: AttributeType attrType;
! 2695: ElementType elType;
! 2696: Element child;
! 2697: Attribute attr;
! 2698: SSchema schema;
! 2699: CHAR_T translation;
! 2700: CHAR_T msgBuffer[MaxMsgLength];
! 2701:
! 2702: /* if a single '/' or '?' has been read instead of an attribute name, ignore
! 2703: that character. This is to accept the XML syntax for empty elements or
! 2704: processing instructions, such as <img src="SomeUrl" /> or
! 2705: <?xml version="1.0"?> */
! 2706:
! 2707: /* A FAIRE */
! 2708:
! 2709: /* get the corresponding Thot attribute */
! 2710: if (UnknownTag)
! 2711: /* ignore attributes of unknown tags */
! 2712: tableEntry = NULL;
! 2713: else
! 2714: tableEntry = MapAttr (attrName, &schema,
! 2715: lastElemEntry, theDocument);
! 2716:
! 2717: if (tableEntry == NULL)
! 2718: /* this attribute is not in the HTML mapping table */
! 2719: {
! 2720: if (ustrcasecmp (attrName, TEXT("xmlns")) == 0 ||
! 2721: ustrncasecmp (attrName, TEXT("xmlattrNamens:"), 6) == 0)
! 2722: /* this is a namespace declaration */
! 2723: {
! 2724: lastAttrEntry = NULL;
! 2725: /**** register this namespace ****/;
! 2726: }
! 2727: else
! 2728: if (ustrcasecmp (attrName, TEXT("xml:lang")) == 0)
! 2729: /* attribute xml:lang is not considered as invalid, but it is
! 2730: ignored */
! 2731: lastAttrEntry = NULL;
! 2732: else
! 2733: {
! 2734: if (ustrlen (attrName) > MaxMsgLength - 30)
! 2735: attrName[MaxMsgLength - 30] = WC_EOS;
! 2736: usprintf (msgBuffer,
! 2737: TEXT("Unknown attribute \"%s\""),
! 2738: attrName);
! 2739: ParseHTMLError (theDocument, msgBuffer);
! 2740: /* attach an Invalid_attribute to the current element */
! 2741: tableEntry = &pHTMLAttributeMapping[0];
! 2742: schema = DocumentSSchema;
! 2743: UnknownAttr = TRUE;
! 2744: }
! 2745: }
! 2746: else
! 2747: UnknownAttr = FALSE;
! 2748:
! 2749:
! 2750: if (tableEntry != NULL &&
! 2751: lastElement != NULL &&
! 2752: (!lastElementClosed || (lastElement != rootElement)))
! 2753: {
! 2754: lastAttrEntry = tableEntry;
! 2755: translation = lastAttrEntry->AttrOrContent;
! 2756:
! 2757: switch (translation)
! 2758: {
! 2759: case 'C': /* Content */
! 2760: /* Nothing to do yet: wait for attribute value */
! 2761: break;
! 2762:
! 2763: case 'A':
! 2764: /* create an attribute for current element */
! 2765: attrType.AttrSSchema = schema;
! 2766: attrType.AttrTypeNum = tableEntry->ThotAttribute;
! 2767: CreateAttr (lastElement, attrType, attrName,
! 2768: (ThotBool)(tableEntry == &pHTMLAttributeMapping[0]));
! 2769: if (attrType.AttrTypeNum == HTML_ATTR_HREF_)
! 2770: {
! 2771: elType = TtaGetElementType (lastElement);
! 2772: if (elType.ElTypeNum == HTML_EL_Anchor)
! 2773: /* attribute HREF for element Anchor */
! 2774: /* create attribute PseudoClass = link */
! 2775: {
! 2776: attrType.AttrTypeNum = HTML_ATTR_PseudoClass;
! 2777: attr = TtaNewAttribute (attrType);
! 2778: TtaAttachAttribute (lastElement, attr, theDocument);
! 2779: TtaSetAttributeText (attr, TEXT("link"),
! 2780: lastElement, theDocument);
! 2781: }
! 2782: }
! 2783: else if (attrType.AttrTypeNum == HTML_ATTR_Checked)
! 2784: {
! 2785: /* create Default-Checked attribute */
! 2786: child = TtaGetFirstChild (lastElement);
! 2787: if (child != NULL)
! 2788: {
! 2789: attrType.AttrSSchema = DocumentSSchema;
! 2790: attrType.AttrTypeNum = HTML_ATTR_DefaultChecked;
! 2791: attr = TtaNewAttribute (attrType);
! 2792: TtaAttachAttribute (child, attr, theDocument);
! 2793: TtaSetAttributeValue (attr, HTML_ATTR_DefaultChecked_VAL_Yes_,
! 2794: child, theDocument);
! 2795: }
! 2796: }
! 2797: else
! 2798: if (attrType.AttrTypeNum == HTML_ATTR_Selected)
! 2799: {
! 2800: /* create Default-Selected attribute */
! 2801: attrType.AttrSSchema = DocumentSSchema;
! 2802: attrType.AttrTypeNum = HTML_ATTR_DefaultSelected;
! 2803: attr = TtaNewAttribute (attrType);
! 2804: TtaAttachAttribute (lastElement, attr, theDocument);
! 2805: TtaSetAttributeValue (attr, HTML_ATTR_DefaultSelected_VAL_Yes_,
! 2806: lastElement, theDocument);
! 2807: }
! 2808: break;
! 2809:
! 2810: case SPACE:
! 2811: /* nothing to do */
! 2812: break;
! 2813:
! 2814: default:
! 2815: break;
! 2816: }
! 2817: }
! 2818: }
! 2819:
! 2820: /*----------------------------------------------------------------------
! 2821: EndOfAttrValue
! 2822: An attribute value has been read from the HTML file.
! 2823: Put that value in the current Thot attribute.
! 2824: ----------------------------------------------------------------------*/
! 2825: #ifdef __STDC__
! 2826: static void EndOfAttrValue (CHAR_T *attrValue)
! 2827: #else
! 2828: static void EndOfAttrValue (attrValue)
! 2829: CHAR_T *attrValue;
! 2830:
! 2831: #endif
! 2832: {
! 2833: AttributeType attrType, attrType1;
! 2834: Attribute attr;
! 2835: ElementType elType;
! 2836: Element child;
! 2837: Language lang;
! 2838: CHAR_T translation;
! 2839: char shape;
! 2840: STRING buffer;
! 2841: STRING attrName;
! 2842: int val;
! 2843: int length;
! 2844: int attrKind;
! 2845: ThotBool done;
! 2846: CHAR_T msgBuffer[MaxMsgLength];
! 2847:
! 2848: ReadingAnAttrValue = FALSE;
! 2849:
! 2850: if (UnknownAttr)
! 2851: /* this is the end of value of an invalid attribute. Keep the */
! 2852: /* quote character that ends the value for copying it into the */
! 2853: /* Invalid_attribute. */
! 2854: {
! 2855: /* What to do in thos case ? */
! 2856: }
! 2857:
! 2858: if (lastAttrEntry == NULL)
! 2859: {
! 2860: return;
! 2861: }
! 2862:
! 2863: done = FALSE;
! 2864: if (lastElementClosed && (lastElement == rootElement))
! 2865: /* an attribute after the tag </html>, ignore it */
! 2866: done = TRUE;
! 2867:
! 2868: else
! 2869: /* treatments of some particular HTML attributes */
! 2870: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("style")))
! 2871: {
! 2872: # ifndef STANDALONE
! 2873: TtaSetAttributeText (lastAttribute, attrValue,
! 2874: lastAttrElement, theDocument);
! 2875: ParseHTMLSpecificStyle (lastElement, attrValue,
! 2876: theDocument, FALSE);
! 2877: # endif
! 2878: done = TRUE;
! 2879: }
! 2880: # ifndef STANDALONE
! 2881: else
! 2882: {
! 2883: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("link")))
! 2884: HTMLSetAlinkColor (theDocument, attrValue);
! 2885: else
! 2886: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("alink")))
! 2887: HTMLSetAactiveColor (theDocument, attrValue);
! 2888: else
! 2889: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("vlink")))
! 2890: HTMLSetAvisitedColor (theDocument, attrValue);
! 2891: }
! 2892: # endif
! 2893:
! 2894: if (!done)
! 2895: {
! 2896: val = 0;
! 2897: translation = lastAttrEntry->AttrOrContent;
! 2898:
! 2899: switch (translation)
! 2900: {
! 2901: case 'C': /* Content */
! 2902: child = PutInContent (attrValue);
! 2903: if (child != NULL)
! 2904: TtaAppendTextContent (child, TEXT("\" "), theDocument);
! 2905: break;
! 2906:
! 2907: case 'A':
! 2908: if (lastAttribute != NULL)
! 2909: {
! 2910: TtaGiveAttributeType (lastAttribute, &attrType, &attrKind);
! 2911: switch (attrKind)
! 2912: {
! 2913: case 0: /* enumerate */
! 2914: val = MapAttrValue (lastAttrEntry->ThotAttribute, attrValue);
! 2915: if (val < 0)
! 2916: {
! 2917: TtaGiveAttributeType (lastAttribute, &attrType, &attrKind);
! 2918: attrName = TtaGetAttributeName (attrType);
! 2919: if (ustrlen (attrValue) > MaxMsgLength - 30)
! 2920: attrValue[MaxMsgLength - 30] = WC_EOS;
! 2921: usprintf (msgBuffer,
! 2922: TEXT("Unknown attribute value \"%s = %s\""),
! 2923: attrName, attrValue);
! 2924: ParseHTMLError (theDocument, msgBuffer);
! 2925: /* remove the attribute and replace it by an */
! 2926: /* Invalid_attribute */
! 2927: TtaRemoveAttribute (lastAttrElement,
! 2928: lastAttribute, theDocument);
! 2929: attrType.AttrSSchema = DocumentSSchema;
! 2930: attrType.AttrTypeNum = pHTMLAttributeMapping[0].ThotAttribute;
! 2931: usprintf (msgBuffer, TEXT("%s=%s"), attrName, attrValue);
! 2932: CreateAttr (lastAttrElement, attrType, msgBuffer, TRUE);
! 2933: }
! 2934: else
! 2935: TtaSetAttributeValue (lastAttribute, val,
! 2936: lastAttrElement, theDocument);
! 2937: break;
! 2938: case 1: /* integer */
! 2939: if (attrType.AttrTypeNum == HTML_ATTR_Border &&
! 2940: !ustrcasecmp (attrValue, TEXT("border")))
! 2941: {
! 2942: /* border="border" for a table */
! 2943: val = 1;
! 2944: TtaSetAttributeValue (lastAttribute, val,
! 2945: lastAttrElement, theDocument);
! 2946: }
! 2947: else
! 2948: if (usscanf (attrValue, TEXT("%d"), &val))
! 2949: TtaSetAttributeValue (lastAttribute, val,
! 2950: lastAttrElement, theDocument);
! 2951: else
! 2952: {
! 2953: TtaRemoveAttribute (lastAttrElement,
! 2954: lastAttribute, theDocument);
! 2955: usprintf (msgBuffer,
! 2956: TEXT("Invalid attribute value \"%s\""),
! 2957: attrValue);
! 2958: ParseHTMLError (theDocument, msgBuffer);
! 2959: }
! 2960: break;
! 2961: case 2: /* text */
! 2962: if (!UnknownAttr)
! 2963: {
! 2964: TtaSetAttributeText (lastAttribute, attrValue,
! 2965: lastAttrElement, theDocument);
! 2966: if (attrType.AttrTypeNum == HTML_ATTR_Langue)
! 2967: {
! 2968: /* it's a LANG attribute value */
! 2969: lang = TtaGetLanguageIdFromName (attrValue);
! 2970: if (lang == 0)
! 2971: {
! 2972: usprintf (msgBuffer,
! 2973: TEXT("Unknown language: %s"),
! 2974: attrValue);
! 2975: ParseHTMLError (theDocument, msgBuffer);
! 2976: }
! 2977: else
! 2978: {
! 2979: /* change current language */
! 2980: currentLanguage = lang;
! 2981: LanguageStack[StackLevel - 1] = currentLanguage;
! 2982: }
! 2983: if (!TtaGetParent (lastAttrElement))
! 2984: /* it's a LANG attribute on the root element */
! 2985: /* set the RealLang attribute */
! 2986: {
! 2987: attrType1.AttrSSchema = DocumentSSchema;
! 2988: attrType1.AttrTypeNum = HTML_ATTR_RealLang;
! 2989: attr = TtaNewAttribute (attrType1);
! 2990: TtaAttachAttribute (lastAttrElement,
! 2991: attr, theDocument);
! 2992: TtaSetAttributeValue (attr,
! 2993: HTML_ATTR_RealLang_VAL_Yes_,
! 2994: lastAttrElement, theDocument);
! 2995: }
! 2996: }
! 2997: }
! 2998: else
! 2999: /* this is the content of an invalid attribute */
! 3000: /* append it to the current Invalid_attribute */
! 3001: {
! 3002: length = ustrlen (attrValue) + 2;
! 3003: length += TtaGetTextAttributeLength (lastAttribute);
! 3004: buffer = TtaAllocString (length + 1);
! 3005: TtaGiveTextAttributeValue (lastAttribute,
! 3006: buffer, &length);
! 3007: ustrcat (buffer, TEXT("="));
! 3008: ustrcat (buffer, attrValue);
! 3009: TtaSetAttributeText (lastAttribute, buffer,
! 3010: lastAttrElement, theDocument);
! 3011: TtaFreeMemory (buffer);
! 3012: }
! 3013: break;
! 3014: case 3: /* reference */
! 3015: break;
! 3016: }
! 3017: }
! 3018: break;
! 3019:
! 3020: case SPACE:
! 3021: TypeAttrValue (attrValue);
! 3022: break;
! 3023:
! 3024: default:
! 3025: break;
! 3026: }
! 3027:
! 3028: if (lastAttrEntry->ThotAttribute == HTML_ATTR_Width__)
! 3029: /* HTML attribute "width" for a table or a hr */
! 3030: /* create the corresponding attribute IntWidthPercent or */
! 3031: /* IntWidthPxl */
! 3032: CreateAttrWidthPercentPxl (attrValue,
! 3033: lastAttrElement, theDocument, -1);
! 3034: else
! 3035: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("size")))
! 3036: {
! 3037: TtaGiveAttributeType (lastAttribute, &attrType, &attrKind);
! 3038: if (attrType.AttrTypeNum == HTML_ATTR_Font_size)
! 3039: CreateAttrIntSize (attrValue,
! 3040: lastAttrElement,
! 3041: theDocument);
! 3042: }
! 3043: else
! 3044: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("shape")))
! 3045: {
! 3046: child = TtaGetFirstChild (lastAttrElement);
! 3047: if (child != NULL)
! 3048: {
! 3049: switch (val)
! 3050: {
! 3051: case HTML_ATTR_shape_VAL_rectangle:
! 3052: shape = 'R';
! 3053: break;
! 3054: case HTML_ATTR_shape_VAL_circle:
! 3055: shape = 'a';
! 3056: break;
! 3057: case HTML_ATTR_shape_VAL_polygon:
! 3058: shape = 'p';
! 3059: break;
! 3060: default:
! 3061: shape = SPACE;
! 3062: break;
! 3063: }
! 3064: TtaSetGraphicsShape (child, shape, theDocument);
! 3065: }
! 3066: }
! 3067: else
! 3068: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("value")))
! 3069: {
! 3070: elType = TtaGetElementType (lastAttrElement);
! 3071: if (elType.ElTypeNum == HTML_EL_Text_Input ||
! 3072: elType.ElTypeNum == HTML_EL_Password_Input ||
! 3073: elType.ElTypeNum == HTML_EL_File_Input ||
! 3074: elType.ElTypeNum == HTML_EL_Input)
! 3075: /* create a Default_Value attribute with the same content */
! 3076: {
! 3077: attrType1.AttrSSchema = attrType.AttrSSchema;
! 3078: attrType1.AttrTypeNum = HTML_ATTR_Default_Value;
! 3079: attr = TtaNewAttribute (attrType1);
! 3080: TtaAttachAttribute (lastAttrElement, attr, theDocument);
! 3081: TtaSetAttributeText (attr, attrValue,
! 3082: lastAttrElement, theDocument);
! 3083: }
! 3084: }
! 3085: #ifndef STANDALONE
! 3086: /* Some HTML attributes are equivalent to a CSS property: */
! 3087: /* background -> background */
! 3088: /* bgcolor -> background */
! 3089: /* text -> color */
! 3090: /* color -> color */
! 3091: else
! 3092: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("background")))
! 3093: {
! 3094: if (ustrlen (attrValue) > MaxMsgLength - 30)
! 3095: attrValue[MaxMsgLength - 30] = WC_EOS;
! 3096: usprintf (msgBuffer,
! 3097: TEXT("background: url(%s)"),
! 3098: attrValue );
! 3099: ParseHTMLSpecificStyle (lastElement, msgBuffer,
! 3100: theDocument, FALSE);
! 3101: }
! 3102: else
! 3103: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("bgcolor")))
! 3104: HTMLSetBackgroundColor (theDocument,
! 3105: lastElement, attrValue);
! 3106: else
! 3107: if (!ustrcmp (lastAttrEntry->XMLattribute, TEXT("text")) ||
! 3108: !ustrcmp (lastAttrEntry->XMLattribute, TEXT("color")))
! 3109: HTMLSetForegroundColor (theDocument,
! 3110: lastElement, attrValue);
! 3111: #endif /* !STANDALONE */
! 3112: }
! 3113: }
! 3114: /*-------------------- Attributes (end) ---------------------*/
! 3115:
! 3116:
! 3117: /*-------------------- Entities (start) ---------------------*/
! 3118:
! 3119: /*----------------------------------------------------------------------
! 3120: PutNonISOlatin1Char put a Unicode character in the input buffer.
! 3121: ----------------------------------------------------------------------*/
! 3122: #ifdef __STDC__
! 3123: static void PutNonISOlatin1Char (int code,
! 3124: STRING prefix,
! 3125: STRING entityName)
! 3126: #else
! 3127: static void PutNonISOlatin1Char (code,
! 3128: prefix,
! 3129: entityName)
! 3130: int code;
! 3131: STRING prefix;
! 3132: STRING entityName;
! 3133: #endif
! 3134: {
! 3135: Language lang, l;
! 3136: ElementType elType;
! 3137: Element elText;
! 3138: AttributeType attrType;
! 3139: Attribute attr;
! 3140: CHAR_T buffer[MaxEntityLength+10];
! 3141:
! 3142: if (ReadingAnAttrValue)
! 3143: /* this entity belongs to an attribute value */
! 3144: {
! 3145: /* Thot can't mix different languages in the same attribute value */
! 3146: /* just discard that character */
! 3147: ;
! 3148: }
! 3149: else
! 3150: /* this entity belongs to the element contents */
! 3151: {
! 3152: MergeText = FALSE;
! 3153: /* create a new text leaf */
! 3154: elType.ElSSchema = DocumentSSchema;
! 3155: elType.ElTypeNum = HTML_EL_TEXT_UNIT;
! 3156: elText = TtaNewElement (theDocument, elType);
! 3157: TtaSetElementLineNumber (elText, NumberOfLinesRead);
! 3158: InsertElement (&elText);
! 3159: lastElementClosed = TRUE;
! 3160: /* try to find a fallback character */
! 3161: l = currentLanguage;
! 3162: GetFallbackCharacter (code, buffer, &lang);
! 3163: /* put that fallback character in the new text leaf */
! 3164: TtaSetTextContent (elText, buffer, lang, theDocument);
! 3165: currentLanguage = l;
! 3166: /* make that text leaf read-only */
! 3167: TtaSetAccessRight (elText, ReadOnly, theDocument);
! 3168: /* associate an attribute EntityName with the new text leaf */
! 3169: attrType.AttrSSchema = DocumentSSchema;
! 3170: attrType.AttrTypeNum = HTML_ATTR_EntityName;
! 3171: attr = TtaNewAttribute (attrType);
! 3172: TtaAttachAttribute (elText, attr, theDocument);
! 3173: ustrcpy (buffer, prefix);
! 3174: ustrcat (buffer, entityName);
! 3175: TtaSetAttributeText (attr, buffer, elText, theDocument);
! 3176: MergeText = FALSE;
! 3177: }
! 3178: }
! 3179:
! 3180: /*---------------------------------------------------------------------------
! 3181: XHTMLMapEntity
! 3182: Search that entity in the entity table and return the corresponding value.
! 3183: ---------------------------------------------------------------------------*/
! 3184: #ifdef __STDC__
! 3185: void XHTMLMapEntity (STRING entityName,
! 3186: STRING entityValue,
! 3187: int valueLength,
! 3188: STRING alphabet)
! 3189: #else
! 3190: void XHTMLMapEntity (entityName,
! 3191: entityValue,
! 3192: valueLength,
! 3193: alphabet)
! 3194: STRING entityName;
! 3195: STRING entityValue;
! 3196: int valueLength;
! 3197: STRING alphabet;
! 3198:
! 3199: #endif
! 3200:
! 3201: {
! 3202: int i;
! 3203: ThotBool found = FALSE;
! 3204:
! 3205: i = 0;
! 3206: while ((XHTMLEntityTable[i].charName[0] < entityName[0]) &&
! 3207: (XHTMLEntityTable[i].charCode != 0))
! 3208: i++;
! 3209:
! 3210: while ((XHTMLEntityTable[i].charName[0] == entityName[0]) &&
! 3211: (XHTMLEntityTable[i].charCode != 0 &&
! 3212: !found))
! 3213: {
! 3214: if (!ustrcmp (entityName, XHTMLEntityTable[i].charName))
! 3215: found = TRUE;
! 3216: else
! 3217: i++;
! 3218: }
! 3219: }
! 3220:
! 3221: /*----------------------------------------------------------------------
! 3222: XHTMLEntityCreated
! 3223: A XTHML entity has been created by the XML parser.
! 3224: ----------------------------------------------------------------------*/
! 3225: #ifdef __STDC__
! 3226: void XHTMLEntityCreated (USTRING entityValue,
! 3227: Language lang,
! 3228: STRING entityName,
! 3229: Document doc)
! 3230: #else
! 3231: void XHTMLEntityCreated (entityValue,
! 3232: lang,
! 3233: entityName,
! 3234: doc)
! 3235: USTRING entityValue;
! 3236: Language lang;
! 3237: STRING entityName;
! 3238: Document doc;
! 3239:
! 3240: #endif
! 3241: {
! 3242: }
! 3243:
! 3244: /*-------------------- Entities (end) ---------------------*/
! 3245:
Webmaster