/* * * (c) COPYRIGHT MIT and INRIA, 1996. * Please first read the full copyright statement in file COPYRIGHT. * */ /* * * Builds the corresponding abstract tree for a Thot document of type HTML. * * Authors: L. Carcone * V. Quint */ #define THOT_EXPORT extern #include "amaya.h" #include "css.h" #include "parser.h" #include "HTML.h" #include "css_f.h" #include "fetchXMLname_f.h" #include "html2thot_f.h" #include "HTMLactions_f.h" #include "HTMLedit_f.h" #include "HTMLform_f.h" #include "HTMLimage_f.h" #include "HTMLtable_f.h" #include "HTMLimage_f.h" #include "UIcss_f.h" #include "styleparser_f.h" #include "XHTMLbuilder_f.h" #include "Xml2thot_f.h" /* maximum length of a Thot structure schema name */ #define MAX_SS_NAME_LENGTH 32 XmlEntity XhtmlEntityTable[] = { /* This table MUST be in alphabetical order */ {TEXT("AElig"), 198, TEXT(' ')}, /* latin capital letter AE = */ /* latin capital ligature AE, U+00C6 ISOlat1 */ {TEXT("Aacute"), 193, TEXT(' ')}, /* latin capital letter A with acute, U+00C1 ISOlat1 */ {TEXT("Acirc"), 194, TEXT(' ')}, /* latin capital letter A with circumflex, U+00C2 ISOlat1 */ {TEXT("Agrave"), 192, TEXT(' ')}, /* latin capital letter A with grave = */ /* latin capital letter A grave, U+00C0 ISOlat1 */ {TEXT("Alpha"), 913, TEXT(' ')}, /* greek capital letter alpha, U+0391 */ {TEXT("Aring"), 197, TEXT(' ')}, /* latin capital letter A with ring above = */ /* latin capital letter A ring, U+00C5 ISOlat1 */ {TEXT("Atilde"), 195, TEXT(' ')}, /* latin capital letter A with tilde, U+00C3 ISOlat1 */ {TEXT("Auml"), 196, TEXT(' ')}, /* latin capital letter A with diaeresis, U+00C4 ISOlat1 */ {TEXT("Beta"), 914, TEXT(' ')}, /* greek capital letter beta, U+0392 */ {TEXT("Ccedil"), 199, TEXT(' ')}, /* latin capital letter C with cedilla, U+00C7 ISOlat1 */ {TEXT("Chi"), 935, TEXT(' ')}, /* greek capital letter chi, U+03A7 */ {TEXT("Dagger"), 8225, TEXT(' ')}, /* double dagger, U+2021 ISOpub */ {TEXT("Delta"), 916, TEXT(' ')}, /* greek capital letter delta, U+0394 ISOgrk3 */ {TEXT("ETH"), 208, TEXT(' ')}, /* latin capital letter ETH, U+00D0 ISOlat1 */ {TEXT("Eacute"), 201, TEXT(' ')}, /* latin capital letter E with acute, U+00C9 ISOlat1 */ {TEXT("Ecirc"), 202, TEXT(' ')}, /* latin capital letter E with circumflex, U+00CA ISOlat1 */ {TEXT("Egrave"), 200, TEXT(' ')}, /* latin capital letter E with grave, U+00C8 ISOlat1 */ {TEXT("Epsilon"), 917, TEXT(' ')}, /* greek capital letter epsilon, U+0395 */ {TEXT("Eta"), 919, TEXT(' ')}, /* greek capital letter eta, U+0397 */ {TEXT("Euml"), 203, TEXT(' ')}, /* latin capital letter E with diaeresis, U+00CB ISOlat1 */ {TEXT("Gamma"), 915, TEXT(' ')}, /* greek capital letter gamma, U+0393 ISOgrk3 */ {TEXT("Iacute"), 205, TEXT(' ')}, /* latin capital letter I with acute, U+00CD ISOlat1 */ {TEXT("Icirc"), 206, TEXT(' ')}, /* latin capital letter I with circumflex, U+00CE ISOlat1 */ {TEXT("Igrave"), 204, TEXT(' ')}, /* latin capital letter I with grave, U+00CC ISOlat1 */ {TEXT("Iota"), 921, TEXT(' ')}, /* greek capital letter iota, U+0399 */ {TEXT("Iuml"), 207, TEXT(' ')}, /* latin capital letter I with diaeresis, U+00CF ISOlat1 */ {TEXT("Kappa"), 922, TEXT(' ')}, /* greek capital letter kappa, U+039A */ {TEXT("Lambda"), 923, TEXT(' ')}, /* greek capital letter lambda, U+039B ISOgrk3 */ {TEXT("Mu"), 924, TEXT(' ')}, /* greek capital letter mu, U+039C */ {TEXT("Ntilde"), 209, TEXT(' ')}, /* latin capital letter N with tilde, U+00D1 ISOlat1 */ {TEXT("Nu"), 925, TEXT(' ')}, /* greek capital letter nu, U+039D */ {TEXT("OElig"), 338, TEXT(' ')}, /* latin capital ligature OE, U+0152 ISOlat2 */ {TEXT("Oacute"), 211, TEXT(' ')}, /* latin capital letter O with acute, U+00D3 ISOlat1 */ {TEXT("Ocirc"), 212, TEXT(' ')}, /* latin capital letter O with circumflex, U+00D4 ISOlat1 */ {TEXT("Ograve"), 210, TEXT(' ')}, /* latin capital letter O with grave, U+00D2 ISOlat1 */ {TEXT("Omega"), 937, TEXT(' ')}, /* greek capital letter omega, U+03A9 ISOgrk3 */ {TEXT("Omicron"), 927, TEXT(' ')}, /* greek capital letter omicron, U+039F */ {TEXT("Oslash"), 216, TEXT(' ')}, /* latin capital letter O with stroke = */ /* latin capital letter O slash, U+00D8 ISOlat1 */ {TEXT("Otilde"), 213, TEXT(' ')}, /* latin capital letter O with tilde, U+00D5 ISOlat1 */ {TEXT("Ouml"), 214, TEXT(' ')}, /* latin capital letter O with diaeresis, U+00D6 ISOlat1 */ {TEXT("Phi"), 934, TEXT(' ')}, /* greek capital letter phi, U+03A6 ISOgrk3 */ {TEXT("Pi"), 928, TEXT(' ')}, /* greek capital letter pi, U+03A0 ISOgrk3 */ {TEXT("Prime"), 8243, TEXT(' ')}, /* double prime = seconds = inches, U+2033 ISOtech */ {TEXT("Psi"), 936, TEXT(' ')}, /* greek capital letter psi, U+03A8 ISOgrk3 */ {TEXT("Rho"), 929, TEXT(' ')}, /* greek capital letter rho, U+03A1 */ {TEXT("Scaron"), 352, TEXT(' ')}, /* latin capital letter S with caron, U+0160 ISOlat2 */ {TEXT("Sigma"), 931, TEXT(' ')}, /* greek capital letter sigma, U+03A3 ISOgrk3 */ {TEXT("THORN"), 222, TEXT(' ')}, /* latin capital letter THORN, U+00DE ISOlat1 */ {TEXT("Tau"), 932, TEXT(' ')}, /* greek capital letter tau, U+03A4 */ {TEXT("Theta"), 920, TEXT(' ')}, /* greek capital letter theta, U+0398 ISOgrk3 */ {TEXT("Uacute"), 218, TEXT(' ')}, /* latin capital letter U with acute, U+00DA ISOlat1 */ {TEXT("Ucirc"), 219, TEXT(' ')}, /* latin capital letter U with circumflex, U+00DB ISOlat1 */ {TEXT("Ugrave"), 217, TEXT(' ')}, /* latin capital letter U with grave, U+00D9 ISOlat1 */ {TEXT("Upsilon"), 933, TEXT(' ')}, /* greek capital letter upsilon, U+03A5 ISOgrk3 */ {TEXT("Uuml"), 220, TEXT(' ')}, /* latin capital letter U with diaeresis, U+00DC ISOlat1 */ {TEXT("Xi"), 926, TEXT(' ')}, /* greek capital letter xi, U+039E ISOgrk3 */ {TEXT("Yacute"), 221, TEXT(' ')}, /* latin capital letter Y with acute, U+00DD ISOlat1 */ {TEXT("Yuml"), 376, TEXT(' ')}, /* latin capital letter Y with diaeresis, U+0178 ISOlat2 */ {TEXT("Zeta"), 918, TEXT(' ')}, /* greek capital letter zeta, U+0396 */ {TEXT("aacute"), 225, TEXT(' ')}, /* latin small letter a with acute, U+00E1 ISOlat1 */ {TEXT("acirc"), 226, TEXT(' ')}, /* latin small letter a with circumflex, U+00E2 ISOlat1 */ {TEXT("acute"), 180, TEXT(' ')}, /* acute accent = spacing acute, U+00B4 ISOdia */ {TEXT("aelig"), 230, TEXT(' ')}, /* latin small letter ae = */ /* latin small ligature ae, U+00E6 ISOlat1 */ {TEXT("agrave"), 224, TEXT(' ')}, /* latin small letter a with grave = */ /* latin small letter a grave, U+00E0 ISOlat1 */ {TEXT("alefsym"), 8501, TEXT(' ')}, /* alef symbol = first transfinite cardinal, U+2135 NEW */ {TEXT("alpha"), 945, TEXT(' ')}, /* greek small letter alpha, U+03B1 ISOgrk3 */ {TEXT("amp"), 38, TEXT(' ')}, /* ampersand, U+0026 ISOnum */ {TEXT("and"), 8743, TEXT(' ')}, /* logical and = wedge, U+2227 ISOtech */ {TEXT("ang"), 8736, TEXT(' ')}, /* angle, U+2220 ISOamso */ {TEXT("aring"), 229, TEXT(' ')}, /* latin small letter a with ring above = */ /* latin small letter a ring, U+00E5 ISOlat1 */ {TEXT("asymp"), 8776, TEXT(' ')}, /* almost equal to = asymptotic to, U+2248 ISOamsr */ {TEXT("atilde"), 227, TEXT(' ')}, /* latin small letter a with tilde, U+00E3 ISOlat1 */ {TEXT("auml"), 228, TEXT(' ')}, /* latin small letter a with diaeresis, U+00E4 ISOlat1 */ {TEXT("bdquo"), 8222}, /* double low-9 quotation mark, U+201E NEW */ {TEXT("beta"), 946, TEXT(' ')}, /* greek small letter beta, U+03B2 ISOgrk3 */ {TEXT("brvbar"), 166, TEXT(' ')}, /* broken bar = broken vertical bar, U+00A6 ISOnum */ {TEXT("bull"), 8226, TEXT(' ')}, /* bullet = black small circle, U+2022 ISOpub */ {TEXT("cap"), 8745, TEXT(' ')}, /* intersection = cap, U+2229 ISOtech */ {TEXT("ccedil"), 231, TEXT(' ')}, /* latin small letter c with cedilla, U+00E7 ISOlat1 */ {TEXT("cedil"), 184, TEXT(' ')}, /* cedilla = spacing cedilla, U+00B8 ISOdia */ {TEXT("cent"), 162, TEXT(' ')}, /* cent sign, U+00A2 ISOnum */ {TEXT("chi"), 967, TEXT(' ')}, /* greek small letter chi, U+03C7 ISOgrk3 */ {TEXT("circ"), 710, TEXT(' ')}, /* modifier letter circumflex accent, U+02C6 ISOpub */ {TEXT("clubs"), 9827, TEXT(' ')}, /* black club suit = shamrock, U+2663 ISOpub */ {TEXT("cong"), 8773, TEXT(' ')}, /* approximately equal to, U+2245 ISOtech */ {TEXT("copy"), 169, TEXT(' ')}, /* copyright sign, U+00A9 ISOnum */ {TEXT("crarr"), 8629, TEXT(' ')}, /* downwards arrow with corner leftwards = */ /* carriage return, U+21B5 NEW */ {TEXT("cup"), 8746, TEXT(' ')}, /* union = cup, U+222A ISOtech */ {TEXT("curren"), 164, TEXT(' ')}, /* currency sign, U+00A4 ISOnum */ {TEXT("dArr"), 8659, TEXT(' ')}, /* downwards double arrow, U+21D3 ISOamsa */ {TEXT("dagger"), 8224, TEXT(' ')}, /* dagger, U+2020 ISOpub */ {TEXT("darr"), 8595, TEXT(' ')}, /* downwards arrow, U+2193 ISOnum */ {TEXT("deg"), 176, TEXT(' ')}, /* degree sign, U+00B0 ISOnum */ {TEXT("delta"), 948, TEXT(' ')}, /* greek small letter delta, U+03B4 ISOgrk3 */ {TEXT("diams"), 9830, TEXT(' ')}, /* black diamond suit, U+2666 ISOpub */ {TEXT("divide"), 247, TEXT(' ')}, /* division sign, U+00F7 ISOnum */ {TEXT("eacute"), 233, TEXT(' ')}, /* latin small letter e with acute, U+00E9 ISOlat1 */ {TEXT("ecirc"), 234, TEXT(' ')}, /* latin small letter e with circumflex, U+00EA ISOlat1 */ {TEXT("egrave"), 232, TEXT(' ')}, /* latin small letter e with grave, U+00E8 ISOlat1 */ {TEXT("empty"), 8709, TEXT(' ')}, /* empty set = null set = diameter, U+2205 ISOamso */ {TEXT("emsp"), 8195, TEXT(' ')}, /* em space, U+2003 ISOpub */ {TEXT("ensp"), 8194, TEXT(' ')}, /* en space, U+2002 ISOpub */ {TEXT("epsilon"), 949, TEXT(' ')}, /* greek small letter epsilon, U+03B5 ISOgrk3 */ {TEXT("equiv"), 8801, TEXT(' ')}, /* identical to, U+2261 ISOtech */ {TEXT("eta"), 951, TEXT(' ')}, /* greek small letter eta, U+03B7 ISOgrk3 */ {TEXT("eth"), 240, TEXT(' ')}, /* latin small letter eth, U+00F0 ISOlat1 */ {TEXT("euml"), 235, TEXT(' ')}, /* latin small letter e with diaeresis, U+00EB ISOlat1 */ {TEXT("euro"), 8364, TEXT(' ')}, /* euro sign, U+20AC NEW */ {TEXT("exist"), 8707, TEXT(' ')}, /* there exists, U+2203 ISOtech */ {TEXT("fnof"), 402, TEXT(' ')}, /* latin small f with hook = function = */ /* florin, U+0192 ISOtech */ {TEXT("forall"), 8704, TEXT(' ')}, /* for all, U+2200 ISOtech */ {TEXT("frac12"), 189, TEXT(' ')}, /* vulgar fraction one half = */ /*fraction one half, U+00BD ISOnum */ {TEXT("frac14"), 188, TEXT(' ')}, /* vulgar fraction one quarter = */ /* fraction one quarter, U+00BC ISOnum */ {TEXT("frac34"), 190, TEXT(' ')}, /* vulgar fraction three quarters = */ /* fraction three quarters, U+00BE ISOnum */ {TEXT("frasl"), 8260, TEXT(' ')}, /* fraction slash, U+2044 NEW */ {TEXT("gamma"), 947, TEXT(' ')}, /* greek small letter gamma, U+03B3 ISOgrk3 */ {TEXT("ge"), 8805, TEXT(' ')}, /* greater-than or equal to, U+2265 ISOtech */ {TEXT("gt"), 62, TEXT(' ')}, /* greater-than sign, U+003E ISOnum */ {TEXT("hArr"), 8660, TEXT(' ')}, /* left right double arrow, U+21D4 ISOamsa */ {TEXT("harr"), 8596, TEXT(' ')}, /* left right arrow, U+2194 ISOamsa */ {TEXT("hearts"), 9829, TEXT(' ')}, /* black heart suit = valentine, U+2665 ISOpub */ {TEXT("hellip"), 8230, TEXT(' ')}, /* horizontal ellipsis = three dot leader, U+2026 ISOpub */ {TEXT("hyphen"), 173, TEXT(' ')}, /* hyphen = discretionary hyphen, U+00AD ISOnum */ {TEXT("iacute"), 237, TEXT(' ')}, /* latin small letter i with acute, U+00ED ISOlat1 */ {TEXT("icirc"), 238, TEXT(' ')}, /* latin small letter i with circumflex, U+00EE ISOlat1 */ {TEXT("iexcl"), 161, TEXT(' ')}, /* inverted exclamation mark, U+00A1 ISOnum */ {TEXT("igrave"), 236, TEXT(' ')}, /* latin small letter i with grave, U+00EC ISOlat1 */ {TEXT("image"), 8465, TEXT(' ')}, /* blackletter capital I = imaginary part, U+2111 ISOamso */ {TEXT("infin"), 8734, TEXT(' ')}, /* infinity, U+221E ISOtech */ {TEXT("int"), 8747, TEXT(' ')}, /* integral, U+222B ISOtech */ {TEXT("iota"), 953, TEXT(' ')}, /* greek small letter iota, U+03B9 ISOgrk3 */ {TEXT("iquest"), 191, TEXT(' ')}, /* inverted question mark = */ /* turned question mark, U+00BF ISOnum */ {TEXT("isin"), 8712, TEXT(' ')}, /* element of, U+2208 ISOtech */ {TEXT("iuml"), 239, TEXT(' ')}, /* latin small letter i with diaeresis, U+00EF ISOlat1 */ {TEXT("kappa"), 954, TEXT(' ')}, /* greek small letter kappa, U+03BA ISOgrk3 */ {TEXT("lArr"), 8656, TEXT(' ')}, /* leftwards double arrow, U+21D0 ISOtech */ {TEXT("lambda"), 955, TEXT(' ')}, /* greek small letter lambda, U+03BB ISOgrk3 */ {TEXT("lang"), 9001, TEXT(' ')}, /* left-pointing angle bracket = bra, U+2329 ISOtech */ {TEXT("laquo"), 171, TEXT(' ')}, /* left-pointing double angle quotation mark = */ /* left pointing guillemet, U+00AB ISOnum */ {TEXT("larr"), 8592, TEXT(' ')}, /* leftwards arrow, U+2190 ISOnum */ {TEXT("lceil"), 8968, TEXT(' ')}, /* left ceiling = apl upstile, U+2308 ISOamsc */ {TEXT("ldquo"), 8220, TEXT(' ')}, /* left double quotation mark, U+201C ISOnum */ {TEXT("le"), 8804, TEXT(' ')}, /* less-than or equal to, U+2264 ISOtech */ {TEXT("lfloor"), 8970, TEXT(' ')}, /* left floor = apl downstile, U+230A ISOamsc */ {TEXT("lowast"), 8727, TEXT(' ')}, /* asterisk operator, U+2217 ISOtech */ {TEXT("loz"), 9674, TEXT(' ')}, /* lozenge, U+25CA ISOpub */ {TEXT("lrm"), 8206, TEXT(' ')}, /* left-to-right mark, U+200E NEW RFC 2070 */ {TEXT("lsaquo"), 8249, TEXT(' ')}, /* single left-pointing angle quotation mark, */ /* U+2039 ISO proposed */ {TEXT("lsquo"), 8216, TEXT(' ')}, /* left single quotation mark, U+2018 ISOnum */ {TEXT("lt"), 60, TEXT(' ')}, /* less-than sign, U+003C ISOnum */ {TEXT("macr"), 175, TEXT(' ')}, /* macron = spacing macron = overline = APL overbar, */ /* U+00AF ISOdia */ {TEXT("mdash"), 8212, TEXT(' ')}, /* em dash, U+2014 ISOpub */ {TEXT("micro"), 181, TEXT(' ')}, /* micro sign, U+00B5 ISOnum */ {TEXT("middot"), 183, TEXT(' ')}, /* middle dot = Georgian comma = */ /* Greek middle dot, U+00B7 ISOnum */ {TEXT("minus"), 8722, TEXT(' ')}, /* minus sign, U+2212 ISOtech */ {TEXT("mu"), 956, TEXT(' ')}, /* greek small letter mu, U+03BC ISOgrk3 */ {TEXT("nabla"), 8711, TEXT(' ')}, /* nabla = backward difference, U+2207 ISOtech */ {TEXT("nbsp"), 160, TEXT(' ')}, /* no-break space = non-breaking space, U+00A0 ISOnum */ {TEXT("ndash"), 8211, TEXT(' ')}, /* en dash, U+2013 ISOpub */ {TEXT("ne"), 8800, TEXT(' ')}, /* not equal to, U+2260 ISOtech */ {TEXT("ni"), 8715, TEXT(' ')}, /* contains as member, U+220B ISOtech */ {TEXT("not"), 172, TEXT(' ')}, /* not sign, U+00AC ISOnum */ {TEXT("notin"), 8713, TEXT(' ')}, /* not an element of, U+2209 ISOtech */ {TEXT("nsub"), 8836, TEXT(' ')}, /* not a subset of, U+2284 ISOamsn */ {TEXT("ntilde"), 241, TEXT(' ')}, /* latin small letter n with tilde, U+00F1 ISOlat1 */ {TEXT("nu"), 957, TEXT(' ')}, /* greek small letter nu, U+03BD ISOgrk3 */ {TEXT("oacute"), 243, TEXT(' ')}, /* latin small letter o with acute, U+00F3 ISOlat1 */ {TEXT("ocirc"), 244, TEXT(' ')}, /* latin small letter o with circumflex, U+00F4 ISOlat1 */ {TEXT("oelig"), 339, TEXT(' ')}, /* latin small ligature oe, U+0153 ISOlat2 */ {TEXT("ograve"), 242, TEXT(' ')}, /* latin small letter o with grave, U+00F2 ISOlat1 */ {TEXT("oline"), 8254, TEXT(' ')}, /* overline = spacing overscore, U+203E NEW */ {TEXT("omega"), 969, TEXT(' ')}, /* greek small letter omega, U+03C9 ISOgrk3 */ {TEXT("omicron"), 959, TEXT(' ')}, /* greek small letter omicron, U+03BF NEW */ {TEXT("oplus"), 8853, TEXT(' ')}, /* circled plus = direct sum, U+2295 ISOamsb */ {TEXT("or"), 8744, TEXT(' ')}, /* logical or = vee, U+2228 ISOtech */ {TEXT("ordf"), 170, TEXT(' ')}, /* feminine ordinal indicator, U+00AA ISOnum */ {TEXT("ordm"), 186, TEXT(' ')}, /* masculine ordinal indicator, U+00BA ISOnum */ {TEXT("oslash"), 248, TEXT(' ')}, /* latin small letter o with stroke, = */ /* latin small letter o slash, U+00F8 ISOlat1 */ {TEXT("otilde"), 245, TEXT(' ')}, /* latin small letter o with tilde, U+00F5 ISOlat1 */ {TEXT("otimes"), 8855, TEXT(' ')}, /* circled times = vector product, U+2297 ISOamsb */ {TEXT("ouml"), 246, TEXT(' ')}, /* latin small letter o with diaeresis, U+00F6 ISOlat1 */ {TEXT("para"), 182, TEXT(' ')}, /* pilcrow sign = paragraph sign, U+00B6 ISOnum */ {TEXT("part"), 8706, TEXT(' ')}, /* partial differential, U+2202 ISOtech */ {TEXT("permil"), 8240, TEXT(' ')}, /* per mille sign, U+2030 ISOtech */ {TEXT("perp"), 8869, TEXT(' ')}, /* up tack = orthogonal to = perpendicular, U+22A5 ISOtech */ {TEXT("phi"), 966, TEXT(' ')}, /* greek small letter phi, U+03C6 ISOgrk3 */ {TEXT("pi"), 960, TEXT(' ')}, /* greek small letter pi, U+03C0 ISOgrk3 */ {TEXT("piv"), 982, TEXT(' ')}, /* greek pi symbol, U+03D6 ISOgrk3 */ {TEXT("plusmn"), 177, TEXT(' ')}, /* plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */ {TEXT("pound"), 163, TEXT(' ')}, /* pound sign, U+00A3 ISOnum */ {TEXT("prime"), 8242, TEXT(' ')}, /* prime = minutes = feet, U+2032 ISOtech */ {TEXT("prod"), 8719, TEXT(' ')}, /* n-ary product = product sign, U+220F ISOamsb */ {TEXT("prop"), 8733, TEXT(' ')}, /* proportional to, U+221D ISOtech */ {TEXT("psi"), 968, TEXT(' ')}, /* greek small letter psi, U+03C8 ISOgrk3 */ {TEXT("quot"), 34, TEXT(' ')}, /* quotation mark = APL quote, U+0022 ISOnum */ {TEXT("rArr"), 8658, TEXT(' ')}, /* rightwards double arrow, U+21D2 ISOtech */ {TEXT("radic"), 8730, TEXT(' ')}, /* square root = radical sign, U+221A ISOtech */ {TEXT("rang"), 9002, TEXT(' ')}, /* right-pointing angle bracket = ket, U+232A ISOtech */ {TEXT("raquo"), 187, TEXT(' ')}, /* right-pointing double angle quotation mark = */ /* right pointing guillemet, U+00BB ISOnum */ {TEXT("rarr"), 8594, TEXT(' ')}, /* rightwards arrow, U+2192 ISOnum */ {TEXT("rceil"), 8969, TEXT(' ')}, /* right ceiling, U+2309 ISOamsc */ {TEXT("rdquo"), 8221, TEXT(' ')}, /* right double quotation mark, U+201D ISOnum */ {TEXT("real"), 8476, TEXT(' ')}, /* blackletter capital R = real part symbol, U+211C ISOamso */ {TEXT("reg"), 174, TEXT(' ')}, /* registered sign = registered trade mark sign, */ /* U+00AE ISOnum */ {TEXT("rfloor"), 8971, TEXT(' ')}, /* right floor, U+230B ISOamsc */ {TEXT("rho"), 961, TEXT(' ')}, /* greek small letter rho, U+03C1 ISOgrk3 */ {TEXT("rlm"), 8207, TEXT(' ')}, /* right-to-left mark, U+200F NEW RFC 2070 */ {TEXT("rsaquo"), 8250, TEXT(' ')}, /* single right-pointing angle quotation mark, */ /* U+203A ISO proposed */ {TEXT("rsquo"), 8217, TEXT(' ')}, /* right single quotation mark, U+2019 ISOnum */ {TEXT("sbquo"), 8218, TEXT(' ')}, /* single low-9 quotation mark, U+201A NEW */ {TEXT("scaron"), 353, TEXT(' ')}, /* latin small letter s with caron, U+0161 ISOlat2 */ {TEXT("sdot"), 8901, TEXT(' ')}, /* dot operator, U+22C5 ISOamsb */ {TEXT("sect"), 167, TEXT(' ')}, /* section sign, U+00A7 ISOnum */ {TEXT("shy"), 173, TEXT(' ')}, /* soft hyphen = discretionary hyphen, U+00AD ISOnum */ {TEXT("sigma"), 963, TEXT(' ')}, /* greek small letter sigma, U+03C3 ISOgrk3 */ {TEXT("sigmaf"), 962, TEXT(' ')}, /* greek small letter final sigma, U+03C2 ISOgrk3 */ {TEXT("sim"), 8764, TEXT(' ')}, /* tilde operator = varies with = similar to, U+223C ISOtech */ {TEXT("spades"), 9824, TEXT(' ')}, /* black spade suit, U+2660 ISOpub */ {TEXT("sub"), 8834, TEXT(' ')}, /* subset of, U+2282 ISOtech */ {TEXT("sube"), 8838, TEXT(' ')}, /* subset of or equal to, U+2286 ISOtech */ {TEXT("sum"), 8721, TEXT(' ')}, /* n-ary sumation, U+2211 ISOamsb */ {TEXT("sup"), 8835, TEXT(' ')}, /* superset of, U+2283 ISOtech */ {TEXT("sup1"), 185, TEXT(' ')}, /* superscript one = superscript digit one, U+00B9 ISOnum */ {TEXT("sup2"), 178, TEXT(' ')}, /* superscript two = superscript digit two = squared, */ /* U+00B2 ISOnum */ {TEXT("sup3"), 179, TEXT(' ')}, /* superscript three = superscript digit three = cubed, */ /* U+00B3 ISOnum */ {TEXT("supe"), 8839, TEXT(' ')}, /* superset of or equal to, U+2287 ISOtech */ {TEXT("szlig"), 223, TEXT(' ')}, /* latin small letter sharp s = ess-zed, U+00DF ISOlat1 */ {TEXT("tau"), 964, TEXT(' ')}, /* greek small letter tau, U+03C4 ISOgrk3 */ {TEXT("there4"), 8756, TEXT(' ')}, /* therefore, U+2234 ISOtech */ {TEXT("theta"), 952, TEXT(' ')}, /* greek small letter theta, U+03B8 ISOgrk3 */ {TEXT("thetasym"), 977, TEXT(' ')}, /* greek small letter theta symbol, U+03D1 NEW */ {TEXT("thinsp"), 8201, TEXT(' ')}, /* thin space, U+2009 ISOpub */ {TEXT("thorn"), 254, TEXT(' ')}, /* latin small letter thorn with, U+00FE ISOlat1 */ {TEXT("tilde"), 732, TEXT(' ')}, /* small tilde, U+02DC ISOdia */ {TEXT("times"), 215, TEXT(' ')}, /* multiplication sign, U+00D7 ISOnum */ {TEXT("trade"), 8482, TEXT(' ')}, /* trade mark sign, U+2122 ISOnum */ {TEXT("uArr"), 8657, TEXT(' ')}, /* upwards double arrow, U+21D1 ISOamsa */ {TEXT("uacute"), 250, TEXT(' ')}, /* latin small letter u with acute, U+00FA ISOlat1 */ {TEXT("uarr"), 8593, TEXT(' ')}, /* upwards arrow, U+2191 ISOnum*/ {TEXT("ucirc"), 251, TEXT(' ')}, /* latin small letter u with circumflex, U+00FB ISOlat1 */ {TEXT("ugrave"), 249, TEXT(' ')}, /* latin small letter u with grave, U+00F9 ISOlat1 */ {TEXT("uml"), 168, TEXT(' ')}, /* diaeresis = spacing diaeresis, U+00A8 ISOdia */ {TEXT("upsih"), 978, TEXT(' ')}, /* greek upsilon with hook symbol, U+03D2 NEW */ {TEXT("upsilon"), 965, TEXT(' ')}, /* greek small letter upsilon, U+03C5 ISOgrk3 */ {TEXT("uuml"), 252, TEXT(' ')}, /* latin small letter u with diaeresis, U+00FC ISOlat1 */ {TEXT("weierp"), 8472, TEXT(' ')}, /* script capital P = power set = Weierstrass p, */ /* U+2118 ISOamso */ {TEXT("xi"), 958, TEXT(' ')}, /* greek small letter xi, U+03BE ISOgrk3 */ {TEXT("yacute"), 253, TEXT(' ')}, /* latin small letter y with acute, U+00FD ISOlat1 */ {TEXT("yen"), 165, TEXT(' ')}, /* yen sign = yuan sign, U+00A5 ISOnum */ {TEXT("yuml"), 255, TEXT(' ')}, /* latin small letter y with diaeresis, U+00FF ISOlat1 */ {TEXT("zeta"), 950, TEXT(' ')}, /* greek small letter zeta, U+03B6 ISOgrk3 */ {TEXT("zwj"), 8205, TEXT(' ')}, /* zero width joiner, U+200D NEW RFC 2070 */ {TEXT("zwnj"), 8204, TEXT(' ')}, /* zero width non-joiner, U+200C NEW RFC 2070 */ {TEXT("zzzz"), 0, TEXT(' ')} /* this last entry is required */ }; /* tables defined in fetchHTMLname.c */ extern AttributeMapping XhtmlAttributeMappingTable[]; /* Mapping table of HTML attribute values */ AttrValueMapping XhtmlAttrValueMappingTable[] = { {HTML_ATTR_dir, TEXT("ltr"), HTML_ATTR_dir_VAL_ltr}, {HTML_ATTR_dir, TEXT("rtl"), HTML_ATTR_dir_VAL_rtl}, {HTML_ATTR_TextAlign, TEXT("left"), HTML_ATTR_TextAlign_VAL_left_}, {HTML_ATTR_TextAlign, TEXT("center"), HTML_ATTR_TextAlign_VAL_center_}, {HTML_ATTR_TextAlign, TEXT("right"), HTML_ATTR_TextAlign_VAL_right_}, {HTML_ATTR_TextAlign, TEXT("justify"), HTML_ATTR_TextAlign_VAL_justify_}, {HTML_ATTR_Align, TEXT("left"), HTML_ATTR_Align_VAL_left_}, {HTML_ATTR_Align, TEXT("center"), HTML_ATTR_Align_VAL_center_}, {HTML_ATTR_Align, TEXT("right"), HTML_ATTR_Align_VAL_right_}, {HTML_ATTR_LAlign, TEXT("top"), HTML_ATTR_LAlign_VAL_Top_}, {HTML_ATTR_LAlign, TEXT("bottom"), HTML_ATTR_LAlign_VAL_Bottom_}, {HTML_ATTR_LAlign, TEXT("left"), HTML_ATTR_LAlign_VAL_Left_}, {HTML_ATTR_LAlign, TEXT("right"), HTML_ATTR_LAlign_VAL_Right_}, {HTML_ATTR_Clear, TEXT("left"), HTML_ATTR_Clear_VAL_Left_}, {HTML_ATTR_Clear, TEXT("right"), HTML_ATTR_Clear_VAL_Right_}, {HTML_ATTR_Clear, TEXT("all"), HTML_ATTR_Clear_VAL_All_}, {HTML_ATTR_Clear, TEXT("none"), HTML_ATTR_Clear_VAL_None_}, {HTML_ATTR_NumberStyle, TEXT("1"), HTML_ATTR_NumberStyle_VAL_Arabic_}, {HTML_ATTR_NumberStyle, TEXT("a"), HTML_ATTR_NumberStyle_VAL_LowerAlpha}, {HTML_ATTR_NumberStyle, TEXT("A"), HTML_ATTR_NumberStyle_VAL_UpperAlpha}, {HTML_ATTR_NumberStyle, TEXT("i"), HTML_ATTR_NumberStyle_VAL_LowerRoman}, {HTML_ATTR_NumberStyle, TEXT("I"), HTML_ATTR_NumberStyle_VAL_UpperRoman}, {HTML_ATTR_BulletStyle, TEXT("disc"), HTML_ATTR_BulletStyle_VAL_disc}, {HTML_ATTR_BulletStyle, TEXT("square"), HTML_ATTR_BulletStyle_VAL_square}, {HTML_ATTR_BulletStyle, TEXT("circle"), HTML_ATTR_BulletStyle_VAL_circle}, {HTML_ATTR_ItemStyle, TEXT("1"), HTML_ATTR_ItemStyle_VAL_Arabic_}, {HTML_ATTR_ItemStyle, TEXT("a"), HTML_ATTR_ItemStyle_VAL_LowerAlpha}, {HTML_ATTR_ItemStyle, TEXT("A"), HTML_ATTR_ItemStyle_VAL_UpperAlpha}, {HTML_ATTR_ItemStyle, TEXT("i"), HTML_ATTR_ItemStyle_VAL_LowerRoman}, {HTML_ATTR_ItemStyle, TEXT("I"), HTML_ATTR_ItemStyle_VAL_UpperRoman}, {HTML_ATTR_ItemStyle, TEXT("disc"), HTML_ATTR_ItemStyle_VAL_disc}, {HTML_ATTR_ItemStyle, TEXT("square"), HTML_ATTR_ItemStyle_VAL_square}, {HTML_ATTR_ItemStyle, TEXT("circle"), HTML_ATTR_ItemStyle_VAL_circle}, {HTML_ATTR_Button_type, TEXT("button"), HTML_ATTR_Button_type_VAL_button}, {HTML_ATTR_Button_type, TEXT("submit"), HTML_ATTR_Button_type_VAL_submit}, {HTML_ATTR_Button_type, TEXT("reset"), HTML_ATTR_Button_type_VAL_reset}, {HTML_ATTR_frame, TEXT("void"), HTML_ATTR_frame_VAL_void}, {HTML_ATTR_frame, TEXT("above"), HTML_ATTR_frame_VAL_above}, {HTML_ATTR_frame, TEXT("below"), HTML_ATTR_frame_VAL_below}, {HTML_ATTR_frame, TEXT("hsides"), HTML_ATTR_frame_VAL_hsides}, {HTML_ATTR_frame, TEXT("lhs"), HTML_ATTR_frame_VAL_lhs}, {HTML_ATTR_frame, TEXT("rhs"), HTML_ATTR_frame_VAL_rhs}, {HTML_ATTR_frame, TEXT("vsides"), HTML_ATTR_frame_VAL_vsides}, {HTML_ATTR_frame, TEXT("box"), HTML_ATTR_frame_VAL_box}, {HTML_ATTR_frame, TEXT("border"), HTML_ATTR_frame_VAL_border}, {HTML_ATTR_frameborder, TEXT("0"), HTML_ATTR_frameborder_VAL_Border0}, {HTML_ATTR_frameborder, TEXT("1"), HTML_ATTR_frameborder_VAL_Border1}, {HTML_ATTR_scrolling, TEXT("yes"), HTML_ATTR_scrolling_VAL_Yes_}, {HTML_ATTR_scrolling, TEXT("no"), HTML_ATTR_scrolling_VAL_No_}, {HTML_ATTR_scrolling, TEXT("auto"), HTML_ATTR_scrolling_VAL_auto_}, {HTML_ATTR_rules_, TEXT("none"), HTML_ATTR_rules__VAL_none_}, {HTML_ATTR_rules_, TEXT("groups"), HTML_ATTR_rules__VAL_groups}, {HTML_ATTR_rules_, TEXT("rows"), HTML_ATTR_rules__VAL_rows}, {HTML_ATTR_rules_, TEXT("cols"), HTML_ATTR_rules__VAL_cols}, {HTML_ATTR_rules_, TEXT("all"), HTML_ATTR_rules__VAL_all}, {HTML_ATTR_Cell_align, TEXT("left"), HTML_ATTR_Cell_align_VAL_Cell_left}, {HTML_ATTR_Cell_align, TEXT("center"), HTML_ATTR_Cell_align_VAL_Cell_center}, {HTML_ATTR_Cell_align, TEXT("right"), HTML_ATTR_Cell_align_VAL_Cell_right}, {HTML_ATTR_Cell_align, TEXT("justify"), HTML_ATTR_Cell_align_VAL_Cell_justify}, {HTML_ATTR_Cell_align, TEXT("char"), HTML_ATTR_Cell_align_VAL_Cell_char}, {HTML_ATTR_Alignment, TEXT("top"), HTML_ATTR_Alignment_VAL_Top_}, {HTML_ATTR_Alignment, TEXT("middle"), HTML_ATTR_Alignment_VAL_Middle_}, {HTML_ATTR_Alignment, TEXT("bottom"), HTML_ATTR_Alignment_VAL_Bottom_}, {HTML_ATTR_Alignment, TEXT("left"), HTML_ATTR_Alignment_VAL_Left_}, {HTML_ATTR_Alignment, TEXT("right"), HTML_ATTR_Alignment_VAL_Right_}, {HTML_ATTR_METHOD, TEXT("get"), HTML_ATTR_METHOD_VAL_Get_}, {HTML_ATTR_METHOD, TEXT("post"), HTML_ATTR_METHOD_VAL_Post_}, {HTML_ATTR_Position, TEXT("top"), HTML_ATTR_Position_VAL_Position_top}, {HTML_ATTR_Position, TEXT("bottom"), HTML_ATTR_Position_VAL_Position_bottom}, {HTML_ATTR_Position, TEXT("left"), HTML_ATTR_Position_VAL_Position_left}, {HTML_ATTR_Position, TEXT("right"), HTML_ATTR_Position_VAL_Position_right}, {HTML_ATTR_Row_valign, TEXT("top"), HTML_ATTR_Row_valign_VAL_Row_top}, {HTML_ATTR_Row_valign, TEXT("middle"), HTML_ATTR_Row_valign_VAL_Row_middle}, {HTML_ATTR_Row_valign, TEXT("bottom"), HTML_ATTR_Row_valign_VAL_Row_bottom}, {HTML_ATTR_Row_valign, TEXT("baseline"), HTML_ATTR_Row_valign_VAL_Row_baseline}, {HTML_ATTR_Cell_valign, TEXT("top"), HTML_ATTR_Cell_valign_VAL_Cell_top}, {HTML_ATTR_Cell_valign, TEXT("middle"), HTML_ATTR_Cell_valign_VAL_Cell_middle}, {HTML_ATTR_Cell_valign, TEXT("bottom"), HTML_ATTR_Cell_valign_VAL_Cell_bottom}, {HTML_ATTR_Cell_valign, TEXT("baseline"), HTML_ATTR_Cell_valign_VAL_Cell_baseline}, {HTML_ATTR_shape, TEXT("rect"), HTML_ATTR_shape_VAL_rectangle}, {HTML_ATTR_shape, TEXT("circle"), HTML_ATTR_shape_VAL_circle}, {HTML_ATTR_shape, TEXT("poly"), HTML_ATTR_shape_VAL_polygon}, {HTML_ATTR_valuetype, TEXT("data"), HTML_ATTR_valuetype_VAL_data_}, {HTML_ATTR_valuetype, TEXT("ref"), HTML_ATTR_valuetype_VAL_ref}, {HTML_ATTR_valuetype, TEXT("object"), HTML_ATTR_valuetype_VAL_object_}, /* HTML attribute TYPE generates a Thot element */ {DummyAttribute, TEXT("button"), HTML_EL_Button_Input}, {DummyAttribute, TEXT("checkbox"), HTML_EL_Checkbox_Input}, {DummyAttribute, TEXT("file"), HTML_EL_File_Input}, {DummyAttribute, TEXT("hidden"), HTML_EL_Hidden_Input}, {DummyAttribute, TEXT("image"), HTML_EL_PICTURE_UNIT}, {DummyAttribute, TEXT("password"), HTML_EL_Password_Input}, {DummyAttribute, TEXT("radio"), HTML_EL_Radio_Input}, {DummyAttribute, TEXT("reset"), HTML_EL_Reset_Input}, {DummyAttribute, TEXT("submit"), HTML_EL_Submit_Input}, {DummyAttribute, TEXT("text"), HTML_EL_Text_Input}, /* The following declarations allow the parser to accept boolean attributes */ /* written "checked=CHECKED"), for instance */ {HTML_ATTR_ISMAP, TEXT("ismap"), HTML_ATTR_ISMAP_VAL_Yes_}, {HTML_ATTR_nohref, TEXT("nohref"), HTML_ATTR_nohref_VAL_Yes_}, {HTML_ATTR_COMPACT, TEXT("compact"), HTML_ATTR_COMPACT_VAL_Yes_}, {HTML_ATTR_Multiple, TEXT("multiple"), HTML_ATTR_Multiple_VAL_Yes_}, {HTML_ATTR_Selected, TEXT("selected"), HTML_ATTR_Selected_VAL_Yes_}, {HTML_ATTR_Checked, TEXT("checked"), HTML_ATTR_Checked_VAL_Yes_}, {HTML_ATTR_No_wrap, TEXT("nowrap"), HTML_ATTR_No_wrap_VAL_no_wrap}, {HTML_ATTR_NoShade, TEXT("noshade"), HTML_ATTR_NoShade_VAL_NoShade_}, {HTML_ATTR_declare, TEXT("declare"), HTML_ATTR_declare_VAL_Yes_}, {HTML_ATTR_defer, TEXT("defer"), HTML_ATTR_defer_VAL_Yes_}, {HTML_ATTR_disabled, TEXT("disabled"), HTML_ATTR_disabled_VAL_Yes_}, {HTML_ATTR_readonly, TEXT("readonly"), HTML_ATTR_readonly_VAL_Yes_}, {HTML_ATTR_no_resize, TEXT("noresize"), HTML_ATTR_no_resize_VAL_Yes_}, /* XML attribute xml:space */ {HTML_ATTR_xml_space, TEXT("default"), HTML_ATTR_xml_space_VAL_xml_space_default}, {HTML_ATTR_xml_space, TEXT("preserve"), HTML_ATTR_xml_space_VAL_xml_space_preserve}, {0, TEXT(""), 0} /* Last entry. Mandatory */ }; /*---------------------------------------------------------------------- ParseCharset: Parses the element HTTP-EQUIV and looks for the charset value. ----------------------------------------------------------------------*/ #ifdef __STDC__ void ParseCharset (Element el, Document doc) #else /* !__STDC__ */ void ParseCharset (el, doc) Element el; Document doc; #endif /* !__STDC__ */ { AttributeType attrType; Attribute attr; SSchema docSSchema; CHARSET charset; CHAR_T *text, *text2, *ptrText, *str; CHAR_T charsetname[MAX_LENGTH]; int length; int pos, index = 0; charset = TtaGetDocumentCharset (doc); if (charset != UNDEFINED_CHARSET) /* the charset was already defined by the http header */ return; docSSchema = TtaGetDocumentSSchema (doc); attrType.AttrSSchema = docSSchema; attrType.AttrTypeNum = HTML_ATTR_http_equiv; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { /* There is a HTTP-EQUIV attribute */ length = TtaGetTextAttributeLength (attr); if (length > 0) { text = TtaAllocString (length + 1); TtaGiveTextAttributeValue (attr, text, &length); if (!ustrcasecmp (text, TEXT("content-type"))) { attrType.AttrTypeNum = HTML_ATTR_meta_content; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { length = TtaGetTextAttributeLength (attr); if (length > 0) { text2 = TtaAllocString (length + 1); TtaGiveTextAttributeValue (attr, text2, &length); ptrText = text2; while (*ptrText) { *ptrText = utolower (*ptrText); ptrText++; } str = ustrstr (text2, TEXT("charset=")); if (str) { pos = str - text2 + 8; while (text2[pos] != WC_SPACE && text2[pos] != WC_TAB && text2[pos] != WC_EOS) charsetname[index++] = text2[pos++]; charsetname[index] = WC_EOS; charset = TtaGetCharset (charsetname); if (charset != UNDEFINED_CHARSET) TtaSetDocumentCharset (doc, charset); } TtaFreeMemory (text2); } } } TtaFreeMemory (text); } } } /*--------------------------------------------------------------------------- XhtmlMapEntity Search that entity in the entity table and return the corresponding value. ---------------------------------------------------------------------------*/ #ifdef __STDC__ void XhtmlMapEntity (STRING entityName, int *entityValue, STRING alphabet) #else void XhtmlMapEntity (entityName, entityValue, alphabet) STRING entityName; int *entityValue; STRING alphabet; #endif { int i; ThotBool found; found = FALSE; for (i = 0; XhtmlEntityTable[i].charCode >= 0 && ! found; i++) found = !ustrcmp (XhtmlEntityTable[i].charName, entityName); if (found) { /* entity found */ i--; *entityValue = XhtmlEntityTable[i].charCode; *alphabet = XhtmlEntityTable[i].charAlphabet; } else *alphabet = WC_EOS; } /*---------------------------------------------------------------------- PutNonISOlatin1Char Put a Unicode character in the input buffer. ----------------------------------------------------------------------*/ #ifdef __STDC__ static void PutNonISOlatin1Char (int code, STRING prefix, STRING entityName, ParserData *context) #else static void PutNonISOlatin1Char (code, prefix, entityName, context) int code; STRING prefix; STRING entityName; ParserData *context; #endif { Language lang, l; ElementType elType; Element elText; AttributeType attrType; Attribute attr; CHAR_T buffer[MaxEntityLength+10]; /* create a new text leaf */ elType.ElSSchema = TtaGetDocumentSSchema (context->doc); elType.ElTypeNum = HTML_EL_TEXT_UNIT; elText = TtaNewElement (context->doc, elType); XmlSetElemLineNumber (elText); XhtmlInsertElement (&elText); context->lastElement = elText; context->lastElementClosed = TRUE; /* try to find a fallback character */ l = context->language; GetFallbackCharacter (code, buffer, &lang); /* put that fallback character in the new text leaf */ TtaSetTextContent (elText, buffer, lang, context->doc); context->language = l; /* make that text leaf read-only */ TtaSetAccessRight (elText, ReadOnly, context->doc); /* associate an attribute EntityName with the new text leaf */ attrType.AttrSSchema = TtaGetDocumentSSchema (context->doc); attrType.AttrTypeNum = HTML_ATTR_EntityName; attr = TtaNewAttribute (attrType); TtaAttachAttribute (elText, attr, context->doc); buffer[0] = '&'; ustrcpy (&buffer[1], prefix); ustrcat (buffer, entityName); ustrcat (buffer, TEXT(";")); TtaSetAttributeText (attr, buffer, elText, context->doc); context->mergeText = FALSE; } /*---------------------------------------------------------------------- XhtmlEntityCreated A XTHML entity has been created by the XML parser. ----------------------------------------------------------------------*/ #ifdef __STDC__ void XhtmlEntityCreated (int entityValue, Language lang, STRING entityName, ParserData *context) #else void XhtmlEntityCreated (entityValue, lang, entityName, context) int entityValue; Language lang; STRING entityName; ParserData *context; #endif { CHAR_T buffer[2]; if (lang < 0) PutInXmlElement (entityName); else { if (entityValue < 255) { buffer[0] = ((UCHAR_T) entityValue); buffer[1] = WC_EOS; PutInXmlElement (buffer); } else PutNonISOlatin1Char (entityValue, TEXT(""), entityName, context); } } /*---------------------------------------------------------------------- XhtmlElementComplete Complete Xhtml elements. Check its attributes and its contents. ----------------------------------------------------------------------*/ #ifdef __STDC__ void XhtmlElementComplete (Element el, Document doc, int *error) #else void XhtmlElementComplete (el, doc, error) Element el; Document doc; int *error; #endif { ElementType elType, newElType, childType; Element constElem, child, desc, leaf, prev, next, last, elFrames, lastFrame, lastChild; Attribute attr; AttributeType attrType; Language lang; STRING text; CHAR_T lastChar[2]; STRING name1; int length; SSchema docSSchema; *error = 0; docSSchema = TtaGetDocumentSSchema (doc); elType = TtaGetElementType (el); /* is this a block-level element in a character-level element? */ if (!IsXMLElementInline (el) && elType.ElTypeNum != HTML_EL_Comment_ && elType.ElTypeNum != HTML_EL_XMLPI) BlockInCharLevelElem (el); newElType.ElSSchema = elType.ElSSchema; switch (elType.ElTypeNum) { case HTML_EL_Object: /* it's an object */ /* create Object_Content */ child = TtaGetFirstChild (el); if (child != NULL) elType = TtaGetElementType (child); /* is it the PICTURE element ? */ if (child == NULL || elType.ElTypeNum != HTML_EL_PICTURE_UNIT) { desc = child; /* create the PICTURE element */ elType.ElTypeNum = HTML_EL_PICTURE_UNIT; child = TtaNewTree (doc, elType, ""); if (desc == NULL) TtaInsertFirstChild (&child, el, doc); else TtaInsertSibling (child, desc, TRUE, doc); } /* copy attribute data into SRC attribute of Object_Image */ attrType.AttrSSchema = docSSchema; attrType.AttrTypeNum = HTML_ATTR_data; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { length = TtaGetTextAttributeLength (attr); if (length > 0) { name1 = TtaAllocString (length + 1); TtaGiveTextAttributeValue (attr, name1, &length); attrType.AttrTypeNum = HTML_ATTR_SRC; attr = TtaGetAttribute (child, attrType); if (attr == NULL) { attr = TtaNewAttribute (attrType); TtaAttachAttribute (child, attr, doc); } TtaSetAttributeText (attr, name1, child, doc); TtaFreeMemory (name1); } } /* is the Object_Content element already created ? */ desc = child; TtaNextSibling(&desc); if (desc != NULL) elType = TtaGetElementType (desc); /* is it the Object_Content element ? */ if (desc == NULL || elType.ElTypeNum != HTML_EL_Object_Content) { /* create Object_Content */ elType.ElTypeNum = HTML_EL_Object_Content; desc = TtaNewTree (doc, elType, ""); TtaInsertSibling (desc, child, FALSE, doc); /* move previous existing children into Object_Content */ child = TtaGetLastChild(el); while (child != desc) { TtaRemoveTree (child, doc); TtaInsertFirstChild (&child, desc, doc); child = TtaGetLastChild(el); } } break; case HTML_EL_Unnumbered_List: case HTML_EL_Numbered_List: case HTML_EL_Menu: case HTML_EL_Directory: /* It's a List element. It should only have List_Item children. If it has List element chidren, move these List elements within their previous List_Item sibling. This is to fix a bug in document generated by Mozilla. */ prev = NULL; next = NULL; child = TtaGetFirstChild (el); while (child != NULL) { next = child; TtaNextSibling (&next); elType = TtaGetElementType (child); if (elType.ElTypeNum == HTML_EL_Unnumbered_List || elType.ElTypeNum == HTML_EL_Numbered_List || elType.ElTypeNum == HTML_EL_Menu || elType.ElTypeNum == HTML_EL_Directory) /* this list element is a child of another list element */ if (prev) { elType = TtaGetElementType (prev); if (elType.ElTypeNum == HTML_EL_List_Item) { /* get the last child of the previous List_Item */ desc = TtaGetFirstChild (prev); last = NULL; while (desc) { last = desc; TtaNextSibling (&desc); } /* move the list element after the last child of the previous List_Item */ TtaRemoveTree (child, doc); if (last) TtaInsertSibling (child, last, FALSE, doc); else TtaInsertFirstChild (&child, prev, doc); child = prev; } } prev = child; child = next; } break; case HTML_EL_FRAMESET: /* The FRAMESET element is now complete. Gather all its FRAMESET and FRAME children and wrap them up in a Frames element */ elFrames = NULL; lastFrame = NULL; lastChild = NULL; child = TtaGetFirstChild (el); while (child != NULL) { next = child; TtaNextSibling (&next); elType = TtaGetElementType (child); if (elType.ElTypeNum == HTML_EL_FRAMESET || elType.ElTypeNum == HTML_EL_FRAME || elType.ElTypeNum == HTML_EL_Comment_) { /* create the Frames element if it does not exist */ if (elFrames == NULL) { newElType.ElSSchema = docSSchema; newElType.ElTypeNum = HTML_EL_Frames; elFrames = TtaNewElement (doc, newElType); XmlSetElemLineNumber (elFrames); TtaInsertSibling (elFrames, child, TRUE, doc); } /* move the element as the last child of the Frames element */ TtaRemoveTree (child, doc); if (lastFrame == NULL) TtaInsertFirstChild (&child, elFrames, doc); else TtaInsertSibling (child, lastFrame, FALSE, doc); lastFrame = child; } child = next; } break; case HTML_EL_Input: /* it's an INPUT without any TYPE attribute */ /* Create a child of type Text_Input */ elType.ElTypeNum = HTML_EL_Text_Input; child = TtaNewTree (doc, elType, ""); XmlSetElemLineNumber (child); TtaInsertFirstChild (&child, el, doc); /* now, process it like a Text_Input element */ case HTML_EL_Text_Input: case HTML_EL_Password_Input: case HTML_EL_File_Input: /* get element Inserted_Text */ child = TtaGetFirstChild (el); if (child != NULL) { attrType.AttrSSchema = docSSchema; attrType.AttrTypeNum = HTML_ATTR_Value_; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { /* copy the value of attribute "value" into the first text leaf of element */ length = TtaGetTextAttributeLength (attr); if (length > 0) { /* get the text leaf */ leaf = TtaGetFirstChild (child); if (leaf != NULL) { childType = TtaGetElementType (leaf); if (childType.ElTypeNum == HTML_EL_TEXT_UNIT) { /* copy attribute value into the text leaf */ text = TtaAllocString (length + 1); TtaGiveTextAttributeValue (attr, text, &length); TtaSetTextContent (leaf, text, TtaGetDefaultLanguage (), doc); TtaFreeMemory (text); } } } } } break; case HTML_EL_META: ParseCharset (el, doc); break; case HTML_EL_STYLE_: /* it's a STYLE element */ case HTML_EL_SCRIPT: /* it's a SCRIPT element */ case HTML_EL_Preformatted: /* it's a PRE */ /* if the last line of the Preformatted is empty, remove it */ leaf = XmlLastLeafInElement (el); if (leaf != NULL) { elType = TtaGetElementType (leaf); if (elType.ElTypeNum == HTML_EL_TEXT_UNIT) /* the last leaf is a TEXT element */ { length = TtaGetTextLength (leaf); if (length > 0) { TtaGiveSubString (leaf, lastChar, length, 1); if (lastChar[0] == EOL) /* last character is new line, delete it */ { if (length == 1) /* empty TEXT element */ TtaDeleteTree (leaf, doc); else /* remove the last character */ TtaDeleteTextContent (leaf, length, 1, doc); } } } } if (IsParsingCSS ()) { text = GetStyleContents (el); if (text) { ReadCSSRules (doc, NULL, text, FALSE); TtaFreeMemory (text); } SetParsingCSS (FALSE); } /* and continue as if it were a Preformatted or a Script */ break; case HTML_EL_Text_Area: /* it's a Text_Area */ SetParsingTextArea (FALSE); child = TtaGetFirstChild (el); if (child == NULL) /* it's an empty Text_Area */ /* insert a Inserted_Text element in the element */ { newElType.ElTypeNum = HTML_EL_Inserted_Text; child = TtaNewTree (doc, newElType, ""); TtaInsertFirstChild (&child, el, doc); } else { /* save the text into Default_Value attribute */ attrType.AttrSSchema = docSSchema; attrType.AttrTypeNum = HTML_ATTR_Default_Value; if (TtaGetAttribute (el, attrType) == NULL) /* attribute Default_Value is missing */ { attr = TtaNewAttribute (attrType); TtaAttachAttribute (el, attr, doc); desc = TtaGetFirstChild (child); length = TtaGetTextLength (desc) + 1; text = TtaAllocString (length); TtaGiveTextContent (desc, text, &length, &lang); TtaSetAttributeText (attr, text, el, doc); TtaFreeMemory (text); } } /* insert a Frame element */ newElType.ElTypeNum = HTML_EL_Frame; constElem = TtaNewTree (doc, newElType, ""); TtaInsertSibling (constElem, child, FALSE, doc); break; case HTML_EL_Radio_Input: case HTML_EL_Checkbox_Input: /* put an attribute Checked if it is missing */ attrType.AttrSSchema = docSSchema; attrType.AttrTypeNum = HTML_ATTR_Checked; if (TtaGetAttribute (el, attrType) == NULL) /* attribute Checked is missing */ { attr = TtaNewAttribute (attrType); TtaAttachAttribute (el, attr, doc); TtaSetAttributeValue (attr, HTML_ATTR_Checked_VAL_No_, el, doc); } break; case HTML_EL_Option_Menu: /* Check that at least one option has a SELECTED attribute */ OnlyOneOptionSelected (el, doc, TRUE); break; case HTML_EL_PICTURE_UNIT: break; case HTML_EL_LINK: CheckCSSLink (el, doc, docSSchema); break; case HTML_EL_Data_cell: case HTML_EL_Heading_cell: /* insert a pseudo paragraph into empty cells */ child = TtaGetFirstChild (el); if (child == NULL) { elType.ElTypeNum = HTML_EL_Pseudo_paragraph; child = TtaNewTree (doc, elType, ""); if (child != NULL) TtaInsertFirstChild (&child, el, doc); } /* detect whether we're parsing a whole table or just a cell */ if (IsWithinTable ()) NewCell (el, doc, FALSE); break; case HTML_EL_Table: CheckTable (el, doc); SubWithinTable (); break; case HTML_EL_TITLE: /* show the TITLE in the main window */ UpdateTitle (el, doc); break; default: break; } } /*---------------------------------------------------------------------- MapHTMLAttributeValue Search in the Attribute Value Mapping Table the entry for the attribute ThotAtt and its value AttrVal. Returns the corresponding Thot value. ----------------------------------------------------------------------*/ #ifdef __STDC__ void MapHTMLAttributeValue (CHAR_T* AttrVal, AttributeType attrType, int* value) #else void MapHTMLAttributeValue (AttrVal, attrType, value) CHAR_T* AttrVal; AttributeType attrType; int* value; #endif { int i; *value = 0; i = 0; while (XhtmlAttrValueMappingTable[i].ThotAttr != attrType.AttrTypeNum && XhtmlAttrValueMappingTable[i].ThotAttr != 0) i++; if (XhtmlAttrValueMappingTable[i].ThotAttr == attrType.AttrTypeNum) { do if (!ustrcmp (XhtmlAttrValueMappingTable[i].XMLattrValue, AttrVal)) *value = XhtmlAttrValueMappingTable[i].ThotAttrValue; else i++; while (*value <= 0 && XhtmlAttrValueMappingTable[i].ThotAttr != 0); } }