Return to XHTMLbuilder.c CVS log | Up to [Public] / Amaya / amaya |
1.1 cvs 1: /* 2: * 3: * (c) COPYRIGHT MIT and INRIA, 1996. 4: * Please first read the full copyright statement in file COPYRIGHT. 5: * 6: */ 7: 8: /* 9: * 10: * html2thot parses a HTML file and builds the corresponding abstract tree 11: * for a Thot document of type HTML. 12: * 13: * Author: V. Quint 14: * L. Carcone 15: */ 16: 17: #define THOT_EXPORT extern 18: #include "amaya.h" 19: #include "css.h" 1.2 cvs 20: #include "parser.h" 21: #include "HTML.h" 22: 1.13 cvs 23: #include "css_f.h" 24: #include "fetchXMLname_f.h" 1.1 cvs 25: #include "HTMLactions_f.h" 26: #include "HTMLedit_f.h" 27: #include "HTMLimage_f.h" 28: #include "HTMLtable_f.h" 29: #include "HTMLimage_f.h" 30: #include "UIcss_f.h" 1.13 cvs 31: #include "styleparser_f.h" 1.2 cvs 32: #include "XHTMLbuilder_f.h" 1.13 cvs 33: #include "Xml2thot_f.h" 1.1 cvs 34: 35: /* maximum length of a Thot structure schema name */ 36: #define MAX_SS_NAME_LENGTH 32 37: 1.2 cvs 38: XhtmlEntity XhtmlEntityTable[] = 1.1 cvs 39: { 40: /* This table MUST be in alphabetical order */ 41: {TEXT("AElig"), 198}, /* latin capital letter AE = */ 42: /* latin capital ligature AE, U+00C6 ISOlat1 */ 43: {TEXT("Aacute"), 193}, /* latin capital letter A with acute, U+00C1 ISOlat1 */ 44: {TEXT("Acirc"), 194}, /* latin capital letter A with circumflex, U+00C2 ISOlat1 */ 45: {TEXT("Agrave"), 192}, /* latin capital letter A with grave = */ 46: /* latin capital letter A grave, U+00C0 ISOlat1 */ 47: {TEXT("Alpha"), 913}, /* greek capital letter alpha, U+0391 */ 48: {TEXT("Aring"), 197}, /* latin capital letter A with ring above = */ 49: /* latin capital letter A ring, U+00C5 ISOlat1 */ 50: {TEXT("Atilde"), 195}, /* latin capital letter A with tilde, U+00C3 ISOlat1 */ 51: {TEXT("Auml"), 196}, /* latin capital letter A with diaeresis, U+00C4 ISOlat1 */ 52: {TEXT("Beta"), 914}, /* greek capital letter beta, U+0392 */ 53: {TEXT("Ccedil"), 199}, /* latin capital letter C with cedilla, U+00C7 ISOlat1 */ 54: {TEXT("Chi"), 935}, /* greek capital letter chi, U+03A7 */ 55: {TEXT("Dagger"), 8225}, /* double dagger, U+2021 ISOpub */ 56: {TEXT("Delta"), 916}, /* greek capital letter delta, U+0394 ISOgrk3 */ 57: {TEXT("ETH"), 208}, /* latin capital letter ETH, U+00D0 ISOlat1 */ 58: {TEXT("Eacute"), 201}, /* latin capital letter E with acute, U+00C9 ISOlat1 */ 59: {TEXT("Ecirc"), 202}, /* latin capital letter E with circumflex, U+00CA ISOlat1 */ 60: {TEXT("Egrave"), 200}, /* latin capital letter E with grave, U+00C8 ISOlat1 */ 61: {TEXT("Epsilon"), 917}, /* greek capital letter epsilon, U+0395 */ 62: {TEXT("Eta"), 919}, /* greek capital letter eta, U+0397 */ 63: {TEXT("Euml"), 203}, /* latin capital letter E with diaeresis, U+00CB ISOlat1 */ 64: {TEXT("Gamma"), 915}, /* greek capital letter gamma, U+0393 ISOgrk3 */ 65: {TEXT("Iacute"), 205}, /* latin capital letter I with acute, U+00CD ISOlat1 */ 66: {TEXT("Icirc"), 206}, /* latin capital letter I with circumflex, U+00CE ISOlat1 */ 67: {TEXT("Igrave"), 204}, /* latin capital letter I with grave, U+00CC ISOlat1 */ 68: {TEXT("Iota"), 921}, /* greek capital letter iota, U+0399 */ 69: {TEXT("Iuml"), 207}, /* latin capital letter I with diaeresis, U+00CF ISOlat1 */ 70: {TEXT("Kappa"), 922}, /* greek capital letter kappa, U+039A */ 71: {TEXT("Lambda"), 923}, /* greek capital letter lambda, U+039B ISOgrk3 */ 72: {TEXT("Mu"), 924}, /* greek capital letter mu, U+039C */ 73: {TEXT("Ntilde"), 209}, /* latin capital letter N with tilde, U+00D1 ISOlat1 */ 74: {TEXT("Nu"), 925}, /* greek capital letter nu, U+039D */ 75: {TEXT("OElig"), 338}, /* latin capital ligature OE, U+0152 ISOlat2 */ 76: {TEXT("Oacute"), 211}, /* latin capital letter O with acute, U+00D3 ISOlat1 */ 77: {TEXT("Ocirc"), 212}, /* latin capital letter O with circumflex, U+00D4 ISOlat1 */ 78: {TEXT("Ograve"), 210}, /* latin capital letter O with grave, U+00D2 ISOlat1 */ 79: {TEXT("Omega"), 937}, /* greek capital letter omega, U+03A9 ISOgrk3 */ 80: {TEXT("Omicron"), 927}, /* greek capital letter omicron, U+039F */ 81: {TEXT("Oslash"), 216}, /* latin capital letter O with stroke = */ 82: /* latin capital letter O slash, U+00D8 ISOlat1 */ 83: {TEXT("Otilde"), 213}, /* latin capital letter O with tilde, U+00D5 ISOlat1 */ 84: {TEXT("Ouml"), 214}, /* latin capital letter O with diaeresis, U+00D6 ISOlat1 */ 85: {TEXT("Phi"), 934}, /* greek capital letter phi, U+03A6 ISOgrk3 */ 86: {TEXT("Pi"), 928}, /* greek capital letter pi, U+03A0 ISOgrk3 */ 87: {TEXT("Prime"), 8243}, /* double prime = seconds = inches, U+2033 ISOtech */ 88: {TEXT("Psi"), 936}, /* greek capital letter psi, U+03A8 ISOgrk3 */ 89: {TEXT("Rho"), 929}, /* greek capital letter rho, U+03A1 */ 90: {TEXT("Scaron"), 352}, /* latin capital letter S with caron, U+0160 ISOlat2 */ 91: {TEXT("Sigma"), 931}, /* greek capital letter sigma, U+03A3 ISOgrk3 */ 92: {TEXT("THORN"), 222}, /* latin capital letter THORN, U+00DE ISOlat1 */ 93: {TEXT("Tau"), 932}, /* greek capital letter tau, U+03A4 */ 94: {TEXT("Theta"), 920}, /* greek capital letter theta, U+0398 ISOgrk3 */ 95: {TEXT("Uacute"), 218}, /* latin capital letter U with acute, U+00DA ISOlat1 */ 96: {TEXT("Ucirc"), 219}, /* latin capital letter U with circumflex, U+00DB ISOlat1 */ 97: {TEXT("Ugrave"), 217}, /* latin capital letter U with grave, U+00D9 ISOlat1 */ 98: {TEXT("Upsilon"), 933}, /* greek capital letter upsilon, U+03A5 ISOgrk3 */ 99: {TEXT("Uuml"), 220}, /* latin capital letter U with diaeresis, U+00DC ISOlat1 */ 100: {TEXT("Xi"), 926}, /* greek capital letter xi, U+039E ISOgrk3 */ 101: {TEXT("Yacute"), 221}, /* latin capital letter Y with acute, U+00DD ISOlat1 */ 102: {TEXT("Yuml"), 376}, /* latin capital letter Y with diaeresis, U+0178 ISOlat2 */ 103: {TEXT("Zeta"), 918}, /* greek capital letter zeta, U+0396 */ 104: {TEXT("aacute"), 225}, /* latin small letter a with acute, U+00E1 ISOlat1 */ 105: {TEXT("acirc"), 226}, /* latin small letter a with circumflex, U+00E2 ISOlat1 */ 106: {TEXT("acute"), 180}, /* acute accent = spacing acute, U+00B4 ISOdia */ 107: {TEXT("aelig"), 230}, /* latin small letter ae = */ 108: /* latin small ligature ae, U+00E6 ISOlat1 */ 109: {TEXT("agrave"), 224}, /* latin small letter a with grave = */ 110: /* latin small letter a grave, U+00E0 ISOlat1 */ 111: {TEXT("alefsym"), 8501},/* alef symbol = first transfinite cardinal, U+2135 NEW */ 112: {TEXT("alpha"), 945}, /* greek small letter alpha, U+03B1 ISOgrk3 */ 113: {TEXT("amp"), 38}, /* ampersand, U+0026 ISOnum */ 114: {TEXT("and"), 8743}, /* logical and = wedge, U+2227 ISOtech */ 115: {TEXT("ang"), 8736}, /* angle, U+2220 ISOamso */ 116: {TEXT("aring"), 229}, /* latin small letter a with ring above = */ 117: /* latin small letter a ring, U+00E5 ISOlat1 */ 118: {TEXT("asymp"), 8776}, /* almost equal to = asymptotic to, U+2248 ISOamsr */ 119: {TEXT("atilde"), 227}, /* latin small letter a with tilde, U+00E3 ISOlat1 */ 120: {TEXT("auml"), 228}, /* latin small letter a with diaeresis, U+00E4 ISOlat1 */ 121: {TEXT("bdquo"), 8222}, /* double low-9 quotation mark, U+201E NEW */ 122: {TEXT("beta"), 946}, /* greek small letter beta, U+03B2 ISOgrk3 */ 123: {TEXT("brvbar"), 166}, /* broken bar = broken vertical bar, U+00A6 ISOnum */ 124: {TEXT("bull"), 8226}, /* bullet = black small circle, U+2022 ISOpub */ 125: {TEXT("cap"), 8745}, /* intersection = cap, U+2229 ISOtech */ 126: {TEXT("ccedil"), 231}, /* latin small letter c with cedilla, U+00E7 ISOlat1 */ 127: {TEXT("cedil"), 184}, /* cedilla = spacing cedilla, U+00B8 ISOdia */ 128: {TEXT("cent"), 162}, /* cent sign, U+00A2 ISOnum */ 129: {TEXT("chi"), 967}, /* greek small letter chi, U+03C7 ISOgrk3 */ 130: {TEXT("circ"), 710}, /* modifier letter circumflex accent, U+02C6 ISOpub */ 131: {TEXT("clubs"), 9827}, /* black club suit = shamrock, U+2663 ISOpub */ 132: {TEXT("cong"), 8773}, /* approximately equal to, U+2245 ISOtech */ 133: {TEXT("copy"), 169}, /* copyright sign, U+00A9 ISOnum */ 134: {TEXT("crarr"), 8629}, /* downwards arrow with corner leftwards = */ 135: /* carriage return, U+21B5 NEW */ 136: {TEXT("cup"), 8746}, /* union = cup, U+222A ISOtech */ 137: {TEXT("curren"), 164}, /* currency sign, U+00A4 ISOnum */ 138: {TEXT("dArr"), 8659}, /* downwards double arrow, U+21D3 ISOamsa */ 139: {TEXT("dagger"), 8224}, /* dagger, U+2020 ISOpub */ 140: {TEXT("darr"), 8595}, /* downwards arrow, U+2193 ISOnum */ 141: {TEXT("deg"), 176}, /* degree sign, U+00B0 ISOnum */ 142: {TEXT("delta"), 948}, /* greek small letter delta, U+03B4 ISOgrk3 */ 143: {TEXT("diams"), 9830}, /* black diamond suit, U+2666 ISOpub */ 144: {TEXT("divide"), 247}, /* division sign, U+00F7 ISOnum */ 145: {TEXT("eacute"), 233}, /* latin small letter e with acute, U+00E9 ISOlat1 */ 146: {TEXT("ecirc"), 234}, /* latin small letter e with circumflex, U+00EA ISOlat1 */ 147: {TEXT("egrave"), 232}, /* latin small letter e with grave, U+00E8 ISOlat1 */ 148: {TEXT("empty"), 8709}, /* empty set = null set = diameter, U+2205 ISOamso */ 149: {TEXT("emsp"), 8195}, /* em space, U+2003 ISOpub */ 150: {TEXT("ensp"), 8194}, /* en space, U+2002 ISOpub */ 151: {TEXT("epsilon"), 949}, /* greek small letter epsilon, U+03B5 ISOgrk3 */ 152: {TEXT("equiv"), 8801}, /* identical to, U+2261 ISOtech */ 153: {TEXT("eta"), 951}, /* greek small letter eta, U+03B7 ISOgrk3 */ 154: {TEXT("eth"), 240}, /* latin small letter eth, U+00F0 ISOlat1 */ 155: {TEXT("euml"), 235}, /* latin small letter e with diaeresis, U+00EB ISOlat1 */ 156: {TEXT("euro"), 8364}, /* euro sign, U+20AC NEW */ 157: {TEXT("exist"), 8707}, /* there exists, U+2203 ISOtech */ 158: {TEXT("fnof"), 402}, /* latin small f with hook = function = */ 159: /* florin, U+0192 ISOtech */ 160: {TEXT("forall"), 8704}, /* for all, U+2200 ISOtech */ 161: {TEXT("frac12"), 189}, /* vulgar fraction one half = */ 162: /*fraction one half, U+00BD ISOnum */ 163: {TEXT("frac14"), 188}, /* vulgar fraction one quarter = */ 164: /* fraction one quarter, U+00BC ISOnum */ 165: {TEXT("frac34"), 190}, /* vulgar fraction three quarters = */ 166: /* fraction three quarters, U+00BE ISOnum */ 167: {TEXT("frasl"), 8260}, /* fraction slash, U+2044 NEW */ 168: {TEXT("gamma"), 947}, /* greek small letter gamma, U+03B3 ISOgrk3 */ 169: {TEXT("ge"), 8805}, /* greater-than or equal to, U+2265 ISOtech */ 170: {TEXT("gt"), 62}, /* greater-than sign, U+003E ISOnum */ 171: {TEXT("hArr"), 8660}, /* left right double arrow, U+21D4 ISOamsa */ 172: {TEXT("harr"), 8596}, /* left right arrow, U+2194 ISOamsa */ 173: {TEXT("hearts"), 9829}, /* black heart suit = valentine, U+2665 ISOpub */ 174: {TEXT("hellip"), 8230}, /* horizontal ellipsis = three dot leader, U+2026 ISOpub */ 175: {TEXT("hyphen"), 173}, /* hyphen = discretionary hyphen, U+00AD ISOnum */ 176: {TEXT("iacute"), 237}, /* latin small letter i with acute, U+00ED ISOlat1 */ 177: {TEXT("icirc"), 238}, /* latin small letter i with circumflex, U+00EE ISOlat1 */ 178: {TEXT("iexcl"), 161}, /* inverted exclamation mark, U+00A1 ISOnum */ 179: {TEXT("igrave"), 236}, /* latin small letter i with grave, U+00EC ISOlat1 */ 180: {TEXT("image"), 8465}, /* blackletter capital I = imaginary part, U+2111 ISOamso */ 181: {TEXT("infin"), 8734}, /* infinity, U+221E ISOtech */ 182: {TEXT("int"), 8747}, /* integral, U+222B ISOtech */ 183: {TEXT("iota"), 953}, /* greek small letter iota, U+03B9 ISOgrk3 */ 184: {TEXT("iquest"), 191}, /* inverted question mark = */ 185: /* turned question mark, U+00BF ISOnum */ 186: {TEXT("isin"), 8712}, /* element of, U+2208 ISOtech */ 187: {TEXT("iuml"), 239}, /* latin small letter i with diaeresis, U+00EF ISOlat1 */ 188: {TEXT("kappa"), 954}, /* greek small letter kappa, U+03BA ISOgrk3 */ 189: {TEXT("lArr"), 8656}, /* leftwards double arrow, U+21D0 ISOtech */ 190: {TEXT("lambda"), 955}, /* greek small letter lambda, U+03BB ISOgrk3 */ 191: {TEXT("lang"), 9001}, /* left-pointing angle bracket = bra, U+2329 ISOtech */ 192: {TEXT("laquo"), 171}, /* left-pointing double angle quotation mark = */ 193: /* left pointing guillemet, U+00AB ISOnum */ 194: {TEXT("larr"), 8592}, /* leftwards arrow, U+2190 ISOnum */ 195: {TEXT("lceil"), 8968}, /* left ceiling = apl upstile, U+2308 ISOamsc */ 196: {TEXT("ldquo"), 8220}, /* left double quotation mark, U+201C ISOnum */ 197: {TEXT("le"), 8804}, /* less-than or equal to, U+2264 ISOtech */ 198: {TEXT("lfloor"), 8970}, /* left floor = apl downstile, U+230A ISOamsc */ 199: {TEXT("lowast"), 8727}, /* asterisk operator, U+2217 ISOtech */ 200: {TEXT("loz"), 9674}, /* lozenge, U+25CA ISOpub */ 201: {TEXT("lrm"), 8206}, /* left-to-right mark, U+200E NEW RFC 2070 */ 202: {TEXT("lsaquo"), 8249}, /* single left-pointing angle quotation mark, */ 203: /* U+2039 ISO proposed */ 204: {TEXT("lsquo"), 8216}, /* left single quotation mark, U+2018 ISOnum */ 205: {TEXT("lt"), 60}, /* less-than sign, U+003C ISOnum */ 206: {TEXT("macr"), 175}, /* macron = spacing macron = overline = APL overbar, */ 207: /* U+00AF ISOdia */ 208: {TEXT("mdash"), 8212}, /* em dash, U+2014 ISOpub */ 209: {TEXT("micro"), 181}, /* micro sign, U+00B5 ISOnum */ 210: {TEXT("middot"), 183}, /* middle dot = Georgian comma = */ 211: /* Greek middle dot, U+00B7 ISOnum */ 212: {TEXT("minus"), 8722}, /* minus sign, U+2212 ISOtech */ 213: {TEXT("mu"), 956}, /* greek small letter mu, U+03BC ISOgrk3 */ 214: {TEXT("nabla"), 8711}, /* nabla = backward difference, U+2207 ISOtech */ 215: {TEXT("nbsp"), 160}, /* no-break space = non-breaking space, U+00A0 ISOnum */ 216: {TEXT("ndash"), 8211}, /* en dash, U+2013 ISOpub */ 217: {TEXT("ne"), 8800}, /* not equal to, U+2260 ISOtech */ 218: {TEXT("ni"), 8715}, /* contains as member, U+220B ISOtech */ 219: {TEXT("not"), 172}, /* not sign, U+00AC ISOnum */ 220: {TEXT("notin"), 8713}, /* not an element of, U+2209 ISOtech */ 221: {TEXT("nsub"), 8836}, /* not a subset of, U+2284 ISOamsn */ 222: {TEXT("ntilde"), 241}, /* latin small letter n with tilde, U+00F1 ISOlat1 */ 223: {TEXT("nu"), 957}, /* greek small letter nu, U+03BD ISOgrk3 */ 224: {TEXT("oacute"), 243}, /* latin small letter o with acute, U+00F3 ISOlat1 */ 225: {TEXT("ocirc"), 244}, /* latin small letter o with circumflex, U+00F4 ISOlat1 */ 226: {TEXT("oelig"), 339}, /* latin small ligature oe, U+0153 ISOlat2 */ 227: {TEXT("ograve"), 242}, /* latin small letter o with grave, U+00F2 ISOlat1 */ 228: {TEXT("oline"), 8254}, /* overline = spacing overscore, U+203E NEW */ 229: {TEXT("omega"), 969}, /* greek small letter omega, U+03C9 ISOgrk3 */ 230: {TEXT("omicron"), 959}, /* greek small letter omicron, U+03BF NEW */ 231: {TEXT("oplus"), 8853}, /* circled plus = direct sum, U+2295 ISOamsb */ 232: {TEXT("or"), 8744}, /* logical or = vee, U+2228 ISOtech */ 233: {TEXT("ordf"), 170}, /* feminine ordinal indicator, U+00AA ISOnum */ 234: {TEXT("ordm"), 186}, /* masculine ordinal indicator, U+00BA ISOnum */ 235: {TEXT("oslash"), 248}, /* latin small letter o with stroke, = */ 236: /* latin small letter o slash, U+00F8 ISOlat1 */ 237: {TEXT("otilde"), 245}, /* latin small letter o with tilde, U+00F5 ISOlat1 */ 238: {TEXT("otimes"), 8855}, /* circled times = vector product, U+2297 ISOamsb */ 239: {TEXT("ouml"), 246}, /* latin small letter o with diaeresis, U+00F6 ISOlat1 */ 240: {TEXT("para"), 182}, /* pilcrow sign = paragraph sign, U+00B6 ISOnum */ 241: {TEXT("part"), 8706}, /* partial differential, U+2202 ISOtech */ 242: {TEXT("permil"), 8240}, /* per mille sign, U+2030 ISOtech */ 243: {TEXT("perp"), 8869}, /* up tack = orthogonal to = perpendicular, U+22A5 ISOtech */ 244: {TEXT("phi"), 966}, /* greek small letter phi, U+03C6 ISOgrk3 */ 245: {TEXT("pi"), 960}, /* greek small letter pi, U+03C0 ISOgrk3 */ 246: {TEXT("piv"), 982}, /* greek pi symbol, U+03D6 ISOgrk3 */ 247: {TEXT("plusmn"), 177}, /* plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */ 248: {TEXT("pound"), 163}, /* pound sign, U+00A3 ISOnum */ 249: {TEXT("prime"), 8242}, /* prime = minutes = feet, U+2032 ISOtech */ 250: {TEXT("prod"), 8719}, /* n-ary product = product sign, U+220F ISOamsb */ 251: {TEXT("prop"), 8733}, /* proportional to, U+221D ISOtech */ 252: {TEXT("psi"), 968}, /* greek small letter psi, U+03C8 ISOgrk3 */ 253: {TEXT("quot"), 34}, /* quotation mark = APL quote, U+0022 ISOnum */ 254: {TEXT("rArr"), 8658}, /* rightwards double arrow, U+21D2 ISOtech */ 255: {TEXT("radic"), 8730}, /* square root = radical sign, U+221A ISOtech */ 256: {TEXT("rang"), 9002}, /* right-pointing angle bracket = ket, U+232A ISOtech */ 257: {TEXT("raquo"), 187}, /* right-pointing double angle quotation mark = */ 258: /* right pointing guillemet, U+00BB ISOnum */ 259: {TEXT("rarr"), 8594}, /* rightwards arrow, U+2192 ISOnum */ 260: {TEXT("rceil"), 8969}, /* right ceiling, U+2309 ISOamsc */ 261: {TEXT("rdquo"), 8221}, /* right double quotation mark, U+201D ISOnum */ 262: {TEXT("real"), 8476}, /* blackletter capital R = real part symbol, U+211C ISOamso */ 263: {TEXT("reg"), 174}, /* registered sign = registered trade mark sign, */ 264: /* U+00AE ISOnum */ 265: {TEXT("rfloor"), 8971}, /* right floor, U+230B ISOamsc */ 266: {TEXT("rho"), 961}, /* greek small letter rho, U+03C1 ISOgrk3 */ 267: {TEXT("rlm"), 8207}, /* right-to-left mark, U+200F NEW RFC 2070 */ 268: {TEXT("rsaquo"), 8250}, /* single right-pointing angle quotation mark, */ 269: /* U+203A ISO proposed */ 270: {TEXT("rsquo"), 8217}, /* right single quotation mark, U+2019 ISOnum */ 271: {TEXT("sbquo"), 8218}, /* single low-9 quotation mark, U+201A NEW */ 272: {TEXT("scaron"), 353}, /* latin small letter s with caron, U+0161 ISOlat2 */ 273: {TEXT("sdot"), 8901}, /* dot operator, U+22C5 ISOamsb */ 274: {TEXT("sect"), 167}, /* section sign, U+00A7 ISOnum */ 275: {TEXT("shy"), 173}, /* soft hyphen = discretionary hyphen, U+00AD ISOnum */ 276: {TEXT("sigma"), 963}, /* greek small letter sigma, U+03C3 ISOgrk3 */ 277: {TEXT("sigmaf"), 962}, /* greek small letter final sigma, U+03C2 ISOgrk3 */ 278: {TEXT("sim"), 8764}, /* tilde operator = varies with = similar to, U+223C ISOtech */ 279: {TEXT("spades"), 9824}, /* black spade suit, U+2660 ISOpub */ 280: {TEXT("sub"), 8834}, /* subset of, U+2282 ISOtech */ 281: {TEXT("sube"), 8838}, /* subset of or equal to, U+2286 ISOtech */ 282: {TEXT("sum"), 8721}, /* n-ary sumation, U+2211 ISOamsb */ 283: {TEXT("sup"), 8835}, /* superset of, U+2283 ISOtech */ 284: {TEXT("sup1"), 185}, /* superscript one = superscript digit one, U+00B9 ISOnum */ 285: {TEXT("sup2"), 178}, /* superscript two = superscript digit two = squared, */ 286: /* U+00B2 ISOnum */ 287: {TEXT("sup3"), 179}, /* superscript three = superscript digit three = cubed, */ 288: /* U+00B3 ISOnum */ 289: {TEXT("supe"), 8839}, /* superset of or equal to, U+2287 ISOtech */ 290: {TEXT("szlig"), 223}, /* latin small letter sharp s = ess-zed, U+00DF ISOlat1 */ 291: {TEXT("tau"), 964}, /* greek small letter tau, U+03C4 ISOgrk3 */ 292: {TEXT("there4"), 8756}, /* therefore, U+2234 ISOtech */ 293: {TEXT("theta"), 952}, /* greek small letter theta, U+03B8 ISOgrk3 */ 294: {TEXT("thetasym"), 977},/* greek small letter theta symbol, U+03D1 NEW */ 295: {TEXT("thinsp"), 8201}, /* thin space, U+2009 ISOpub */ 296: {TEXT("thorn"), 254}, /* latin small letter thorn with, U+00FE ISOlat1 */ 297: {TEXT("tilde"), 732}, /* small tilde, U+02DC ISOdia */ 298: {TEXT("times"), 215}, /* multiplication sign, U+00D7 ISOnum */ 299: {TEXT("trade"), 8482}, /* trade mark sign, U+2122 ISOnum */ 300: {TEXT("uArr"), 8657}, /* upwards double arrow, U+21D1 ISOamsa */ 301: {TEXT("uacute"), 250}, /* latin small letter u with acute, U+00FA ISOlat1 */ 302: {TEXT("uarr"), 8593}, /* upwards arrow, U+2191 ISOnum*/ 303: {TEXT("ucirc"), 251}, /* latin small letter u with circumflex, U+00FB ISOlat1 */ 304: {TEXT("ugrave"), 249}, /* latin small letter u with grave, U+00F9 ISOlat1 */ 305: {TEXT("uml"), 168}, /* diaeresis = spacing diaeresis, U+00A8 ISOdia */ 306: {TEXT("upsih"), 978}, /* greek upsilon with hook symbol, U+03D2 NEW */ 307: {TEXT("upsilon"), 965}, /* greek small letter upsilon, U+03C5 ISOgrk3 */ 308: {TEXT("uuml"), 252}, /* latin small letter u with diaeresis, U+00FC ISOlat1 */ 309: {TEXT("weierp"), 8472}, /* script capital P = power set = Weierstrass p, */ 310: /* U+2118 ISOamso */ 311: {TEXT("xi"), 958}, /* greek small letter xi, U+03BE ISOgrk3 */ 312: {TEXT("yacute"), 253}, /* latin small letter y with acute, U+00FD ISOlat1 */ 313: {TEXT("yen"), 165}, /* yen sign = yuan sign, U+00A5 ISOnum */ 314: {TEXT("yuml"), 255}, /* latin small letter y with diaeresis, U+00FF ISOlat1 */ 315: {TEXT("zeta"), 950}, /* greek small letter zeta, U+03B6 ISOgrk3 */ 316: {TEXT("zwj"), 8205}, /* zero width joiner, U+200D NEW RFC 2070 */ 317: {TEXT("zwnj"), 8204}, /* zero width non-joiner, U+200C NEW RFC 2070 */ 318: {TEXT("zzzz"), 0} /* this last entry is required */ 319: }; 320: 1.14 cvs 321: /* tables defined in fetchHTMLname.c */ 322: extern AttributeMapping XhtmlAttributeMappingTable[]; 323: /* Mapping table of HTML attribute values */ 324: AttrValueMapping XhtmlAttrValueMappingTable[] = 1.1 cvs 325: { 326: {HTML_ATTR_dir, TEXT("ltr"), HTML_ATTR_dir_VAL_ltr}, 327: {HTML_ATTR_dir, TEXT("rtl"), HTML_ATTR_dir_VAL_rtl}, 328: 329: {HTML_ATTR_TextAlign, TEXT("left"), HTML_ATTR_TextAlign_VAL_left_}, 330: {HTML_ATTR_TextAlign, TEXT("center"), HTML_ATTR_TextAlign_VAL_center_}, 331: {HTML_ATTR_TextAlign, TEXT("right"), HTML_ATTR_TextAlign_VAL_right_}, 332: {HTML_ATTR_TextAlign, TEXT("justify"), HTML_ATTR_TextAlign_VAL_justify_}, 333: 334: {HTML_ATTR_Align, TEXT("left"), HTML_ATTR_Align_VAL_left_}, 335: {HTML_ATTR_Align, TEXT("center"), HTML_ATTR_Align_VAL_center_}, 336: {HTML_ATTR_Align, TEXT("right"), HTML_ATTR_Align_VAL_right_}, 337: 338: {HTML_ATTR_LAlign, TEXT("top"), HTML_ATTR_LAlign_VAL_Top_}, 339: {HTML_ATTR_LAlign, TEXT("bottom"), HTML_ATTR_LAlign_VAL_Bottom_}, 340: {HTML_ATTR_LAlign, TEXT("left"), HTML_ATTR_LAlign_VAL_Left_}, 341: {HTML_ATTR_LAlign, TEXT("right"), HTML_ATTR_LAlign_VAL_Right_}, 342: 343: {HTML_ATTR_Clear, TEXT("left"), HTML_ATTR_Clear_VAL_Left_}, 344: {HTML_ATTR_Clear, TEXT("right"), HTML_ATTR_Clear_VAL_Right_}, 345: {HTML_ATTR_Clear, TEXT("all"), HTML_ATTR_Clear_VAL_All_}, 346: {HTML_ATTR_Clear, TEXT("none"), HTML_ATTR_Clear_VAL_None_}, 347: 348: {HTML_ATTR_NumberStyle, TEXT("1"), HTML_ATTR_NumberStyle_VAL_Arabic_}, 349: {HTML_ATTR_NumberStyle, TEXT("a"), HTML_ATTR_NumberStyle_VAL_LowerAlpha}, 350: {HTML_ATTR_NumberStyle, TEXT("A"), HTML_ATTR_NumberStyle_VAL_UpperAlpha}, 351: {HTML_ATTR_NumberStyle, TEXT("i"), HTML_ATTR_NumberStyle_VAL_LowerRoman}, 352: {HTML_ATTR_NumberStyle, TEXT("I"), HTML_ATTR_NumberStyle_VAL_UpperRoman}, 353: 354: {HTML_ATTR_BulletStyle, TEXT("disc"), HTML_ATTR_BulletStyle_VAL_disc}, 355: {HTML_ATTR_BulletStyle, TEXT("square"), HTML_ATTR_BulletStyle_VAL_square}, 356: {HTML_ATTR_BulletStyle, TEXT("circle"), HTML_ATTR_BulletStyle_VAL_circle}, 357: 358: {HTML_ATTR_ItemStyle, TEXT("1"), HTML_ATTR_ItemStyle_VAL_Arabic_}, 359: {HTML_ATTR_ItemStyle, TEXT("a"), HTML_ATTR_ItemStyle_VAL_LowerAlpha}, 360: {HTML_ATTR_ItemStyle, TEXT("A"), HTML_ATTR_ItemStyle_VAL_UpperAlpha}, 361: {HTML_ATTR_ItemStyle, TEXT("i"), HTML_ATTR_ItemStyle_VAL_LowerRoman}, 362: {HTML_ATTR_ItemStyle, TEXT("I"), HTML_ATTR_ItemStyle_VAL_UpperRoman}, 363: {HTML_ATTR_ItemStyle, TEXT("disc"), HTML_ATTR_ItemStyle_VAL_disc}, 364: {HTML_ATTR_ItemStyle, TEXT("square"), HTML_ATTR_ItemStyle_VAL_square}, 365: {HTML_ATTR_ItemStyle, TEXT("circle"), HTML_ATTR_ItemStyle_VAL_circle}, 366: 367: {HTML_ATTR_Button_type, TEXT("button"), HTML_ATTR_Button_type_VAL_button}, 368: {HTML_ATTR_Button_type, TEXT("submit"), HTML_ATTR_Button_type_VAL_submit}, 369: {HTML_ATTR_Button_type, TEXT("reset"), HTML_ATTR_Button_type_VAL_reset}, 370: 371: {HTML_ATTR_frame, TEXT("void"), HTML_ATTR_frame_VAL_void}, 372: {HTML_ATTR_frame, TEXT("above"), HTML_ATTR_frame_VAL_above}, 373: {HTML_ATTR_frame, TEXT("below"), HTML_ATTR_frame_VAL_below}, 374: {HTML_ATTR_frame, TEXT("hsides"), HTML_ATTR_frame_VAL_hsides}, 375: {HTML_ATTR_frame, TEXT("lhs"), HTML_ATTR_frame_VAL_lhs}, 376: {HTML_ATTR_frame, TEXT("rhs"), HTML_ATTR_frame_VAL_rhs}, 377: {HTML_ATTR_frame, TEXT("vsides"), HTML_ATTR_frame_VAL_vsides}, 378: {HTML_ATTR_frame, TEXT("box"), HTML_ATTR_frame_VAL_box}, 379: {HTML_ATTR_frame, TEXT("border"), HTML_ATTR_frame_VAL_border}, 380: 381: {HTML_ATTR_frameborder, TEXT("0"), HTML_ATTR_frameborder_VAL_Border0}, 382: {HTML_ATTR_frameborder, TEXT("1"), HTML_ATTR_frameborder_VAL_Border1}, 383: 384: {HTML_ATTR_scrolling, TEXT("yes"), HTML_ATTR_scrolling_VAL_Yes_}, 385: {HTML_ATTR_scrolling, TEXT("no"), HTML_ATTR_scrolling_VAL_No_}, 386: {HTML_ATTR_scrolling, TEXT("auto"), HTML_ATTR_scrolling_VAL_auto_}, 387: 388: {HTML_ATTR_rules_, TEXT("none"), HTML_ATTR_rules__VAL_none_}, 389: {HTML_ATTR_rules_, TEXT("groups"), HTML_ATTR_rules__VAL_groups}, 390: {HTML_ATTR_rules_, TEXT("rows"), HTML_ATTR_rules__VAL_rows}, 391: {HTML_ATTR_rules_, TEXT("cols"), HTML_ATTR_rules__VAL_cols}, 392: {HTML_ATTR_rules_, TEXT("all"), HTML_ATTR_rules__VAL_all}, 393: 394: {HTML_ATTR_Cell_align, TEXT("left"), HTML_ATTR_Cell_align_VAL_Cell_left}, 395: {HTML_ATTR_Cell_align, TEXT("center"), HTML_ATTR_Cell_align_VAL_Cell_center}, 396: {HTML_ATTR_Cell_align, TEXT("right"), HTML_ATTR_Cell_align_VAL_Cell_right}, 397: {HTML_ATTR_Cell_align, TEXT("justify"), HTML_ATTR_Cell_align_VAL_Cell_justify}, 398: {HTML_ATTR_Cell_align, TEXT("char"), HTML_ATTR_Cell_align_VAL_Cell_char}, 399: 400: {HTML_ATTR_Alignment, TEXT("top"), HTML_ATTR_Alignment_VAL_Top_}, 401: {HTML_ATTR_Alignment, TEXT("middle"), HTML_ATTR_Alignment_VAL_Middle_}, 402: {HTML_ATTR_Alignment, TEXT("bottom"), HTML_ATTR_Alignment_VAL_Bottom_}, 403: {HTML_ATTR_Alignment, TEXT("left"), HTML_ATTR_Alignment_VAL_Left_}, 404: {HTML_ATTR_Alignment, TEXT("right"), HTML_ATTR_Alignment_VAL_Right_}, 405: 406: {HTML_ATTR_METHOD, TEXT("get"), HTML_ATTR_METHOD_VAL_Get_}, 407: {HTML_ATTR_METHOD, TEXT("post"), HTML_ATTR_METHOD_VAL_Post_}, 408: 409: {HTML_ATTR_Position, TEXT("top"), HTML_ATTR_Position_VAL_Position_top}, 410: {HTML_ATTR_Position, TEXT("bottom"), HTML_ATTR_Position_VAL_Position_bottom}, 411: {HTML_ATTR_Position, TEXT("left"), HTML_ATTR_Position_VAL_Position_left}, 412: {HTML_ATTR_Position, TEXT("right"), HTML_ATTR_Position_VAL_Position_right}, 413: 414: {HTML_ATTR_Row_valign, TEXT("top"), HTML_ATTR_Row_valign_VAL_Row_top}, 415: {HTML_ATTR_Row_valign, TEXT("middle"), HTML_ATTR_Row_valign_VAL_Row_middle}, 416: {HTML_ATTR_Row_valign, TEXT("bottom"), HTML_ATTR_Row_valign_VAL_Row_bottom}, 417: {HTML_ATTR_Row_valign, TEXT("baseline"), HTML_ATTR_Row_valign_VAL_Row_baseline}, 418: 419: {HTML_ATTR_Cell_valign, TEXT("top"), HTML_ATTR_Cell_valign_VAL_Cell_top}, 420: {HTML_ATTR_Cell_valign, TEXT("middle"), HTML_ATTR_Cell_valign_VAL_Cell_middle}, 421: {HTML_ATTR_Cell_valign, TEXT("bottom"), HTML_ATTR_Cell_valign_VAL_Cell_bottom}, 422: {HTML_ATTR_Cell_valign, TEXT("baseline"), HTML_ATTR_Cell_valign_VAL_Cell_baseline}, 423: 424: {HTML_ATTR_shape, TEXT("rect"), HTML_ATTR_shape_VAL_rectangle}, 425: {HTML_ATTR_shape, TEXT("circle"), HTML_ATTR_shape_VAL_circle}, 426: {HTML_ATTR_shape, TEXT("poly"), HTML_ATTR_shape_VAL_polygon}, 427: 428: {HTML_ATTR_valuetype, TEXT("data"), HTML_ATTR_valuetype_VAL_data_}, 429: {HTML_ATTR_valuetype, TEXT("ref"), HTML_ATTR_valuetype_VAL_ref}, 430: {HTML_ATTR_valuetype, TEXT("object"), HTML_ATTR_valuetype_VAL_object_}, 431: 432: /* HTML attribute TYPE generates a Thot element */ 433: {DummyAttribute, TEXT("button"), HTML_EL_Button_Input}, 434: {DummyAttribute, TEXT("checkbox"), HTML_EL_Checkbox_Input}, 435: {DummyAttribute, TEXT("file"), HTML_EL_File_Input}, 436: {DummyAttribute, TEXT("hidden"), HTML_EL_Hidden_Input}, 437: {DummyAttribute, TEXT("image"), HTML_EL_PICTURE_UNIT}, 438: {DummyAttribute, TEXT("password"), HTML_EL_Password_Input}, 439: {DummyAttribute, TEXT("radio"), HTML_EL_Radio_Input}, 440: {DummyAttribute, TEXT("reset"), HTML_EL_Reset_Input}, 441: {DummyAttribute, TEXT("submit"), HTML_EL_Submit_Input}, 442: {DummyAttribute, TEXT("text"), HTML_EL_Text_Input}, 443: 444: /* The following declarations allow the parser to accept boolean attributes */ 445: /* written "checked=CHECKED"), for instance */ 446: {HTML_ATTR_ISMAP, TEXT("ismap"), HTML_ATTR_ISMAP_VAL_Yes_}, 447: {HTML_ATTR_nohref, TEXT("nohref"), HTML_ATTR_nohref_VAL_Yes_}, 448: {HTML_ATTR_COMPACT, TEXT("compact"), HTML_ATTR_COMPACT_VAL_Yes_}, 449: {HTML_ATTR_Multiple, TEXT("multiple"), HTML_ATTR_Multiple_VAL_Yes_}, 450: {HTML_ATTR_Selected, TEXT("selected"), HTML_ATTR_Selected_VAL_Yes_}, 451: {HTML_ATTR_Checked, TEXT("checked"), HTML_ATTR_Checked_VAL_Yes_}, 452: {HTML_ATTR_No_wrap, TEXT("nowrap"), HTML_ATTR_No_wrap_VAL_no_wrap}, 453: {HTML_ATTR_NoShade, TEXT("noshade"), HTML_ATTR_NoShade_VAL_NoShade_}, 454: {HTML_ATTR_declare, TEXT("declare"), HTML_ATTR_declare_VAL_Yes_}, 455: {HTML_ATTR_defer, TEXT("defer"), HTML_ATTR_defer_VAL_Yes_}, 456: {HTML_ATTR_disabled, TEXT("disabled"), HTML_ATTR_disabled_VAL_Yes_}, 457: {HTML_ATTR_readonly, TEXT("readonly"), HTML_ATTR_readonly_VAL_Yes_}, 458: {HTML_ATTR_no_resize, TEXT("noresize"), HTML_ATTR_no_resize_VAL_Yes_}, 459: {0, TEXT(""), 0} /* Last entry. Mandatory */ 460: }; 1.6 cvs 461: 462: 463: /*---------------------------------------------------------------------- 1.15 cvs 464: ParseCharset: 1.6 cvs 465: Parses the element HTTP-EQUIV and looks for the charset value. 466: ----------------------------------------------------------------------*/ 467: #ifdef __STDC__ 1.15 cvs 468: void ParseCharset (Element el, Document doc) 1.6 cvs 469: #else /* !__STDC__ */ 1.15 cvs 470: void ParseCharset (el, doc) 1.6 cvs 471: Element el; 472: Document doc; 473: #endif /* !__STDC__ */ 474: { 1.15 cvs 475: AttributeType attrType; 476: Attribute attr; 477: SSchema docSSchema; 478: CHARSET charset; 1.6 cvs 479: CHAR_T *text, *text2, *ptrText, *str; 480: CHAR_T charsetname[MAX_LENGTH]; 1.15 cvs 481: int length; 1.6 cvs 482: int pos, index = 0; 483: 1.15 cvs 484: charset = TtaGetDocumentCharset (doc); 485: if (charset != UNDEFINED_CHARSET) 486: /* the charset was already defined by the http header */ 487: return; 1.6 cvs 488: 489: docSSchema = TtaGetDocumentSSchema (doc); 490: attrType.AttrSSchema = docSSchema; 491: attrType.AttrTypeNum = HTML_ATTR_http_equiv; 492: attr = TtaGetAttribute (el, attrType); 493: if (attr != NULL) 494: { 495: /* There is a HTTP-EQUIV attribute */ 496: length = TtaGetTextAttributeLength (attr); 497: if (length > 0) 498: { 499: text = TtaAllocString (length + 1); 500: TtaGiveTextAttributeValue (attr, text, &length); 501: if (!ustrcasecmp (text, TEXT("content-type"))) 502: { 503: attrType.AttrTypeNum = HTML_ATTR_meta_content; 504: attr = TtaGetAttribute (el, attrType); 505: if (attr != NULL) 506: { 507: length = TtaGetTextAttributeLength (attr); 508: if (length > 0) 509: { 510: text2 = TtaAllocString (length + 1); 511: TtaGiveTextAttributeValue (attr, text2, &length); 512: ptrText = text2; 513: while (*ptrText) 514: { 515: *ptrText = utolower (*ptrText); 516: ptrText++; 517: } 518: 519: str = ustrstr (text2, TEXT("charset=")); 520: if (str) 521: { 522: pos = str - text2 + 8; 523: while (text2[pos] != WC_SPACE && 524: text2[pos] != WC_TAB && text2[pos] != WC_EOS) 525: charsetname[index++] = text2[pos++]; 526: charsetname[index] = WC_EOS; 1.15 cvs 527: charset = TtaGetCharset (charsetname); 528: if (charset != UNDEFINED_CHARSET) 529: TtaSetDocumentCharset (doc, charset); 1.6 cvs 530: } 531: TtaFreeMemory (text2); 532: } 533: } 534: } 535: TtaFreeMemory (text); 536: } 537: } 538: } 539: 540: /*---------------------------------------------------------------------- 541: XhtmlElementComplete 542: Complete XHTML elements. 543: Check its attributes and its contents. 544: ----------------------------------------------------------------------*/ 545: #ifdef __STDC__ 1.15 cvs 546: void XhtmlElementComplete (Element el, Document doc, int *error) 1.6 cvs 547: #else 1.15 cvs 548: void XhtmlElementComplete (el, doc, error) 549: Element el; 550: Document doc; 551: int *error; 1.6 cvs 552: #endif 553: { 1.8 cvs 554: #ifdef EXPAT_PARSER 1.6 cvs 555: ElementType elType, newElType, childType; 556: Element constElem, child, desc, leaf, prev, next, last, 557: elFrames, lastFrame, lastChild; 558: Attribute attr; 559: AttributeType attrType; 560: Language lang; 561: STRING text; 562: CHAR_T lastChar[2]; 563: STRING name1; 564: int length; 565: SSchema docSSchema; 566: 567: *error = 0; 568: docSSchema = TtaGetDocumentSSchema (doc); 569: 570: elType = TtaGetElementType (el); 571: /* is this a block-level element in a character-level element? */ 1.19 ! cvs 572: if (!IsXMLElementInline (el) && ! 573: elType.ElTypeNum != HTML_EL_Comment_) 1.6 cvs 574: BlockInCharLevelElem (el); 575: 576: newElType.ElSSchema = elType.ElSSchema; 577: switch (elType.ElTypeNum) 578: { 579: case HTML_EL_Object: /* it's an object */ 580: /* create Object_Content */ 581: child = TtaGetFirstChild (el); 582: if (child != NULL) 583: elType = TtaGetElementType (child); 584: 585: /* is it the PICTURE element ? */ 586: if (child == NULL || elType.ElTypeNum != HTML_EL_PICTURE_UNIT) 587: { 588: desc = child; 589: /* create the PICTURE element */ 590: elType.ElTypeNum = HTML_EL_PICTURE_UNIT; 591: child = TtaNewTree (doc, elType, ""); 592: if (desc == NULL) 593: TtaInsertFirstChild (&child, el, doc); 594: else 595: TtaInsertSibling (child, desc, TRUE, doc); 596: } 597: 598: /* copy attribute data into SRC attribute of Object_Image */ 599: attrType.AttrSSchema = docSSchema; 600: attrType.AttrTypeNum = HTML_ATTR_data; 601: attr = TtaGetAttribute (el, attrType); 602: if (attr != NULL) 603: { 604: length = TtaGetTextAttributeLength (attr); 605: if (length > 0) 606: { 607: name1 = TtaAllocString (length + 1); 608: TtaGiveTextAttributeValue (attr, name1, &length); 609: attrType.AttrTypeNum = HTML_ATTR_SRC; 610: attr = TtaGetAttribute (child, attrType); 611: if (attr == NULL) 612: { 613: attr = TtaNewAttribute (attrType); 614: TtaAttachAttribute (child, attr, doc); 615: } 616: TtaSetAttributeText (attr, name1, child, doc); 617: TtaFreeMemory (name1); 618: } 619: } 620: 621: /* is the Object_Content element already created ? */ 622: desc = child; 623: TtaNextSibling(&desc); 624: if (desc != NULL) 625: elType = TtaGetElementType (desc); 626: 627: /* is it the Object_Content element ? */ 628: if (desc == NULL || elType.ElTypeNum != HTML_EL_Object_Content) 629: { 630: /* create Object_Content */ 631: elType.ElTypeNum = HTML_EL_Object_Content; 632: desc = TtaNewTree (doc, elType, ""); 633: TtaInsertSibling (desc, child, FALSE, doc); 634: /* move previous existing children into Object_Content */ 635: child = TtaGetLastChild(el); 636: while (child != desc) 637: { 638: TtaRemoveTree (child, doc); 639: TtaInsertFirstChild (&child, desc, doc); 640: child = TtaGetLastChild(el); 641: } 642: } 643: break; 644: 645: case HTML_EL_Unnumbered_List: 646: case HTML_EL_Numbered_List: 647: case HTML_EL_Menu: 648: case HTML_EL_Directory: 649: /* It's a List element. It should only have List_Item children. 650: If it has List element chidren, move these List elements 651: within their previous List_Item sibling. This is to fix 652: a bug in document generated by Mozilla. */ 653: prev = NULL; 654: next = NULL; 655: child = TtaGetFirstChild (el); 656: while (child != NULL) 657: { 658: next = child; 659: TtaNextSibling (&next); 660: elType = TtaGetElementType (child); 661: if (elType.ElTypeNum == HTML_EL_Unnumbered_List || 662: elType.ElTypeNum == HTML_EL_Numbered_List || 663: elType.ElTypeNum == HTML_EL_Menu || 664: elType.ElTypeNum == HTML_EL_Directory) 665: /* this list element is a child of another list element */ 666: if (prev) 667: { 668: elType = TtaGetElementType (prev); 669: if (elType.ElTypeNum == HTML_EL_List_Item) 670: { 671: /* get the last child of the previous List_Item */ 672: desc = TtaGetFirstChild (prev); 673: last = NULL; 674: while (desc) 675: { 676: last = desc; 677: TtaNextSibling (&desc); 678: } 679: /* move the list element after the last child of the 680: previous List_Item */ 681: TtaRemoveTree (child, doc); 682: if (last) 683: TtaInsertSibling (child, last, FALSE, doc); 684: else 685: TtaInsertFirstChild (&child, prev, doc); 686: child = prev; 687: } 688: } 689: prev = child; 690: child = next; 691: } 692: break; 693: 694: case HTML_EL_FRAMESET: 695: /* The FRAMESET element is now complete. Gather all its FRAMESET 696: and FRAME children and wrap them up in a Frames element */ 697: elFrames = NULL; lastFrame = NULL; 698: lastChild = NULL; 699: child = TtaGetFirstChild (el); 700: while (child != NULL) 701: { 702: next = child; 703: TtaNextSibling (&next); 704: elType = TtaGetElementType (child); 705: if (elType.ElTypeNum == HTML_EL_FRAMESET || 706: elType.ElTypeNum == HTML_EL_FRAME || 707: elType.ElTypeNum == HTML_EL_Comment_) 708: { 709: /* create the Frames element if it does not exist */ 710: if (elFrames == NULL) 711: { 712: newElType.ElSSchema = docSSchema; 713: newElType.ElTypeNum = HTML_EL_Frames; 714: elFrames = TtaNewElement (doc, newElType); 715: XmlSetElemLineNumber (elFrames); 716: TtaInsertSibling (elFrames, child, TRUE, doc); 717: } 718: /* move the element as the last child of the Frames element */ 719: TtaRemoveTree (child, doc); 720: if (lastFrame == NULL) 721: TtaInsertFirstChild (&child, elFrames, doc); 722: else 723: TtaInsertSibling (child, lastFrame, FALSE, doc); 724: lastFrame = child; 725: } 726: child = next; 727: } 728: break; 729: 730: case HTML_EL_Input: /* it's an INPUT without any TYPE attribute */ 731: /* Create a child of type Text_Input */ 732: elType.ElTypeNum = HTML_EL_Text_Input; 733: child = TtaNewTree (doc, elType, ""); 734: XmlSetElemLineNumber (child); 735: TtaInsertFirstChild (&child, el, doc); 736: /* now, process it like a Text_Input element */ 737: 738: case HTML_EL_Text_Input: 739: case HTML_EL_Password_Input: 740: case HTML_EL_File_Input: 741: /* get element Inserted_Text */ 742: child = TtaGetFirstChild (el); 743: if (child != NULL) 744: { 745: attrType.AttrSSchema = docSSchema; 746: attrType.AttrTypeNum = HTML_ATTR_Value_; 747: attr = TtaGetAttribute (el, attrType); 748: if (attr != NULL) 749: { 750: /* copy the value of attribute "value" into the first text 751: leaf of element */ 752: length = TtaGetTextAttributeLength (attr); 753: if (length > 0) 754: { 755: /* get the text leaf */ 756: leaf = TtaGetFirstChild (child); 757: if (leaf != NULL) 758: { 759: childType = TtaGetElementType (leaf); 760: if (childType.ElTypeNum == HTML_EL_TEXT_UNIT) 761: { 762: /* copy attribute value into the text leaf */ 763: text = TtaAllocString (length + 1); 764: TtaGiveTextAttributeValue (attr, text, &length); 765: TtaSetTextContent (leaf, text, 766: TtaGetDefaultLanguage (), doc); 767: TtaFreeMemory (text); 768: } 769: } 770: } 771: } 772: } 773: break; 774: 775: case HTML_EL_META: 1.15 cvs 776: ParseCharset (el, doc); 1.6 cvs 777: break; 778: 779: case HTML_EL_STYLE_: /* it's a STYLE element */ 1.8 cvs 780: case HTML_EL_SCRIPT: /* it's a SCRIPT element */ 1.6 cvs 781: case HTML_EL_Preformatted: /* it's a PRE */ 782: /* if the last line of the Preformatted is empty, remove it */ 783: leaf = XmlLastLeafInElement (el); 784: if (leaf != NULL) 785: { 786: elType = TtaGetElementType (leaf); 787: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT) 788: /* the last leaf is a TEXT element */ 789: { 790: length = TtaGetTextLength (leaf); 791: if (length > 0) 792: { 793: TtaGiveSubString (leaf, lastChar, length, 1); 794: if (lastChar[0] == EOL) 795: /* last character is new line, delete it */ 796: { 797: if (length == 1) 798: /* empty TEXT element */ 799: TtaDeleteTree (leaf, doc); 800: else 801: /* remove the last character */ 802: TtaDeleteTextContent (leaf, length, 1, doc); 803: } 804: } 805: } 806: } 807: if (IsParsingCSS ()) 808: { 809: text = GetStyleContents (el); 810: if (text) 811: { 812: ReadCSSRules (doc, NULL, text, FALSE); 813: TtaFreeMemory (text); 814: } 815: SetParsingCSS (FALSE); 816: } 817: /* and continue as if it were a Preformatted or a Script */ 818: break; 819: 820: case HTML_EL_Text_Area: /* it's a Text_Area */ 821: SetParsingTextArea (FALSE); 822: child = TtaGetFirstChild (el); 823: if (child == NULL) 824: /* it's an empty Text_Area */ 825: /* insert a Inserted_Text element in the element */ 826: { 827: newElType.ElTypeNum = HTML_EL_Inserted_Text; 828: child = TtaNewTree (doc, newElType, ""); 829: TtaInsertFirstChild (&child, el, doc); 830: } 831: else 832: { 833: /* save the text into Default_Value attribute */ 834: attrType.AttrSSchema = docSSchema; 835: attrType.AttrTypeNum = HTML_ATTR_Default_Value; 836: if (TtaGetAttribute (el, attrType) == NULL) 837: /* attribute Default_Value is missing */ 838: { 839: attr = TtaNewAttribute (attrType); 840: TtaAttachAttribute (el, attr, doc); 841: desc = TtaGetFirstChild (child); 842: length = TtaGetTextLength (desc) + 1; 843: text = TtaAllocString (length); 844: TtaGiveTextContent (desc, text, &length, &lang); 845: TtaSetAttributeText (attr, text, el, doc); 846: TtaFreeMemory (text); 847: } 848: } 849: /* insert a Frame element */ 850: newElType.ElTypeNum = HTML_EL_Frame; 851: constElem = TtaNewTree (doc, newElType, ""); 852: TtaInsertSibling (constElem, child, FALSE, doc); 853: break; 854: 855: case HTML_EL_Radio_Input: 856: case HTML_EL_Checkbox_Input: 857: /* put an attribute Checked if it is missing */ 858: attrType.AttrSSchema = docSSchema; 859: attrType.AttrTypeNum = HTML_ATTR_Checked; 860: if (TtaGetAttribute (el, attrType) == NULL) 861: /* attribute Checked is missing */ 862: { 863: attr = TtaNewAttribute (attrType); 864: TtaAttachAttribute (el, attr, doc); 865: TtaSetAttributeValue (attr, HTML_ATTR_Checked_VAL_No_, el, doc); 866: } 867: break; 868: 869: case HTML_EL_Option_Menu: 870: /* Check that at least one option has a SELECTED attribute */ 871: OnlyOneOptionSelected (el, doc, TRUE); 872: break; 873: 874: case HTML_EL_PICTURE_UNIT: 875: break; 876: 877: case HTML_EL_LINK: 878: CheckCSSLink (el, doc, docSSchema); 879: break; 880: 881: case HTML_EL_Data_cell: 882: case HTML_EL_Heading_cell: 883: /* insert a pseudo paragraph into empty cells */ 884: child = TtaGetFirstChild (el); 885: if (child == NULL) 886: { 887: elType.ElTypeNum = HTML_EL_Pseudo_paragraph; 888: child = TtaNewTree (doc, elType, ""); 889: if (child != NULL) 890: TtaInsertFirstChild (&child, el, doc); 891: } 892: 893: /* detect whether we're parsing a whole table or just a cell */ 894: if (IsWithinTable ()) 895: NewCell (el, doc, FALSE); 896: break; 897: 898: case HTML_EL_Table: 899: CheckTable (el, doc); 900: SubWithinTable (); 901: break; 902: 903: case HTML_EL_TITLE: 904: /* show the TITLE in the main window */ 905: UpdateTitle (el, doc); 906: break; 907: 908: default: 909: break; 910: } 1.8 cvs 911: #endif /* EXPAT_PARSER */ 1.6 cvs 912: } 1.1 cvs 913: 914: /*---------------------------------------------------------------------- 1.2 cvs 915: XhtmlGetDTDName 916: Return in DTDname the name of the DTD to be used for parsing the 917: content of element named elementName. 918: This element type appear with an 'X' in the ElemMappingTable. 1.1 cvs 919: ----------------------------------------------------------------------*/ 920: #ifdef __STDC__ 1.2 cvs 921: void XhtmlGetDTDName (STRING DTDname, 922: STRING elementName) 1.1 cvs 923: #else 1.2 cvs 924: void XhtmlGetDTDName (DTDname, 925: elementName) 926: STRING DTDname; 927: STRING elementName; 928: 1.1 cvs 929: #endif 930: { 1.8 cvs 931: #ifdef EXPAT_PARSER 1.2 cvs 932: if (ustrcmp (elementName, TEXT("math")) == 0) 933: ustrcpy (DTDname, TEXT("MathML")); 1.1 cvs 934: else 1.2 cvs 935: if (ustrcmp (elementName, TEXT("label")) == 0 || 936: ustrcmp (elementName, TEXT("text")) == 0) 937: ustrcpy (DTDname, TEXT("HTML")); 938: else 939: ustrcpy (DTDname, TEXT("")); 1.17 cvs 940: #endif /* EXPAT_PARSER */ 941: } 942: 1.1 cvs 943: 944: /*---------------------------------------------------------------------- 1.16 cvs 945: MapHTMLAttributeValue 1.2 cvs 946: Search in the Attribute Value Mapping Table the entry for the attribute 947: ThotAtt and its value AttrVal. Returns the corresponding Thot value. 1.1 cvs 948: ----------------------------------------------------------------------*/ 949: #ifdef __STDC__ 1.16 cvs 950: void MapHTMLAttributeValue (CHAR_T* AttrVal, 1.11 cvs 951: AttributeType attrType, 952: int* value) 1.1 cvs 953: #else 1.16 cvs 954: void MapHTMLAttributeValue (AttrVal, 1.11 cvs 955: attrType, 956: value) 957: CHAR_T* AttrVal; 958: AttributeType attrType; 959: int* value; 1.1 cvs 960: #endif 961: { 1.8 cvs 962: #ifdef EXPAT_PARSER 1.2 cvs 963: int i; 1.1 cvs 964: 1.2 cvs 965: *value = 0; 1.1 cvs 966: i = 0; 967: 1.2 cvs 968: while (XhtmlAttrValueMappingTable[i].ThotAttr != attrType.AttrTypeNum && 969: XhtmlAttrValueMappingTable[i].ThotAttr != 0) 970: i++; 971: 972: if (XhtmlAttrValueMappingTable[i].ThotAttr == attrType.AttrTypeNum) 973: { 974: do 1.18 cvs 975: if (!ustrcmp (XhtmlAttrValueMappingTable[i].XMLattrValue, AttrVal)) 1.2 cvs 976: *value = XhtmlAttrValueMappingTable[i].ThotAttrValue; 1.1 cvs 977: else 1.2 cvs 978: i++; 979: while (*value <= 0 && 980: XhtmlAttrValueMappingTable[i].ThotAttr != 0); 1.1 cvs 981: } 1.8 cvs 982: #endif /* EXPAT_PARSER */ 1.1 cvs 983: } 984: 985: /*--------------------------------------------------------------------------- 1.2 cvs 986: XhtmlMapEntity 1.1 cvs 987: Search that entity in the entity table and return the corresponding value. 988: ---------------------------------------------------------------------------*/ 989: #ifdef __STDC__ 1.13 cvs 990: void XhtmlMapEntity (STRING entityName, int *entityValue, STRING alphabet) 1.1 cvs 991: #else 1.13 cvs 992: void XhtmlMapEntity (entityName, entityValue, alphabet) 993: STRING entityName; 994: int *entityValue; 995: STRING alphabet; 1.1 cvs 996: #endif 997: { 1.8 cvs 998: #ifdef EXPAT_PARSER 1.12 cvs 999: int i; 1.13 cvs 1000: ThotBool found; 1.1 cvs 1001: 1.13 cvs 1002: found = FALSE; 1003: for (i = 0; XhtmlEntityTable[i].charCode >= 0 && ! found; i++) 1004: found = !ustrcmp (XhtmlEntityTable[i].charName, entityName); 1.3 cvs 1005: 1.13 cvs 1006: if (found) 1.12 cvs 1007: { 1008: /* entity found */ 1.13 cvs 1009: i--; 1.12 cvs 1010: *entityValue = XhtmlEntityTable[i].charCode; 1011: *alphabet = 'L'; 1012: } 1013: else 1014: *alphabet = EOS; 1.8 cvs 1015: #endif /* EXPAT_PARSER */ 1.11 cvs 1016: } 1017: 1.13 cvs 1018: #ifdef EXPAT_PARSER 1019: /*---------------------------------------------------------------------- 1020: PutNonISOlatin1Char 1021: Put a Unicode character in the input buffer. 1022: ----------------------------------------------------------------------*/ 1023: #ifdef __STDC__ 1024: static void PutNonISOlatin1Char (int code, STRING prefix, STRING entityName, ParserData *context) 1025: #else 1026: static void PutNonISOlatin1Char (code, prefix, entityName, context) 1027: int code; 1028: STRING prefix; 1029: STRING entityName; 1030: ParserData *context; 1031: #endif 1032: { 1033: Language lang, l; 1034: ElementType elType; 1035: Element elText; 1036: AttributeType attrType; 1037: Attribute attr; 1038: CHAR_T buffer[MaxEntityLength+10]; 1039: 1040: if (context->readingAnAttrValue) 1041: /* this entity belongs to an attribute value */ 1042: { 1043: /* Thot can't mix different languages in the same attribute value */ 1044: /* just discard that character */ 1045: ; 1046: } 1047: else 1048: /* this entity belongs to the element contents */ 1049: { 1050: /* create a new text leaf */ 1051: elType.ElSSchema = TtaGetDocumentSSchema (context->doc); 1052: elType.ElTypeNum = HTML_EL_TEXT_UNIT; 1053: elText = TtaNewElement (context->doc, elType); 1054: XmlSetElemLineNumber (elText); 1055: XhtmlInsertElement (&elText); 1056: context->lastElement = elText; 1057: context->lastElementClosed = FALSE; 1058: context->lastElementClosed = TRUE; 1059: 1060: /* try to find a fallback character */ 1061: l = context->language; 1062: GetFallbackCharacter (code, buffer, &lang); 1063: 1064: /* put that fallback character in the new text leaf */ 1065: TtaSetTextContent (elText, buffer, lang, context->doc); 1066: context->language = l; 1067: 1068: /* make that text leaf read-only */ 1069: TtaSetAccessRight (elText, ReadOnly, context->doc); 1070: 1071: /* associate an attribute EntityName with the new text leaf */ 1072: attrType.AttrSSchema = TtaGetDocumentSSchema (context->doc); 1073: attrType.AttrTypeNum = HTML_ATTR_EntityName; 1074: attr = TtaNewAttribute (attrType); 1075: TtaAttachAttribute (elText, attr, context->doc); 1076: ustrcpy (buffer, prefix); 1077: ustrcat (buffer, entityName); 1078: TtaSetAttributeText (attr, buffer, elText, context->doc); 1079: context->mergeText = FALSE; 1080: } 1081: } 1082: #endif /* EXPAT_PARSER */ 1083: 1.11 cvs 1084: /*---------------------------------------------------------------------- 1085: XhtmlEntityCreated 1086: A XTHML entity has been created by the XML parser. 1087: ----------------------------------------------------------------------*/ 1088: #ifdef __STDC__ 1.13 cvs 1089: void XhtmlEntityCreated (int entityVal, Language lang, STRING entityName, ParserData *context) 1.11 cvs 1090: #else 1.13 cvs 1091: void XhtmlEntityCreated (entityVal, lang, entityName, context) 1092: int entityVal; 1093: Language lang; 1094: STRING entityName; 1095: ParserData *context; 1.11 cvs 1096: #endif 1097: { 1.12 cvs 1098: #ifdef EXPAT_PARSER 1.13 cvs 1099: CHAR_T buffer[2]; 1100: 1.11 cvs 1101: if (lang < 0) 1.13 cvs 1102: PutInXmlElement (entityName); 1.11 cvs 1103: else 1104: { 1105: #ifdef LC 1106: printf (" \n code=%d", entityVal); 1107: #endif /* LC */ 1108: if (entityVal < 255) 1.13 cvs 1109: { 1110: buffer[0] = TEXT(entityVal); 1111: buffer[1] = WC_EOS; 1112: PutInXmlElement (buffer); 1113: } 1.11 cvs 1114: else 1.13 cvs 1115: PutNonISOlatin1Char (entityVal, TEXT(""), entityName, context); 1.11 cvs 1116: } 1.12 cvs 1117: #endif /* EXPAT_PARSER */ 1.1 cvs 1118: } 1119: 1120: /*-------------------- Entities (end) ---------------------*/