Annotation of Amaya/amaya/XHTMLbuilder.c, revision 1.26
1.1 cvs 1: /*
2: *
3: * (c) COPYRIGHT MIT and INRIA, 1996.
4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
7:
8: /*
9: *
1.23 cvs 10: * Builds the corresponding abstract tree for a Thot document of type HTML.
1.1 cvs 11: *
1.20 cvs 12: * Authors: L. Carcone
13: * V. Quint
1.1 cvs 14: */
15:
16: #define THOT_EXPORT extern
17: #include "amaya.h"
18: #include "css.h"
1.2 cvs 19: #include "parser.h"
20: #include "HTML.h"
21:
1.13 cvs 22: #include "css_f.h"
23: #include "fetchXMLname_f.h"
1.22 cvs 24: #include "html2thot_f.h"
1.1 cvs 25: #include "HTMLactions_f.h"
26: #include "HTMLedit_f.h"
1.22 cvs 27: #include "HTMLform_f.h"
1.1 cvs 28: #include "HTMLimage_f.h"
29: #include "HTMLtable_f.h"
30: #include "HTMLimage_f.h"
31: #include "UIcss_f.h"
1.13 cvs 32: #include "styleparser_f.h"
1.2 cvs 33: #include "XHTMLbuilder_f.h"
1.13 cvs 34: #include "Xml2thot_f.h"
1.1 cvs 35:
36: /* maximum length of a Thot structure schema name */
37: #define MAX_SS_NAME_LENGTH 32
38:
1.23 cvs 39: XmlEntity XhtmlEntityTable[] =
1.1 cvs 40: {
41: /* This table MUST be in alphabetical order */
1.23 cvs 42: {TEXT("AElig"), 198, TEXT(' ')}, /* latin capital letter AE = */
43: /* latin capital ligature AE, U+00C6 ISOlat1 */
44: {TEXT("Aacute"), 193, TEXT(' ')}, /* latin capital letter A with acute, U+00C1 ISOlat1 */
45: {TEXT("Acirc"), 194, TEXT(' ')}, /* latin capital letter A with circumflex, U+00C2 ISOlat1 */
46: {TEXT("Agrave"), 192, TEXT(' ')}, /* latin capital letter A with grave = */
47: /* latin capital letter A grave, U+00C0 ISOlat1 */
48: {TEXT("Alpha"), 913, TEXT(' ')}, /* greek capital letter alpha, U+0391 */
49: {TEXT("Aring"), 197, TEXT(' ')}, /* latin capital letter A with ring above = */
50: /* latin capital letter A ring, U+00C5 ISOlat1 */
51: {TEXT("Atilde"), 195, TEXT(' ')}, /* latin capital letter A with tilde, U+00C3 ISOlat1 */
52: {TEXT("Auml"), 196, TEXT(' ')}, /* latin capital letter A with diaeresis, U+00C4 ISOlat1 */
53: {TEXT("Beta"), 914, TEXT(' ')}, /* greek capital letter beta, U+0392 */
54: {TEXT("Ccedil"), 199, TEXT(' ')}, /* latin capital letter C with cedilla, U+00C7 ISOlat1 */
55: {TEXT("Chi"), 935, TEXT(' ')}, /* greek capital letter chi, U+03A7 */
56: {TEXT("Dagger"), 8225, TEXT(' ')}, /* double dagger, U+2021 ISOpub */
57: {TEXT("Delta"), 916, TEXT(' ')}, /* greek capital letter delta, U+0394 ISOgrk3 */
58: {TEXT("ETH"), 208, TEXT(' ')}, /* latin capital letter ETH, U+00D0 ISOlat1 */
59: {TEXT("Eacute"), 201, TEXT(' ')}, /* latin capital letter E with acute, U+00C9 ISOlat1 */
60: {TEXT("Ecirc"), 202, TEXT(' ')}, /* latin capital letter E with circumflex, U+00CA ISOlat1 */
61: {TEXT("Egrave"), 200, TEXT(' ')}, /* latin capital letter E with grave, U+00C8 ISOlat1 */
62: {TEXT("Epsilon"), 917, TEXT(' ')}, /* greek capital letter epsilon, U+0395 */
63: {TEXT("Eta"), 919, TEXT(' ')}, /* greek capital letter eta, U+0397 */
64: {TEXT("Euml"), 203, TEXT(' ')}, /* latin capital letter E with diaeresis, U+00CB ISOlat1 */
65: {TEXT("Gamma"), 915, TEXT(' ')}, /* greek capital letter gamma, U+0393 ISOgrk3 */
66: {TEXT("Iacute"), 205, TEXT(' ')}, /* latin capital letter I with acute, U+00CD ISOlat1 */
67: {TEXT("Icirc"), 206, TEXT(' ')}, /* latin capital letter I with circumflex, U+00CE ISOlat1 */
68: {TEXT("Igrave"), 204, TEXT(' ')}, /* latin capital letter I with grave, U+00CC ISOlat1 */
69: {TEXT("Iota"), 921, TEXT(' ')}, /* greek capital letter iota, U+0399 */
70: {TEXT("Iuml"), 207, TEXT(' ')}, /* latin capital letter I with diaeresis, U+00CF ISOlat1 */
71: {TEXT("Kappa"), 922, TEXT(' ')}, /* greek capital letter kappa, U+039A */
72: {TEXT("Lambda"), 923, TEXT(' ')}, /* greek capital letter lambda, U+039B ISOgrk3 */
73: {TEXT("Mu"), 924, TEXT(' ')}, /* greek capital letter mu, U+039C */
74: {TEXT("Ntilde"), 209, TEXT(' ')}, /* latin capital letter N with tilde, U+00D1 ISOlat1 */
75: {TEXT("Nu"), 925, TEXT(' ')}, /* greek capital letter nu, U+039D */
76: {TEXT("OElig"), 338, TEXT(' ')}, /* latin capital ligature OE, U+0152 ISOlat2 */
77: {TEXT("Oacute"), 211, TEXT(' ')}, /* latin capital letter O with acute, U+00D3 ISOlat1 */
78: {TEXT("Ocirc"), 212, TEXT(' ')}, /* latin capital letter O with circumflex, U+00D4 ISOlat1 */
79: {TEXT("Ograve"), 210, TEXT(' ')}, /* latin capital letter O with grave, U+00D2 ISOlat1 */
80: {TEXT("Omega"), 937, TEXT(' ')}, /* greek capital letter omega, U+03A9 ISOgrk3 */
81: {TEXT("Omicron"), 927, TEXT(' ')}, /* greek capital letter omicron, U+039F */
82: {TEXT("Oslash"), 216, TEXT(' ')}, /* latin capital letter O with stroke = */
83: /* latin capital letter O slash, U+00D8 ISOlat1 */
84: {TEXT("Otilde"), 213, TEXT(' ')}, /* latin capital letter O with tilde, U+00D5 ISOlat1 */
85: {TEXT("Ouml"), 214, TEXT(' ')}, /* latin capital letter O with diaeresis, U+00D6 ISOlat1 */
86: {TEXT("Phi"), 934, TEXT(' ')}, /* greek capital letter phi, U+03A6 ISOgrk3 */
87: {TEXT("Pi"), 928, TEXT(' ')}, /* greek capital letter pi, U+03A0 ISOgrk3 */
88: {TEXT("Prime"), 8243, TEXT(' ')}, /* double prime = seconds = inches, U+2033 ISOtech */
89: {TEXT("Psi"), 936, TEXT(' ')}, /* greek capital letter psi, U+03A8 ISOgrk3 */
90: {TEXT("Rho"), 929, TEXT(' ')}, /* greek capital letter rho, U+03A1 */
91: {TEXT("Scaron"), 352, TEXT(' ')}, /* latin capital letter S with caron, U+0160 ISOlat2 */
92: {TEXT("Sigma"), 931, TEXT(' ')}, /* greek capital letter sigma, U+03A3 ISOgrk3 */
93: {TEXT("THORN"), 222, TEXT(' ')}, /* latin capital letter THORN, U+00DE ISOlat1 */
94: {TEXT("Tau"), 932, TEXT(' ')}, /* greek capital letter tau, U+03A4 */
95: {TEXT("Theta"), 920, TEXT(' ')}, /* greek capital letter theta, U+0398 ISOgrk3 */
96: {TEXT("Uacute"), 218, TEXT(' ')}, /* latin capital letter U with acute, U+00DA ISOlat1 */
97: {TEXT("Ucirc"), 219, TEXT(' ')}, /* latin capital letter U with circumflex, U+00DB ISOlat1 */
98: {TEXT("Ugrave"), 217, TEXT(' ')}, /* latin capital letter U with grave, U+00D9 ISOlat1 */
99: {TEXT("Upsilon"), 933, TEXT(' ')}, /* greek capital letter upsilon, U+03A5 ISOgrk3 */
100: {TEXT("Uuml"), 220, TEXT(' ')}, /* latin capital letter U with diaeresis, U+00DC ISOlat1 */
101: {TEXT("Xi"), 926, TEXT(' ')}, /* greek capital letter xi, U+039E ISOgrk3 */
102: {TEXT("Yacute"), 221, TEXT(' ')}, /* latin capital letter Y with acute, U+00DD ISOlat1 */
103: {TEXT("Yuml"), 376, TEXT(' ')}, /* latin capital letter Y with diaeresis, U+0178 ISOlat2 */
104: {TEXT("Zeta"), 918, TEXT(' ')}, /* greek capital letter zeta, U+0396 */
105: {TEXT("aacute"), 225, TEXT(' ')}, /* latin small letter a with acute, U+00E1 ISOlat1 */
106: {TEXT("acirc"), 226, TEXT(' ')}, /* latin small letter a with circumflex, U+00E2 ISOlat1 */
107: {TEXT("acute"), 180, TEXT(' ')}, /* acute accent = spacing acute, U+00B4 ISOdia */
108: {TEXT("aelig"), 230, TEXT(' ')}, /* latin small letter ae = */
109: /* latin small ligature ae, U+00E6 ISOlat1 */
110: {TEXT("agrave"), 224, TEXT(' ')}, /* latin small letter a with grave = */
111: /* latin small letter a grave, U+00E0 ISOlat1 */
112: {TEXT("alefsym"), 8501, TEXT(' ')}, /* alef symbol = first transfinite cardinal, U+2135 NEW */
113: {TEXT("alpha"), 945, TEXT(' ')}, /* greek small letter alpha, U+03B1 ISOgrk3 */
114: {TEXT("amp"), 38, TEXT(' ')}, /* ampersand, U+0026 ISOnum */
115: {TEXT("and"), 8743, TEXT(' ')}, /* logical and = wedge, U+2227 ISOtech */
116: {TEXT("ang"), 8736, TEXT(' ')}, /* angle, U+2220 ISOamso */
117: {TEXT("aring"), 229, TEXT(' ')}, /* latin small letter a with ring above = */
118: /* latin small letter a ring, U+00E5 ISOlat1 */
119: {TEXT("asymp"), 8776, TEXT(' ')}, /* almost equal to = asymptotic to, U+2248 ISOamsr */
120: {TEXT("atilde"), 227, TEXT(' ')}, /* latin small letter a with tilde, U+00E3 ISOlat1 */
121: {TEXT("auml"), 228, TEXT(' ')}, /* latin small letter a with diaeresis, U+00E4 ISOlat1 */
122: {TEXT("bdquo"), 8222}, /* double low-9 quotation mark, U+201E NEW */
123: {TEXT("beta"), 946, TEXT(' ')}, /* greek small letter beta, U+03B2 ISOgrk3 */
124: {TEXT("brvbar"), 166, TEXT(' ')}, /* broken bar = broken vertical bar, U+00A6 ISOnum */
125: {TEXT("bull"), 8226, TEXT(' ')}, /* bullet = black small circle, U+2022 ISOpub */
126: {TEXT("cap"), 8745, TEXT(' ')}, /* intersection = cap, U+2229 ISOtech */
127: {TEXT("ccedil"), 231, TEXT(' ')}, /* latin small letter c with cedilla, U+00E7 ISOlat1 */
128: {TEXT("cedil"), 184, TEXT(' ')}, /* cedilla = spacing cedilla, U+00B8 ISOdia */
129: {TEXT("cent"), 162, TEXT(' ')}, /* cent sign, U+00A2 ISOnum */
130: {TEXT("chi"), 967, TEXT(' ')}, /* greek small letter chi, U+03C7 ISOgrk3 */
131: {TEXT("circ"), 710, TEXT(' ')}, /* modifier letter circumflex accent, U+02C6 ISOpub */
132: {TEXT("clubs"), 9827, TEXT(' ')}, /* black club suit = shamrock, U+2663 ISOpub */
133: {TEXT("cong"), 8773, TEXT(' ')}, /* approximately equal to, U+2245 ISOtech */
134: {TEXT("copy"), 169, TEXT(' ')}, /* copyright sign, U+00A9 ISOnum */
135: {TEXT("crarr"), 8629, TEXT(' ')}, /* downwards arrow with corner leftwards = */
136: /* carriage return, U+21B5 NEW */
137: {TEXT("cup"), 8746, TEXT(' ')}, /* union = cup, U+222A ISOtech */
138: {TEXT("curren"), 164, TEXT(' ')}, /* currency sign, U+00A4 ISOnum */
139: {TEXT("dArr"), 8659, TEXT(' ')}, /* downwards double arrow, U+21D3 ISOamsa */
140: {TEXT("dagger"), 8224, TEXT(' ')}, /* dagger, U+2020 ISOpub */
141: {TEXT("darr"), 8595, TEXT(' ')}, /* downwards arrow, U+2193 ISOnum */
142: {TEXT("deg"), 176, TEXT(' ')}, /* degree sign, U+00B0 ISOnum */
143: {TEXT("delta"), 948, TEXT(' ')}, /* greek small letter delta, U+03B4 ISOgrk3 */
144: {TEXT("diams"), 9830, TEXT(' ')}, /* black diamond suit, U+2666 ISOpub */
145: {TEXT("divide"), 247, TEXT(' ')}, /* division sign, U+00F7 ISOnum */
146: {TEXT("eacute"), 233, TEXT(' ')}, /* latin small letter e with acute, U+00E9 ISOlat1 */
147: {TEXT("ecirc"), 234, TEXT(' ')}, /* latin small letter e with circumflex, U+00EA ISOlat1 */
148: {TEXT("egrave"), 232, TEXT(' ')}, /* latin small letter e with grave, U+00E8 ISOlat1 */
149: {TEXT("empty"), 8709, TEXT(' ')}, /* empty set = null set = diameter, U+2205 ISOamso */
150: {TEXT("emsp"), 8195, TEXT(' ')}, /* em space, U+2003 ISOpub */
151: {TEXT("ensp"), 8194, TEXT(' ')}, /* en space, U+2002 ISOpub */
152: {TEXT("epsilon"), 949, TEXT(' ')}, /* greek small letter epsilon, U+03B5 ISOgrk3 */
153: {TEXT("equiv"), 8801, TEXT(' ')}, /* identical to, U+2261 ISOtech */
154: {TEXT("eta"), 951, TEXT(' ')}, /* greek small letter eta, U+03B7 ISOgrk3 */
155: {TEXT("eth"), 240, TEXT(' ')}, /* latin small letter eth, U+00F0 ISOlat1 */
156: {TEXT("euml"), 235, TEXT(' ')}, /* latin small letter e with diaeresis, U+00EB ISOlat1 */
157: {TEXT("euro"), 8364, TEXT(' ')}, /* euro sign, U+20AC NEW */
158: {TEXT("exist"), 8707, TEXT(' ')}, /* there exists, U+2203 ISOtech */
159: {TEXT("fnof"), 402, TEXT(' ')}, /* latin small f with hook = function = */
160: /* florin, U+0192 ISOtech */
161: {TEXT("forall"), 8704, TEXT(' ')}, /* for all, U+2200 ISOtech */
162: {TEXT("frac12"), 189, TEXT(' ')}, /* vulgar fraction one half = */
163: /*fraction one half, U+00BD ISOnum */
164: {TEXT("frac14"), 188, TEXT(' ')}, /* vulgar fraction one quarter = */
165: /* fraction one quarter, U+00BC ISOnum */
166: {TEXT("frac34"), 190, TEXT(' ')}, /* vulgar fraction three quarters = */
167: /* fraction three quarters, U+00BE ISOnum */
168: {TEXT("frasl"), 8260, TEXT(' ')}, /* fraction slash, U+2044 NEW */
169: {TEXT("gamma"), 947, TEXT(' ')}, /* greek small letter gamma, U+03B3 ISOgrk3 */
170: {TEXT("ge"), 8805, TEXT(' ')}, /* greater-than or equal to, U+2265 ISOtech */
171: {TEXT("gt"), 62, TEXT(' ')}, /* greater-than sign, U+003E ISOnum */
172: {TEXT("hArr"), 8660, TEXT(' ')}, /* left right double arrow, U+21D4 ISOamsa */
173: {TEXT("harr"), 8596, TEXT(' ')}, /* left right arrow, U+2194 ISOamsa */
174: {TEXT("hearts"), 9829, TEXT(' ')}, /* black heart suit = valentine, U+2665 ISOpub */
175: {TEXT("hellip"), 8230, TEXT(' ')}, /* horizontal ellipsis = three dot leader, U+2026 ISOpub */
176: {TEXT("hyphen"), 173, TEXT(' ')}, /* hyphen = discretionary hyphen, U+00AD ISOnum */
177: {TEXT("iacute"), 237, TEXT(' ')}, /* latin small letter i with acute, U+00ED ISOlat1 */
178: {TEXT("icirc"), 238, TEXT(' ')}, /* latin small letter i with circumflex, U+00EE ISOlat1 */
179: {TEXT("iexcl"), 161, TEXT(' ')}, /* inverted exclamation mark, U+00A1 ISOnum */
180: {TEXT("igrave"), 236, TEXT(' ')}, /* latin small letter i with grave, U+00EC ISOlat1 */
181: {TEXT("image"), 8465, TEXT(' ')}, /* blackletter capital I = imaginary part, U+2111 ISOamso */
182: {TEXT("infin"), 8734, TEXT(' ')}, /* infinity, U+221E ISOtech */
183: {TEXT("int"), 8747, TEXT(' ')}, /* integral, U+222B ISOtech */
184: {TEXT("iota"), 953, TEXT(' ')}, /* greek small letter iota, U+03B9 ISOgrk3 */
185: {TEXT("iquest"), 191, TEXT(' ')}, /* inverted question mark = */
186: /* turned question mark, U+00BF ISOnum */
187: {TEXT("isin"), 8712, TEXT(' ')}, /* element of, U+2208 ISOtech */
188: {TEXT("iuml"), 239, TEXT(' ')}, /* latin small letter i with diaeresis, U+00EF ISOlat1 */
189: {TEXT("kappa"), 954, TEXT(' ')}, /* greek small letter kappa, U+03BA ISOgrk3 */
190: {TEXT("lArr"), 8656, TEXT(' ')}, /* leftwards double arrow, U+21D0 ISOtech */
191: {TEXT("lambda"), 955, TEXT(' ')}, /* greek small letter lambda, U+03BB ISOgrk3 */
192: {TEXT("lang"), 9001, TEXT(' ')}, /* left-pointing angle bracket = bra, U+2329 ISOtech */
193: {TEXT("laquo"), 171, TEXT(' ')}, /* left-pointing double angle quotation mark = */
194: /* left pointing guillemet, U+00AB ISOnum */
195: {TEXT("larr"), 8592, TEXT(' ')}, /* leftwards arrow, U+2190 ISOnum */
196: {TEXT("lceil"), 8968, TEXT(' ')}, /* left ceiling = apl upstile, U+2308 ISOamsc */
197: {TEXT("ldquo"), 8220, TEXT(' ')}, /* left double quotation mark, U+201C ISOnum */
198: {TEXT("le"), 8804, TEXT(' ')}, /* less-than or equal to, U+2264 ISOtech */
199: {TEXT("lfloor"), 8970, TEXT(' ')}, /* left floor = apl downstile, U+230A ISOamsc */
200: {TEXT("lowast"), 8727, TEXT(' ')}, /* asterisk operator, U+2217 ISOtech */
201: {TEXT("loz"), 9674, TEXT(' ')}, /* lozenge, U+25CA ISOpub */
202: {TEXT("lrm"), 8206, TEXT(' ')}, /* left-to-right mark, U+200E NEW RFC 2070 */
203: {TEXT("lsaquo"), 8249, TEXT(' ')}, /* single left-pointing angle quotation mark, */
204: /* U+2039 ISO proposed */
205: {TEXT("lsquo"), 8216, TEXT(' ')}, /* left single quotation mark, U+2018 ISOnum */
206: {TEXT("lt"), 60, TEXT(' ')}, /* less-than sign, U+003C ISOnum */
207: {TEXT("macr"), 175, TEXT(' ')}, /* macron = spacing macron = overline = APL overbar, */
208: /* U+00AF ISOdia */
209: {TEXT("mdash"), 8212, TEXT(' ')}, /* em dash, U+2014 ISOpub */
210: {TEXT("micro"), 181, TEXT(' ')}, /* micro sign, U+00B5 ISOnum */
211: {TEXT("middot"), 183, TEXT(' ')}, /* middle dot = Georgian comma = */
212: /* Greek middle dot, U+00B7 ISOnum */
213: {TEXT("minus"), 8722, TEXT(' ')}, /* minus sign, U+2212 ISOtech */
214: {TEXT("mu"), 956, TEXT(' ')}, /* greek small letter mu, U+03BC ISOgrk3 */
215: {TEXT("nabla"), 8711, TEXT(' ')}, /* nabla = backward difference, U+2207 ISOtech */
216: {TEXT("nbsp"), 160, TEXT(' ')}, /* no-break space = non-breaking space, U+00A0 ISOnum */
217: {TEXT("ndash"), 8211, TEXT(' ')}, /* en dash, U+2013 ISOpub */
218: {TEXT("ne"), 8800, TEXT(' ')}, /* not equal to, U+2260 ISOtech */
219: {TEXT("ni"), 8715, TEXT(' ')}, /* contains as member, U+220B ISOtech */
220: {TEXT("not"), 172, TEXT(' ')}, /* not sign, U+00AC ISOnum */
221: {TEXT("notin"), 8713, TEXT(' ')}, /* not an element of, U+2209 ISOtech */
222: {TEXT("nsub"), 8836, TEXT(' ')}, /* not a subset of, U+2284 ISOamsn */
223: {TEXT("ntilde"), 241, TEXT(' ')}, /* latin small letter n with tilde, U+00F1 ISOlat1 */
224: {TEXT("nu"), 957, TEXT(' ')}, /* greek small letter nu, U+03BD ISOgrk3 */
225: {TEXT("oacute"), 243, TEXT(' ')}, /* latin small letter o with acute, U+00F3 ISOlat1 */
226: {TEXT("ocirc"), 244, TEXT(' ')}, /* latin small letter o with circumflex, U+00F4 ISOlat1 */
227: {TEXT("oelig"), 339, TEXT(' ')}, /* latin small ligature oe, U+0153 ISOlat2 */
228: {TEXT("ograve"), 242, TEXT(' ')}, /* latin small letter o with grave, U+00F2 ISOlat1 */
229: {TEXT("oline"), 8254, TEXT(' ')}, /* overline = spacing overscore, U+203E NEW */
230: {TEXT("omega"), 969, TEXT(' ')}, /* greek small letter omega, U+03C9 ISOgrk3 */
231: {TEXT("omicron"), 959, TEXT(' ')}, /* greek small letter omicron, U+03BF NEW */
232: {TEXT("oplus"), 8853, TEXT(' ')}, /* circled plus = direct sum, U+2295 ISOamsb */
233: {TEXT("or"), 8744, TEXT(' ')}, /* logical or = vee, U+2228 ISOtech */
234: {TEXT("ordf"), 170, TEXT(' ')}, /* feminine ordinal indicator, U+00AA ISOnum */
235: {TEXT("ordm"), 186, TEXT(' ')}, /* masculine ordinal indicator, U+00BA ISOnum */
236: {TEXT("oslash"), 248, TEXT(' ')}, /* latin small letter o with stroke, = */
237: /* latin small letter o slash, U+00F8 ISOlat1 */
238: {TEXT("otilde"), 245, TEXT(' ')}, /* latin small letter o with tilde, U+00F5 ISOlat1 */
239: {TEXT("otimes"), 8855, TEXT(' ')}, /* circled times = vector product, U+2297 ISOamsb */
240: {TEXT("ouml"), 246, TEXT(' ')}, /* latin small letter o with diaeresis, U+00F6 ISOlat1 */
241: {TEXT("para"), 182, TEXT(' ')}, /* pilcrow sign = paragraph sign, U+00B6 ISOnum */
242: {TEXT("part"), 8706, TEXT(' ')}, /* partial differential, U+2202 ISOtech */
243: {TEXT("permil"), 8240, TEXT(' ')}, /* per mille sign, U+2030 ISOtech */
244: {TEXT("perp"), 8869, TEXT(' ')}, /* up tack = orthogonal to = perpendicular, U+22A5 ISOtech */
245: {TEXT("phi"), 966, TEXT(' ')}, /* greek small letter phi, U+03C6 ISOgrk3 */
246: {TEXT("pi"), 960, TEXT(' ')}, /* greek small letter pi, U+03C0 ISOgrk3 */
247: {TEXT("piv"), 982, TEXT(' ')}, /* greek pi symbol, U+03D6 ISOgrk3 */
248: {TEXT("plusmn"), 177, TEXT(' ')}, /* plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */
249: {TEXT("pound"), 163, TEXT(' ')}, /* pound sign, U+00A3 ISOnum */
250: {TEXT("prime"), 8242, TEXT(' ')}, /* prime = minutes = feet, U+2032 ISOtech */
251: {TEXT("prod"), 8719, TEXT(' ')}, /* n-ary product = product sign, U+220F ISOamsb */
252: {TEXT("prop"), 8733, TEXT(' ')}, /* proportional to, U+221D ISOtech */
253: {TEXT("psi"), 968, TEXT(' ')}, /* greek small letter psi, U+03C8 ISOgrk3 */
254: {TEXT("quot"), 34, TEXT(' ')}, /* quotation mark = APL quote, U+0022 ISOnum */
255: {TEXT("rArr"), 8658, TEXT(' ')}, /* rightwards double arrow, U+21D2 ISOtech */
256: {TEXT("radic"), 8730, TEXT(' ')}, /* square root = radical sign, U+221A ISOtech */
257: {TEXT("rang"), 9002, TEXT(' ')}, /* right-pointing angle bracket = ket, U+232A ISOtech */
258: {TEXT("raquo"), 187, TEXT(' ')}, /* right-pointing double angle quotation mark = */
259: /* right pointing guillemet, U+00BB ISOnum */
260: {TEXT("rarr"), 8594, TEXT(' ')}, /* rightwards arrow, U+2192 ISOnum */
261: {TEXT("rceil"), 8969, TEXT(' ')}, /* right ceiling, U+2309 ISOamsc */
262: {TEXT("rdquo"), 8221, TEXT(' ')}, /* right double quotation mark, U+201D ISOnum */
263: {TEXT("real"), 8476, TEXT(' ')}, /* blackletter capital R = real part symbol, U+211C ISOamso */
264: {TEXT("reg"), 174, TEXT(' ')}, /* registered sign = registered trade mark sign, */
265: /* U+00AE ISOnum */
266: {TEXT("rfloor"), 8971, TEXT(' ')}, /* right floor, U+230B ISOamsc */
267: {TEXT("rho"), 961, TEXT(' ')}, /* greek small letter rho, U+03C1 ISOgrk3 */
268: {TEXT("rlm"), 8207, TEXT(' ')}, /* right-to-left mark, U+200F NEW RFC 2070 */
269: {TEXT("rsaquo"), 8250, TEXT(' ')}, /* single right-pointing angle quotation mark, */
270: /* U+203A ISO proposed */
271: {TEXT("rsquo"), 8217, TEXT(' ')}, /* right single quotation mark, U+2019 ISOnum */
272: {TEXT("sbquo"), 8218, TEXT(' ')}, /* single low-9 quotation mark, U+201A NEW */
273: {TEXT("scaron"), 353, TEXT(' ')}, /* latin small letter s with caron, U+0161 ISOlat2 */
274: {TEXT("sdot"), 8901, TEXT(' ')}, /* dot operator, U+22C5 ISOamsb */
275: {TEXT("sect"), 167, TEXT(' ')}, /* section sign, U+00A7 ISOnum */
276: {TEXT("shy"), 173, TEXT(' ')}, /* soft hyphen = discretionary hyphen, U+00AD ISOnum */
277: {TEXT("sigma"), 963, TEXT(' ')}, /* greek small letter sigma, U+03C3 ISOgrk3 */
278: {TEXT("sigmaf"), 962, TEXT(' ')}, /* greek small letter final sigma, U+03C2 ISOgrk3 */
279: {TEXT("sim"), 8764, TEXT(' ')}, /* tilde operator = varies with = similar to, U+223C ISOtech */
280: {TEXT("spades"), 9824, TEXT(' ')}, /* black spade suit, U+2660 ISOpub */
281: {TEXT("sub"), 8834, TEXT(' ')}, /* subset of, U+2282 ISOtech */
282: {TEXT("sube"), 8838, TEXT(' ')}, /* subset of or equal to, U+2286 ISOtech */
283: {TEXT("sum"), 8721, TEXT(' ')}, /* n-ary sumation, U+2211 ISOamsb */
284: {TEXT("sup"), 8835, TEXT(' ')}, /* superset of, U+2283 ISOtech */
285: {TEXT("sup1"), 185, TEXT(' ')}, /* superscript one = superscript digit one, U+00B9 ISOnum */
286: {TEXT("sup2"), 178, TEXT(' ')}, /* superscript two = superscript digit two = squared, */
287: /* U+00B2 ISOnum */
288: {TEXT("sup3"), 179, TEXT(' ')}, /* superscript three = superscript digit three = cubed, */
289: /* U+00B3 ISOnum */
290: {TEXT("supe"), 8839, TEXT(' ')}, /* superset of or equal to, U+2287 ISOtech */
291: {TEXT("szlig"), 223, TEXT(' ')}, /* latin small letter sharp s = ess-zed, U+00DF ISOlat1 */
292: {TEXT("tau"), 964, TEXT(' ')}, /* greek small letter tau, U+03C4 ISOgrk3 */
293: {TEXT("there4"), 8756, TEXT(' ')}, /* therefore, U+2234 ISOtech */
294: {TEXT("theta"), 952, TEXT(' ')}, /* greek small letter theta, U+03B8 ISOgrk3 */
295: {TEXT("thetasym"), 977, TEXT(' ')}, /* greek small letter theta symbol, U+03D1 NEW */
296: {TEXT("thinsp"), 8201, TEXT(' ')}, /* thin space, U+2009 ISOpub */
297: {TEXT("thorn"), 254, TEXT(' ')}, /* latin small letter thorn with, U+00FE ISOlat1 */
298: {TEXT("tilde"), 732, TEXT(' ')}, /* small tilde, U+02DC ISOdia */
299: {TEXT("times"), 215, TEXT(' ')}, /* multiplication sign, U+00D7 ISOnum */
300: {TEXT("trade"), 8482, TEXT(' ')}, /* trade mark sign, U+2122 ISOnum */
301: {TEXT("uArr"), 8657, TEXT(' ')}, /* upwards double arrow, U+21D1 ISOamsa */
302: {TEXT("uacute"), 250, TEXT(' ')}, /* latin small letter u with acute, U+00FA ISOlat1 */
303: {TEXT("uarr"), 8593, TEXT(' ')}, /* upwards arrow, U+2191 ISOnum*/
304: {TEXT("ucirc"), 251, TEXT(' ')}, /* latin small letter u with circumflex, U+00FB ISOlat1 */
305: {TEXT("ugrave"), 249, TEXT(' ')}, /* latin small letter u with grave, U+00F9 ISOlat1 */
306: {TEXT("uml"), 168, TEXT(' ')}, /* diaeresis = spacing diaeresis, U+00A8 ISOdia */
307: {TEXT("upsih"), 978, TEXT(' ')}, /* greek upsilon with hook symbol, U+03D2 NEW */
308: {TEXT("upsilon"), 965, TEXT(' ')}, /* greek small letter upsilon, U+03C5 ISOgrk3 */
309: {TEXT("uuml"), 252, TEXT(' ')}, /* latin small letter u with diaeresis, U+00FC ISOlat1 */
310: {TEXT("weierp"), 8472, TEXT(' ')}, /* script capital P = power set = Weierstrass p, */
311: /* U+2118 ISOamso */
312: {TEXT("xi"), 958, TEXT(' ')}, /* greek small letter xi, U+03BE ISOgrk3 */
313: {TEXT("yacute"), 253, TEXT(' ')}, /* latin small letter y with acute, U+00FD ISOlat1 */
314: {TEXT("yen"), 165, TEXT(' ')}, /* yen sign = yuan sign, U+00A5 ISOnum */
315: {TEXT("yuml"), 255, TEXT(' ')}, /* latin small letter y with diaeresis, U+00FF ISOlat1 */
316: {TEXT("zeta"), 950, TEXT(' ')}, /* greek small letter zeta, U+03B6 ISOgrk3 */
317: {TEXT("zwj"), 8205, TEXT(' ')}, /* zero width joiner, U+200D NEW RFC 2070 */
318: {TEXT("zwnj"), 8204, TEXT(' ')}, /* zero width non-joiner, U+200C NEW RFC 2070 */
319: {TEXT("zzzz"), 0, TEXT(' ')} /* this last entry is required */
1.1 cvs 320: };
321:
1.14 cvs 322: /* tables defined in fetchHTMLname.c */
323: extern AttributeMapping XhtmlAttributeMappingTable[];
1.21 cvs 324:
1.14 cvs 325: /* Mapping table of HTML attribute values */
326: AttrValueMapping XhtmlAttrValueMappingTable[] =
1.1 cvs 327: {
328: {HTML_ATTR_dir, TEXT("ltr"), HTML_ATTR_dir_VAL_ltr},
329: {HTML_ATTR_dir, TEXT("rtl"), HTML_ATTR_dir_VAL_rtl},
330:
331: {HTML_ATTR_TextAlign, TEXT("left"), HTML_ATTR_TextAlign_VAL_left_},
332: {HTML_ATTR_TextAlign, TEXT("center"), HTML_ATTR_TextAlign_VAL_center_},
333: {HTML_ATTR_TextAlign, TEXT("right"), HTML_ATTR_TextAlign_VAL_right_},
334: {HTML_ATTR_TextAlign, TEXT("justify"), HTML_ATTR_TextAlign_VAL_justify_},
335:
336: {HTML_ATTR_Align, TEXT("left"), HTML_ATTR_Align_VAL_left_},
337: {HTML_ATTR_Align, TEXT("center"), HTML_ATTR_Align_VAL_center_},
338: {HTML_ATTR_Align, TEXT("right"), HTML_ATTR_Align_VAL_right_},
339:
340: {HTML_ATTR_LAlign, TEXT("top"), HTML_ATTR_LAlign_VAL_Top_},
341: {HTML_ATTR_LAlign, TEXT("bottom"), HTML_ATTR_LAlign_VAL_Bottom_},
342: {HTML_ATTR_LAlign, TEXT("left"), HTML_ATTR_LAlign_VAL_Left_},
343: {HTML_ATTR_LAlign, TEXT("right"), HTML_ATTR_LAlign_VAL_Right_},
344:
345: {HTML_ATTR_Clear, TEXT("left"), HTML_ATTR_Clear_VAL_Left_},
346: {HTML_ATTR_Clear, TEXT("right"), HTML_ATTR_Clear_VAL_Right_},
347: {HTML_ATTR_Clear, TEXT("all"), HTML_ATTR_Clear_VAL_All_},
348: {HTML_ATTR_Clear, TEXT("none"), HTML_ATTR_Clear_VAL_None_},
349:
350: {HTML_ATTR_NumberStyle, TEXT("1"), HTML_ATTR_NumberStyle_VAL_Arabic_},
351: {HTML_ATTR_NumberStyle, TEXT("a"), HTML_ATTR_NumberStyle_VAL_LowerAlpha},
352: {HTML_ATTR_NumberStyle, TEXT("A"), HTML_ATTR_NumberStyle_VAL_UpperAlpha},
353: {HTML_ATTR_NumberStyle, TEXT("i"), HTML_ATTR_NumberStyle_VAL_LowerRoman},
354: {HTML_ATTR_NumberStyle, TEXT("I"), HTML_ATTR_NumberStyle_VAL_UpperRoman},
355:
356: {HTML_ATTR_BulletStyle, TEXT("disc"), HTML_ATTR_BulletStyle_VAL_disc},
357: {HTML_ATTR_BulletStyle, TEXT("square"), HTML_ATTR_BulletStyle_VAL_square},
358: {HTML_ATTR_BulletStyle, TEXT("circle"), HTML_ATTR_BulletStyle_VAL_circle},
359:
360: {HTML_ATTR_ItemStyle, TEXT("1"), HTML_ATTR_ItemStyle_VAL_Arabic_},
361: {HTML_ATTR_ItemStyle, TEXT("a"), HTML_ATTR_ItemStyle_VAL_LowerAlpha},
362: {HTML_ATTR_ItemStyle, TEXT("A"), HTML_ATTR_ItemStyle_VAL_UpperAlpha},
363: {HTML_ATTR_ItemStyle, TEXT("i"), HTML_ATTR_ItemStyle_VAL_LowerRoman},
364: {HTML_ATTR_ItemStyle, TEXT("I"), HTML_ATTR_ItemStyle_VAL_UpperRoman},
365: {HTML_ATTR_ItemStyle, TEXT("disc"), HTML_ATTR_ItemStyle_VAL_disc},
366: {HTML_ATTR_ItemStyle, TEXT("square"), HTML_ATTR_ItemStyle_VAL_square},
367: {HTML_ATTR_ItemStyle, TEXT("circle"), HTML_ATTR_ItemStyle_VAL_circle},
368:
369: {HTML_ATTR_Button_type, TEXT("button"), HTML_ATTR_Button_type_VAL_button},
370: {HTML_ATTR_Button_type, TEXT("submit"), HTML_ATTR_Button_type_VAL_submit},
371: {HTML_ATTR_Button_type, TEXT("reset"), HTML_ATTR_Button_type_VAL_reset},
372:
373: {HTML_ATTR_frame, TEXT("void"), HTML_ATTR_frame_VAL_void},
374: {HTML_ATTR_frame, TEXT("above"), HTML_ATTR_frame_VAL_above},
375: {HTML_ATTR_frame, TEXT("below"), HTML_ATTR_frame_VAL_below},
376: {HTML_ATTR_frame, TEXT("hsides"), HTML_ATTR_frame_VAL_hsides},
377: {HTML_ATTR_frame, TEXT("lhs"), HTML_ATTR_frame_VAL_lhs},
378: {HTML_ATTR_frame, TEXT("rhs"), HTML_ATTR_frame_VAL_rhs},
379: {HTML_ATTR_frame, TEXT("vsides"), HTML_ATTR_frame_VAL_vsides},
380: {HTML_ATTR_frame, TEXT("box"), HTML_ATTR_frame_VAL_box},
381: {HTML_ATTR_frame, TEXT("border"), HTML_ATTR_frame_VAL_border},
382:
383: {HTML_ATTR_frameborder, TEXT("0"), HTML_ATTR_frameborder_VAL_Border0},
384: {HTML_ATTR_frameborder, TEXT("1"), HTML_ATTR_frameborder_VAL_Border1},
385:
386: {HTML_ATTR_scrolling, TEXT("yes"), HTML_ATTR_scrolling_VAL_Yes_},
387: {HTML_ATTR_scrolling, TEXT("no"), HTML_ATTR_scrolling_VAL_No_},
388: {HTML_ATTR_scrolling, TEXT("auto"), HTML_ATTR_scrolling_VAL_auto_},
389:
390: {HTML_ATTR_rules_, TEXT("none"), HTML_ATTR_rules__VAL_none_},
391: {HTML_ATTR_rules_, TEXT("groups"), HTML_ATTR_rules__VAL_groups},
392: {HTML_ATTR_rules_, TEXT("rows"), HTML_ATTR_rules__VAL_rows},
393: {HTML_ATTR_rules_, TEXT("cols"), HTML_ATTR_rules__VAL_cols},
394: {HTML_ATTR_rules_, TEXT("all"), HTML_ATTR_rules__VAL_all},
395:
396: {HTML_ATTR_Cell_align, TEXT("left"), HTML_ATTR_Cell_align_VAL_Cell_left},
397: {HTML_ATTR_Cell_align, TEXT("center"), HTML_ATTR_Cell_align_VAL_Cell_center},
398: {HTML_ATTR_Cell_align, TEXT("right"), HTML_ATTR_Cell_align_VAL_Cell_right},
399: {HTML_ATTR_Cell_align, TEXT("justify"), HTML_ATTR_Cell_align_VAL_Cell_justify},
400: {HTML_ATTR_Cell_align, TEXT("char"), HTML_ATTR_Cell_align_VAL_Cell_char},
401:
402: {HTML_ATTR_Alignment, TEXT("top"), HTML_ATTR_Alignment_VAL_Top_},
403: {HTML_ATTR_Alignment, TEXT("middle"), HTML_ATTR_Alignment_VAL_Middle_},
404: {HTML_ATTR_Alignment, TEXT("bottom"), HTML_ATTR_Alignment_VAL_Bottom_},
405: {HTML_ATTR_Alignment, TEXT("left"), HTML_ATTR_Alignment_VAL_Left_},
406: {HTML_ATTR_Alignment, TEXT("right"), HTML_ATTR_Alignment_VAL_Right_},
407:
408: {HTML_ATTR_METHOD, TEXT("get"), HTML_ATTR_METHOD_VAL_Get_},
409: {HTML_ATTR_METHOD, TEXT("post"), HTML_ATTR_METHOD_VAL_Post_},
410:
411: {HTML_ATTR_Position, TEXT("top"), HTML_ATTR_Position_VAL_Position_top},
412: {HTML_ATTR_Position, TEXT("bottom"), HTML_ATTR_Position_VAL_Position_bottom},
413: {HTML_ATTR_Position, TEXT("left"), HTML_ATTR_Position_VAL_Position_left},
414: {HTML_ATTR_Position, TEXT("right"), HTML_ATTR_Position_VAL_Position_right},
415:
416: {HTML_ATTR_Row_valign, TEXT("top"), HTML_ATTR_Row_valign_VAL_Row_top},
417: {HTML_ATTR_Row_valign, TEXT("middle"), HTML_ATTR_Row_valign_VAL_Row_middle},
418: {HTML_ATTR_Row_valign, TEXT("bottom"), HTML_ATTR_Row_valign_VAL_Row_bottom},
419: {HTML_ATTR_Row_valign, TEXT("baseline"), HTML_ATTR_Row_valign_VAL_Row_baseline},
420:
421: {HTML_ATTR_Cell_valign, TEXT("top"), HTML_ATTR_Cell_valign_VAL_Cell_top},
422: {HTML_ATTR_Cell_valign, TEXT("middle"), HTML_ATTR_Cell_valign_VAL_Cell_middle},
423: {HTML_ATTR_Cell_valign, TEXT("bottom"), HTML_ATTR_Cell_valign_VAL_Cell_bottom},
424: {HTML_ATTR_Cell_valign, TEXT("baseline"), HTML_ATTR_Cell_valign_VAL_Cell_baseline},
425:
426: {HTML_ATTR_shape, TEXT("rect"), HTML_ATTR_shape_VAL_rectangle},
427: {HTML_ATTR_shape, TEXT("circle"), HTML_ATTR_shape_VAL_circle},
428: {HTML_ATTR_shape, TEXT("poly"), HTML_ATTR_shape_VAL_polygon},
429:
430: {HTML_ATTR_valuetype, TEXT("data"), HTML_ATTR_valuetype_VAL_data_},
431: {HTML_ATTR_valuetype, TEXT("ref"), HTML_ATTR_valuetype_VAL_ref},
432: {HTML_ATTR_valuetype, TEXT("object"), HTML_ATTR_valuetype_VAL_object_},
433:
434: /* HTML attribute TYPE generates a Thot element */
435: {DummyAttribute, TEXT("button"), HTML_EL_Button_Input},
436: {DummyAttribute, TEXT("checkbox"), HTML_EL_Checkbox_Input},
437: {DummyAttribute, TEXT("file"), HTML_EL_File_Input},
438: {DummyAttribute, TEXT("hidden"), HTML_EL_Hidden_Input},
439: {DummyAttribute, TEXT("image"), HTML_EL_PICTURE_UNIT},
440: {DummyAttribute, TEXT("password"), HTML_EL_Password_Input},
441: {DummyAttribute, TEXT("radio"), HTML_EL_Radio_Input},
442: {DummyAttribute, TEXT("reset"), HTML_EL_Reset_Input},
443: {DummyAttribute, TEXT("submit"), HTML_EL_Submit_Input},
444: {DummyAttribute, TEXT("text"), HTML_EL_Text_Input},
445:
446: /* The following declarations allow the parser to accept boolean attributes */
447: /* written "checked=CHECKED"), for instance */
448: {HTML_ATTR_ISMAP, TEXT("ismap"), HTML_ATTR_ISMAP_VAL_Yes_},
449: {HTML_ATTR_nohref, TEXT("nohref"), HTML_ATTR_nohref_VAL_Yes_},
450: {HTML_ATTR_COMPACT, TEXT("compact"), HTML_ATTR_COMPACT_VAL_Yes_},
451: {HTML_ATTR_Multiple, TEXT("multiple"), HTML_ATTR_Multiple_VAL_Yes_},
452: {HTML_ATTR_Selected, TEXT("selected"), HTML_ATTR_Selected_VAL_Yes_},
453: {HTML_ATTR_Checked, TEXT("checked"), HTML_ATTR_Checked_VAL_Yes_},
454: {HTML_ATTR_No_wrap, TEXT("nowrap"), HTML_ATTR_No_wrap_VAL_no_wrap},
455: {HTML_ATTR_NoShade, TEXT("noshade"), HTML_ATTR_NoShade_VAL_NoShade_},
456: {HTML_ATTR_declare, TEXT("declare"), HTML_ATTR_declare_VAL_Yes_},
457: {HTML_ATTR_defer, TEXT("defer"), HTML_ATTR_defer_VAL_Yes_},
458: {HTML_ATTR_disabled, TEXT("disabled"), HTML_ATTR_disabled_VAL_Yes_},
459: {HTML_ATTR_readonly, TEXT("readonly"), HTML_ATTR_readonly_VAL_Yes_},
460: {HTML_ATTR_no_resize, TEXT("noresize"), HTML_ATTR_no_resize_VAL_Yes_},
1.21 cvs 461:
462: /* XML attribute xml:space */
463: {HTML_ATTR_xml_space, TEXT("default"), HTML_ATTR_xml_space_VAL_xml_space_default},
464: {HTML_ATTR_xml_space, TEXT("preserve"), HTML_ATTR_xml_space_VAL_xml_space_preserve},
465:
1.1 cvs 466: {0, TEXT(""), 0} /* Last entry. Mandatory */
467: };
1.6 cvs 468:
469:
470: /*----------------------------------------------------------------------
1.15 cvs 471: ParseCharset:
1.6 cvs 472: Parses the element HTTP-EQUIV and looks for the charset value.
473: ----------------------------------------------------------------------*/
474: #ifdef __STDC__
1.15 cvs 475: void ParseCharset (Element el, Document doc)
1.6 cvs 476: #else /* !__STDC__ */
1.15 cvs 477: void ParseCharset (el, doc)
1.6 cvs 478: Element el;
479: Document doc;
480: #endif /* !__STDC__ */
481: {
1.15 cvs 482: AttributeType attrType;
483: Attribute attr;
484: SSchema docSSchema;
485: CHARSET charset;
1.6 cvs 486: CHAR_T *text, *text2, *ptrText, *str;
487: CHAR_T charsetname[MAX_LENGTH];
1.15 cvs 488: int length;
1.6 cvs 489: int pos, index = 0;
490:
1.15 cvs 491: charset = TtaGetDocumentCharset (doc);
492: if (charset != UNDEFINED_CHARSET)
493: /* the charset was already defined by the http header */
494: return;
1.6 cvs 495:
496: docSSchema = TtaGetDocumentSSchema (doc);
497: attrType.AttrSSchema = docSSchema;
498: attrType.AttrTypeNum = HTML_ATTR_http_equiv;
499: attr = TtaGetAttribute (el, attrType);
500: if (attr != NULL)
501: {
502: /* There is a HTTP-EQUIV attribute */
503: length = TtaGetTextAttributeLength (attr);
504: if (length > 0)
505: {
506: text = TtaAllocString (length + 1);
507: TtaGiveTextAttributeValue (attr, text, &length);
508: if (!ustrcasecmp (text, TEXT("content-type")))
509: {
510: attrType.AttrTypeNum = HTML_ATTR_meta_content;
511: attr = TtaGetAttribute (el, attrType);
512: if (attr != NULL)
513: {
514: length = TtaGetTextAttributeLength (attr);
515: if (length > 0)
516: {
517: text2 = TtaAllocString (length + 1);
518: TtaGiveTextAttributeValue (attr, text2, &length);
519: ptrText = text2;
520: while (*ptrText)
521: {
522: *ptrText = utolower (*ptrText);
523: ptrText++;
524: }
525:
526: str = ustrstr (text2, TEXT("charset="));
527: if (str)
528: {
529: pos = str - text2 + 8;
530: while (text2[pos] != WC_SPACE &&
531: text2[pos] != WC_TAB && text2[pos] != WC_EOS)
532: charsetname[index++] = text2[pos++];
533: charsetname[index] = WC_EOS;
1.15 cvs 534: charset = TtaGetCharset (charsetname);
535: if (charset != UNDEFINED_CHARSET)
536: TtaSetDocumentCharset (doc, charset);
1.6 cvs 537: }
538: TtaFreeMemory (text2);
539: }
540: }
541: }
542: TtaFreeMemory (text);
543: }
544: }
545: }
546:
1.23 cvs 547: /*---------------------------------------------------------------------------
548: XhtmlMapEntity
549: Search that entity in the entity table and return the corresponding value.
550: ---------------------------------------------------------------------------*/
551: #ifdef __STDC__
1.25 cvs 552: void XhtmlMapEntity (STRING entityName, int *entityValue, STRING alphabet)
1.23 cvs 553: #else
554: void XhtmlMapEntity (entityName, entityValue, alphabet)
555: STRING entityName;
556: int *entityValue;
557: STRING alphabet;
558: #endif
559: {
560: int i;
561: ThotBool found;
562:
563: found = FALSE;
564: for (i = 0; XhtmlEntityTable[i].charCode >= 0 && ! found; i++)
565: found = !ustrcmp (XhtmlEntityTable[i].charName, entityName);
566:
567: if (found)
568: {
569: /* entity found */
570: i--;
571: *entityValue = XhtmlEntityTable[i].charCode;
572: *alphabet = XhtmlEntityTable[i].charAlphabet;
573: }
574: else
575: *alphabet = WC_EOS;
576: }
577:
578: /*----------------------------------------------------------------------
579: PutNonISOlatin1Char
580: Put a Unicode character in the input buffer.
581: ----------------------------------------------------------------------*/
582: #ifdef __STDC__
583: static void PutNonISOlatin1Char (int code,
584: STRING prefix,
585: STRING entityName,
586: ParserData *context)
587: #else
588: static void PutNonISOlatin1Char (code, prefix, entityName, context)
589: int code;
590: STRING prefix;
591: STRING entityName;
592: ParserData *context;
593: #endif
594: {
595: Language lang, l;
596: ElementType elType;
597: Element elText;
598: AttributeType attrType;
599: Attribute attr;
600: CHAR_T buffer[MaxEntityLength+10];
601:
602: /* create a new text leaf */
603: elType.ElSSchema = TtaGetDocumentSSchema (context->doc);
604: elType.ElTypeNum = HTML_EL_TEXT_UNIT;
605: elText = TtaNewElement (context->doc, elType);
606: XmlSetElemLineNumber (elText);
1.26 ! cvs 607: InsertXmlElement (&elText);
1.23 cvs 608: context->lastElement = elText;
609: context->lastElementClosed = TRUE;
610:
611: /* try to find a fallback character */
612: l = context->language;
613: GetFallbackCharacter (code, buffer, &lang);
614:
615: /* put that fallback character in the new text leaf */
616: TtaSetTextContent (elText, buffer, lang, context->doc);
617: context->language = l;
618:
619: /* make that text leaf read-only */
620: TtaSetAccessRight (elText, ReadOnly, context->doc);
621:
622: /* associate an attribute EntityName with the new text leaf */
623: attrType.AttrSSchema = TtaGetDocumentSSchema (context->doc);
624: attrType.AttrTypeNum = HTML_ATTR_EntityName;
625: attr = TtaNewAttribute (attrType);
626: TtaAttachAttribute (elText, attr, context->doc);
1.25 cvs 627: buffer[0] = '&';
628: ustrcpy (&buffer[1], prefix);
1.23 cvs 629: ustrcat (buffer, entityName);
1.25 cvs 630: ustrcat (buffer, TEXT(";"));
1.23 cvs 631: TtaSetAttributeText (attr, buffer, elText, context->doc);
632: context->mergeText = FALSE;
633: }
634:
635: /*----------------------------------------------------------------------
636: XhtmlEntityCreated
637: A XTHML entity has been created by the XML parser.
638: ----------------------------------------------------------------------*/
639: #ifdef __STDC__
640: void XhtmlEntityCreated (int entityValue, Language lang,
641: STRING entityName, ParserData *context)
642: #else
643: void XhtmlEntityCreated (entityValue, lang, entityName, context)
644: int entityValue;
645: Language lang;
646: STRING entityName;
647: ParserData *context;
648: #endif
649: {
650: CHAR_T buffer[2];
651:
652: if (lang < 0)
653: PutInXmlElement (entityName);
654: else
655: {
656: if (entityValue < 255)
657: {
658: buffer[0] = ((UCHAR_T) entityValue);
659: buffer[1] = WC_EOS;
660: PutInXmlElement (buffer);
661: }
662: else
663: PutNonISOlatin1Char (entityValue, TEXT(""), entityName, context);
664: }
665: }
666:
1.6 cvs 667: /*----------------------------------------------------------------------
668: XhtmlElementComplete
1.20 cvs 669: Complete Xhtml elements.
1.6 cvs 670: Check its attributes and its contents.
671: ----------------------------------------------------------------------*/
672: #ifdef __STDC__
1.15 cvs 673: void XhtmlElementComplete (Element el, Document doc, int *error)
1.6 cvs 674: #else
1.15 cvs 675: void XhtmlElementComplete (el, doc, error)
676: Element el;
677: Document doc;
678: int *error;
1.6 cvs 679: #endif
680: {
1.23 cvs 681: ElementType elType, newElType, childType;
682: Element constElem, child, desc, leaf, prev, next, last,
683: elFrames, lastFrame, lastChild;
684: Attribute attr;
685: AttributeType attrType;
686: Language lang;
687: STRING text;
688: CHAR_T lastChar[2];
689: STRING name1;
690: int length;
691: SSchema docSSchema;
1.6 cvs 692:
693: *error = 0;
694: docSSchema = TtaGetDocumentSSchema (doc);
695:
696: elType = TtaGetElementType (el);
697: /* is this a block-level element in a character-level element? */
1.19 cvs 698: if (!IsXMLElementInline (el) &&
1.24 cvs 699: elType.ElTypeNum != HTML_EL_Comment_ &&
700: elType.ElTypeNum != HTML_EL_XMLPI)
1.6 cvs 701: BlockInCharLevelElem (el);
702:
703: newElType.ElSSchema = elType.ElSSchema;
704: switch (elType.ElTypeNum)
705: {
706: case HTML_EL_Object: /* it's an object */
707: /* create Object_Content */
708: child = TtaGetFirstChild (el);
709: if (child != NULL)
710: elType = TtaGetElementType (child);
711:
712: /* is it the PICTURE element ? */
713: if (child == NULL || elType.ElTypeNum != HTML_EL_PICTURE_UNIT)
714: {
715: desc = child;
716: /* create the PICTURE element */
717: elType.ElTypeNum = HTML_EL_PICTURE_UNIT;
718: child = TtaNewTree (doc, elType, "");
719: if (desc == NULL)
720: TtaInsertFirstChild (&child, el, doc);
721: else
722: TtaInsertSibling (child, desc, TRUE, doc);
723: }
724:
725: /* copy attribute data into SRC attribute of Object_Image */
726: attrType.AttrSSchema = docSSchema;
727: attrType.AttrTypeNum = HTML_ATTR_data;
728: attr = TtaGetAttribute (el, attrType);
729: if (attr != NULL)
730: {
731: length = TtaGetTextAttributeLength (attr);
732: if (length > 0)
733: {
734: name1 = TtaAllocString (length + 1);
735: TtaGiveTextAttributeValue (attr, name1, &length);
736: attrType.AttrTypeNum = HTML_ATTR_SRC;
737: attr = TtaGetAttribute (child, attrType);
738: if (attr == NULL)
739: {
740: attr = TtaNewAttribute (attrType);
741: TtaAttachAttribute (child, attr, doc);
742: }
743: TtaSetAttributeText (attr, name1, child, doc);
744: TtaFreeMemory (name1);
745: }
746: }
747:
748: /* is the Object_Content element already created ? */
749: desc = child;
750: TtaNextSibling(&desc);
751: if (desc != NULL)
752: elType = TtaGetElementType (desc);
753:
754: /* is it the Object_Content element ? */
755: if (desc == NULL || elType.ElTypeNum != HTML_EL_Object_Content)
756: {
757: /* create Object_Content */
758: elType.ElTypeNum = HTML_EL_Object_Content;
759: desc = TtaNewTree (doc, elType, "");
760: TtaInsertSibling (desc, child, FALSE, doc);
761: /* move previous existing children into Object_Content */
762: child = TtaGetLastChild(el);
763: while (child != desc)
764: {
765: TtaRemoveTree (child, doc);
766: TtaInsertFirstChild (&child, desc, doc);
767: child = TtaGetLastChild(el);
768: }
769: }
770: break;
771:
772: case HTML_EL_Unnumbered_List:
773: case HTML_EL_Numbered_List:
774: case HTML_EL_Menu:
775: case HTML_EL_Directory:
776: /* It's a List element. It should only have List_Item children.
777: If it has List element chidren, move these List elements
778: within their previous List_Item sibling. This is to fix
779: a bug in document generated by Mozilla. */
780: prev = NULL;
781: next = NULL;
782: child = TtaGetFirstChild (el);
783: while (child != NULL)
784: {
785: next = child;
786: TtaNextSibling (&next);
787: elType = TtaGetElementType (child);
788: if (elType.ElTypeNum == HTML_EL_Unnumbered_List ||
789: elType.ElTypeNum == HTML_EL_Numbered_List ||
790: elType.ElTypeNum == HTML_EL_Menu ||
791: elType.ElTypeNum == HTML_EL_Directory)
792: /* this list element is a child of another list element */
793: if (prev)
794: {
795: elType = TtaGetElementType (prev);
796: if (elType.ElTypeNum == HTML_EL_List_Item)
797: {
798: /* get the last child of the previous List_Item */
799: desc = TtaGetFirstChild (prev);
800: last = NULL;
801: while (desc)
802: {
803: last = desc;
804: TtaNextSibling (&desc);
805: }
806: /* move the list element after the last child of the
807: previous List_Item */
808: TtaRemoveTree (child, doc);
809: if (last)
810: TtaInsertSibling (child, last, FALSE, doc);
811: else
812: TtaInsertFirstChild (&child, prev, doc);
813: child = prev;
814: }
815: }
816: prev = child;
817: child = next;
818: }
819: break;
820:
821: case HTML_EL_FRAMESET:
822: /* The FRAMESET element is now complete. Gather all its FRAMESET
823: and FRAME children and wrap them up in a Frames element */
824: elFrames = NULL; lastFrame = NULL;
825: lastChild = NULL;
826: child = TtaGetFirstChild (el);
827: while (child != NULL)
828: {
829: next = child;
830: TtaNextSibling (&next);
831: elType = TtaGetElementType (child);
832: if (elType.ElTypeNum == HTML_EL_FRAMESET ||
833: elType.ElTypeNum == HTML_EL_FRAME ||
834: elType.ElTypeNum == HTML_EL_Comment_)
835: {
836: /* create the Frames element if it does not exist */
837: if (elFrames == NULL)
838: {
839: newElType.ElSSchema = docSSchema;
840: newElType.ElTypeNum = HTML_EL_Frames;
841: elFrames = TtaNewElement (doc, newElType);
842: XmlSetElemLineNumber (elFrames);
843: TtaInsertSibling (elFrames, child, TRUE, doc);
844: }
845: /* move the element as the last child of the Frames element */
846: TtaRemoveTree (child, doc);
847: if (lastFrame == NULL)
848: TtaInsertFirstChild (&child, elFrames, doc);
849: else
850: TtaInsertSibling (child, lastFrame, FALSE, doc);
851: lastFrame = child;
852: }
853: child = next;
854: }
855: break;
856:
857: case HTML_EL_Input: /* it's an INPUT without any TYPE attribute */
858: /* Create a child of type Text_Input */
859: elType.ElTypeNum = HTML_EL_Text_Input;
860: child = TtaNewTree (doc, elType, "");
861: XmlSetElemLineNumber (child);
862: TtaInsertFirstChild (&child, el, doc);
863: /* now, process it like a Text_Input element */
864:
865: case HTML_EL_Text_Input:
866: case HTML_EL_Password_Input:
867: case HTML_EL_File_Input:
868: /* get element Inserted_Text */
869: child = TtaGetFirstChild (el);
870: if (child != NULL)
871: {
872: attrType.AttrSSchema = docSSchema;
873: attrType.AttrTypeNum = HTML_ATTR_Value_;
874: attr = TtaGetAttribute (el, attrType);
875: if (attr != NULL)
876: {
877: /* copy the value of attribute "value" into the first text
878: leaf of element */
879: length = TtaGetTextAttributeLength (attr);
880: if (length > 0)
881: {
882: /* get the text leaf */
883: leaf = TtaGetFirstChild (child);
884: if (leaf != NULL)
885: {
886: childType = TtaGetElementType (leaf);
887: if (childType.ElTypeNum == HTML_EL_TEXT_UNIT)
888: {
889: /* copy attribute value into the text leaf */
890: text = TtaAllocString (length + 1);
891: TtaGiveTextAttributeValue (attr, text, &length);
892: TtaSetTextContent (leaf, text,
893: TtaGetDefaultLanguage (), doc);
894: TtaFreeMemory (text);
895: }
896: }
897: }
898: }
899: }
900: break;
901:
902: case HTML_EL_META:
1.15 cvs 903: ParseCharset (el, doc);
1.6 cvs 904: break;
905:
906: case HTML_EL_STYLE_: /* it's a STYLE element */
1.8 cvs 907: case HTML_EL_SCRIPT: /* it's a SCRIPT element */
1.6 cvs 908: case HTML_EL_Preformatted: /* it's a PRE */
909: /* if the last line of the Preformatted is empty, remove it */
910: leaf = XmlLastLeafInElement (el);
911: if (leaf != NULL)
912: {
913: elType = TtaGetElementType (leaf);
914: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT)
915: /* the last leaf is a TEXT element */
916: {
917: length = TtaGetTextLength (leaf);
918: if (length > 0)
919: {
920: TtaGiveSubString (leaf, lastChar, length, 1);
921: if (lastChar[0] == EOL)
922: /* last character is new line, delete it */
923: {
924: if (length == 1)
925: /* empty TEXT element */
926: TtaDeleteTree (leaf, doc);
927: else
928: /* remove the last character */
929: TtaDeleteTextContent (leaf, length, 1, doc);
930: }
931: }
932: }
933: }
934: if (IsParsingCSS ())
935: {
936: text = GetStyleContents (el);
937: if (text)
938: {
939: ReadCSSRules (doc, NULL, text, FALSE);
940: TtaFreeMemory (text);
941: }
942: SetParsingCSS (FALSE);
943: }
944: /* and continue as if it were a Preformatted or a Script */
945: break;
946:
947: case HTML_EL_Text_Area: /* it's a Text_Area */
948: SetParsingTextArea (FALSE);
949: child = TtaGetFirstChild (el);
950: if (child == NULL)
951: /* it's an empty Text_Area */
952: /* insert a Inserted_Text element in the element */
953: {
954: newElType.ElTypeNum = HTML_EL_Inserted_Text;
955: child = TtaNewTree (doc, newElType, "");
956: TtaInsertFirstChild (&child, el, doc);
957: }
958: else
959: {
960: /* save the text into Default_Value attribute */
961: attrType.AttrSSchema = docSSchema;
962: attrType.AttrTypeNum = HTML_ATTR_Default_Value;
963: if (TtaGetAttribute (el, attrType) == NULL)
964: /* attribute Default_Value is missing */
965: {
966: attr = TtaNewAttribute (attrType);
967: TtaAttachAttribute (el, attr, doc);
968: desc = TtaGetFirstChild (child);
969: length = TtaGetTextLength (desc) + 1;
970: text = TtaAllocString (length);
971: TtaGiveTextContent (desc, text, &length, &lang);
972: TtaSetAttributeText (attr, text, el, doc);
973: TtaFreeMemory (text);
974: }
975: }
976: /* insert a Frame element */
977: newElType.ElTypeNum = HTML_EL_Frame;
978: constElem = TtaNewTree (doc, newElType, "");
979: TtaInsertSibling (constElem, child, FALSE, doc);
980: break;
981:
982: case HTML_EL_Radio_Input:
983: case HTML_EL_Checkbox_Input:
984: /* put an attribute Checked if it is missing */
985: attrType.AttrSSchema = docSSchema;
986: attrType.AttrTypeNum = HTML_ATTR_Checked;
987: if (TtaGetAttribute (el, attrType) == NULL)
988: /* attribute Checked is missing */
989: {
990: attr = TtaNewAttribute (attrType);
991: TtaAttachAttribute (el, attr, doc);
992: TtaSetAttributeValue (attr, HTML_ATTR_Checked_VAL_No_, el, doc);
993: }
994: break;
995:
996: case HTML_EL_Option_Menu:
997: /* Check that at least one option has a SELECTED attribute */
998: OnlyOneOptionSelected (el, doc, TRUE);
999: break;
1000:
1001: case HTML_EL_PICTURE_UNIT:
1002: break;
1003:
1004: case HTML_EL_LINK:
1005: CheckCSSLink (el, doc, docSSchema);
1006: break;
1007:
1008: case HTML_EL_Data_cell:
1009: case HTML_EL_Heading_cell:
1010: /* insert a pseudo paragraph into empty cells */
1011: child = TtaGetFirstChild (el);
1012: if (child == NULL)
1013: {
1014: elType.ElTypeNum = HTML_EL_Pseudo_paragraph;
1015: child = TtaNewTree (doc, elType, "");
1016: if (child != NULL)
1017: TtaInsertFirstChild (&child, el, doc);
1018: }
1019:
1020: /* detect whether we're parsing a whole table or just a cell */
1021: if (IsWithinTable ())
1022: NewCell (el, doc, FALSE);
1023: break;
1024:
1025: case HTML_EL_Table:
1026: CheckTable (el, doc);
1027: SubWithinTable ();
1028: break;
1029:
1030: case HTML_EL_TITLE:
1031: /* show the TITLE in the main window */
1032: UpdateTitle (el, doc);
1033: break;
1034:
1035: default:
1036: break;
1037: }
1038: }
1.1 cvs 1039:
1040: /*----------------------------------------------------------------------
1.16 cvs 1041: MapHTMLAttributeValue
1.2 cvs 1042: Search in the Attribute Value Mapping Table the entry for the attribute
1043: ThotAtt and its value AttrVal. Returns the corresponding Thot value.
1.1 cvs 1044: ----------------------------------------------------------------------*/
1045: #ifdef __STDC__
1.16 cvs 1046: void MapHTMLAttributeValue (CHAR_T* AttrVal,
1.20 cvs 1047: AttributeType attrType,
1048: int* value)
1.1 cvs 1049: #else
1.23 cvs 1050: void MapHTMLAttributeValue (AttrVal, attrType, value)
1.11 cvs 1051: CHAR_T* AttrVal;
1052: AttributeType attrType;
1053: int* value;
1.1 cvs 1054: #endif
1055: {
1.2 cvs 1056: int i;
1.1 cvs 1057:
1.2 cvs 1058: *value = 0;
1.1 cvs 1059: i = 0;
1060:
1.2 cvs 1061: while (XhtmlAttrValueMappingTable[i].ThotAttr != attrType.AttrTypeNum &&
1062: XhtmlAttrValueMappingTable[i].ThotAttr != 0)
1063: i++;
1064:
1065: if (XhtmlAttrValueMappingTable[i].ThotAttr == attrType.AttrTypeNum)
1066: {
1067: do
1.18 cvs 1068: if (!ustrcmp (XhtmlAttrValueMappingTable[i].XMLattrValue, AttrVal))
1.2 cvs 1069: *value = XhtmlAttrValueMappingTable[i].ThotAttrValue;
1.1 cvs 1070: else
1.2 cvs 1071: i++;
1072: while (*value <= 0 &&
1073: XhtmlAttrValueMappingTable[i].ThotAttr != 0);
1.1 cvs 1074: }
1075: }
Webmaster