Annotation of Amaya/amaya/XHTMLbuilder.c, revision 1.18
1.1 cvs 1: /*
2: *
3: * (c) COPYRIGHT MIT and INRIA, 1996.
4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
7:
8: /*
9: *
10: * html2thot parses an HTML file and builds the corresponding abstract tree
11: * for a Thot document of type HTML.
12: *
13: * Author: V. Quint
14: * L. Carcone
15: */
16:
17: #define THOT_EXPORT extern
18: #include "amaya.h"
19: #include "css.h"
1.2 cvs 20: #include "parser.h"
21: #include "HTML.h"
22:
1.13 cvs 23: #include "css_f.h"
24: #include "fetchXMLname_f.h"
1.1 cvs 25: #include "HTMLactions_f.h"
26: #include "HTMLedit_f.h"
27: #include "HTMLimage_f.h"
28: #include "HTMLtable_f.h"
29: #include "HTMLimage_f.h"
30: #include "UIcss_f.h"
1.13 cvs 31: #include "styleparser_f.h"
1.2 cvs 32: #include "XHTMLbuilder_f.h"
1.13 cvs 33: #include "Xml2thot_f.h"
1.1 cvs 34:
35: /* maximum length of a Thot structure schema name */
36: #define MAX_SS_NAME_LENGTH 32
37:
/* XhtmlEntityTable: character entity names paired with the decimal value */
/* of the Unicode code point they stand for (the U+XXXX value quoted in */
/* each entry's comment). The table ends with the sentinel entry "zzzz"/0. */
1.2 cvs 38: XhtmlEntity XhtmlEntityTable[] =
1.1 cvs 39: {
40: /* This table MUST be in alphabetical order. The order used here is the */
/* ASCII one: uppercase letters sort before lowercase ones ("AElig" comes */
/* before "Aacute", "lArr" before "lambda"). */
/* NOTE(review): the lookup code is not in this part of the file; presumably */
/* it relies on this order - confirm before inserting a new entry. */
41: {TEXT("AElig"), 198}, /* latin capital letter AE = */
42: /* latin capital ligature AE, U+00C6 ISOlat1 */
43: {TEXT("Aacute"), 193}, /* latin capital letter A with acute, U+00C1 ISOlat1 */
44: {TEXT("Acirc"), 194}, /* latin capital letter A with circumflex, U+00C2 ISOlat1 */
45: {TEXT("Agrave"), 192}, /* latin capital letter A with grave = */
46: /* latin capital letter A grave, U+00C0 ISOlat1 */
47: {TEXT("Alpha"), 913}, /* greek capital letter alpha, U+0391 */
48: {TEXT("Aring"), 197}, /* latin capital letter A with ring above = */
49: /* latin capital letter A ring, U+00C5 ISOlat1 */
50: {TEXT("Atilde"), 195}, /* latin capital letter A with tilde, U+00C3 ISOlat1 */
51: {TEXT("Auml"), 196}, /* latin capital letter A with diaeresis, U+00C4 ISOlat1 */
52: {TEXT("Beta"), 914}, /* greek capital letter beta, U+0392 */
53: {TEXT("Ccedil"), 199}, /* latin capital letter C with cedilla, U+00C7 ISOlat1 */
54: {TEXT("Chi"), 935}, /* greek capital letter chi, U+03A7 */
55: {TEXT("Dagger"), 8225}, /* double dagger, U+2021 ISOpub */
56: {TEXT("Delta"), 916}, /* greek capital letter delta, U+0394 ISOgrk3 */
57: {TEXT("ETH"), 208}, /* latin capital letter ETH, U+00D0 ISOlat1 */
58: {TEXT("Eacute"), 201}, /* latin capital letter E with acute, U+00C9 ISOlat1 */
59: {TEXT("Ecirc"), 202}, /* latin capital letter E with circumflex, U+00CA ISOlat1 */
60: {TEXT("Egrave"), 200}, /* latin capital letter E with grave, U+00C8 ISOlat1 */
61: {TEXT("Epsilon"), 917}, /* greek capital letter epsilon, U+0395 */
62: {TEXT("Eta"), 919}, /* greek capital letter eta, U+0397 */
63: {TEXT("Euml"), 203}, /* latin capital letter E with diaeresis, U+00CB ISOlat1 */
64: {TEXT("Gamma"), 915}, /* greek capital letter gamma, U+0393 ISOgrk3 */
65: {TEXT("Iacute"), 205}, /* latin capital letter I with acute, U+00CD ISOlat1 */
66: {TEXT("Icirc"), 206}, /* latin capital letter I with circumflex, U+00CE ISOlat1 */
67: {TEXT("Igrave"), 204}, /* latin capital letter I with grave, U+00CC ISOlat1 */
68: {TEXT("Iota"), 921}, /* greek capital letter iota, U+0399 */
69: {TEXT("Iuml"), 207}, /* latin capital letter I with diaeresis, U+00CF ISOlat1 */
70: {TEXT("Kappa"), 922}, /* greek capital letter kappa, U+039A */
71: {TEXT("Lambda"), 923}, /* greek capital letter lambda, U+039B ISOgrk3 */
72: {TEXT("Mu"), 924}, /* greek capital letter mu, U+039C */
73: {TEXT("Ntilde"), 209}, /* latin capital letter N with tilde, U+00D1 ISOlat1 */
74: {TEXT("Nu"), 925}, /* greek capital letter nu, U+039D */
75: {TEXT("OElig"), 338}, /* latin capital ligature OE, U+0152 ISOlat2 */
76: {TEXT("Oacute"), 211}, /* latin capital letter O with acute, U+00D3 ISOlat1 */
77: {TEXT("Ocirc"), 212}, /* latin capital letter O with circumflex, U+00D4 ISOlat1 */
78: {TEXT("Ograve"), 210}, /* latin capital letter O with grave, U+00D2 ISOlat1 */
79: {TEXT("Omega"), 937}, /* greek capital letter omega, U+03A9 ISOgrk3 */
80: {TEXT("Omicron"), 927}, /* greek capital letter omicron, U+039F */
81: {TEXT("Oslash"), 216}, /* latin capital letter O with stroke = */
82: /* latin capital letter O slash, U+00D8 ISOlat1 */
83: {TEXT("Otilde"), 213}, /* latin capital letter O with tilde, U+00D5 ISOlat1 */
84: {TEXT("Ouml"), 214}, /* latin capital letter O with diaeresis, U+00D6 ISOlat1 */
85: {TEXT("Phi"), 934}, /* greek capital letter phi, U+03A6 ISOgrk3 */
86: {TEXT("Pi"), 928}, /* greek capital letter pi, U+03A0 ISOgrk3 */
87: {TEXT("Prime"), 8243}, /* double prime = seconds = inches, U+2033 ISOtech */
88: {TEXT("Psi"), 936}, /* greek capital letter psi, U+03A8 ISOgrk3 */
89: {TEXT("Rho"), 929}, /* greek capital letter rho, U+03A1 */
90: {TEXT("Scaron"), 352}, /* latin capital letter S with caron, U+0160 ISOlat2 */
91: {TEXT("Sigma"), 931}, /* greek capital letter sigma, U+03A3 ISOgrk3 */
92: {TEXT("THORN"), 222}, /* latin capital letter THORN, U+00DE ISOlat1 */
93: {TEXT("Tau"), 932}, /* greek capital letter tau, U+03A4 */
94: {TEXT("Theta"), 920}, /* greek capital letter theta, U+0398 ISOgrk3 */
95: {TEXT("Uacute"), 218}, /* latin capital letter U with acute, U+00DA ISOlat1 */
96: {TEXT("Ucirc"), 219}, /* latin capital letter U with circumflex, U+00DB ISOlat1 */
97: {TEXT("Ugrave"), 217}, /* latin capital letter U with grave, U+00D9 ISOlat1 */
98: {TEXT("Upsilon"), 933}, /* greek capital letter upsilon, U+03A5 ISOgrk3 */
99: {TEXT("Uuml"), 220}, /* latin capital letter U with diaeresis, U+00DC ISOlat1 */
100: {TEXT("Xi"), 926}, /* greek capital letter xi, U+039E ISOgrk3 */
101: {TEXT("Yacute"), 221}, /* latin capital letter Y with acute, U+00DD ISOlat1 */
102: {TEXT("Yuml"), 376}, /* latin capital letter Y with diaeresis, U+0178 ISOlat2 */
103: {TEXT("Zeta"), 918}, /* greek capital letter zeta, U+0396 */
104: {TEXT("aacute"), 225}, /* latin small letter a with acute, U+00E1 ISOlat1 */
105: {TEXT("acirc"), 226}, /* latin small letter a with circumflex, U+00E2 ISOlat1 */
106: {TEXT("acute"), 180}, /* acute accent = spacing acute, U+00B4 ISOdia */
107: {TEXT("aelig"), 230}, /* latin small letter ae = */
108: /* latin small ligature ae, U+00E6 ISOlat1 */
109: {TEXT("agrave"), 224}, /* latin small letter a with grave = */
110: /* latin small letter a grave, U+00E0 ISOlat1 */
111: {TEXT("alefsym"), 8501},/* alef symbol = first transfinite cardinal, U+2135 NEW */
112: {TEXT("alpha"), 945}, /* greek small letter alpha, U+03B1 ISOgrk3 */
113: {TEXT("amp"), 38}, /* ampersand, U+0026 ISOnum */
114: {TEXT("and"), 8743}, /* logical and = wedge, U+2227 ISOtech */
115: {TEXT("ang"), 8736}, /* angle, U+2220 ISOamso */
116: {TEXT("aring"), 229}, /* latin small letter a with ring above = */
117: /* latin small letter a ring, U+00E5 ISOlat1 */
118: {TEXT("asymp"), 8776}, /* almost equal to = asymptotic to, U+2248 ISOamsr */
119: {TEXT("atilde"), 227}, /* latin small letter a with tilde, U+00E3 ISOlat1 */
120: {TEXT("auml"), 228}, /* latin small letter a with diaeresis, U+00E4 ISOlat1 */
121: {TEXT("bdquo"), 8222}, /* double low-9 quotation mark, U+201E NEW */
122: {TEXT("beta"), 946}, /* greek small letter beta, U+03B2 ISOgrk3 */
123: {TEXT("brvbar"), 166}, /* broken bar = broken vertical bar, U+00A6 ISOnum */
124: {TEXT("bull"), 8226}, /* bullet = black small circle, U+2022 ISOpub */
125: {TEXT("cap"), 8745}, /* intersection = cap, U+2229 ISOtech */
126: {TEXT("ccedil"), 231}, /* latin small letter c with cedilla, U+00E7 ISOlat1 */
127: {TEXT("cedil"), 184}, /* cedilla = spacing cedilla, U+00B8 ISOdia */
128: {TEXT("cent"), 162}, /* cent sign, U+00A2 ISOnum */
129: {TEXT("chi"), 967}, /* greek small letter chi, U+03C7 ISOgrk3 */
130: {TEXT("circ"), 710}, /* modifier letter circumflex accent, U+02C6 ISOpub */
131: {TEXT("clubs"), 9827}, /* black club suit = shamrock, U+2663 ISOpub */
132: {TEXT("cong"), 8773}, /* approximately equal to, U+2245 ISOtech */
133: {TEXT("copy"), 169}, /* copyright sign, U+00A9 ISOnum */
134: {TEXT("crarr"), 8629}, /* downwards arrow with corner leftwards = */
135: /* carriage return, U+21B5 NEW */
136: {TEXT("cup"), 8746}, /* union = cup, U+222A ISOtech */
137: {TEXT("curren"), 164}, /* currency sign, U+00A4 ISOnum */
138: {TEXT("dArr"), 8659}, /* downwards double arrow, U+21D3 ISOamsa */
139: {TEXT("dagger"), 8224}, /* dagger, U+2020 ISOpub */
140: {TEXT("darr"), 8595}, /* downwards arrow, U+2193 ISOnum */
141: {TEXT("deg"), 176}, /* degree sign, U+00B0 ISOnum */
142: {TEXT("delta"), 948}, /* greek small letter delta, U+03B4 ISOgrk3 */
143: {TEXT("diams"), 9830}, /* black diamond suit, U+2666 ISOpub */
144: {TEXT("divide"), 247}, /* division sign, U+00F7 ISOnum */
145: {TEXT("eacute"), 233}, /* latin small letter e with acute, U+00E9 ISOlat1 */
146: {TEXT("ecirc"), 234}, /* latin small letter e with circumflex, U+00EA ISOlat1 */
147: {TEXT("egrave"), 232}, /* latin small letter e with grave, U+00E8 ISOlat1 */
148: {TEXT("empty"), 8709}, /* empty set = null set = diameter, U+2205 ISOamso */
149: {TEXT("emsp"), 8195}, /* em space, U+2003 ISOpub */
150: {TEXT("ensp"), 8194}, /* en space, U+2002 ISOpub */
151: {TEXT("epsilon"), 949}, /* greek small letter epsilon, U+03B5 ISOgrk3 */
152: {TEXT("equiv"), 8801}, /* identical to, U+2261 ISOtech */
153: {TEXT("eta"), 951}, /* greek small letter eta, U+03B7 ISOgrk3 */
154: {TEXT("eth"), 240}, /* latin small letter eth, U+00F0 ISOlat1 */
155: {TEXT("euml"), 235}, /* latin small letter e with diaeresis, U+00EB ISOlat1 */
156: {TEXT("euro"), 8364}, /* euro sign, U+20AC NEW */
157: {TEXT("exist"), 8707}, /* there exists, U+2203 ISOtech */
158: {TEXT("fnof"), 402}, /* latin small f with hook = function = */
159: /* florin, U+0192 ISOtech */
160: {TEXT("forall"), 8704}, /* for all, U+2200 ISOtech */
161: {TEXT("frac12"), 189}, /* vulgar fraction one half = */
162: /* fraction one half, U+00BD ISOnum */
163: {TEXT("frac14"), 188}, /* vulgar fraction one quarter = */
164: /* fraction one quarter, U+00BC ISOnum */
165: {TEXT("frac34"), 190}, /* vulgar fraction three quarters = */
166: /* fraction three quarters, U+00BE ISOnum */
167: {TEXT("frasl"), 8260}, /* fraction slash, U+2044 NEW */
168: {TEXT("gamma"), 947}, /* greek small letter gamma, U+03B3 ISOgrk3 */
169: {TEXT("ge"), 8805}, /* greater-than or equal to, U+2265 ISOtech */
170: {TEXT("gt"), 62}, /* greater-than sign, U+003E ISOnum */
171: {TEXT("hArr"), 8660}, /* left right double arrow, U+21D4 ISOamsa */
172: {TEXT("harr"), 8596}, /* left right arrow, U+2194 ISOamsa */
173: {TEXT("hearts"), 9829}, /* black heart suit = valentine, U+2665 ISOpub */
174: {TEXT("hellip"), 8230}, /* horizontal ellipsis = three dot leader, U+2026 ISOpub */
175: {TEXT("hyphen"), 173}, /* hyphen = discretionary hyphen, U+00AD ISOnum */
176: {TEXT("iacute"), 237}, /* latin small letter i with acute, U+00ED ISOlat1 */
177: {TEXT("icirc"), 238}, /* latin small letter i with circumflex, U+00EE ISOlat1 */
178: {TEXT("iexcl"), 161}, /* inverted exclamation mark, U+00A1 ISOnum */
179: {TEXT("igrave"), 236}, /* latin small letter i with grave, U+00EC ISOlat1 */
180: {TEXT("image"), 8465}, /* blackletter capital I = imaginary part, U+2111 ISOamso */
181: {TEXT("infin"), 8734}, /* infinity, U+221E ISOtech */
182: {TEXT("int"), 8747}, /* integral, U+222B ISOtech */
183: {TEXT("iota"), 953}, /* greek small letter iota, U+03B9 ISOgrk3 */
184: {TEXT("iquest"), 191}, /* inverted question mark = */
185: /* turned question mark, U+00BF ISOnum */
186: {TEXT("isin"), 8712}, /* element of, U+2208 ISOtech */
187: {TEXT("iuml"), 239}, /* latin small letter i with diaeresis, U+00EF ISOlat1 */
188: {TEXT("kappa"), 954}, /* greek small letter kappa, U+03BA ISOgrk3 */
189: {TEXT("lArr"), 8656}, /* leftwards double arrow, U+21D0 ISOtech */
190: {TEXT("lambda"), 955}, /* greek small letter lambda, U+03BB ISOgrk3 */
191: {TEXT("lang"), 9001}, /* left-pointing angle bracket = bra, U+2329 ISOtech */
192: {TEXT("laquo"), 171}, /* left-pointing double angle quotation mark = */
193: /* left pointing guillemet, U+00AB ISOnum */
194: {TEXT("larr"), 8592}, /* leftwards arrow, U+2190 ISOnum */
195: {TEXT("lceil"), 8968}, /* left ceiling = apl upstile, U+2308 ISOamsc */
196: {TEXT("ldquo"), 8220}, /* left double quotation mark, U+201C ISOnum */
197: {TEXT("le"), 8804}, /* less-than or equal to, U+2264 ISOtech */
198: {TEXT("lfloor"), 8970}, /* left floor = apl downstile, U+230A ISOamsc */
199: {TEXT("lowast"), 8727}, /* asterisk operator, U+2217 ISOtech */
200: {TEXT("loz"), 9674}, /* lozenge, U+25CA ISOpub */
201: {TEXT("lrm"), 8206}, /* left-to-right mark, U+200E NEW RFC 2070 */
202: {TEXT("lsaquo"), 8249}, /* single left-pointing angle quotation mark, */
203: /* U+2039 ISO proposed */
204: {TEXT("lsquo"), 8216}, /* left single quotation mark, U+2018 ISOnum */
205: {TEXT("lt"), 60}, /* less-than sign, U+003C ISOnum */
206: {TEXT("macr"), 175}, /* macron = spacing macron = overline = APL overbar, */
207: /* U+00AF ISOdia */
208: {TEXT("mdash"), 8212}, /* em dash, U+2014 ISOpub */
209: {TEXT("micro"), 181}, /* micro sign, U+00B5 ISOnum */
210: {TEXT("middot"), 183}, /* middle dot = Georgian comma = */
211: /* Greek middle dot, U+00B7 ISOnum */
212: {TEXT("minus"), 8722}, /* minus sign, U+2212 ISOtech */
213: {TEXT("mu"), 956}, /* greek small letter mu, U+03BC ISOgrk3 */
214: {TEXT("nabla"), 8711}, /* nabla = backward difference, U+2207 ISOtech */
215: {TEXT("nbsp"), 160}, /* no-break space = non-breaking space, U+00A0 ISOnum */
216: {TEXT("ndash"), 8211}, /* en dash, U+2013 ISOpub */
217: {TEXT("ne"), 8800}, /* not equal to, U+2260 ISOtech */
218: {TEXT("ni"), 8715}, /* contains as member, U+220B ISOtech */
219: {TEXT("not"), 172}, /* not sign, U+00AC ISOnum */
220: {TEXT("notin"), 8713}, /* not an element of, U+2209 ISOtech */
221: {TEXT("nsub"), 8836}, /* not a subset of, U+2284 ISOamsn */
222: {TEXT("ntilde"), 241}, /* latin small letter n with tilde, U+00F1 ISOlat1 */
223: {TEXT("nu"), 957}, /* greek small letter nu, U+03BD ISOgrk3 */
224: {TEXT("oacute"), 243}, /* latin small letter o with acute, U+00F3 ISOlat1 */
225: {TEXT("ocirc"), 244}, /* latin small letter o with circumflex, U+00F4 ISOlat1 */
226: {TEXT("oelig"), 339}, /* latin small ligature oe, U+0153 ISOlat2 */
227: {TEXT("ograve"), 242}, /* latin small letter o with grave, U+00F2 ISOlat1 */
228: {TEXT("oline"), 8254}, /* overline = spacing overscore, U+203E NEW */
229: {TEXT("omega"), 969}, /* greek small letter omega, U+03C9 ISOgrk3 */
230: {TEXT("omicron"), 959}, /* greek small letter omicron, U+03BF NEW */
231: {TEXT("oplus"), 8853}, /* circled plus = direct sum, U+2295 ISOamsb */
232: {TEXT("or"), 8744}, /* logical or = vee, U+2228 ISOtech */
233: {TEXT("ordf"), 170}, /* feminine ordinal indicator, U+00AA ISOnum */
234: {TEXT("ordm"), 186}, /* masculine ordinal indicator, U+00BA ISOnum */
235: {TEXT("oslash"), 248}, /* latin small letter o with stroke, = */
236: /* latin small letter o slash, U+00F8 ISOlat1 */
237: {TEXT("otilde"), 245}, /* latin small letter o with tilde, U+00F5 ISOlat1 */
238: {TEXT("otimes"), 8855}, /* circled times = vector product, U+2297 ISOamsb */
239: {TEXT("ouml"), 246}, /* latin small letter o with diaeresis, U+00F6 ISOlat1 */
240: {TEXT("para"), 182}, /* pilcrow sign = paragraph sign, U+00B6 ISOnum */
241: {TEXT("part"), 8706}, /* partial differential, U+2202 ISOtech */
242: {TEXT("permil"), 8240}, /* per mille sign, U+2030 ISOtech */
243: {TEXT("perp"), 8869}, /* up tack = orthogonal to = perpendicular, U+22A5 ISOtech */
244: {TEXT("phi"), 966}, /* greek small letter phi, U+03C6 ISOgrk3 */
245: {TEXT("pi"), 960}, /* greek small letter pi, U+03C0 ISOgrk3 */
246: {TEXT("piv"), 982}, /* greek pi symbol, U+03D6 ISOgrk3 */
247: {TEXT("plusmn"), 177}, /* plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */
248: {TEXT("pound"), 163}, /* pound sign, U+00A3 ISOnum */
249: {TEXT("prime"), 8242}, /* prime = minutes = feet, U+2032 ISOtech */
250: {TEXT("prod"), 8719}, /* n-ary product = product sign, U+220F ISOamsb */
251: {TEXT("prop"), 8733}, /* proportional to, U+221D ISOtech */
252: {TEXT("psi"), 968}, /* greek small letter psi, U+03C8 ISOgrk3 */
253: {TEXT("quot"), 34}, /* quotation mark = APL quote, U+0022 ISOnum */
254: {TEXT("rArr"), 8658}, /* rightwards double arrow, U+21D2 ISOtech */
255: {TEXT("radic"), 8730}, /* square root = radical sign, U+221A ISOtech */
256: {TEXT("rang"), 9002}, /* right-pointing angle bracket = ket, U+232A ISOtech */
257: {TEXT("raquo"), 187}, /* right-pointing double angle quotation mark = */
258: /* right pointing guillemet, U+00BB ISOnum */
259: {TEXT("rarr"), 8594}, /* rightwards arrow, U+2192 ISOnum */
260: {TEXT("rceil"), 8969}, /* right ceiling, U+2309 ISOamsc */
261: {TEXT("rdquo"), 8221}, /* right double quotation mark, U+201D ISOnum */
262: {TEXT("real"), 8476}, /* blackletter capital R = real part symbol, U+211C ISOamso */
263: {TEXT("reg"), 174}, /* registered sign = registered trade mark sign, */
264: /* U+00AE ISOnum */
265: {TEXT("rfloor"), 8971}, /* right floor, U+230B ISOamsc */
266: {TEXT("rho"), 961}, /* greek small letter rho, U+03C1 ISOgrk3 */
267: {TEXT("rlm"), 8207}, /* right-to-left mark, U+200F NEW RFC 2070 */
268: {TEXT("rsaquo"), 8250}, /* single right-pointing angle quotation mark, */
269: /* U+203A ISO proposed */
270: {TEXT("rsquo"), 8217}, /* right single quotation mark, U+2019 ISOnum */
271: {TEXT("sbquo"), 8218}, /* single low-9 quotation mark, U+201A NEW */
272: {TEXT("scaron"), 353}, /* latin small letter s with caron, U+0161 ISOlat2 */
273: {TEXT("sdot"), 8901}, /* dot operator, U+22C5 ISOamsb */
274: {TEXT("sect"), 167}, /* section sign, U+00A7 ISOnum */
275: {TEXT("shy"), 173}, /* soft hyphen = discretionary hyphen, U+00AD ISOnum */
276: {TEXT("sigma"), 963}, /* greek small letter sigma, U+03C3 ISOgrk3 */
277: {TEXT("sigmaf"), 962}, /* greek small letter final sigma, U+03C2 ISOgrk3 */
278: {TEXT("sim"), 8764}, /* tilde operator = varies with = similar to, U+223C ISOtech */
279: {TEXT("spades"), 9824}, /* black spade suit, U+2660 ISOpub */
280: {TEXT("sub"), 8834}, /* subset of, U+2282 ISOtech */
281: {TEXT("sube"), 8838}, /* subset of or equal to, U+2286 ISOtech */
282: {TEXT("sum"), 8721}, /* n-ary summation, U+2211 ISOamsb */
283: {TEXT("sup"), 8835}, /* superset of, U+2283 ISOtech */
284: {TEXT("sup1"), 185}, /* superscript one = superscript digit one, U+00B9 ISOnum */
285: {TEXT("sup2"), 178}, /* superscript two = superscript digit two = squared, */
286: /* U+00B2 ISOnum */
287: {TEXT("sup3"), 179}, /* superscript three = superscript digit three = cubed, */
288: /* U+00B3 ISOnum */
289: {TEXT("supe"), 8839}, /* superset of or equal to, U+2287 ISOtech */
290: {TEXT("szlig"), 223}, /* latin small letter sharp s = ess-zed, U+00DF ISOlat1 */
291: {TEXT("tau"), 964}, /* greek small letter tau, U+03C4 ISOgrk3 */
292: {TEXT("there4"), 8756}, /* therefore, U+2234 ISOtech */
293: {TEXT("theta"), 952}, /* greek small letter theta, U+03B8 ISOgrk3 */
294: {TEXT("thetasym"), 977},/* greek small letter theta symbol, U+03D1 NEW */
295: {TEXT("thinsp"), 8201}, /* thin space, U+2009 ISOpub */
296: {TEXT("thorn"), 254}, /* latin small letter thorn, U+00FE ISOlat1 */
297: {TEXT("tilde"), 732}, /* small tilde, U+02DC ISOdia */
298: {TEXT("times"), 215}, /* multiplication sign, U+00D7 ISOnum */
299: {TEXT("trade"), 8482}, /* trade mark sign, U+2122 ISOnum */
300: {TEXT("uArr"), 8657}, /* upwards double arrow, U+21D1 ISOamsa */
301: {TEXT("uacute"), 250}, /* latin small letter u with acute, U+00FA ISOlat1 */
302: {TEXT("uarr"), 8593}, /* upwards arrow, U+2191 ISOnum */
303: {TEXT("ucirc"), 251}, /* latin small letter u with circumflex, U+00FB ISOlat1 */
304: {TEXT("ugrave"), 249}, /* latin small letter u with grave, U+00F9 ISOlat1 */
305: {TEXT("uml"), 168}, /* diaeresis = spacing diaeresis, U+00A8 ISOdia */
306: {TEXT("upsih"), 978}, /* greek upsilon with hook symbol, U+03D2 NEW */
307: {TEXT("upsilon"), 965}, /* greek small letter upsilon, U+03C5 ISOgrk3 */
308: {TEXT("uuml"), 252}, /* latin small letter u with diaeresis, U+00FC ISOlat1 */
309: {TEXT("weierp"), 8472}, /* script capital P = power set = Weierstrass p, */
310: /* U+2118 ISOamso */
311: {TEXT("xi"), 958}, /* greek small letter xi, U+03BE ISOgrk3 */
312: {TEXT("yacute"), 253}, /* latin small letter y with acute, U+00FD ISOlat1 */
313: {TEXT("yen"), 165}, /* yen sign = yuan sign, U+00A5 ISOnum */
314: {TEXT("yuml"), 255}, /* latin small letter y with diaeresis, U+00FF ISOlat1 */
315: {TEXT("zeta"), 950}, /* greek small letter zeta, U+03B6 ISOgrk3 */
316: {TEXT("zwj"), 8205}, /* zero width joiner, U+200D NEW RFC 2070 */
317: {TEXT("zwnj"), 8204}, /* zero width non-joiner, U+200C NEW RFC 2070 */
318: {TEXT("zzzz"), 0} /* this last entry is required */
319: };
320:
1.14 cvs 321: /* tables defined in fetchHTMLname.c */
322: extern AttributeMapping XhtmlAttributeMappingTable[];
323: /* Mapping table of HTML attribute values */
/* Each entry lists, in this order: a Thot attribute (HTML_ATTR_...), the */
/* value as written in the document, and the Thot constant for that value. */
/* Entries for one attribute are grouped together; the table ends with */
/* the {0, "", 0} entry. */
324: AttrValueMapping XhtmlAttrValueMappingTable[] =
1.1 cvs 325: {
326: {HTML_ATTR_dir, TEXT("ltr"), HTML_ATTR_dir_VAL_ltr},
327: {HTML_ATTR_dir, TEXT("rtl"), HTML_ATTR_dir_VAL_rtl},
328:
329: {HTML_ATTR_TextAlign, TEXT("left"), HTML_ATTR_TextAlign_VAL_left_},
330: {HTML_ATTR_TextAlign, TEXT("center"), HTML_ATTR_TextAlign_VAL_center_},
331: {HTML_ATTR_TextAlign, TEXT("right"), HTML_ATTR_TextAlign_VAL_right_},
332: {HTML_ATTR_TextAlign, TEXT("justify"), HTML_ATTR_TextAlign_VAL_justify_},
333:
334: {HTML_ATTR_Align, TEXT("left"), HTML_ATTR_Align_VAL_left_},
335: {HTML_ATTR_Align, TEXT("center"), HTML_ATTR_Align_VAL_center_},
336: {HTML_ATTR_Align, TEXT("right"), HTML_ATTR_Align_VAL_right_},
337:
338: {HTML_ATTR_LAlign, TEXT("top"), HTML_ATTR_LAlign_VAL_Top_},
339: {HTML_ATTR_LAlign, TEXT("bottom"), HTML_ATTR_LAlign_VAL_Bottom_},
340: {HTML_ATTR_LAlign, TEXT("left"), HTML_ATTR_LAlign_VAL_Left_},
341: {HTML_ATTR_LAlign, TEXT("right"), HTML_ATTR_LAlign_VAL_Right_},
342:
343: {HTML_ATTR_Clear, TEXT("left"), HTML_ATTR_Clear_VAL_Left_},
344: {HTML_ATTR_Clear, TEXT("right"), HTML_ATTR_Clear_VAL_Right_},
345: {HTML_ATTR_Clear, TEXT("all"), HTML_ATTR_Clear_VAL_All_},
346: {HTML_ATTR_Clear, TEXT("none"), HTML_ATTR_Clear_VAL_None_},
347:
348: {HTML_ATTR_NumberStyle, TEXT("1"), HTML_ATTR_NumberStyle_VAL_Arabic_},
349: {HTML_ATTR_NumberStyle, TEXT("a"), HTML_ATTR_NumberStyle_VAL_LowerAlpha},
350: {HTML_ATTR_NumberStyle, TEXT("A"), HTML_ATTR_NumberStyle_VAL_UpperAlpha},
351: {HTML_ATTR_NumberStyle, TEXT("i"), HTML_ATTR_NumberStyle_VAL_LowerRoman},
352: {HTML_ATTR_NumberStyle, TEXT("I"), HTML_ATTR_NumberStyle_VAL_UpperRoman},
353:
354: {HTML_ATTR_BulletStyle, TEXT("disc"), HTML_ATTR_BulletStyle_VAL_disc},
355: {HTML_ATTR_BulletStyle, TEXT("square"), HTML_ATTR_BulletStyle_VAL_square},
356: {HTML_ATTR_BulletStyle, TEXT("circle"), HTML_ATTR_BulletStyle_VAL_circle},
357:
358: {HTML_ATTR_ItemStyle, TEXT("1"), HTML_ATTR_ItemStyle_VAL_Arabic_},
359: {HTML_ATTR_ItemStyle, TEXT("a"), HTML_ATTR_ItemStyle_VAL_LowerAlpha},
360: {HTML_ATTR_ItemStyle, TEXT("A"), HTML_ATTR_ItemStyle_VAL_UpperAlpha},
361: {HTML_ATTR_ItemStyle, TEXT("i"), HTML_ATTR_ItemStyle_VAL_LowerRoman},
362: {HTML_ATTR_ItemStyle, TEXT("I"), HTML_ATTR_ItemStyle_VAL_UpperRoman},
363: {HTML_ATTR_ItemStyle, TEXT("disc"), HTML_ATTR_ItemStyle_VAL_disc},
364: {HTML_ATTR_ItemStyle, TEXT("square"), HTML_ATTR_ItemStyle_VAL_square},
365: {HTML_ATTR_ItemStyle, TEXT("circle"), HTML_ATTR_ItemStyle_VAL_circle},
366:
367: {HTML_ATTR_Button_type, TEXT("button"), HTML_ATTR_Button_type_VAL_button},
368: {HTML_ATTR_Button_type, TEXT("submit"), HTML_ATTR_Button_type_VAL_submit},
369: {HTML_ATTR_Button_type, TEXT("reset"), HTML_ATTR_Button_type_VAL_reset},
370:
371: {HTML_ATTR_frame, TEXT("void"), HTML_ATTR_frame_VAL_void},
372: {HTML_ATTR_frame, TEXT("above"), HTML_ATTR_frame_VAL_above},
373: {HTML_ATTR_frame, TEXT("below"), HTML_ATTR_frame_VAL_below},
374: {HTML_ATTR_frame, TEXT("hsides"), HTML_ATTR_frame_VAL_hsides},
375: {HTML_ATTR_frame, TEXT("lhs"), HTML_ATTR_frame_VAL_lhs},
376: {HTML_ATTR_frame, TEXT("rhs"), HTML_ATTR_frame_VAL_rhs},
377: {HTML_ATTR_frame, TEXT("vsides"), HTML_ATTR_frame_VAL_vsides},
378: {HTML_ATTR_frame, TEXT("box"), HTML_ATTR_frame_VAL_box},
379: {HTML_ATTR_frame, TEXT("border"), HTML_ATTR_frame_VAL_border},
380:
381: {HTML_ATTR_frameborder, TEXT("0"), HTML_ATTR_frameborder_VAL_Border0},
382: {HTML_ATTR_frameborder, TEXT("1"), HTML_ATTR_frameborder_VAL_Border1},
383:
384: {HTML_ATTR_scrolling, TEXT("yes"), HTML_ATTR_scrolling_VAL_Yes_},
385: {HTML_ATTR_scrolling, TEXT("no"), HTML_ATTR_scrolling_VAL_No_},
386: {HTML_ATTR_scrolling, TEXT("auto"), HTML_ATTR_scrolling_VAL_auto_},
387:
388: {HTML_ATTR_rules_, TEXT("none"), HTML_ATTR_rules__VAL_none_},
389: {HTML_ATTR_rules_, TEXT("groups"), HTML_ATTR_rules__VAL_groups},
390: {HTML_ATTR_rules_, TEXT("rows"), HTML_ATTR_rules__VAL_rows},
391: {HTML_ATTR_rules_, TEXT("cols"), HTML_ATTR_rules__VAL_cols},
392: {HTML_ATTR_rules_, TEXT("all"), HTML_ATTR_rules__VAL_all},
393:
394: {HTML_ATTR_Cell_align, TEXT("left"), HTML_ATTR_Cell_align_VAL_Cell_left},
395: {HTML_ATTR_Cell_align, TEXT("center"), HTML_ATTR_Cell_align_VAL_Cell_center},
396: {HTML_ATTR_Cell_align, TEXT("right"), HTML_ATTR_Cell_align_VAL_Cell_right},
397: {HTML_ATTR_Cell_align, TEXT("justify"), HTML_ATTR_Cell_align_VAL_Cell_justify},
398: {HTML_ATTR_Cell_align, TEXT("char"), HTML_ATTR_Cell_align_VAL_Cell_char},
399:
400: {HTML_ATTR_Alignment, TEXT("top"), HTML_ATTR_Alignment_VAL_Top_},
401: {HTML_ATTR_Alignment, TEXT("middle"), HTML_ATTR_Alignment_VAL_Middle_},
402: {HTML_ATTR_Alignment, TEXT("bottom"), HTML_ATTR_Alignment_VAL_Bottom_},
403: {HTML_ATTR_Alignment, TEXT("left"), HTML_ATTR_Alignment_VAL_Left_},
404: {HTML_ATTR_Alignment, TEXT("right"), HTML_ATTR_Alignment_VAL_Right_},
405:
406: {HTML_ATTR_METHOD, TEXT("get"), HTML_ATTR_METHOD_VAL_Get_},
407: {HTML_ATTR_METHOD, TEXT("post"), HTML_ATTR_METHOD_VAL_Post_},
408:
409: {HTML_ATTR_Position, TEXT("top"), HTML_ATTR_Position_VAL_Position_top},
410: {HTML_ATTR_Position, TEXT("bottom"), HTML_ATTR_Position_VAL_Position_bottom},
411: {HTML_ATTR_Position, TEXT("left"), HTML_ATTR_Position_VAL_Position_left},
412: {HTML_ATTR_Position, TEXT("right"), HTML_ATTR_Position_VAL_Position_right},
413:
414: {HTML_ATTR_Row_valign, TEXT("top"), HTML_ATTR_Row_valign_VAL_Row_top},
415: {HTML_ATTR_Row_valign, TEXT("middle"), HTML_ATTR_Row_valign_VAL_Row_middle},
416: {HTML_ATTR_Row_valign, TEXT("bottom"), HTML_ATTR_Row_valign_VAL_Row_bottom},
417: {HTML_ATTR_Row_valign, TEXT("baseline"), HTML_ATTR_Row_valign_VAL_Row_baseline},
418:
419: {HTML_ATTR_Cell_valign, TEXT("top"), HTML_ATTR_Cell_valign_VAL_Cell_top},
420: {HTML_ATTR_Cell_valign, TEXT("middle"), HTML_ATTR_Cell_valign_VAL_Cell_middle},
421: {HTML_ATTR_Cell_valign, TEXT("bottom"), HTML_ATTR_Cell_valign_VAL_Cell_bottom},
422: {HTML_ATTR_Cell_valign, TEXT("baseline"), HTML_ATTR_Cell_valign_VAL_Cell_baseline},
423:
424: {HTML_ATTR_shape, TEXT("rect"), HTML_ATTR_shape_VAL_rectangle},
425: {HTML_ATTR_shape, TEXT("circle"), HTML_ATTR_shape_VAL_circle},
426: {HTML_ATTR_shape, TEXT("poly"), HTML_ATTR_shape_VAL_polygon},
427:
428: {HTML_ATTR_valuetype, TEXT("data"), HTML_ATTR_valuetype_VAL_data_},
429: {HTML_ATTR_valuetype, TEXT("ref"), HTML_ATTR_valuetype_VAL_ref},
430: {HTML_ATTR_valuetype, TEXT("object"), HTML_ATTR_valuetype_VAL_object_},
431:
432: /* HTML attribute TYPE generates a Thot element */
/* For these DummyAttribute entries the third field is a Thot element */
/* type (HTML_EL_...), not an attribute value. */
433: {DummyAttribute, TEXT("button"), HTML_EL_Button_Input},
434: {DummyAttribute, TEXT("checkbox"), HTML_EL_Checkbox_Input},
435: {DummyAttribute, TEXT("file"), HTML_EL_File_Input},
436: {DummyAttribute, TEXT("hidden"), HTML_EL_Hidden_Input},
437: {DummyAttribute, TEXT("image"), HTML_EL_PICTURE_UNIT},
438: {DummyAttribute, TEXT("password"), HTML_EL_Password_Input},
439: {DummyAttribute, TEXT("radio"), HTML_EL_Radio_Input},
440: {DummyAttribute, TEXT("reset"), HTML_EL_Reset_Input},
441: {DummyAttribute, TEXT("submit"), HTML_EL_Submit_Input},
442: {DummyAttribute, TEXT("text"), HTML_EL_Text_Input},
443:
444: /* The following declarations allow the parser to accept boolean attributes */
445: /* written "checked=CHECKED", for instance */
446: {HTML_ATTR_ISMAP, TEXT("ismap"), HTML_ATTR_ISMAP_VAL_Yes_},
447: {HTML_ATTR_nohref, TEXT("nohref"), HTML_ATTR_nohref_VAL_Yes_},
448: {HTML_ATTR_COMPACT, TEXT("compact"), HTML_ATTR_COMPACT_VAL_Yes_},
449: {HTML_ATTR_Multiple, TEXT("multiple"), HTML_ATTR_Multiple_VAL_Yes_},
450: {HTML_ATTR_Selected, TEXT("selected"), HTML_ATTR_Selected_VAL_Yes_},
451: {HTML_ATTR_Checked, TEXT("checked"), HTML_ATTR_Checked_VAL_Yes_},
452: {HTML_ATTR_No_wrap, TEXT("nowrap"), HTML_ATTR_No_wrap_VAL_no_wrap},
453: {HTML_ATTR_NoShade, TEXT("noshade"), HTML_ATTR_NoShade_VAL_NoShade_},
454: {HTML_ATTR_declare, TEXT("declare"), HTML_ATTR_declare_VAL_Yes_},
455: {HTML_ATTR_defer, TEXT("defer"), HTML_ATTR_defer_VAL_Yes_},
456: {HTML_ATTR_disabled, TEXT("disabled"), HTML_ATTR_disabled_VAL_Yes_},
457: {HTML_ATTR_readonly, TEXT("readonly"), HTML_ATTR_readonly_VAL_Yes_},
458: {HTML_ATTR_no_resize, TEXT("noresize"), HTML_ATTR_no_resize_VAL_Yes_},
459: {0, TEXT(""), 0} /* Last entry. Mandatory */
460: };
1.6 cvs 461:
462:
463: /*----------------------------------------------------------------------
1.15 cvs 464: ParseCharset:
1.6 cvs 465: Parses the element HTTP-EQUIV and looks for the charset value.
466: ----------------------------------------------------------------------*/
467: #ifdef __STDC__
1.15 cvs 468: void ParseCharset (Element el, Document doc)
1.6 cvs 469: #else /* !__STDC__ */
1.15 cvs 470: void ParseCharset (el, doc)
1.6 cvs 471: Element el;
472: Document doc;
473: #endif /* !__STDC__ */
474: {
1.15 cvs 475: AttributeType attrType;
476: Attribute attr;
477: SSchema docSSchema;
478: CHARSET charset;
1.6 cvs 479: CHAR_T *text, *text2, *ptrText, *str;
480: CHAR_T charsetname[MAX_LENGTH];
1.15 cvs 481: int length;
1.6 cvs 482: int pos, index = 0;
483:
1.15 cvs 484: charset = TtaGetDocumentCharset (doc);
485: if (charset != UNDEFINED_CHARSET)
486: /* the charset was already defined by the http header */
487: return;
1.6 cvs 488:
489: docSSchema = TtaGetDocumentSSchema (doc);
490: attrType.AttrSSchema = docSSchema;
491: attrType.AttrTypeNum = HTML_ATTR_http_equiv;
492: attr = TtaGetAttribute (el, attrType);
493: if (attr != NULL)
494: {
495: /* There is a HTTP-EQUIV attribute */
496: length = TtaGetTextAttributeLength (attr);
497: if (length > 0)
498: {
499: text = TtaAllocString (length + 1);
500: TtaGiveTextAttributeValue (attr, text, &length);
501: if (!ustrcasecmp (text, TEXT("content-type")))
502: {
503: attrType.AttrTypeNum = HTML_ATTR_meta_content;
504: attr = TtaGetAttribute (el, attrType);
505: if (attr != NULL)
506: {
507: length = TtaGetTextAttributeLength (attr);
508: if (length > 0)
509: {
510: text2 = TtaAllocString (length + 1);
511: TtaGiveTextAttributeValue (attr, text2, &length);
512: ptrText = text2;
513: while (*ptrText)
514: {
515: *ptrText = utolower (*ptrText);
516: ptrText++;
517: }
518:
519: str = ustrstr (text2, TEXT("charset="));
520: if (str)
521: {
522: pos = str - text2 + 8;
523: while (text2[pos] != WC_SPACE &&
524: text2[pos] != WC_TAB && text2[pos] != WC_EOS)
525: charsetname[index++] = text2[pos++];
526: charsetname[index] = WC_EOS;
1.15 cvs 527: charset = TtaGetCharset (charsetname);
528: if (charset != UNDEFINED_CHARSET)
529: TtaSetDocumentCharset (doc, charset);
1.6 cvs 530: }
531: TtaFreeMemory (text2);
532: }
533: }
534: }
535: TtaFreeMemory (text);
536: }
537: }
538: }
539:
540: /*----------------------------------------------------------------------
541: XhtmlElementComplete
542: Complete XHTML elements.
543: Check its attributes and its contents.
544: ----------------------------------------------------------------------*/
545: #ifdef __STDC__
1.15 cvs 546: void XhtmlElementComplete (Element el, Document doc, int *error)
1.6 cvs 547: #else
1.15 cvs 548: void XhtmlElementComplete (el, doc, error)
549: Element el;
550: Document doc;
551: int *error;
1.6 cvs 552: #endif
553: {
1.8 cvs 554: #ifdef EXPAT_PARSER
1.6 cvs 555: ElementType elType, newElType, childType;
556: Element constElem, child, desc, leaf, prev, next, last,
557: elFrames, lastFrame, lastChild;
558: Attribute attr;
559: AttributeType attrType;
560: Language lang;
561: STRING text;
562: CHAR_T lastChar[2];
563: STRING name1;
564: int length;
565: SSchema docSSchema;
566:
567: *error = 0;
568: docSSchema = TtaGetDocumentSSchema (doc);
569:
570: elType = TtaGetElementType (el);
571: /* is this a block-level element in a character-level element? */
572: if (!IsCharacterLevelElement (el) && elType.ElTypeNum != HTML_EL_Comment_)
573: BlockInCharLevelElem (el);
574:
575: newElType.ElSSchema = elType.ElSSchema;
576: switch (elType.ElTypeNum)
577: {
578: case HTML_EL_Object: /* it's an object */
579: /* create Object_Content */
580: child = TtaGetFirstChild (el);
581: if (child != NULL)
582: elType = TtaGetElementType (child);
583:
584: /* is it the PICTURE element ? */
585: if (child == NULL || elType.ElTypeNum != HTML_EL_PICTURE_UNIT)
586: {
587: desc = child;
588: /* create the PICTURE element */
589: elType.ElTypeNum = HTML_EL_PICTURE_UNIT;
590: child = TtaNewTree (doc, elType, "");
591: if (desc == NULL)
592: TtaInsertFirstChild (&child, el, doc);
593: else
594: TtaInsertSibling (child, desc, TRUE, doc);
595: }
596:
597: /* copy attribute data into SRC attribute of Object_Image */
598: attrType.AttrSSchema = docSSchema;
599: attrType.AttrTypeNum = HTML_ATTR_data;
600: attr = TtaGetAttribute (el, attrType);
601: if (attr != NULL)
602: {
603: length = TtaGetTextAttributeLength (attr);
604: if (length > 0)
605: {
606: name1 = TtaAllocString (length + 1);
607: TtaGiveTextAttributeValue (attr, name1, &length);
608: attrType.AttrTypeNum = HTML_ATTR_SRC;
609: attr = TtaGetAttribute (child, attrType);
610: if (attr == NULL)
611: {
612: attr = TtaNewAttribute (attrType);
613: TtaAttachAttribute (child, attr, doc);
614: }
615: TtaSetAttributeText (attr, name1, child, doc);
616: TtaFreeMemory (name1);
617: }
618: }
619:
620: /* is the Object_Content element already created ? */
621: desc = child;
622: TtaNextSibling(&desc);
623: if (desc != NULL)
624: elType = TtaGetElementType (desc);
625:
626: /* is it the Object_Content element ? */
627: if (desc == NULL || elType.ElTypeNum != HTML_EL_Object_Content)
628: {
629: /* create Object_Content */
630: elType.ElTypeNum = HTML_EL_Object_Content;
631: desc = TtaNewTree (doc, elType, "");
632: TtaInsertSibling (desc, child, FALSE, doc);
633: /* move previous existing children into Object_Content */
634: child = TtaGetLastChild(el);
635: while (child != desc)
636: {
637: TtaRemoveTree (child, doc);
638: TtaInsertFirstChild (&child, desc, doc);
639: child = TtaGetLastChild(el);
640: }
641: }
642: break;
643:
644: case HTML_EL_Unnumbered_List:
645: case HTML_EL_Numbered_List:
646: case HTML_EL_Menu:
647: case HTML_EL_Directory:
648: /* It's a List element. It should only have List_Item children.
649: If it has List element chidren, move these List elements
650: within their previous List_Item sibling. This is to fix
651: a bug in document generated by Mozilla. */
652: prev = NULL;
653: next = NULL;
654: child = TtaGetFirstChild (el);
655: while (child != NULL)
656: {
657: next = child;
658: TtaNextSibling (&next);
659: elType = TtaGetElementType (child);
660: if (elType.ElTypeNum == HTML_EL_Unnumbered_List ||
661: elType.ElTypeNum == HTML_EL_Numbered_List ||
662: elType.ElTypeNum == HTML_EL_Menu ||
663: elType.ElTypeNum == HTML_EL_Directory)
664: /* this list element is a child of another list element */
665: if (prev)
666: {
667: elType = TtaGetElementType (prev);
668: if (elType.ElTypeNum == HTML_EL_List_Item)
669: {
670: /* get the last child of the previous List_Item */
671: desc = TtaGetFirstChild (prev);
672: last = NULL;
673: while (desc)
674: {
675: last = desc;
676: TtaNextSibling (&desc);
677: }
678: /* move the list element after the last child of the
679: previous List_Item */
680: TtaRemoveTree (child, doc);
681: if (last)
682: TtaInsertSibling (child, last, FALSE, doc);
683: else
684: TtaInsertFirstChild (&child, prev, doc);
685: child = prev;
686: }
687: }
688: prev = child;
689: child = next;
690: }
691: break;
692:
693: case HTML_EL_FRAMESET:
694: /* The FRAMESET element is now complete. Gather all its FRAMESET
695: and FRAME children and wrap them up in a Frames element */
696: elFrames = NULL; lastFrame = NULL;
697: lastChild = NULL;
698: child = TtaGetFirstChild (el);
699: while (child != NULL)
700: {
701: next = child;
702: TtaNextSibling (&next);
703: elType = TtaGetElementType (child);
704: if (elType.ElTypeNum == HTML_EL_FRAMESET ||
705: elType.ElTypeNum == HTML_EL_FRAME ||
706: elType.ElTypeNum == HTML_EL_Comment_)
707: {
708: /* create the Frames element if it does not exist */
709: if (elFrames == NULL)
710: {
711: newElType.ElSSchema = docSSchema;
712: newElType.ElTypeNum = HTML_EL_Frames;
713: elFrames = TtaNewElement (doc, newElType);
714: XmlSetElemLineNumber (elFrames);
715: TtaInsertSibling (elFrames, child, TRUE, doc);
716: }
717: /* move the element as the last child of the Frames element */
718: TtaRemoveTree (child, doc);
719: if (lastFrame == NULL)
720: TtaInsertFirstChild (&child, elFrames, doc);
721: else
722: TtaInsertSibling (child, lastFrame, FALSE, doc);
723: lastFrame = child;
724: }
725: child = next;
726: }
727: break;
728:
729: case HTML_EL_Input: /* it's an INPUT without any TYPE attribute */
730: /* Create a child of type Text_Input */
731: elType.ElTypeNum = HTML_EL_Text_Input;
732: child = TtaNewTree (doc, elType, "");
733: XmlSetElemLineNumber (child);
734: TtaInsertFirstChild (&child, el, doc);
735: /* now, process it like a Text_Input element */
736:
737: case HTML_EL_Text_Input:
738: case HTML_EL_Password_Input:
739: case HTML_EL_File_Input:
740: /* get element Inserted_Text */
741: child = TtaGetFirstChild (el);
742: if (child != NULL)
743: {
744: attrType.AttrSSchema = docSSchema;
745: attrType.AttrTypeNum = HTML_ATTR_Value_;
746: attr = TtaGetAttribute (el, attrType);
747: if (attr != NULL)
748: {
749: /* copy the value of attribute "value" into the first text
750: leaf of element */
751: length = TtaGetTextAttributeLength (attr);
752: if (length > 0)
753: {
754: /* get the text leaf */
755: leaf = TtaGetFirstChild (child);
756: if (leaf != NULL)
757: {
758: childType = TtaGetElementType (leaf);
759: if (childType.ElTypeNum == HTML_EL_TEXT_UNIT)
760: {
761: /* copy attribute value into the text leaf */
762: text = TtaAllocString (length + 1);
763: TtaGiveTextAttributeValue (attr, text, &length);
764: TtaSetTextContent (leaf, text,
765: TtaGetDefaultLanguage (), doc);
766: TtaFreeMemory (text);
767: }
768: }
769: }
770: }
771: }
772: break;
773:
774: case HTML_EL_META:
1.15 cvs 775: ParseCharset (el, doc);
1.6 cvs 776: break;
777:
778: case HTML_EL_STYLE_: /* it's a STYLE element */
1.8 cvs 779: case HTML_EL_SCRIPT: /* it's a SCRIPT element */
1.6 cvs 780: case HTML_EL_Preformatted: /* it's a PRE */
781: /* if the last line of the Preformatted is empty, remove it */
782: leaf = XmlLastLeafInElement (el);
783: if (leaf != NULL)
784: {
785: elType = TtaGetElementType (leaf);
786: if (elType.ElTypeNum == HTML_EL_TEXT_UNIT)
787: /* the last leaf is a TEXT element */
788: {
789: length = TtaGetTextLength (leaf);
790: if (length > 0)
791: {
792: TtaGiveSubString (leaf, lastChar, length, 1);
793: if (lastChar[0] == EOL)
794: /* last character is new line, delete it */
795: {
796: if (length == 1)
797: /* empty TEXT element */
798: TtaDeleteTree (leaf, doc);
799: else
800: /* remove the last character */
801: TtaDeleteTextContent (leaf, length, 1, doc);
802: }
803: }
804: }
805: }
806: if (IsParsingCSS ())
807: {
808: text = GetStyleContents (el);
809: if (text)
810: {
811: ReadCSSRules (doc, NULL, text, FALSE);
812: TtaFreeMemory (text);
813: }
814: SetParsingCSS (FALSE);
815: }
816: /* and continue as if it were a Preformatted or a Script */
817: break;
818:
819: case HTML_EL_Text_Area: /* it's a Text_Area */
820: SetParsingTextArea (FALSE);
821: child = TtaGetFirstChild (el);
822: if (child == NULL)
823: /* it's an empty Text_Area */
824: /* insert a Inserted_Text element in the element */
825: {
826: newElType.ElTypeNum = HTML_EL_Inserted_Text;
827: child = TtaNewTree (doc, newElType, "");
828: TtaInsertFirstChild (&child, el, doc);
829: }
830: else
831: {
832: /* save the text into Default_Value attribute */
833: attrType.AttrSSchema = docSSchema;
834: attrType.AttrTypeNum = HTML_ATTR_Default_Value;
835: if (TtaGetAttribute (el, attrType) == NULL)
836: /* attribute Default_Value is missing */
837: {
838: attr = TtaNewAttribute (attrType);
839: TtaAttachAttribute (el, attr, doc);
840: desc = TtaGetFirstChild (child);
841: length = TtaGetTextLength (desc) + 1;
842: text = TtaAllocString (length);
843: TtaGiveTextContent (desc, text, &length, &lang);
844: TtaSetAttributeText (attr, text, el, doc);
845: TtaFreeMemory (text);
846: }
847: }
848: /* insert a Frame element */
849: newElType.ElTypeNum = HTML_EL_Frame;
850: constElem = TtaNewTree (doc, newElType, "");
851: TtaInsertSibling (constElem, child, FALSE, doc);
852: break;
853:
854: case HTML_EL_Radio_Input:
855: case HTML_EL_Checkbox_Input:
856: /* put an attribute Checked if it is missing */
857: attrType.AttrSSchema = docSSchema;
858: attrType.AttrTypeNum = HTML_ATTR_Checked;
859: if (TtaGetAttribute (el, attrType) == NULL)
860: /* attribute Checked is missing */
861: {
862: attr = TtaNewAttribute (attrType);
863: TtaAttachAttribute (el, attr, doc);
864: TtaSetAttributeValue (attr, HTML_ATTR_Checked_VAL_No_, el, doc);
865: }
866: break;
867:
868: case HTML_EL_Option_Menu:
869: /* Check that at least one option has a SELECTED attribute */
870: OnlyOneOptionSelected (el, doc, TRUE);
871: break;
872:
873: case HTML_EL_PICTURE_UNIT:
874: break;
875:
876: case HTML_EL_LINK:
877: CheckCSSLink (el, doc, docSSchema);
878: break;
879:
880: case HTML_EL_Data_cell:
881: case HTML_EL_Heading_cell:
882: /* insert a pseudo paragraph into empty cells */
883: child = TtaGetFirstChild (el);
884: if (child == NULL)
885: {
886: elType.ElTypeNum = HTML_EL_Pseudo_paragraph;
887: child = TtaNewTree (doc, elType, "");
888: if (child != NULL)
889: TtaInsertFirstChild (&child, el, doc);
890: }
891:
892: /* detect whether we're parsing a whole table or just a cell */
893: if (IsWithinTable ())
894: NewCell (el, doc, FALSE);
895: break;
896:
897: case HTML_EL_Table:
898: CheckTable (el, doc);
899: SubWithinTable ();
900: break;
901:
902: case HTML_EL_TITLE:
903: /* show the TITLE in the main window */
904: UpdateTitle (el, doc);
905: break;
906:
907: default:
908: break;
909: }
1.8 cvs 910: #endif /* EXPAT_PARSER */
1.6 cvs 911: }
1.1 cvs 912:
913: /*----------------------------------------------------------------------
1.2 cvs 914: XhtmlGetDTDName
915: Return in DTDname the name of the DTD to be used for parsing the
916: content of element named elementName.
917: This element type appear with an 'X' in the ElemMappingTable.
1.1 cvs 918: ----------------------------------------------------------------------*/
919: #ifdef __STDC__
1.2 cvs 920: void XhtmlGetDTDName (STRING DTDname,
921: STRING elementName)
1.1 cvs 922: #else
1.2 cvs 923: void XhtmlGetDTDName (DTDname,
924: elementName)
925: STRING DTDname;
926: STRING elementName;
927:
1.1 cvs 928: #endif
929: {
1.8 cvs 930: #ifdef EXPAT_PARSER
1.2 cvs 931: if (ustrcmp (elementName, TEXT("math")) == 0)
932: ustrcpy (DTDname, TEXT("MathML"));
1.1 cvs 933: else
1.2 cvs 934: if (ustrcmp (elementName, TEXT("label")) == 0 ||
935: ustrcmp (elementName, TEXT("text")) == 0)
936: ustrcpy (DTDname, TEXT("HTML"));
937: else
938: ustrcpy (DTDname, TEXT(""));
1.17 cvs 939: #endif /* EXPAT_PARSER */
940: }
941:
1.1 cvs 942:
943: /*----------------------------------------------------------------------
1.16 cvs 944: MapHTMLAttributeValue
1.2 cvs 945: Search in the Attribute Value Mapping Table the entry for the attribute
946: ThotAtt and its value AttrVal. Returns the corresponding Thot value.
1.1 cvs 947: ----------------------------------------------------------------------*/
948: #ifdef __STDC__
1.16 cvs 949: void MapHTMLAttributeValue (CHAR_T* AttrVal,
1.11 cvs 950: AttributeType attrType,
951: int* value)
1.1 cvs 952: #else
1.16 cvs 953: void MapHTMLAttributeValue (AttrVal,
1.11 cvs 954: attrType,
955: value)
956: CHAR_T* AttrVal;
957: AttributeType attrType;
958: int* value;
1.1 cvs 959: #endif
960: {
1.8 cvs 961: #ifdef EXPAT_PARSER
1.2 cvs 962: int i;
1.1 cvs 963:
1.2 cvs 964: *value = 0;
1.1 cvs 965: i = 0;
966:
1.2 cvs 967: while (XhtmlAttrValueMappingTable[i].ThotAttr != attrType.AttrTypeNum &&
968: XhtmlAttrValueMappingTable[i].ThotAttr != 0)
969: i++;
970:
971: if (XhtmlAttrValueMappingTable[i].ThotAttr == attrType.AttrTypeNum)
972: {
973: do
1.18 ! cvs 974: if (!ustrcmp (XhtmlAttrValueMappingTable[i].XMLattrValue, AttrVal))
1.2 cvs 975: *value = XhtmlAttrValueMappingTable[i].ThotAttrValue;
1.1 cvs 976: else
1.2 cvs 977: i++;
978: while (*value <= 0 &&
979: XhtmlAttrValueMappingTable[i].ThotAttr != 0);
1.1 cvs 980: }
1.8 cvs 981: #endif /* EXPAT_PARSER */
1.1 cvs 982: }
983:
984: /*---------------------------------------------------------------------------
1.2 cvs 985: XhtmlMapEntity
1.1 cvs 986: Search that entity in the entity table and return the corresponding value.
987: ---------------------------------------------------------------------------*/
988: #ifdef __STDC__
1.13 cvs 989: void XhtmlMapEntity (STRING entityName, int *entityValue, STRING alphabet)
1.1 cvs 990: #else
1.13 cvs 991: void XhtmlMapEntity (entityName, entityValue, alphabet)
992: STRING entityName;
993: int *entityValue;
994: STRING alphabet;
1.1 cvs 995: #endif
996: {
1.8 cvs 997: #ifdef EXPAT_PARSER
1.12 cvs 998: int i;
1.13 cvs 999: ThotBool found;
1.1 cvs 1000:
1.13 cvs 1001: found = FALSE;
1002: for (i = 0; XhtmlEntityTable[i].charCode >= 0 && ! found; i++)
1003: found = !ustrcmp (XhtmlEntityTable[i].charName, entityName);
1.3 cvs 1004:
1.13 cvs 1005: if (found)
1.12 cvs 1006: {
1007: /* entity found */
1.13 cvs 1008: i--;
1.12 cvs 1009: *entityValue = XhtmlEntityTable[i].charCode;
1010: *alphabet = 'L';
1011: }
1012: else
1013: *alphabet = EOS;
1.8 cvs 1014: #endif /* EXPAT_PARSER */
1.11 cvs 1015: }
1016:
#ifdef EXPAT_PARSER
/*----------------------------------------------------------------------
   PutNonISOlatin1Char
   Put a Unicode character in the document being built.
   code is the character code, prefix and entityName are concatenated
   to build the value of the EntityName attribute, context is the
   current parser context.
   When the entity belongs to an attribute value, it is discarded.
   Otherwise a new read-only text leaf is created; it contains the
   fallback character(s) given by GetFallbackCharacter and carries an
   EntityName attribute.
  ----------------------------------------------------------------------*/
#ifdef __STDC__
static void PutNonISOlatin1Char (int code, STRING prefix, STRING entityName, ParserData *context)
#else
static void PutNonISOlatin1Char (code, prefix, entityName, context)
int          code;
STRING       prefix;
STRING       entityName;
ParserData  *context;
#endif
{
   Language       lang, l;
   ElementType    elType;
   Element        elText;
   AttributeType  attrType;
   Attribute      attr;
   CHAR_T         buffer[MaxEntityLength+10];
   STRING         src;
   int            len, maxLen;

   if (context->readingAnAttrValue)
     /* this entity belongs to an attribute value */
     /* Thot can't mix different languages in the same attribute value */
     /* just discard that character */
     return;

   /* this entity belongs to the element contents */
   /* create a new text leaf */
   elType.ElSSchema = TtaGetDocumentSSchema (context->doc);
   elType.ElTypeNum = HTML_EL_TEXT_UNIT;
   elText = TtaNewElement (context->doc, elType);
   XmlSetElemLineNumber (elText);
   XhtmlInsertElement (&elText);
   context->lastElement = elText;
   context->lastElementClosed = TRUE;

   /* try to find a fallback character; the current language of the
      context is saved and restored around that operation */
   l = context->language;
   GetFallbackCharacter (code, buffer, &lang);

   /* put that fallback character in the new text leaf */
   TtaSetTextContent (elText, buffer, lang, context->doc);
   context->language = l;

   /* make that text leaf read-only */
   TtaSetAccessRight (elText, ReadOnly, context->doc);

   /* associate an attribute EntityName with the new text leaf */
   attrType.AttrSSchema = TtaGetDocumentSSchema (context->doc);
   attrType.AttrTypeNum = HTML_ATTR_EntityName;
   attr = TtaNewAttribute (attrType);
   TtaAttachAttribute (elText, attr, context->doc);

   /* build "<prefix><entityName>" in buffer; the copy is bounded by the
      size of buffer, a longer name is truncated instead of overflowing */
   maxLen = (int) (sizeof (buffer) / sizeof (buffer[0])) - 1;
   len = 0;
   if (prefix != NULL)
     for (src = prefix; *src != WC_EOS && len < maxLen; src++)
       buffer[len++] = *src;
   if (entityName != NULL)
     for (src = entityName; *src != WC_EOS && len < maxLen; src++)
       buffer[len++] = *src;
   buffer[len] = WC_EOS;

   TtaSetAttributeText (attr, buffer, elText, context->doc);
   context->mergeText = FALSE;
}
#endif /* EXPAT_PARSER */
1082:
1.11 cvs 1083: /*----------------------------------------------------------------------
1084: XhtmlEntityCreated
1085: A XTHML entity has been created by the XML parser.
1086: ----------------------------------------------------------------------*/
1087: #ifdef __STDC__
1.13 cvs 1088: void XhtmlEntityCreated (int entityVal, Language lang, STRING entityName, ParserData *context)
1.11 cvs 1089: #else
1.13 cvs 1090: void XhtmlEntityCreated (entityVal, lang, entityName, context)
1091: int entityVal;
1092: Language lang;
1093: STRING entityName;
1094: ParserData *context;
1.11 cvs 1095: #endif
1096: {
1.12 cvs 1097: #ifdef EXPAT_PARSER
1.13 cvs 1098: CHAR_T buffer[2];
1099:
1.11 cvs 1100: if (lang < 0)
1.13 cvs 1101: PutInXmlElement (entityName);
1.11 cvs 1102: else
1103: {
1104: #ifdef LC
1105: printf (" \n code=%d", entityVal);
1106: #endif /* LC */
1107: if (entityVal < 255)
1.13 cvs 1108: {
1109: buffer[0] = TEXT(entityVal);
1110: buffer[1] = WC_EOS;
1111: PutInXmlElement (buffer);
1112: }
1.11 cvs 1113: else
1.13 cvs 1114: PutNonISOlatin1Char (entityVal, TEXT(""), entityName, context);
1.11 cvs 1115: }
1.12 cvs 1116: #endif /* EXPAT_PARSER */
1.1 cvs 1117: }
1118:
1119: /*-------------------- Entities (end) ---------------------*/
Webmaster