Annotation of Amaya/amaya/XHTMLbuilder.c, revision 1.19

1.1       cvs         1: /*
                      2:  *
                      3:  *  (c) COPYRIGHT MIT and INRIA, 1996.
                      4:  *  Please first read the full copyright statement in file COPYRIGHT.
                      5:  *
                      6:  */
                      7: 
                      8: /*
                      9:  *
                     10:  * html2thot parses a HTML file and builds the corresponding abstract tree
                     11:  * for a Thot document of type HTML.
                     12:  *
                     13:  * Author: V. Quint
                     14:  *         L. Carcone 
                     15:  */
                     16: 
                     17: #define THOT_EXPORT extern
                     18: #include "amaya.h"
                     19: #include "css.h"
1.2       cvs        20: #include "parser.h"
                     21: #include "HTML.h"
                     22: 
1.13      cvs        23: #include "css_f.h"
                     24: #include "fetchXMLname_f.h"
1.1       cvs        25: #include "HTMLactions_f.h"
                     26: #include "HTMLedit_f.h"
                     27: #include "HTMLimage_f.h"
                     28: #include "HTMLtable_f.h"
                     29: #include "HTMLimage_f.h"
                     30: #include "UIcss_f.h"
1.13      cvs        31: #include "styleparser_f.h"
1.2       cvs        32: #include "XHTMLbuilder_f.h"
1.13      cvs        33: #include "Xml2thot_f.h"
1.1       cvs        34: 
                     35: /* maximum length of a Thot structure schema name */
                     36: #define MAX_SS_NAME_LENGTH 32
                     37: 
1.2       cvs        38: XhtmlEntity        XhtmlEntityTable[] =
1.1       cvs        39: {
                     40: /* This table MUST be in alphabetical order */
                     41: {TEXT("AElig"), 198},  /* latin capital letter AE = */ 
                     42:                         /* latin capital ligature AE, U+00C6 ISOlat1 */
                     43: {TEXT("Aacute"), 193}, /* latin capital letter A with acute, U+00C1 ISOlat1 */
                     44: {TEXT("Acirc"), 194},  /* latin capital letter A with circumflex, U+00C2 ISOlat1 */
                     45: {TEXT("Agrave"), 192}, /* latin capital letter A with grave = */
                     46:                         /* latin capital letter A grave, U+00C0 ISOlat1 */
                     47: {TEXT("Alpha"), 913},  /* greek capital letter alpha, U+0391 */
                     48: {TEXT("Aring"), 197},  /* latin capital letter A with ring above = */ 
                     49:                         /* latin capital letter A ring, U+00C5 ISOlat1 */
                     50: {TEXT("Atilde"), 195}, /* latin capital letter A with tilde, U+00C3 ISOlat1 */
                     51: {TEXT("Auml"), 196},   /* latin capital letter A with diaeresis, U+00C4 ISOlat1 */
                     52: {TEXT("Beta"), 914},   /* greek capital letter beta, U+0392 */
                     53: {TEXT("Ccedil"), 199}, /* latin capital letter C with cedilla, U+00C7 ISOlat1 */
                     54: {TEXT("Chi"), 935},    /* greek capital letter chi, U+03A7 */
                     55: {TEXT("Dagger"), 8225},        /* double dagger, U+2021 ISOpub */
                     56: {TEXT("Delta"), 916},  /* greek capital letter delta, U+0394 ISOgrk3 */
                     57: {TEXT("ETH"), 208},    /* latin capital letter ETH, U+00D0 ISOlat1 */
                     58: {TEXT("Eacute"), 201}, /* latin capital letter E with acute, U+00C9 ISOlat1 */
                     59: {TEXT("Ecirc"), 202},  /* latin capital letter E with circumflex, U+00CA ISOlat1 */
                     60: {TEXT("Egrave"), 200}, /* latin capital letter E with grave, U+00C8 ISOlat1 */
                     61: {TEXT("Epsilon"), 917},        /* greek capital letter epsilon, U+0395 */
                     62: {TEXT("Eta"), 919},    /* greek capital letter eta, U+0397 */
                     63: {TEXT("Euml"), 203},   /* latin capital letter E with diaeresis, U+00CB ISOlat1 */
                     64: {TEXT("Gamma"), 915},  /* greek capital letter gamma, U+0393 ISOgrk3 */
                     65: {TEXT("Iacute"), 205}, /* latin capital letter I with acute, U+00CD ISOlat1 */
                     66: {TEXT("Icirc"), 206},  /* latin capital letter I with circumflex, U+00CE ISOlat1 */
                     67: {TEXT("Igrave"), 204}, /* latin capital letter I with grave, U+00CC ISOlat1 */
                     68: {TEXT("Iota"), 921},   /* greek capital letter iota, U+0399 */
                     69: {TEXT("Iuml"), 207},   /* latin capital letter I with diaeresis, U+00CF ISOlat1 */
                     70: {TEXT("Kappa"), 922},  /* greek capital letter kappa, U+039A */
                     71: {TEXT("Lambda"), 923}, /* greek capital letter lambda, U+039B ISOgrk3 */
                     72: {TEXT("Mu"), 924},     /* greek capital letter mu, U+039C */
                     73: {TEXT("Ntilde"), 209}, /* latin capital letter N with tilde, U+00D1 ISOlat1 */
                     74: {TEXT("Nu"), 925},     /* greek capital letter nu, U+039D */
                     75: {TEXT("OElig"), 338},  /* latin capital ligature OE, U+0152 ISOlat2 */
                     76: {TEXT("Oacute"), 211}, /* latin capital letter O with acute, U+00D3 ISOlat1 */
                     77: {TEXT("Ocirc"), 212},          /* latin capital letter O with circumflex, U+00D4 ISOlat1 */
                     78: {TEXT("Ograve"), 210}, /* latin capital letter O with grave, U+00D2 ISOlat1 */
                     79: {TEXT("Omega"), 937},          /* greek capital letter omega, U+03A9 ISOgrk3 */
                     80: {TEXT("Omicron"), 927},        /* greek capital letter omicron, U+039F */
                     81: {TEXT("Oslash"), 216}, /* latin capital letter O with stroke = */ 
                     82:                         /* latin capital letter O slash, U+00D8 ISOlat1 */
                     83: {TEXT("Otilde"), 213}, /* latin capital letter O with tilde, U+00D5 ISOlat1 */
                     84: {TEXT("Ouml"), 214},           /* latin capital letter O with diaeresis, U+00D6 ISOlat1 */
                     85: {TEXT("Phi"), 934},            /* greek capital letter phi, U+03A6 ISOgrk3 */
                     86: {TEXT("Pi"), 928},             /* greek capital letter pi, U+03A0 ISOgrk3 */
                     87: {TEXT("Prime"), 8243}, /* double prime = seconds = inches, U+2033 ISOtech */
                     88: {TEXT("Psi"), 936},            /* greek capital letter psi, U+03A8 ISOgrk3 */
                     89: {TEXT("Rho"), 929},            /* greek capital letter rho, U+03A1 */
                     90: {TEXT("Scaron"), 352}, /* latin capital letter S with caron, U+0160 ISOlat2 */
                     91: {TEXT("Sigma"), 931},          /* greek capital letter sigma, U+03A3 ISOgrk3 */
                     92: {TEXT("THORN"), 222},          /* latin capital letter THORN, U+00DE ISOlat1 */
                     93: {TEXT("Tau"), 932},            /* greek capital letter tau, U+03A4 */
                     94: {TEXT("Theta"), 920},          /* greek capital letter theta, U+0398 ISOgrk3 */
                     95: {TEXT("Uacute"), 218}, /* latin capital letter U with acute, U+00DA ISOlat1 */
                     96: {TEXT("Ucirc"), 219},          /* latin capital letter U with circumflex, U+00DB ISOlat1 */
                     97: {TEXT("Ugrave"), 217}, /* latin capital letter U with grave, U+00D9 ISOlat1 */
                     98: {TEXT("Upsilon"), 933},        /* greek capital letter upsilon, U+03A5 ISOgrk3 */
                     99: {TEXT("Uuml"), 220},           /* latin capital letter U with diaeresis, U+00DC ISOlat1 */
                    100: {TEXT("Xi"), 926},             /* greek capital letter xi, U+039E ISOgrk3 */
                    101: {TEXT("Yacute"), 221}, /* latin capital letter Y with acute, U+00DD ISOlat1 */
                    102: {TEXT("Yuml"), 376},           /* latin capital letter Y with diaeresis, U+0178 ISOlat2 */
                    103: {TEXT("Zeta"), 918},           /* greek capital letter zeta, U+0396 */
                    104: {TEXT("aacute"), 225}, /* latin small letter a with acute, U+00E1 ISOlat1 */
                    105: {TEXT("acirc"), 226},          /* latin small letter a with circumflex, U+00E2 ISOlat1 */
                    106: {TEXT("acute"), 180},          /* acute accent = spacing acute, U+00B4 ISOdia */
                    107: {TEXT("aelig"), 230},          /* latin small letter ae = */
                    108:                         /* latin small ligature ae, U+00E6 ISOlat1 */
                    109: {TEXT("agrave"), 224}, /* latin small letter a with grave = */
                    110:                         /* latin small letter a grave, U+00E0 ISOlat1 */
                    111: {TEXT("alefsym"), 8501},/* alef symbol = first transfinite cardinal, U+2135 NEW */
                    112: {TEXT("alpha"), 945},          /* greek small letter alpha, U+03B1 ISOgrk3 */
                    113: {TEXT("amp"), 38},             /* ampersand, U+0026 ISOnum */
                    114: {TEXT("and"), 8743},           /* logical and = wedge, U+2227 ISOtech */
                    115: {TEXT("ang"), 8736},           /* angle, U+2220 ISOamso */
                    116: {TEXT("aring"), 229},          /* latin small letter a with ring above = */
                    117:                         /* latin small letter a ring, U+00E5 ISOlat1 */
                    118: {TEXT("asymp"), 8776}, /* almost equal to = asymptotic to, U+2248 ISOamsr */
                    119: {TEXT("atilde"), 227}, /* latin small letter a with tilde, U+00E3 ISOlat1 */
                    120: {TEXT("auml"), 228},           /* latin small letter a with diaeresis, U+00E4 ISOlat1 */
                    121: {TEXT("bdquo"), 8222}, /* double low-9 quotation mark, U+201E NEW */
                    122: {TEXT("beta"), 946},           /* greek small letter beta, U+03B2 ISOgrk3 */
                    123: {TEXT("brvbar"), 166}, /* broken bar = broken vertical bar, U+00A6 ISOnum */
                    124: {TEXT("bull"), 8226},          /* bullet = black small circle, U+2022 ISOpub */
                    125: {TEXT("cap"), 8745},           /* intersection = cap, U+2229 ISOtech */
                    126: {TEXT("ccedil"), 231}, /* latin small letter c with cedilla, U+00E7 ISOlat1 */
                    127: {TEXT("cedil"), 184},          /* cedilla = spacing cedilla, U+00B8 ISOdia */
                    128: {TEXT("cent"), 162},           /* cent sign, U+00A2 ISOnum */
                    129: {TEXT("chi"), 967},            /* greek small letter chi, U+03C7 ISOgrk3 */
                    130: {TEXT("circ"), 710},           /* modifier letter circumflex accent, U+02C6 ISOpub */
                    131: {TEXT("clubs"), 9827}, /* black club suit = shamrock, U+2663 ISOpub */
                    132: {TEXT("cong"), 8773},          /* approximately equal to, U+2245 ISOtech */
                    133: {TEXT("copy"), 169},           /* copyright sign, U+00A9 ISOnum */
                    134: {TEXT("crarr"), 8629}, /* downwards arrow with corner leftwards = */
                    135:                         /* carriage return, U+21B5 NEW */
                    136: {TEXT("cup"), 8746},           /* union = cup, U+222A ISOtech */
                    137: {TEXT("curren"), 164}, /* currency sign, U+00A4 ISOnum */
                    138: {TEXT("dArr"), 8659},          /* downwards double arrow, U+21D3 ISOamsa */
                    139: {TEXT("dagger"), 8224},        /* dagger, U+2020 ISOpub */
                    140: {TEXT("darr"), 8595},          /* downwards arrow, U+2193 ISOnum */
                    141: {TEXT("deg"), 176},            /* degree sign, U+00B0 ISOnum */
                    142: {TEXT("delta"), 948},          /* greek small letter delta, U+03B4 ISOgrk3 */
                    143: {TEXT("diams"), 9830}, /* black diamond suit, U+2666 ISOpub */
                    144: {TEXT("divide"), 247}, /* division sign, U+00F7 ISOnum */
                    145: {TEXT("eacute"), 233}, /* latin small letter e with acute, U+00E9 ISOlat1 */
                    146: {TEXT("ecirc"), 234},          /* latin small letter e with circumflex, U+00EA ISOlat1 */
                    147: {TEXT("egrave"), 232}, /* latin small letter e with grave, U+00E8 ISOlat1 */
                    148: {TEXT("empty"), 8709}, /* empty set = null set = diameter, U+2205 ISOamso */
                    149: {TEXT("emsp"), 8195},          /* em space, U+2003 ISOpub */
                    150: {TEXT("ensp"), 8194},          /* en space, U+2002 ISOpub */
                    151: {TEXT("epsilon"), 949},        /* greek small letter epsilon, U+03B5 ISOgrk3 */
                    152: {TEXT("equiv"), 8801}, /* identical to, U+2261 ISOtech */
                    153: {TEXT("eta"), 951},            /* greek small letter eta, U+03B7 ISOgrk3 */
                    154: {TEXT("eth"), 240},            /* latin small letter eth, U+00F0 ISOlat1 */
                    155: {TEXT("euml"), 235},           /* latin small letter e with diaeresis, U+00EB ISOlat1 */
                    156: {TEXT("euro"), 8364},          /* euro sign, U+20AC NEW */
                    157: {TEXT("exist"), 8707},  /* there exists, U+2203 ISOtech */
                    158: {TEXT("fnof"), 402},           /* latin small f with hook = function = */
                    159:                         /* florin, U+0192 ISOtech */
                    160: {TEXT("forall"), 8704},        /* for all, U+2200 ISOtech */
                    161: {TEXT("frac12"), 189}, /* vulgar fraction one half = */
                    162:                         /*fraction one half, U+00BD ISOnum */
                    163: {TEXT("frac14"), 188}, /* vulgar fraction one quarter = */
                    164:                         /* fraction one quarter, U+00BC ISOnum */
                    165: {TEXT("frac34"), 190}, /* vulgar fraction three quarters = */
                    166:                         /* fraction three quarters, U+00BE ISOnum */
                    167: {TEXT("frasl"), 8260}, /* fraction slash, U+2044 NEW */
                    168: {TEXT("gamma"), 947},          /* greek small letter gamma, U+03B3 ISOgrk3 */
                    169: {TEXT("ge"), 8805},            /* greater-than or equal to, U+2265 ISOtech */
                    170: {TEXT("gt"), 62},              /* greater-than sign, U+003E ISOnum */
                    171: {TEXT("hArr"), 8660},          /* left right double arrow, U+21D4 ISOamsa */
                    172: {TEXT("harr"), 8596},          /* left right arrow, U+2194 ISOamsa */
                    173: {TEXT("hearts"), 9829},        /* black heart suit = valentine, U+2665 ISOpub */
                    174: {TEXT("hellip"), 8230},        /* horizontal ellipsis = three dot leader, U+2026 ISOpub */
                    175: {TEXT("hyphen"), 173}, /* hyphen = discretionary hyphen, U+00AD ISOnum */
                    176: {TEXT("iacute"), 237}, /* latin small letter i with acute, U+00ED ISOlat1 */
                    177: {TEXT("icirc"), 238},          /* latin small letter i with circumflex, U+00EE ISOlat1 */
                    178: {TEXT("iexcl"), 161},          /* inverted exclamation mark, U+00A1 ISOnum */
                    179: {TEXT("igrave"), 236}, /* latin small letter i with grave, U+00EC ISOlat1 */
                    180: {TEXT("image"), 8465}, /* blackletter capital I = imaginary part, U+2111 ISOamso */
                    181: {TEXT("infin"), 8734}, /* infinity, U+221E ISOtech */
                    182: {TEXT("int"), 8747},           /* integral, U+222B ISOtech */
                    183: {TEXT("iota"), 953},           /* greek small letter iota, U+03B9 ISOgrk3 */
                    184: {TEXT("iquest"), 191}, /* inverted question mark = */
                    185:                         /* turned question mark, U+00BF ISOnum */
                    186: {TEXT("isin"), 8712},          /* element of, U+2208 ISOtech */
                    187: {TEXT("iuml"), 239},           /* latin small letter i with diaeresis, U+00EF ISOlat1 */
                    188: {TEXT("kappa"), 954},          /* greek small letter kappa, U+03BA ISOgrk3 */
                    189: {TEXT("lArr"), 8656},          /* leftwards double arrow, U+21D0 ISOtech */
                    190: {TEXT("lambda"), 955}, /* greek small letter lambda, U+03BB ISOgrk3 */
                    191: {TEXT("lang"), 9001},          /* left-pointing angle bracket = bra, U+2329 ISOtech */
                    192: {TEXT("laquo"), 171},          /* left-pointing double angle quotation mark = */
                    193:                         /* left pointing guillemet, U+00AB ISOnum */
                    194: {TEXT("larr"), 8592},          /* leftwards arrow, U+2190 ISOnum */
                    195: {TEXT("lceil"), 8968}, /* left ceiling = apl upstile, U+2308 ISOamsc */
                    196: {TEXT("ldquo"), 8220}, /* left double quotation mark, U+201C ISOnum */
                    197: {TEXT("le"), 8804},            /* less-than or equal to, U+2264 ISOtech */
                    198: {TEXT("lfloor"), 8970},        /* left floor = apl downstile, U+230A ISOamsc */
                    199: {TEXT("lowast"), 8727},        /* asterisk operator, U+2217 ISOtech */
                    200: {TEXT("loz"), 9674},           /* lozenge, U+25CA ISOpub */
                    201: {TEXT("lrm"), 8206},           /* left-to-right mark, U+200E NEW RFC 2070 */
                    202: {TEXT("lsaquo"), 8249},        /* single left-pointing angle quotation mark, */
                    203:                         /* U+2039 ISO proposed */
                    204: {TEXT("lsquo"), 8216}, /* left single quotation mark, U+2018 ISOnum */
                    205: {TEXT("lt"), 60},              /* less-than sign, U+003C ISOnum */
                    206: {TEXT("macr"), 175},           /* macron = spacing macron = overline = APL overbar, */
                    207:                         /* U+00AF ISOdia */
                    208: {TEXT("mdash"), 8212}, /* em dash, U+2014 ISOpub */
                    209: {TEXT("micro"), 181},          /* micro sign, U+00B5 ISOnum */
                    210: {TEXT("middot"), 183}, /* middle dot = Georgian comma = */
                    211:                         /* Greek middle dot, U+00B7 ISOnum */
                    212: {TEXT("minus"), 8722}, /* minus sign, U+2212 ISOtech */
                    213: {TEXT("mu"), 956},             /* greek small letter mu, U+03BC ISOgrk3 */
                    214: {TEXT("nabla"), 8711}, /* nabla = backward difference, U+2207 ISOtech */
                    215: {TEXT("nbsp"), 160},           /* no-break space = non-breaking space, U+00A0 ISOnum */
                    216: {TEXT("ndash"), 8211}, /* en dash, U+2013 ISOpub */
                    217: {TEXT("ne"), 8800},            /* not equal to, U+2260 ISOtech */
                    218: {TEXT("ni"), 8715},            /* contains as member, U+220B ISOtech */
                    219: {TEXT("not"), 172},            /* not sign, U+00AC ISOnum */
                    220: {TEXT("notin"), 8713}, /* not an element of, U+2209 ISOtech */
                    221: {TEXT("nsub"), 8836},          /* not a subset of, U+2284 ISOamsn */
                    222: {TEXT("ntilde"), 241}, /* latin small letter n with tilde, U+00F1 ISOlat1 */
                    223: {TEXT("nu"), 957},             /* greek small letter nu, U+03BD ISOgrk3 */
                    224: {TEXT("oacute"), 243}, /* latin small letter o with acute, U+00F3 ISOlat1 */
                    225: {TEXT("ocirc"), 244},          /* latin small letter o with circumflex, U+00F4 ISOlat1 */
                    226: {TEXT("oelig"), 339},          /* latin small ligature oe, U+0153 ISOlat2 */
                    227: {TEXT("ograve"), 242}, /* latin small letter o with grave, U+00F2 ISOlat1 */
                    228: {TEXT("oline"), 8254}, /* overline = spacing overscore, U+203E NEW */
                    229: {TEXT("omega"), 969},          /* greek small letter omega, U+03C9 ISOgrk3 */
                    230: {TEXT("omicron"), 959},        /* greek small letter omicron, U+03BF NEW */
                    231: {TEXT("oplus"), 8853}, /* circled plus = direct sum, U+2295 ISOamsb */
                    232: {TEXT("or"), 8744},            /* logical or = vee, U+2228 ISOtech */
                    233: {TEXT("ordf"), 170},           /* feminine ordinal indicator, U+00AA ISOnum */
                    234: {TEXT("ordm"), 186},           /* masculine ordinal indicator, U+00BA ISOnum */
                    235: {TEXT("oslash"), 248}, /* latin small letter o with stroke, = */
                    236:                         /* latin small letter o slash, U+00F8 ISOlat1 */
                    237: {TEXT("otilde"), 245}, /* latin small letter o with tilde, U+00F5 ISOlat1 */
                    238: {TEXT("otimes"), 8855},        /* circled times = vector product, U+2297 ISOamsb */
                    239: {TEXT("ouml"), 246},           /* latin small letter o with diaeresis, U+00F6 ISOlat1 */
                    240: {TEXT("para"), 182},           /* pilcrow sign = paragraph sign, U+00B6 ISOnum */
                    241: {TEXT("part"), 8706},          /* partial differential, U+2202 ISOtech */
                    242: {TEXT("permil"), 8240},        /* per mille sign, U+2030 ISOtech */
                    243: {TEXT("perp"), 8869},          /* up tack = orthogonal to = perpendicular, U+22A5 ISOtech */
                    244: {TEXT("phi"), 966},            /* greek small letter phi, U+03C6 ISOgrk3 */
                    245: {TEXT("pi"), 960},             /* greek small letter pi, U+03C0 ISOgrk3 */
                    246: {TEXT("piv"), 982},            /* greek pi symbol, U+03D6 ISOgrk3 */
                    247: {TEXT("plusmn"), 177}, /* plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */
                    248: {TEXT("pound"), 163},          /* pound sign, U+00A3 ISOnum */
                    249: {TEXT("prime"), 8242}, /* prime = minutes = feet, U+2032 ISOtech */
                    250: {TEXT("prod"), 8719},          /* n-ary product = product sign, U+220F ISOamsb */
                    251: {TEXT("prop"), 8733},          /* proportional to, U+221D ISOtech */
                    252: {TEXT("psi"), 968},            /* greek small letter psi, U+03C8 ISOgrk3 */
                    253: {TEXT("quot"), 34},            /* quotation mark = APL quote, U+0022 ISOnum */
                    254: {TEXT("rArr"), 8658},          /* rightwards double arrow, U+21D2 ISOtech */
                    255: {TEXT("radic"), 8730}, /* square root = radical sign, U+221A ISOtech */
                    256: {TEXT("rang"), 9002},          /* right-pointing angle bracket = ket, U+232A ISOtech */
                    257: {TEXT("raquo"), 187},          /* right-pointing double angle quotation mark = */
                    258:                         /* right pointing guillemet, U+00BB ISOnum */
                    259: {TEXT("rarr"), 8594},          /* rightwards arrow, U+2192 ISOnum */
                    260: {TEXT("rceil"), 8969}, /* right ceiling, U+2309 ISOamsc */
                    261: {TEXT("rdquo"), 8221}, /* right double quotation mark, U+201D ISOnum */
                    262: {TEXT("real"), 8476},          /* blackletter capital R = real part symbol, U+211C ISOamso */
                    263: {TEXT("reg"), 174},            /* registered sign = registered trade mark sign, */
                    264:                         /* U+00AE ISOnum */
                    265: {TEXT("rfloor"), 8971},        /* right floor, U+230B ISOamsc */
                    266: {TEXT("rho"), 961},     /* greek small letter rho, U+03C1 ISOgrk3 */
                    267: {TEXT("rlm"), 8207},           /* right-to-left mark, U+200F NEW RFC 2070 */
                    268: {TEXT("rsaquo"), 8250},        /* single right-pointing angle quotation mark, */
                    269:                         /* U+203A ISO proposed */
                    270: {TEXT("rsquo"), 8217}, /* right single quotation mark, U+2019 ISOnum */
                    271: {TEXT("sbquo"), 8218}, /* single low-9 quotation mark, U+201A NEW */
                    272: {TEXT("scaron"), 353}, /* latin small letter s with caron, U+0161 ISOlat2 */
                    273: {TEXT("sdot"), 8901},          /* dot operator, U+22C5 ISOamsb */
                    274: {TEXT("sect"), 167},           /* section sign, U+00A7 ISOnum */
                    275: {TEXT("shy"), 173},            /* soft hyphen = discretionary hyphen, U+00AD ISOnum */
                    276: {TEXT("sigma"), 963},          /* greek small letter sigma, U+03C3 ISOgrk3 */
                    277: {TEXT("sigmaf"), 962}, /* greek small letter final sigma, U+03C2 ISOgrk3 */
                    278: {TEXT("sim"), 8764},           /* tilde operator = varies with = similar to, U+223C ISOtech */
                    279: {TEXT("spades"), 9824},        /* black spade suit, U+2660 ISOpub */
                    280: {TEXT("sub"), 8834},           /* subset of, U+2282 ISOtech */
                    281: {TEXT("sube"), 8838},          /* subset of or equal to, U+2286 ISOtech */
                    282: {TEXT("sum"), 8721},           /* n-ary sumation, U+2211 ISOamsb */
                    283: {TEXT("sup"), 8835},           /* superset of, U+2283 ISOtech */
                    284: {TEXT("sup1"), 185},           /* superscript one = superscript digit one, U+00B9 ISOnum */
                    285: {TEXT("sup2"), 178},           /* superscript two = superscript digit two = squared, */
                    286:                         /* U+00B2 ISOnum */
                    287: {TEXT("sup3"), 179},           /* superscript three = superscript digit three = cubed, */
                    288:                         /* U+00B3 ISOnum */
                    289: {TEXT("supe"), 8839},          /* superset of or equal to, U+2287 ISOtech */
                    290: {TEXT("szlig"), 223},          /* latin small letter sharp s = ess-zed, U+00DF ISOlat1 */
                    291: {TEXT("tau"), 964},            /* greek small letter tau, U+03C4 ISOgrk3 */
                    292: {TEXT("there4"), 8756},        /* therefore, U+2234 ISOtech */
                    293: {TEXT("theta"), 952},          /* greek small letter theta, U+03B8 ISOgrk3 */
                    294: {TEXT("thetasym"), 977},/* greek small letter theta symbol, U+03D1 NEW */
                    295: {TEXT("thinsp"), 8201},        /* thin space, U+2009 ISOpub */
                    296: {TEXT("thorn"), 254},          /* latin small letter thorn with, U+00FE ISOlat1 */
                    297: {TEXT("tilde"), 732},          /* small tilde, U+02DC ISOdia */
                    298: {TEXT("times"), 215},          /* multiplication sign, U+00D7 ISOnum */
                    299: {TEXT("trade"), 8482}, /* trade mark sign, U+2122 ISOnum */
                    300: {TEXT("uArr"), 8657},          /* upwards double arrow, U+21D1 ISOamsa */
                    301: {TEXT("uacute"), 250}, /* latin small letter u with acute, U+00FA ISOlat1 */
                    302: {TEXT("uarr"), 8593},          /* upwards arrow, U+2191 ISOnum*/
                    303: {TEXT("ucirc"), 251},          /* latin small letter u with circumflex, U+00FB ISOlat1 */
                    304: {TEXT("ugrave"), 249}, /* latin small letter u with grave, U+00F9 ISOlat1 */
                    305: {TEXT("uml"), 168},            /* diaeresis = spacing diaeresis, U+00A8 ISOdia */
                    306: {TEXT("upsih"), 978},          /* greek upsilon with hook symbol, U+03D2 NEW */
                    307: {TEXT("upsilon"), 965},        /* greek small letter upsilon, U+03C5 ISOgrk3 */
                    308: {TEXT("uuml"), 252},           /* latin small letter u with diaeresis, U+00FC ISOlat1 */
                    309: {TEXT("weierp"), 8472}, /* script capital P = power set = Weierstrass p, */
                    310:                         /* U+2118 ISOamso */
                    311: {TEXT("xi"), 958},             /* greek small letter xi, U+03BE ISOgrk3 */
                    312: {TEXT("yacute"), 253}, /* latin small letter y with acute, U+00FD ISOlat1 */
                    313: {TEXT("yen"), 165},            /* yen sign = yuan sign, U+00A5 ISOnum */
                    314: {TEXT("yuml"), 255},           /* latin small letter y with diaeresis, U+00FF ISOlat1 */
                    315: {TEXT("zeta"), 950},           /* greek small letter zeta, U+03B6 ISOgrk3 */
                    316: {TEXT("zwj"), 8205},           /* zero width joiner, U+200D NEW RFC 2070 */
                    317: {TEXT("zwnj"), 8204},          /* zero width non-joiner, U+200C NEW RFC 2070 */
                    318: {TEXT("zzzz"), 0}                      /* this last entry is required */
                    319: };
                    320: 
1.14      cvs       321: /* tables defined in fetchHTMLname.c */
                    322: extern AttributeMapping XhtmlAttributeMappingTable[];
                    323: /* Mapping table of HTML attribute values */
                    324: AttrValueMapping XhtmlAttrValueMappingTable[] =
1.1       cvs       325: {
                    326:    {HTML_ATTR_dir, TEXT("ltr"), HTML_ATTR_dir_VAL_ltr},
                    327:    {HTML_ATTR_dir, TEXT("rtl"), HTML_ATTR_dir_VAL_rtl},
                    328: 
                    329:    {HTML_ATTR_TextAlign, TEXT("left"), HTML_ATTR_TextAlign_VAL_left_},
                    330:    {HTML_ATTR_TextAlign, TEXT("center"), HTML_ATTR_TextAlign_VAL_center_},
                    331:    {HTML_ATTR_TextAlign, TEXT("right"), HTML_ATTR_TextAlign_VAL_right_},
                    332:    {HTML_ATTR_TextAlign, TEXT("justify"), HTML_ATTR_TextAlign_VAL_justify_},
                    333: 
                    334:    {HTML_ATTR_Align, TEXT("left"), HTML_ATTR_Align_VAL_left_},
                    335:    {HTML_ATTR_Align, TEXT("center"), HTML_ATTR_Align_VAL_center_},
                    336:    {HTML_ATTR_Align, TEXT("right"), HTML_ATTR_Align_VAL_right_},
                    337: 
                    338:    {HTML_ATTR_LAlign, TEXT("top"), HTML_ATTR_LAlign_VAL_Top_},
                    339:    {HTML_ATTR_LAlign, TEXT("bottom"), HTML_ATTR_LAlign_VAL_Bottom_},
                    340:    {HTML_ATTR_LAlign, TEXT("left"), HTML_ATTR_LAlign_VAL_Left_},
                    341:    {HTML_ATTR_LAlign, TEXT("right"), HTML_ATTR_LAlign_VAL_Right_},
                    342: 
                    343:    {HTML_ATTR_Clear, TEXT("left"), HTML_ATTR_Clear_VAL_Left_},
                    344:    {HTML_ATTR_Clear, TEXT("right"), HTML_ATTR_Clear_VAL_Right_},
                    345:    {HTML_ATTR_Clear, TEXT("all"), HTML_ATTR_Clear_VAL_All_},
                    346:    {HTML_ATTR_Clear, TEXT("none"), HTML_ATTR_Clear_VAL_None_},
                    347: 
                    348:    {HTML_ATTR_NumberStyle, TEXT("1"), HTML_ATTR_NumberStyle_VAL_Arabic_},
                    349:    {HTML_ATTR_NumberStyle, TEXT("a"), HTML_ATTR_NumberStyle_VAL_LowerAlpha},
                    350:    {HTML_ATTR_NumberStyle, TEXT("A"), HTML_ATTR_NumberStyle_VAL_UpperAlpha},
                    351:    {HTML_ATTR_NumberStyle, TEXT("i"), HTML_ATTR_NumberStyle_VAL_LowerRoman},
                    352:    {HTML_ATTR_NumberStyle, TEXT("I"), HTML_ATTR_NumberStyle_VAL_UpperRoman},
                    353: 
                    354:    {HTML_ATTR_BulletStyle, TEXT("disc"), HTML_ATTR_BulletStyle_VAL_disc},
                    355:    {HTML_ATTR_BulletStyle, TEXT("square"), HTML_ATTR_BulletStyle_VAL_square},
                    356:    {HTML_ATTR_BulletStyle, TEXT("circle"), HTML_ATTR_BulletStyle_VAL_circle},
                    357: 
                    358:    {HTML_ATTR_ItemStyle, TEXT("1"), HTML_ATTR_ItemStyle_VAL_Arabic_},
                    359:    {HTML_ATTR_ItemStyle, TEXT("a"), HTML_ATTR_ItemStyle_VAL_LowerAlpha},
                    360:    {HTML_ATTR_ItemStyle, TEXT("A"), HTML_ATTR_ItemStyle_VAL_UpperAlpha},
                    361:    {HTML_ATTR_ItemStyle, TEXT("i"), HTML_ATTR_ItemStyle_VAL_LowerRoman},
                    362:    {HTML_ATTR_ItemStyle, TEXT("I"), HTML_ATTR_ItemStyle_VAL_UpperRoman},
                    363:    {HTML_ATTR_ItemStyle, TEXT("disc"), HTML_ATTR_ItemStyle_VAL_disc},
                    364:    {HTML_ATTR_ItemStyle, TEXT("square"), HTML_ATTR_ItemStyle_VAL_square},
                    365:    {HTML_ATTR_ItemStyle, TEXT("circle"), HTML_ATTR_ItemStyle_VAL_circle},
                    366: 
                    367:    {HTML_ATTR_Button_type, TEXT("button"), HTML_ATTR_Button_type_VAL_button},
                    368:    {HTML_ATTR_Button_type, TEXT("submit"), HTML_ATTR_Button_type_VAL_submit},
                    369:    {HTML_ATTR_Button_type, TEXT("reset"), HTML_ATTR_Button_type_VAL_reset},
                    370: 
                    371:    {HTML_ATTR_frame, TEXT("void"), HTML_ATTR_frame_VAL_void},
                    372:    {HTML_ATTR_frame, TEXT("above"), HTML_ATTR_frame_VAL_above},
                    373:    {HTML_ATTR_frame, TEXT("below"), HTML_ATTR_frame_VAL_below},
                    374:    {HTML_ATTR_frame, TEXT("hsides"), HTML_ATTR_frame_VAL_hsides},
                    375:    {HTML_ATTR_frame, TEXT("lhs"), HTML_ATTR_frame_VAL_lhs},
                    376:    {HTML_ATTR_frame, TEXT("rhs"), HTML_ATTR_frame_VAL_rhs},
                    377:    {HTML_ATTR_frame, TEXT("vsides"), HTML_ATTR_frame_VAL_vsides},
                    378:    {HTML_ATTR_frame, TEXT("box"), HTML_ATTR_frame_VAL_box},
                    379:    {HTML_ATTR_frame, TEXT("border"), HTML_ATTR_frame_VAL_border},
                    380: 
                    381:    {HTML_ATTR_frameborder, TEXT("0"), HTML_ATTR_frameborder_VAL_Border0},
                    382:    {HTML_ATTR_frameborder, TEXT("1"), HTML_ATTR_frameborder_VAL_Border1},
                    383: 
                    384:    {HTML_ATTR_scrolling, TEXT("yes"), HTML_ATTR_scrolling_VAL_Yes_},
                    385:    {HTML_ATTR_scrolling, TEXT("no"), HTML_ATTR_scrolling_VAL_No_},
                    386:    {HTML_ATTR_scrolling, TEXT("auto"), HTML_ATTR_scrolling_VAL_auto_},
                    387: 
                    388:    {HTML_ATTR_rules_, TEXT("none"), HTML_ATTR_rules__VAL_none_},
                    389:    {HTML_ATTR_rules_, TEXT("groups"), HTML_ATTR_rules__VAL_groups},
                    390:    {HTML_ATTR_rules_, TEXT("rows"), HTML_ATTR_rules__VAL_rows},
                    391:    {HTML_ATTR_rules_, TEXT("cols"), HTML_ATTR_rules__VAL_cols},
                    392:    {HTML_ATTR_rules_, TEXT("all"), HTML_ATTR_rules__VAL_all},
                    393: 
                    394:    {HTML_ATTR_Cell_align, TEXT("left"), HTML_ATTR_Cell_align_VAL_Cell_left},
                    395:    {HTML_ATTR_Cell_align, TEXT("center"), HTML_ATTR_Cell_align_VAL_Cell_center},
                    396:    {HTML_ATTR_Cell_align, TEXT("right"), HTML_ATTR_Cell_align_VAL_Cell_right},
                    397:    {HTML_ATTR_Cell_align, TEXT("justify"), HTML_ATTR_Cell_align_VAL_Cell_justify},
                    398:    {HTML_ATTR_Cell_align, TEXT("char"), HTML_ATTR_Cell_align_VAL_Cell_char},
                    399: 
                    400:    {HTML_ATTR_Alignment, TEXT("top"), HTML_ATTR_Alignment_VAL_Top_},
                    401:    {HTML_ATTR_Alignment, TEXT("middle"), HTML_ATTR_Alignment_VAL_Middle_},
                    402:    {HTML_ATTR_Alignment, TEXT("bottom"), HTML_ATTR_Alignment_VAL_Bottom_},
                    403:    {HTML_ATTR_Alignment, TEXT("left"), HTML_ATTR_Alignment_VAL_Left_},
                    404:    {HTML_ATTR_Alignment, TEXT("right"), HTML_ATTR_Alignment_VAL_Right_},
                    405: 
                    406:    {HTML_ATTR_METHOD, TEXT("get"), HTML_ATTR_METHOD_VAL_Get_},
                    407:    {HTML_ATTR_METHOD, TEXT("post"), HTML_ATTR_METHOD_VAL_Post_},
                    408: 
                    409:    {HTML_ATTR_Position, TEXT("top"), HTML_ATTR_Position_VAL_Position_top},
                    410:    {HTML_ATTR_Position, TEXT("bottom"), HTML_ATTR_Position_VAL_Position_bottom},
                    411:    {HTML_ATTR_Position, TEXT("left"), HTML_ATTR_Position_VAL_Position_left},
                    412:    {HTML_ATTR_Position, TEXT("right"), HTML_ATTR_Position_VAL_Position_right},
                    413: 
                    414:    {HTML_ATTR_Row_valign, TEXT("top"), HTML_ATTR_Row_valign_VAL_Row_top},
                    415:    {HTML_ATTR_Row_valign, TEXT("middle"), HTML_ATTR_Row_valign_VAL_Row_middle},
                    416:    {HTML_ATTR_Row_valign, TEXT("bottom"), HTML_ATTR_Row_valign_VAL_Row_bottom},
                    417:    {HTML_ATTR_Row_valign, TEXT("baseline"), HTML_ATTR_Row_valign_VAL_Row_baseline},
                    418: 
                    419:    {HTML_ATTR_Cell_valign, TEXT("top"), HTML_ATTR_Cell_valign_VAL_Cell_top},
                    420:    {HTML_ATTR_Cell_valign, TEXT("middle"), HTML_ATTR_Cell_valign_VAL_Cell_middle},
                    421:    {HTML_ATTR_Cell_valign, TEXT("bottom"), HTML_ATTR_Cell_valign_VAL_Cell_bottom},
                    422:    {HTML_ATTR_Cell_valign, TEXT("baseline"), HTML_ATTR_Cell_valign_VAL_Cell_baseline},
                    423: 
                    424:    {HTML_ATTR_shape, TEXT("rect"), HTML_ATTR_shape_VAL_rectangle},
                    425:    {HTML_ATTR_shape, TEXT("circle"), HTML_ATTR_shape_VAL_circle},
                    426:    {HTML_ATTR_shape, TEXT("poly"), HTML_ATTR_shape_VAL_polygon},
                    427: 
                    428:    {HTML_ATTR_valuetype, TEXT("data"), HTML_ATTR_valuetype_VAL_data_},
                    429:    {HTML_ATTR_valuetype, TEXT("ref"), HTML_ATTR_valuetype_VAL_ref},
                    430:    {HTML_ATTR_valuetype, TEXT("object"), HTML_ATTR_valuetype_VAL_object_},
                    431: 
                    432: /* HTML attribute TYPE generates a Thot element */
                    433:    {DummyAttribute, TEXT("button"), HTML_EL_Button_Input},
                    434:    {DummyAttribute, TEXT("checkbox"), HTML_EL_Checkbox_Input},
                    435:    {DummyAttribute, TEXT("file"), HTML_EL_File_Input},
                    436:    {DummyAttribute, TEXT("hidden"), HTML_EL_Hidden_Input},
                    437:    {DummyAttribute, TEXT("image"), HTML_EL_PICTURE_UNIT},
                    438:    {DummyAttribute, TEXT("password"), HTML_EL_Password_Input},
                    439:    {DummyAttribute, TEXT("radio"), HTML_EL_Radio_Input},
                    440:    {DummyAttribute, TEXT("reset"), HTML_EL_Reset_Input},
                    441:    {DummyAttribute, TEXT("submit"), HTML_EL_Submit_Input},
                    442:    {DummyAttribute, TEXT("text"), HTML_EL_Text_Input},
                    443: 
                    444: /* The following declarations allow the parser to accept boolean attributes */
                    445: /* written "checked=CHECKED"), for instance */
                    446:    {HTML_ATTR_ISMAP, TEXT("ismap"), HTML_ATTR_ISMAP_VAL_Yes_},
                    447:    {HTML_ATTR_nohref, TEXT("nohref"), HTML_ATTR_nohref_VAL_Yes_},
                    448:    {HTML_ATTR_COMPACT, TEXT("compact"), HTML_ATTR_COMPACT_VAL_Yes_},
                    449:    {HTML_ATTR_Multiple, TEXT("multiple"), HTML_ATTR_Multiple_VAL_Yes_},
                    450:    {HTML_ATTR_Selected, TEXT("selected"), HTML_ATTR_Selected_VAL_Yes_},
                    451:    {HTML_ATTR_Checked, TEXT("checked"), HTML_ATTR_Checked_VAL_Yes_},
                    452:    {HTML_ATTR_No_wrap, TEXT("nowrap"), HTML_ATTR_No_wrap_VAL_no_wrap},
                    453:    {HTML_ATTR_NoShade, TEXT("noshade"), HTML_ATTR_NoShade_VAL_NoShade_},
                    454:    {HTML_ATTR_declare, TEXT("declare"), HTML_ATTR_declare_VAL_Yes_},
                    455:    {HTML_ATTR_defer, TEXT("defer"), HTML_ATTR_defer_VAL_Yes_},
                    456:    {HTML_ATTR_disabled, TEXT("disabled"), HTML_ATTR_disabled_VAL_Yes_},
                    457:    {HTML_ATTR_readonly, TEXT("readonly"), HTML_ATTR_readonly_VAL_Yes_},
                    458:    {HTML_ATTR_no_resize, TEXT("noresize"), HTML_ATTR_no_resize_VAL_Yes_},
                    459:    {0, TEXT(""), 0}                    /* Last entry. Mandatory */
                    460: };
1.6       cvs       461: 
                    462: 
                    463: /*----------------------------------------------------------------------
1.15      cvs       464:   ParseCharset:
1.6       cvs       465:   Parses the element HTTP-EQUIV and looks for the charset value.
                    466:   ----------------------------------------------------------------------*/
                    467: #ifdef __STDC__
1.15      cvs       468: void     ParseCharset (Element el, Document doc) 
1.6       cvs       469: #else  /* !__STDC__ */
1.15      cvs       470: void     ParseCharset (el, doc) 
1.6       cvs       471: Element  el;
                    472: Document doc;
                    473: #endif /* !__STDC__ */
                    474: {
1.15      cvs       475:    AttributeType attrType;
                    476:    Attribute     attr;
                    477:    SSchema       docSSchema;
                    478:    CHARSET       charset;
1.6       cvs       479:    CHAR_T       *text, *text2, *ptrText, *str;
                    480:    CHAR_T        charsetname[MAX_LENGTH];
1.15      cvs       481:    int           length;
1.6       cvs       482:    int           pos, index = 0;
                    483: 
1.15      cvs       484:    charset = TtaGetDocumentCharset (doc);
                    485:    if (charset != UNDEFINED_CHARSET)
                    486:      /* the charset was already defined by the http header */
                    487:      return;
1.6       cvs       488: 
                    489:    docSSchema = TtaGetDocumentSSchema (doc);
                    490:    attrType.AttrSSchema = docSSchema;
                    491:    attrType.AttrTypeNum = HTML_ATTR_http_equiv;
                    492:    attr = TtaGetAttribute (el, attrType);
                    493:    if (attr != NULL)
                    494:      {
                    495:        /* There is a HTTP-EQUIV attribute */
                    496:        length = TtaGetTextAttributeLength (attr);
                    497:        if (length > 0)
                    498:         {
                    499:           text = TtaAllocString (length + 1);
                    500:           TtaGiveTextAttributeValue (attr, text, &length);
                    501:           if (!ustrcasecmp (text, TEXT("content-type")))
                    502:             {
                    503:               attrType.AttrTypeNum = HTML_ATTR_meta_content;
                    504:               attr = TtaGetAttribute (el, attrType);
                    505:               if (attr != NULL)
                    506:                 {
                    507:                   length = TtaGetTextAttributeLength (attr);
                    508:                   if (length > 0)
                    509:                     {
                    510:                       text2 = TtaAllocString (length + 1);
                    511:                       TtaGiveTextAttributeValue (attr, text2, &length);
                    512:                       ptrText = text2;
                    513:                       while (*ptrText)
                    514:                         {
                    515:                           *ptrText = utolower (*ptrText);
                    516:                           ptrText++;
                    517:                         }
                    518:                       
                    519:                       str = ustrstr (text2, TEXT("charset="));
                    520:                       if (str)
                    521:                         {
                    522:                           pos = str - text2 + 8;
                    523:                           while (text2[pos] != WC_SPACE &&
                    524:                                  text2[pos] != WC_TAB && text2[pos] != WC_EOS)
                    525:                             charsetname[index++] = text2[pos++];
                    526:                           charsetname[index] = WC_EOS;
1.15      cvs       527:                           charset = TtaGetCharset (charsetname);
                    528:                           if (charset != UNDEFINED_CHARSET)
                    529:                             TtaSetDocumentCharset (doc, charset);
1.6       cvs       530:                         }
                    531:                       TtaFreeMemory (text2);
                    532:                     }       
                    533:                 } 
                    534:             }
                    535:           TtaFreeMemory (text);
                    536:         }
                    537:      }
                    538: }
                    539: 
                    540: /*----------------------------------------------------------------------
                    541:   XhtmlElementComplete
                    542:   Complete XHTML elements.
                    543:   Check its attributes and its contents.
                    544:   ----------------------------------------------------------------------*/
                    545: #ifdef __STDC__
1.15      cvs       546: void       XhtmlElementComplete (Element el, Document doc, int *error)
1.6       cvs       547: #else
1.15      cvs       548: void       XhtmlElementComplete (el, doc, error)
                    549: Element    el;
                    550: Document   doc;
                    551: int       *error;
1.6       cvs       552: #endif
                    553: {
1.8       cvs       554: #ifdef EXPAT_PARSER
1.6       cvs       555:    ElementType         elType, newElType, childType;
                    556:    Element             constElem, child, desc, leaf, prev, next, last,
                    557:                       elFrames, lastFrame, lastChild;
                    558:    Attribute           attr;
                    559:    AttributeType       attrType;
                    560:    Language            lang;
                    561:    STRING              text;
                    562:    CHAR_T              lastChar[2];
                    563:    STRING              name1;
                    564:    int                 length;
                    565:    SSchema             docSSchema;
                    566: 
                    567:    *error = 0;
                    568:    docSSchema = TtaGetDocumentSSchema (doc);
                    569: 
                    570:    elType = TtaGetElementType (el);
                    571:    /* is this a block-level element in a character-level element? */
1.19    ! cvs       572:    if (!IsXMLElementInline (el) &&
        !           573:        elType.ElTypeNum != HTML_EL_Comment_)
1.6       cvs       574:        BlockInCharLevelElem (el);
                    575: 
                    576:    newElType.ElSSchema = elType.ElSSchema;
                    577:    switch (elType.ElTypeNum)
                    578:      {
                    579:      case HTML_EL_Object:      /*  it's an object */
                    580:        /* create Object_Content */
                    581:        child = TtaGetFirstChild (el);
                    582:        if (child != NULL)
                    583:           elType = TtaGetElementType (child);
                    584:         
                    585:        /* is it the PICTURE element ? */
                    586:        if (child == NULL || elType.ElTypeNum != HTML_EL_PICTURE_UNIT)
                    587:         {
                    588:           desc = child;
                    589:           /* create the PICTURE element */
                    590:           elType.ElTypeNum = HTML_EL_PICTURE_UNIT;
                    591:           child = TtaNewTree (doc, elType, "");
                    592:           if (desc == NULL)
                    593:               TtaInsertFirstChild (&child, el, doc);
                    594:           else
                    595:               TtaInsertSibling (child, desc, TRUE, doc);
                    596:         }
                    597: 
                    598:        /* copy attribute data into SRC attribute of Object_Image */
                    599:        attrType.AttrSSchema = docSSchema;
                    600:        attrType.AttrTypeNum = HTML_ATTR_data;
                    601:        attr = TtaGetAttribute (el, attrType);
                    602:        if (attr != NULL)
                    603:         {
                    604:           length = TtaGetTextAttributeLength (attr);
                    605:           if (length > 0)
                    606:             {
                    607:               name1 = TtaAllocString (length + 1);
                    608:               TtaGiveTextAttributeValue (attr, name1, &length);
                    609:               attrType.AttrTypeNum = HTML_ATTR_SRC;
                    610:               attr = TtaGetAttribute (child, attrType);
                    611:               if (attr == NULL)
                    612:                 {
                    613:                   attr = TtaNewAttribute (attrType);
                    614:                   TtaAttachAttribute (child, attr, doc);
                    615:                 }
                    616:               TtaSetAttributeText (attr, name1, child, doc);
                    617:               TtaFreeMemory (name1);
                    618:             }
                    619:         }
                    620: 
                    621:        /* is the Object_Content element already created ? */
                    622:        desc = child;
                    623:        TtaNextSibling(&desc);
                    624:        if (desc != NULL)
                    625:           elType = TtaGetElementType (desc);
                    626:         
                    627:        /* is it the Object_Content element ? */
                    628:        if (desc == NULL || elType.ElTypeNum != HTML_EL_Object_Content)
                    629:         {
                    630:           /* create Object_Content */
                    631:           elType.ElTypeNum = HTML_EL_Object_Content;
                    632:           desc = TtaNewTree (doc, elType, "");
                    633:           TtaInsertSibling (desc, child, FALSE, doc);
                    634:           /* move previous existing children into Object_Content */
                    635:           child = TtaGetLastChild(el);
                    636:           while (child != desc)
                    637:             {
                    638:               TtaRemoveTree (child, doc);
                    639:               TtaInsertFirstChild (&child, desc, doc);
                    640:               child = TtaGetLastChild(el);
                    641:             }
                    642:         }
                    643:        break;
                    644:        
                    645:      case HTML_EL_Unnumbered_List:
                    646:      case HTML_EL_Numbered_List:
                    647:      case HTML_EL_Menu:
                    648:      case HTML_EL_Directory:
                    649:        /* It's a List element. It should only have List_Item children.
                    650:          If it has List element chidren, move these List elements
                    651:          within their previous List_Item sibling.  This is to fix
                    652:          a bug in document generated by Mozilla. */
                    653:        prev = NULL;
                    654:        next = NULL;
                    655:        child = TtaGetFirstChild (el);
                    656:        while (child != NULL)
                    657:         {
                    658:           next = child;
                    659:           TtaNextSibling (&next);
                    660:           elType = TtaGetElementType (child);
                    661:           if (elType.ElTypeNum == HTML_EL_Unnumbered_List ||
                    662:               elType.ElTypeNum == HTML_EL_Numbered_List ||
                    663:               elType.ElTypeNum == HTML_EL_Menu ||
                    664:               elType.ElTypeNum == HTML_EL_Directory)
                    665:             /* this list element is a child of another list element */
                    666:             if (prev)
                    667:               {
                    668:                 elType = TtaGetElementType (prev);
                    669:                 if (elType.ElTypeNum == HTML_EL_List_Item)
                    670:                   {
                    671:                     /* get the last child of the previous List_Item */
                    672:                     desc = TtaGetFirstChild (prev);
                    673:                     last = NULL;
                    674:                     while (desc)
                    675:                       {
                    676:                         last = desc;
                    677:                         TtaNextSibling (&desc);
                    678:                       }
                    679:                     /* move the list element after the last child of the
                    680:                        previous List_Item */
                    681:                     TtaRemoveTree (child, doc);
                    682:                     if (last)
                    683:                       TtaInsertSibling (child, last, FALSE, doc);
                    684:                     else
                    685:                       TtaInsertFirstChild (&child, prev, doc);
                    686:                     child = prev;
                    687:                   }
                    688:               }
                    689:           prev = child;
                    690:           child = next;
                    691:         }
                    692:        break;
                    693:        
                    694:      case HTML_EL_FRAMESET:
                    695:        /* The FRAMESET element is now complete.  Gather all its FRAMESET
                    696:          and FRAME children and wrap them up in a Frames element */
                    697:        elFrames = NULL; lastFrame = NULL;
                    698:        lastChild = NULL;
                    699:        child = TtaGetFirstChild (el);
                    700:        while (child != NULL)
                    701:         {
                    702:           next = child;
                    703:           TtaNextSibling (&next);
                    704:           elType = TtaGetElementType (child);
                    705:           if (elType.ElTypeNum == HTML_EL_FRAMESET ||
                    706:               elType.ElTypeNum == HTML_EL_FRAME ||
                    707:               elType.ElTypeNum == HTML_EL_Comment_)
                    708:             {
                    709:               /* create the Frames element if it does not exist */
                    710:               if (elFrames == NULL)
                    711:                 {
                    712:                   newElType.ElSSchema = docSSchema;
                    713:                   newElType.ElTypeNum = HTML_EL_Frames;
                    714:                   elFrames = TtaNewElement (doc, newElType);
                    715:                   XmlSetElemLineNumber (elFrames);
                    716:                   TtaInsertSibling (elFrames, child, TRUE, doc);
                    717:                 }
                    718:               /* move the element as the last child of the Frames element */
                    719:               TtaRemoveTree (child, doc);
                    720:               if (lastFrame == NULL)
                    721:                 TtaInsertFirstChild (&child, elFrames, doc);
                    722:               else
                    723:                 TtaInsertSibling (child, lastFrame, FALSE, doc);
                    724:               lastFrame = child;
                    725:             }
                    726:           child = next;
                    727:         }
                    728:        break;
                    729:        
                    730:      case HTML_EL_Input:       /* it's an INPUT without any TYPE attribute */
                    731:        /* Create a child of type Text_Input */
                    732:        elType.ElTypeNum = HTML_EL_Text_Input;
                    733:        child = TtaNewTree (doc, elType, "");
                    734:        XmlSetElemLineNumber (child);
                    735:        TtaInsertFirstChild (&child, el, doc);
                    736:        /* now, process it like a Text_Input element */
                    737: 
                    738:      case HTML_EL_Text_Input:
                    739:      case HTML_EL_Password_Input:
                    740:      case HTML_EL_File_Input:
                    741:        /* get element Inserted_Text */
                    742:        child = TtaGetFirstChild (el);
                    743:        if (child != NULL)
                    744:         {
                    745:           attrType.AttrSSchema = docSSchema;
                    746:           attrType.AttrTypeNum = HTML_ATTR_Value_;
                    747:           attr = TtaGetAttribute (el, attrType);
                    748:           if (attr != NULL)
                    749:             {
                    750:               /* copy the value of attribute "value" into the first text
                    751:                  leaf of element */
                    752:               length = TtaGetTextAttributeLength (attr);
                    753:               if (length > 0)
                    754:                 {
                    755:                   /* get the text leaf */
                    756:                   leaf = TtaGetFirstChild (child);
                    757:                   if (leaf != NULL)
                    758:                     {
                    759:                       childType = TtaGetElementType (leaf);
                    760:                       if (childType.ElTypeNum == HTML_EL_TEXT_UNIT)
                    761:                         {
                    762:                           /* copy attribute value into the text leaf */
                    763:                           text = TtaAllocString (length + 1);
                    764:                           TtaGiveTextAttributeValue (attr, text, &length);
                    765:                           TtaSetTextContent (leaf, text, 
                    766:                                              TtaGetDefaultLanguage (), doc);
                    767:                           TtaFreeMemory (text);
                    768:                         }
                    769:                     }
                    770:                 }
                    771:             }
                    772:         }
                    773:        break;
                    774:        
                    775:      case HTML_EL_META:
1.15      cvs       776:        ParseCharset (el, doc);
1.6       cvs       777:        break;
                    778: 
                    779:      case HTML_EL_STYLE_:      /* it's a STYLE element */
1.8       cvs       780:      case HTML_EL_SCRIPT:      /* it's a SCRIPT element */
1.6       cvs       781:      case HTML_EL_Preformatted:        /* it's a PRE */
                    782:        /* if the last line of the Preformatted is empty, remove it */
                    783:        leaf = XmlLastLeafInElement (el);
                    784:        if (leaf != NULL)
                    785:         {
                    786:           elType = TtaGetElementType (leaf);
                    787:           if (elType.ElTypeNum == HTML_EL_TEXT_UNIT)
                    788:             /* the last leaf is a TEXT element */
                    789:             {
                    790:               length = TtaGetTextLength (leaf);
                    791:               if (length > 0)
                    792:                 {
                    793:                   TtaGiveSubString (leaf, lastChar, length, 1);
                    794:                   if (lastChar[0] == EOL)
                    795:                     /* last character is new line, delete it */
                    796:                     {
                    797:                       if (length == 1)
                    798:                         /* empty TEXT element */
                    799:                         TtaDeleteTree (leaf, doc);
                    800:                       else
                    801:                         /* remove the last character */
                    802:                         TtaDeleteTextContent (leaf, length, 1, doc);
                    803:                     }
                    804:                 }
                    805:             }
                    806:         }
                    807:        if (IsParsingCSS ())
                    808:         {
                    809:           text = GetStyleContents (el);
                    810:           if (text)
                    811:             {
                    812:               ReadCSSRules (doc, NULL, text, FALSE);
                    813:               TtaFreeMemory (text);
                    814:             }
                    815:           SetParsingCSS (FALSE);
                    816:         }
                    817:        /* and continue as if it were a Preformatted or a Script */
                    818:        break;
                    819:        
                    820:      case HTML_EL_Text_Area:   /* it's a Text_Area */
                    821:        SetParsingTextArea (FALSE);
                    822:        child = TtaGetFirstChild (el);
                    823:        if (child == NULL)
                    824:         /* it's an empty Text_Area */
                    825:         /* insert a Inserted_Text element in the element */
                    826:         {
                    827:           newElType.ElTypeNum = HTML_EL_Inserted_Text;
                    828:           child = TtaNewTree (doc, newElType, "");
                    829:           TtaInsertFirstChild (&child, el, doc);
                    830:         }
                    831:        else
                    832:         {
                    833:           /* save the text into Default_Value attribute */
                    834:           attrType.AttrSSchema = docSSchema;
                    835:           attrType.AttrTypeNum = HTML_ATTR_Default_Value;
                    836:           if (TtaGetAttribute (el, attrType) == NULL)
                    837:             /* attribute Default_Value is missing */
                    838:             {
                    839:               attr = TtaNewAttribute (attrType);
                    840:               TtaAttachAttribute (el, attr, doc);
                    841:               desc = TtaGetFirstChild (child);
                    842:               length = TtaGetTextLength (desc) + 1;
                    843:               text = TtaAllocString (length);
                    844:               TtaGiveTextContent (desc, text, &length, &lang);
                    845:               TtaSetAttributeText (attr, text, el, doc);
                    846:               TtaFreeMemory (text);
                    847:             }
                    848:         }
                    849:        /* insert a Frame element */
                    850:        newElType.ElTypeNum = HTML_EL_Frame;
                    851:        constElem = TtaNewTree (doc, newElType, "");
                    852:        TtaInsertSibling (constElem, child, FALSE, doc);
                    853:        break;
                    854:        
                    855:      case HTML_EL_Radio_Input:
                    856:      case HTML_EL_Checkbox_Input:
                    857:        /* put an attribute Checked if it is missing */
                    858:        attrType.AttrSSchema = docSSchema;
                    859:        attrType.AttrTypeNum = HTML_ATTR_Checked;
                    860:        if (TtaGetAttribute (el, attrType) == NULL)
                    861:         /* attribute Checked is missing */
                    862:         {
                    863:           attr = TtaNewAttribute (attrType);
                    864:           TtaAttachAttribute (el, attr, doc);
                    865:           TtaSetAttributeValue (attr, HTML_ATTR_Checked_VAL_No_, el, doc);
                    866:         }
                    867:        break;
                    868:        
                    869:      case HTML_EL_Option_Menu:
                    870:        /* Check that at least one option has a SELECTED attribute */
                    871:        OnlyOneOptionSelected (el, doc, TRUE);
                    872:        break;
                    873: 
                    874:      case HTML_EL_PICTURE_UNIT:
                    875:        break;
                    876:        
                    877:      case HTML_EL_LINK:
                    878:        CheckCSSLink (el, doc, docSSchema);
                    879:        break;
                    880:        
                    881:      case HTML_EL_Data_cell:
                    882:      case HTML_EL_Heading_cell:
                    883:        /* insert a pseudo paragraph into empty cells */
                    884:        child = TtaGetFirstChild (el);
                    885:        if (child == NULL)
                    886:         {
                    887:           elType.ElTypeNum = HTML_EL_Pseudo_paragraph;
                    888:           child = TtaNewTree (doc, elType, "");
                    889:           if (child != NULL)
                    890:               TtaInsertFirstChild (&child, el, doc);
                    891:         }
                    892:        
                    893:        /* detect whether we're parsing a whole table or just a cell */
                    894:        if (IsWithinTable ())
                    895:           NewCell (el, doc, FALSE);
                    896:        break;
                    897:        
                    898:      case HTML_EL_Table:
                    899:        CheckTable (el, doc);
                    900:        SubWithinTable ();
                    901:        break;
                    902:        
                    903:      case HTML_EL_TITLE:
                    904:        /* show the TITLE in the main window */
                    905:        UpdateTitle (el, doc);
                    906:        break;
                    907:        
                    908:      default:
                    909:        break;
                    910:      }
1.8       cvs       911: #endif /* EXPAT_PARSER */
1.6       cvs       912: }
1.1       cvs       913: 
                    914: /*----------------------------------------------------------------------
1.2       cvs       915:    XhtmlGetDTDName
                    916:    Return in DTDname the name of the DTD to be used for parsing the
                    917:    content of element named elementName.
                    918:    This element type appear with an 'X' in the ElemMappingTable.
1.1       cvs       919:   ----------------------------------------------------------------------*/
                    920: #ifdef __STDC__
1.2       cvs       921: void      XhtmlGetDTDName (STRING DTDname,
                    922:                           STRING elementName)
1.1       cvs       923: #else
1.2       cvs       924: void      XhtmlGetDTDName (DTDname,
                    925:                           elementName)
                    926: STRING     DTDname;
                    927: STRING     elementName;
                    928:  
1.1       cvs       929: #endif
                    930: {
1.8       cvs       931: #ifdef EXPAT_PARSER
1.2       cvs       932:    if (ustrcmp (elementName, TEXT("math")) == 0)
                    933:        ustrcpy (DTDname, TEXT("MathML"));
1.1       cvs       934:    else
1.2       cvs       935:        if (ustrcmp (elementName, TEXT("label")) == 0 ||
                    936:           ustrcmp (elementName, TEXT("text")) == 0)
                    937:           ustrcpy (DTDname, TEXT("HTML"));
                    938:        else
                    939:         ustrcpy (DTDname, TEXT(""));
1.17      cvs       940: #endif /* EXPAT_PARSER */
                    941: }
                    942: 
1.1       cvs       943: 
                    944: /*----------------------------------------------------------------------
1.16      cvs       945:    MapHTMLAttributeValue
1.2       cvs       946:    Search in the Attribute Value Mapping Table the entry for the attribute
                    947:    ThotAtt and its value AttrVal. Returns the corresponding Thot value.
1.1       cvs       948:   ----------------------------------------------------------------------*/
                    949: #ifdef __STDC__
1.16      cvs       950: void            MapHTMLAttributeValue (CHAR_T* AttrVal,
1.11      cvs       951:                                        AttributeType attrType,
                    952:                                        int* value)
1.1       cvs       953: #else
1.16      cvs       954: void            MapHTMLAttributeValue (AttrVal,
1.11      cvs       955:                                        attrType,
                    956:                                        value)
                    957: CHAR_T*         AttrVal;
                    958: AttributeType   attrType;
                    959: int*            value;
1.1       cvs       960: #endif
                    961: {
1.8       cvs       962: #ifdef EXPAT_PARSER
1.2       cvs       963:    int       i;
1.1       cvs       964: 
1.2       cvs       965:    *value = 0;
1.1       cvs       966:    i = 0;
                    967: 
1.2       cvs       968:    while (XhtmlAttrValueMappingTable[i].ThotAttr != attrType.AttrTypeNum &&
                    969:          XhtmlAttrValueMappingTable[i].ThotAttr != 0)
                    970:        i++;
                    971: 
                    972:    if (XhtmlAttrValueMappingTable[i].ThotAttr == attrType.AttrTypeNum)
                    973:      {
                    974:        do
1.18      cvs       975:            if (!ustrcmp (XhtmlAttrValueMappingTable[i].XMLattrValue, AttrVal))
1.2       cvs       976:               *value = XhtmlAttrValueMappingTable[i].ThotAttrValue;
1.1       cvs       977:           else 
1.2       cvs       978:               i++;
                    979:        while (*value <= 0 &&
                    980:              XhtmlAttrValueMappingTable[i].ThotAttr != 0);
1.1       cvs       981:      }
1.8       cvs       982: #endif /* EXPAT_PARSER */
1.1       cvs       983: }
                    984: 
                    985: /*---------------------------------------------------------------------------
1.2       cvs       986:    XhtmlMapEntity
1.1       cvs       987:    Search that entity in the entity table and return the corresponding value.
                    988:   ---------------------------------------------------------------------------*/
                    989: #ifdef __STDC__
1.13      cvs       990: void   XhtmlMapEntity (STRING entityName, int *entityValue, STRING alphabet)
1.1       cvs       991: #else
1.13      cvs       992: void   XhtmlMapEntity (entityName, entityValue, alphabet)
                    993: STRING  entityName;
                    994: int    *entityValue;
                    995: STRING  alphabet;
1.1       cvs       996: #endif
                    997: {
1.8       cvs       998: #ifdef EXPAT_PARSER
1.12      cvs       999:   int            i;
1.13      cvs      1000:   ThotBool       found;
1.1       cvs      1001: 
1.13      cvs      1002:   found = FALSE;
                   1003:   for (i = 0; XhtmlEntityTable[i].charCode >= 0 && ! found; i++)
                   1004:      found = !ustrcmp (XhtmlEntityTable[i].charName, entityName);
1.3       cvs      1005: 
1.13      cvs      1006:   if (found)
1.12      cvs      1007:     {
                   1008:       /* entity found */
1.13      cvs      1009:       i--;
1.12      cvs      1010:       *entityValue = XhtmlEntityTable[i].charCode;
                   1011:       *alphabet = 'L';
                   1012:     }
                   1013:   else
                   1014:     *alphabet = EOS;
1.8       cvs      1015: #endif /* EXPAT_PARSER */
1.11      cvs      1016: }
                   1017: 
#ifdef EXPAT_PARSER
/*----------------------------------------------------------------------
  PutNonISOlatin1Char
  Handle a character entity whose code is outside ISO Latin-1.
    code        character code of the entity
    prefix      string put in front of entityName in the EntityName
                attribute
    entityName  name of the entity
    context     current parser state
  If the entity occurs within an attribute value it is discarded.
  Otherwise a new read-only text leaf is created and inserted in the
  document; its content is the fallback string returned by
  GetFallbackCharacter, and it carries an EntityName attribute whose
  value is prefix followed by entityName.
  NOTE(review): buffer holds MaxEntityLength+10 characters and is filled
  with unbounded copies; callers presumably guarantee that prefix plus
  entityName fit - confirm.
  ----------------------------------------------------------------------*/
#ifdef __STDC__
static void    PutNonISOlatin1Char (int code, STRING prefix, STRING entityName, ParserData *context)
#else
static void    PutNonISOlatin1Char (code, prefix, entityName, context)
int            code;
STRING         prefix;
STRING         entityName;
ParserData    *context;
#endif
{
   Language      fallbackLang, savedLang;
   ElementType   elType;
   Element       elText;
   AttributeType attrType;
   Attribute     attr;
   CHAR_T        buffer[MaxEntityLength+10];

   if (context->readingAnAttrValue)
     /* this entity belongs to an attribute value: Thot can't mix */
     /* different languages in the same attribute value, so that  */
     /* character is just discarded */
     return;

   /* this entity belongs to the element contents */
   /* create a new text leaf */
   elType.ElSSchema = TtaGetDocumentSSchema (context->doc);
   elType.ElTypeNum = HTML_EL_TEXT_UNIT;
   elText = TtaNewElement (context->doc, elType);
   XmlSetElemLineNumber (elText);
   XhtmlInsertElement (&elText);
   context->lastElement = elText;
   /* the previous code stored FALSE here and overwrote it with TRUE on */
   /* the next line; only the effective value is kept */
   context->lastElementClosed = TRUE;

   /* try to find a fallback character; the parser language is saved */
   /* here and restored once the leaf content has been set */
   savedLang = context->language;
   GetFallbackCharacter (code, buffer, &fallbackLang);

   /* put that fallback character in the new text leaf */
   TtaSetTextContent (elText, buffer, fallbackLang, context->doc);
   context->language = savedLang;

   /* make that text leaf read-only */
   TtaSetAccessRight (elText, ReadOnly, context->doc);

   /* associate an attribute EntityName with the new text leaf */
   attrType.AttrSSchema = TtaGetDocumentSSchema (context->doc);
   attrType.AttrTypeNum = HTML_ATTR_EntityName;
   attr = TtaNewAttribute (attrType);
   TtaAttachAttribute (elText, attr, context->doc);
   ustrcpy (buffer, prefix);
   ustrcat (buffer, entityName);
   TtaSetAttributeText (attr, buffer, elText, context->doc);
   context->mergeText = FALSE;
}
#endif /* EXPAT_PARSER */
                   1083: 
1.11      cvs      1084: /*----------------------------------------------------------------------
                   1085:    XhtmlEntityCreated
                   1086:    A XTHML entity has been created by the XML parser.
                   1087:   ----------------------------------------------------------------------*/
                   1088: #ifdef __STDC__
1.13      cvs      1089: void        XhtmlEntityCreated (int entityVal, Language lang, STRING entityName, ParserData *context)
1.11      cvs      1090: #else
1.13      cvs      1091: void        XhtmlEntityCreated (entityVal, lang, entityName, context)
                   1092: int         entityVal;
                   1093: Language    lang;
                   1094: STRING      entityName;
                   1095: ParserData *context;
1.11      cvs      1096: #endif
                   1097: { 
1.12      cvs      1098: #ifdef EXPAT_PARSER
1.13      cvs      1099:   CHAR_T        buffer[2];
                   1100: 
1.11      cvs      1101:   if (lang < 0)
1.13      cvs      1102:     PutInXmlElement (entityName);
1.11      cvs      1103:   else
                   1104:     {
                   1105: #ifdef LC
                   1106:       printf (" \n code=%d", entityVal);
                   1107: #endif /* LC */
                   1108:       if (entityVal < 255)
1.13      cvs      1109:        {
                   1110:          buffer[0] = TEXT(entityVal);
                   1111:          buffer[1] = WC_EOS;
                   1112:          PutInXmlElement (buffer);
                   1113:        }
1.11      cvs      1114:       else
1.13      cvs      1115:        PutNonISOlatin1Char (entityVal, TEXT(""), entityName, context);
1.11      cvs      1116:     }
1.12      cvs      1117: #endif /* EXPAT_PARSER */
1.1       cvs      1118: }
                   1119: 
                   1120: /*--------------------  Entities  (end)  ---------------------*/

Webmaster