/*
 *
 *  (c) COPYRIGHT INRIA and W3C, 1996-2007
 *  Please first read the full copyright statement in file COPYRIGHT.
 *
 */

/*
 * XHTMLbuilder.c
 * Builds the corresponding abstract tree for a Thot document of type HTML.
 *
 * Authors: L. Carcone
 *          V. Quint
 */

#define THOT_EXPORT extern
#include "amaya.h"
#include "css.h"
#include "parser.h"
#include "HTML.h"
#include "fetchHTMLname.h"

#include "css_f.h"
#include "EDITstyle_f.h"
#include "fetchXMLname_f.h"
#include "fetchHTMLname_f.h"
#include "html2thot_f.h"
#include "HTMLactions_f.h"
#include "HTMLedit_f.h"
#include "HTMLform_f.h"
#include "HTMLimage_f.h"
#include "HTMLtable_f.h"
/* NOTE(review): HTMLimage_f.h is already included two lines above */
#include "HTMLimage_f.h"
#include "init_f.h"
#include "UIcss_f.h"
#include "styleparser_f.h"
#include "XHTMLbuilder_f.h"
#include "Xml2thot_f.h"

/* Elements that cannot contain text as immediate children.
   When some text is present in the HTML file it must be
   surrounded by a Pseudo_paragraph element.
   The list is terminated by a 0 entry: XhtmlCannotContainText stops
   scanning at the first entry that is not greater than 0. */
static int NoTextChild[] =
  {
    HTML_EL_Document, HTML_EL_HTML, HTML_EL_HEAD, HTML_EL_BODY,
    HTML_EL_Definition_List, HTML_EL_Block_Quote, HTML_EL_Directory,
    HTML_EL_Form, HTML_EL_Menu, HTML_EL_FIELDSET,
    HTML_EL_Numbered_List, HTML_EL_Option_Menu,
    HTML_EL_Unnumbered_List, HTML_EL_Definition, HTML_EL_List_Item,
    HTML_EL_MAP, HTML_EL_map, HTML_EL_Applet,
    HTML_EL_Object, HTML_EL_IFRAME, HTML_EL_NOFRAMES,
    HTML_EL_Division, HTML_EL_Center, HTML_EL_NOSCRIPT,
    HTML_EL_Data_cell, HTML_EL_Heading_cell, HTML_EL_INS, HTML_EL_DEL,
    0};

/* Define a pointer to let parser functions access the HTML entity table */
extern XmlEntity *pXhtmlEntityTable;

/* maximum size of error messages (size of the msgBuffer arrays in this
   file) */
#define MaxMsgLength 200

/*----------------------------------------------------------------------
  ParseCharsetAndContentType:
  Parses the element HTTP-EQUIV and looks for the charset value.
----------------------------------------------------------------------*/ void ParseCharsetAndContentType (Element el, Document doc) { AttributeType attrType; Attribute attr; ElementType elType; CHARSET charset; char *text, *text2, *ptrText, *str; char charsetname[MAX_LENGTH]; int length; int pos, index = 0; charset = TtaGetDocumentCharset (doc); if (charset != UNDEFINED_CHARSET && DocumentMeta[doc] && DocumentMeta[doc]->content_type) return; elType = TtaGetElementType (el); attrType.AttrSSchema = elType.ElSSchema; attrType.AttrTypeNum = HTML_ATTR_http_equiv; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { /* There is a HTTP-EQUIV attribute */ length = TtaGetTextAttributeLength (attr); if (length > 0) { text = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text, &length); if (!strcasecmp (text, "content-type")) { attrType.AttrTypeNum = HTML_ATTR_meta_content; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { length = TtaGetTextAttributeLength (attr); if (length > 0) { text2 = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text2, &length); ptrText = text2; while (*ptrText) { *ptrText = tolower (*ptrText); ptrText++; } if (!DocumentMeta[doc]) DocumentMeta[doc] = DocumentMetaDataAlloc (); if (DocumentMeta[doc]->content_type == NULL) { if (!strncmp (text2, "text/html", 9)) DocumentMeta[doc]->content_type = TtaStrdup ("text/html"); else DocumentMeta[doc]->content_type = TtaStrdup (AM_XHTML_MIME_TYPE); } if (charset == UNDEFINED_CHARSET) { /* the charset is not already defined by the http header */ str = strstr (text2, "charset="); if (str) { pos = str - text2 + 8; while (text2[pos] != SPACE && text2[pos] != TAB && text2[pos] != EOS) charsetname[index++] = text2[pos++]; charsetname[index] = EOS; charset = TtaGetCharset (charsetname); if (charset != UNDEFINED_CHARSET) TtaSetDocumentCharset (doc, charset, FALSE); } } TtaFreeMemory (text2); } } } TtaFreeMemory (text); } } } #ifdef TEMPLATES 
/*---------------------------------------------------------------------- ----------------------------------------------------------------------*/ void ParseTemplateMeta (Element el, Document doc) { AttributeType attrType; Attribute attr; ElementType elType; char *text, *text2, *ptrText; int length; elType = TtaGetElementType (el); attrType.AttrSSchema = elType.ElSSchema; attrType.AttrTypeNum = HTML_ATTR_meta_name; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { /* There is a name attribute */ length = TtaGetTextAttributeLength (attr); if (length > 0) { text = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text, &length); if (!strcasecmp (text, "template")) { /* We are parsing the 'template' meta */ attrType.AttrTypeNum = HTML_ATTR_meta_content; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { length = TtaGetTextAttributeLength (attr); if (length > 0) { text2 = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text2, &length); ptrText = text2; /* Convert all char to lower case */ while (*ptrText) { *ptrText = tolower (*ptrText); ptrText++; } if (!DocumentMeta[doc]) DocumentMeta[doc] = DocumentMetaDataAlloc (); if (DocumentMeta[doc]->template_version == NULL) { DocumentMeta[doc]->template_version = TtaStrdup (text2); } TtaFreeMemory (text2); } } } TtaFreeMemory (text); } } } #endif /* TEMPLATES */ /*---------------------------------------------------------------------- XhtmlCannotContainText Return TRUE if element el is a block element. 
----------------------------------------------------------------------*/
/* More precisely: returns TRUE when elType belongs to a structure schema
   whose name is not "HTML", or when elType.ElTypeNum equals an entry of
   the NoTextChild table (the terminating 0 entry included); returns
   FALSE otherwise. */
ThotBool XhtmlCannotContainText (ElementType elType)
{
  int   i;

  if (strcmp (TtaGetSSchemaName (elType.ElSSchema), "HTML"))
    /* not an HTML element */
    return TRUE;

  /* stop on the first matching entry or on the 0 terminator */
  i = 0;
  while (NoTextChild[i] > 0 && NoTextChild[i] != elType.ElTypeNum)
    i++;
  if (NoTextChild[i] == elType.ElTypeNum)
    return TRUE;
  return FALSE;
}

/*----------------------------------------------------------------------
  CheckMandatoryAttribute
  If attribute attrNum is not present on element el, generate a
  parsing error message.
  The attribute is looked up in the schema of el. Nothing is reported
  when GetXMLAttributeName returns no name for the attribute. The
  message goes to XmlParseError when DocumentMeta[doc] exists and has
  its xmlformat flag set, to HTMLParseError otherwise.
  ----------------------------------------------------------------------*/
void CheckMandatoryAttribute (Element el, Document doc, int attrNum)
{
  ElementType    elType;
  Attribute      attr;
  AttributeType  attrType;
  int            lineNum;
  char          *name;
  char           msgBuffer[MaxMsgLength];

  elType = TtaGetElementType (el);
  attrType.AttrSSchema = elType.ElSSchema;
  attrType.AttrTypeNum = attrNum;
  attr = TtaGetAttribute (el, attrType);
  if (attr == NULL)
    {
      name = GetXMLAttributeName (attrType, elType, doc);
      if (name)
        {
          /* bounded formatting: the message is truncated rather than
             written past the end of msgBuffer */
          snprintf (msgBuffer, sizeof (msgBuffer),
                    "Missing mandatory attribute %s for element %s",
                    name, TtaGetElementTypeName (TtaGetElementType (el)));
          lineNum = TtaGetElementLineNumber (el);
          if (DocumentMeta[doc] && DocumentMeta[doc]->xmlformat)
            XmlParseError (errorParsing, (unsigned char *)msgBuffer, lineNum);
          else
            HTMLParseError (doc, msgBuffer, lineNum);
        }
    }
}

/*----------------------------------------------------------------------
  AddRowsColumns
  Add default rows and columns attributes to a TEXTAREA element.
----------------------------------------------------------------------*/ void AddRowsColumns (Element el, Document doc) { ElementType elType; Attribute attr; AttributeType attrType; /* Add defaults rows and columns to display the textarea */ elType = TtaGetElementType (el); attrType.AttrSSchema = elType.ElSSchema; attrType.AttrTypeNum = HTML_ATTR_Rows; attr = TtaGetAttribute (el, attrType); if (attr == NULL) { attr = TtaNewAttribute (attrType); TtaAttachAttribute (el, attr, doc); TtaSetAttributeValue (attr, 4, el, doc); } attrType.AttrTypeNum = HTML_ATTR_Columns; attr = TtaGetAttribute (el, attrType); if (attr == NULL) { attr = TtaNewAttribute (attrType); TtaAttachAttribute (el, attr, doc); TtaSetAttributeValue (attr, 20, el, doc); } } /*---------------------------------------------------------------------- XhtmlElementComplete Complete Xhtml elements. Check its attributes and its contents. ----------------------------------------------------------------------*/ void XhtmlElementComplete (ParserData *context, Element el, int *error) { Document doc; ElementType elType, newElType, childType; Element child, desc, leaf, prev, next, last, elFrames, lastFrame, lastChild, parent, picture, content; Attribute attr; AttributeType attrType; SSchema htmlSchema; Language lang; char *text; char lastChar[2]; char *name1, *data; char msgBuffer[MaxMsgLength]; int lineNum, typenum, length; ThotBool isImage, isInline, clean; *error = 0; doc = context->doc; elType = TtaGetElementType (el); htmlSchema = elType.ElSSchema; isInline = IsXMLElementInline (elType, doc); newElType.ElSSchema = elType.ElSSchema; if (elType.ElTypeNum == HTML_EL_ins || elType.ElTypeNum == HTML_EL_del) { child = TtaGetFirstChild (el); if (IsBlockElement (child)) { // change the element type if (elType.ElTypeNum == HTML_EL_ins) TtaChangeTypeOfElement (el, doc, HTML_EL_INS); else TtaChangeTypeOfElement (el, doc, HTML_EL_DEL); isInline = FALSE; } } if (elType.ElTypeNum == HTML_EL_Paragraph || elType.ElTypeNum == 
HTML_EL_Address || elType.ElTypeNum == HTML_EL_H1 || elType.ElTypeNum == HTML_EL_H2 || elType.ElTypeNum == HTML_EL_H3 || elType.ElTypeNum == HTML_EL_H4 || elType.ElTypeNum == HTML_EL_H5 || elType.ElTypeNum == HTML_EL_H6 || elType.ElTypeNum == HTML_EL_Preformatted || elType.ElTypeNum == HTML_EL_Term || elType.ElTypeNum == HTML_EL_LEGEND || elType.ElTypeNum == HTML_EL_CAPTION || elType.ElTypeNum == HTML_EL_rb || elType.ElTypeNum == HTML_EL_rt || (isInline && !TtaIsLeaf (elType) && elType.ElTypeNum != HTML_EL_Text_Area)) /* It's an element that is supposed to contain at least a Basic_Elem. If it is empty, insert a Basic_Elem to allow the user to put the selection within this element */ /* Don't do it for a Text_Area, as an Inserted_Text element has to be created (see below) */ { child = TtaGetFirstChild (el); if (child == NULL) /* it's an empty inline element */ /* insert a Basic_Elem element in the element */ { newElType.ElTypeNum = HTML_EL_Basic_Elem; child = TtaNewTree (doc, newElType, ""); TtaInsertFirstChild (&child, el, doc); } } if (!isInline) /* It's a block-level element. Is it within a character-level element? */ if (elType.ElTypeNum != HTML_EL_Comment_ && elType.ElTypeNum != HTML_EL_ASP_element && elType.ElTypeNum != HTML_EL_XMLPI) BlockInCharLevelElem (el); typenum = elType.ElTypeNum; switch (typenum) { case HTML_EL_PICTURE_UNIT: /* Check the mandatory SRC attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_SRC); break; case HTML_EL_Object: /* it's an object */ data = NULL; isImage = FALSE; /* is there a type attribute on the object element? */ attrType.AttrSSchema = elType.ElSSchema; attrType.AttrTypeNum = HTML_ATTR_Object_type; attr = TtaGetAttribute (el, attrType); if (attr) /* there is a type attribute. 
Get its value to see if the object represents an image */ { length = TtaGetTextAttributeLength (attr); if (length > 0) { name1 = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, name1, &length); if (!strcmp (name1, AM_MATHML_MIME_TYPE) || !strcmp (name1, "application/postscript") || !strcmp (name1, "image/x-bitmap") || !strcmp (name1, "image/x-xpixmap") || !strcmp (name1, "image/gif") || !strcmp (name1, "image/jpeg") || !strcmp (name1, "image/png") || !strcmp (name1, "image/svg") || !strcmp (name1, AM_SVG_MIME_TYPE) || !strcmp (name1, AM_XHTML_MIME_TYPE) || !strcmp (name1, "text/html") || !strcmp (name1, "text/htm") || !strcmp (name1, AM_GENERIC_XML_MIME_TYPE)) isImage = TRUE; TtaFreeMemory (name1); } } attrType.AttrTypeNum = HTML_ATTR_data; attr = TtaGetAttribute (el, attrType); if (attr) /* the object has a data attribute */ { length = TtaGetTextAttributeLength (attr); if (length > 0) { data = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, data, &length); if (!isImage && length >= 5) if (!strcmp (&data[length-4], ".mml") || !strcmp (&data[length-4], ".gif") || !strcmp (&data[length-4], ".jpg") || !strcmp (&data[length-5], ".jpeg") || !strcmp (&data[length-4], ".png") || !strcmp (&data[length-4], ".svg") || !strcmp (&data[length-5], ".svgz") || !strcmp (&data[length-4], ".htm") || !strcmp (&data[length-5], ".html") || !strcmp (&data[length-4], ".xml")) isImage = TRUE; } } else { attrType.AttrTypeNum = HTML_ATTR_classid; attr = TtaGetAttribute (el, attrType); if (attr) /* the object has a data attribute */ { length = TtaGetTextAttributeLength (attr); if (length > 0) { data = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, data, &length); if (!isImage && length >= 5) if (!strcmp (&data[length-4], ".mml") || !strcmp (&data[length-4], ".gif") || !strcmp (&data[length-4], ".jpg") || !strcmp (&data[length-5], ".jpeg") || !strcmp (&data[length-4], ".png") || !strcmp (&data[length-4], ".svg") || !strcmp 
(&data[length-5], ".svgz") || !strcmp (&data[length-4], ".htm") || !strcmp (&data[length-5], ".html") || !strcmp (&data[length-4], ".xml")) isImage = TRUE; } } } picture = NULL; /* no PICTURE element yet */ child = TtaGetFirstChild (el); if (isImage) { /* the object represents an image. We need a PICTURE element as child of the object to hold the image */ elType.ElTypeNum = HTML_EL_PICTURE_UNIT; picture = TtaNewTree (doc, elType, ""); if (child) TtaInsertSibling (picture, child, TRUE, doc); else TtaInsertFirstChild (&picture, el, doc); /* copy attribute data of the object into the SRC attribute of the PICTURE element */ if (data) /* the object has a data attribute */ { // remove extra spaces clean = FALSE; while (length > 0 && data[length-1] == SPACE) { data[length-1] = EOS; length--; clean = TRUE; } if (clean) TtaSetAttributeText (attr, data, el, doc); // copy the attribute in the picture element attrType.AttrTypeNum = HTML_ATTR_SRC; attr = TtaGetAttribute (picture, attrType); if (attr == NULL) { attr = TtaNewAttribute (attrType); TtaAttachAttribute (picture, attr, doc); } TtaSetAttributeText (attr, data, picture, doc); } attrType.AttrTypeNum = HTML_ATTR_Height_; attr = TtaGetAttribute (el, attrType); if (attr) /* the Object has a height attribute. Applies it to the picture element */ { length = TtaGetTextAttributeLength (attr); if (length > 0) { text = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text, &length); /* create the corresponding attribute IntHeightPercent or */ /* IntHeightPxl */ CreateAttrHeightPercentPxl (text, el, doc, -1); TtaFreeMemory (text); } } attrType.AttrTypeNum = HTML_ATTR_Width__; attr = TtaGetAttribute (el, attrType); if (attr) /* the Object has a width attribute. 
Applies it to the picture element */ { length = TtaGetTextAttributeLength (attr); if (length > 0) { text = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text, &length); /* create the corresponding attribute IntWidthPercent or */ /* IntWidthPxl */ CreateAttrWidthPercentPxl (text, el, doc, -1); TtaFreeMemory (text); } } } /* is the Object_Content element already created ? */ if (child) /* the object element has at least 1 child element */ { /* put an attribute NoObjects on the Object element: this attribute will be removed when and if the actual object is loaded (see module HTMLimage.c */ attrType.AttrTypeNum = HTML_ATTR_NoObjects; attr = TtaGetAttribute (el, attrType); if (!attr) { attr = TtaNewAttribute (attrType); TtaSetAttributeValue (attr, 1, el, doc); TtaAttachAttribute (el, attr, doc); } content = NULL; desc = child; elType = TtaGetElementType (desc); if (elType.ElTypeNum != HTML_EL_Object_Content) { TtaNextSibling(&desc); if (desc) elType = TtaGetElementType (desc); } /* is it the Object_Content element ? 
*/ if (elType.ElTypeNum == HTML_EL_Object_Content) content = desc; else { /* create an Object_Content element */ elType.ElTypeNum = HTML_EL_Object_Content; content = TtaNewElement (doc, elType); if (picture) TtaInsertSibling (content, picture, FALSE, doc); else TtaInsertSibling (content, child, TRUE, doc); /* move previous existing children into Object_Content */ child = TtaGetLastChild(el); while (child != content) { TtaRemoveTree (child, doc); TtaInsertFirstChild (&child, content, doc); child = TtaGetLastChild(el); } } } TtaFreeMemory (data); break; case HTML_EL_Image_Input: case HTML_EL_IMG: /* Check the mandatory ALT attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_ALT); /* Check the mandatory SRC attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_SRC); /* We need a PICTURE element as child to hold the image */ picture = NULL; for (child = TtaGetFirstChild (el); child && !picture; TtaNextSibling (&child)) { childType = TtaGetElementType (child); if (childType.ElTypeNum == HTML_EL_PICTURE_UNIT && childType.ElSSchema == elType.ElSSchema) // the picture is already created picture = child; } if (picture == NULL) { childType.ElTypeNum = HTML_EL_PICTURE_UNIT; picture = TtaNewTree (doc, childType, ""); if (child) TtaInsertSibling (picture, child, TRUE, doc); else TtaInsertFirstChild (&picture, el, doc); } attrType.AttrSSchema = elType.ElSSchema; attrType.AttrTypeNum = HTML_ATTR_SRC; attr = TtaGetAttribute (el, attrType); if (attr) /* the img has a src attribute */ { length = TtaGetTextAttributeLength (attr); data = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, data, &length); if (data) { // remove extra spaces clean = FALSE; while (length > 0 && data[length-1] == SPACE) { data[length-1] = EOS; length--; clean = TRUE; } if (clean) TtaSetAttributeText (attr, data, el, doc); // copy the attribute in the picture element attr = TtaGetAttribute (picture, attrType); if (attr == NULL) { attr = TtaNewAttribute (attrType); TtaAttachAttribute 
(picture, attr, doc); } TtaSetAttributeText (attr, data, picture, doc); TtaFreeMemory (data); } } if (typenum == HTML_EL_IMG) { attrType.AttrTypeNum = HTML_ATTR_Height_; attr = TtaGetAttribute (el, attrType); if (attr) /* the img has a height attribute. Applies it to the picture element */ { length = TtaGetTextAttributeLength (attr); if (length > 0) { text = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text, &length); /* create the corresponding attribute IntHeightPercent or */ /* IntHeightPxl */ CreateAttrHeightPercentPxl (text, el, doc, -1); TtaFreeMemory (text); } } attrType.AttrTypeNum = HTML_ATTR_Width__; attr = TtaGetAttribute (el, attrType); if (attr) /* the img has a width attribute. Applies it to the picture element */ { length = TtaGetTextAttributeLength (attr); if (length > 0) { text = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text, &length); /* create the corresponding attribute IntWidthPercent or */ /* IntWidthPxl */ CreateAttrWidthPercentPxl (text, el, doc, -1); TtaFreeMemory (text); } } } break; case HTML_EL_Parameter: /* Check the mandatory name attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_Param_name); break; case HTML_EL_IFRAME: /* it's an iframe */ child = TtaGetFirstChild (el); /* is the Iframe_Content element already created ? */ if (child) /* the iframe element has at least 1 child element */ { content = NULL; desc = child; elType = TtaGetElementType (desc); if (elType.ElTypeNum != HTML_EL_Iframe_Content) { TtaNextSibling(&desc); if (desc) elType = TtaGetElementType (desc); } /* is it the Iframe_Content element ? 
*/ if (elType.ElTypeNum == HTML_EL_Iframe_Content) content = desc; else { /* create an Iframe_Content element */ elType.ElTypeNum = HTML_EL_Iframe_Content; content = TtaNewElement (doc, elType); TtaInsertSibling (content, child, TRUE, doc); /* move previous existing children into Iframe_Content */ child = TtaGetLastChild(el); while (child != content) { TtaRemoveTree (child, doc); TtaInsertFirstChild (&child, content, doc); child = TtaGetLastChild(el); } } } break; case HTML_EL_Unnumbered_List: case HTML_EL_Numbered_List: case HTML_EL_Menu: case HTML_EL_Directory: /* It's a List element. It should only have List_Item children. If it has List element chidren, move these List elements within their previous List_Item sibling. This is to fix a bug in document generated by Mozilla. */ prev = NULL; next = NULL; child = TtaGetFirstChild (el); while (child != NULL) { next = child; TtaNextSibling (&next); elType = TtaGetElementType (child); if (elType.ElTypeNum == HTML_EL_Unnumbered_List || elType.ElTypeNum == HTML_EL_Numbered_List || elType.ElTypeNum == HTML_EL_Menu || elType.ElTypeNum == HTML_EL_Directory) /* this list element is a child of another list element */ if (prev) { elType = TtaGetElementType (prev); if (elType.ElTypeNum == HTML_EL_List_Item) { /* get the last child of the previous List_Item */ desc = TtaGetFirstChild (prev); last = NULL; while (desc) { last = desc; TtaNextSibling (&desc); } /* move the list element after the last child of the previous List_Item */ TtaRemoveTree (child, doc); if (last) TtaInsertSibling (child, last, FALSE, doc); else TtaInsertFirstChild (&child, prev, doc); child = prev; } } prev = child; child = next; } break; case HTML_EL_FRAMESET: /* The FRAMESET element is now complete. 
Gather all its FRAMESET and FRAME children and wrap them up in a Frames element */ elFrames = NULL; lastFrame = NULL; lastChild = NULL; child = TtaGetFirstChild (el); while (child != NULL) { next = child; TtaNextSibling (&next); elType = TtaGetElementType (child); if (elType.ElTypeNum == HTML_EL_FRAMESET || elType.ElTypeNum == HTML_EL_FRAME || elType.ElTypeNum == HTML_EL_Comment_|| elType.ElTypeNum == HTML_EL_ASP_element) { /* create the Frames element if it does not exist */ if (elFrames == NULL) { newElType.ElSSchema = htmlSchema; newElType.ElTypeNum = HTML_EL_Frames; elFrames = TtaNewElement (doc, newElType); if (DocumentMeta[doc]->xmlformat) XmlSetElemLineNumber (elFrames); else SetHtmlElemLineNumber (elFrames); TtaInsertSibling (elFrames, child, TRUE, doc); } /* move the element as the last child of the Frames element */ TtaRemoveTree (child, doc); if (lastFrame == NULL) TtaInsertFirstChild (&child, elFrames, doc); else TtaInsertSibling (child, lastFrame, FALSE, doc); lastFrame = child; } child = next; } break; case HTML_EL_Form: /* Check the mandatory action attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_Script_URL); break; case HTML_EL_Input: /* it's an INPUT without any TYPE attribute */ /* Create a child of type Text_Input */ elType.ElTypeNum = HTML_EL_Text_Input; child = TtaNewTree (doc, elType, ""); if (DocumentMeta[doc]->xmlformat) XmlSetElemLineNumber (child); else SetHtmlElemLineNumber (child); TtaInsertFirstChild (&child, el, doc); /* now, process it like a Text_Input element */ case HTML_EL_Text_Input: case HTML_EL_Password_Input: case HTML_EL_File_Input: /* set default size */ attrType.AttrSSchema = elType.ElSSchema; attrType.AttrTypeNum = HTML_ATTR_IntAreaSize; attr = TtaGetAttribute (el, attrType); if (!attr) CreateAttrIntAreaSize (20, el, doc); /* get element Inserted_Text */ child = TtaGetFirstChild (el); if (child != NULL) { attrType.AttrTypeNum = HTML_ATTR_Value_; attr = TtaGetAttribute (el, attrType); if (attr != NULL) { /* copy 
the value of attribute "value" into the first text leaf of element */ length = TtaGetTextAttributeLength (attr); if (length > 0) { /* get the text leaf */ leaf = TtaGetFirstChild (child); if (leaf != NULL) { childType = TtaGetElementType (leaf); if (childType.ElTypeNum == HTML_EL_TEXT_UNIT) { /* copy attribute value into the text leaf */ text = (char *)TtaGetMemory (length + 1); TtaGiveTextAttributeValue (attr, text, &length); TtaSetTextContent (leaf, (unsigned char *)text, TtaGetDefaultLanguage (), doc); TtaFreeMemory (text); } } } } } break; case HTML_EL_META: ParseCharsetAndContentType (el, doc); /* Check the mandatory CONTENT attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_meta_content); #ifdef TEMPLATES ParseTemplateMeta (el, doc); #endif /* TEMPLATES */ break; case HTML_EL_BASE: /* Check the mandatory HREF attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_HREF_); break; case HTML_EL_BaseFont: /* Check the mandatory size attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_BaseFontSize); break; case HTML_EL_BDO: /* Check the mandatory DIR attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_dir); break; case HTML_EL_STYLE_: /* it's a STYLE element */ case HTML_EL_SCRIPT_: /* it's a SCRIPT element */ case HTML_EL_Preformatted: /* it's a PRE */ if (elType.ElTypeNum == HTML_EL_SCRIPT_) if (DocumentMeta[doc]->xmlformat) SetParsingScript (FALSE); else SetHtmlParsingScript (FALSE); /* if the last line of the Preformatted is empty, remove it */ leaf = XmlLastLeafInElement (el); if (leaf != NULL) { elType = TtaGetElementType (leaf); if (elType.ElTypeNum == HTML_EL_TEXT_UNIT) /* the last leaf is a TEXT element */ { length = TtaGetTextLength (leaf); if (length > 0) { TtaGiveSubString (leaf, (unsigned char *)lastChar, length, 1); if (lastChar[0] == EOL) /* last character is new line, delete it */ { if (length == 1) /* empty TEXT element */ TtaDeleteTree (leaf, doc); else /* remove the last character */ TtaDeleteTextContent (leaf, length, 1, doc); 
} } } } if (elType.ElTypeNum == HTML_EL_STYLE_) /* Check the mandatory TYPE attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_Notation); else if (elType.ElTypeNum == HTML_EL_SCRIPT_) /* Check the mandatory TYPE attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_content_type); if (DocumentMeta[doc] && DocumentMeta[doc]->xmlformat) { if (IsXmlParsingCSS ()) { text = GetStyleContents (el); if (text) { ReadCSSRules (doc, NULL, text, NULL, TtaGetElementLineNumber (el), FALSE, el); TtaFreeMemory (text); } SetXmlParsingCSS (FALSE); } } else { if (IsHtmlParsingCSS ()) { text = GetStyleContents (el); if (text) { ReadCSSRules (doc, NULL, text, NULL, TtaGetElementLineNumber (el), FALSE, el); TtaFreeMemory (text); } SetHtmlParsingCSS (FALSE); } } /* and continue as if it were a Preformatted or a Script */ break; case HTML_EL_Text_Area: /* it's a Text_Area */ if (DocumentMeta[doc]->xmlformat) SetParsingTextArea (FALSE); else SetHtmlParsingTextArea (FALSE); child = TtaGetFirstChild (el); if (child == NULL) /* it's an empty Text_Area */ /* insert a Inserted_Text element and a child Basic_Elem in the Text_Area element */ { newElType.ElTypeNum = HTML_EL_Inserted_Text; child = TtaNewTree (doc, newElType, ""); TtaInsertFirstChild (&child, el, doc); } else { /* save the text into Default_Value attribute */ attrType.AttrSSchema = htmlSchema; attrType.AttrTypeNum = HTML_ATTR_Default_Value; if (TtaGetAttribute (el, attrType) == NULL) /* attribute Default_Value is missing */ { desc = TtaGetFirstChild (child); if (desc) { length = TtaGetTextLength (desc); if (length > 0) { length++; attr = TtaNewAttribute (attrType); TtaAttachAttribute (el, attr, doc); text = (char *)TtaGetMemory (length); TtaGiveTextContent (desc, (unsigned char *)text, &length, &lang); TtaSetAttributeText (attr, text, el, doc); TtaFreeMemory (text); } } } } /* Check the mandatory rows attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_Rows); /* Check the mandatory columns attribute */ 
CheckMandatoryAttribute (el, doc, HTML_ATTR_Columns); /* Add default rows and columns attributes */ AddRowsColumns (el, doc); break; case HTML_EL_Radio_Input: case HTML_EL_Checkbox_Input: /* put an attribute Checked if it is missing */ attrType.AttrSSchema = htmlSchema; attrType.AttrTypeNum = HTML_ATTR_Checked; if (TtaGetAttribute (el, attrType) == NULL) /* attribute Checked is missing */ { attr = TtaNewAttribute (attrType); TtaAttachAttribute (el, attr, doc); TtaSetAttributeValue (attr, HTML_ATTR_Checked_VAL_No_, el, doc); } break; case HTML_EL_Option_Menu: /* Check that at least one option has a SELECTED attribute */ OnlyOneOptionSelected (el, doc, TRUE); break; case HTML_EL_OptGroup: /* Check the mandatory label attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_label); break; case HTML_EL_MAP: case HTML_EL_map: /* Check the mandatory attributes */ if (DocumentMeta[doc] && DocumentMeta[doc]->xmlformat) /* it's XHTML. Check attribute id */ CheckMandatoryAttribute (el, doc, HTML_ATTR_ID); else /* it's a HTML document. 
Check attribute name */ CheckMandatoryAttribute (el, doc, HTML_ATTR_NAME); break; case HTML_EL_AREA: /* Check the mandatory alt attribute */ CheckMandatoryAttribute (el, doc, HTML_ATTR_ALT); break; case HTML_EL_LINK: CheckCSSLink (el, doc, htmlSchema); break; case HTML_EL_Data_cell: case HTML_EL_Heading_cell: case HTML_EL_List_Item: case HTML_EL_Definition: /* insert a pseudo paragraph into empty cells or list items */ child = TtaGetFirstChild (el); if (child == NULL) { elType.ElTypeNum = HTML_EL_Pseudo_paragraph; child = TtaNewTree (doc, elType, ""); if (child != NULL) TtaInsertFirstChild (&child, el, doc); } if (elType.ElTypeNum == HTML_EL_Data_cell || elType.ElTypeNum == HTML_EL_Heading_cell) /* detect whether we are parsing a whole table or just a cell */ { if (DocumentMeta[doc]->xmlformat) { if (IsWithinXmlTable ()) NewCell (el, doc, FALSE, FALSE, FALSE); } else { if (IsWithinHtmlTable ()) NewCell (el, doc, FALSE, FALSE, FALSE); } } break; case HTML_EL_Table_: CheckTable (el, doc); SubWithinTable (); break; case HTML_EL_TITLE: /* show the TITLE in the main window */ UpdateTitle (el, doc); break; case HTML_EL_rbc: /* an rbc element has been read. Its parent should be a complex_ruby. Change the type of the parent, as simple_ruby are created by default */ parent = TtaGetParent (el); if (parent) { newElType = TtaGetElementType (parent); if (newElType.ElSSchema == elType.ElSSchema && newElType.ElTypeNum == HTML_EL_simple_ruby) TtaChangeElementType (parent, HTML_EL_complex_ruby); } break; case HTML_EL_rtc1: /* an rtc element has been parsed. 
If it has already a rtc1 sibling, change its type to rtc2 */ prev = el; do { TtaPreviousSibling(&prev); if (prev) { newElType = TtaGetElementType (prev); if (newElType.ElSSchema == elType.ElSSchema && newElType.ElTypeNum == HTML_EL_rtc1) { TtaChangeElementType (el, HTML_EL_rtc2); prev = NULL; } } } while (prev); break; case HTML_EL_FIELDSET: childType.ElTypeNum = 0; child = TtaGetFirstChild (el); if (child != NULL) childType = TtaGetElementType (child); if (childType.ElTypeNum != HTML_EL_LEGEND) { sprintf (msgBuffer, "The