/** * SiRPAC - Simple RDF Parser & Compiler * * Copyright © World Wide Web Consortium, (Massachusetts Institute of * Technology, Institut National de Recherche en Informatique et en * Automatique, Keio University). * * All Rights Reserved. * * Please see the full Copyright clause at * * * This program translates RDF descriptions into corresponding * triple representation. * This version uses SAX V1.0 available at * * $Log: SiRPAC.java,v $ * Revision 1.17 1999/03/10 08:54:40 jsaarela * Management of parseType="Literal" and "Resource" now equally * tested. * * Revision 1.16 1999/01/13 15:00:30 jsaarela * Finished conformance testing with PR-rdf-syntax-19990105 version * of the RDF M&S spec. * * * @author Janne Saarela */ package org.w3c.rdf; import org.xml.sax.HandlerBase; import org.xml.sax.InputSource; import org.xml.sax.Locator; import org.xml.sax.AttributeList; import org.xml.sax.EntityResolver; import org.xml.sax.DTDHandler; import org.xml.sax.DocumentHandler; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXParseException; import org.xml.sax.Parser; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.*; import java.net.URL; import java.util.*; import java.io.*; public class SiRPAC implements EntityResolver, DTDHandler, DocumentHandler, ErrorHandler { final static public String REVISION = "$Id: SiRPAC.java,v 1.17 1999/03/10 08:54:40 jsaarela Exp $"; public final static String RDFMS = new String ("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); public final static String RDFSCHEMA = new String ("http://www.w3.org/TR/WD-rdf-schema#"); public final static String XMLSCHEMA = new String ("xml"); private Stack m_namespaceStack = new Stack (); private Stack m_elementStack = new Stack (); private Element m_root = null; private Vector m_triples = new Vector (); private String m_sErrorMsg = new String (); private String m_sWarningMsg = new String (); private String m_sSource = null; private Vector m_vAllNamespaces = new Vector 
(); /** * The following two variables may be changed on the fly * to change the behaviour of the parser */ private boolean m_bCreateBags = false; private boolean m_bFetchSchemas = false; /** * The following flag indicates whether the XML markup * should be stored into a string as a literal value * for RDF */ private Stack m_parseTypeStack = new Stack (); private Stack m_parseElementStack = new Stack (); private String m_sLiteral = new String (); /** * Methods to determine whether we are parsing * parseType="Literal" or parseType="Resource" */ public boolean parseLiteral() { if (!m_elementStack.empty()) { for (int x = m_elementStack.size()-1; x >= 0; x--) { Element e = (Element)m_elementStack.elementAt(x); String sParseType = e.getAttribute(RDFMS+"parseType"); if (sParseType != null) { if (!sParseType.equals ("Resource")) { return true; } } } } return false; } public boolean parseResource() { if (!m_elementStack.empty()) { for (int x = m_elementStack.size()-1; x >= 0; x--) { Element e = (Element)m_elementStack.elementAt(x); String sParseType = e.getAttribute(RDFMS+"parseType"); if (sParseType != null) { if (sParseType.equals ("Resource")) return true; } } } return false; } /** * createBags method allows one to determine whether SiRPAC * produces Bag instances for each Description block. * The default setting is to generate them. 
*/ public void createBags (boolean b) { m_bCreateBags = b; } /** * Set whether parser recursively fetches and parses * every RDF schema it finds in the namespace declarations */ public void fetchSchemas (boolean b) { m_bFetchSchemas = b; } /** * setSource methods saves the name of the source document for * later inspection if needed */ public void setSource (String sSource) { m_sSource = sSource; } public String source () { return m_sSource; } /** * Return all non-RDF namespace URIs recognized by the parser */ public Enumeration listNamespaces () { return m_vAllNamespaces.elements(); } /** * Return the full namespace URI for a given prefix sPrefix. * The default namespace is identified with xmlns prefix. * The namespace of xmlns attribute is an empty string. */ public String namespace (String sPrefix) { if (sPrefix == null) { sPrefix = new String ("xmlns"); } for (int x = m_namespaceStack.size()-1; x >=0; x--) { Hashtable ht = (Hashtable)m_namespaceStack.elementAt (x); String sURI = (String)ht.get (sPrefix); if (sURI != null) return sURI; } /** * Give error only if * 1. the prefix is not from the reserved xml namespace * 2. the prefix is not xmlns which is to look for the default * namespace */ if (sPrefix.equals (XMLSCHEMA)) { return XMLSCHEMA; } else if (sPrefix.equals ("xmlns")) { return ""; } else { addError ("Unresolved namespace prefix "+sPrefix); } return ""; } public static void main (String args[]) throws Exception { if (args.length != 1) { System.err.println("Usage: java -Dorg.xml.sax.parser= org.w3c.rdf.SiRPAC [ URI | filename ]"); System.err.println ("This is revision "+REVISION); System.exit(1); } SiRPAC compiler = null; try { // Create a new parser. Parser p = ParserFactory.makeParser(); // Create a new handler. 
compiler = new SiRPAC(); // Register the handlers p.setEntityResolver(compiler); p.setDTDHandler (compiler); p.setDocumentHandler(compiler); p.setErrorHandler (compiler); InputSource source = null; try { URL url = new URL (args[0]); source = new InputSource (url.openStream()); } catch (Exception e) { FileInputStream input = new FileInputStream(args[0]); source = new InputSource(input); } source.setSystemId(args[0]); compiler.setSource (args[0]); p.parse(source); compiler.resolve (); // compiler.createBags (true); compiler.processXML (compiler.root()); // compiler.root().linearize (0, System.out); } catch (SAXException e) { if (compiler != null) { compiler.addError ("\n
" +e.getMessage()); } else e.printStackTrace (); } catch (Exception e) { if (compiler != null) { compiler.addError ("\n
Internal error "+e); e.printStackTrace (); } else e.printStackTrace (); } String sErrors = compiler.errors (); if (sErrors != null && sErrors.length() > 0) { System.out.println ("Errors during parsing:\n"+sErrors); } else { compiler.printTriples (System.out); } /* String sWarnings = compiler.warnings (); if (sWarnings != null && sWarnings.length() > 0) { System.out.println ("Warnings during parsing:\n"+sWarnings); } */ } public InputSource resolveEntity (String publicId, String systemId) { return null; } public void notationDecl (String name, String publicId, String systemId) { } /** * Display unparsed entity declarations as they are reported. * * @see org.xml.sax.DTDHandler#unparsedEntityDecl */ public void unparsedEntityDecl (String name, String publicId, String systemId, String notationName) { } public void setDocumentLocator (Locator locator) { } public void startDocument () { m_sErrorMsg = ""; } public void endDocument () throws SAXException { } public void doctype (String name, String publicID, String systemID) { } /** * Called for each new element. * Build up the document tree using an element stack * * @exception SAXException Passed on since we don't handle it. */ public void startElement (String name, AttributeList al) throws SAXException { Hashtable namespaces = new Hashtable (); /** * The following loop tries to identify special xmlns prefix * attributes and update the namespace stack accordingly. 
* While doing all this, it builds another AttributeList instance * which will hold the expanded names of the attributes * (I think this approach is only useful for RDF which uses * attributes as an abbreviated syntax for element names) */ AttributeListImpl newAL = new AttributeListImpl (); int iLength = al.getLength (); if (iLength == 0) { // ohwell, no attributes } else for (int x = 0; x < iLength; x++) { String aName = al.getName (x); if (aName.equals ("xmlns")) { String aValue = al.getValue (aName); if (aValue != null && aValue.length() == 0 && source() != null) aValue = source(); namespaces.put (aName, aValue); // save all non-RDF schema addresses if (!m_vAllNamespaces.contains (aValue) && !aValue.startsWith (RDFMS) && !aValue.startsWith (RDFSCHEMA)) { m_vAllNamespaces.addElement (aValue); } // special case: don't save document's own address if (source() != null && !aValue.startsWith (source())) { m_vAllNamespaces.addElement (aValue); } } else if (aName.startsWith ("xmlns:")) { String aValue = al.getValue (aName); if (aValue != null && aValue.length() == 0 && source() != null) aValue = source(); aName = aName.substring (6); namespaces.put (aName, aValue); // save all non-RDF schema addresses if (!m_vAllNamespaces.contains (aValue) && !aValue.startsWith (RDFMS) && !aValue.startsWith (RDFSCHEMA)) { m_vAllNamespaces.addElement (aValue); } // special case: don't save document's own address if (source() != null && !aValue.startsWith (source())) { m_vAllNamespaces.addElement (aValue); } } } /** * Place new namespace declarations into the stack * (Yes, I could optimize this a bit, not it wastes space * if there are no xmlns definitions) */ m_namespaceStack.push (namespaces); /** * Figure out the prefix part if it exists and * determine the namespace of the element accordingly */ String sNamespace = null; String sElementName = null; Element newElement = null; int i = name.indexOf (':'); String sPrefix2 = null; if (i > 0) { sPrefix2 = name.substring (0, i); sNamespace = 
namespace (sPrefix2); sElementName = name.substring (i+1); } else { sNamespace = namespace ("xmlns"); sElementName = name; } /** * Finally look for attributes other than the special xmlns, * expand them, and place to the new AttributeListImpl */ for (int x = 0; x < iLength; x++) { String sAttributeNamespace = null; String aName = al.getName (x); if (!aName.startsWith ("xmlns")) { String aValue = al.getValue (aName); String aType = al.getType (aName); int iIndex = aName.indexOf (':'); String sPrefix = null; if (iIndex > 0) { sPrefix = aName.substring (0, iIndex); sAttributeNamespace = namespace (sPrefix); aName = aName.substring (iIndex+1); } else { if (sNamespace == null) sAttributeNamespace = namespace ("xmlns"); else sAttributeNamespace = sNamespace; } if (parseLiteral()) { if (sPrefix == null) { sPrefix = "gen" + x; // x is a handy counter } newAL.addAttribute (sPrefix + ":" + aName, aType, aValue); newAL.addAttribute ("xmlns:"+sPrefix, aType, sAttributeNamespace); } else { newAL.addAttribute (sAttributeNamespace+aName, aType, aValue); } /** * This call will try to see if the user is using * RDF look-alike elements from another namespace * * Note: you can remove the call if you wish */ likeRDF (sAttributeNamespace, aName); } } /** * If we have parseType="Literal" set earlier, this element * needs some additional attributes to make it stand-alone * piece of XML */ if (parseLiteral()) { if (sPrefix2 == null) { // default namespace coming in if (sNamespace != null) { newAL.addAttribute ("xmlns:gen", "CDATA", sNamespace); } newElement = new Element ("gen:" + sElementName, newAL); newElement.prefix ("gen"); } else { String sAttributeNamespace = namespace (sPrefix2); if (sAttributeNamespace != null) newAL.addAttribute ("xmlns:"+sPrefix2, "CDATA", sAttributeNamespace); newElement = new Element (sPrefix2 + ":" + sElementName, newAL); } } else { newElement = new Element (sNamespace + sElementName, newAL); likeRDF (sNamespace, sElementName); } checkAttributes 
(newElement); /** * Check parseType */ String sLiteralValue = newElement.getAttribute(RDFMS+"parseType"); if (sLiteralValue != null && !sLiteralValue.equals ("Resource")) { /** * This is the management of the element where * parseType="Literal" appears * * You should notice RDF V1.0 conforming implementations * must treat other values than Literal and Resource as * Literal. This is why the condition is !equals("Resource") */ m_parseTypeStack.push (sLiteralValue); if (!m_elementStack.empty()) { Element e = (Element)m_elementStack.peek (); e.addChild (newElement); } m_elementStack.push (newElement); m_parseElementStack.push (newElement); m_sLiteral = ""; return; } if (parseLiteral()) { /** * This is the management of any element nested within * a parseType="Literal" declaration */ makeMarkupST (newElement); m_elementStack.push (newElement); return; } /** * Update the containment hierarchy * with the stack. */ if (!m_elementStack.empty()) { Element e = (Element)m_elementStack.peek (); e.addChild (newElement); } /** * Place the new element into the stack */ m_elementStack.push (newElement); if (sLiteralValue != null && sLiteralValue.equals ("Resource")) { m_parseTypeStack.push (sLiteralValue); m_parseElementStack.push (newElement); m_sLiteral = ""; /** * Since parseType="Resource" implies the following * production must match Description, let's create * an additional Description node here in the document tree. */ Element desc = new Element (RDFMS+"Description", new AttributeListImpl()); if (!m_elementStack.empty()) { Element e = (Element)m_elementStack.peek (); e.addChild (desc); } m_elementStack.push (desc); } } /** * For each end of an element scope step back in the * element and namespace stack * * @exception SAXException Passed on since we don't handle it. 
*/
    public void endElement (String name) throws SAXException {
        // Must be sampled before the element leaves the stack.
        final boolean bInsideLiteral = parseLiteral();

        m_root = (Element)m_elementStack.pop ();
        m_namespaceStack.pop ();

        if (bInsideLiteral) {
            Element literalOwner = (Element)m_parseElementStack.peek ();
            if (literalOwner == m_root) {
                // The element that declared the parseType is closing:
                // attach the collected markup as its data child.
                m_root.addChild (new Data (m_sLiteral));
                m_sLiteral = "";
                m_parseElementStack.pop();
                m_parseTypeStack.pop ();
            } else {
                // A nested element is closing: emit its end tag.
                makeMarkupET (m_root.prefix()+name);
            }
            return;
        }

        if (!parseResource()) {
            return;
        }

        /**
         * If we are doing parseType="Resource"
         * we need to explore whether the next element in
         * the stack is the closing element in which case
         * we remove it as well (remember, there's an
         * extra Description element to be removed)
         */
        if (m_elementStack.empty()) {
            return;
        }
        Element resourceOwner = (Element)m_parseElementStack.peek ();
        if (m_elementStack.peek() == resourceOwner) {
            m_elementStack.pop ();
            m_parseElementStack.pop();
            m_parseTypeStack.pop ();
        }
    }

    /**
     * Return the root element pointer. This requires the parsing
     * has been already done.
     */
    public Element root () {
        return m_root;
    }

    /**
     * Place all characters as Data instance to the containment
     * hierarchy with the help of the stack. Inside literal markup the
     * text is passed to makeMarkupChar instead.
     */
    public void characters (char ch[], int start, int length) throws SAXException {
        Element parent = (Element)m_elementStack.peek ();
        String sText = new String (ch, start, length);

        if (parseLiteral()) {
            makeMarkupChar (sText);
            return;
        }

        /**
         * Warning: this is not correct procedure according to XML spec.
         * All whitespace matters!
         */
        boolean bOnlyWhitespace = sText.trim().length() == 0;
        if (!bOnlyWhitespace) {
            // the untrimmed text is what gets stored
            parent.addChild (new Data (sText));
        }
    }

    /** Ignorable whitespace is dropped. */
    public void ignorableWhitespace (char ch[], int start, int length) {
    }

    /** Processing instructions are dropped. */
    public void processingInstruction (String target, String data) {
    }

    /**
     * Report all warnings, and continue parsing.
* * @see org.xml.sax.ErrorHandler#warning */ public void warning (SAXParseException exception) { m_sWarningMsg += exception.getMessage() + " (" + exception.getSystemId() + "line " + exception.getLineNumber() + ", column " + exception.getColumnNumber() + ")"; } /** * Report all recoverable errors, and try to continue parsing. * * @see org.xml.sax.ErrorHandler#error */ public void error (SAXParseException exception) { m_sErrorMsg += "Recoverable Error: " + exception.getMessage() + " (" + // exception.getSystemId() + "line " + exception.getLineNumber() + ", column " + exception.getColumnNumber() + ")"; } /** * Report all fatal errors, and try to continue parsing. * *

Note: results are no longer reliable once a fatal error has * been reported.

* * @see org.xml.sax.ErrorHandler#fatalError */ public void fatalError (SAXParseException exception) { m_sErrorMsg = "Fatal Error: " + exception.getMessage() + " (" + // exception.getSystemId() + "line " + exception.getLineNumber() + ", column " + exception.getColumnNumber() + ")"; } /** * Generate an error message as a string */ public void addError (String sMsg) { m_sErrorMsg += sMsg + "\n"; } public String errors () { return m_sErrorMsg; } /** * Generate a warning message as a string */ public void addWarning (String sMsg) { m_sWarningMsg += sMsg + "\n"; } public String warnings () { return m_sWarningMsg; } public static Parser createParser (String className) { Parser parser = null; try { // Get the named class. Class c = Class.forName(className); // Instantiate the parser. parser = (Parser)(c.newInstance()); } catch (ClassNotFoundException e) { System.err.println("SAX parser class " + className + "cannot be loaded."); System.exit(1); } catch (IllegalAccessException e) { System.err.println("SAX parser class " + className + " does not have a zero-argument constructor."); System.exit(1); } catch (InstantiationException e) { System.err.println("SAX parser class " + className + " cannot be instantiated."); System.exit(1); } // Check the the parser object // actually implements the Parser interface. if (!(parser instanceof org.xml.sax.Parser)) { System.err.println("Class " + className + " does not implement org.xml.sax.Parser."); System.exit(1); } return parser; } /** * If a URL is relative, make it absolute against the current directory. 
* * @exception java.net.MalformedURLException */ private static String makeAbsoluteURL (String url) throws java.net.MalformedURLException { URL baseURL; String currentDirectory = System.getProperty("user.dir"); String fileSep = System.getProperty("file.separator"); String file = currentDirectory.replace(fileSep.charAt(0), '/') + '/'; if (file.charAt(0) != '/') { file = "/" + file; } baseURL = new URL("file", null, file); return new URL(baseURL, url).toString(); } /** * Escape special characters for display. */ private static String escapeCharacters(char ch[], int start, int length) { StringBuffer out = new StringBuffer(); for (int i = start; i < start+length; i++) { if (ch[i] >= 0x20 && ch[i] < 0x7f) { out.append(ch[i]); } else { out.append("&#" + (int)ch[i] + ';'); } } return out.toString(); } /** * Given an XML document (well-formed HTML, for example), * look for a suitable element to start parsing from * * @exception SAXException Passed on since we don't handle it. */ public void processXML (Element ele) throws SAXException { if (isRDF(ele)) { if (isRDFroot (ele)) { processRDF(ele); } else if (isDescription (ele)) { processDescription (ele, false, m_bCreateBags, m_bCreateBags); } } else { Enumeration e = ele.children(); while (e.hasMoreElements()) { Element child = (Element)e.nextElement(); processXML (child); } } /** * Recursively call myself to go through all the schemas */ if (m_bFetchSchemas) { while (m_vAllNamespaces.size() > 0) { String sURI = (String)m_vAllNamespaces.elementAt(0); m_vAllNamespaces.removeElementAt(0); setSource (sURI); try { URL url = new URL (sURI); String sContentType = url.openConnection().getContentType(); if (!sContentType.startsWith ("text/xml") && !sContentType.startsWith ("text/html")) { addError ("The RDF schema at "+sURI+" is of wrong content type '"+sContentType+"'\n(should have been 'text/xml' or 'text/html')"); } else { InputStream is = url.openStream (); InputSource source = new InputSource (is); // Create a new parser. 
Parser p = ParserFactory.makeParser(); // Register the handlers p.setEntityResolver(this); p.setDTDHandler (this); p.setDocumentHandler(this); p.setErrorHandler (this); p.parse(source); resolve (); processXML (root()); } } catch (Exception ex) { addError ("Could not load RDF schema from "+sURI+". Problem: "+ex); } } } } /** * Start processing an RDF/XML document instance from the * root element rdf. * * @exception SAXException Passed on since we don't handle it. */ public void processRDF (Element rdf) throws SAXException { Enumeration e = rdf.children(); if (!e.hasMoreElements()) { addError ("Empty RDF element"); return; } while (e.hasMoreElements()) { Element ele = (Element)e.nextElement(); if (isDescription (ele)) { processDescription (ele, false, m_bCreateBags, m_bCreateBags); } else if (isContainer (ele)) { processContainer (ele); } else if (isTypedPredicate (ele)) { processTypedNode (ele); } } } /** * Manage the typedNode production in the RDF grammar. * * @exception SAXException Passed on since we don't handle it. */ public String processTypedNode (Element typedNode) throws SAXException { String sID = typedNode.getAttribute (RDFMS, "ID"); // if (source() != null && sID != null) // sID = source() + sID; String sBagID = typedNode.getAttribute (RDFMS, "bagID"); String sAbout = typedNode.getAttribute (RDFMS, "about"); String sAboutEach = typedNode.getAttribute (RDFMS, "aboutEach"); String sAboutEachPrefix = typedNode.getAttribute (RDFMS, "aboutEachPrefix"); if (typedNode.getAttribute (RDFMS, "resource") != null) { addError ("'resource' attribute not allowed for a typedNode "+typedNode.name()); } /** * We are going to manage this typedNode using the processDescription * routine later on. Before that, place all properties encoded as * attributes to separate child nodes. 
*/ Enumeration e = typedNode.attributes (); while (e.hasMoreElements()) { String sAttribute = (String)e.nextElement(); String sValue = typedNode.getAttribute (sAttribute); sValue = sValue.trim (); if (!sAttribute.startsWith (RDFMS) && !sAttribute.startsWith (XMLSCHEMA)) { if (sValue.length() > 0) { Element newPredicate = new Element (sAttribute, new AttributeListImpl ()); newPredicate.addAttribute (RDFMS + "ID", (sAbout != null ? sAbout : sID)); newPredicate.addAttribute (RDFMS + "bagID", sBagID); Data newData = new Data (sValue); newPredicate.addChild (newData); typedNode.addChild (newPredicate); typedNode.removeAttribute (sAttribute); } } } String sObject = new String (); if (sAbout != null) sObject = sAbout; else if (sID != null) sObject = sID; else sObject = newReificationID(); typedNode.ID (sObject); // special case: should the typedNode have aboutEach attribute, // the type predicate should distribute to pointed // collection also -> create a child node to the typedNode Enumeration eTargets = typedNode.targets (); if (sAboutEach != null && eTargets.hasMoreElements()) { Element newPredicate = new Element (RDFMS + "type", new AttributeListImpl()); Data newData = new Data (typedNode.name()); newPredicate.addChild (newData); typedNode.addChild (newPredicate); } else { addTriple (RDFMS + "type", sObject, typedNode.name()); } String sDesc = processDescription (typedNode, false, false, true); return sObject; } /** * processDescription manages Description elements * * @param description The Description element itself * @param inPredicate Is this is a nested description * @param reificate Do we need to reificate * @param createBag Do we create a bag container * * @return An ID for the description * * @exception SAXException Passed on since we don't handle it. 
*/ public String processDescription (Element description, boolean inPredicate, boolean reificate, boolean createBag) throws SAXException { /** * Return immediately if the description has already been managed */ if (description.done()) return description.ID(); int iChildCount = 1; boolean bOnce = true; /** * Determine first all relevant values */ String sAbout = description.getAttribute (RDFMS, "about"); String sAboutEach = description.getAttribute (RDFMS, "aboutEach"); String sAboutEachPrefix = description.getAttribute (RDFMS, "aboutEachPrefix"); String sBagid = description.getAttribute (RDFMS, "bagID"); String sID = description.getAttribute (RDFMS, "ID"); Element target = description.target(); boolean hasTarget = description.targets().hasMoreElements(); boolean targetIsContainer = false; String sTargetAbout = null; String sTargetBagid = null; String sTargetID = null; /** * Determine what the target of the Description reference is */ if (hasTarget) { sTargetAbout = target.getAttribute (RDFMS, "about"); sTargetBagid = target.getAttribute (RDFMS, "bagID"); sTargetID = target.getAttribute (RDFMS, "ID"); if (source() != null && sTargetID != null) sTargetID = source() + sTargetID; /** * Target is collection if * 1. it is identified with bagID attribute * 2. 
it is identified with ID attribute and is a collection */ if (sTargetBagid != null && sAbout != null) { targetIsContainer = (sAbout.substring(1).equals (sTargetBagid)); } else { if (sTargetID != null && sAbout != null && sAbout.substring(1).equals (sTargetID) && isContainer (target)) { targetIsContainer = true; } } } /** * Check if there are properties encoded using the abbreviated * syntax */ expandAttributes (description, description); /** * Manage the aboutEach attribute here */ if (sAboutEach != null && hasTarget) { if (isContainer(target)) { Enumeration e = target.children (); while (e.hasMoreElements()) { Element ele = (Element)e.nextElement (); if (isListItem (ele)) { String sResource = getResource(ele); /** * Manage
  • case */ if (sResource != null) { Element newDescription = null; if (sResource != null) { newDescription = new Element (RDFMS + "Description", new AttributeListImpl ()); newDescription.addAttribute (RDFMS + "about", sResource); } Enumeration e2 = description.children(); while (e2.hasMoreElements()) { Element ele2 = (Element)e2.nextElement (); if (newDescription != null) { newDescription.addChild (ele2); } } if (newDescription != null) processDescription (newDescription, false, false, false); } else { /** * Otherwise we have a structured value inside
  • */ // loop through the children of
  • // (can be only one) Enumeration e2 = ele.children (); while (e2.hasMoreElements()) { Element ele2 = (Element)e2.nextElement (); // loop through the items in the // description with aboutEach // and add them to the target Element newNode = new Element (RDFMS + "Description", new AttributeListImpl()); Enumeration e3 = description.children(); while (e3.hasMoreElements()) { Element ele3 = (Element)e3.nextElement (); newNode.addChild (ele3); } newNode.addTarget (ele2); processDescription (newNode, true, false, false); } } } else if (isTypedPredicate (ele)) { Element newNode = new Element (RDFMS + "Description", new AttributeListImpl()); Enumeration e2 = description.children(); while (e2.hasMoreElements()) { Element ele2 = (Element)e2.nextElement (); newNode.addChild (ele2); } newNode.addTarget (ele); processDescription (newNode, true, false, false); } } } else if (isDescription(target)) { Enumeration e = target.children (); while (e.hasMoreElements()) { Element ele = (Element)e.nextElement (); Element newNode = new Element (RDFMS + "Description", new AttributeListImpl()); Enumeration e2 = description.children(); while (e2.hasMoreElements()) { Element ele2 = (Element)e2.nextElement (); newNode.addChild (ele2); } newNode.addTarget (ele); processDescription (newNode, true, false, false); } } return null; } /** * Manage the aboutEachPrefix attribute here */ if (sAboutEachPrefix != null) { if (hasTarget) { Enumeration e = description.targets(); while (e.hasMoreElements()) { target = (Element)e.nextElement (); sTargetAbout = target.getAttribute (RDFMS, "about"); Element newDescription = new Element (RDFMS + "Description", new AttributeListImpl ()); newDescription.addAttribute (RDFMS + "about", sTargetAbout); Enumeration e2 = description.children(); while (e2.hasMoreElements()) { Element ele2 = (Element)e2.nextElement (); newDescription.addChild (ele2); } processDescription (newDescription, false, false, false); } } return null; } /** * Enumerate through the children */ 
Enumeration e = description.children(); while (e.hasMoreElements()) { Element n = (Element)e.nextElement(); if (isDescription (n)) { addError ("Cannot nest Description inside Description"); } else if (isListItem (n)) { addError ("Cannot nest Listitem inside Description"); } else if (isContainer (n)) { addError ("Cannot nest container inside Description"); } else if (isTypedPredicate(n)) { String sChildID = null; if (hasTarget && targetIsContainer) { sChildID = processPredicate (n, description, (target.bagID() != null ? target.bagID() : target.ID()), false); description.ID (sChildID); createBag = false; } else if (hasTarget) { sChildID = processPredicate (n, description, (target.bagID() != null ? target.bagID() : target.ID()), reificate); description.ID (sChildID); } else if (!hasTarget && !inPredicate) { if (description.ID() == null) description.ID (newReificationID()); if (sAbout == null) if (sID != null) sAbout = sID; else sAbout = description.ID(); sChildID = processPredicate (n, description, sAbout, ( sBagid != null ? 
true : reificate)); //description.ID (sChildID); } else if (!hasTarget && inPredicate) { if (sAbout == null) { if (sID != null) { description.ID (sID); sAbout = sID; } else { if (description.ID() == null) description.ID (newReificationID()); sAbout = description.ID(); } } else { description.ID (sAbout); } sChildID = processPredicate (n, description, sAbout, false); } /** * Each Description block creates also a Bag node which * has links to all properties within the block IF * the m_bCreateBags variable is true */ if (sBagid != null || (m_bCreateBags && createBag)) { String sNamespace = RDFMS; // do only once and only if there is a child if (bOnce && sChildID != null) { bOnce = false; if (description.bagID() == null) description.bagID (newReificationID()); if (description.ID() == null) description.ID (description.bagID()); addTriple (sNamespace + "type", description.bagID(), sNamespace + "Bag"); } if (sChildID != null) { addTriple (sNamespace + "_" + iChildCount, description.bagID(), sChildID); iChildCount++; } } } } description.done (true); return description.ID(); } /** * processPredicate handles all elements not defined as special * RDF elements. * * @param predicate The predicate element itself * @param description Context for the predicate * @param sTarget The target resource * @param reificate Should this predicate be reificated * * @return the new ID which can be used to identify the predicate * * @exception SAXException Passed on since we don't handle it. */ private String processPredicate (Element predicate, Element description, String sTarget, boolean reificate) throws SAXException { String sStatementID = predicate.getAttribute (RDFMS, "ID"); String sBagID = predicate.getAttribute (RDFMS, "bagID"); String sResource = getResource(predicate); /** * If a predicate has other attributes than rdf:ID, rdf:bagID, * or xmlns... -> generate new triples according to the spec. 
* (See end of Section 6) */ // this new element may not be needed Element d = new Element (RDFMS + "Description", new AttributeListImpl()); if (expandAttributes (d, predicate)) { // error checking if (predicate.children().hasMoreElements()) { addError (predicate.name()+" must be an empty element since it uses propAttr grammar production"); return null; } // determine the 'about' part for the new statements if (sStatementID != null) { d.addAttribute (RDFMS + "about", sStatementID); // hack: make rdf:ID the value of the predicate predicate.addChild (new Data (sStatementID)); } else if (sResource != null) { d.addAttribute (RDFMS + "about", sResource); } else { sStatementID = newReificationID(); d.addAttribute (RDFMS + "about", sStatementID); } if (sBagID != null) { d.addAttribute (RDFMS + "bagID", sBagID); d.bagID (sBagID); } processDescription (d, false, false, m_bCreateBags); } /** * Tricky part: if the resource attribute is present for a predicate * AND there are no children, the value of the predicate is either * 1. the URI in the resource attribute OR * 2. 
the node ID of the resolved #resource attribute */ if (sResource != null && !predicate.children().hasMoreElements()) { if (predicate.target() == null) { if (reificate) { sStatementID = reificate (predicate.name(), sTarget, sResource, predicate.ID()); predicate.ID (sStatementID); } else { addTriple (predicate.name(), sTarget, sResource); } } else { if (reificate) { sStatementID = reificate (predicate.name(), sTarget, predicate.target().ID(), predicate.ID()); predicate.ID (sStatementID); } else { addTriple (predicate.name(), sTarget, predicate.target().ID()); } } return predicate.ID(); } /** * Does this predicate make a reference somewhere using the * sResource attribute */ if (sResource != null && predicate.target() != null) { sStatementID = processDescription (predicate.target(), true, false, false); if (reificate) { sStatementID = reificate (predicate.name(), sTarget, sStatementID, predicate.ID()); predicate.ID (sStatementID); } else { addTriple (predicate.name(), sTarget, sStatementID); } return sStatementID; } /** * Before looping through the children, let's check * if there are any. 
If not, the value of the predicate is * an anonymous node */ Enumeration e2 = predicate.children(); if (!(e2.hasMoreElements())) { if (reificate) { sStatementID = reificate (predicate.name(), sTarget, newReificationID(), predicate.ID()); } else { addTriple (predicate.name(), sTarget, newReificationID()); } } boolean bUsedTypedNodeProduction = false; while (e2.hasMoreElements()) { Element n2 = (Element)e2.nextElement(); if (isDescription (n2)) { Element d2 = n2; sStatementID = processDescription (d2, true, false, false); d2.ID (sStatementID); if (reificate) { sStatementID = reificate (predicate.name(), sTarget, sStatementID, predicate.ID()); } else { addTriple (predicate.name(), sTarget, sStatementID); } } else if (n2 instanceof Data) { /** * We've got real data */ String sValue = ((Data)n2).data(); /** * Only if the content is not empty PCDATA (whitespace that is), * print the triple */ sValue = sValue.trim(); if (sValue.length() > 0) { if (reificate) { sStatementID = reificate (predicate.name(), sTarget, sValue, predicate.ID()); predicate.ID (sStatementID); } else { addTriple (predicate.name(), sTarget, sValue); } } } else if (isContainer (n2)) { String sCollectionID = processContainer (n2); sStatementID = sCollectionID; /** * Attach the collection to the current predicate */ if (description.target() != null) { if (reificate) { sStatementID = reificate (predicate.name(), description.target().getAttribute (RDFMS, "about"), sCollectionID, predicate.ID()); predicate.ID (sStatementID); } else { addTriple (predicate.name(), description.target().getAttribute (RDFMS, "about"), sCollectionID); } } else { if (reificate) { sStatementID = reificate (predicate.name(), sTarget, sCollectionID, predicate.ID()); predicate.ID (sStatementID); } else { addTriple (predicate.name(), sTarget, sCollectionID); } } } else if (isTypedPredicate (n2)) { if (bUsedTypedNodeProduction) { addError ("Only one typedNode allowed inside a predicate (Extra typedNode:"+n2.name()+")"); } else { 
bUsedTypedNodeProduction = true; } sStatementID = processTypedNode (n2); addTriple (predicate.name(), sTarget, sStatementID); } } return sStatementID; } private String processContainer (Element n) throws SAXException { String sID = n.ID(); if (sID == null) sID = newReificationID(); /** * Do the instantiation only once */ if (!n.done()) { String sNamespace = RDFMS; if (isSequence (n)) { addTriple (sNamespace+"type", sID, sNamespace+"Seq"); } else if (isAlternative (n)) { addTriple (sNamespace+"type", sID, sNamespace+"Alt"); } else if (isBag (n)) { addTriple (sNamespace+"type", sID, sNamespace+"Bag"); } n.done (true); } expandAttributes (n, n); Enumeration e = ((Element)n).children(); if (!e.hasMoreElements() && isAlternative (n)) { addError ("An RDF:Alt container must have at least one listitem"); } int iCounter = 1; while (e.hasMoreElements()) { Element n2 = (Element)e.nextElement(); if (isListItem (n2)) { processListItem (sID, n2, iCounter); iCounter++; } else { addError ("Cannot nest "+n2.name()+" inside container"); } } return sID; } private void processListItem (String sID, Element listitem, int iCounter) throws SAXException { /** * Two different cases for * 1. LI element without content (resource available) * 2. 
LI element with content (resource unavailable) */ String sResource = getResource(listitem); if (sResource != null) { addTriple (RDFMS+"_"+iCounter, sID, sResource); // validity checking if (listitem.children().hasMoreElements()) { addError ("Listitem with 'resource' attribute cannot have child nodes"); } listitem.ID (sResource); } else { Enumeration e = listitem.children(); while (e.hasMoreElements()) { Element n = (Element)e.nextElement(); if (n instanceof Data) { addTriple (RDFMS+"_"+iCounter, sID, ((Data)n).data()); } else if (isDescription (n)) { String sNodeID = processDescription (n, false, true, false); addTriple (RDFMS+"_"+iCounter, sID, sNodeID); listitem.ID (sNodeID); } else if (isListItem (n)) { addError ("Cannot nest listitem inside listitem"); } else if (isContainer (n)) { processContainer (n); addTriple (RDFMS+"_"+iCounter, sID, n.ID()); } else if (isTypedPredicate (n)) { String sNodeID = processTypedNode (n); // addTriple (RDFMS+"_"+iCounter, sID, sNodeID); } } } } /** * checkAttributes goes through the attributes of element e * to see * 1. if there are symbolic references to other nodes in the data model. * in which case they must be stored for later resolving with * resolveLater method. * 2. if there is an identity attribute, it is registered using * registerResource or registerID method. 
* * @see resolveLater * @see registerResource * @see registerID */ private void checkAttributes (Element e) { String sResource = e.getAttribute (RDFMS, "resource"); if (sResource != null && sResource.startsWith("#")) { resolveLater (e); } String sAboutEach = e.getAttribute (RDFMS, "aboutEach"); if (sAboutEach != null && sAboutEach.startsWith("#")) { resolveLater (e); } String sAboutEachPrefix = e.getAttribute (RDFMS, "aboutEachPrefix"); if (sAboutEachPrefix != null) { resolveLater (e); } String sAbout = e.getAttribute (RDFMS, "about"); if (sAbout != null) { if (sAbout.startsWith("#")) { resolveLater (e); } else { registerResource (e); } } String sBagID = e.getAttribute (RDFMS, "bagID"); if (sBagID != null) { registerID (sBagID, e); e.bagID (sBagID); } String sID = e.getAttribute (RDFMS, "ID"); if (sID != null) { registerID (sID, e); e.ID (sID); // default value } if (sID != null && sAbout != null) { addError ("'ID' and 'about' attribute may not appear within the same Description block"); } } /** * Take an element ele with its parent element parent * and evaluate all its attributes to see if they are non-RDF specific * and non-XML specific in which case they must become children of * the ele node. * * @exception SAXException Passed on since we don't handle it. 
*/ private boolean expandAttributes (Element parent, Element ele) throws SAXException { boolean foundAbbreviation = false; Enumeration e = ele.attributes (); while (e.hasMoreElements()) { String sAttribute = (String)e.nextElement(); String sValue = ele.getAttribute (sAttribute).trim(); if (sAttribute.startsWith (XMLSCHEMA)) continue; // exception: expand rdf:value if (sAttribute.startsWith (RDFMS) && !sAttribute.startsWith (RDFMS+"_") && !sAttribute.endsWith ("value") && !sAttribute.endsWith ("type")) continue; if (sValue.length() > 0) { foundAbbreviation = true; Element newElement = new Element (sAttribute, new AttributeListImpl()); Data newData = new Data (sValue); newElement.addChild (newData); parent.addChild (newElement); } } return foundAbbreviation; } /** * reificate creates one new node and four new triples * and returns the ID of the new node */ private String reificate (String sPredicate, String sSubject, String sObject, String sNodeID) { String sNamespace = RDFMS; if (sNodeID == null) sNodeID = newReificationID(); /** * The original statement must remain in the data model */ addTriple (sPredicate, sSubject, sObject); /** * Do not reificate reificated properties */ if (sPredicate.equals (sNamespace+"subject") || sPredicate.equals (sNamespace+"predicate") || sPredicate.equals (sNamespace+"object") || sPredicate.equals (sNamespace+"type")) { return null; } /** * Reificate by creating 4 new triples */ addTriple (sNamespace + "predicate", sNodeID, sPredicate); addTriple (sNamespace + "subject", sNodeID, ( sSubject.length() == 0 ? 
source() : sSubject)); addTriple (sNamespace + "object", sNodeID, sObject); addTriple (sNamespace + "type", sNodeID, sNamespace + "Statement"); return sNodeID; } /** * Create a new triple and add it to the m_triples Vector */ public void addTriple (String sPredicate, String sSubject, String sObject) { /** * If there is no subject (about=""), then use the URI/filename where * the RDF description came from */ if (sPredicate == null) { addWarning ("Predicate null when subject="+sSubject+" and object="+sObject); return; } if (sSubject == null) { addWarning ("Subject null when predicate="+sPredicate+" and object="+sObject); return; } if (sObject == null) { addWarning ("Object null when predicate="+sPredicate+" and subject="+sSubject); return; } if (sSubject.length() == 0) { sSubject = source(); } Triple t = new Triple (sPredicate, sSubject, sObject); m_triples.addElement (t); } /** * Print all triples to the ps PrintStream */ public void printTriples (PrintStream ps) { for (int x = 0; x < m_triples.size(); x++) { Triple t = (Triple)m_triples.elementAt (x); ps.println ("triple(\""+t.predicate()+"\",\""+t.subject()+"\",\""+t.object()+"\")."); } } /** * Return all created triples in an Enumeration instance */ public Enumeration triples () { return m_triples.elements (); } /** * Is the element a Description */ public boolean isDescription (Element e) { return isRDF(e) && e.name().endsWith ("Description"); } /** * Is the element a ListItem */ public boolean isListItem (Element e) { return isRDF(e) && ( e.name().endsWith ("li") || e.name().indexOf ("_") > -1); } /** * Is the element a Container * * @see isSequence * @see isAlternative * @see isBag */ public boolean isContainer (Element e) { return (isSequence (e) || isAlternative (e) || isBag (e)); } /** * Is the element a Sequence */ public boolean isSequence (Element e) { return isRDF(e) && e.name().endsWith ("Seq"); } /** * Is the element an Alternative */ public boolean isAlternative (Element e) { return isRDF(e) && 
e.name().endsWith ("Alt"); } /** * Is the element a Bag */ public boolean isBag (Element e) { return isRDF(e) && e.name().endsWith ("Bag"); } /** * This method matches all properties but those from RDF namespace */ public boolean isTypedPredicate (Element e) { if (isRDF(e)) { // list all RDF predicates known by the RDF specification if (e.name().endsWith ("predicate") || e.name().endsWith ("subject") || e.name().endsWith ("object") || e.name().endsWith ("type") || e.name().endsWith ("value") || e.name().endsWith ("Property") || e.name().endsWith ("Statement")) { return true; } return false; } if (e.name().length() > 0) return true; else return false; } public boolean isRDFroot (Element e) { return isRDF(e) && e.name().endsWith ("RDF"); } /** * Check if the element e is from the namespace * of the RDF schema by comparing only the beginning of * the expanded element name with the canonical RDFMS * URI */ public boolean isRDF (Element e) { if (e != null && e.name() != null) return e.name().startsWith (RDFMS); else return false; } /** * Methods for node reference management */ private Vector m_vResources = new Vector (); private Vector m_vResolveQueue = new Vector (); private Hashtable m_hIDtable = new Hashtable (); private int m_iReificationCounter = 0; /** * Add the element e to the m_vResolveQueue * to be resolved later. 
*/ public void resolveLater (Element e) { m_vResolveQueue.addElement (e); } /** * Go through the m_vResolveQueue and assign * direct object reference for each symbolic reference */ public void resolve () { for (int x = 0; x < m_vResolveQueue.size(); x++) { Element e = (Element)m_vResolveQueue.elementAt(x); String sAbout = e.getAttribute (RDFMS, "about"); if (sAbout != null) { if (sAbout.startsWith ("#")) sAbout = sAbout.substring (1); Element e2 = (Element)lookforNode(sAbout); if (e2 != null) { e.addTarget (e2); } else { addError ("Unresolved internal reference to "+sAbout); } } String sResource = e.getAttribute (RDFMS, "resource"); if (sResource != null) { if (sResource.startsWith ("#")) sResource = sResource.substring (1); Element e2 = (Element)lookforNode(sResource); if (e2 != null) { e.addTarget (e2); } } String sAboutEach = e.getAttribute (RDFMS, "aboutEach"); if (sAboutEach != null) { sAboutEach = sAboutEach.substring (1); Element e2 = (Element)lookforNode(sAboutEach); if (e2 != null) { e.addTarget (e2); } } String sAboutEachPrefix = e.getAttribute (RDFMS, "aboutEachPrefix"); if (sAboutEachPrefix != null) { for (int y = 0; y < m_vResources.size(); y++) { Element ele = (Element)m_vResources.elementAt(y); String sA = ele.getAttribute (RDFMS, "about"); if (sA.startsWith (sAboutEachPrefix)) { e.addTarget (ele); } } } } m_vResolveQueue.removeAllElements(); } /** * Look for a node by name sID from the Hashtable * m_hIDtable of all registered IDs. */ public Element lookforNode (String sID) { if (sID == null) return null; else return (Element)m_hIDtable.get (sID); } /** * Add an element e to the Hashtable m_hIDtable * which stores all nodes with an ID */ public void registerID (String sID, Element e) { if (m_hIDtable.get (sID) != null) addError("Node ID '"+sID+"' redefined."); m_hIDtable.put (sID, e); } /** * Create a new reification ID by using a name part and an * incremental counter m_iReificationCounter. 
*/ public String newReificationID () { m_iReificationCounter++; if (source() == null) return new String ("genid" + m_iReificationCounter); else return new String (source() + "#genid" + m_iReificationCounter); } /** * Special method to deal with rdf:resource attribute */ public String getResource (Element e) { String sResource = e.getAttribute (RDFMS, "resource"); if (sResource != null && sResource.length() == 0) sResource = source(); return sResource; } /** * Add an element e to the Vector m_vResources * which stores all nodes with an URI */ public void registerResource (Element e) { m_vResources.addElement (e); } public void makeMarkupST (Element ele) { m_sLiteral += "<" + ele.name(); Enumeration e = ele.attributes(); while (e.hasMoreElements()) { String sAttribute = (String)e.nextElement(); String sAttributeValue = (String)ele.getAttribute (sAttribute); m_sLiteral += " " + sAttribute + "='" + sAttributeValue + "'"; } m_sLiteral += ">"; } public void makeMarkupET (String name) { m_sLiteral += ""; } public void makeMarkupChar (String s) { m_sLiteral += s; } /** * This method adds a warning for each name (element & attribute) * which looks like it is from RDF but it is not. * * Note: this method is useful for interactive use but can be * omitted from embedded applications. */ public void likeRDF (String sNamespace, String sElement) { if (!sNamespace.equals (RDFMS)) { if (sElement.equals ("RDF") || sElement.equals ("Description") || sElement.equals ("Bag") || sElement.equals ("Alt") || sElement.equals ("Seq") || sElement.equals ("li") || sElement.equals ("_1") || sElement.equals ("ID") || sElement.equals ("resource") || sElement.equals ("about") || sElement.equals ("value") || sElement.equals ("subject") || sElement.equals ("predicate") || sElement.equals ("object") || sElement.equals ("type")) { addWarning ("Name '"+sElement+"' looks like it is from RDF but it has namespace "+sNamespace+"\n"); } } } }