[4] | 1 | /* |
---|
| 2 | * This program is free software; you can redistribute it and/or modify |
---|
| 3 | * it under the terms of the GNU General Public License as published by |
---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
---|
| 5 | * (at your option) any later version. |
---|
| 6 | * |
---|
| 7 | * This program is distributed in the hope that it will be useful, |
---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 10 | * GNU General Public License for more details. |
---|
| 11 | * |
---|
| 12 | * You should have received a copy of the GNU General Public License |
---|
| 13 | * along with this program; if not, write to the Free Software |
---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | /* |
---|
| 18 | * BIFParser.java |
---|
| 19 | * Copyright (C) 2003 University of Waikato, Hamilton, New Zealand |
---|
| 20 | * |
---|
| 21 | */ |
---|
| 22 | package weka.gui.graphvisualizer; |
---|
| 23 | |
---|
| 24 | import java.io.InputStream; |
---|
| 25 | import java.io.FileWriter; |
---|
| 26 | import java.io.IOException; |
---|
| 27 | import java.io.StringReader; |
---|
| 28 | import java.util.StringTokenizer; |
---|
| 29 | |
---|
| 30 | import weka.core.FastVector; |
---|
| 31 | |
---|
| 32 | import org.w3c.dom.Document; |
---|
| 33 | import org.w3c.dom.NodeList; |
---|
| 34 | import org.w3c.dom.Element; |
---|
| 35 | |
---|
| 36 | |
---|
| 37 | /** |
---|
| 38 | * This class parses an inputstream or a string in |
---|
| 39 | * XMLBIF ver. 0.3 format, and builds the datastructures |
---|
| 40 | * that are passed to it through the constructor. |
---|
| 41 | * |
---|
| 42 | * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) |
---|
| 43 | * @version $Revision: 1.7 $ - 24 Apr 2003 - Initial version (Ashraf M. Kibriya) |
---|
| 44 | */ |
---|
| 45 | public class BIFParser implements GraphConstants { |
---|
| 46 | |
---|
| 47 | /** These holds the nodes and edges of the graph */ |
---|
| 48 | protected FastVector m_nodes, m_edges; |
---|
| 49 | /** This holds the name of the graph (i.e. the name of network tag in XMLBIF |
---|
| 50 | * input) |
---|
| 51 | */ |
---|
| 52 | protected String graphName; |
---|
| 53 | /** This holds the string to be parsed */ |
---|
| 54 | protected String inString; |
---|
| 55 | /** This holds the InputStream to be parsed */ |
---|
| 56 | protected InputStream inStream; |
---|
| 57 | |
---|
| 58 | |
---|
| 59 | /** |
---|
| 60 | * Constructor (if our input is a String) |
---|
| 61 | * |
---|
| 62 | * @param input the string to be parsed (should not be null) |
---|
| 63 | * @param nodes vector containing GraphNode objects (should be empty) |
---|
| 64 | * @param edges vector containing GraphEdge objects (should be empty) |
---|
| 65 | */ |
---|
| 66 | public BIFParser(String input, FastVector nodes, FastVector edges) { |
---|
| 67 | m_nodes = nodes; m_edges = edges; inString = input; |
---|
| 68 | } |
---|
| 69 | |
---|
| 70 | |
---|
| 71 | /** |
---|
| 72 | * Constructor (if our input is an InputStream) |
---|
| 73 | * |
---|
| 74 | * @param instream the InputStream to be parsed (should not be null) |
---|
| 75 | * @param nodes vector containing GraphNode objects (should be empty) |
---|
| 76 | * @param edges vector containing GraphEdge objects (should be empty) |
---|
| 77 | */ |
---|
| 78 | public BIFParser(InputStream instream, FastVector nodes, FastVector edges) { |
---|
| 79 | m_nodes = nodes; m_edges = edges; inStream = instream; |
---|
| 80 | } |
---|
| 81 | |
---|
| 82 | |
---|
| 83 | /** |
---|
| 84 | * This method parses the string or the InputStream that we |
---|
| 85 | * passed in through the constructor and builds up the |
---|
| 86 | * m_nodes and m_edges vectors |
---|
| 87 | * @exception Exception if both the inString and inStream are |
---|
| 88 | * null, i.e. no input has been provided |
---|
| 89 | * @exception BIFFormatException if there is format of the |
---|
| 90 | * input is not correct. The format should conform to |
---|
| 91 | * XMLBIF version 0.3 |
---|
| 92 | * @exception NumberFormatException if there is an invalid |
---|
| 93 | * char in the probability table of a node. |
---|
| 94 | * @return returns the name of the graph |
---|
| 95 | */ |
---|
| 96 | public String parse() throws Exception { |
---|
| 97 | Document dc=null; |
---|
| 98 | |
---|
| 99 | javax.xml.parsers.DocumentBuilderFactory dbf = |
---|
| 100 | javax.xml.parsers.DocumentBuilderFactory.newInstance(); |
---|
| 101 | dbf.setIgnoringElementContentWhitespace(true); |
---|
| 102 | javax.xml.parsers.DocumentBuilder db = dbf.newDocumentBuilder(); |
---|
| 103 | |
---|
| 104 | if(inStream!=null) |
---|
| 105 | dc = db.parse(inStream); |
---|
| 106 | else if(inString!=null) |
---|
| 107 | dc = db.parse(new org.xml.sax.InputSource(new StringReader(inString))); |
---|
| 108 | else |
---|
| 109 | { throw new Exception("No input given"); } |
---|
| 110 | |
---|
| 111 | NodeList nl = dc.getElementsByTagName( "NETWORK" ); |
---|
| 112 | |
---|
| 113 | if(nl.getLength()==0) { |
---|
| 114 | throw new BIFFormatException( "NETWORK tag not found" ); |
---|
| 115 | } |
---|
| 116 | |
---|
| 117 | //take only the first network node |
---|
| 118 | NodeList templist = ((Element)nl.item(0)).getElementsByTagName( "NAME" ); |
---|
| 119 | graphName = templist.item(0).getFirstChild().getNodeValue(); |
---|
| 120 | //System.out.println("The name of the network is "+ |
---|
| 121 | //templist.item(0).getFirstChild().getNodeValue()); |
---|
| 122 | |
---|
| 123 | //Get all the variables |
---|
| 124 | nl = dc.getElementsByTagName("VARIABLE"); |
---|
| 125 | for(int i=0; i<nl.getLength(); i++) { |
---|
| 126 | |
---|
| 127 | templist = ((Element)nl.item(i)).getElementsByTagName("NAME"); |
---|
| 128 | if(templist.getLength()>1) |
---|
| 129 | throw new BIFFormatException("More than one name tags found for "+ |
---|
| 130 | "variable no. "+(i+1)); |
---|
| 131 | |
---|
| 132 | String nodename = |
---|
| 133 | ((org.w3c.dom.Node)templist.item(0)).getFirstChild().getNodeValue(); |
---|
| 134 | GraphNode n = new GraphNode( nodename, nodename, GraphNode.NORMAL ); |
---|
| 135 | m_nodes.addElement(n); |
---|
| 136 | //getting nodes position |
---|
| 137 | templist = ((Element)nl.item(i)).getElementsByTagName("PROPERTY"); |
---|
| 138 | for(int j=0; j<templist.getLength(); j++) { |
---|
| 139 | if( ((org.w3c.dom.Node)templist.item(j)).getFirstChild() |
---|
| 140 | .getNodeValue().startsWith("position") ) { |
---|
| 141 | String xy = templist.item(j).getFirstChild().getNodeValue(); |
---|
| 142 | //System.out.println("x: "+ |
---|
| 143 | // xy.substring(xy.indexOf('(')+1, xy.indexOf(','))+ |
---|
| 144 | // " y: "+ |
---|
| 145 | // xy.substring(xy.indexOf(',')+1, xy.indexOf(')')) |
---|
| 146 | // ); |
---|
| 147 | n.x = Integer.parseInt( xy.substring(xy.indexOf('(')+ |
---|
| 148 | 1, xy.indexOf(',')).trim() ); |
---|
| 149 | n.y = Integer.parseInt( xy.substring(xy.indexOf(',')+ |
---|
| 150 | 1, xy.indexOf(')')).trim() ); |
---|
| 151 | break; |
---|
| 152 | } |
---|
| 153 | } |
---|
| 154 | //getting all the outcomes of the node |
---|
| 155 | templist = ((Element)nl.item(i)).getElementsByTagName("OUTCOME"); |
---|
| 156 | n.outcomes = new String[templist.getLength()]; |
---|
| 157 | for(int j=0; j<templist.getLength(); j++) { |
---|
| 158 | n.outcomes[j] = templist.item(j).getFirstChild().getNodeValue(); |
---|
| 159 | //System.out.println("Outcome["+j+"]: "+n.outcomes[j]); |
---|
| 160 | } |
---|
| 161 | } //end for (for variables) |
---|
| 162 | |
---|
| 163 | //Get all the edges and probability tables by getting all the definitions |
---|
| 164 | nl = dc.getElementsByTagName("DEFINITION"); |
---|
| 165 | for(int i=0; i<nl.getLength(); i++) { |
---|
| 166 | |
---|
| 167 | templist = ((Element)nl.item(i)).getElementsByTagName("FOR"); |
---|
| 168 | //the Label of the node the edges are coming into |
---|
| 169 | String nid = templist.item(0).getFirstChild().getNodeValue(); |
---|
| 170 | |
---|
| 171 | //getting the GraphNode object with the above label |
---|
| 172 | GraphNode n = (GraphNode)m_nodes.elementAt(0); |
---|
| 173 | for(int j=1; j<m_nodes.size() && !n.ID.equals(nid); j++) |
---|
| 174 | n = (GraphNode)m_nodes.elementAt(j); |
---|
| 175 | |
---|
| 176 | |
---|
| 177 | templist = ((Element)nl.item(i)).getElementsByTagName("GIVEN"); |
---|
| 178 | int parntOutcomes = 1; //for creating the probability table later on |
---|
| 179 | //creating all the edges coming into the node |
---|
| 180 | for(int j=0; j<templist.getLength(); j++) { |
---|
| 181 | nid = templist.item(j).getFirstChild().getNodeValue(); |
---|
| 182 | |
---|
| 183 | GraphNode n2 = (GraphNode)m_nodes.elementAt(0); |
---|
| 184 | for(int k=1; k<m_nodes.size() && !n2.ID.equals(nid); k++) |
---|
| 185 | n2 = (GraphNode)m_nodes.elementAt(k); |
---|
| 186 | m_edges.addElement( new GraphEdge(m_nodes.indexOf(n2), |
---|
| 187 | m_nodes.indexOf(n), 1) ); |
---|
| 188 | |
---|
| 189 | parntOutcomes *= n2.outcomes.length; |
---|
| 190 | } |
---|
| 191 | |
---|
| 192 | //creating the probability table for the node |
---|
| 193 | templist = ((Element)nl.item(i)).getElementsByTagName("TABLE"); |
---|
| 194 | if(templist.getLength()>1) |
---|
| 195 | throw new BIFFormatException("More than one Probability Table for "+ |
---|
| 196 | n.ID); |
---|
| 197 | |
---|
| 198 | String probs = templist.item(0).getFirstChild().getNodeValue(); |
---|
| 199 | StringTokenizer tk = new StringTokenizer(probs, " \n\t"); |
---|
| 200 | |
---|
| 201 | if(parntOutcomes*n.outcomes.length > tk.countTokens()) |
---|
| 202 | throw new BIFFormatException("Probability Table for "+n.ID+ |
---|
| 203 | " contains more values than it should"); |
---|
| 204 | else if(parntOutcomes*n.outcomes.length < tk.countTokens()) |
---|
| 205 | throw new BIFFormatException("Probability Table for "+n.ID+ |
---|
| 206 | " contains less values than it should"); |
---|
| 207 | else { |
---|
| 208 | n.probs = new double[parntOutcomes][n.outcomes.length]; |
---|
| 209 | for(int r=0; r<parntOutcomes; r++) //row |
---|
| 210 | for(int c=0; c<n.outcomes.length; c++) //column |
---|
| 211 | try { |
---|
| 212 | n.probs[r][c] = Double.parseDouble( tk.nextToken() ); |
---|
| 213 | } |
---|
| 214 | catch(NumberFormatException ne) { throw ne; } |
---|
| 215 | } // end of creating probability table |
---|
| 216 | } //endfor (for edges) |
---|
| 217 | |
---|
| 218 | |
---|
| 219 | //int tmpMatrix[][] = new int[m_nodes.size()][m_nodes.size()]; |
---|
| 220 | //for(int i=0; i<m_edges.size(); i++) |
---|
| 221 | // tmpMatrix[((GraphEdge)m_edges.elementAt(i)).src] |
---|
| 222 | // [((GraphEdge)m_edges.elementAt(i)).dest] = |
---|
| 223 | // ((GraphEdge)m_edges.elementAt(i)).type; |
---|
| 224 | //for(int i=0; i<m_nodes.size(); i++) { |
---|
| 225 | // GraphNode n = (GraphNode)m_nodes.elementAt(i); |
---|
| 226 | // n.edges = tmpMatrix[i]; |
---|
| 227 | //} |
---|
| 228 | |
---|
| 229 | //Adding parents, and those edges to a node which are coming out from it |
---|
| 230 | int tmpEdges[], noOfEdgesOfNode[]=new int[m_nodes.size()]; |
---|
| 231 | int noOfPrntsOfNode[]=new int[m_nodes.size()]; |
---|
| 232 | for(int i=0; i<m_edges.size(); i++) { |
---|
| 233 | GraphEdge e = (GraphEdge)m_edges.elementAt(i); |
---|
| 234 | noOfEdgesOfNode[e.src]++; |
---|
| 235 | noOfPrntsOfNode[e.dest]++; |
---|
| 236 | } |
---|
| 237 | |
---|
| 238 | for(int i=0; i<m_edges.size(); i++) { |
---|
| 239 | GraphEdge e = (GraphEdge)m_edges.elementAt(i); |
---|
| 240 | GraphNode n = (GraphNode)m_nodes.elementAt(e.src); |
---|
| 241 | GraphNode n2 = (GraphNode)m_nodes.elementAt(e.dest); |
---|
| 242 | if(n.edges==null) { |
---|
| 243 | n.edges = new int[noOfEdgesOfNode[e.src]][2]; |
---|
| 244 | for(int k=0; k<n.edges.length; k++) |
---|
| 245 | n.edges[k][0]=-1; |
---|
| 246 | } |
---|
| 247 | if(n2.prnts==null) { |
---|
| 248 | n2.prnts = new int[noOfPrntsOfNode[e.dest]]; |
---|
| 249 | for(int k=0; k<n2.prnts.length; k++) |
---|
| 250 | n2.prnts[k]=-1; |
---|
| 251 | } |
---|
| 252 | |
---|
| 253 | int k=0; |
---|
| 254 | while(n.edges[k][0]!=-1) k++; |
---|
| 255 | n.edges[k][0] = e.dest; |
---|
| 256 | n.edges[k][1] = e.type; |
---|
| 257 | |
---|
| 258 | k=0; |
---|
| 259 | while(n2.prnts[k]!=-1) k++; |
---|
| 260 | n2.prnts[k] = e.src; |
---|
| 261 | } |
---|
| 262 | |
---|
| 263 | //processGraph(); |
---|
| 264 | //setAppropriateSize(); |
---|
| 265 | return graphName; |
---|
| 266 | } //end readBIF |
---|
| 267 | |
---|
| 268 | |
---|
| 269 | /** |
---|
| 270 | * This method writes a graph in XMLBIF ver. 0.3 format to a file. |
---|
| 271 | * However, if is reloaded in GraphVisualizer we would need to layout |
---|
| 272 | * the graph again to display it correctly. |
---|
| 273 | * |
---|
| 274 | * @param filename The name of the file to write in. (will overwrite) |
---|
| 275 | * @param graphName The name of the graph. (will be the name of network |
---|
| 276 | * tag in XMLBIF) |
---|
| 277 | * @param nodes Vector containing all the nodes |
---|
| 278 | * @param edges Vector containing all the edges |
---|
| 279 | */ |
---|
| 280 | public static void writeXMLBIF03(String filename, String graphName, |
---|
| 281 | FastVector nodes, FastVector edges) { |
---|
| 282 | try { |
---|
| 283 | FileWriter outfile = new FileWriter(filename); |
---|
| 284 | |
---|
| 285 | StringBuffer text = new StringBuffer(); |
---|
| 286 | |
---|
| 287 | text.append("<?xml version=\"1.0\"?>\n"); |
---|
| 288 | text.append("<!-- DTD for the XMLBIF 0.3 format -->\n"); |
---|
| 289 | text.append("<!DOCTYPE BIF [\n"); |
---|
| 290 | text.append(" <!ELEMENT BIF ( NETWORK )*>\n"); |
---|
| 291 | text.append(" <!ATTLIST BIF VERSION CDATA #REQUIRED>\n"); |
---|
| 292 | text.append(" <!ELEMENT NETWORK ( NAME, ( PROPERTY | VARIABLE | DEFI"+ |
---|
| 293 | "NITION )* )>\n"); |
---|
| 294 | text.append(" <!ELEMENT NAME (#PCDATA)>\n"); |
---|
| 295 | text.append(" <!ELEMENT VARIABLE ( NAME, ( OUTCOME | PROPERTY )* )"+ |
---|
| 296 | " >\n"); |
---|
| 297 | text.append(" <!ATTLIST VARIABLE TYPE (nature|decision|utility"+ |
---|
| 298 | ") \"nature\">\n"); |
---|
| 299 | text.append(" <!ELEMENT OUTCOME (#PCDATA)>\n"); |
---|
| 300 | text.append(" <!ELEMENT DEFINITION ( FOR | GIVEN | TABLE | PROPERTY"+ |
---|
| 301 | " )* >\n"); |
---|
| 302 | text.append(" <!ELEMENT FOR (#PCDATA)>\n"); |
---|
| 303 | text.append(" <!ELEMENT GIVEN (#PCDATA)>\n"); |
---|
| 304 | text.append(" <!ELEMENT TABLE (#PCDATA)>\n"); |
---|
| 305 | text.append(" <!ELEMENT PROPERTY (#PCDATA)>\n"); |
---|
| 306 | text.append("]>\n"); |
---|
| 307 | text.append("\n"); |
---|
| 308 | text.append("\n"); |
---|
| 309 | text.append("<BIF VERSION=\"0.3\">\n"); |
---|
| 310 | text.append("<NETWORK>\n"); |
---|
| 311 | text.append("<NAME>" + XMLNormalize(graphName) + "</NAME>\n"); |
---|
| 312 | |
---|
| 313 | //Writing all the node names and their outcomes |
---|
| 314 | //If outcome is null(ie if the graph was loaded from DOT file) then |
---|
| 315 | //simply write TRUE |
---|
| 316 | for(int nodeidx=0; nodeidx<nodes.size(); nodeidx++) { |
---|
| 317 | GraphNode n = (GraphNode)nodes.elementAt(nodeidx); |
---|
| 318 | if(n.nodeType!=GraphNode.NORMAL) |
---|
| 319 | continue; |
---|
| 320 | |
---|
| 321 | text.append("<VARIABLE TYPE=\"nature\">\n"); |
---|
| 322 | text.append("\t<NAME>" + XMLNormalize(n.ID) + "</NAME>\n"); |
---|
| 323 | |
---|
| 324 | if(n.outcomes!=null) |
---|
| 325 | for(int outidx=0; outidx<n.outcomes.length; outidx++) |
---|
| 326 | text.append("\t<OUTCOME>" + XMLNormalize(n.outcomes[outidx])+ |
---|
| 327 | "</OUTCOME>\n"); |
---|
| 328 | else |
---|
| 329 | text.append("\t<OUTCOME>true</OUTCOME>\n"); |
---|
| 330 | |
---|
| 331 | text.append("\t<PROPERTY>position = ("+n.x+","+n.y+")</PROPERTY>\n"); |
---|
| 332 | text.append("</VARIABLE>\n"); |
---|
| 333 | } |
---|
| 334 | |
---|
| 335 | //Writing all the nodes definitions and their probability tables |
---|
| 336 | //If probability table is null then simply write 1 for all |
---|
| 337 | //the posible outcomes of the parents |
---|
| 338 | for (int nodeidx=0; nodeidx<nodes.size(); nodeidx++) { |
---|
| 339 | GraphNode n = (GraphNode) nodes.elementAt(nodeidx); |
---|
| 340 | if(n.nodeType!=GraphNode.NORMAL) |
---|
| 341 | continue; |
---|
| 342 | |
---|
| 343 | text.append("<DEFINITION>\n"); |
---|
| 344 | text.append("<FOR>" + XMLNormalize(n.ID) + "</FOR>\n"); |
---|
| 345 | int parntOutcomes = 1; |
---|
| 346 | if(n.prnts!=null) |
---|
| 347 | for(int pidx=0; pidx<n.prnts.length; pidx++) { |
---|
| 348 | GraphNode prnt = (GraphNode)nodes.elementAt(n.prnts[pidx]); |
---|
| 349 | text.append("\t<GIVEN>" + XMLNormalize(prnt.ID) + "</GIVEN>\n"); |
---|
| 350 | if(prnt.outcomes!=null) |
---|
| 351 | parntOutcomes *= prnt.outcomes.length; |
---|
| 352 | } |
---|
| 353 | |
---|
| 354 | text.append("<TABLE>\n"); |
---|
| 355 | for(int i=0; i<parntOutcomes; i++) { |
---|
| 356 | if(n.outcomes!=null) |
---|
| 357 | for(int outidx=0; outidx<n.outcomes.length; outidx++){ |
---|
| 358 | text.append(n.probs[i][outidx]+" "); |
---|
| 359 | } |
---|
| 360 | else |
---|
| 361 | text.append("1"); |
---|
| 362 | text.append('\n'); |
---|
| 363 | } |
---|
| 364 | text.append("</TABLE>\n"); |
---|
| 365 | text.append("</DEFINITION>\n"); |
---|
| 366 | } |
---|
| 367 | |
---|
| 368 | text.append("</NETWORK>\n"); |
---|
| 369 | text.append("</BIF>\n"); |
---|
| 370 | |
---|
| 371 | outfile.write(text.toString()); |
---|
| 372 | outfile.close(); |
---|
| 373 | } |
---|
| 374 | catch(IOException ex) { ex.printStackTrace(); } |
---|
| 375 | } // writeXMLBIF |
---|
| 376 | |
---|
| 377 | /** XMLNormalize converts the five standard XML entities in a string |
---|
| 378 | * g.e. the string V&D's is returned as V&D's |
---|
| 379 | * @author Remco Bouckaert (rrb@xm.co.nz) |
---|
| 380 | * @param sStr string to normalize |
---|
| 381 | * @return normalized string |
---|
| 382 | */ |
---|
| 383 | private static String XMLNormalize(String sStr) { |
---|
| 384 | StringBuffer sStr2 = new StringBuffer(); |
---|
| 385 | for (int iStr = 0; iStr < sStr.length(); iStr++) { |
---|
| 386 | char c = sStr.charAt(iStr); |
---|
| 387 | switch (c) { |
---|
| 388 | case '&': sStr2.append("&"); break; |
---|
| 389 | case '\'': sStr2.append("'"); break; |
---|
| 390 | case '\"': sStr2.append("""); break; |
---|
| 391 | case '<': sStr2.append("<"); break; |
---|
| 392 | case '>': sStr2.append(">"); break; |
---|
| 393 | default: |
---|
| 394 | sStr2.append(c); |
---|
| 395 | } |
---|
| 396 | } |
---|
| 397 | return sStr2.toString(); |
---|
| 398 | } // XMLNormalize |
---|
| 399 | |
---|
| 400 | |
---|
| 401 | } // BIFParser |
---|