1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * BIFParser.java |
---|
19 | * Copyright (C) 2003 University of Waikato, Hamilton, New Zealand |
---|
20 | * |
---|
21 | */ |
---|
22 | package weka.gui.graphvisualizer; |
---|
23 | |
---|
24 | import java.io.InputStream; |
---|
25 | import java.io.FileWriter; |
---|
26 | import java.io.IOException; |
---|
27 | import java.io.StringReader; |
---|
28 | import java.util.StringTokenizer; |
---|
29 | |
---|
30 | import weka.core.FastVector; |
---|
31 | |
---|
32 | import org.w3c.dom.Document; |
---|
33 | import org.w3c.dom.NodeList; |
---|
34 | import org.w3c.dom.Element; |
---|
35 | |
---|
36 | |
---|
37 | /** |
---|
38 | * This class parses an inputstream or a string in |
---|
39 | * XMLBIF ver. 0.3 format, and builds the datastructures |
---|
40 | * that are passed to it through the constructor. |
---|
41 | * |
---|
42 | * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) |
---|
43 | * @version $Revision: 1.7 $ - 24 Apr 2003 - Initial version (Ashraf M. Kibriya) |
---|
44 | */ |
---|
45 | public class BIFParser implements GraphConstants { |
---|
46 | |
---|
47 | /** These holds the nodes and edges of the graph */ |
---|
48 | protected FastVector m_nodes, m_edges; |
---|
49 | /** This holds the name of the graph (i.e. the name of network tag in XMLBIF |
---|
50 | * input) |
---|
51 | */ |
---|
52 | protected String graphName; |
---|
53 | /** This holds the string to be parsed */ |
---|
54 | protected String inString; |
---|
55 | /** This holds the InputStream to be parsed */ |
---|
56 | protected InputStream inStream; |
---|
57 | |
---|
58 | |
---|
59 | /** |
---|
60 | * Constructor (if our input is a String) |
---|
61 | * |
---|
62 | * @param input the string to be parsed (should not be null) |
---|
63 | * @param nodes vector containing GraphNode objects (should be empty) |
---|
64 | * @param edges vector containing GraphEdge objects (should be empty) |
---|
65 | */ |
---|
66 | public BIFParser(String input, FastVector nodes, FastVector edges) { |
---|
67 | m_nodes = nodes; m_edges = edges; inString = input; |
---|
68 | } |
---|
69 | |
---|
70 | |
---|
71 | /** |
---|
72 | * Constructor (if our input is an InputStream) |
---|
73 | * |
---|
74 | * @param instream the InputStream to be parsed (should not be null) |
---|
75 | * @param nodes vector containing GraphNode objects (should be empty) |
---|
76 | * @param edges vector containing GraphEdge objects (should be empty) |
---|
77 | */ |
---|
78 | public BIFParser(InputStream instream, FastVector nodes, FastVector edges) { |
---|
79 | m_nodes = nodes; m_edges = edges; inStream = instream; |
---|
80 | } |
---|
81 | |
---|
82 | |
---|
83 | /** |
---|
84 | * This method parses the string or the InputStream that we |
---|
85 | * passed in through the constructor and builds up the |
---|
86 | * m_nodes and m_edges vectors |
---|
87 | * @exception Exception if both the inString and inStream are |
---|
88 | * null, i.e. no input has been provided |
---|
89 | * @exception BIFFormatException if there is format of the |
---|
90 | * input is not correct. The format should conform to |
---|
91 | * XMLBIF version 0.3 |
---|
92 | * @exception NumberFormatException if there is an invalid |
---|
93 | * char in the probability table of a node. |
---|
94 | * @return returns the name of the graph |
---|
95 | */ |
---|
96 | public String parse() throws Exception { |
---|
97 | Document dc=null; |
---|
98 | |
---|
99 | javax.xml.parsers.DocumentBuilderFactory dbf = |
---|
100 | javax.xml.parsers.DocumentBuilderFactory.newInstance(); |
---|
101 | dbf.setIgnoringElementContentWhitespace(true); |
---|
102 | javax.xml.parsers.DocumentBuilder db = dbf.newDocumentBuilder(); |
---|
103 | |
---|
104 | if(inStream!=null) |
---|
105 | dc = db.parse(inStream); |
---|
106 | else if(inString!=null) |
---|
107 | dc = db.parse(new org.xml.sax.InputSource(new StringReader(inString))); |
---|
108 | else |
---|
109 | { throw new Exception("No input given"); } |
---|
110 | |
---|
111 | NodeList nl = dc.getElementsByTagName( "NETWORK" ); |
---|
112 | |
---|
113 | if(nl.getLength()==0) { |
---|
114 | throw new BIFFormatException( "NETWORK tag not found" ); |
---|
115 | } |
---|
116 | |
---|
117 | //take only the first network node |
---|
118 | NodeList templist = ((Element)nl.item(0)).getElementsByTagName( "NAME" ); |
---|
119 | graphName = templist.item(0).getFirstChild().getNodeValue(); |
---|
120 | //System.out.println("The name of the network is "+ |
---|
121 | //templist.item(0).getFirstChild().getNodeValue()); |
---|
122 | |
---|
123 | //Get all the variables |
---|
124 | nl = dc.getElementsByTagName("VARIABLE"); |
---|
125 | for(int i=0; i<nl.getLength(); i++) { |
---|
126 | |
---|
127 | templist = ((Element)nl.item(i)).getElementsByTagName("NAME"); |
---|
128 | if(templist.getLength()>1) |
---|
129 | throw new BIFFormatException("More than one name tags found for "+ |
---|
130 | "variable no. "+(i+1)); |
---|
131 | |
---|
132 | String nodename = |
---|
133 | ((org.w3c.dom.Node)templist.item(0)).getFirstChild().getNodeValue(); |
---|
134 | GraphNode n = new GraphNode( nodename, nodename, GraphNode.NORMAL ); |
---|
135 | m_nodes.addElement(n); |
---|
136 | //getting nodes position |
---|
137 | templist = ((Element)nl.item(i)).getElementsByTagName("PROPERTY"); |
---|
138 | for(int j=0; j<templist.getLength(); j++) { |
---|
139 | if( ((org.w3c.dom.Node)templist.item(j)).getFirstChild() |
---|
140 | .getNodeValue().startsWith("position") ) { |
---|
141 | String xy = templist.item(j).getFirstChild().getNodeValue(); |
---|
142 | //System.out.println("x: "+ |
---|
143 | // xy.substring(xy.indexOf('(')+1, xy.indexOf(','))+ |
---|
144 | // " y: "+ |
---|
145 | // xy.substring(xy.indexOf(',')+1, xy.indexOf(')')) |
---|
146 | // ); |
---|
147 | n.x = Integer.parseInt( xy.substring(xy.indexOf('(')+ |
---|
148 | 1, xy.indexOf(',')).trim() ); |
---|
149 | n.y = Integer.parseInt( xy.substring(xy.indexOf(',')+ |
---|
150 | 1, xy.indexOf(')')).trim() ); |
---|
151 | break; |
---|
152 | } |
---|
153 | } |
---|
154 | //getting all the outcomes of the node |
---|
155 | templist = ((Element)nl.item(i)).getElementsByTagName("OUTCOME"); |
---|
156 | n.outcomes = new String[templist.getLength()]; |
---|
157 | for(int j=0; j<templist.getLength(); j++) { |
---|
158 | n.outcomes[j] = templist.item(j).getFirstChild().getNodeValue(); |
---|
159 | //System.out.println("Outcome["+j+"]: "+n.outcomes[j]); |
---|
160 | } |
---|
161 | } //end for (for variables) |
---|
162 | |
---|
163 | //Get all the edges and probability tables by getting all the definitions |
---|
164 | nl = dc.getElementsByTagName("DEFINITION"); |
---|
165 | for(int i=0; i<nl.getLength(); i++) { |
---|
166 | |
---|
167 | templist = ((Element)nl.item(i)).getElementsByTagName("FOR"); |
---|
168 | //the Label of the node the edges are coming into |
---|
169 | String nid = templist.item(0).getFirstChild().getNodeValue(); |
---|
170 | |
---|
171 | //getting the GraphNode object with the above label |
---|
172 | GraphNode n = (GraphNode)m_nodes.elementAt(0); |
---|
173 | for(int j=1; j<m_nodes.size() && !n.ID.equals(nid); j++) |
---|
174 | n = (GraphNode)m_nodes.elementAt(j); |
---|
175 | |
---|
176 | |
---|
177 | templist = ((Element)nl.item(i)).getElementsByTagName("GIVEN"); |
---|
178 | int parntOutcomes = 1; //for creating the probability table later on |
---|
179 | //creating all the edges coming into the node |
---|
180 | for(int j=0; j<templist.getLength(); j++) { |
---|
181 | nid = templist.item(j).getFirstChild().getNodeValue(); |
---|
182 | |
---|
183 | GraphNode n2 = (GraphNode)m_nodes.elementAt(0); |
---|
184 | for(int k=1; k<m_nodes.size() && !n2.ID.equals(nid); k++) |
---|
185 | n2 = (GraphNode)m_nodes.elementAt(k); |
---|
186 | m_edges.addElement( new GraphEdge(m_nodes.indexOf(n2), |
---|
187 | m_nodes.indexOf(n), 1) ); |
---|
188 | |
---|
189 | parntOutcomes *= n2.outcomes.length; |
---|
190 | } |
---|
191 | |
---|
192 | //creating the probability table for the node |
---|
193 | templist = ((Element)nl.item(i)).getElementsByTagName("TABLE"); |
---|
194 | if(templist.getLength()>1) |
---|
195 | throw new BIFFormatException("More than one Probability Table for "+ |
---|
196 | n.ID); |
---|
197 | |
---|
198 | String probs = templist.item(0).getFirstChild().getNodeValue(); |
---|
199 | StringTokenizer tk = new StringTokenizer(probs, " \n\t"); |
---|
200 | |
---|
201 | if(parntOutcomes*n.outcomes.length > tk.countTokens()) |
---|
202 | throw new BIFFormatException("Probability Table for "+n.ID+ |
---|
203 | " contains more values than it should"); |
---|
204 | else if(parntOutcomes*n.outcomes.length < tk.countTokens()) |
---|
205 | throw new BIFFormatException("Probability Table for "+n.ID+ |
---|
206 | " contains less values than it should"); |
---|
207 | else { |
---|
208 | n.probs = new double[parntOutcomes][n.outcomes.length]; |
---|
209 | for(int r=0; r<parntOutcomes; r++) //row |
---|
210 | for(int c=0; c<n.outcomes.length; c++) //column |
---|
211 | try { |
---|
212 | n.probs[r][c] = Double.parseDouble( tk.nextToken() ); |
---|
213 | } |
---|
214 | catch(NumberFormatException ne) { throw ne; } |
---|
215 | } // end of creating probability table |
---|
216 | } //endfor (for edges) |
---|
217 | |
---|
218 | |
---|
219 | //int tmpMatrix[][] = new int[m_nodes.size()][m_nodes.size()]; |
---|
220 | //for(int i=0; i<m_edges.size(); i++) |
---|
221 | // tmpMatrix[((GraphEdge)m_edges.elementAt(i)).src] |
---|
222 | // [((GraphEdge)m_edges.elementAt(i)).dest] = |
---|
223 | // ((GraphEdge)m_edges.elementAt(i)).type; |
---|
224 | //for(int i=0; i<m_nodes.size(); i++) { |
---|
225 | // GraphNode n = (GraphNode)m_nodes.elementAt(i); |
---|
226 | // n.edges = tmpMatrix[i]; |
---|
227 | //} |
---|
228 | |
---|
229 | //Adding parents, and those edges to a node which are coming out from it |
---|
230 | int tmpEdges[], noOfEdgesOfNode[]=new int[m_nodes.size()]; |
---|
231 | int noOfPrntsOfNode[]=new int[m_nodes.size()]; |
---|
232 | for(int i=0; i<m_edges.size(); i++) { |
---|
233 | GraphEdge e = (GraphEdge)m_edges.elementAt(i); |
---|
234 | noOfEdgesOfNode[e.src]++; |
---|
235 | noOfPrntsOfNode[e.dest]++; |
---|
236 | } |
---|
237 | |
---|
238 | for(int i=0; i<m_edges.size(); i++) { |
---|
239 | GraphEdge e = (GraphEdge)m_edges.elementAt(i); |
---|
240 | GraphNode n = (GraphNode)m_nodes.elementAt(e.src); |
---|
241 | GraphNode n2 = (GraphNode)m_nodes.elementAt(e.dest); |
---|
242 | if(n.edges==null) { |
---|
243 | n.edges = new int[noOfEdgesOfNode[e.src]][2]; |
---|
244 | for(int k=0; k<n.edges.length; k++) |
---|
245 | n.edges[k][0]=-1; |
---|
246 | } |
---|
247 | if(n2.prnts==null) { |
---|
248 | n2.prnts = new int[noOfPrntsOfNode[e.dest]]; |
---|
249 | for(int k=0; k<n2.prnts.length; k++) |
---|
250 | n2.prnts[k]=-1; |
---|
251 | } |
---|
252 | |
---|
253 | int k=0; |
---|
254 | while(n.edges[k][0]!=-1) k++; |
---|
255 | n.edges[k][0] = e.dest; |
---|
256 | n.edges[k][1] = e.type; |
---|
257 | |
---|
258 | k=0; |
---|
259 | while(n2.prnts[k]!=-1) k++; |
---|
260 | n2.prnts[k] = e.src; |
---|
261 | } |
---|
262 | |
---|
263 | //processGraph(); |
---|
264 | //setAppropriateSize(); |
---|
265 | return graphName; |
---|
266 | } //end readBIF |
---|
267 | |
---|
268 | |
---|
269 | /** |
---|
270 | * This method writes a graph in XMLBIF ver. 0.3 format to a file. |
---|
271 | * However, if is reloaded in GraphVisualizer we would need to layout |
---|
272 | * the graph again to display it correctly. |
---|
273 | * |
---|
274 | * @param filename The name of the file to write in. (will overwrite) |
---|
275 | * @param graphName The name of the graph. (will be the name of network |
---|
276 | * tag in XMLBIF) |
---|
277 | * @param nodes Vector containing all the nodes |
---|
278 | * @param edges Vector containing all the edges |
---|
279 | */ |
---|
280 | public static void writeXMLBIF03(String filename, String graphName, |
---|
281 | FastVector nodes, FastVector edges) { |
---|
282 | try { |
---|
283 | FileWriter outfile = new FileWriter(filename); |
---|
284 | |
---|
285 | StringBuffer text = new StringBuffer(); |
---|
286 | |
---|
287 | text.append("<?xml version=\"1.0\"?>\n"); |
---|
288 | text.append("<!-- DTD for the XMLBIF 0.3 format -->\n"); |
---|
289 | text.append("<!DOCTYPE BIF [\n"); |
---|
290 | text.append(" <!ELEMENT BIF ( NETWORK )*>\n"); |
---|
291 | text.append(" <!ATTLIST BIF VERSION CDATA #REQUIRED>\n"); |
---|
292 | text.append(" <!ELEMENT NETWORK ( NAME, ( PROPERTY | VARIABLE | DEFI"+ |
---|
293 | "NITION )* )>\n"); |
---|
294 | text.append(" <!ELEMENT NAME (#PCDATA)>\n"); |
---|
295 | text.append(" <!ELEMENT VARIABLE ( NAME, ( OUTCOME | PROPERTY )* )"+ |
---|
296 | " >\n"); |
---|
297 | text.append(" <!ATTLIST VARIABLE TYPE (nature|decision|utility"+ |
---|
298 | ") \"nature\">\n"); |
---|
299 | text.append(" <!ELEMENT OUTCOME (#PCDATA)>\n"); |
---|
300 | text.append(" <!ELEMENT DEFINITION ( FOR | GIVEN | TABLE | PROPERTY"+ |
---|
301 | " )* >\n"); |
---|
302 | text.append(" <!ELEMENT FOR (#PCDATA)>\n"); |
---|
303 | text.append(" <!ELEMENT GIVEN (#PCDATA)>\n"); |
---|
304 | text.append(" <!ELEMENT TABLE (#PCDATA)>\n"); |
---|
305 | text.append(" <!ELEMENT PROPERTY (#PCDATA)>\n"); |
---|
306 | text.append("]>\n"); |
---|
307 | text.append("\n"); |
---|
308 | text.append("\n"); |
---|
309 | text.append("<BIF VERSION=\"0.3\">\n"); |
---|
310 | text.append("<NETWORK>\n"); |
---|
311 | text.append("<NAME>" + XMLNormalize(graphName) + "</NAME>\n"); |
---|
312 | |
---|
313 | //Writing all the node names and their outcomes |
---|
314 | //If outcome is null(ie if the graph was loaded from DOT file) then |
---|
315 | //simply write TRUE |
---|
316 | for(int nodeidx=0; nodeidx<nodes.size(); nodeidx++) { |
---|
317 | GraphNode n = (GraphNode)nodes.elementAt(nodeidx); |
---|
318 | if(n.nodeType!=GraphNode.NORMAL) |
---|
319 | continue; |
---|
320 | |
---|
321 | text.append("<VARIABLE TYPE=\"nature\">\n"); |
---|
322 | text.append("\t<NAME>" + XMLNormalize(n.ID) + "</NAME>\n"); |
---|
323 | |
---|
324 | if(n.outcomes!=null) |
---|
325 | for(int outidx=0; outidx<n.outcomes.length; outidx++) |
---|
326 | text.append("\t<OUTCOME>" + XMLNormalize(n.outcomes[outidx])+ |
---|
327 | "</OUTCOME>\n"); |
---|
328 | else |
---|
329 | text.append("\t<OUTCOME>true</OUTCOME>\n"); |
---|
330 | |
---|
331 | text.append("\t<PROPERTY>position = ("+n.x+","+n.y+")</PROPERTY>\n"); |
---|
332 | text.append("</VARIABLE>\n"); |
---|
333 | } |
---|
334 | |
---|
335 | //Writing all the nodes definitions and their probability tables |
---|
336 | //If probability table is null then simply write 1 for all |
---|
337 | //the posible outcomes of the parents |
---|
338 | for (int nodeidx=0; nodeidx<nodes.size(); nodeidx++) { |
---|
339 | GraphNode n = (GraphNode) nodes.elementAt(nodeidx); |
---|
340 | if(n.nodeType!=GraphNode.NORMAL) |
---|
341 | continue; |
---|
342 | |
---|
343 | text.append("<DEFINITION>\n"); |
---|
344 | text.append("<FOR>" + XMLNormalize(n.ID) + "</FOR>\n"); |
---|
345 | int parntOutcomes = 1; |
---|
346 | if(n.prnts!=null) |
---|
347 | for(int pidx=0; pidx<n.prnts.length; pidx++) { |
---|
348 | GraphNode prnt = (GraphNode)nodes.elementAt(n.prnts[pidx]); |
---|
349 | text.append("\t<GIVEN>" + XMLNormalize(prnt.ID) + "</GIVEN>\n"); |
---|
350 | if(prnt.outcomes!=null) |
---|
351 | parntOutcomes *= prnt.outcomes.length; |
---|
352 | } |
---|
353 | |
---|
354 | text.append("<TABLE>\n"); |
---|
355 | for(int i=0; i<parntOutcomes; i++) { |
---|
356 | if(n.outcomes!=null) |
---|
357 | for(int outidx=0; outidx<n.outcomes.length; outidx++){ |
---|
358 | text.append(n.probs[i][outidx]+" "); |
---|
359 | } |
---|
360 | else |
---|
361 | text.append("1"); |
---|
362 | text.append('\n'); |
---|
363 | } |
---|
364 | text.append("</TABLE>\n"); |
---|
365 | text.append("</DEFINITION>\n"); |
---|
366 | } |
---|
367 | |
---|
368 | text.append("</NETWORK>\n"); |
---|
369 | text.append("</BIF>\n"); |
---|
370 | |
---|
371 | outfile.write(text.toString()); |
---|
372 | outfile.close(); |
---|
373 | } |
---|
374 | catch(IOException ex) { ex.printStackTrace(); } |
---|
375 | } // writeXMLBIF |
---|
376 | |
---|
377 | /** XMLNormalize converts the five standard XML entities in a string |
---|
378 | * g.e. the string V&D's is returned as V&D's |
---|
379 | * @author Remco Bouckaert (rrb@xm.co.nz) |
---|
380 | * @param sStr string to normalize |
---|
381 | * @return normalized string |
---|
382 | */ |
---|
383 | private static String XMLNormalize(String sStr) { |
---|
384 | StringBuffer sStr2 = new StringBuffer(); |
---|
385 | for (int iStr = 0; iStr < sStr.length(); iStr++) { |
---|
386 | char c = sStr.charAt(iStr); |
---|
387 | switch (c) { |
---|
388 | case '&': sStr2.append("&"); break; |
---|
389 | case '\'': sStr2.append("'"); break; |
---|
390 | case '\"': sStr2.append("""); break; |
---|
391 | case '<': sStr2.append("<"); break; |
---|
392 | case '>': sStr2.append(">"); break; |
---|
393 | default: |
---|
394 | sStr2.append(c); |
---|
395 | } |
---|
396 | } |
---|
397 | return sStr2.toString(); |
---|
398 | } // XMLNormalize |
---|
399 | |
---|
400 | |
---|
401 | } // BIFParser |
---|