Context Navigation

K2.java

Last change on this file was 29, checked in by gnappo, 15 years ago
Taggata versione per la demo e aggiunto branch.
File size: 11.9 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* K2.java
19	* Copyright (C) 2001 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22	package weka.classifiers.bayes.net.search.global;
23
24	import java.util.Enumeration;
25	import java.util.Vector;
26	import java.util.Random;
27
28	import weka.classifiers.bayes.BayesNet;
29	import weka.core.Instances;
30	import weka.core.Option;
31	import weka.core.RevisionUtils;
32	import weka.core.TechnicalInformation;
33	import weka.core.TechnicalInformation.Type;
34	import weka.core.TechnicalInformation.Field;
35	import weka.core.TechnicalInformationHandler;
36	import weka.core.Utils;
37
38	/**
39	<!-- globalinfo-start -->
40	* This Bayes Network learning algorithm uses a hill climbing algorithm restricted by an order on the variables.<br/>
41	* <br/>
42	* For more information see:<br/>
43	* <br/>
44	* G.F. Cooper, E. Herskovits (1990). A Bayesian method for constructing Bayesian belief networks from databases.<br/>
45	* <br/>
46	* G. Cooper, E. Herskovits (1992). A Bayesian method for the induction of probabilistic networks from data. Machine Learning. 9(4):309-347.<br/>
47	* <br/>
48	* Works with nominal variables and no missing values only.
49	* <p/>
50	<!-- globalinfo-end -->
51	*
52	<!-- technical-bibtex-start -->
53	* BibTeX:
54	* <pre>
55	* @proceedings{Cooper1990,
56	* author = {G.F. Cooper and E. Herskovits},
57	* booktitle = {Proceedings of the Conference on Uncertainty in AI},
58	* pages = {86-94},
59	* title = {A Bayesian method for constructing Bayesian belief networks from databases},
60	* year = {1990}
61	* }
62	*
63	* @article{Cooper1992,
64	* author = {G. Cooper and E. Herskovits},
65	* journal = {Machine Learning},
66	* number = {4},
67	* pages = {309-347},
68	* title = {A Bayesian method for the induction of probabilistic networks from data},
69	* volume = {9},
70	* year = {1992}
71	* }
72	* </pre>
73	* <p/>
74	<!-- technical-bibtex-end -->
75	*
76	<!-- options-start -->
77	* Valid options are: <p/>
78	*
79	* <pre> -N
80	* Initial structure is empty (instead of Naive Bayes)</pre>
81	*
82	* <pre> -P <nr of parents>
83	* Maximum number of parents</pre>
84	*
85	* <pre> -R
86	* Random order.
87	* (default false)</pre>
88	*
89	* <pre> -mbc
90	* Applies a Markov Blanket correction to the network structure,
91	* after a network structure is learned. This ensures that all
92	* nodes in the network are part of the Markov blanket of the
93	* classifier node.</pre>
94	*
95	* <pre> -S [LOO-CV\|k-Fold-CV\|Cumulative-CV]
96	* Score type (LOO-CV,k-Fold-CV,Cumulative-CV)</pre>
97	*
98	* <pre> -Q
99	* Use probabilistic or 0/1 scoring.
100	* (default probabilistic scoring)</pre>
101	*
102	<!-- options-end -->
103	*
104	* @author Remco Bouckaert (rrb@xm.co.nz)
105	* @version $Revision: 1.8 $
106	*/
107	public class K2
108	extends GlobalScoreSearchAlgorithm
109	implements TechnicalInformationHandler {
110
111	/** for serialization */
112	static final long serialVersionUID = -6626871067466338256L;
113
114	/ Holds flag to indicate ordering should be random /
115	boolean m_bRandomOrder = false;
116
117	/**
118	* Returns an instance of a TechnicalInformation object, containing
119	* detailed information about the technical background of this class,
120	* e.g., paper reference or book this class is based on.
121	*
122	* @return the technical information about this class
123	*/
124	public TechnicalInformation getTechnicalInformation() {
125	TechnicalInformation result;
126	TechnicalInformation additional;
127
128	result = new TechnicalInformation(Type.PROCEEDINGS);
129	result.setValue(Field.AUTHOR, "G.F. Cooper and E. Herskovits");
130	result.setValue(Field.YEAR, "1990");
131	result.setValue(Field.TITLE, "A Bayesian method for constructing Bayesian belief networks from databases");
132	result.setValue(Field.BOOKTITLE, "Proceedings of the Conference on Uncertainty in AI");
133	result.setValue(Field.PAGES, "86-94");
134
135	additional = result.add(Type.ARTICLE);
136	additional.setValue(Field.AUTHOR, "G. Cooper and E. Herskovits");
137	additional.setValue(Field.YEAR, "1992");
138	additional.setValue(Field.TITLE, "A Bayesian method for the induction of probabilistic networks from data");
139	additional.setValue(Field.JOURNAL, "Machine Learning");
140	additional.setValue(Field.VOLUME, "9");
141	additional.setValue(Field.NUMBER, "4");
142	additional.setValue(Field.PAGES, "309-347");
143
144	return result;
145	}
146
147	/**
148	* search determines the network structure/graph of the network
149	* with the K2 algorithm, restricted by its initial structure (which can
150	* be an empty graph, or a Naive Bayes graph.
151	*
152	* @param bayesNet the network
153	* @param instances the data to work with
154	* @throws Exception if something goes wrong
155	*/
156	public void search (BayesNet bayesNet, Instances instances) throws Exception {
157	int nOrder[] = new int [instances.numAttributes()];
158	nOrder[0] = instances.classIndex();
159
160	int nAttribute = 0;
161
162	for (int iOrder = 1; iOrder < instances.numAttributes(); iOrder++) {
163	if (nAttribute == instances.classIndex()) {
164	nAttribute++;
165	}
166	nOrder[iOrder] = nAttribute++;
167	}
168
169	if (m_bRandomOrder) {
170	// generate random ordering (if required)
171	Random random = new Random();
172	int iClass;
173	if (getInitAsNaiveBayes()) {
174	iClass = 0;
175	} else {
176	iClass = -1;
177	}
178	for (int iOrder = 0; iOrder < instances.numAttributes(); iOrder++) {
179	int iOrder2 = Math.abs(random.nextInt()) % instances.numAttributes();
180	if (iOrder != iClass && iOrder2 != iClass) {
181	int nTmp = nOrder[iOrder];
182	nOrder[iOrder] = nOrder[iOrder2];
183	nOrder[iOrder2] = nTmp;
184	}
185	}
186	}
187
188	// determine base scores
189	double fBaseScore = calcScore(bayesNet);
190
191	// K2 algorithm: greedy search restricted by ordering
192	for (int iOrder = 1; iOrder < instances.numAttributes(); iOrder++) {
193	int iAttribute = nOrder[iOrder];
194	double fBestScore = fBaseScore;
195
196	boolean bProgress = (bayesNet.getParentSet(iAttribute).getNrOfParents() < getMaxNrOfParents());
197	while (bProgress && (bayesNet.getParentSet(iAttribute).getNrOfParents() < getMaxNrOfParents())) {
198	int nBestAttribute = -1;
199	for (int iOrder2 = 0; iOrder2 < iOrder; iOrder2++) {
200	int iAttribute2 = nOrder[iOrder2];
201	double fScore = calcScoreWithExtraParent(iAttribute, iAttribute2);
202	if (fScore > fBestScore) {
203	fBestScore = fScore;
204	nBestAttribute = iAttribute2;
205	}
206	}
207	if (nBestAttribute != -1) {
208	bayesNet.getParentSet(iAttribute).addParent(nBestAttribute, instances);
209	fBaseScore = fBestScore;
210	bProgress = true;
211	} else {
212	bProgress = false;
213	}
214	}
215	}
216	} // search
217
218	/**
219	* Sets the max number of parents
220	*
221	* @param nMaxNrOfParents the max number of parents
222	*/
223	public void setMaxNrOfParents(int nMaxNrOfParents) {
224	m_nMaxNrOfParents = nMaxNrOfParents;
225	}
226
227	/**
228	* Gets the max number of parents.
229	*
230	* @return the max number of parents
231	*/
232	public int getMaxNrOfParents() {
233	return m_nMaxNrOfParents;
234	}
235
236	/**
237	* Sets whether to init as naive bayes
238	*
239	* @param bInitAsNaiveBayes whether to init as naive bayes
240	*/
241	public void setInitAsNaiveBayes(boolean bInitAsNaiveBayes) {
242	m_bInitAsNaiveBayes = bInitAsNaiveBayes;
243	}
244
245	/**
246	* Gets whether to init as naive bayes
247	*
248	* @return whether to init as naive bayes
249	*/
250	public boolean getInitAsNaiveBayes() {
251	return m_bInitAsNaiveBayes;
252	}
253
254	/**
255	* Set random order flag
256	*
257	* @param bRandomOrder the random order flag
258	*/
259	public void setRandomOrder(boolean bRandomOrder) {
260	m_bRandomOrder = bRandomOrder;
261	} // SetRandomOrder
262
263	/**
264	* Get random order flag
265	*
266	* @return the random order flag
267	*/
268	public boolean getRandomOrder() {
269	return m_bRandomOrder;
270	} // getRandomOrder
271
272	/**
273	* Returns an enumeration describing the available options.
274	*
275	* @return an enumeration of all the available options.
276	*/
277	public Enumeration listOptions() {
278	Vector newVector = new Vector(0);
279
280	newVector.addElement(new Option("\tInitial structure is empty (instead of Naive Bayes)",
281	"N", 0, "-N"));
282
283	newVector.addElement(new Option("\tMaximum number of parents", "P", 1,
284	"-P <nr of parents>"));
285
286	newVector.addElement(new Option(
287	"\tRandom order.\n"
288	+ "\t(default false)",
289	"R", 0, "-R"));
290
291	Enumeration enu = super.listOptions();
292	while (enu.hasMoreElements()) {
293	newVector.addElement(enu.nextElement());
294	}
295	return newVector.elements();
296	}
297
298	/**
299	* Parses a given list of options. <p/>
300	*
301	<!-- options-start -->
302	* Valid options are: <p/>
303	*
304	* <pre> -N
305	* Initial structure is empty (instead of Naive Bayes)</pre>
306	*
307	* <pre> -P <nr of parents>
308	* Maximum number of parents</pre>
309	*
310	* <pre> -R
311	* Random order.
312	* (default false)</pre>
313	*
314	* <pre> -mbc
315	* Applies a Markov Blanket correction to the network structure,
316	* after a network structure is learned. This ensures that all
317	* nodes in the network are part of the Markov blanket of the
318	* classifier node.</pre>
319	*
320	* <pre> -S [LOO-CV\|k-Fold-CV\|Cumulative-CV]
321	* Score type (LOO-CV,k-Fold-CV,Cumulative-CV)</pre>
322	*
323	* <pre> -Q
324	* Use probabilistic or 0/1 scoring.
325	* (default probabilistic scoring)</pre>
326	*
327	<!-- options-end -->
328	*
329	* @param options the list of options as an array of strings
330	* @throws Exception if an option is not supported
331	*/
332	public void setOptions(String[] options) throws Exception {
333
334	setRandomOrder(Utils.getFlag('R', options));
335
336	m_bInitAsNaiveBayes = !(Utils.getFlag('N', options));
337
338	String sMaxNrOfParents = Utils.getOption('P', options);
339
340	if (sMaxNrOfParents.length() != 0) {
341	setMaxNrOfParents(Integer.parseInt(sMaxNrOfParents));
342	} else {
343	setMaxNrOfParents(100000);
344	}
345	super.setOptions(options);
346	}
347
348	/**
349	* Gets the current settings of the search algorithm.
350	*
351	* @return an array of strings suitable for passing to setOptions
352	*/
353	public String [] getOptions() {
354	String[] superOptions = super.getOptions();
355	String[] options = new String[4 + superOptions.length];
356	int current = 0;
357	options[current++] = "-P";
358	options[current++] = "" + m_nMaxNrOfParents;
359	if (!m_bInitAsNaiveBayes) {
360	options[current++] = "-N";
361	}
362	if (getRandomOrder()) {
363	options[current++] = "-R";
364	}
365	// insert options from parent class
366	for (int iOption = 0; iOption < superOptions.length; iOption++) {
367	options[current++] = superOptions[iOption];
368	}
369	// Fill up rest with empty strings, not nulls!
370	while (current < options.length) {
371	options[current++] = "";
372	}
373	// Fill up rest with empty strings, not nulls!
374	return options;
375	}
376
377	/**
378	* @return a string to describe the RandomOrder option.
379	*/
380	public String randomOrderTipText() {
381	return "When set to true, the order of the nodes in the network is random." +
382	" Default random order is false and the order" +
383	" of the nodes in the dataset is used." +
384	" In any case, when the network was initialized as Naive Bayes Network, the" +
385	" class variable is first in the ordering though.";
386	} // randomOrderTipText
387
388	/**
389	* This will return a string describing the search algorithm.
390	* @return The string.
391	*/
392	public String globalInfo() {
393	return
394	"This Bayes Network learning algorithm uses a hill climbing algorithm "
395	+ "restricted by an order on the variables.\n\n"
396	+ "For more information see:\n\n"
397	+ getTechnicalInformation().toString() + "\n\n"
398	+ "Works with nominal variables and no missing values only.";
399	}
400
401	/**
402	* Returns the revision string.
403	*
404	* @return the revision
405	*/
406	public String getRevision() {
407	return RevisionUtils.extract("$Revision: 1.8 $");
408	}
409	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: tags/MetisMQIDemo/src/main/java/weka/classifiers/bayes/net/search/global/K2.java

Download in other formats: