Context Navigation

source: src/main/java/weka/classifiers/meta/nestedDichotomies/ClassBalancedND.java @ 9

Last change on this file since 9 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 16.0 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* ClassBalancedND.java
19	* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.classifiers.meta.nestedDichotomies;
24
25	import weka.classifiers.Classifier;
26	import weka.classifiers.AbstractClassifier;
27	import weka.classifiers.RandomizableSingleClassifierEnhancer;
28	import weka.classifiers.meta.FilteredClassifier;
29	import weka.core.Capabilities;
30	import weka.core.Instance;
31	import weka.core.Instances;
32	import weka.core.Range;
33	import weka.core.RevisionUtils;
34	import weka.core.TechnicalInformation;
35	import weka.core.TechnicalInformationHandler;
36	import weka.core.Utils;
37	import weka.core.Capabilities.Capability;
38	import weka.core.TechnicalInformation.Field;
39	import weka.core.TechnicalInformation.Type;
40	import weka.filters.Filter;
41	import weka.filters.unsupervised.attribute.MakeIndicator;
42	import weka.filters.unsupervised.instance.RemoveWithValues;
43
44	import java.util.Hashtable;
45	import java.util.Random;
46
47	/**
48	<!-- globalinfo-start -->
49	* A meta classifier for handling multi-class datasets with 2-class classifiers by building a random class-balanced tree structure.<br/>
50	* <br/>
51	* For more info, check<br/>
52	* <br/>
53	* Lin Dong, Eibe Frank, Stefan Kramer: Ensembles of Balanced Nested Dichotomies for Multi-class Problems. In: PKDD, 84-95, 2005.<br/>
54	* <br/>
55	* Eibe Frank, Stefan Kramer: Ensembles of nested dichotomies for multi-class problems. In: Twenty-first International Conference on Machine Learning, 2004.
56	* <p/>
57	<!-- globalinfo-end -->
58	*
59	<!-- technical-bibtex-start -->
60	* BibTeX:
61	* <pre>
62	* @inproceedings{Dong2005,
63	* author = {Lin Dong and Eibe Frank and Stefan Kramer},
64	* booktitle = {PKDD},
65	* pages = {84-95},
66	* publisher = {Springer},
67	* title = {Ensembles of Balanced Nested Dichotomies for Multi-class Problems},
68	* year = {2005}
69	* }
70	*
71	* @inproceedings{Frank2004,
72	* author = {Eibe Frank and Stefan Kramer},
73	* booktitle = {Twenty-first International Conference on Machine Learning},
74	* publisher = {ACM},
75	* title = {Ensembles of nested dichotomies for multi-class problems},
76	* year = {2004}
77	* }
78	* </pre>
79	* <p/>
80	<!-- technical-bibtex-end -->
81	*
82	<!-- options-start -->
83	* Valid options are: <p/>
84	*
85	* <pre> -S <num>
86	* Random number seed.
87	* (default 1)</pre>
88	*
89	* <pre> -D
90	* If set, classifier is run in debug mode and
91	* may output additional info to the console</pre>
92	*
93	* <pre> -W
94	* Full name of base classifier.
95	* (default: weka.classifiers.trees.J48)</pre>
96	*
97	* <pre>
98	* Options specific to classifier weka.classifiers.trees.J48:
99	* </pre>
100	*
101	* <pre> -U
102	* Use unpruned tree.</pre>
103	*
104	* <pre> -C <pruning confidence>
105	* Set confidence threshold for pruning.
106	* (default 0.25)</pre>
107	*
108	* <pre> -M <minimum number of instances>
109	* Set minimum number of instances per leaf.
110	* (default 2)</pre>
111	*
112	* <pre> -R
113	* Use reduced error pruning.</pre>
114	*
115	* <pre> -N <number of folds>
116	* Set number of folds for reduced error
117	* pruning. One fold is used as pruning set.
118	* (default 3)</pre>
119	*
120	* <pre> -B
121	* Use binary splits only.</pre>
122	*
123	* <pre> -S
124	* Don't perform subtree raising.</pre>
125	*
126	* <pre> -L
127	* Do not clean up after the tree has been built.</pre>
128	*
129	* <pre> -A
130	* Laplace smoothing for predicted probabilities.</pre>
131	*
132	* <pre> -Q <seed>
133	* Seed for random data shuffling (default 1).</pre>
134	*
135	<!-- options-end -->
136	*
137	* @author Lin Dong
138	* @author Eibe Frank
139	*/
140	public class ClassBalancedND
141	extends RandomizableSingleClassifierEnhancer
142	implements TechnicalInformationHandler {
143
144	/** for serialization */
145	static final long serialVersionUID = 5944063630650811903L;
146
147	/** The filtered classifier in which the base classifier is wrapped. */
148	protected FilteredClassifier m_FilteredClassifier;
149
150	/** The hashtable for this node. */
151	protected Hashtable m_classifiers;
152
153	/** The first successor */
154	protected ClassBalancedND m_FirstSuccessor = null;
155
156	/** The second successor */
157	protected ClassBalancedND m_SecondSuccessor = null;
158
159	/** The classes that are grouped together at the current node */
160	protected Range m_Range = null;
161
162	/** Is Hashtable given from END? */
163	protected boolean m_hashtablegiven = false;
164
165	/**
166	* Constructor.
167	*/
168	public ClassBalancedND() {
169
170	m_Classifier = new weka.classifiers.trees.J48();
171	}
172
173	/**
174	* String describing default classifier.
175	*
176	* @return the default classifier classname
177	*/
178	protected String defaultClassifierString() {
179
180	return "weka.classifiers.trees.J48";
181	}
182
183	/**
184	* Returns an instance of a TechnicalInformation object, containing
185	* detailed information about the technical background of this class,
186	* e.g., paper reference or book this class is based on.
187	*
188	* @return the technical information about this class
189	*/
190	public TechnicalInformation getTechnicalInformation() {
191	TechnicalInformation result;
192	TechnicalInformation additional;
193
194	result = new TechnicalInformation(Type.INPROCEEDINGS);
195	result.setValue(Field.AUTHOR, "Lin Dong and Eibe Frank and Stefan Kramer");
196	result.setValue(Field.TITLE, "Ensembles of Balanced Nested Dichotomies for Multi-class Problems");
197	result.setValue(Field.BOOKTITLE, "PKDD");
198	result.setValue(Field.YEAR, "2005");
199	result.setValue(Field.PAGES, "84-95");
200	result.setValue(Field.PUBLISHER, "Springer");
201
202	additional = result.add(Type.INPROCEEDINGS);
203	additional.setValue(Field.AUTHOR, "Eibe Frank and Stefan Kramer");
204	additional.setValue(Field.TITLE, "Ensembles of nested dichotomies for multi-class problems");
205	additional.setValue(Field.BOOKTITLE, "Twenty-first International Conference on Machine Learning");
206	additional.setValue(Field.YEAR, "2004");
207	additional.setValue(Field.PUBLISHER, "ACM");
208
209	return result;
210	}
211
212	/**
213	* Set hashtable from END.
214	*
215	* @param table the hashtable to use
216	*/
217	public void setHashtable(Hashtable table) {
218
219	m_hashtablegiven = true;
220	m_classifiers = table;
221	}
222
223	/**
224	* Generates a classifier for the current node and proceeds recursively.
225	*
226	* @param data contains the (multi-class) instances
227	* @param classes contains the indices of the classes that are present
228	* @param rand the random number generator to use
229	* @param classifier the classifier to use
230	* @param table the Hashtable to use
231	* @throws Exception if anything goes worng
232	*/
233	private void generateClassifierForNode(Instances data, Range classes,
234	Random rand, Classifier classifier, Hashtable table)
235	throws Exception {
236
237	// Get the indices
238	int[] indices = classes.getSelection();
239
240	// Randomize the order of the indices
241	for (int j = indices.length - 1; j > 0; j--) {
242	int randPos = rand.nextInt(j + 1);
243	int temp = indices[randPos];
244	indices[randPos] = indices[j];
245	indices[j] = temp;
246	}
247
248	// Pick the classes for the current split
249	int first = indices.length / 2;
250	int second = indices.length - first;
251	int[] firstInds = new int[first];
252	int[] secondInds = new int[second];
253	System.arraycopy(indices, 0, firstInds, 0, first);
254	System.arraycopy(indices, first, secondInds, 0, second);
255
256	// Sort the indices (important for hash key)!
257	int[] sortedFirst = Utils.sort(firstInds);
258	int[] sortedSecond = Utils.sort(secondInds);
259	int[] firstCopy = new int[first];
260	int[] secondCopy = new int[second];
261	for (int i = 0; i < sortedFirst.length; i++) {
262	firstCopy[i] = firstInds[sortedFirst[i]];
263	}
264	firstInds = firstCopy;
265	for (int i = 0; i < sortedSecond.length; i++) {
266	secondCopy[i] = secondInds[sortedSecond[i]];
267	}
268	secondInds = secondCopy;
269
270	// Unify indices to improve hashing
271	if (firstInds[0] > secondInds[0]) {
272	int[] help = secondInds;
273	secondInds = firstInds;
274	firstInds = help;
275	int help2 = second;
276	second = first;
277	first = help2;
278	}
279
280	m_Range = new Range(Range.indicesToRangeList(firstInds));
281	m_Range.setUpper(data.numClasses() - 1);
282
283	Range secondRange = new Range(Range.indicesToRangeList(secondInds));
284	secondRange.setUpper(data.numClasses() - 1);
285
286	// Change the class labels and build the classifier
287	MakeIndicator filter = new MakeIndicator();
288	filter.setAttributeIndex("" + (data.classIndex() + 1));
289	filter.setValueIndices(m_Range.getRanges());
290	filter.setNumeric(false);
291	filter.setInputFormat(data);
292	m_FilteredClassifier = new FilteredClassifier();
293	if (data.numInstances() > 0) {
294	m_FilteredClassifier.setClassifier(AbstractClassifier.makeCopies(classifier, 1)[0]);
295	} else {
296	m_FilteredClassifier.setClassifier(new weka.classifiers.rules.ZeroR());
297	}
298	m_FilteredClassifier.setFilter(filter);
299
300	// Save reference to hash table at current node
301	m_classifiers=table;
302
303	if (!m_classifiers.containsKey( getString(firstInds) + "\|" + getString(secondInds))) {
304	m_FilteredClassifier.buildClassifier(data);
305	m_classifiers.put(getString(firstInds) + "\|" + getString(secondInds), m_FilteredClassifier);
306	} else {
307	m_FilteredClassifier=(FilteredClassifier)m_classifiers.get(getString(firstInds) + "\|" +
308	getString(secondInds));
309	}
310
311	// Create two successors if necessary
312	m_FirstSuccessor = new ClassBalancedND();
313	if (first == 1) {
314	m_FirstSuccessor.m_Range = m_Range;
315	} else {
316	RemoveWithValues rwv = new RemoveWithValues();
317	rwv.setInvertSelection(true);
318	rwv.setNominalIndices(m_Range.getRanges());
319	rwv.setAttributeIndex("" + (data.classIndex() + 1));
320	rwv.setInputFormat(data);
321	Instances firstSubset = Filter.useFilter(data, rwv);
322	m_FirstSuccessor.generateClassifierForNode(firstSubset, m_Range,
323	rand, classifier, m_classifiers);
324	}
325	m_SecondSuccessor = new ClassBalancedND();
326	if (second == 1) {
327	m_SecondSuccessor.m_Range = secondRange;
328	} else {
329	RemoveWithValues rwv = new RemoveWithValues();
330	rwv.setInvertSelection(true);
331	rwv.setNominalIndices(secondRange.getRanges());
332	rwv.setAttributeIndex("" + (data.classIndex() + 1));
333	rwv.setInputFormat(data);
334	Instances secondSubset = Filter.useFilter(data, rwv);
335	m_SecondSuccessor = new ClassBalancedND();
336
337	m_SecondSuccessor.generateClassifierForNode(secondSubset, secondRange,
338	rand, classifier, m_classifiers);
339	}
340	}
341
342	/**
343	* Returns default capabilities of the classifier.
344	*
345	* @return the capabilities of this classifier
346	*/
347	public Capabilities getCapabilities() {
348	Capabilities result = super.getCapabilities();
349
350	// class
351	result.disableAllClasses();
352	result.enable(Capability.NOMINAL_CLASS);
353	result.enable(Capability.MISSING_CLASS_VALUES);
354
355	// instances
356	result.setMinimumNumberInstances(1);
357
358	return result;
359	}
360
361	/**
362	* Builds tree recursively.
363	*
364	* @param data contains the (multi-class) instances
365	* @throws Exception if the building fails
366	*/
367	public void buildClassifier(Instances data) throws Exception {
368
369	// can classifier handle the data?
370	getCapabilities().testWithFail(data);
371
372	// remove instances with missing class
373	data = new Instances(data);
374	data.deleteWithMissingClass();
375
376	Random random = data.getRandomNumberGenerator(m_Seed);
377
378	if (!m_hashtablegiven) {
379	m_classifiers = new Hashtable();
380	}
381
382	// Check which classes are present in the
383	// data and construct initial list of classes
384	boolean[] present = new boolean[data.numClasses()];
385	for (int i = 0; i < data.numInstances(); i++) {
386	present[(int)data.instance(i).classValue()] = true;
387	}
388	StringBuffer list = new StringBuffer();
389	for (int i = 0; i < present.length; i++) {
390	if (present[i]) {
391	if (list.length() > 0) {
392	list.append(",");
393	}
394	list.append(i + 1);
395	}
396	}
397
398	Range newRange = new Range(list.toString());
399	newRange.setUpper(data.numClasses() - 1);
400
401	generateClassifierForNode(data, newRange, random, m_Classifier, m_classifiers);
402	}
403
404	/**
405	* Predicts the class distribution for a given instance
406	*
407	* @param inst the (multi-class) instance to be classified
408	* @return the class distribution
409	* @throws Exception if computing fails
410	*/
411	public double[] distributionForInstance(Instance inst) throws Exception {
412
413	double[] newDist = new double[inst.numClasses()];
414	if (m_FirstSuccessor == null) {
415	for (int i = 0; i < inst.numClasses(); i++) {
416	if (m_Range.isInRange(i)) {
417	newDist[i] = 1;
418	}
419	}
420	return newDist;
421	} else {
422	double[] firstDist = m_FirstSuccessor.distributionForInstance(inst);
423	double[] secondDist = m_SecondSuccessor.distributionForInstance(inst);
424	double[] dist = m_FilteredClassifier.distributionForInstance(inst);
425	for (int i = 0; i < inst.numClasses(); i++) {
426	if ((firstDist[i] > 0) && (secondDist[i] > 0)) {
427	System.err.println("Panik!!");
428	}
429	if (m_Range.isInRange(i)) {
430	newDist[i] = dist[1] * firstDist[i];
431	} else {
432	newDist[i] = dist[0] * secondDist[i];
433	}
434	}
435	return newDist;
436	}
437	}
438
439	/**
440	* Returns the list of indices as a string.
441	*
442	* @param indices the indices to return as string
443	* @return the indices as string
444	*/
445	public String getString(int [] indices) {
446
447	StringBuffer string = new StringBuffer();
448	for (int i = 0; i < indices.length; i++) {
449	if (i > 0) {
450	string.append(',');
451	}
452	string.append(indices[i]);
453	}
454	return string.toString();
455	}
456
457	/**
458	* @return a description of the classifier suitable for
459	* displaying in the explorer/experimenter gui
460	*/
461	public String globalInfo() {
462
463	return
464	"A meta classifier for handling multi-class datasets with 2-class "
465	+ "classifiers by building a random class-balanced tree structure.\n\n"
466	+ "For more info, check\n\n"
467	+ getTechnicalInformation().toString();
468	}
469
470	/**
471	* Outputs the classifier as a string.
472	*
473	* @return a string representation of the classifier
474	*/
475	public String toString() {
476
477	if (m_classifiers == null) {
478	return "ClassBalancedND: No model built yet.";
479	}
480	StringBuffer text = new StringBuffer();
481	text.append("ClassBalancedND");
482	treeToString(text, 0);
483
484	return text.toString();
485	}
486
487	/**
488	* Returns string description of the tree.
489	*
490	* @param text the buffer to add the node to
491	* @param nn the node number
492	* @return the next node number
493	*/
494	private int treeToString(StringBuffer text, int nn) {
495
496	nn++;
497	text.append("\n\nNode number: " + nn + "\n\n");
498	if (m_FilteredClassifier != null) {
499	text.append(m_FilteredClassifier);
500	} else {
501	text.append("null");
502	}
503	if (m_FirstSuccessor != null) {
504	nn = m_FirstSuccessor.treeToString(text, nn);
505	nn = m_SecondSuccessor.treeToString(text, nn);
506	}
507	return nn;
508	}
509
510	/**
511	* Returns the revision string.
512	*
513	* @return the revision
514	*/
515	public String getRevision() {
516	return RevisionUtils.extract("$Revision: 5928 $");
517	}
518
519	/**
520	* Main method for testing this class.
521	*
522	* @param argv the options
523	*/
524	public static void main(String [] argv) {
525	runClassifier(new ClassBalancedND(), argv);
526	}
527	}
528

Note: See TracBrowser for help on using the repository browser.

Download in other formats: