Context Navigation

source: src/main/java/weka/classifiers/meta/END.java @ 14

Last change on this file since 14 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 10.8 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* END.java
	19	* Copyright (C) 2004-2005 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.classifiers.meta;
	24
	25	import weka.classifiers.Classifier;
	26	import weka.classifiers.AbstractClassifier;
	27	import weka.classifiers.RandomizableIteratedSingleClassifierEnhancer;
	28	import weka.core.Capabilities;
	29	import weka.core.Instance;
	30	import weka.core.Instances;
	31	import weka.core.Randomizable;
	32	import weka.core.RevisionUtils;
	33	import weka.core.TechnicalInformation;
	34	import weka.core.TechnicalInformationHandler;
	35	import weka.core.Utils;
	36	import weka.core.TechnicalInformation.Field;
	37	import weka.core.TechnicalInformation.Type;
	38
	39	import java.util.Hashtable;
	40	import java.util.Random;
	41
	42	/**
	43	<!-- globalinfo-start -->
	44	* A meta classifier for handling multi-class datasets with 2-class classifiers by building an ensemble of nested dichotomies.<br/>
	45	* <br/>
	46	* For more info, check<br/>
	47	* <br/>
	48	* Lin Dong, Eibe Frank, Stefan Kramer: Ensembles of Balanced Nested Dichotomies for Multi-class Problems. In: PKDD, 84-95, 2005.<br/>
	49	* <br/>
	50	* Eibe Frank, Stefan Kramer: Ensembles of nested dichotomies for multi-class problems. In: Twenty-first International Conference on Machine Learning, 2004.
	51	* <p/>
	52	<!-- globalinfo-end -->
	53	*
	54	<!-- technical-bibtex-start -->
	55	* BibTeX:
	56	* <pre>
	57	* @inproceedings{Dong2005,
	58	* author = {Lin Dong and Eibe Frank and Stefan Kramer},
	59	* booktitle = {PKDD},
	60	* pages = {84-95},
	61	* publisher = {Springer},
	62	* title = {Ensembles of Balanced Nested Dichotomies for Multi-class Problems},
	63	* year = {2005}
	64	* }
	65	*
	66	* @inproceedings{Frank2004,
	67	* author = {Eibe Frank and Stefan Kramer},
	68	* booktitle = {Twenty-first International Conference on Machine Learning},
	69	* publisher = {ACM},
	70	* title = {Ensembles of nested dichotomies for multi-class problems},
	71	* year = {2004}
	72	* }
	73	* </pre>
	74	* <p/>
	75	<!-- technical-bibtex-end -->
	76	*
	77	<!-- options-start -->
	78	* Valid options are: <p/>
	79	*
	80	* <pre> -S <num>
	81	* Random number seed.
	82	* (default 1)</pre>
	83	*
	84	* <pre> -I <num>
	85	* Number of iterations.
	86	* (default 10)</pre>
	87	*
	88	* <pre> -D
	89	* If set, classifier is run in debug mode and
	90	* may output additional info to the console</pre>
	91	*
	92	* <pre> -W
	93	* Full name of base classifier.
	94	* (default: weka.classifiers.meta.nestedDichotomies.ND)</pre>
	95	*
	96	* <pre>
	97	* Options specific to classifier weka.classifiers.meta.nestedDichotomies.ND:
	98	* </pre>
	99	*
	100	* <pre> -S <num>
	101	* Random number seed.
	102	* (default 1)</pre>
	103	*
	104	* <pre> -D
	105	* If set, classifier is run in debug mode and
	106	* may output additional info to the console</pre>
	107	*
	108	* <pre> -W
	109	* Full name of base classifier.
	110	* (default: weka.classifiers.trees.J48)</pre>
	111	*
	112	* <pre>
	113	* Options specific to classifier weka.classifiers.trees.J48:
	114	* </pre>
	115	*
	116	* <pre> -U
	117	* Use unpruned tree.</pre>
	118	*
	119	* <pre> -C <pruning confidence>
	120	* Set confidence threshold for pruning.
	121	* (default 0.25)</pre>
	122	*
	123	* <pre> -M <minimum number of instances>
	124	* Set minimum number of instances per leaf.
	125	* (default 2)</pre>
	126	*
	127	* <pre> -R
	128	* Use reduced error pruning.</pre>
	129	*
	130	* <pre> -N <number of folds>
	131	* Set number of folds for reduced error
	132	* pruning. One fold is used as pruning set.
	133	* (default 3)</pre>
	134	*
	135	* <pre> -B
	136	* Use binary splits only.</pre>
	137	*
	138	* <pre> -S
	139	* Don't perform subtree raising.</pre>
	140	*
	141	* <pre> -L
	142	* Do not clean up after the tree has been built.</pre>
	143	*
	144	* <pre> -A
	145	* Laplace smoothing for predicted probabilities.</pre>
	146	*
	147	* <pre> -Q <seed>
	148	* Seed for random data shuffling (default 1).</pre>
	149	*
	150	<!-- options-end -->
	151	*
	152	* Options after -- are passed to the designated classifier.<p>
	153	*
	154	* @author Eibe Frank
	155	* @author Lin Dong
	156	* @version $Revision: 5928 $
	157	*/
	158	public class END
	159	extends RandomizableIteratedSingleClassifierEnhancer
	160	implements TechnicalInformationHandler {
	161
	162	/** for serialization */
	163	static final long serialVersionUID = -4143242362912214956L;
	164
	165	/**
	166	* The hashtable containing the classifiers for the END.
	167	*/
	168	protected Hashtable m_hashtable = null;
	169
	170	/**
	171	* Constructor.
	172	*/
	173	public END() {
	174
	175	m_Classifier = new weka.classifiers.meta.nestedDichotomies.ND();
	176	}
	177
	178	/**
	179	* String describing default classifier.
	180	*
	181	* @return the default classifier classname
	182	*/
	183	protected String defaultClassifierString() {
	184
	185	return "weka.classifiers.meta.nestedDichotomies.ND";
	186	}
	187
	188	/**
	189	* Returns a string describing classifier
	190	* @return a description suitable for
	191	* displaying in the explorer/experimenter gui
	192	*/
	193	public String globalInfo() {
	194
	195	return "A meta classifier for handling multi-class datasets with 2-class "
	196	+ "classifiers by building an ensemble of nested dichotomies.\n\n"
	197	+ "For more info, check\n\n"
	198	+ getTechnicalInformation().toString();
	199	}
	200
	201	/**
	202	* Returns an instance of a TechnicalInformation object, containing
	203	* detailed information about the technical background of this class,
	204	* e.g., paper reference or book this class is based on.
	205	*
	206	* @return the technical information about this class
	207	*/
	208	public TechnicalInformation getTechnicalInformation() {
	209	TechnicalInformation result;
	210	TechnicalInformation additional;
	211
	212	result = new TechnicalInformation(Type.INPROCEEDINGS);
	213	result.setValue(Field.AUTHOR, "Lin Dong and Eibe Frank and Stefan Kramer");
	214	result.setValue(Field.TITLE, "Ensembles of Balanced Nested Dichotomies for Multi-class Problems");
	215	result.setValue(Field.BOOKTITLE, "PKDD");
	216	result.setValue(Field.YEAR, "2005");
	217	result.setValue(Field.PAGES, "84-95");
	218	result.setValue(Field.PUBLISHER, "Springer");
	219
	220	additional = result.add(Type.INPROCEEDINGS);
	221	additional.setValue(Field.AUTHOR, "Eibe Frank and Stefan Kramer");
	222	additional.setValue(Field.TITLE, "Ensembles of nested dichotomies for multi-class problems");
	223	additional.setValue(Field.BOOKTITLE, "Twenty-first International Conference on Machine Learning");
	224	additional.setValue(Field.YEAR, "2004");
	225	additional.setValue(Field.PUBLISHER, "ACM");
	226
	227	return result;
	228	}
	229
	230	/**
	231	* Returns default capabilities of the classifier.
	232	*
	233	* @return the capabilities of this classifier
	234	*/
	235	public Capabilities getCapabilities() {
	236	Capabilities result = super.getCapabilities();
	237
	238	// instances
	239	result.setMinimumNumberInstances(1); // at least 1 for the RandomNumberGenerator!
	240
	241	return result;
	242	}
	243
	244	/**
	245	* Builds the committee of randomizable classifiers.
	246	*
	247	* @param data the training data to be used for generating the
	248	* bagged classifier.
	249	* @throws Exception if the classifier could not be built successfully
	250	*/
	251	public void buildClassifier(Instances data) throws Exception {
	252
	253	// can classifier handle the data?
	254	getCapabilities().testWithFail(data);
	255
	256	// remove instances with missing class
	257	data = new Instances(data);
	258	data.deleteWithMissingClass();
	259
	260	if (!(m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ND) &&
	261	!(m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ClassBalancedND) &&
	262	!(m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.DataNearBalancedND)) {
	263	throw new IllegalArgumentException("END only works with ND, ClassBalancedND " +
	264	"or DataNearBalancedND classifier");
	265	}
	266
	267	m_hashtable = new Hashtable();
	268
	269	m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, m_NumIterations);
	270
	271	Random random = data.getRandomNumberGenerator(m_Seed);
	272	for (int j = 0; j < m_Classifiers.length; j++) {
	273
	274	// Set the random number seed for the current classifier.
	275	((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
	276
	277	// Set the hashtable
	278	if (m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ND)
	279	((weka.classifiers.meta.nestedDichotomies.ND)m_Classifiers[j]).setHashtable(m_hashtable);
	280	else if (m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ClassBalancedND)
	281	((weka.classifiers.meta.nestedDichotomies.ClassBalancedND)m_Classifiers[j]).setHashtable(m_hashtable);
	282	else if (m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.DataNearBalancedND)
	283	((weka.classifiers.meta.nestedDichotomies.DataNearBalancedND)m_Classifiers[j]).
	284	setHashtable(m_hashtable);
	285
	286	// Build the classifier.
	287	m_Classifiers[j].buildClassifier(data);
	288	}
	289	}
	290
	291	/**
	292	* Calculates the class membership probabilities for the given test
	293	* instance.
	294	*
	295	* @param instance the instance to be classified
	296	* @return preedicted class probability distribution
	297	* @throws Exception if distribution can't be computed successfully
	298	*/
	299	public double[] distributionForInstance(Instance instance) throws Exception {
	300
	301	double [] sums = new double [instance.numClasses()], newProbs;
	302
	303	for (int i = 0; i < m_NumIterations; i++) {
	304	if (instance.classAttribute().isNumeric() == true) {
	305	sums[0] += m_Classifiers[i].classifyInstance(instance);
	306	} else {
	307	newProbs = m_Classifiers[i].distributionForInstance(instance);
	308	for (int j = 0; j < newProbs.length; j++)
	309	sums[j] += newProbs[j];
	310	}
	311	}
	312	if (instance.classAttribute().isNumeric() == true) {
	313	sums[0] /= (double)m_NumIterations;
	314	return sums;
	315	} else if (Utils.eq(Utils.sum(sums), 0)) {
	316	return sums;
	317	} else {
	318	Utils.normalize(sums);
	319	return sums;
	320	}
	321	}
	322
	323	/**
	324	* Returns description of the committee.
	325	*
	326	* @return description of the committee as a string
	327	*/
	328	public String toString() {
	329
	330	if (m_Classifiers == null) {
	331	return "END: No model built yet.";
	332	}
	333	StringBuffer text = new StringBuffer();
	334	text.append("All the base classifiers: \n\n");
	335	for (int i = 0; i < m_Classifiers.length; i++)
	336	text.append(m_Classifiers[i].toString() + "\n\n");
	337
	338	return text.toString();
	339	}
	340
	341	/**
	342	* Returns the revision string.
	343	*
	344	* @return the revision
	345	*/
	346	public String getRevision() {
	347	return RevisionUtils.extract("$Revision: 5928 $");
	348	}
	349
	350	/**
	351	* Main method for testing this class.
	352	*
	353	* @param argv the options
	354	*/
	355	public static void main(String [] argv) {
	356	runClassifier(new END(), argv);
	357	}
	358	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: