Context Navigation

source: src/main/java/weka/classifiers/meta/Dagging.java @ 14

Last change on this file since 14 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 16.4 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* Dagging.java
	19	* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.classifiers.meta;
	24
	25	import weka.classifiers.Classifier;
	26	import weka.classifiers.AbstractClassifier;
	27	import weka.classifiers.RandomizableSingleClassifierEnhancer;
	28	import weka.core.Instance;
	29	import weka.core.Instances;
	30	import weka.core.Option;
	31	import weka.core.RevisionUtils;
	32	import weka.core.TechnicalInformation;
	33	import weka.core.TechnicalInformationHandler;
	34	import weka.core.Utils;
	35	import weka.core.TechnicalInformation.Field;
	36	import weka.core.TechnicalInformation.Type;
	37
	38	import java.util.Enumeration;
	39	import java.util.Vector;
	40
	41	/**
	42	<!-- globalinfo-start -->
	43	* This meta classifier creates a number of disjoint, stratified folds out of the data and feeds each chunk of data to a copy of the supplied base classifier. Predictions are made via averaging, since all the generated base classifiers are put into the Vote meta classifier. <br/>
	44	* Useful for base classifiers that are quadratic or worse in time behavior, regarding number of instances in the training data. <br/>
	45	* <br/>
	46	* For more information, see: <br/>
	47	* Ting, K. M., Witten, I. H.: Stacking Bagged and Dagged Models. In: Fourteenth international Conference on Machine Learning, San Francisco, CA, 367-375, 1997.
	48	* <p/>
	49	<!-- globalinfo-end -->
	50	*
	51	<!-- technical-bibtex-start -->
	52	* BibTeX:
	53	* <pre>
	54	* @inproceedings{Ting1997,
	55	* address = {San Francisco, CA},
	56	* author = {Ting, K. M. and Witten, I. H.},
	57	* booktitle = {Fourteenth international Conference on Machine Learning},
	58	* editor = {D. H. Fisher},
	59	* pages = {367-375},
	60	* publisher = {Morgan Kaufmann Publishers},
	61	* title = {Stacking Bagged and Dagged Models},
	62	* year = {1997}
	63	* }
	64	* </pre>
	65	* <p/>
	66	<!-- technical-bibtex-end -->
	67	*
	68	<!-- options-start -->
	69	* Valid options are: <p/>
	70	*
	71	* <pre> -F <folds>
	72	* The number of folds for splitting the training set into
	73	* smaller chunks for the base classifier.
	74	* (default 10)</pre>
	75	*
	76	* <pre> -verbose
	77	* Whether to print some more information during building the
	78	* classifier.
	79	* (default is off)</pre>
	80	*
	81	* <pre> -S <num>
	82	* Random number seed.
	83	* (default 1)</pre>
	84	*
	85	* <pre> -D
	86	* If set, classifier is run in debug mode and
	87	* may output additional info to the console</pre>
	88	*
	89	* <pre> -W
	90	* Full name of base classifier.
	91	* (default: weka.classifiers.functions.SMO)</pre>
	92	*
	93	* <pre>
	94	* Options specific to classifier weka.classifiers.functions.SMO:
	95	* </pre>
	96	*
	97	* <pre> -D
	98	* If set, classifier is run in debug mode and
	99	* may output additional info to the console</pre>
	100	*
	101	* <pre> -no-checks
	102	* Turns off all checks - use with caution!
	103	* Turning them off assumes that data is purely numeric, doesn't
	104	* contain any missing values, and has a nominal class. Turning them
	105	* off also means that no header information will be stored if the
	106	* machine is linear. Finally, it also assumes that no instance has
	107	* a weight equal to 0.
	108	* (default: checks on)</pre>
	109	*
	110	* <pre> -C <double>
	111	* The complexity constant C. (default 1)</pre>
	112	*
	113	* <pre> -N
	114	* Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)</pre>
	115	*
	116	* <pre> -L <double>
	117	* The tolerance parameter. (default 1.0e-3)</pre>
	118	*
	119	* <pre> -P <double>
	120	* The epsilon for round-off error. (default 1.0e-12)</pre>
	121	*
	122	* <pre> -M
	123	* Fit logistic models to SVM outputs. </pre>
	124	*
	125	* <pre> -V <double>
	126	* The number of folds for the internal
	127	* cross-validation. (default -1, use training data)</pre>
	128	*
	129	* <pre> -W <double>
	130	* The random number seed. (default 1)</pre>
	131	*
	132	* <pre> -K <classname and parameters>
	133	* The Kernel to use.
	134	* (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
	135	*
	136	* <pre>
	137	* Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:
	138	* </pre>
	139	*
	140	* <pre> -D
	141	* Enables debugging output (if available) to be printed.
	142	* (default: off)</pre>
	143	*
	144	* <pre> -no-checks
	145	* Turns off all checks - use with caution!
	146	* (default: checks on)</pre>
	147	*
	148	* <pre> -C <num>
	149	* The size of the cache (a prime number), 0 for full cache and
	150	* -1 to turn it off.
	151	* (default: 250007)</pre>
	152	*
	153	* <pre> -E <num>
	154	* The Exponent to use.
	155	* (default: 1.0)</pre>
	156	*
	157	* <pre> -L
	158	* Use lower-order terms.
	159	* (default: no)</pre>
	160	*
	161	<!-- options-end -->
	162	*
	163	* Options after -- are passed to the designated classifier.<p/>
	164	*
	165	* @author Bernhard Pfahringer (bernhard at cs dot waikato dot ac dot nz)
	166	* @author FracPete (fracpete at waikato dot ac dot nz)
	167	* @version $Revision: 5928 $
	168	* @see Vote
	169	*/
	170	public class Dagging
	171	extends RandomizableSingleClassifierEnhancer
	172	implements TechnicalInformationHandler {
	173
	174	/** for serialization */
	175	static final long serialVersionUID = 4560165876570074309L;
	176
	177	/** the number of folds to use to split the training data */
	178	protected int m_NumFolds = 10;
	179
	180	/** the classifier used for voting */
	181	protected Vote m_Vote = null;
	182
	183	/** whether to output some progress information during building */
	184	protected boolean m_Verbose = false;
	185
	186	/**
	187	* Returns a string describing classifier
	188	* @return a description suitable for
	189	* displaying in the explorer/experimenter gui
	190	*/
	191	public String globalInfo() {
	192	return
	193	"This meta classifier creates a number of disjoint, stratified folds out "
	194	+ "of the data and feeds each chunk of data to a copy of the supplied "
	195	+ "base classifier. Predictions are made via averaging, since all the "
	196	+ "generated base classifiers are put into the Vote meta classifier. \n"
	197	+ "Useful for base classifiers that are quadratic or worse in time "
	198	+ "behavior, regarding number of instances in the training data. \n"
	199	+ "\n"
	200	+ "For more information, see: \n"
	201	+ getTechnicalInformation().toString();
	202	}
	203
	204	/**
	205	* Returns an instance of a TechnicalInformation object, containing
	206	* detailed information about the technical background of this class,
	207	* e.g., paper reference or book this class is based on.
	208	*
	209	* @return the technical information about this class
	210	*/
	211	public TechnicalInformation getTechnicalInformation() {
	212	TechnicalInformation result;
	213
	214	result = new TechnicalInformation(Type.INPROCEEDINGS);
	215	result.setValue(Field.AUTHOR, "Ting, K. M. and Witten, I. H.");
	216	result.setValue(Field.TITLE, "Stacking Bagged and Dagged Models");
	217	result.setValue(Field.BOOKTITLE, "Fourteenth international Conference on Machine Learning");
	218	result.setValue(Field.EDITOR, "D. H. Fisher");
	219	result.setValue(Field.YEAR, "1997");
	220	result.setValue(Field.PAGES, "367-375");
	221	result.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers");
	222	result.setValue(Field.ADDRESS, "San Francisco, CA");
	223
	224	return result;
	225	}
	226
	227	/**
	228	* Constructor.
	229	*/
	230	public Dagging() {
	231	m_Classifier = new weka.classifiers.functions.SMO();
	232	}
	233
	234	/**
	235	* String describing default classifier.
	236	*
	237	* @return the default classifier classname
	238	*/
	239	protected String defaultClassifierString() {
	240	return weka.classifiers.functions.SMO.class.getName();
	241	}
	242
	243	/**
	244	* Returns an enumeration describing the available options.
	245	*
	246	* @return an enumeration of all the available options.
	247	*/
	248	public Enumeration listOptions() {
	249	Vector result = new Vector();
	250
	251	result.addElement(new Option(
	252	"\tThe number of folds for splitting the training set into\n"
	253	+ "\tsmaller chunks for the base classifier.\n"
	254	+ "\t(default 10)",
	255	"F", 1, "-F <folds>"));
	256
	257	result.addElement(new Option(
	258	"\tWhether to print some more information during building the\n"
	259	+ "\tclassifier.\n"
	260	+ "\t(default is off)",
	261	"verbose", 0, "-verbose"));
	262
	263	Enumeration en = super.listOptions();
	264	while (en.hasMoreElements())
	265	result.addElement(en.nextElement());
	266
	267	return result.elements();
	268	}
	269
	270
	271	/**
	272	* Parses a given list of options. <p/>
	273	*
	274	<!-- options-start -->
	275	* Valid options are: <p/>
	276	*
	277	* <pre> -F <folds>
	278	* The number of folds for splitting the training set into
	279	* smaller chunks for the base classifier.
	280	* (default 10)</pre>
	281	*
	282	* <pre> -verbose
	283	* Whether to print some more information during building the
	284	* classifier.
	285	* (default is off)</pre>
	286	*
	287	* <pre> -S <num>
	288	* Random number seed.
	289	* (default 1)</pre>
	290	*
	291	* <pre> -D
	292	* If set, classifier is run in debug mode and
	293	* may output additional info to the console</pre>
	294	*
	295	* <pre> -W
	296	* Full name of base classifier.
	297	* (default: weka.classifiers.functions.SMO)</pre>
	298	*
	299	* <pre>
	300	* Options specific to classifier weka.classifiers.functions.SMO:
	301	* </pre>
	302	*
	303	* <pre> -D
	304	* If set, classifier is run in debug mode and
	305	* may output additional info to the console</pre>
	306	*
	307	* <pre> -no-checks
	308	* Turns off all checks - use with caution!
	309	* Turning them off assumes that data is purely numeric, doesn't
	310	* contain any missing values, and has a nominal class. Turning them
	311	* off also means that no header information will be stored if the
	312	* machine is linear. Finally, it also assumes that no instance has
	313	* a weight equal to 0.
	314	* (default: checks on)</pre>
	315	*
	316	* <pre> -C <double>
	317	* The complexity constant C. (default 1)</pre>
	318	*
	319	* <pre> -N
	320	* Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)</pre>
	321	*
	322	* <pre> -L <double>
	323	* The tolerance parameter. (default 1.0e-3)</pre>
	324	*
	325	* <pre> -P <double>
	326	* The epsilon for round-off error. (default 1.0e-12)</pre>
	327	*
	328	* <pre> -M
	329	* Fit logistic models to SVM outputs. </pre>
	330	*
	331	* <pre> -V <double>
	332	* The number of folds for the internal
	333	* cross-validation. (default -1, use training data)</pre>
	334	*
	335	* <pre> -W <double>
	336	* The random number seed. (default 1)</pre>
	337	*
	338	* <pre> -K <classname and parameters>
	339	* The Kernel to use.
	340	* (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
	341	*
	342	* <pre>
	343	* Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:
	344	* </pre>
	345	*
	346	* <pre> -D
	347	* Enables debugging output (if available) to be printed.
	348	* (default: off)</pre>
	349	*
	350	* <pre> -no-checks
	351	* Turns off all checks - use with caution!
	352	* (default: checks on)</pre>
	353	*
	354	* <pre> -C <num>
	355	* The size of the cache (a prime number), 0 for full cache and
	356	* -1 to turn it off.
	357	* (default: 250007)</pre>
	358	*
	359	* <pre> -E <num>
	360	* The Exponent to use.
	361	* (default: 1.0)</pre>
	362	*
	363	* <pre> -L
	364	* Use lower-order terms.
	365	* (default: no)</pre>
	366	*
	367	<!-- options-end -->
	368	*
	369	* Options after -- are passed to the designated classifier.<p>
	370	*
	371	* @param options the list of options as an array of strings
	372	* @throws Exception if an option is not supported
	373	*/
	374	public void setOptions(String[] options) throws Exception {
	375	String tmpStr;
	376
	377	tmpStr = Utils.getOption('F', options);
	378	if (tmpStr.length() != 0)
	379	setNumFolds(Integer.parseInt(tmpStr));
	380	else
	381	setNumFolds(10);
	382
	383	setVerbose(Utils.getFlag("verbose", options));
	384
	385	super.setOptions(options);
	386	}
	387
	388	/**
	389	* Gets the current settings of the Classifier.
	390	*
	391	* @return an array of strings suitable for passing to setOptions
	392	*/
	393	public String[] getOptions() {
	394	Vector result;
	395	String[] options;
	396	int i;
	397
	398	result = new Vector();
	399
	400	result.add("-F");
	401	result.add("" + getNumFolds());
	402
	403	if (getVerbose())
	404	result.add("-verbose");
	405
	406	options = super.getOptions();
	407	for (i = 0; i < options.length; i++)
	408	result.add(options[i]);
	409
	410	return (String[]) result.toArray(new String[result.size()]);
	411	}
	412
	413	/**
	414	* Gets the number of folds to use for splitting the training set.
	415	*
	416	* @return the number of folds
	417	*/
	418	public int getNumFolds() {
	419	return m_NumFolds;
	420	}
	421
	422	/**
	423	* Sets the number of folds to use for splitting the training set.
	424	*
	425	* @param value the new number of folds
	426	*/
	427	public void setNumFolds(int value) {
	428	if (value > 0)
	429	m_NumFolds = value;
	430	else
	431	System.out.println(
	432	"At least 1 fold is necessary (provided: " + value + ")!");
	433	}
	434
	435	/**
	436	* Returns the tip text for this property
	437	*
	438	* @return tip text for this property suitable for
	439	* displaying in the explorer/experimenter gui
	440	*/
	441	public String numFoldsTipText() {
	442	return "The number of folds to use for splitting the training set into smaller chunks for the base classifier.";
	443	}
	444
	445	/**
	446	* Set the verbose state.
	447	*
	448	* @param value the verbose state
	449	*/
	450	public void setVerbose(boolean value) {
	451	m_Verbose = value;
	452	}
	453
	454	/**
	455	* Gets the verbose state
	456	*
	457	* @return the verbose state
	458	*/
	459	public boolean getVerbose() {
	460	return m_Verbose;
	461	}
	462
	463	/**
	464	* Returns the tip text for this property
	465	* @return tip text for this property suitable for
	466	* displaying in the explorer/experimenter gui
	467	*/
	468	public String verboseTipText() {
	469	return "Whether to ouput some additional information during building.";
	470	}
	471
	472	/**
	473	* Bagging method.
	474	*
	475	* @param data the training data to be used for generating the
	476	* bagged classifier.
	477	* @throws Exception if the classifier could not be built successfully
	478	*/
	479	public void buildClassifier(Instances data) throws Exception {
	480	Classifier[] base;
	481	int i;
	482	int n;
	483	int fromIndex;
	484	int toIndex;
	485	Instances train;
	486	double chunkSize;
	487
	488	// can classifier handle the data?
	489	getCapabilities().testWithFail(data);
	490
	491	// remove instances with missing class
	492	data = new Instances(data);
	493	data.deleteWithMissingClass();
	494
	495	m_Vote = new Vote();
	496	base = new Classifier[getNumFolds()];
	497	chunkSize = (double) data.numInstances() / (double) getNumFolds();
	498
	499	// stratify data
	500	if (getNumFolds() > 1) {
	501	data.randomize(data.getRandomNumberGenerator(getSeed()));
	502	data.stratify(getNumFolds());
	503	}
	504
	505	// generate <folds> classifiers
	506	for (i = 0; i < getNumFolds(); i++) {
	507	base[i] = makeCopy(getClassifier());
	508
	509	// generate training data
	510	if (getNumFolds() > 1) {
	511	// some progress information
	512	if (getVerbose())
	513	System.out.print(".");
	514
	515	train = data.testCV(getNumFolds(), i);
	516	}
	517	else {
	518	train = data;
	519	}
	520
	521	// train classifier
	522	base[i].buildClassifier(train);
	523	}
	524
	525	// init vote
	526	m_Vote.setClassifiers(base);
	527
	528	if (getVerbose())
	529	System.out.println();
	530	}
	531
	532	/**
	533	* Calculates the class membership probabilities for the given test
	534	* instance.
	535	*
	536	* @param instance the instance to be classified
	537	* @return preedicted class probability distribution
	538	* @throws Exception if distribution can't be computed successfully
	539	*/
	540	public double[] distributionForInstance(Instance instance) throws Exception {
	541	return m_Vote.distributionForInstance(instance);
	542	}
	543
	544	/**
	545	* Returns description of the classifier.
	546	*
	547	* @return description of the classifier as a string
	548	*/
	549	public String toString() {
	550	if (m_Vote == null)
	551	return this.getClass().getName().replaceAll(".*\\.", "")
	552	+ ": No model built yet.";
	553	else
	554	return m_Vote.toString();
	555	}
	556
	557	/**
	558	* Returns the revision string.
	559	*
	560	* @return the revision
	561	*/
	562	public String getRevision() {
	563	return RevisionUtils.extract("$Revision: 5928 $");
	564	}
	565
	566	/**
	567	* Main method for testing this class.
	568	*
	569	* @param args the options
	570	*/
	571	public static void main(String[] args) {
	572	runClassifier(new Dagging(), args);
	573	}
	574	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: