Context Navigation

source: src/main/java/weka/experiment/CrossValidationSplitResultProducer.java @ 11

Last change on this file since 11 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 8.8 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* CrossValidationSplitResultProducer.java
	19	* Copyright (C) 1999, 2009 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23
	24	package weka.experiment;
	25
	26	import weka.core.AdditionalMeasureProducer;
	27	import weka.core.Instance;
	28	import weka.core.Instances;
	29	import weka.core.Option;
	30	import weka.core.OptionHandler;
	31	import weka.core.RevisionHandler;
	32	import weka.core.RevisionUtils;
	33	import weka.core.Utils;
	34
	35	import java.io.File;
	36	import java.util.Calendar;
	37	import java.util.Enumeration;
	38	import java.util.Random;
	39	import java.util.TimeZone;
	40	import java.util.Vector;
	41
	42	/**
	43	<!-- globalinfo-start -->
	44	* Carries out one split of a repeated k-fold cross-validation, using the set SplitEvaluator to generate some results. Note that the run number is actually the nth split of a repeated k-fold cross-validation, i.e. if k=10, run number 100 is the 10th fold of the 10th cross-validation run. This producer's sole purpose is to allow more fine-grained distribution of cross-validation experiments. If the class attribute is nominal, the dataset is stratified.
	45	* <p/>
	46	<!-- globalinfo-end -->
	47	*
	48	<!-- options-start -->
	49	* Valid options are: <p/>
	50	*
	51	* <pre> -X <number of folds>
	52	* The number of folds to use for the cross-validation.
	53	* (default 10)</pre>
	54	*
	55	* <pre> -D
	56	* Save raw split evaluator output.</pre>
	57	*
	58	* <pre> -O <file/directory name/path>
	59	* The filename where raw output will be stored.
	60	* If a directory name is specified then then individual
	61	* outputs will be gzipped, otherwise all output will be
	62	* zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
	63	*
	64	* <pre> -W <class name>
	65	* The full class name of a SplitEvaluator.
	66	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
	67	*
	68	* <pre>
	69	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
	70	* </pre>
	71	*
	72	* <pre> -W <class name>
	73	* The full class name of the classifier.
	74	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	75	*
	76	* <pre> -C <index>
	77	* The index of the class for which IR statistics
	78	* are to be output. (default 1)</pre>
	79	*
	80	* <pre> -I <index>
	81	* The index of an attribute to output in the
	82	* results. This attribute should identify an
	83	* instance in order to know which instances are
	84	* in the test set of a cross validation. if 0
	85	* no output (default 0).</pre>
	86	*
	87	* <pre> -P
	88	* Add target and prediction columns to the result
	89	* for each fold.</pre>
	90	*
	91	* <pre>
	92	* Options specific to classifier weka.classifiers.rules.ZeroR:
	93	* </pre>
	94	*
	95	* <pre> -D
	96	* If set, classifier is run in debug mode and
	97	* may output additional info to the console</pre>
	98	*
	99	<!-- options-end -->
	100	*
	101	* All options after -- will be passed to the split evaluator.
	102	*
	103	* @author Len Trigg
	104	* @author Eibe Frank
	105	* @version $Revision: 5828 $
	106	*/
	107	public class CrossValidationSplitResultProducer
	108	extends CrossValidationResultProducer {
	109
	110	/** for serialization */
	111	static final long serialVersionUID = 1403798164046795073L;
	112
	113	/**
	114	* Returns a string describing this result producer
	115	* @return a description of the result producer suitable for
	116	* displaying in the explorer/experimenter gui
	117	*/
	118	public String globalInfo() {
	119	return
	120	"Carries out one split of a repeated k-fold cross-validation, "
	121	+ "using the set SplitEvaluator to generate some results. "
	122	+ "Note that the run number is actually the nth split of a repeated "
	123	+ "k-fold cross-validation, i.e. if k=10, run number 100 is the 10th "
	124	+ "fold of the 10th cross-validation run. This producer's sole purpose "
	125	+ "is to allow more fine-grained distribution of cross-validation "
	126	+ "experiments. If the class attribute is nominal, the dataset is stratified.";
	127	}
	128
	129	/**
	130	* Gets the keys for a specified run number. Different run
	131	* numbers correspond to different randomizations of the data. Keys
	132	* produced should be sent to the current ResultListener
	133	*
	134	* @param run the run number to get keys for.
	135	* @throws Exception if a problem occurs while getting the keys
	136	*/
	137	public void doRunKeys(int run) throws Exception {
	138	if (m_Instances == null) {
	139	throw new Exception("No Instances set");
	140	}
	141
	142	// Add in some fields to the key like run and fold number, dataset name
	143	Object [] seKey = m_SplitEvaluator.getKey();
	144	Object [] key = new Object [seKey.length + 3];
	145	key[0] = Utils.backQuoteChars(m_Instances.relationName());
	146	key[2] = "" + (((run - 1) % m_NumFolds) + 1);
	147	key[1] = "" + (((run - 1) / m_NumFolds) + 1);
	148	System.arraycopy(seKey, 0, key, 3, seKey.length);
	149	if (m_ResultListener.isResultRequired(this, key)) {
	150	try {
	151	m_ResultListener.acceptResult(this, key, null);
	152	} catch (Exception ex) {
	153	// Save the train and test datasets for debugging purposes?
	154	throw ex;
	155	}
	156	}
	157	}
	158
	159	/**
	160	* Gets the results for a specified run number. Different run
	161	* numbers correspond to different randomizations of the data. Results
	162	* produced should be sent to the current ResultListener
	163	*
	164	* @param run the run number to get results for.
	165	* @throws Exception if a problem occurs while getting the results
	166	*/
	167	public void doRun(int run) throws Exception {
	168
	169	if (getRawOutput()) {
	170	if (m_ZipDest == null) {
	171	m_ZipDest = new OutputZipper(m_OutputFile);
	172	}
	173	}
	174
	175	if (m_Instances == null) {
	176	throw new Exception("No Instances set");
	177	}
	178
	179	// Compute run and fold number from given run
	180	int fold = (run - 1) % m_NumFolds;
	181	run = ((run - 1) / m_NumFolds) + 1;
	182
	183
	184	// Randomize on a copy of the original dataset
	185	Instances runInstances = new Instances(m_Instances);
	186	Random random = new Random(run);
	187	runInstances.randomize(random);
	188	if (runInstances.classAttribute().isNominal()) {
	189	runInstances.stratify(m_NumFolds);
	190	}
	191
	192	// Add in some fields to the key like run and fold number, dataset name
	193	Object [] seKey = m_SplitEvaluator.getKey();
	194	Object [] key = new Object [seKey.length + 3];
	195	key[0] = Utils.backQuoteChars(m_Instances.relationName());
	196	key[1] = "" + run;
	197	key[2] = "" + (fold + 1);
	198	System.arraycopy(seKey, 0, key, 3, seKey.length);
	199	if (m_ResultListener.isResultRequired(this, key)) {
	200	Instances train = runInstances.trainCV(m_NumFolds, fold, random);
	201	Instances test = runInstances.testCV(m_NumFolds, fold);
	202	try {
	203	Object [] seResults = m_SplitEvaluator.getResult(train, test);
	204	Object [] results = new Object [seResults.length + 1];
	205	results[0] = getTimestamp();
	206	System.arraycopy(seResults, 0, results, 1,
	207	seResults.length);
	208	if (m_debugOutput) {
	209	String resultName = (""+run+"."+(fold+1)+"."
	210	+ Utils.backQuoteChars(runInstances.relationName())
	211	+"."
	212	+m_SplitEvaluator.toString()).replace(' ','_');
	213	resultName = Utils.removeSubstring(resultName,
	214	"weka.classifiers.");
	215	resultName = Utils.removeSubstring(resultName,
	216	"weka.filters.");
	217	resultName = Utils.removeSubstring(resultName,
	218	"weka.attributeSelection.");
	219	m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName);
	220	}
	221	m_ResultListener.acceptResult(this, key, results);
	222	} catch (Exception ex) {
	223	// Save the train and test datasets for debugging purposes?
	224	throw ex;
	225	}
	226	}
	227	}
	228
	229	/**
	230	* Gets a text descrption of the result producer.
	231	*
	232	* @return a text description of the result producer.
	233	*/
	234	public String toString() {
	235
	236	String result = "CrossValidationSplitResultProducer: ";
	237	result += getCompatibilityState();
	238	if (m_Instances == null) {
	239	result += ": <null Instances>";
	240	} else {
	241	result += ": " + Utils.backQuoteChars(m_Instances.relationName());
	242	}
	243	return result;
	244	}
	245
	246	/**
	247	* Returns the revision string.
	248	*
	249	* @return the revision
	250	*/
	251	public String getRevision() {
	252	return RevisionUtils.extract("$Revision: 5828 $");
	253	}
	254	} // CrossValidationSplitResultProducer
	255

Note: See TracBrowser for help on using the repository browser.

Download in other formats: