Context Navigation

RandomSplitResultProducer.java

Last change on this file was 29, checked in by gnappo, 14 years ago
Taggata versione per la demo e aggiunto branch.
File size: 27.5 KB

Rev	Line
[29]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* RandomSplitResultProducer.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23
	24	package weka.experiment;
	25
	26	import weka.core.AdditionalMeasureProducer;
	27	import weka.core.Instance;
	28	import weka.core.Instances;
	29	import weka.core.Option;
	30	import weka.core.OptionHandler;
	31	import weka.core.RevisionHandler;
	32	import weka.core.RevisionUtils;
	33	import weka.core.Utils;
	34
	35	import java.io.File;
	36	import java.util.Calendar;
	37	import java.util.Enumeration;
	38	import java.util.Random;
	39	import java.util.TimeZone;
	40	import java.util.Vector;
	41
	42	/**
	43	<!-- globalinfo-start -->
	44	* Generates a single train/test split and calls the appropriate SplitEvaluator to generate some results.
	45	* <p/>
	46	<!-- globalinfo-end -->
	47	*
	48	<!-- options-start -->
	49	* Valid options are: <p/>
	50	*
	51	* <pre> -P <percent>
	52	* The percentage of instances to use for training.
	53	* (default 66)</pre>
	54	*
	55	* <pre> -D
	56	* Save raw split evaluator output.</pre>
	57	*
	58	* <pre> -O <file/directory name/path>
	59	* The filename where raw output will be stored.
	60	* If a directory name is specified then then individual
	61	* outputs will be gzipped, otherwise all output will be
	62	* zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
	63	*
	64	* <pre> -W <class name>
	65	* The full class name of a SplitEvaluator.
	66	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
	67	*
	68	* <pre> -R
	69	* Set when data is not to be randomized and the data sets' size.
	70	* Is not to be determined via probabilistic rounding.</pre>
	71	*
	72	* <pre>
	73	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
	74	* </pre>
	75	*
	76	* <pre> -W <class name>
	77	* The full class name of the classifier.
	78	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	79	*
	80	* <pre> -C <index>
	81	* The index of the class for which IR statistics
	82	* are to be output. (default 1)</pre>
	83	*
	84	* <pre> -I <index>
	85	* The index of an attribute to output in the
	86	* results. This attribute should identify an
	87	* instance in order to know which instances are
	88	* in the test set of a cross validation. if 0
	89	* no output (default 0).</pre>
	90	*
	91	* <pre> -P
	92	* Add target and prediction columns to the result
	93	* for each fold.</pre>
	94	*
	95	* <pre>
	96	* Options specific to classifier weka.classifiers.rules.ZeroR:
	97	* </pre>
	98	*
	99	* <pre> -D
	100	* If set, classifier is run in debug mode and
	101	* may output additional info to the console</pre>
	102	*
	103	<!-- options-end -->
	104	*
	105	* All options after -- will be passed to the split evaluator.
	106	*
	107	* @author Len Trigg (trigg@cs.waikato.ac.nz)
	108	* @version $Revision: 1.20 $
	109	*/
	110	public class RandomSplitResultProducer
	111	implements ResultProducer, OptionHandler, AdditionalMeasureProducer,
	112	RevisionHandler {
	113
	114	/** for serialization */
	115	static final long serialVersionUID = 1403798165056795073L;
	116
	117	/** The dataset of interest */
	118	protected Instances m_Instances;
	119
	120	/** The ResultListener to send results to */
	121	protected ResultListener m_ResultListener = new CSVResultListener();
	122
	123	/** The percentage of instances to use for training */
	124	protected double m_TrainPercent = 66;
	125
	126	/** Whether dataset is to be randomized */
	127	protected boolean m_randomize = true;
	128
	129	/** The SplitEvaluator used to generate results */
	130	protected SplitEvaluator m_SplitEvaluator = new ClassifierSplitEvaluator();
	131
	132	/** The names of any additional measures to look for in SplitEvaluators */
	133	protected String [] m_AdditionalMeasures = null;
	134
	135	/** Save raw output of split evaluators --- for debugging purposes */
	136	protected boolean m_debugOutput = false;
	137
	138	/** The output zipper to use for saving raw splitEvaluator output */
	139	protected OutputZipper m_ZipDest = null;
	140
	141	/** The destination output file/directory for raw output */
	142	protected File m_OutputFile = new File(
	143	new File(System.getProperty("user.dir")),
	144	"splitEvalutorOut.zip");
	145
	146	/** The name of the key field containing the dataset name */
	147	public static String DATASET_FIELD_NAME = "Dataset";
	148
	149	/** The name of the key field containing the run number */
	150	public static String RUN_FIELD_NAME = "Run";
	151
	152	/** The name of the result field containing the timestamp */
	153	public static String TIMESTAMP_FIELD_NAME = "Date_time";
	154
	155	/**
	156	* Returns a string describing this result producer
	157	* @return a description of the result producer suitable for
	158	* displaying in the explorer/experimenter gui
	159	*/
	160	public String globalInfo() {
	161	return
	162	"Generates a single train/test split and calls the appropriate "
	163	+ "SplitEvaluator to generate some results.";
	164	}
	165
	166	/**
	167	* Sets the dataset that results will be obtained for.
	168	*
	169	* @param instances a value of type 'Instances'.
	170	*/
	171	public void setInstances(Instances instances) {
	172
	173	m_Instances = instances;
	174	}
	175
	176	/**
	177	* Set a list of method names for additional measures to look for
	178	* in SplitEvaluators. This could contain many measures (of which only a
	179	* subset may be produceable by the current SplitEvaluator) if an experiment
	180	* is the type that iterates over a set of properties.
	181	* @param additionalMeasures an array of measure names, null if none
	182	*/
	183	public void setAdditionalMeasures(String [] additionalMeasures) {
	184	m_AdditionalMeasures = additionalMeasures;
	185
	186	if (m_SplitEvaluator != null) {
	187	System.err.println("RandomSplitResultProducer: setting additional "
	188	+"measures for "
	189	+"split evaluator");
	190	m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
	191	}
	192	}
	193
	194	/**
	195	* Returns an enumeration of any additional measure names that might be
	196	* in the SplitEvaluator
	197	* @return an enumeration of the measure names
	198	*/
	199	public Enumeration enumerateMeasures() {
	200	Vector newVector = new Vector();
	201	if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
	202	Enumeration en = ((AdditionalMeasureProducer)m_SplitEvaluator).
	203	enumerateMeasures();
	204	while (en.hasMoreElements()) {
	205	String mname = (String)en.nextElement();
	206	newVector.addElement(mname);
	207	}
	208	}
	209	return newVector.elements();
	210	}
	211
	212	/**
	213	* Returns the value of the named measure
	214	* @param additionalMeasureName the name of the measure to query for its value
	215	* @return the value of the named measure
	216	* @throws IllegalArgumentException if the named measure is not supported
	217	*/
	218	public double getMeasure(String additionalMeasureName) {
	219	if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
	220	return ((AdditionalMeasureProducer)m_SplitEvaluator).
	221	getMeasure(additionalMeasureName);
	222	} else {
	223	throw new IllegalArgumentException("RandomSplitResultProducer: "
	224	+"Can't return value for : "+additionalMeasureName
	225	+". "+m_SplitEvaluator.getClass().getName()+" "
	226	+"is not an AdditionalMeasureProducer");
	227	}
	228	}
	229
	230	/**
	231	* Sets the object to send results of each run to.
	232	*
	233	* @param listener a value of type 'ResultListener'
	234	*/
	235	public void setResultListener(ResultListener listener) {
	236
	237	m_ResultListener = listener;
	238	}
	239
	240	/**
	241	* Gets a Double representing the current date and time.
	242	* eg: 1:46pm on 20/5/1999 -> 19990520.1346
	243	*
	244	* @return a value of type Double
	245	*/
	246	public static Double getTimestamp() {
	247
	248	Calendar now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
	249	double timestamp = now.get(Calendar.YEAR) * 10000
	250	+ (now.get(Calendar.MONTH) + 1) * 100
	251	+ now.get(Calendar.DAY_OF_MONTH)
	252	+ now.get(Calendar.HOUR_OF_DAY) / 100.0
	253	+ now.get(Calendar.MINUTE) / 10000.0;
	254	return new Double(timestamp);
	255	}
	256
	257	/**
	258	* Prepare to generate results.
	259	*
	260	* @throws Exception if an error occurs during preprocessing.
	261	*/
	262	public void preProcess() throws Exception {
	263
	264	if (m_SplitEvaluator == null) {
	265	throw new Exception("No SplitEvalutor set");
	266	}
	267	if (m_ResultListener == null) {
	268	throw new Exception("No ResultListener set");
	269	}
	270	m_ResultListener.preProcess(this);
	271	}
	272
	273	/**
	274	* Perform any postprocessing. When this method is called, it indicates
	275	* that no more requests to generate results for the current experiment
	276	* will be sent.
	277	*
	278	* @throws Exception if an error occurs
	279	*/
	280	public void postProcess() throws Exception {
	281
	282	m_ResultListener.postProcess(this);
	283	if (m_debugOutput) {
	284	if (m_ZipDest != null) {
	285	m_ZipDest.finished();
	286	m_ZipDest = null;
	287	}
	288	}
	289	}
	290
	291	/**
	292	* Gets the keys for a specified run number. Different run
	293	* numbers correspond to different randomizations of the data. Keys
	294	* produced should be sent to the current ResultListener
	295	*
	296	* @param run the run number to get keys for.
	297	* @throws Exception if a problem occurs while getting the keys
	298	*/
	299	public void doRunKeys(int run) throws Exception {
	300	if (m_Instances == null) {
	301	throw new Exception("No Instances set");
	302	}
	303	// Add in some fields to the key like run number, dataset name
	304	Object [] seKey = m_SplitEvaluator.getKey();
	305	Object [] key = new Object [seKey.length + 2];
	306	key[0] = Utils.backQuoteChars(m_Instances.relationName());
	307	key[1] = "" + run;
	308	System.arraycopy(seKey, 0, key, 2, seKey.length);
	309	if (m_ResultListener.isResultRequired(this, key)) {
	310	try {
	311	m_ResultListener.acceptResult(this, key, null);
	312	} catch (Exception ex) {
	313	// Save the train and test datasets for debugging purposes?
	314	throw ex;
	315	}
	316	}
	317	}
	318
	319	/**
	320	* Gets the results for a specified run number. Different run
	321	* numbers correspond to different randomizations of the data. Results
	322	* produced should be sent to the current ResultListener
	323	*
	324	* @param run the run number to get results for.
	325	* @throws Exception if a problem occurs while getting the results
	326	*/
	327	public void doRun(int run) throws Exception {
	328
	329	if (getRawOutput()) {
	330	if (m_ZipDest == null) {
	331	m_ZipDest = new OutputZipper(m_OutputFile);
	332	}
	333	}
	334
	335	if (m_Instances == null) {
	336	throw new Exception("No Instances set");
	337	}
	338	// Add in some fields to the key like run number, dataset name
	339	Object [] seKey = m_SplitEvaluator.getKey();
	340	Object [] key = new Object [seKey.length + 2];
	341	key[0] = Utils.backQuoteChars(m_Instances.relationName());
	342	key[1] = "" + run;
	343	System.arraycopy(seKey, 0, key, 2, seKey.length);
	344	if (m_ResultListener.isResultRequired(this, key)) {
	345
	346	// Randomize on a copy of the original dataset
	347	Instances runInstances = new Instances(m_Instances);
	348
	349	Instances train;
	350	Instances test;
	351
	352	if (!m_randomize) {
	353
	354	// Don't do any randomization
	355	int trainSize = Utils.round(runInstances.numInstances() * m_TrainPercent / 100);
	356	int testSize = runInstances.numInstances() - trainSize;
	357	train = new Instances(runInstances, 0, trainSize);
	358	test = new Instances(runInstances, trainSize, testSize);
	359	} else {
	360	Random rand = new Random(run);
	361	runInstances.randomize(rand);
	362
	363	// Nominal class
	364	if (runInstances.classAttribute().isNominal()) {
	365
	366	// create the subset for each classs
	367	int numClasses = runInstances.numClasses();
	368	Instances[] subsets = new Instances[numClasses + 1];
	369	for (int i=0; i < numClasses + 1; i++) {
	370	subsets[i] = new Instances(runInstances, 10);
	371	}
	372
	373	// divide instances into subsets
	374	Enumeration e = runInstances.enumerateInstances();
	375	while(e.hasMoreElements()) {
	376	Instance inst = (Instance) e.nextElement();
	377	if (inst.classIsMissing()) {
	378	subsets[numClasses].add(inst);
	379	} else {
	380	subsets[(int) inst.classValue()].add(inst);
	381	}
	382	}
	383
	384	// Compactify them
	385	for (int i=0; i < numClasses + 1; i++) {
	386	subsets[i].compactify();
	387	}
	388
	389	// merge into train and test sets
	390	train = new Instances(runInstances, runInstances.numInstances());
	391	test = new Instances(runInstances, runInstances.numInstances());
	392	for (int i = 0; i < numClasses + 1; i++) {
	393	int trainSize =
	394	Utils.probRound(subsets[i].numInstances() * m_TrainPercent / 100, rand);
	395	for (int j = 0; j < trainSize; j++) {
	396	train.add(subsets[i].instance(j));
	397	}
	398	for (int j = trainSize; j < subsets[i].numInstances(); j++) {
	399	test.add(subsets[i].instance(j));
	400	}
	401	// free memory
	402	subsets[i] = null;
	403	}
	404	train.compactify();
	405	test.compactify();
	406
	407	// randomize the final sets
	408	train.randomize(rand);
	409	test.randomize(rand);
	410	} else {
	411
	412	// Numeric target
	413	int trainSize =
	414	Utils.probRound(runInstances.numInstances() * m_TrainPercent / 100, rand);
	415	int testSize = runInstances.numInstances() - trainSize;
	416	train = new Instances(runInstances, 0, trainSize);
	417	test = new Instances(runInstances, trainSize, testSize);
	418	}
	419	}
	420	try {
	421	Object [] seResults = m_SplitEvaluator.getResult(train, test);
	422	Object [] results = new Object [seResults.length + 1];
	423	results[0] = getTimestamp();
	424	System.arraycopy(seResults, 0, results, 1,
	425	seResults.length);
	426	if (m_debugOutput) {
	427	String resultName =
	428	(""+run+"."+
	429	Utils.backQuoteChars(runInstances.relationName())
	430	+"."
	431	+m_SplitEvaluator.toString()).replace(' ','_');
	432	resultName = Utils.removeSubstring(resultName,
	433	"weka.classifiers.");
	434	resultName = Utils.removeSubstring(resultName,
	435	"weka.filters.");
	436	resultName = Utils.removeSubstring(resultName,
	437	"weka.attributeSelection.");
	438	m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName);
	439	}
	440	m_ResultListener.acceptResult(this, key, results);
	441	} catch (Exception ex) {
	442	// Save the train and test datasets for debugging purposes?
	443	throw ex;
	444	}
	445	}
	446	}
	447
	448	/**
	449	* Gets the names of each of the columns produced for a single run.
	450	* This method should really be static.
	451	*
	452	* @return an array containing the name of each column
	453	*/
	454	public String [] getKeyNames() {
	455
	456	String [] keyNames = m_SplitEvaluator.getKeyNames();
	457	// Add in the names of our extra key fields
	458	String [] newKeyNames = new String [keyNames.length + 2];
	459	newKeyNames[0] = DATASET_FIELD_NAME;
	460	newKeyNames[1] = RUN_FIELD_NAME;
	461	System.arraycopy(keyNames, 0, newKeyNames, 2, keyNames.length);
	462	return newKeyNames;
	463	}
	464
	465	/**
	466	* Gets the data types of each of the columns produced for a single run.
	467	* This method should really be static.
	468	*
	469	* @return an array containing objects of the type of each column. The
	470	* objects should be Strings, or Doubles.
	471	*/
	472	public Object [] getKeyTypes() {
	473
	474	Object [] keyTypes = m_SplitEvaluator.getKeyTypes();
	475	// Add in the types of our extra fields
	476	Object [] newKeyTypes = new String [keyTypes.length + 2];
	477	newKeyTypes[0] = new String();
	478	newKeyTypes[1] = new String();
	479	System.arraycopy(keyTypes, 0, newKeyTypes, 2, keyTypes.length);
	480	return newKeyTypes;
	481	}
	482
	483	/**
	484	* Gets the names of each of the columns produced for a single run.
	485	* This method should really be static.
	486	*
	487	* @return an array containing the name of each column
	488	*/
	489	public String [] getResultNames() {
	490
	491	String [] resultNames = m_SplitEvaluator.getResultNames();
	492	// Add in the names of our extra Result fields
	493	String [] newResultNames = new String [resultNames.length + 1];
	494	newResultNames[0] = TIMESTAMP_FIELD_NAME;
	495	System.arraycopy(resultNames, 0, newResultNames, 1, resultNames.length);
	496	return newResultNames;
	497	}
	498
	499	/**
	500	* Gets the data types of each of the columns produced for a single run.
	501	* This method should really be static.
	502	*
	503	* @return an array containing objects of the type of each column. The
	504	* objects should be Strings, or Doubles.
	505	*/
	506	public Object [] getResultTypes() {
	507
	508	Object [] resultTypes = m_SplitEvaluator.getResultTypes();
	509	// Add in the types of our extra Result fields
	510	Object [] newResultTypes = new Object [resultTypes.length + 1];
	511	newResultTypes[0] = new Double(0);
	512	System.arraycopy(resultTypes, 0, newResultTypes, 1, resultTypes.length);
	513	return newResultTypes;
	514	}
	515
	516	/**
	517	* Gets a description of the internal settings of the result
	518	* producer, sufficient for distinguishing a ResultProducer
	519	* instance from another with different settings (ignoring
	520	* those settings set through this interface). For example,
	521	* a cross-validation ResultProducer may have a setting for the
	522	* number of folds. For a given state, the results produced should
	523	* be compatible. Typically if a ResultProducer is an OptionHandler,
	524	* this string will represent the command line arguments required
	525	* to set the ResultProducer to that state.
	526	*
	527	* @return the description of the ResultProducer state, or null
	528	* if no state is defined
	529	*/
	530	public String getCompatibilityState() {
	531
	532	String result = "-P " + m_TrainPercent;
	533	if (!getRandomizeData()) {
	534	result += " -R";
	535	}
	536	if (m_SplitEvaluator == null) {
	537	result += " <null SplitEvaluator>";
	538	} else {
	539	result += " -W " + m_SplitEvaluator.getClass().getName();
	540	}
	541	return result + " --";
	542	}
	543
	544	/**
	545	* Returns the tip text for this property
	546	* @return tip text for this property suitable for
	547	* displaying in the explorer/experimenter gui
	548	*/
	549	public String outputFileTipText() {
	550	return "Set the destination for saving raw output. If the rawOutput "
	551	+"option is selected, then output from the splitEvaluator for "
	552	+"individual train-test splits is saved. If the destination is a "
	553	+"directory, "
	554	+"then each output is saved to an individual gzip file; if the "
	555	+"destination is a file, then each output is saved as an entry "
	556	+"in a zip file.";
	557	}
	558
	559	/**
	560	* Get the value of OutputFile.
	561	*
	562	* @return Value of OutputFile.
	563	*/
	564	public File getOutputFile() {
	565
	566	return m_OutputFile;
	567	}
	568
	569	/**
	570	* Set the value of OutputFile.
	571	*
	572	* @param newOutputFile Value to assign to OutputFile.
	573	*/
	574	public void setOutputFile(File newOutputFile) {
	575
	576	m_OutputFile = newOutputFile;
	577	}
	578
	579	/**
	580	* Returns the tip text for this property
	581	* @return tip text for this property suitable for
	582	* displaying in the explorer/experimenter gui
	583	*/
	584	public String randomizeDataTipText() {
	585	return "Do not randomize dataset and do not perform probabilistic rounding " +
	586	"if true";
	587	}
	588
	589	/**
	590	* Get if dataset is to be randomized
	591	* @return true if dataset is to be randomized
	592	*/
	593	public boolean getRandomizeData() {
	594	return m_randomize;
	595	}
	596
	597	/**
	598	* Set to true if dataset is to be randomized
	599	* @param d true if dataset is to be randomized
	600	*/
	601	public void setRandomizeData(boolean d) {
	602	m_randomize = d;
	603	}
	604
	605	/**
	606	* Returns the tip text for this property
	607	* @return tip text for this property suitable for
	608	* displaying in the explorer/experimenter gui
	609	*/
	610	public String rawOutputTipText() {
	611	return "Save raw output (useful for debugging). If set, then output is "
	612	+"sent to the destination specified by outputFile";
	613	}
	614
	615	/**
	616	* Get if raw split evaluator output is to be saved
	617	* @return true if raw split evalutor output is to be saved
	618	*/
	619	public boolean getRawOutput() {
	620	return m_debugOutput;
	621	}
	622
	623	/**
	624	* Set to true if raw split evaluator output is to be saved
	625	* @param d true if output is to be saved
	626	*/
	627	public void setRawOutput(boolean d) {
	628	m_debugOutput = d;
	629	}
	630
	631	/**
	632	* Returns the tip text for this property
	633	* @return tip text for this property suitable for
	634	* displaying in the explorer/experimenter gui
	635	*/
	636	public String trainPercentTipText() {
	637	return "Set the percentage of data to use for training.";
	638	}
	639
	640	/**
	641	* Get the value of TrainPercent.
	642	*
	643	* @return Value of TrainPercent.
	644	*/
	645	public double getTrainPercent() {
	646
	647	return m_TrainPercent;
	648	}
	649
	650	/**
	651	* Set the value of TrainPercent.
	652	*
	653	* @param newTrainPercent Value to assign to TrainPercent.
	654	*/
	655	public void setTrainPercent(double newTrainPercent) {
	656
	657	m_TrainPercent = newTrainPercent;
	658	}
	659
	660	/**
	661	* Returns the tip text for this property
	662	* @return tip text for this property suitable for
	663	* displaying in the explorer/experimenter gui
	664	*/
	665	public String splitEvaluatorTipText() {
	666	return "The evaluator to apply to the test data. "
	667	+"This may be a classifier, regression scheme etc.";
	668	}
	669
	670	/**
	671	* Get the SplitEvaluator.
	672	*
	673	* @return the SplitEvaluator.
	674	*/
	675	public SplitEvaluator getSplitEvaluator() {
	676
	677	return m_SplitEvaluator;
	678	}
	679
	680	/**
	681	* Set the SplitEvaluator.
	682	*
	683	* @param newSplitEvaluator new SplitEvaluator to use.
	684	*/
	685	public void setSplitEvaluator(SplitEvaluator newSplitEvaluator) {
	686
	687	m_SplitEvaluator = newSplitEvaluator;
	688	m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
	689	}
	690
	691	/**
	692	* Returns an enumeration describing the available options..
	693	*
	694	* @return an enumeration of all the available options.
	695	*/
	696	public Enumeration listOptions() {
	697
	698	Vector newVector = new Vector(5);
	699
	700	newVector.addElement(new Option(
	701	"\tThe percentage of instances to use for training.\n"
	702	+"\t(default 66)",
	703	"P", 1,
	704	"-P <percent>"));
	705
	706	newVector.addElement(new Option(
	707	"Save raw split evaluator output.",
	708	"D",0,"-D"));
	709
	710	newVector.addElement(new Option(
	711	"\tThe filename where raw output will be stored.\n"
	712	+"\tIf a directory name is specified then then individual\n"
	713	+"\toutputs will be gzipped, otherwise all output will be\n"
	714	+"\tzipped to the named file. Use in conjuction with -D."
	715	+"\t(default splitEvalutorOut.zip)",
	716	"O", 1,
	717	"-O <file/directory name/path>"));
	718
	719	newVector.addElement(new Option(
	720	"\tThe full class name of a SplitEvaluator.\n"
	721	+"\teg: weka.experiment.ClassifierSplitEvaluator",
	722	"W", 1,
	723	"-W <class name>"));
	724
	725	newVector.addElement(new Option(
	726	"\tSet when data is not to be randomized and the data sets' size.\n"
	727	+ "\tIs not to be determined via probabilistic rounding.",
	728	"R",0,"-R"));
	729
	730
	731	if ((m_SplitEvaluator != null) &&
	732	(m_SplitEvaluator instanceof OptionHandler)) {
	733	newVector.addElement(new Option(
	734	"",
	735	"", 0, "\nOptions specific to split evaluator "
	736	+ m_SplitEvaluator.getClass().getName() + ":"));
	737	Enumeration enu = ((OptionHandler)m_SplitEvaluator).listOptions();
	738	while (enu.hasMoreElements()) {
	739	newVector.addElement(enu.nextElement());
	740	}
	741	}
	742	return newVector.elements();
	743	}
	744
	745	/**
	746	* Parses a given list of options. <p/>
	747	*
	748	<!-- options-start -->
	749	* Valid options are: <p/>
	750	*
	751	* <pre> -P <percent>
	752	* The percentage of instances to use for training.
	753	* (default 66)</pre>
	754	*
	755	* <pre> -D
	756	* Save raw split evaluator output.</pre>
	757	*
	758	* <pre> -O <file/directory name/path>
	759	* The filename where raw output will be stored.
	760	* If a directory name is specified then then individual
	761	* outputs will be gzipped, otherwise all output will be
	762	* zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
	763	*
	764	* <pre> -W <class name>
	765	* The full class name of a SplitEvaluator.
	766	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
	767	*
	768	* <pre> -R
	769	* Set when data is not to be randomized and the data sets' size.
	770	* Is not to be determined via probabilistic rounding.</pre>
	771	*
	772	* <pre>
	773	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
	774	* </pre>
	775	*
	776	* <pre> -W <class name>
	777	* The full class name of the classifier.
	778	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	779	*
	780	* <pre> -C <index>
	781	* The index of the class for which IR statistics
	782	* are to be output. (default 1)</pre>
	783	*
	784	* <pre> -I <index>
	785	* The index of an attribute to output in the
	786	* results. This attribute should identify an
	787	* instance in order to know which instances are
	788	* in the test set of a cross validation. if 0
	789	* no output (default 0).</pre>
	790	*
	791	* <pre> -P
	792	* Add target and prediction columns to the result
	793	* for each fold.</pre>
	794	*
	795	* <pre>
	796	* Options specific to classifier weka.classifiers.rules.ZeroR:
	797	* </pre>
	798	*
	799	* <pre> -D
	800	* If set, classifier is run in debug mode and
	801	* may output additional info to the console</pre>
	802	*
	803	<!-- options-end -->
	804	*
	805	* All options after -- will be passed to the split evaluator.
	806	*
	807	* @param options the list of options as an array of strings
	808	* @throws Exception if an option is not supported
	809	*/
	810	public void setOptions(String[] options) throws Exception {
	811
	812	setRawOutput(Utils.getFlag('D', options));
	813	setRandomizeData(!Utils.getFlag('R', options));
	814
	815	String fName = Utils.getOption('O', options);
	816	if (fName.length() != 0) {
	817	setOutputFile(new File(fName));
	818	}
	819
	820	String trainPct = Utils.getOption('P', options);
	821	if (trainPct.length() != 0) {
	822	setTrainPercent((new Double(trainPct)).doubleValue());
	823	} else {
	824	setTrainPercent(66);
	825	}
	826
	827	String seName = Utils.getOption('W', options);
	828	if (seName.length() == 0) {
	829	throw new Exception("A SplitEvaluator must be specified with"
	830	+ " the -W option.");
	831	}
	832	// Do it first without options, so if an exception is thrown during
	833	// the option setting, listOptions will contain options for the actual
	834	// SE.
	835	setSplitEvaluator((SplitEvaluator)Utils.forName(
	836	SplitEvaluator.class,
	837	seName,
	838	null));
	839	if (getSplitEvaluator() instanceof OptionHandler) {
	840	((OptionHandler) getSplitEvaluator())
	841	.setOptions(Utils.partitionOptions(options));
	842	}
	843	}
	844
	845	/**
	846	* Gets the current settings of the result producer.
	847	*
	848	* @return an array of strings suitable for passing to setOptions
	849	*/
	850	public String [] getOptions() {
	851
	852	String [] seOptions = new String [0];
	853	if ((m_SplitEvaluator != null) &&
	854	(m_SplitEvaluator instanceof OptionHandler)) {
	855	seOptions = ((OptionHandler)m_SplitEvaluator).getOptions();
	856	}
	857
	858	String [] options = new String [seOptions.length + 9];
	859	int current = 0;
	860
	861	options[current++] = "-P"; options[current++] = "" + getTrainPercent();
	862
	863	if (getRawOutput()) {
	864	options[current++] = "-D";
	865	}
	866
	867	if (!getRandomizeData()) {
	868	options[current++] = "-R";
	869	}
	870
	871	options[current++] = "-O";
	872	options[current++] = getOutputFile().getName();
	873
	874	if (getSplitEvaluator() != null) {
	875	options[current++] = "-W";
	876	options[current++] = getSplitEvaluator().getClass().getName();
	877	}
	878	options[current++] = "--";
	879
	880	System.arraycopy(seOptions, 0, options, current,
	881	seOptions.length);
	882	current += seOptions.length;
	883	while (current < options.length) {
	884	options[current++] = "";
	885	}
	886	return options;
	887	}
	888
	889	/**
	890	* Gets a text descrption of the result producer.
	891	*
	892	* @return a text description of the result producer.
	893	*/
	894	public String toString() {
	895
	896	String result = "RandomSplitResultProducer: ";
	897	result += getCompatibilityState();
	898	if (m_Instances == null) {
	899	result += ": <null Instances>";
	900	} else {
	901	result += ": " + Utils.backQuoteChars(m_Instances.relationName());
	902	}
	903	return result;
	904	}
	905
	906	/**
	907	* Returns the revision string.
	908	*
	909	* @return the revision
	910	*/
	911	public String getRevision() {
	912	return RevisionUtils.extract("$Revision: 1.20 $");
	913	}
	914	} // RandomSplitResultProducer

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/experiment/RandomSplitResultProducer.java

Download in other formats: