source: branches/MetisMQI/src/main/java/weka/experiment/RandomSplitResultProducer.java

Last change on this file was in changeset 29, checked in by gnappo, 14 years ago

Tagged the version for the demo and added a branch.

File size: 27.5 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RandomSplitResultProducer.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */


package weka.experiment;

import weka.core.AdditionalMeasureProducer;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

import java.io.File;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.Random;
import java.util.TimeZone;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Generates a single train/test split and calls the appropriate SplitEvaluator to generate some results.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -P &lt;percent&gt;
 *  The percentage of instances to use for training.
 *  (default 66)</pre>
 *
 * <pre> -D
 * Save raw split evaluator output.</pre>
 *
 * <pre> -O &lt;file/directory name/path&gt;
 *  The filename where raw output will be stored.
 *  If a directory name is specified then the individual
 *  outputs will be gzipped, otherwise all output will be
 *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
 *
 * <pre> -W &lt;class name&gt;
 *  The full class name of a SplitEvaluator.
 *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
 *
 * <pre> -R
 *  Set when data is not to be randomized and the data sets' size
 *  is not to be determined via probabilistic rounding.</pre>
 *
 * <pre>
 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
 * </pre>
 *
 * <pre> -W &lt;class name&gt;
 *  The full class name of the classifier.
 *  eg: weka.classifiers.bayes.NaiveBayes</pre>
 *
 * <pre> -C &lt;index&gt;
 *  The index of the class for which IR statistics
 *  are to be output. (default 1)</pre>
 *
 * <pre> -I &lt;index&gt;
 *  The index of an attribute to output in the
 *  results. This attribute should identify an
 *  instance in order to know which instances are
 *  in the test set of a cross validation. If 0,
 *  no output is produced (default 0).</pre>
 *
 * <pre> -P
 *  Add target and prediction columns to the result
 *  for each fold.</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 * </pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * All options after -- will be passed to the split evaluator.
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.20 $
 */
public class RandomSplitResultProducer 
  implements ResultProducer, OptionHandler, AdditionalMeasureProducer, 
             RevisionHandler {

  /** for serialization */
  static final long serialVersionUID = 1403798165056795073L;

  /** The dataset of interest */
  protected Instances m_Instances;

  /** The ResultListener to send results to */
  protected ResultListener m_ResultListener = new CSVResultListener();

  /** The percentage of instances to use for training */
  protected double m_TrainPercent = 66;

  /** Whether dataset is to be randomized */
  protected boolean m_randomize = true;

  /** The SplitEvaluator used to generate results */
  protected SplitEvaluator m_SplitEvaluator = new ClassifierSplitEvaluator();

  /** The names of any additional measures to look for in SplitEvaluators */
  protected String [] m_AdditionalMeasures = null;

  /** Save raw output of split evaluators --- for debugging purposes */
  protected boolean m_debugOutput = false;

  /** The output zipper to use for saving raw splitEvaluator output */
  protected OutputZipper m_ZipDest = null;

  /** The destination output file/directory for raw output */
  protected File m_OutputFile = new File(
                                new File(System.getProperty("user.dir")), 
                                "splitEvalutorOut.zip");

  /** The name of the key field containing the dataset name */
  public static String DATASET_FIELD_NAME = "Dataset";

  /** The name of the key field containing the run number */
  public static String RUN_FIELD_NAME = "Run";

  /** The name of the result field containing the timestamp */
  public static String TIMESTAMP_FIELD_NAME = "Date_time";

  /**
   * Returns a string describing this result producer
   * @return a description of the result producer suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Generates a single train/test split and calls the appropriate "
      + "SplitEvaluator to generate some results.";
  }

  /**
   * Sets the dataset that results will be obtained for.
   *
   * @param instances a value of type 'Instances'.
   */
  public void setInstances(Instances instances) {

    m_Instances = instances;
  }

  /**
   * Set a list of method names for additional measures to look for
   * in SplitEvaluators. This could contain many measures (of which only a
   * subset may be producible by the current SplitEvaluator) if an experiment
   * is the type that iterates over a set of properties.
   * @param additionalMeasures an array of measure names, null if none
   */
  public void setAdditionalMeasures(String [] additionalMeasures) {
    m_AdditionalMeasures = additionalMeasures;

    if (m_SplitEvaluator != null) {
      System.err.println("RandomSplitResultProducer: setting additional "
                         +"measures for "
                         +"split evaluator");
      m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
    }
  }

  /**
   * Returns an enumeration of any additional measure names that might be
   * in the SplitEvaluator
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    Vector newVector = new Vector();
    if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
      Enumeration en = ((AdditionalMeasureProducer)m_SplitEvaluator).
        enumerateMeasures();
      while (en.hasMoreElements()) {
        String mname = (String)en.nextElement();
        newVector.addElement(mname);
      }
    }
    return newVector.elements();
  }

  /**
   * Returns the value of the named measure
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @throws IllegalArgumentException if the named measure is not supported
   */
  public double getMeasure(String additionalMeasureName) {
    if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
      return ((AdditionalMeasureProducer)m_SplitEvaluator).
        getMeasure(additionalMeasureName);
    } else {
      throw new IllegalArgumentException("RandomSplitResultProducer: "
                          +"Can't return value for : "+additionalMeasureName
                          +". "+m_SplitEvaluator.getClass().getName()+" "
                          +"is not an AdditionalMeasureProducer");
    }
  }

  /**
   * Sets the object to send results of each run to.
   *
   * @param listener a value of type 'ResultListener'
   */
  public void setResultListener(ResultListener listener) {

    m_ResultListener = listener;
  }

  /**
   * Gets a Double representing the current date and time.
   * eg: 1:46pm on 20/5/1999 -> 19990520.1346
   *
   * @return a value of type Double
   */
  public static Double getTimestamp() {

    Calendar now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    double timestamp = now.get(Calendar.YEAR) * 10000
      + (now.get(Calendar.MONTH) + 1) * 100
      + now.get(Calendar.DAY_OF_MONTH)
      + now.get(Calendar.HOUR_OF_DAY) / 100.0
      + now.get(Calendar.MINUTE) / 10000.0;
    return new Double(timestamp);
  }

  /**
   * Prepare to generate results.
   *
   * @throws Exception if an error occurs during preprocessing.
   */
  public void preProcess() throws Exception {

    if (m_SplitEvaluator == null) {
      throw new Exception("No SplitEvaluator set");
    }
    if (m_ResultListener == null) {
      throw new Exception("No ResultListener set");
    }
    m_ResultListener.preProcess(this);
  }

  /**
   * Perform any postprocessing. When this method is called, it indicates
   * that no more requests to generate results for the current experiment
   * will be sent.
   *
   * @throws Exception if an error occurs
   */
  public void postProcess() throws Exception {

    m_ResultListener.postProcess(this);
    if (m_debugOutput) {
      if (m_ZipDest != null) {
        m_ZipDest.finished();
        m_ZipDest = null;
      }
    }
  }

  /**
   * Gets the keys for a specified run number. Different run
   * numbers correspond to different randomizations of the data. Keys
   * produced should be sent to the current ResultListener
   *
   * @param run the run number to get keys for.
   * @throws Exception if a problem occurs while getting the keys
   */
  public void doRunKeys(int run) throws Exception {
    if (m_Instances == null) {
      throw new Exception("No Instances set");
    }
    // Add in some fields to the key like run number, dataset name
    Object [] seKey = m_SplitEvaluator.getKey();
    Object [] key = new Object [seKey.length + 2];
    key[0] = Utils.backQuoteChars(m_Instances.relationName());
    key[1] = "" + run;
    System.arraycopy(seKey, 0, key, 2, seKey.length);
    if (m_ResultListener.isResultRequired(this, key)) {
      try {
        m_ResultListener.acceptResult(this, key, null);
      } catch (Exception ex) {
        // Save the train and test datasets for debugging purposes?
        throw ex;
      }
    }
  }

  /**
   * Gets the results for a specified run number. Different run
   * numbers correspond to different randomizations of the data. Results
   * produced should be sent to the current ResultListener
   *
   * @param run the run number to get results for.
   * @throws Exception if a problem occurs while getting the results
   */
  public void doRun(int run) throws Exception {

    if (getRawOutput()) {
      if (m_ZipDest == null) {
        m_ZipDest = new OutputZipper(m_OutputFile);
      }
    }

    if (m_Instances == null) {
      throw new Exception("No Instances set");
    }
    // Add in some fields to the key like run number, dataset name
    Object [] seKey = m_SplitEvaluator.getKey();
    Object [] key = new Object [seKey.length + 2];
    key[0] = Utils.backQuoteChars(m_Instances.relationName());
    key[1] = "" + run;
    System.arraycopy(seKey, 0, key, 2, seKey.length);
    if (m_ResultListener.isResultRequired(this, key)) {

      // Randomize on a copy of the original dataset
      Instances runInstances = new Instances(m_Instances);

      Instances train;
      Instances test;

      if (!m_randomize) {

        // Don't do any randomization
        int trainSize = Utils.round(runInstances.numInstances() * m_TrainPercent / 100);
        int testSize = runInstances.numInstances() - trainSize;
        train = new Instances(runInstances, 0, trainSize);
        test = new Instances(runInstances, trainSize, testSize);
      } else {
        Random rand = new Random(run);
        runInstances.randomize(rand);

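        // For a nominal class the split below is stratified: instances are
        // grouped by class value (plus one extra bucket for instances with a
        // missing class), each group is split with probabilistic rounding,
        // and the pieces are merged and re-shuffled, so the train/test class
        // proportions roughly match those of the full dataset.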
        // Nominal class
        if (runInstances.classAttribute().isNominal()) {

          // create the subset for each class
          int numClasses = runInstances.numClasses();
          Instances[] subsets = new Instances[numClasses + 1];
          for (int i=0; i < numClasses + 1; i++) {
            subsets[i] = new Instances(runInstances, 10);
          }

          // divide instances into subsets
          Enumeration e = runInstances.enumerateInstances();
          while(e.hasMoreElements()) {
            Instance inst = (Instance) e.nextElement();
            if (inst.classIsMissing()) {
              subsets[numClasses].add(inst);
            } else {
              subsets[(int) inst.classValue()].add(inst);
            }
          }

          // Compactify them
          for (int i=0; i < numClasses + 1; i++) {
            subsets[i].compactify();
          }

          // merge into train and test sets
          train = new Instances(runInstances, runInstances.numInstances());
          test = new Instances(runInstances, runInstances.numInstances());
          for (int i = 0; i < numClasses + 1; i++) {
            int trainSize = 
              Utils.probRound(subsets[i].numInstances() * m_TrainPercent / 100, rand);
            for (int j = 0; j < trainSize; j++) {
              train.add(subsets[i].instance(j));
            }
            for (int j = trainSize; j < subsets[i].numInstances(); j++) {
              test.add(subsets[i].instance(j));
            }
            // free memory
            subsets[i] = null;
          }
          train.compactify();
          test.compactify();

          // randomize the final sets
          train.randomize(rand);
          test.randomize(rand);
        } else {

          // Numeric target
          int trainSize = 
            Utils.probRound(runInstances.numInstances() * m_TrainPercent / 100, rand);
          int testSize = runInstances.numInstances() - trainSize;
          train = new Instances(runInstances, 0, trainSize);
          test = new Instances(runInstances, trainSize, testSize);
        }
      }
      try {
        Object [] seResults = m_SplitEvaluator.getResult(train, test);
        Object [] results = new Object [seResults.length + 1];
        results[0] = getTimestamp();
        System.arraycopy(seResults, 0, results, 1,
                         seResults.length);
        if (m_debugOutput) {
          String resultName = 
            (""+run+"."+
             Utils.backQuoteChars(runInstances.relationName())
             +"."
             +m_SplitEvaluator.toString()).replace(' ','_');
          resultName = Utils.removeSubstring(resultName, 
                                             "weka.classifiers.");
          resultName = Utils.removeSubstring(resultName, 
                                             "weka.filters.");
          resultName = Utils.removeSubstring(resultName, 
                                             "weka.attributeSelection.");
          m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName);
        }
        m_ResultListener.acceptResult(this, key, results);
      } catch (Exception ex) {
        // Save the train and test datasets for debugging purposes?
        throw ex;
      }
    }
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing the name of each column
   */
  public String [] getKeyNames() {

    String [] keyNames = m_SplitEvaluator.getKeyNames();
    // Add in the names of our extra key fields
    String [] newKeyNames = new String [keyNames.length + 2];
    newKeyNames[0] = DATASET_FIELD_NAME;
    newKeyNames[1] = RUN_FIELD_NAME;
    System.arraycopy(keyNames, 0, newKeyNames, 2, keyNames.length);
    return newKeyNames;
  }

  /**
   * Gets the data types of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing objects of the type of each column. The
   * objects should be Strings, or Doubles.
   */
  public Object [] getKeyTypes() {

    Object [] keyTypes = m_SplitEvaluator.getKeyTypes();
    // Add in the types of our extra fields
    Object [] newKeyTypes = new String [keyTypes.length + 2];
    newKeyTypes[0] = new String();
    newKeyTypes[1] = new String();
    System.arraycopy(keyTypes, 0, newKeyTypes, 2, keyTypes.length);
    return newKeyTypes;
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing the name of each column
   */
  public String [] getResultNames() {

    String [] resultNames = m_SplitEvaluator.getResultNames();
    // Add in the names of our extra Result fields
    String [] newResultNames = new String [resultNames.length + 1];
    newResultNames[0] = TIMESTAMP_FIELD_NAME;
    System.arraycopy(resultNames, 0, newResultNames, 1, resultNames.length);
    return newResultNames;
  }

  /**
   * Gets the data types of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing objects of the type of each column. The
   * objects should be Strings, or Doubles.
   */
  public Object [] getResultTypes() {

    Object [] resultTypes = m_SplitEvaluator.getResultTypes();
    // Add in the types of our extra Result fields
    Object [] newResultTypes = new Object [resultTypes.length + 1];
    newResultTypes[0] = new Double(0);
    System.arraycopy(resultTypes, 0, newResultTypes, 1, resultTypes.length);
    return newResultTypes;
  }

  /**
   * Gets a description of the internal settings of the result
   * producer, sufficient for distinguishing a ResultProducer
   * instance from another with different settings (ignoring
   * those settings set through this interface). For example,
   * a cross-validation ResultProducer may have a setting for the
   * number of folds. For a given state, the results produced should
   * be compatible. Typically if a ResultProducer is an OptionHandler,
   * this string will represent the command line arguments required
   * to set the ResultProducer to that state.
   *
   * @return the description of the ResultProducer state, or null
   * if no state is defined
   */
  public String getCompatibilityState() {

    String result = "-P " + m_TrainPercent;
    if (!getRandomizeData()) {
      result += " -R";
    }
    if (m_SplitEvaluator == null) {
      result += " <null SplitEvaluator>";
    } else {
      result += " -W " + m_SplitEvaluator.getClass().getName();
    }
    return result + " --";
  }
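
  // With the default field values above, getCompatibilityState() would return
  // a string along the lines of
  //   "-P 66.0 -W weka.experiment.ClassifierSplitEvaluator --"
  // (the exact text depends on the configured train percentage, the
  // randomizeData flag and the chosen split evaluator).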

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String outputFileTipText() {
    return "Set the destination for saving raw output. If the rawOutput "
      +"option is selected, then output from the splitEvaluator for "
      +"individual train-test splits is saved. If the destination is a "
      +"directory, "
      +"then each output is saved to an individual gzip file; if the "
      +"destination is a file, then each output is saved as an entry "
      +"in a zip file.";
  }

  /**
   * Get the value of OutputFile.
   *
   * @return Value of OutputFile.
   */
  public File getOutputFile() {

    return m_OutputFile;
  }

  /**
   * Set the value of OutputFile.
   *
   * @param newOutputFile Value to assign to OutputFile.
   */
  public void setOutputFile(File newOutputFile) {

    m_OutputFile = newOutputFile;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String randomizeDataTipText() {
    return "If true, randomize the dataset and determine the train set size " +
      "via probabilistic rounding; if false, do neither";
  }

  /**
   * Get if dataset is to be randomized
   * @return true if dataset is to be randomized
   */
  public boolean getRandomizeData() {
    return m_randomize;
  }

  /**
   * Set to true if dataset is to be randomized
   * @param d true if dataset is to be randomized
   */
  public void setRandomizeData(boolean d) {
    m_randomize = d;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String rawOutputTipText() {
    return "Save raw output (useful for debugging). If set, then output is "
      +"sent to the destination specified by outputFile";
  }

  /**
   * Get if raw split evaluator output is to be saved
   * @return true if raw split evaluator output is to be saved
   */
  public boolean getRawOutput() {
    return m_debugOutput;
  }

  /**
   * Set to true if raw split evaluator output is to be saved
   * @param d true if output is to be saved
   */
  public void setRawOutput(boolean d) {
    m_debugOutput = d;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String trainPercentTipText() {
    return "Set the percentage of data to use for training.";
  }

  /**
   * Get the value of TrainPercent.
   *
   * @return Value of TrainPercent.
   */
  public double getTrainPercent() {

    return m_TrainPercent;
  }

  /**
   * Set the value of TrainPercent.
   *
   * @param newTrainPercent Value to assign to TrainPercent.
   */
  public void setTrainPercent(double newTrainPercent) {

    m_TrainPercent = newTrainPercent;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String splitEvaluatorTipText() {
    return "The evaluator to apply to the test data. "
      +"This may be a classifier, regression scheme etc.";
  }

  /**
   * Get the SplitEvaluator.
   *
   * @return the SplitEvaluator.
   */
  public SplitEvaluator getSplitEvaluator() {

    return m_SplitEvaluator;
  }

  /**
   * Set the SplitEvaluator.
   *
   * @param newSplitEvaluator new SplitEvaluator to use.
   */
  public void setSplitEvaluator(SplitEvaluator newSplitEvaluator) {

    m_SplitEvaluator = newSplitEvaluator;
    m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector newVector = new Vector(5);

    newVector.addElement(new Option(
             "\tThe percentage of instances to use for training.\n"
              +"\t(default 66)", 
             "P", 1, 
             "-P <percent>"));

    newVector.addElement(new Option(
             "Save raw split evaluator output.",
             "D",0,"-D"));

    newVector.addElement(new Option(
             "\tThe filename where raw output will be stored.\n"
             +"\tIf a directory name is specified then the individual\n"
             +"\toutputs will be gzipped, otherwise all output will be\n"
             +"\tzipped to the named file. Use in conjunction with -D."
             +"\t(default splitEvalutorOut.zip)", 
             "O", 1, 
             "-O <file/directory name/path>"));

    newVector.addElement(new Option(
             "\tThe full class name of a SplitEvaluator.\n"
              +"\teg: weka.experiment.ClassifierSplitEvaluator", 
             "W", 1, 
             "-W <class name>"));

    newVector.addElement(new Option(
             "\tSet when data is not to be randomized and the data sets' size\n"
             + "\tis not to be determined via probabilistic rounding.",
             "R",0,"-R"));


    if ((m_SplitEvaluator != null) &&
        (m_SplitEvaluator instanceof OptionHandler)) {
      newVector.addElement(new Option(
             "",
             "", 0, "\nOptions specific to split evaluator "
             + m_SplitEvaluator.getClass().getName() + ":"));
      Enumeration enu = ((OptionHandler)m_SplitEvaluator).listOptions();
      while (enu.hasMoreElements()) {
        newVector.addElement(enu.nextElement());
      }
    }
    return newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -P &lt;percent&gt;
   *  The percentage of instances to use for training.
   *  (default 66)</pre>
   *
   * <pre> -D
   * Save raw split evaluator output.</pre>
   *
   * <pre> -O &lt;file/directory name/path&gt;
   *  The filename where raw output will be stored.
   *  If a directory name is specified then the individual
   *  outputs will be gzipped, otherwise all output will be
   *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
   *
   * <pre> -W &lt;class name&gt;
   *  The full class name of a SplitEvaluator.
   *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
   *
   * <pre> -R
   *  Set when data is not to be randomized and the data sets' size
   *  is not to be determined via probabilistic rounding.</pre>
   *
   * <pre>
   * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
   * </pre>
   *
   * <pre> -W &lt;class name&gt;
   *  The full class name of the classifier.
   *  eg: weka.classifiers.bayes.NaiveBayes</pre>
   *
   * <pre> -C &lt;index&gt;
   *  The index of the class for which IR statistics
   *  are to be output. (default 1)</pre>
   *
   * <pre> -I &lt;index&gt;
   *  The index of an attribute to output in the
   *  results. This attribute should identify an
   *  instance in order to know which instances are
   *  in the test set of a cross validation. If 0,
   *  no output is produced (default 0).</pre>
   *
   * <pre> -P
   *  Add target and prediction columns to the result
   *  for each fold.</pre>
   *
   * <pre>
   * Options specific to classifier weka.classifiers.rules.ZeroR:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   <!-- options-end -->
   *
   * All options after -- will be passed to the split evaluator.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setRawOutput(Utils.getFlag('D', options));
    setRandomizeData(!Utils.getFlag('R', options));

    String fName = Utils.getOption('O', options);
    if (fName.length() != 0) {
      setOutputFile(new File(fName));
    }

    String trainPct = Utils.getOption('P', options);
    if (trainPct.length() != 0) {
      setTrainPercent((new Double(trainPct)).doubleValue());
    } else {
      setTrainPercent(66);
    }

    String seName = Utils.getOption('W', options);
    if (seName.length() == 0) {
      throw new Exception("A SplitEvaluator must be specified with"
                          + " the -W option.");
    }
    // Do it first without options, so if an exception is thrown during
    // the option setting, listOptions will contain options for the actual
    // SE.
    setSplitEvaluator((SplitEvaluator)Utils.forName(
                      SplitEvaluator.class,
                      seName,
                      null));
    if (getSplitEvaluator() instanceof OptionHandler) {
      ((OptionHandler) getSplitEvaluator())
        .setOptions(Utils.partitionOptions(options));
    }
  }

  /**
   * Gets the current settings of the result producer.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] seOptions = new String [0];
    if ((m_SplitEvaluator != null) && 
        (m_SplitEvaluator instanceof OptionHandler)) {
      seOptions = ((OptionHandler)m_SplitEvaluator).getOptions();
    }

    String [] options = new String [seOptions.length + 9];
    int current = 0;

    options[current++] = "-P"; options[current++] = "" + getTrainPercent();

    if (getRawOutput()) {
      options[current++] = "-D";
    }

    if (!getRandomizeData()) {
      options[current++] = "-R";
    }

    options[current++] = "-O"; 
    options[current++] = getOutputFile().getName();

    if (getSplitEvaluator() != null) {
      options[current++] = "-W";
      options[current++] = getSplitEvaluator().getClass().getName();
    }
    options[current++] = "--";

    System.arraycopy(seOptions, 0, options, current, 
                     seOptions.length);
    current += seOptions.length;
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Gets a text description of the result producer.
   *
   * @return a text description of the result producer.
   */
  public String toString() {

    String result = "RandomSplitResultProducer: ";
    result += getCompatibilityState();
    if (m_Instances == null) {
      result += ": <null Instances>";
    } else {
      result += ": " + Utils.backQuoteChars(m_Instances.relationName());
    }
    return result;
  }

  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.20 $");
  }
} // RandomSplitResultProducer
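
Usage note: a minimal sketch of how this producer can be driven programmatically, outside the Experimenter GUI. The file names are placeholders, and it assumes the stock weka.experiment.InstancesResultListener and the default ClassifierSplitEvaluator are available; error handling is omitted.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import weka.core.Instances;
import weka.experiment.InstancesResultListener;
import weka.experiment.RandomSplitResultProducer;

public class RandomSplitDemo {
  public static void main(String[] args) throws Exception {
    // Load a dataset and declare the class attribute (last attribute here).
    Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
    data.setClassIndex(data.numAttributes() - 1);

    // Configure the producer: 66% train split, default ClassifierSplitEvaluator.
    RandomSplitResultProducer rp = new RandomSplitResultProducer();
    rp.setInstances(data);
    rp.setTrainPercent(66);

    // Collect the generated result rows in an ARFF file.
    InstancesResultListener listener = new InstancesResultListener();
    listener.setOutputFile(new File("results.arff"));
    rp.setResultListener(listener);

    // One doRun call per run; the run number seeds the randomization.
    rp.preProcess();
    for (int run = 1; run <= 10; run++) {
      rp.doRun(run);
    }
    rp.postProcess();
  }
}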