Context Navigation

source: src/main/java/weka/experiment/ExplicitTestsetResultProducer.java @ 22

Last change on this file since 22 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 34.2 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* ExplicitTestsetResultProducer.java
	19	* Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.experiment;
	24
	25	import weka.core.AdditionalMeasureProducer;
	26	import weka.core.Instances;
	27	import weka.core.Option;
	28	import weka.core.OptionHandler;
	29	import weka.core.RevisionHandler;
	30	import weka.core.RevisionUtils;
	31	import weka.core.Utils;
	32	import weka.core.WekaException;
	33	import weka.core.converters.ConverterUtils.DataSource;
	34
	35	import java.io.File;
	36	import java.util.Calendar;
	37	import java.util.Enumeration;
	38	import java.util.Random;
	39	import java.util.TimeZone;
	40	import java.util.Vector;
	41
	42	/**
	43	<!-- globalinfo-start -->
	44	* Loads the external test set and calls the appropriate SplitEvaluator to generate some results.<br/>
	45	* The filename of the test set is constructed as follows:<br/>
	46	* <dir> + / + <prefix> + <relation-name> + <suffix><br/>
	47	* The relation-name can be modified by using the regular expression to replace the matching sub-string with a specified replacement string. In order to get rid of the string that the Weka filters add to the end of the relation name, just use '.*-weka' as the regular expression to find.<br/>
	48	* The suffix determines the type of file to load, i.e., one is not restricted to ARFF files. As long as Weka recognizes the extension specified in the suffix, the data will be loaded with one of Weka's converters.
	49	* <p/>
	50	<!-- globalinfo-end -->
	51	*
	52	<!-- options-start -->
	53	* Valid options are: <p/>
	54	*
	55	* <pre> -D
	56	* Save raw split evaluator output.</pre>
	57	*
	58	* <pre> -O <file/directory name/path>
	59	* The filename where raw output will be stored.
	60	* If a directory name is specified then then individual
	61	* outputs will be gzipped, otherwise all output will be
	62	* zipped to the named file. Use in conjuction with -D.
	63	* (default: splitEvalutorOut.zip)</pre>
	64	*
	65	* <pre> -W <class name>
	66	* The full class name of a SplitEvaluator.
	67	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
	68	*
	69	* <pre> -R
	70	* Set when data is to be randomized.</pre>
	71	*
	72	* <pre> -dir <directory>
	73	* The directory containing the test sets.
	74	* (default: current directory)</pre>
	75	*
	76	* <pre> -prefix <string>
	77	* An optional prefix for the test sets (before the relation name).
	78	* (default: empty string)</pre>
	79	*
	80	* <pre> -suffix <string>
	81	* The suffix to append to the test set.
	82	* (default: _test.arff)</pre>
	83	*
	84	* <pre> -find <regular expression>
	85	* The regular expression to search the relation name with.
	86	* Not used if an empty string.
	87	* (default: empty string)</pre>
	88	*
	89	* <pre> -replace <string>
	90	* The replacement string for the all the matches of '-find'.
	91	* (default: empty string)</pre>
	92	*
	93	* <pre>
	94	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
	95	* </pre>
	96	*
	97	* <pre> -W <class name>
	98	* The full class name of the classifier.
	99	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	100	*
	101	* <pre> -C <index>
	102	* The index of the class for which IR statistics
	103	* are to be output. (default 1)</pre>
	104	*
	105	* <pre> -I <index>
	106	* The index of an attribute to output in the
	107	* results. This attribute should identify an
	108	* instance in order to know which instances are
	109	* in the test set of a cross validation. if 0
	110	* no output (default 0).</pre>
	111	*
	112	* <pre> -P
	113	* Add target and prediction columns to the result
	114	* for each fold.</pre>
	115	*
	116	* <pre>
	117	* Options specific to classifier weka.classifiers.rules.ZeroR:
	118	* </pre>
	119	*
	120	* <pre> -D
	121	* If set, classifier is run in debug mode and
	122	* may output additional info to the console</pre>
	123	*
	124	<!-- options-end -->
	125	*
	126	* All options after -- will be passed to the split evaluator.
	127	*
	128	* @author Len Trigg (trigg@cs.waikato.ac.nz)
	129	* @author FracPete (fracpete at waikato dot ac dot nz)
	130	* @version $Revision: 5353 $
	131	*/
	132	public class ExplicitTestsetResultProducer
	133	implements ResultProducer, OptionHandler, AdditionalMeasureProducer,
	134	RevisionHandler {
	135
	136	/** for serialization. */
	137	private static final long serialVersionUID = 2613585409333652530L;
	138
	139	/** the default suffix. */
	140	public final static String DEFAULT_SUFFIX = "_test.arff";
	141
	142	/** The dataset of interest. */
	143	protected Instances m_Instances;
	144
	145	/** The ResultListener to send results to. */
	146	protected ResultListener m_ResultListener = new CSVResultListener();
	147
	148	/** The directory containing all the test sets. */
	149	protected File m_TestsetDir = new File(System.getProperty("user.dir"));
	150
	151	/** The prefix for all the test sets. */
	152	protected String m_TestsetPrefix = "";
	153
	154	/** The suffix for all the test sets. */
	155	protected String m_TestsetSuffix = DEFAULT_SUFFIX;
	156
	157	/** The regular expression to search for in the relation name. */
	158	protected String m_RelationFind = "";
	159
	160	/** The string to use to replace the matches of the regular expression. */
	161	protected String m_RelationReplace = "";
	162
	163	/** Whether dataset is to be randomized. */
	164	protected boolean m_randomize = false;
	165
	166	/** The SplitEvaluator used to generate results. */
	167	protected SplitEvaluator m_SplitEvaluator = new ClassifierSplitEvaluator();
	168
	169	/** The names of any additional measures to look for in SplitEvaluators. */
	170	protected String[] m_AdditionalMeasures = null;
	171
	172	/** Save raw output of split evaluators --- for debugging purposes. */
	173	protected boolean m_debugOutput = false;
	174
	175	/** The output zipper to use for saving raw splitEvaluator output. */
	176	protected OutputZipper m_ZipDest = null;
	177
	178	/** The destination output file/directory for raw output. */
	179	protected File m_OutputFile = new File(
	180	new File(System.getProperty("user.dir")),
	181	"splitEvalutorOut.zip");
	182
	183	/** The name of the key field containing the dataset name. */
	184	public static String DATASET_FIELD_NAME = "Dataset";
	185
	186	/** The name of the key field containing the run number. */
	187	public static String RUN_FIELD_NAME = "Run";
	188
	189	/** The name of the result field containing the timestamp. */
	190	public static String TIMESTAMP_FIELD_NAME = "Date_time";
	191
	192	/**
	193	* Returns a string describing this result producer.
	194	*
	195	* @return a description of the result producer suitable for
	196	* displaying in the explorer/experimenter gui
	197	*/
	198	public String globalInfo() {
	199	return
	200	"Loads the external test set and calls the appropriate "
	201	+ "SplitEvaluator to generate some results.\n"
	202	+ "The filename of the test set is constructed as follows:\n"
	203	+ " <dir> + / + <prefix> + <relation-name> + <suffix>\n"
	204	+ "The relation-name can be modified by using the regular expression "
	205	+ "to replace the matching sub-string with a specified replacement "
	206	+ "string. In order to get rid of the string that the Weka filters "
	207	+ "add to the end of the relation name, just use '.*-weka' as the "
	208	+ "regular expression to find.\n"
	209	+ "The suffix determines the type of file to load, i.e., one is "
	210	+ "not restricted to ARFF files. As long as Weka recognizes the "
	211	+ "extension specified in the suffix, the data will be loaded with "
	212	+ "one of Weka's converters.";
	213	}
	214
	215	/**
	216	* Returns an enumeration describing the available options.
	217	*
	218	* @return an enumeration of all the available options.
	219	*/
	220	public Enumeration listOptions() {
	221	Vector result = new Vector();
	222
	223	result.addElement(new Option(
	224	"Save raw split evaluator output.",
	225	"D", 0, "-D"));
	226
	227	result.addElement(new Option(
	228	"\tThe filename where raw output will be stored.\n"
	229	+"\tIf a directory name is specified then then individual\n"
	230	+"\toutputs will be gzipped, otherwise all output will be\n"
	231	+"\tzipped to the named file. Use in conjuction with -D.\n"
	232	+"\t(default: splitEvalutorOut.zip)",
	233	"O", 1, "-O <file/directory name/path>"));
	234
	235	result.addElement(new Option(
	236	"\tThe full class name of a SplitEvaluator.\n"
	237	+"\teg: weka.experiment.ClassifierSplitEvaluator",
	238	"W", 1, "-W <class name>"));
	239
	240	result.addElement(new Option(
	241	"\tSet when data is to be randomized.",
	242	"R", 0 ,"-R"));
	243
	244	result.addElement(new Option(
	245	"\tThe directory containing the test sets.\n"
	246	+ "\t(default: current directory)",
	247	"dir", 1, "-dir <directory>"));
	248
	249	result.addElement(new Option(
	250	"\tAn optional prefix for the test sets (before the relation name).\n"
	251	+ "(default: empty string)",
	252	"prefix", 1, "-prefix <string>"));
	253
	254	result.addElement(new Option(
	255	"\tThe suffix to append to the test set.\n"
	256	+ "\t(default: " + DEFAULT_SUFFIX + ")",
	257	"suffix", 1, "-suffix <string>"));
	258
	259	result.addElement(new Option(
	260	"\tThe regular expression to search the relation name with.\n"
	261	+ "\tNot used if an empty string.\n"
	262	+ "\t(default: empty string)",
	263	"find", 1, "-find <regular expression>"));
	264
	265	result.addElement(new Option(
	266	"\tThe replacement string for the all the matches of '-find'.\n"
	267	+ "\t(default: empty string)",
	268	"replace", 1, "-replace <string>"));
	269
	270	if ((m_SplitEvaluator != null) && (m_SplitEvaluator instanceof OptionHandler)) {
	271	result.addElement(new Option(
	272	"",
	273	"", 0, "\nOptions specific to split evaluator "
	274	+ m_SplitEvaluator.getClass().getName() + ":"));
	275	Enumeration enu = ((OptionHandler)m_SplitEvaluator).listOptions();
	276	while (enu.hasMoreElements())
	277	result.addElement(enu.nextElement());
	278	}
	279
	280	return result.elements();
	281	}
	282
	283	/**
	284	* Parses a given list of options. <p/>
	285	*
	286	<!-- options-start -->
	287	* Valid options are: <p/>
	288	*
	289	* <pre> -D
	290	* Save raw split evaluator output.</pre>
	291	*
	292	* <pre> -O <file/directory name/path>
	293	* The filename where raw output will be stored.
	294	* If a directory name is specified then then individual
	295	* outputs will be gzipped, otherwise all output will be
	296	* zipped to the named file. Use in conjuction with -D.
	297	* (default: splitEvalutorOut.zip)</pre>
	298	*
	299	* <pre> -W <class name>
	300	* The full class name of a SplitEvaluator.
	301	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
	302	*
	303	* <pre> -R
	304	* Set when data is to be randomized.</pre>
	305	*
	306	* <pre> -dir <directory>
	307	* The directory containing the test sets.
	308	* (default: current directory)</pre>
	309	*
	310	* <pre> -prefix <string>
	311	* An optional prefix for the test sets (before the relation name).
	312	* (default: empty string)</pre>
	313	*
	314	* <pre> -suffix <string>
	315	* The suffix to append to the test set.
	316	* (default: _test.arff)</pre>
	317	*
	318	* <pre> -find <regular expression>
	319	* The regular expression to search the relation name with.
	320	* Not used if an empty string.
	321	* (default: empty string)</pre>
	322	*
	323	* <pre> -replace <string>
	324	* The replacement string for the all the matches of '-find'.
	325	* (default: empty string)</pre>
	326	*
	327	* <pre>
	328	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
	329	* </pre>
	330	*
	331	* <pre> -W <class name>
	332	* The full class name of the classifier.
	333	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	334	*
	335	* <pre> -C <index>
	336	* The index of the class for which IR statistics
	337	* are to be output. (default 1)</pre>
	338	*
	339	* <pre> -I <index>
	340	* The index of an attribute to output in the
	341	* results. This attribute should identify an
	342	* instance in order to know which instances are
	343	* in the test set of a cross validation. if 0
	344	* no output (default 0).</pre>
	345	*
	346	* <pre> -P
	347	* Add target and prediction columns to the result
	348	* for each fold.</pre>
	349	*
	350	* <pre>
	351	* Options specific to classifier weka.classifiers.rules.ZeroR:
	352	* </pre>
	353	*
	354	* <pre> -D
	355	* If set, classifier is run in debug mode and
	356	* may output additional info to the console</pre>
	357	*
	358	<!-- options-end -->
	359	*
	360	* All options after -- will be passed to the split evaluator.
	361	*
	362	* @param options the list of options as an array of strings
	363	* @throws Exception if an option is not supported
	364	*/
	365	public void setOptions(String[] options) throws Exception {
	366	String tmpStr;
	367
	368	setRawOutput(Utils.getFlag('D', options));
	369	setRandomizeData(!Utils.getFlag('R', options));
	370
	371	tmpStr = Utils.getOption('O', options);
	372	if (tmpStr.length() != 0)
	373	setOutputFile(new File(tmpStr));
	374
	375	tmpStr = Utils.getOption("dir", options);
	376	if (tmpStr.length() > 0)
	377	setTestsetDir(new File(tmpStr));
	378	else
	379	setTestsetDir(new File(System.getProperty("user.dir")));
	380
	381	tmpStr = Utils.getOption("prefix", options);
	382	if (tmpStr.length() > 0)
	383	setTestsetPrefix(tmpStr);
	384	else
	385	setTestsetPrefix("");
	386
	387	tmpStr = Utils.getOption("suffix", options);
	388	if (tmpStr.length() > 0)
	389	setTestsetSuffix(tmpStr);
	390	else
	391	setTestsetSuffix(DEFAULT_SUFFIX);
	392
	393	tmpStr = Utils.getOption("find", options);
	394	if (tmpStr.length() > 0)
	395	setRelationFind(tmpStr);
	396	else
	397	setRelationFind("");
	398
	399	tmpStr = Utils.getOption("replace", options);
	400	if ((tmpStr.length() > 0) && (getRelationFind().length() > 0))
	401	setRelationReplace(tmpStr);
	402	else
	403	setRelationReplace("");
	404
	405	tmpStr = Utils.getOption('W', options);
	406	if (tmpStr.length() == 0)
	407	throw new Exception("A SplitEvaluator must be specified with the -W option.");
	408
	409	// Do it first without options, so if an exception is thrown during
	410	// the option setting, listOptions will contain options for the actual
	411	// SE.
	412	setSplitEvaluator((SplitEvaluator)Utils.forName(SplitEvaluator.class, tmpStr, null));
	413	if (getSplitEvaluator() instanceof OptionHandler)
	414	((OptionHandler) getSplitEvaluator()).setOptions(Utils.partitionOptions(options));
	415	}
	416
	417	/**
	418	* Gets the current settings of the result producer.
	419	*
	420	* @return an array of strings suitable for passing to setOptions
	421	*/
	422	public String[] getOptions() {
	423	Vector<String> result;
	424	String[] seOptions;
	425	int i;
	426
	427	result = new Vector<String>();
	428
	429	seOptions = new String [0];
	430	if ((m_SplitEvaluator != null) && (m_SplitEvaluator instanceof OptionHandler))
	431	seOptions = ((OptionHandler)m_SplitEvaluator).getOptions();
	432
	433	if (getRawOutput())
	434	result.add("-D");
	435
	436	if (!getRandomizeData())
	437	result.add("-R");
	438
	439	result.add("-O");
	440	result.add(getOutputFile().getName());
	441
	442	result.add("-dir");
	443	result.add(getTestsetDir().getPath());
	444
	445	if (getTestsetPrefix().length() > 0) {
	446	result.add("-prefix");
	447	result.add(getTestsetPrefix());
	448	}
	449
	450	result.add("-suffix");
	451	result.add(getTestsetSuffix());
	452
	453	if (getRelationFind().length() > 0) {
	454	result.add("-find");
	455	result.add(getRelationFind());
	456
	457	if (getRelationReplace().length() > 0) {
	458	result.add("-replace");
	459	result.add(getRelationReplace());
	460	}
	461	}
	462
	463	if (getSplitEvaluator() != null) {
	464	result.add("-W");
	465	result.add(getSplitEvaluator().getClass().getName());
	466	}
	467
	468	if (seOptions.length > 0) {
	469	result.add("--");
	470	for (i = 0; i < seOptions.length; i++)
	471	result.add(seOptions[i]);
	472	}
	473
	474	return result.toArray(new String[result.size()]);
	475	}
	476
	477	/**
	478	* Sets the dataset that results will be obtained for.
	479	*
	480	* @param instances a value of type 'Instances'.
	481	*/
	482	public void setInstances(Instances instances) {
	483	m_Instances = instances;
	484	}
	485
	486	/**
	487	* Set a list of method names for additional measures to look for
	488	* in SplitEvaluators. This could contain many measures (of which only a
	489	* subset may be produceable by the current SplitEvaluator) if an experiment
	490	* is the type that iterates over a set of properties.
	491	*
	492	* @param additionalMeasures an array of measure names, null if none
	493	*/
	494	public void setAdditionalMeasures(String[] additionalMeasures) {
	495	m_AdditionalMeasures = additionalMeasures;
	496
	497	if (m_SplitEvaluator != null) {
	498	System.err.println(
	499	"ExplicitTestsetResultProducer: setting additional "
	500	+ "measures for split evaluator");
	501	m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
	502	}
	503	}
	504
	505	/**
	506	* Returns an enumeration of any additional measure names that might be
	507	* in the SplitEvaluator.
	508	*
	509	* @return an enumeration of the measure names
	510	*/
	511	public Enumeration enumerateMeasures() {
	512	Vector result = new Vector();
	513	if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
	514	Enumeration en = ((AdditionalMeasureProducer)m_SplitEvaluator).enumerateMeasures();
	515	while (en.hasMoreElements()) {
	516	String mname = (String) en.nextElement();
	517	result.addElement(mname);
	518	}
	519	}
	520	return result.elements();
	521	}
	522
	523	/**
	524	* Returns the value of the named measure.
	525	*
	526	* @param additionalMeasureName the name of the measure to query for its value
	527	* @return the value of the named measure
	528	* @throws IllegalArgumentException if the named measure is not supported
	529	*/
	530	public double getMeasure(String additionalMeasureName) {
	531	if (m_SplitEvaluator instanceof AdditionalMeasureProducer)
	532	return ((AdditionalMeasureProducer)m_SplitEvaluator).getMeasure(additionalMeasureName);
	533	else
	534	throw new IllegalArgumentException(
	535	"ExplicitTestsetResultProducer: "
	536	+ "Can't return value for : " + additionalMeasureName
	537	+ ". " + m_SplitEvaluator.getClass().getName() + " "
	538	+ "is not an AdditionalMeasureProducer");
	539	}
	540
	541	/**
	542	* Sets the object to send results of each run to.
	543	*
	544	* @param listener a value of type 'ResultListener'
	545	*/
	546	public void setResultListener(ResultListener listener) {
	547	m_ResultListener = listener;
	548	}
	549
	550	/**
	551	* Gets a Double representing the current date and time.
	552	* eg: 1:46pm on 20/5/1999 -> 19990520.1346
	553	*
	554	* @return a value of type Double
	555	*/
	556	public static Double getTimestamp() {
	557	Calendar now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
	558	double timestamp = now.get(Calendar.YEAR) * 10000
	559	+ (now.get(Calendar.MONTH) + 1) * 100
	560	+ now.get(Calendar.DAY_OF_MONTH)
	561	+ now.get(Calendar.HOUR_OF_DAY) / 100.0
	562	+ now.get(Calendar.MINUTE) / 10000.0;
	563	return new Double(timestamp);
	564	}
	565
	566	/**
	567	* Prepare to generate results.
	568	*
	569	* @throws Exception if an error occurs during preprocessing.
	570	*/
	571	public void preProcess() throws Exception {
	572	if (m_SplitEvaluator == null)
	573	throw new Exception("No SplitEvalutor set");
	574
	575	if (m_ResultListener == null)
	576	throw new Exception("No ResultListener set");
	577
	578	m_ResultListener.preProcess(this);
	579	}
	580
	581	/**
	582	* Perform any postprocessing. When this method is called, it indicates
	583	* that no more requests to generate results for the current experiment
	584	* will be sent.
	585	*
	586	* @throws Exception if an error occurs
	587	*/
	588	public void postProcess() throws Exception {
	589	m_ResultListener.postProcess(this);
	590	if (m_debugOutput) {
	591	if (m_ZipDest != null) {
	592	m_ZipDest.finished();
	593	m_ZipDest = null;
	594	}
	595	}
	596	}
	597
	598	/**
	599	* Gets the keys for a specified run number. Different run
	600	* numbers correspond to different randomizations of the data. Keys
	601	* produced should be sent to the current ResultListener
	602	*
	603	* @param run the run number to get keys for.
	604	* @throws Exception if a problem occurs while getting the keys
	605	*/
	606	public void doRunKeys(int run) throws Exception {
	607	if (m_Instances == null)
	608	throw new Exception("No Instances set");
	609
	610	// Add in some fields to the key like run number, dataset name
	611	Object[] seKey = m_SplitEvaluator.getKey();
	612	Object[] key = new Object [seKey.length + 2];
	613	key[0] = Utils.backQuoteChars(m_Instances.relationName());
	614	key[1] = "" + run;
	615	System.arraycopy(seKey, 0, key, 2, seKey.length);
	616	if (m_ResultListener.isResultRequired(this, key)) {
	617	try {
	618	m_ResultListener.acceptResult(this, key, null);
	619	}
	620	catch (Exception ex) {
	621	// Save the train and test datasets for debugging purposes?
	622	throw ex;
	623	}
	624	}
	625	}
	626
	627	/**
	628	* Generates a new filename for the given relation based on the current
	629	* setup.
	630	*
	631	* @param inst the instances to create the filename for
	632	* @return the generated filename
	633	*/
	634	protected String createFilename(Instances inst) {
	635	String result;
	636	String name;
	637
	638	name = inst.relationName();
	639	if (getRelationFind().length() > 0)
	640	name = name.replaceAll(getRelationFind(), getRelationReplace());
	641
	642	result = getTestsetDir().getPath() + File.separator;
	643	result += getTestsetPrefix() + name + getTestsetSuffix();
	644
	645	return result;
	646	}
	647
	648	/**
	649	* Gets the results for a specified run number. Different run
	650	* numbers correspond to different randomizations of the data. Results
	651	* produced should be sent to the current ResultListener
	652	*
	653	* @param run the run number to get results for.
	654	* @throws Exception if a problem occurs while getting the results
	655	*/
	656	public void doRun(int run) throws Exception {
	657	if (getRawOutput()) {
	658	if (m_ZipDest == null)
	659	m_ZipDest = new OutputZipper(m_OutputFile);
	660	}
	661
	662	if (m_Instances == null)
	663	throw new Exception("No Instances set");
	664
	665	// Add in some fields to the key like run number, dataset name
	666	Object[] seKey = m_SplitEvaluator.getKey();
	667	Object[] key = new Object [seKey.length + 2];
	668	key[0] = Utils.backQuoteChars(m_Instances.relationName());
	669	key[1] = "" + run;
	670	System.arraycopy(seKey, 0, key, 2, seKey.length);
	671	if (m_ResultListener.isResultRequired(this, key)) {
	672	// training set
	673	Instances train = new Instances(m_Instances);
	674	if (m_randomize) {
	675	Random rand = new Random(run);
	676	train.randomize(rand);
	677	}
	678
	679	// test set
	680	String filename = createFilename(train);
	681	File file = new File(filename);
	682	if (!file.exists())
	683	throw new WekaException("Test set '" + filename + "' not found!");
	684	Instances test = DataSource.read(filename);
	685	// can we set the class attribute safely?
	686	if (train.numAttributes() == test.numAttributes())
	687	test.setClassIndex(train.classIndex());
	688	else
	689	throw new WekaException(
	690	"Train and test set (= " + filename + ") "
	691	+ "differ in number of attributes: "
	692	+ train.numAttributes() + " != " + test.numAttributes());
	693	// test headers
	694	if (!train.equalHeaders(test))
	695	throw new WekaException(
	696	"Train and test set (= " + filename + ") "
	697	+ "are not compatible:\n"
	698	+ train.equalHeadersMsg(test));
	699
	700	try {
	701	Object[] seResults = m_SplitEvaluator.getResult(train, test);
	702	Object[] results = new Object [seResults.length + 1];
	703	results[0] = getTimestamp();
	704	System.arraycopy(seResults, 0, results, 1,
	705	seResults.length);
	706	if (m_debugOutput) {
	707	String resultName =
	708	(""+run+"."+
	709	Utils.backQuoteChars(train.relationName())
	710	+"."
	711	+m_SplitEvaluator.toString()).replace(' ','_');
	712	resultName = Utils.removeSubstring(resultName,
	713	"weka.classifiers.");
	714	resultName = Utils.removeSubstring(resultName,
	715	"weka.filters.");
	716	resultName = Utils.removeSubstring(resultName,
	717	"weka.attributeSelection.");
	718	m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName);
	719	}
	720	m_ResultListener.acceptResult(this, key, results);
	721	}
	722	catch (Exception e) {
	723	// Save the train and test datasets for debugging purposes?
	724	throw e;
	725	}
	726	}
	727	}
	728
	729	/**
	730	* Gets the names of each of the columns produced for a single run.
	731	* This method should really be static.
	732	*
	733	* @return an array containing the name of each column
	734	*/
	735	public String[] getKeyNames() {
	736	String[] keyNames = m_SplitEvaluator.getKeyNames();
	737	// Add in the names of our extra key fields
	738	String[] newKeyNames = new String [keyNames.length + 2];
	739	newKeyNames[0] = DATASET_FIELD_NAME;
	740	newKeyNames[1] = RUN_FIELD_NAME;
	741	System.arraycopy(keyNames, 0, newKeyNames, 2, keyNames.length);
	742	return newKeyNames;
	743	}
	744
	745	/**
	746	* Gets the data types of each of the columns produced for a single run.
	747	* This method should really be static.
	748	*
	749	* @return an array containing objects of the type of each column.
	750	* The objects should be Strings, or Doubles.
	751	*/
	752	public Object[] getKeyTypes() {
	753	Object[] keyTypes = m_SplitEvaluator.getKeyTypes();
	754	// Add in the types of our extra fields
	755	Object[] newKeyTypes = new String [keyTypes.length + 2];
	756	newKeyTypes[0] = new String();
	757	newKeyTypes[1] = new String();
	758	System.arraycopy(keyTypes, 0, newKeyTypes, 2, keyTypes.length);
	759	return newKeyTypes;
	760	}
	761
	762	/**
	763	* Gets the names of each of the columns produced for a single run.
	764	* This method should really be static.
	765	*
	766	* @return an array containing the name of each column
	767	*/
	768	public String[] getResultNames() {
	769	String[] resultNames = m_SplitEvaluator.getResultNames();
	770	// Add in the names of our extra Result fields
	771	String[] newResultNames = new String [resultNames.length + 1];
	772	newResultNames[0] = TIMESTAMP_FIELD_NAME;
	773	System.arraycopy(resultNames, 0, newResultNames, 1, resultNames.length);
	774	return newResultNames;
	775	}
	776
	777	/**
	778	* Gets the data types of each of the columns produced for a single run.
	779	* This method should really be static.
	780	*
	781	* @return an array containing objects of the type of each column.
	782	* The objects should be Strings, or Doubles.
	783	*/
	784	public Object[] getResultTypes() {
	785	Object[] resultTypes = m_SplitEvaluator.getResultTypes();
	786	// Add in the types of our extra Result fields
	787	Object[] newResultTypes = new Object [resultTypes.length + 1];
	788	newResultTypes[0] = new Double(0);
	789	System.arraycopy(resultTypes, 0, newResultTypes, 1, resultTypes.length);
	790	return newResultTypes;
	791	}
	792
	793	/**
	794	* Gets a description of the internal settings of the result
	795	* producer, sufficient for distinguishing a ResultProducer
	796	* instance from another with different settings (ignoring
	797	* those settings set through this interface). For example,
	798	* a cross-validation ResultProducer may have a setting for the
	799	* number of folds. For a given state, the results produced should
	800	* be compatible. Typically if a ResultProducer is an OptionHandler,
	801	* this string will represent the command line arguments required
	802	* to set the ResultProducer to that state.
	803	*
	804	* @return the description of the ResultProducer state, or null
	805	* if no state is defined
	806	*/
	807	public String getCompatibilityState() {
	808	String result;
	809
	810	result = "";
	811	if (getRandomizeData())
	812	result += " -R";
	813
	814	result += " -dir " + getTestsetDir();
	815
	816	if (getTestsetPrefix().length() > 0)
	817	result += " -prefix " + getTestsetPrefix();
	818
	819	result += " -suffix " + getTestsetSuffix();
	820
	821	if (getRelationFind().length() > 0) {
	822	result += " -find " + getRelationFind();
	823
	824	if (getRelationReplace().length() > 0)
	825	result += " -replace " + getRelationReplace();
	826	}
	827
	828	if (m_SplitEvaluator == null)
	829	result += " <null SplitEvaluator>";
	830	else
	831	result += " -W " + m_SplitEvaluator.getClass().getName();
	832
	833	return result + " --";
	834	}
	835
	836	/**
	837	* Returns the tip text for this property.
	838	*
	839	* @return tip text for this property suitable for
	840	* displaying in the explorer/experimenter gui
	841	*/
	842	public String outputFileTipText() {
	843	return "Set the destination for saving raw output. If the rawOutput "
	844	+"option is selected, then output from the splitEvaluator for "
	845	+"individual train-test splits is saved. If the destination is a "
	846	+"directory, "
	847	+"then each output is saved to an individual gzip file; if the "
	848	+"destination is a file, then each output is saved as an entry "
	849	+"in a zip file.";
	850	}
	851
	852	/**
	853	* Get the value of OutputFile.
	854	*
	855	* @return Value of OutputFile.
	856	*/
	857	public File getOutputFile() {
	858	return m_OutputFile;
	859	}
	860
	861	/**
	862	* Set the value of OutputFile.
	863	*
	864	* @param value Value to assign to OutputFile.
	865	*/
	866	public void setOutputFile(File value) {
	867	m_OutputFile = value;
	868	}
	869
	870	/**
	871	* Returns the tip text for this property.
	872	*
	873	* @return tip text for this property suitable for
	874	* displaying in the explorer/experimenter gui
	875	*/
	876	public String randomizeDataTipText() {
	877	return "Do not randomize dataset and do not perform probabilistic rounding " +
	878	"if true";
	879	}
	880
	881	/**
	882	* Get if dataset is to be randomized.
	883	*
	884	* @return true if dataset is to be randomized
	885	*/
	886	public boolean getRandomizeData() {
	887	return m_randomize;
	888	}
	889
	890	/**
	891	* Set to true if dataset is to be randomized.
	892	*
	893	* @param value true if dataset is to be randomized
	894	*/
	895	public void setRandomizeData(boolean value) {
	896	m_randomize = value;
	897	}
	898
	899	/**
	900	* Returns the tip text for this property.
	901	*
	902	* @return tip text for this property suitable for
	903	* displaying in the explorer/experimenter gui
	904	*/
	905	public String rawOutputTipText() {
	906	return "Save raw output (useful for debugging). If set, then output is "
	907	+"sent to the destination specified by outputFile";
	908	}
	909
	910	/**
	911	* Get if raw split evaluator output is to be saved.
	912	*
	913	* @return true if raw split evaluator output is to be saved
	914	*/
	915	public boolean getRawOutput() {
	916	return m_debugOutput;
	917	}
	918
	919	/**
	920	* Set to true if raw split evaluator output is to be saved.
	921	*
	922	* @param value true if output is to be saved
	923	*/
	924	public void setRawOutput(boolean value) {
	925	m_debugOutput = value;
	926	}
	927
	928	/**
	929	* Returns the tip text for this property.
	930	*
	931	* @return tip text for this property suitable for
	932	* displaying in the explorer/experimenter gui
	933	*/
	934	public String splitEvaluatorTipText() {
	935	return "The evaluator to apply to the test data. "
	936	+"This may be a classifier, regression scheme etc.";
	937	}
	938
	939	/**
	940	* Get the SplitEvaluator.
	941	*
	942	* @return the SplitEvaluator.
	943	*/
	944	public SplitEvaluator getSplitEvaluator() {
	945	return m_SplitEvaluator;
	946	}
	947
	948	/**
	949	* Set the SplitEvaluator.
	950	*
	951	* @param value new SplitEvaluator to use.
	952	*/
	953	public void setSplitEvaluator(SplitEvaluator value) {
	954	m_SplitEvaluator = value;
	955	m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
	956	}
	957
	958	/**
	959	* Returns the tip text for this property.
	960	*
	961	* @return tip text for this property suitable for
	962	* displaying in the explorer/experimenter gui
	963	*/
	964	public String testsetDirTipText() {
	965	return "The directory containing the test sets.";
	966	}
	967
	968	/**
	969	* Returns the currently set directory for the test sets.
	970	*
	971	* @return the directory
	972	*/
	973	public File getTestsetDir() {
	974	return m_TestsetDir;
	975	}
	976
	977	/**
	978	* Sets the directory to use for the test sets.
	979	*
	980	* @param value the directory to use
	981	*/
	982	public void setTestsetDir(File value) {
	983	m_TestsetDir = value;
	984	}
	985
	986	/**
	987	* Returns the tip text for this property.
	988	*
	989	* @return tip text for this property suitable for
	990	* displaying in the explorer/experimenter gui
	991	*/
	992	public String testsetPrefixTipText() {
	993	return "The prefix to use for the filename of the test sets.";
	994	}
	995
	996	/**
	997	* Returns the currently set prefix.
	998	*
	999	* @return the prefix
	1000	*/
	1001	public String getTestsetPrefix() {
	1002	return m_TestsetPrefix;
	1003	}
	1004
	1005	/**
	1006	* Sets the prefix to use for the test sets.
	1007	*
	1008	* @param value the prefix
	1009	*/
	1010	public void setTestsetPrefix(String value) {
	1011	m_TestsetPrefix = value;
	1012	}
	1013
	1014	/**
	1015	* Returns the tip text for this property.
	1016	*
	1017	* @return tip text for this property suitable for
	1018	* displaying in the explorer/experimenter gui
	1019	*/
	1020	public String testsetSuffixTipText() {
	1021	return
	1022	"The suffix to use for the filename of the test sets - must contain "
	1023	+ "the file extension.";
	1024	}
	1025
	1026	/**
	1027	* Returns the currently set suffix.
	1028	*
	1029	* @return the suffix
	1030	*/
	1031	public String getTestsetSuffix() {
	1032	return m_TestsetSuffix;
	1033	}
	1034
	1035	/**
	1036	* Sets the suffix to use for the test sets.
	1037	*
	1038	* @param value the suffix
	1039	*/
	1040	public void setTestsetSuffix(String value) {
	1041	if ((value == null) \|\| (value.length() == 0))
	1042	value = DEFAULT_SUFFIX;
	1043	m_TestsetSuffix = value;
	1044	}
	1045
	1046	/**
	1047	* Returns the tip text for this property.
	1048	*
	1049	* @return tip text for this property suitable for
	1050	* displaying in the explorer/experimenter gui
	1051	*/
	1052	public String relationFindTipText() {
	1053	return
	1054	"The regular expression to use for removing parts of the relation "
	1055	+ "name, ignored if empty.";
	1056	}
	1057
	1058	/**
	1059	* Returns the currently set regular expression to use on the relation name.
	1060	*
	1061	* @return the regular expression
	1062	*/
	1063	public String getRelationFind() {
	1064	return m_RelationFind;
	1065	}
	1066
	1067	/**
	1068	* Sets the regular expression to use on the relation name.
	1069	*
	1070	* @param value the regular expression
	1071	*/
	1072	public void setRelationFind(String value) {
	1073	m_RelationFind = value;
	1074	}
	1075
	1076	/**
	1077	* Returns the tip text for this property.
	1078	*
	1079	* @return tip text for this property suitable for
	1080	* displaying in the explorer/experimenter gui
	1081	*/
	1082	public String relationReplaceTipText() {
	1083	return "The string to replace all matches of the regular expression with.";
	1084	}
	1085
	1086	/**
	1087	* Returns the currently set replacement string to use on the relation name.
	1088	*
	1089	* @return the replacement string
	1090	*/
	1091	public String getRelationReplace() {
	1092	return m_RelationReplace;
	1093	}
	1094
	1095	/**
	1096	* Sets the replacement string to use on the relation name.
	1097	*
	1098	* @param value the regular expression
	1099	*/
	1100	public void setRelationReplace(String value) {
	1101	m_RelationReplace = value;
	1102	}
	1103
	1104	/**
	1105	* Gets a text descrption of the result producer.
	1106	*
	1107	* @return a text description of the result producer.
	1108	*/
	1109	public String toString() {
	1110	String result = "ExplicitTestsetResultProducer: ";
	1111	result += getCompatibilityState();
	1112	if (m_Instances == null)
	1113	result += ": <null Instances>";
	1114	else
	1115	result += ": " + Utils.backQuoteChars(m_Instances.relationName());
	1116	return result;
	1117	}
	1118
	1119	/**
	1120	* Returns the revision string.
	1121	*
	1122	* @return the revision
	1123	*/
	1124	public String getRevision() {
	1125	return RevisionUtils.extract("$Revision: 5353 $");
	1126	}
	1127	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: