source: src/main/java/weka/experiment/AveragingResultProducer.java @ 9

Last change on this file since revision 9 was revision 4, checked in by gnappo, 14 years ago

Import of weka.

File size: 36.5 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    AveragingResultProducer.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */


package weka.experiment;

import weka.core.AdditionalMeasureProducer;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Takes the results from a ResultProducer and submits the average to the result listener. Normally used with a CrossValidationResultProducer to perform n x m fold cross validation. For non-numeric result fields, the first value is used.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -F &lt;field name&gt;
 *  The name of the field to average over.
 *  (default "Fold")</pre>
 *
 * <pre> -X &lt;num results&gt;
 *  The number of results expected per average.
 *  (default 10)</pre>
 *
 * <pre> -S
 *  Calculate standard deviations.
 *  (default only averages)</pre>
 *
 * <pre> -W &lt;class name&gt;
 *  The full class name of a ResultProducer.
 *  eg: weka.experiment.CrossValidationResultProducer</pre>
 *
 * <pre>
 * Options specific to result producer weka.experiment.CrossValidationResultProducer:
 * </pre>
 *
 * <pre> -X &lt;number of folds&gt;
 *  The number of folds to use for the cross-validation.
 *  (default 10)</pre>
 *
 * <pre> -D
 * Save raw split evaluator output.</pre>
 *
 * <pre> -O &lt;file/directory name/path&gt;
 *  The filename where raw output will be stored.
 *  If a directory name is specified then individual
 *  outputs will be gzipped, otherwise all output will be
 *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
 *
 * <pre> -W &lt;class name&gt;
 *  The full class name of a SplitEvaluator.
 *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
 *
 * <pre>
 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
 * </pre>
 *
 * <pre> -W &lt;class name&gt;
 *  The full class name of the classifier.
 *  eg: weka.classifiers.bayes.NaiveBayes</pre>
 *
 * <pre> -C &lt;index&gt;
 *  The index of the class for which IR statistics
 *  are to be output. (default 1)</pre>
 *
 * <pre> -I &lt;index&gt;
 *  The index of an attribute to output in the
 *  results. This attribute should identify an
 *  instance in order to know which instances are
 *  in the test set of a cross validation. if 0
 *  no output (default 0).</pre>
 *
 * <pre> -P
 *  Add target and prediction columns to the result
 *  for each fold.</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 * </pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * All options after -- will be passed to the result producer.
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.18 $
 */
public class AveragingResultProducer
  implements ResultListener, ResultProducer, OptionHandler,
             AdditionalMeasureProducer, RevisionHandler {

  /** for serialization */
  static final long serialVersionUID = 2551284958501991352L;

  /** The dataset of interest */
  protected Instances m_Instances;

  /** The ResultListener to send results to */
  protected ResultListener m_ResultListener = new CSVResultListener();

  /** The ResultProducer used to generate results */
  protected ResultProducer m_ResultProducer
    = new CrossValidationResultProducer();

  /** The names of any additional measures to look for in SplitEvaluators */
  protected String [] m_AdditionalMeasures = null;

  /** The number of results expected to average over for each run */
  protected int m_ExpectedResultsPerAverage = 10;

  /** True if standard deviation fields should be produced */
  protected boolean m_CalculateStdDevs;

  /**
   * The name of the field that will contain the number of results
   * averaged over.
   */
  protected String m_CountFieldName = "Num_" + CrossValidationResultProducer
    .FOLD_FIELD_NAME;

  /** The name of the key field to average over */
  protected String m_KeyFieldName = CrossValidationResultProducer
    .FOLD_FIELD_NAME;

  /** The index of the field to average over in the result producer's key */
  protected int m_KeyIndex = -1;

  /** Collects the keys from a single run */
  protected FastVector m_Keys = new FastVector();

  /** Collects the results from a single run */
  protected FastVector m_Results = new FastVector();

  /**
   * Returns a string describing this result producer
   * @return a description of the result producer suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Takes the results from a ResultProducer "
      +"and submits the average to the result listener. Normally used with "
      +"a CrossValidationResultProducer to perform n x m fold cross "
      +"validation. For non-numeric result fields, the first value is used.";
  }
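
  /*
   * A minimal usage sketch (illustrative only; assumes the surrounding
   * weka.experiment classes and that the wrapped
   * CrossValidationResultProducer exposes a setNumFolds method):
   *
   *   AveragingResultProducer avg = new AveragingResultProducer();
   *   CrossValidationResultProducer cv = new CrossValidationResultProducer();
   *   cv.setNumFolds(10);                    // 10 folds per run
   *   avg.setResultProducer(cv);
   *   avg.setExpectedResultsPerAverage(10);  // must match the fold count
   *   avg.setCalculateStdDevs(true);
   *   avg.setResultListener(new CSVResultListener());
   *   avg.setInstances(data);                // some weka.core.Instances
   *   avg.preProcess();
   *   avg.doRun(1);                          // submits one averaged row per run
   *   avg.postProcess();
   */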

  /**
   * Scans through the key field names of the result producer to find
   * the index of the key field to average over. Sets the value of
   * m_KeyIndex to the index, or -1 if no matching key field was found.
   *
   * @return the index of the key field to average over
   */
  protected int findKeyIndex() {

    m_KeyIndex = -1;
    try {
      if (m_ResultProducer != null) {
        String [] keyNames = m_ResultProducer.getKeyNames();
        for (int i = 0; i < keyNames.length; i++) {
          if (keyNames[i].equals(m_KeyFieldName)) {
            m_KeyIndex = i;
            break;
          }
        }
      }
    } catch (Exception ex) {
    }
    return m_KeyIndex;
  }
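
  /*
   * Example (hypothetical key layout): when the sub-producer is a
   * CrossValidationResultProducer, getKeyNames() typically returns something
   * like { "Dataset", "Run", "Fold" }, so the default key field name "Fold"
   * gives m_KeyIndex == 2.
   */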

  /**
   * Determines if there are any constraints (imposed by the
   * destination) on the result columns to be produced by
   * resultProducers. Null should be returned if there are NO
   * constraints, otherwise a list of column names should be
   * returned as an array of Strings.
   * @param rp the ResultProducer to which the constraints will apply
213   * @return an array of column names to which resutltProducer's
214   * results will be restricted.
215   * @throws Exception if constraints can't be determined
216   */
217  public String [] determineColumnConstraints(ResultProducer rp) 
218    throws Exception {
219    return null;
220  }
221
222  /**
223   * Simulates a run to collect the keys the sub-resultproducer could
224   * generate. Does some checking on the keys and determines the
225   * template key.
226   *
227   * @param run the run number
228   * @return a template key (null for the field being averaged)
229   * @throws Exception if an error occurs
230   */
231  protected Object [] determineTemplate(int run) throws Exception {
232
233    if (m_Instances == null) {
234      throw new Exception("No Instances set");
235    }
236    m_ResultProducer.setInstances(m_Instances);
237
238    // Clear the collected results
239    m_Keys.removeAllElements();
240    m_Results.removeAllElements();
241   
242    m_ResultProducer.doRunKeys(run);
243    checkForMultipleDifferences();
244
245    Object [] template = (Object [])((Object [])m_Keys.elementAt(0)).clone();
246    template[m_KeyIndex] = null;
247    // Check for duplicate keys
248    checkForDuplicateKeys(template);
249
250    return template;
251  }
252
253  /**
254   * Gets the keys for a specified run number. Different run
255   * numbers correspond to different randomizations of the data. Keys
256   * produced should be sent to the current ResultListener
257   *
258   * @param run the run number to get keys for.
259   * @throws Exception if a problem occurs while getting the keys
260   */
261  public void doRunKeys(int run) throws Exception {
262
263    // Generate the template
264    Object [] template = determineTemplate(run);
265    String [] newKey = new String [template.length - 1];
266    System.arraycopy(template, 0, newKey, 0, m_KeyIndex);
267    System.arraycopy(template, m_KeyIndex + 1,
268                     newKey, m_KeyIndex,
269                     template.length - m_KeyIndex - 1);
270    m_ResultListener.acceptResult(this, newKey, null);     
271  }
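
  /*
   * Illustration of the key collapsing above (hypothetical values):
   *
   *   Object [] template = { "iris", "1", null };  // m_KeyIndex == 2
   *   // after the two arraycopy calls the averaged-over field is cut out:
   *   String [] newKey   = { "iris", "1" };
   */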

  /**
   * Gets the results for a specified run number. Different run
   * numbers correspond to different randomizations of the data. Results
   * produced should be sent to the current ResultListener
   *
   * @param run the run number to get results for.
   * @throws Exception if a problem occurs while getting the results
   */
  public void doRun(int run) throws Exception {

    // Generate the key and ask whether the result is required
    Object [] template = determineTemplate(run);
    String [] newKey = new String [template.length - 1];
    System.arraycopy(template, 0, newKey, 0, m_KeyIndex);
    System.arraycopy(template, m_KeyIndex + 1,
                     newKey, m_KeyIndex,
                     template.length - m_KeyIndex - 1);

    if (m_ResultListener.isResultRequired(this, newKey)) {
      // Clear the collected keys
      m_Keys.removeAllElements();
      m_Results.removeAllElements();

      m_ResultProducer.doRun(run);

      // Average the results collected
      //System.err.println("Number of results collected: " + m_Keys.size());

      // Check that the keys only differ on the selected key field
      checkForMultipleDifferences();

      template = (Object [])((Object [])m_Keys.elementAt(0)).clone();
      template[m_KeyIndex] = null;
      // Check for duplicate keys
      checkForDuplicateKeys(template);
      // Calculate the average and submit it if necessary
      doAverageResult(template);
    }
  }


  /**
   * Compares a key to a template to see whether they match. Null
   * fields in the template are ignored in the matching.
   *
   * @param template the template to match against
   * @param test the key to test
   * @return true if the test key matches the template on all non-null template
   * fields
   */
  protected boolean matchesTemplate(Object [] template, Object [] test) {

    if (template.length != test.length) {
      return false;
    }
    for (int i = 0; i < test.length; i++) {
      if ((template[i] != null) && (!template[i].equals(test[i]))) {
        return false;
      }
    }
    return true;
  }
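
  /*
   * Example (hypothetical keys) for a template of { "iris", "1", null }:
   *
   *   matchesTemplate(template, new Object [] { "iris", "1", "3" })  // true
   *   matchesTemplate(template, new Object [] { "iris", "2", "3" })  // false (run differs)
   *   matchesTemplate(template, new Object [] { "iris", "1" })       // false (length differs)
   */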

  /**
   * Asks the resultlistener whether an average result is required, and
   * if so, calculates it.
   *
   * @param template the template to match keys against when calculating the
   * average
   * @throws Exception if an error occurs
   */
  protected void doAverageResult(Object [] template) throws Exception {

    // Generate the key and ask whether the result is required
    String [] newKey = new String [template.length - 1];
    System.arraycopy(template, 0, newKey, 0, m_KeyIndex);
    System.arraycopy(template, m_KeyIndex + 1,
                     newKey, m_KeyIndex,
                     template.length - m_KeyIndex - 1);
    if (m_ResultListener.isResultRequired(this, newKey)) {
      Object [] resultTypes = m_ResultProducer.getResultTypes();
      Stats [] stats = new Stats [resultTypes.length];
      for (int i = 0; i < stats.length; i++) {
        stats[i] = new Stats();
      }
      Object [] result = getResultTypes();
      int numMatches = 0;
      for (int i = 0; i < m_Keys.size(); i++) {
        Object [] currentKey = (Object [])m_Keys.elementAt(i);
        // Skip non-matching keys
        if (!matchesTemplate(template, currentKey)) {
          continue;
        }
        // Add the results to the stats accumulator
        Object [] currentResult = (Object [])m_Results.elementAt(i);
        numMatches++;
        for (int j = 0; j < resultTypes.length; j++) {
          if (resultTypes[j] instanceof Double) {
            if (currentResult[j] == null) {

              // set the stats object for this result to null---
              // more than likely this is an additional measure field
              // not supported by the low level split evaluator
              if (stats[j] != null) {
                stats[j] = null;
              }

              /* throw new Exception("Null numeric result field found:\n"
                 + DatabaseUtils.arrayToString(currentKey)
                 + " -- "
                 + DatabaseUtils
                 .arrayToString(currentResult)); */
            }
            if (stats[j] != null) {
              double currentVal = ((Double)currentResult[j]).doubleValue();
              stats[j].add(currentVal);
            }
          }
        }
      }
      if (numMatches != m_ExpectedResultsPerAverage) {
        throw new Exception("Expected " + m_ExpectedResultsPerAverage
                            + " results matching key \""
                            + DatabaseUtils.arrayToString(template)
                            + "\" but got "
                            + numMatches);
      }
      result[0] = new Double(numMatches);
      Object [] currentResult = (Object [])m_Results.elementAt(0);
      int k = 1;
      for (int j = 0; j < resultTypes.length; j++) {
        if (resultTypes[j] instanceof Double) {
          if (stats[j] != null) {
            stats[j].calculateDerived();
            result[k++] = new Double(stats[j].mean);
          } else {
            result[k++] = null;
          }
          if (getCalculateStdDevs()) {
            if (stats[j] != null) {
              result[k++] = new Double(stats[j].stdDev);
            } else {
              result[k++] = null;
            }
          }
        } else {
          result[k++] = currentResult[j];
        }
      }
      m_ResultListener.acceptResult(this, newKey, result);
    }
  }
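
  /*
   * Sketch of the averaged row built above (hypothetical numbers), for a
   * single numeric column "Percent_correct" with standard deviations enabled:
   * the ten per-fold values are accumulated in a Stats object and the row
   * submitted to the listener becomes
   *
   *   { Num_Fold = 10.0, Avg_Percent_correct = 95.2, Dev_Percent_correct = 1.3 }
   *
   * Non-numeric columns are copied unchanged from the first collected result.
   */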

  /**
   * Checks whether any duplicate results (with respect to a key template)
   * were received.
   *
   * @param template the template key.
   * @throws Exception if duplicate results are detected
   */
  protected void checkForDuplicateKeys(Object [] template) throws Exception {

    Hashtable hash = new Hashtable();
    int numMatches = 0;
    for (int i = 0; i < m_Keys.size(); i++) {
      Object [] current = (Object [])m_Keys.elementAt(i);
      // Skip non-matching keys
      if (!matchesTemplate(template, current)) {
        continue;
      }
      if (hash.containsKey(current[m_KeyIndex])) {
        throw new Exception("Duplicate result received:"
                            + DatabaseUtils.arrayToString(current));
      }
      numMatches++;
      hash.put(current[m_KeyIndex], current[m_KeyIndex]);
    }
    if (numMatches != m_ExpectedResultsPerAverage) {
      throw new Exception("Expected " + m_ExpectedResultsPerAverage
                          + " results matching key \""
                          + DatabaseUtils.arrayToString(template)
                          + "\" but got "
                          + numMatches);
    }
  }

  /**
   * Checks that the keys for a run only differ in one key field. If they
   * differ in more than one field, a more sophisticated averager will submit
   * multiple results - for now an exception is thrown. Currently assumes that
   * the most differences will be shown between the first and last
   * result received.
   *
   * @throws Exception if the keys differ on fields other than the
   * key averaging field
   */
  protected void checkForMultipleDifferences() throws Exception {

    Object [] firstKey = (Object [])m_Keys.elementAt(0);
    Object [] lastKey = (Object [])m_Keys.elementAt(m_Keys.size() - 1);
    /*
    System.err.println("First key:" +  DatabaseUtils.arrayToString(firstKey));
    System.err.println("Last key :" + DatabaseUtils.arrayToString(lastKey));
    */
    for (int i = 0; i < firstKey.length; i++) {
      if ((i != m_KeyIndex) && !firstKey[i].equals(lastKey[i])) {
        throw new Exception("Keys differ on fields other than \""
                            + m_KeyFieldName
                            + "\" -- time to implement multiple averaging");
      }
    }
  }

  /**
   * Prepare for the results to be received.
   *
   * @param rp the ResultProducer that will generate the results
   * @throws Exception if an error occurs during preprocessing.
   */
  public void preProcess(ResultProducer rp) throws Exception {

    if (m_ResultListener == null) {
      throw new Exception("No ResultListener set");
    }
    m_ResultListener.preProcess(this);
  }

  /**
   * Prepare to generate results. The ResultProducer should call
   * preProcess(this) on the ResultListener it is to send results to.
   *
   * @throws Exception if an error occurs during preprocessing.
   */
  public void preProcess() throws Exception {

    if (m_ResultProducer == null) {
      throw new Exception("No ResultProducer set");
    }
    // Tell the resultproducer to send results to us
    m_ResultProducer.setResultListener(this);
    findKeyIndex();
    if (m_KeyIndex == -1) {
      throw new Exception("No key field called " + m_KeyFieldName
                          + " produced by "
                          + m_ResultProducer.getClass().getName());
    }
    m_ResultProducer.preProcess();
  }

  /**
   * When this method is called, it indicates that no more results
   * will be sent that need to be grouped together in any way.
   *
   * @param rp the ResultProducer that generated the results
   * @throws Exception if an error occurs
   */
  public void postProcess(ResultProducer rp) throws Exception {

    m_ResultListener.postProcess(this);
  }

  /**
   * When this method is called, it indicates that no more requests to
   * generate results for the current experiment will be sent. The
   * ResultProducer should call preProcess(this) on the
   * ResultListener it is to send results to.
   *
   * @throws Exception if an error occurs
   */
  public void postProcess() throws Exception {

    m_ResultProducer.postProcess();
  }

  /**
   * Accepts results from a ResultProducer.
   *
   * @param rp the ResultProducer that generated the results
   * @param key an array of Objects (Strings or Doubles) that uniquely
   * identify a result for a given ResultProducer with given compatibilityState
   * @param result the results stored in an array. The objects stored in
   * the array may be Strings, Doubles, or null (for the missing value).
   * @throws Exception if the result could not be accepted.
   */
  public void acceptResult(ResultProducer rp, Object [] key, Object [] result)
    throws Exception {

    if (m_ResultProducer != rp) {
      throw new Error("Unrecognized ResultProducer sending results!!");
    }
    m_Keys.addElement(key);
    m_Results.addElement(result);
  }

  /**
   * Determines whether the results for a specified key must be
   * generated.
   *
   * @param rp the ResultProducer wanting to generate the results
   * @param key an array of Objects (Strings or Doubles) that uniquely
   * identify a result for a given ResultProducer with given compatibilityState
   * @return true if the result should be generated
   * @throws Exception if it could not be determined if the result
   * is needed.
   */
  public boolean isResultRequired(ResultProducer rp, Object [] key)
    throws Exception {

    if (m_ResultProducer != rp) {
      throw new Error("Unrecognized ResultProducer sending results!!");
    }
    return true;
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   *
   * @return an array containing the name of each column
   * @throws Exception if key names cannot be generated
   */
  public String [] getKeyNames() throws Exception {

    if (m_KeyIndex == -1) {
      throw new Exception("No key field called " + m_KeyFieldName
                          + " produced by "
                          + m_ResultProducer.getClass().getName());
    }
    String [] keyNames = m_ResultProducer.getKeyNames();
    String [] newKeyNames = new String [keyNames.length - 1];
    System.arraycopy(keyNames, 0, newKeyNames, 0, m_KeyIndex);
    System.arraycopy(keyNames, m_KeyIndex + 1,
                     newKeyNames, m_KeyIndex,
                     keyNames.length - m_KeyIndex - 1);
    return newKeyNames;
  }

  /**
   * Gets the data types of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing objects of the type of each column. The
   * objects should be Strings, or Doubles.
   * @throws Exception if the key types could not be determined (perhaps
   * because of a problem from a nested sub-resultproducer)
   */
  public Object [] getKeyTypes() throws Exception {

    if (m_KeyIndex == -1) {
      throw new Exception("No key field called " + m_KeyFieldName
                          + " produced by "
                          + m_ResultProducer.getClass().getName());
    }
    Object [] keyTypes = m_ResultProducer.getKeyTypes();
    // Find and remove the key field that is being averaged over
    Object [] newKeyTypes = new String [keyTypes.length - 1];
    System.arraycopy(keyTypes, 0, newKeyTypes, 0, m_KeyIndex);
    System.arraycopy(keyTypes, m_KeyIndex + 1,
                     newKeyTypes, m_KeyIndex,
                     keyTypes.length - m_KeyIndex - 1);
    return newKeyTypes;
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   * A new result field is added for the number of results used to
   * produce each average.
   * If only averages are being produced the names are not altered, if
   * standard deviations are produced then "Dev_" and "Avg_" are prepended
   * to each result deviation and average field respectively.
   *
   * @return an array containing the name of each column
   * @throws Exception if the result names could not be determined (perhaps
   * because of a problem from a nested sub-resultproducer)
   */
  public String [] getResultNames() throws Exception {

    String [] resultNames = m_ResultProducer.getResultNames();
    // Add in the names of our extra Result fields
    if (getCalculateStdDevs()) {
      Object [] resultTypes = m_ResultProducer.getResultTypes();
      int numNumeric = 0;
      for (int i = 0; i < resultTypes.length; i++) {
        if (resultTypes[i] instanceof Double) {
          numNumeric++;
        }
      }
      String [] newResultNames = new String [resultNames.length +
                                            1 + numNumeric];
      newResultNames[0] = m_CountFieldName;
      int j = 1;
      for (int i = 0; i < resultNames.length; i++) {
        newResultNames[j++] = "Avg_" + resultNames[i];
        if (resultTypes[i] instanceof Double) {
          newResultNames[j++] = "Dev_" + resultNames[i];
        }
      }
      return newResultNames;
    } else {
      String [] newResultNames = new String [resultNames.length + 1];
      newResultNames[0] = m_CountFieldName;
      System.arraycopy(resultNames, 0, newResultNames, 1, resultNames.length);
      return newResultNames;
    }
  }
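
  /*
   * Example of the renaming above, for a hypothetical sub-producer with
   * result columns { "Label" (String), "Percent_correct" (Double) } and the
   * default key field "Fold":
   *
   *   without -S:  { "Num_Fold", "Label", "Percent_correct" }
   *   with    -S:  { "Num_Fold", "Avg_Label", "Avg_Percent_correct",
   *                  "Dev_Percent_correct" }
   */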

  /**
   * Gets the data types of each of the columns produced for a single run.
   *
   * @return an array containing objects of the type of each column. The
   * objects should be Strings, or Doubles.
   * @throws Exception if the result types could not be determined (perhaps
   * because of a problem from a nested sub-resultproducer)
   */
  public Object [] getResultTypes() throws Exception {

    Object [] resultTypes = m_ResultProducer.getResultTypes();
    // Add in the types of our extra Result fields
    if (getCalculateStdDevs()) {
      int numNumeric = 0;
      for (int i = 0; i < resultTypes.length; i++) {
        if (resultTypes[i] instanceof Double) {
          numNumeric++;
        }
      }
      Object [] newResultTypes = new Object [resultTypes.length +
                                            1 + numNumeric];
      newResultTypes[0] = new Double(0);
      int j = 1;
      for (int i = 0; i < resultTypes.length; i++) {
        newResultTypes[j++] = resultTypes[i];
        if (resultTypes[i] instanceof Double) {
          newResultTypes[j++] = new Double(0);
        }
      }
      return newResultTypes;
    } else {
      Object [] newResultTypes = new Object [resultTypes.length + 1];
      newResultTypes[0] = new Double(0);
      System.arraycopy(resultTypes, 0, newResultTypes, 1, resultTypes.length);
      return newResultTypes;
    }
  }

  /**
   * Gets a description of the internal settings of the result
   * producer, sufficient for distinguishing a ResultProducer
   * instance from another with different settings (ignoring
   * those settings set through this interface). For example,
   * a cross-validation ResultProducer may have a setting for the
   * number of folds. For a given state, the results produced should
   * be compatible. Typically if a ResultProducer is an OptionHandler,
   * this string will represent the command line arguments required
   * to set the ResultProducer to that state.
   *
   * @return the description of the ResultProducer state, or null
   * if no state is defined
   */
  public String getCompatibilityState() {

    String result = // "-F " + Utils.quote(getKeyFieldName())
      " -X " + getExpectedResultsPerAverage() + " ";
    if (getCalculateStdDevs()) {
      result += "-S ";
    }
    if (m_ResultProducer == null) {
      result += "<null ResultProducer>";
    } else {
      result += "-W " + m_ResultProducer.getClass().getName();
    }
    result  += " -- " + m_ResultProducer.getCompatibilityState();
    return result.trim();
  }
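
  /*
   * A typical return value (illustrative) is something like
   *
   *   "-X 10 -S -W weka.experiment.CrossValidationResultProducer -- <sub-producer state>"
   *
   * i.e. this producer's own settings followed by "--" and the compatibility
   * state of the wrapped result producer.
   */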


  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector newVector = new Vector(2);

    newVector.addElement(new Option(
             "\tThe name of the field to average over.\n"
              +"\t(default \"Fold\")",
             "F", 1,
             "-F <field name>"));
    newVector.addElement(new Option(
             "\tThe number of results expected per average.\n"
              +"\t(default 10)",
             "X", 1,
             "-X <num results>"));
    newVector.addElement(new Option(
             "\tCalculate standard deviations.\n"
              +"\t(default only averages)",
             "S", 0,
             "-S"));
    newVector.addElement(new Option(
             "\tThe full class name of a ResultProducer.\n"
              +"\teg: weka.experiment.CrossValidationResultProducer",
             "W", 1,
             "-W <class name>"));

    if ((m_ResultProducer != null) &&
        (m_ResultProducer instanceof OptionHandler)) {
      newVector.addElement(new Option(
             "",
             "", 0, "\nOptions specific to result producer "
             + m_ResultProducer.getClass().getName() + ":"));
      Enumeration enu = ((OptionHandler)m_ResultProducer).listOptions();
      while (enu.hasMoreElements()) {
        newVector.addElement(enu.nextElement());
      }
    }
    return newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -F &lt;field name&gt;
   *  The name of the field to average over.
   *  (default "Fold")</pre>
   *
   * <pre> -X &lt;num results&gt;
   *  The number of results expected per average.
   *  (default 10)</pre>
   *
   * <pre> -S
   *  Calculate standard deviations.
   *  (default only averages)</pre>
   *
   * <pre> -W &lt;class name&gt;
   *  The full class name of a ResultProducer.
   *  eg: weka.experiment.CrossValidationResultProducer</pre>
   *
   * <pre>
   * Options specific to result producer weka.experiment.CrossValidationResultProducer:
   * </pre>
   *
   * <pre> -X &lt;number of folds&gt;
   *  The number of folds to use for the cross-validation.
   *  (default 10)</pre>
   *
   * <pre> -D
   * Save raw split evaluator output.</pre>
   *
   * <pre> -O &lt;file/directory name/path&gt;
   *  The filename where raw output will be stored.
   *  If a directory name is specified then individual
   *  outputs will be gzipped, otherwise all output will be
   *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
   *
   * <pre> -W &lt;class name&gt;
   *  The full class name of a SplitEvaluator.
   *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
   *
   * <pre>
   * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
   * </pre>
   *
   * <pre> -W &lt;class name&gt;
   *  The full class name of the classifier.
   *  eg: weka.classifiers.bayes.NaiveBayes</pre>
   *
   * <pre> -C &lt;index&gt;
   *  The index of the class for which IR statistics
   *  are to be output. (default 1)</pre>
   *
   * <pre> -I &lt;index&gt;
   *  The index of an attribute to output in the
   *  results. This attribute should identify an
   *  instance in order to know which instances are
   *  in the test set of a cross validation. if 0
   *  no output (default 0).</pre>
   *
   * <pre> -P
   *  Add target and prediction columns to the result
   *  for each fold.</pre>
   *
   * <pre>
   * Options specific to classifier weka.classifiers.rules.ZeroR:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   <!-- options-end -->
   *
   * All options after -- will be passed to the result producer.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String keyFieldName = Utils.getOption('F', options);
    if (keyFieldName.length() != 0) {
      setKeyFieldName(keyFieldName);
    } else {
      setKeyFieldName(CrossValidationResultProducer.FOLD_FIELD_NAME);
    }

    String numResults = Utils.getOption('X', options);
    if (numResults.length() != 0) {
      setExpectedResultsPerAverage(Integer.parseInt(numResults));
    } else {
      setExpectedResultsPerAverage(10);
    }

    setCalculateStdDevs(Utils.getFlag('S', options));

    String rpName = Utils.getOption('W', options);
    if (rpName.length() == 0) {
      throw new Exception("A ResultProducer must be specified with"
                          + " the -W option.");
    }
    // Do it first without options, so if an exception is thrown during
    // the option setting, listOptions will contain options for the actual
    // RP.
    setResultProducer((ResultProducer)Utils.forName(
                      ResultProducer.class,
                      rpName,
                      null));
    if (getResultProducer() instanceof OptionHandler) {
      ((OptionHandler) getResultProducer())
        .setOptions(Utils.partitionOptions(options));
    }
  }
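
  /*
   * Command-line style configuration sketch (hypothetical option values;
   * assumes weka.core.Utils.splitOptions for tokenising the string):
   *
   *   String [] opts = Utils.splitOptions(
   *       "-F Fold -X 10 -S -W weka.experiment.CrossValidationResultProducer"
   *     + " -- -X 10 -W weka.experiment.ClassifierSplitEvaluator");
   *   AveragingResultProducer arp = new AveragingResultProducer();
   *   arp.setOptions(opts);
   *
   * Everything after "--" is handed to the wrapped ResultProducer via
   * Utils.partitionOptions(options).
   */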

  /**
   * Gets the current settings of the result producer.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] seOptions = new String [0];
    if ((m_ResultProducer != null) &&
        (m_ResultProducer instanceof OptionHandler)) {
      seOptions = ((OptionHandler)m_ResultProducer).getOptions();
    }

    String [] options = new String [seOptions.length + 8];
    int current = 0;

    options[current++] = "-F";
    options[current++] = "" + getKeyFieldName();
    options[current++] = "-X";
    options[current++] = "" + getExpectedResultsPerAverage();
    if (getCalculateStdDevs()) {
      options[current++] = "-S";
    }
    if (getResultProducer() != null) {
      options[current++] = "-W";
      options[current++] = getResultProducer().getClass().getName();
    }
    options[current++] = "--";

    System.arraycopy(seOptions, 0, options, current,
                     seOptions.length);
    current += seOptions.length;
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Set a list of method names for additional measures to look for
   * in SplitEvaluators. This could contain many measures (of which only a
   * subset may be produceable by the current resultProducer) if an experiment
   * is the type that iterates over a set of properties.
   * @param additionalMeasures an array of measure names, null if none
   */
  public void setAdditionalMeasures(String [] additionalMeasures) {
    m_AdditionalMeasures = additionalMeasures;

    if (m_ResultProducer != null) {
      System.err.println("AveragingResultProducer: setting additional "
                         +"measures for "
                         +"ResultProducer");
      m_ResultProducer.setAdditionalMeasures(m_AdditionalMeasures);
    }
  }

  /**
   * Returns an enumeration of any additional measure names that might be
   * in the result producer
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    Vector newVector = new Vector();
    if (m_ResultProducer instanceof AdditionalMeasureProducer) {
      Enumeration en = ((AdditionalMeasureProducer)m_ResultProducer).
        enumerateMeasures();
      while (en.hasMoreElements()) {
        String mname = (String)en.nextElement();
        newVector.addElement(mname);
      }
    }
    return newVector.elements();
  }

  /**
   * Returns the value of the named measure
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @throws IllegalArgumentException if the named measure is not supported
   */
  public double getMeasure(String additionalMeasureName) {
    if (m_ResultProducer instanceof AdditionalMeasureProducer) {
      return ((AdditionalMeasureProducer)m_ResultProducer).
        getMeasure(additionalMeasureName);
    } else {
      throw new IllegalArgumentException("AveragingResultProducer: "
                          +"Can't return value for : "+additionalMeasureName
                          +". "+m_ResultProducer.getClass().getName()+" "
                          +"is not an AdditionalMeasureProducer");
    }
  }

  /**
   * Sets the dataset that results will be obtained for.
   *
   * @param instances a value of type 'Instances'.
   */
  public void setInstances(Instances instances) {

    m_Instances = instances;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String calculateStdDevsTipText() {
    return "Record standard deviations for each run.";
  }

  /**
   * Get the value of CalculateStdDevs.
   *
   * @return Value of CalculateStdDevs.
   */
  public boolean getCalculateStdDevs() {

    return m_CalculateStdDevs;
  }

  /**
   * Set the value of CalculateStdDevs.
   *
   * @param newCalculateStdDevs Value to assign to CalculateStdDevs.
   */
  public void setCalculateStdDevs(boolean newCalculateStdDevs) {

    m_CalculateStdDevs = newCalculateStdDevs;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String expectedResultsPerAverageTipText() {
    return "Set the expected number of results to average per run. "
      +"For example if a CrossValidationResultProducer is being used "
      +"(with the number of folds set to 10), then the expected number "
      +"of results per run is 10.";
  }

  /**
   * Get the value of ExpectedResultsPerAverage.
   *
   * @return Value of ExpectedResultsPerAverage.
   */
  public int getExpectedResultsPerAverage() {

    return m_ExpectedResultsPerAverage;
  }

  /**
   * Set the value of ExpectedResultsPerAverage.
   *
   * @param newExpectedResultsPerAverage Value to assign to
   * ExpectedResultsPerAverage.
   */
  public void setExpectedResultsPerAverage(int newExpectedResultsPerAverage) {

    m_ExpectedResultsPerAverage = newExpectedResultsPerAverage;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String keyFieldNameTipText() {
    return "Set the field name that will be unique for a run.";
  }

  /**
   * Get the value of KeyFieldName.
   *
   * @return Value of KeyFieldName.
   */
  public String getKeyFieldName() {

    return m_KeyFieldName;
  }

  /**
   * Set the value of KeyFieldName.
   *
   * @param newKeyFieldName Value to assign to KeyFieldName.
   */
  public void setKeyFieldName(String newKeyFieldName) {

    m_KeyFieldName = newKeyFieldName;
    m_CountFieldName = "Num_" + m_KeyFieldName;
    findKeyIndex();
  }

  /**
   * Sets the object to send results of each run to.
   *
   * @param listener a value of type 'ResultListener'
   */
  public void setResultListener(ResultListener listener) {

    m_ResultListener = listener;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String resultProducerTipText() {
    return "Set the resultProducer for which results are to be averaged.";
  }

  /**
   * Get the ResultProducer.
   *
   * @return the ResultProducer.
   */
  public ResultProducer getResultProducer() {

    return m_ResultProducer;
  }

  /**
   * Set the ResultProducer.
   *
   * @param newResultProducer new ResultProducer to use.
   */
  public void setResultProducer(ResultProducer newResultProducer) {

    m_ResultProducer = newResultProducer;
    m_ResultProducer.setResultListener(this);
    findKeyIndex();
  }

  /**
   * Gets a text description of the result producer.
   *
   * @return a text description of the result producer.
   */
  public String toString() {

    String result = "AveragingResultProducer: ";
    result += getCompatibilityState();
    if (m_Instances == null) {
      result += ": <null Instances>";
    } else {
      result += ": " + Utils.backQuoteChars(m_Instances.relationName());
    }
    return result;
  }

  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.18 $");
  }
} // AveragingResultProducer