/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Experiment.java
 * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.experiment;

import weka.core.AdditionalMeasureProducer;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.AbstractFileLoader;
import weka.core.converters.ConverterUtils;
import weka.core.xml.KOML;
import weka.core.xml.XMLOptions;
import weka.experiment.xml.XMLExperiment;

import java.beans.PropertyDescriptor;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Vector;

import javax.swing.DefaultListModel;

/**
 * Holds all the necessary configuration information for a standard
 * type experiment. This object is able to be serialized for storage
 * on disk.
 *
 * Valid options are:
 *
 * -L <num>
 *  The lower run number to start the experiment from.
 *  (default 1)
 *
 * -U <num>
 *  The upper run number to end the experiment at (inclusive).
 *  (default 10)
 *
 * -T <arff file>
 *  The dataset to run the experiment on.
 *  (required, may be specified multiple times)
 *
 * -P <class name>
 *  The full class name of a ResultProducer (required).
 *  eg: weka.experiment.RandomSplitResultProducer
 *
 * -D <class name>
 *  The full class name of a ResultListener (required).
 *  eg: weka.experiment.CSVResultListener
 *
 * -N <string>
 *  A string containing any notes about the experiment.
 *  (default none)
 *
 * Options specific to result producer weka.experiment.RandomSplitResultProducer:
 *
 * -P <percent>
 *  The percentage of instances to use for training.
 *  (default 66)
 *
 * -D
 *  Save raw split evaluator output.
 *
 * -O <file/directory name/path>
 *  The filename where raw output will be stored.
 *  If a directory name is specified then individual
 *  outputs will be gzipped, otherwise all output will be
 *  zipped to the named file. Use in conjunction with -D.
 *  (default splitEvalutorOut.zip)
 *
 * -W <class name>
 *  The full class name of a SplitEvaluator.
 *  eg: weka.experiment.ClassifierSplitEvaluator
 *
 * -R
 *  Set when data is not to be randomized and the data sets' size
 *  is not to be determined via probabilistic rounding.
 *
 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
 *
 * -W <class name>
 *  The full class name of the classifier.
 *  eg: weka.classifiers.bayes.NaiveBayes
 *
 * -C <index>
 *  The index of the class for which IR statistics
 *  are to be output. (default 1)
 *
 * -I <index>
 *  The index of an attribute to output in the
 *  results. This attribute should identify an
 *  instance in order to know which instances are
 *  in the test set of a cross validation. If 0,
 *  no output (default 0).
 *
 * -P
 *  Add target and prediction columns to the result
 *  for each fold.
 *
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 *
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 *
 * All options after -- will be passed to the result producer.
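 *
 * A minimal programmatic configuration sketch. The accessors and driver
 * methods used below (setResultProducer, setResultListener, runExperiment,
 * postProcess) are assumed to be defined elsewhere in this class, and
 * "iris.arff" is only a placeholder file name:
 * <pre>
 * Experiment exp = new Experiment();
 * exp.setRunLower(1);
 * exp.setRunUpper(10);
 * exp.getDatasets().addElement(new java.io.File("iris.arff"));
 * exp.setResultProducer(new RandomSplitResultProducer());
 * exp.setResultListener(new InstancesResultListener());
 * exp.initialize();
 * exp.runExperiment();
 * exp.postProcess();
 * </pre>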
*
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision: 5399 $
*/
public class Experiment
implements Serializable, OptionHandler, RevisionHandler {
/** for serialization */
static final long serialVersionUID = 44945596742646663L;
/** The filename extension that should be used for experiment files */
public static String FILE_EXTENSION = ".exp";
/** Where results will be sent */
protected ResultListener m_ResultListener = new InstancesResultListener();
/** The result producer */
protected ResultProducer m_ResultProducer = new RandomSplitResultProducer();
/** Lower run number */
protected int m_RunLower = 1;
/** Upper run number */
protected int m_RunUpper = 10;
/** An array of dataset files */
protected DefaultListModel m_Datasets = new DefaultListModel();
/** True if the exp should also iterate over a property of the RP */
protected boolean m_UsePropertyIterator = false;
/** The path to the iterator property */
protected PropertyNode [] m_PropertyPath;
/** The array of values to set the property to */
protected Object m_PropertyArray;
/** User notes about the experiment */
protected String m_Notes = "";
/** Method names of additional measures of objects contained in the
 custom property iterator. Only method names beginning with "measure"
and returning doubles are recognised */
protected String [] m_AdditionalMeasures = null;
/** True if the class attribute is the first attribute for all
datasets involved in this experiment. */
protected boolean m_ClassFirst = false;
 /** If true an experiment will advance the current data set before
     any custom iterator */
protected boolean m_AdvanceDataSetFirst = true;
/**
* Sets whether the first attribute is treated as the class
* for all datasets involved in the experiment. This information
* is not output with the result of the experiments!
*
* @param flag whether the class attribute is the first and not the last
*/
public void classFirst(boolean flag) {
m_ClassFirst = flag;
}
/**
 * Get the value of m_AdvanceDataSetFirst.
 *
 * @return Value of m_AdvanceDataSetFirst.
*/
public boolean getAdvanceDataSetFirst() {
return m_AdvanceDataSetFirst;
}
/**
* Set the value of m_AdvanceDataSetFirst.
*
 * @param newAdvanceDataSetFirst Value to assign to m_AdvanceDataSetFirst.
*/
public void setAdvanceDataSetFirst(boolean newAdvanceDataSetFirst) {
m_AdvanceDataSetFirst = newAdvanceDataSetFirst;
}
/**
* Gets whether the custom property iterator should be used.
*
* @return true if so
*/
public boolean getUsePropertyIterator() {
return m_UsePropertyIterator;
}
/**
* Sets whether the custom property iterator should be used.
*
* @param newUsePropertyIterator true if so
*/
public void setUsePropertyIterator(boolean newUsePropertyIterator) {
m_UsePropertyIterator = newUsePropertyIterator;
}
/**
* Gets the path of properties taken to get to the custom property
* to iterate over.
*
* @return an array of PropertyNodes
*/
public PropertyNode [] getPropertyPath() {
return m_PropertyPath;
}
/**
* Sets the path of properties taken to get to the custom property
* to iterate over.
*
* @param newPropertyPath an array of PropertyNodes
*/
public void setPropertyPath(PropertyNode [] newPropertyPath) {
m_PropertyPath = newPropertyPath;
}
/**
* Sets the array of values to set the custom property to.
*
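 * An illustrative sketch (it assumes the property path has already been
 * pointed at a classifier property of the result producer, e.g. the split
 * evaluator's classifier):
 * <pre>
 * exp.setPropertyArray(new weka.classifiers.Classifier[] {
 *   new weka.classifiers.rules.ZeroR(),
 *   new weka.classifiers.bayes.NaiveBayes()
 * });
 * exp.setUsePropertyIterator(true);
 * </pre>
 *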
* @param newPropArray a value of type Object which should be an
* array of the appropriate values.
*/
public void setPropertyArray(Object newPropArray) {
m_PropertyArray = newPropArray;
}
/**
* Gets the array of values to set the custom property to.
*
* @return a value of type Object which should be an
* array of the appropriate values.
*/
public Object getPropertyArray() {
return m_PropertyArray;
}
/**
* Gets the number of custom iterator values that have been defined
* for the experiment.
*
* @return the number of custom property iterator values.
*/
public int getPropertyArrayLength() {
return Array.getLength(m_PropertyArray);
}
/**
* Gets a specified value from the custom property iterator array.
*
* @param index the index of the value wanted
* @return the property array value
*/
public Object getPropertyArrayValue(int index) {
return Array.get(m_PropertyArray, index);
}
/* These may potentially want to be made un-transient if it is decided
* that experiments may be saved mid-run and later resumed
*/
/** The current run number when the experiment is running */
protected transient int m_RunNumber;
/** The current dataset number when the experiment is running */
protected transient int m_DatasetNumber;
/** The current custom property value index when the experiment is running */
protected transient int m_PropertyNumber;
/** True if the experiment has finished running */
protected transient boolean m_Finished = true;
/** The dataset currently being used */
protected transient Instances m_CurrentInstances;
/** The custom property value that has actually been set */
protected transient int m_CurrentProperty;
/**
* When an experiment is running, this returns the current run number.
*
* @return the current run number.
*/
public int getCurrentRunNumber() {
return m_RunNumber;
}
/**
* When an experiment is running, this returns the current dataset number.
*
* @return the current dataset number.
*/
public int getCurrentDatasetNumber() {
return m_DatasetNumber;
}
/**
* When an experiment is running, this returns the index of the
* current custom property value.
*
* @return the index of the current custom property value.
*/
public int getCurrentPropertyNumber() {
return m_PropertyNumber;
}
/**
* Prepares an experiment for running, initializing current iterator
* settings.
*
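 * After initialization the experiment can be stepped manually; a sketch,
 * assuming hasMoreIterations(), nextIteration() and postProcess() are the
 * iteration methods defined elsewhere in this class:
 * <pre>
 * exp.initialize();
 * while (exp.hasMoreIterations()) {
 *   exp.nextIteration(); // runs one run/dataset/property combination
 * }
 * exp.postProcess();
 * </pre>
 *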
* @throws Exception if an error occurs
*/
public void initialize() throws Exception {
m_RunNumber = getRunLower();
m_DatasetNumber = 0;
m_PropertyNumber = 0;
m_CurrentProperty = -1;
m_CurrentInstances = null;
m_Finished = false;
if (m_UsePropertyIterator && (m_PropertyArray == null)) {
throw new Exception("Null array for property iterator");
}
if (getRunLower() > getRunUpper()) {
throw new Exception("Lower run number is greater than upper run number");
}
if (getDatasets().size() == 0) {
throw new Exception("No datasets have been specified");
}
if (m_ResultProducer == null) {
throw new Exception("No ResultProducer set");
}
if (m_ResultListener == null) {
throw new Exception("No ResultListener set");
}
// if (m_UsePropertyIterator && (m_PropertyArray != null)) {
determineAdditionalResultMeasures();
// }
m_ResultProducer.setResultListener(m_ResultListener);
m_ResultProducer.setAdditionalMeasures(m_AdditionalMeasures);
m_ResultProducer.preProcess();
// constrain the additional measures to be only those allowable
// by the ResultListener
String [] columnConstraints = m_ResultListener.
determineColumnConstraints(m_ResultProducer);
if (columnConstraints != null) {
m_ResultProducer.setAdditionalMeasures(columnConstraints);
}
}
/**
* Iterate over the objects in the property array to determine what
* (if any) additional measures they support
*
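 * A hypothetical producer exposing one extra statistic would implement
 * AdditionalMeasureProducer along these lines (illustrative sketch only;
 * measureTreeSize and m_TreeSize are made-up names):
 * <pre>
 * public Enumeration enumerateMeasures() {
 *   FastVector names = new FastVector();
 *   names.addElement("measureTreeSize"); // must start with "measure"
 *   return names.elements();
 * }
 *
 * public double getMeasure(String additionalMeasureName) {
 *   return m_TreeSize;
 * }
 * </pre>
 *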
 * @throws Exception if additional measures don't comply with the naming
* convention (starting with "measure")
*/
private void determineAdditionalResultMeasures() throws Exception {
m_AdditionalMeasures = null;
FastVector measureNames = new FastVector();
// first try the result producer, then property array if applicable
if (m_ResultProducer instanceof AdditionalMeasureProducer) {
Enumeration am = ((AdditionalMeasureProducer)m_ResultProducer).
enumerateMeasures();
while (am.hasMoreElements()) {
String mname = (String)am.nextElement();
if (mname.startsWith("measure")) {
if (measureNames.indexOf(mname) == -1) {
measureNames.addElement(mname);
}
} else {
throw new Exception ("Additional measures in "
+ m_ResultProducer.getClass().getName()
+" must obey the naming convention"
+" of starting with \"measure\"");
}
}
}
if (m_UsePropertyIterator && (m_PropertyArray != null)) {
for (int i = 0; i < Array.getLength(m_PropertyArray); i++) {
Object current = Array.get(m_PropertyArray, i);
if (current instanceof AdditionalMeasureProducer) {
Enumeration am = ((AdditionalMeasureProducer)current).
enumerateMeasures();
while (am.hasMoreElements()) {
String mname = (String)am.nextElement();
if (mname.startsWith("measure")) {
if (measureNames.indexOf(mname) == -1) {
measureNames.addElement(mname);
}
} else {
throw new Exception ("Additional measures in "
+ current.getClass().getName()
+" must obey the naming convention"
+" of starting with \"measure\"");
}
}
}
}
}
if (measureNames.size() > 0) {
m_AdditionalMeasures = new String [measureNames.size()];
      for (int i = 0; i < measureNames.size(); i++) {
        m_AdditionalMeasures[i] = (String) measureNames.elementAt(i);
      }
    }
  }

  /**
   * Parses a given list of options.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
public void setOptions(String [] options) throws Exception {
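    // An illustrative argument array this parser accepts ("iris.arff" is
    // just a placeholder dataset name):
    //   { "-L", "1", "-U", "10", "-T", "iris.arff",
    //     "-P", "weka.experiment.RandomSplitResultProducer",
    //     "-D", "weka.experiment.InstancesResultListener" }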
String lowerString = Utils.getOption('L', options);
if (lowerString.length() != 0) {
setRunLower(Integer.parseInt(lowerString));
} else {
setRunLower(1);
}
String upperString = Utils.getOption('U', options);
if (upperString.length() != 0) {
setRunUpper(Integer.parseInt(upperString));
} else {
setRunUpper(10);
}
if (getRunLower() > getRunUpper()) {
throw new Exception("Lower (" + getRunLower()
+ ") is greater than upper ("
+ getRunUpper() + ")");
}
setNotes(Utils.getOption('N', options));
getDatasets().removeAllElements();
String dataName;
do {
dataName = Utils.getOption('T', options);
if (dataName.length() != 0) {
File dataset = new File(dataName);
getDatasets().addElement(dataset);
}
} while (dataName.length() != 0);
if (getDatasets().size() == 0) {
throw new Exception("Required: -T -L <num>