/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * RemoteExperiment.java * Copyright (C) 2000 University of Waikato, Hamilton, New Zealand * */ package weka.experiment; import weka.core.FastVector; import weka.core.Option; import weka.core.OptionHandler; import weka.core.Queue; import weka.core.RevisionUtils; import weka.core.SerializedObject; import weka.core.Utils; import weka.core.xml.KOML; import weka.core.xml.XMLOptions; import weka.experiment.xml.XMLExperiment; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.rmi.Naming; import java.util.Enumeration; import javax.swing.DefaultListModel; /** * Holds all the necessary configuration information for a distributed * experiment. This object is able to be serialized for storage on disk.
* * This class is experimental at present. Has been tested using * CSVResultListener (sending results to standard out) and * DatabaseResultListener (InstantDB + RmiJdbc bridge).
* * Getting started:
*
* Start InstantDB (with the RMI bridge) on some machine. If using java2
* then specify -Djava.security.policy=db.policy to the
* virtual machine. Where db.policy is as follows:
*
* grant { * permission java.security.AllPermission; * }; *
* * Start RemoteEngine servers on x machines as per the instructions in the * README_Experiment_Gui file. There must be a * DatabaseUtils.props in either the HOME or current directory of each * machine, listing all necessary jdbc drivers.
* * The machine where a RemoteExperiment is started must also have a copy * of DatabaseUtils.props listing the URL to the machine where the * database server is running (RmiJdbc + InstantDB).
* * Here is an example of starting a RemoteExperiment:
* *
* * java -Djava.rmi.server.codebase=file:/path to weka classes/ \ * weka.experiment.RemoteExperiment -L 1 -U 10 \ * -T /home/ml/datasets/UCI/iris.arff \ * -D "weka.experiment.DatabaseResultListener" \ * -P "weka.experiment.RandomSplitResultProducer" \ * -h rosebud.cs.waikato.ac.nz -h blackbird.cs.waikato.ac.nz -r -- \ * -W weka.experiment.ClassifierSplitEvaluator -- \ * -W weka.classifiers.bayes.NaiveBayes * *
* The "codebase" property tells rmi where to serve up weka classes from. * This can either be a file url (as long as a shared file system is being * used that is accessible by the remoteEngine servers), or http url (which * of course supposes that a web server is running and you have put your * weka classes somewhere that is web accessible). If using a file url the * trailing "/" is *most* important unless the weka classes are in a jar * file.
* * Valid options are:
* *-L <num> * The lower run number to start the experiment from. * (default 1)* *
-U <num> * The upper run number to end the experiment at (inclusive). * (default 10)* *
-T <arff file> * The dataset to run the experiment on. * (required, may be specified multiple times)* *
-P <class name> * The full class name of a ResultProducer (required). * eg: weka.experiment.RandomSplitResultProducer* *
-D <class name> * The full class name of a ResultListener (required). * eg: weka.experiment.CSVResultListener* *
-N <string> * A string containing any notes about the experiment. * (default none)* *
* Options specific to result producer weka.experiment.RandomSplitResultProducer: ** *
-P <percent> * The percentage of instances to use for training. * (default 66)* *
-D * Save raw split evaluator output.* *
-O <file/directory name/path> * The filename where raw output will be stored. * If a directory name is specified then individual * outputs will be gzipped, otherwise all output will be * zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)* *
-W <class name> * The full class name of a SplitEvaluator. * eg: weka.experiment.ClassifierSplitEvaluator* *
-R * Set when data is not to be randomized and the data sets' size * is not to be determined via probabilistic rounding.* *
* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator: ** *
-W <class name> * The full class name of the classifier. * eg: weka.classifiers.bayes.NaiveBayes* *
-C <index> * The index of the class for which IR statistics * are to be output. (default 1)* *
-I <index> * The index of an attribute to output in the * results. This attribute should identify an * instance in order to know which instances are * in the test set of a cross validation. if 0 * no output (default 0).* *
-P * Add target and prediction columns to the result * for each fold.* *
* Options specific to classifier weka.classifiers.rules.ZeroR: ** *
-D * If set, classifier is run in debug mode and * may output additional info to the console* * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.16 $ */ public class RemoteExperiment extends Experiment { /** for serialization */ static final long serialVersionUID = -7357668825635314937L; /** The list of objects listening for remote experiment events */ private FastVector m_listeners = new FastVector(); /** Holds the names of machines with remoteEngine servers running */ protected DefaultListModel m_remoteHosts = new DefaultListModel(); /** The queue of available hosts */ private Queue m_remoteHostsQueue = new Queue(); /** The status of each of the remote hosts */ private int [] m_remoteHostsStatus; /** The number of times tasks have failed on each remote host */ private int [] m_remoteHostFailureCounts; /** status of the remote host: available */ protected static final int AVAILABLE=0; /** status of the remote host: in use */ protected static final int IN_USE=1; /** status of the remote host: connection failed */ protected static final int CONNECTION_FAILED=2; /** status of the remote host: some other failure */ protected static final int SOME_OTHER_FAILURE=3; // protected static final int TO_BE_RUN=0; // protected static final int PROCESSING=1; // protected static final int FAILED=2; // protected static final int FINISHED=3; /** allow at most 3 failures on a host before it is removed from the list of usable hosts */ protected static final int MAX_FAILURES=3; /** Set to true if MAX_FAILURES exceeded on all hosts or connections fail on all hosts or user aborts experiment (via gui) */ private boolean m_experimentAborted = false; /** The number of hosts removed due to exceeding max failures */ private int m_removedHosts; /** The count of failed sub-experiments */ private int m_failedCount; /** The count of successfully completed sub-experiments */ private int m_finishedCount; /** The base experiment to split up into sub experiments for remote 
execution */ private Experiment m_baseExperiment = null; /** The sub experiments */ protected Experiment [] m_subExperiments; /** The queue of sub experiments waiting to be processed */ private Queue m_subExpQueue = new Queue(); /** The status of each of the sub-experiments */ protected int [] m_subExpComplete; /** * If true, then sub experiments are created on the basis of data sets * rather than run number. */ protected boolean m_splitByDataSet = true; /** * Returns true if sub experiments are to be created on the basis of * data set.. * * @return a
boolean
value indicating whether sub
* experiments are to be created on the basis of data set (true) or
* run number (false).
*/
public boolean getSplitByDataSet() {
  // true: sub experiments are created per data set; false: per run number
  final boolean splitOnDataSets = this.m_splitByDataSet;
  return splitOnDataSets;
}
/**
 * Chooses the basis on which sub experiments are created.
 *
 * @param sd if true, sub experiments are created on the basis of data
 * set; if false, they are created on the basis of run number.
 */
public void setSplitByDataSet(boolean sd) {
  this.m_splitByDataSet = sd;
}
/**
 * Creates a RemoteExperiment whose base experiment is a freshly
 * constructed, empty {@link Experiment}. Delegates to
 * {@link #RemoteExperiment(Experiment)}.
 *
 * @throws Exception declared because the delegated constructor declares
 * it (it rejects a null base experiment)
 */
public RemoteExperiment() throws Exception {
  this(new Experiment());
}
/**
 * Creates a RemoteExperiment around the supplied base Experiment by
 * handing it to {@link #setBaseExperiment(Experiment)}.
 *
 * @param base the base experiment to use
 * @throws Exception if the base experiment is null
 */
public RemoteExperiment(Experiment base) throws Exception {
  this.setBaseExperiment(base);
}
/**
 * Registers an object that is interested in receiving update
 * information from this RemoteExperiment. The listener is appended to
 * the internal list of listeners.
 *
 * @param r the listener to add
 */
public void addRemoteExperimentListener(RemoteExperimentListener r) {
  this.m_listeners.addElement(r);
}
/**
 * Returns the base experiment that this remote experiment wraps.
 *
 * @return the base experiment
 */
public Experiment getBaseExperiment() {
  final Experiment wrapped = this.m_baseExperiment;
  return wrapped;
}
/**
 * Installs the base experiment and copies its configuration (run range,
 * result listener and producer, datasets, property-iterator settings,
 * notes, and the class-first / advance-data-set-first flags) onto this
 * remote experiment.
 *
 * @param base the base experiment to use.
 * @throws Exception if supplied base experiment is null
 */
public void setBaseExperiment(Experiment base) throws Exception {
  if (null == base) {
    throw new Exception("Base experiment is null!");
  }

  // Must be stored first: the setters invoked below also forward each
  // value to m_baseExperiment.
  this.m_baseExperiment = base;

  // Run range
  this.setRunLower(base.getRunLower());
  this.setRunUpper(base.getRunUpper());

  // Result handling
  this.setResultListener(base.getResultListener());
  this.setResultProducer(base.getResultProducer());

  // Data sets
  this.setDatasets(base.getDatasets());

  // Custom property iteration
  this.setUsePropertyIterator(base.getUsePropertyIterator());
  this.setPropertyPath(base.getPropertyPath());
  this.setPropertyArray(base.getPropertyArray());

  // Notes
  this.setNotes(base.getNotes());

  // These two flags are copied field-to-field rather than via setters
  this.m_ClassFirst = base.m_ClassFirst;
  this.m_AdvanceDataSetFirst = base.m_AdvanceDataSetFirst;
}
/**
 * Stores the user notes on this experiment and forwards them to the
 * base experiment.
 *
 * @param newNotes New user notes.
 */
public void setNotes(String newNotes) {
  super.setNotes(newNotes);
  this.m_baseExperiment.setNotes(newNotes);
}
/**
 * Stores the lower run number on this experiment and forwards it to the
 * base experiment.
 *
 * @param newRunLower the lower run number for the experiment.
 */
public void setRunLower(int newRunLower) {
  super.setRunLower(newRunLower);
  this.m_baseExperiment.setRunLower(newRunLower);
}
/**
 * Stores the upper run number on this experiment and forwards it to the
 * base experiment.
 *
 * @param newRunUpper the upper run number for the experiment.
 */
public void setRunUpper(int newRunUpper) {
  super.setRunUpper(newRunUpper);
  this.m_baseExperiment.setRunUpper(newRunUpper);
}
/**
 * Stores the result listener where results will be sent, both on this
 * experiment and on the base experiment.
 *
 * @param newResultListener the result listener where results will be sent.
 */
public void setResultListener(ResultListener newResultListener) {
  super.setResultListener(newResultListener);
  this.m_baseExperiment.setResultListener(newResultListener);
}
/**
 * Stores the result producer used for the current experiment, both on
 * this experiment and on the base experiment.
 *
 * @param newResultProducer result producer to use for the current
 * experiment.
 */
public void setResultProducer(ResultProducer newResultProducer) {
  super.setResultProducer(newResultProducer);
  this.m_baseExperiment.setResultProducer(newResultProducer);
}
/**
 * Stores the list of datasets to use, both on this experiment and on
 * the base experiment. The same list model instance is passed to both.
 *
 * @param ds the list of datasets to use
 */
public void setDatasets(DefaultListModel ds) {
  super.setDatasets(ds);
  this.m_baseExperiment.setDatasets(ds);
}
/**
 * Stores whether the custom property iterator should be used, both on
 * this experiment and on the base experiment.
 *
 * @param newUsePropertyIterator true if so
 */
public void setUsePropertyIterator(boolean newUsePropertyIterator) {
  super.setUsePropertyIterator(newUsePropertyIterator);
  this.m_baseExperiment.setUsePropertyIterator(newUsePropertyIterator);
}
/**
 * Stores the path of properties taken to get to the custom property to
 * iterate over, both on this experiment and on the base experiment.
 *
 * @param newPropertyPath an array of PropertyNodes
 */
public void setPropertyPath(PropertyNode [] newPropertyPath) {
  super.setPropertyPath(newPropertyPath);
  this.m_baseExperiment.setPropertyPath(newPropertyPath);
}
/**
 * Stores the array of values to set the custom property to, both on
 * this experiment and on the base experiment.
 *
 * @param newPropArray a value of type Object which should be an
 * array of the appropriate values.
 */
public void setPropertyArray(Object newPropArray) {
  super.setPropertyArray(newPropArray);
  this.m_baseExperiment.setPropertyArray(newPropArray);
}
/**
* Prepares a remote experiment for running, creates sub experiments
*
* @throws Exception if an error occurs
*/
public void initialize() throws Exception {
if (m_baseExperiment == null) {
throw new Exception("No base experiment specified!");
}
m_experimentAborted = false;
m_finishedCount = 0;
m_failedCount = 0;
m_RunNumber = getRunLower();
m_DatasetNumber = 0;
m_PropertyNumber = 0;
m_CurrentProperty = -1;
m_CurrentInstances = null;
m_Finished = false;
if (m_remoteHosts.size() == 0) {
throw new Exception("No hosts specified!");
}
// initialize all remote hosts to available
m_remoteHostsStatus = new int [m_remoteHosts.size()];
m_remoteHostFailureCounts = new int [m_remoteHosts.size()];
m_remoteHostsQueue = new Queue();
// prime the hosts queue
for (int i=0;i