/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* DensityBasedClustererSplitEvaluator.java
* Copyright (C) 2008 University of Waikato, Hamilton, New Zealand
*
*/
package weka.experiment;
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.Clusterer;
import weka.clusterers.AbstractClusterer;
import weka.clusterers.AbstractDensityBasedClusterer;
import weka.clusterers.DensityBasedClusterer;
import weka.clusterers.EM;
import weka.core.AdditionalMeasureProducer;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
import java.io.ObjectStreamClass;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;
/**
* A SplitEvaluator that produces results for a density based clusterer.
*
* -W classname
* Specify the full class name of the clusterer to evaluate.
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}org)
* @version $Revision: 5563 $
*/
public class DensityBasedClustererSplitEvaluator
implements SplitEvaluator,
OptionHandler,
AdditionalMeasureProducer,
RevisionHandler {
/** Whether to remove the class column (if set) from the data; enabled by default */
protected boolean m_removeClassColumn = true;
/** The clusterer used for evaluation; initialised to a new EM instance */
protected DensityBasedClusterer m_clusterer = new EM();
/** The names of any additional measures to look for in SplitEvaluators;
null until setAdditionalMeasures assigns it */
protected String [] m_additionalMeasures = null;
/** Array of booleans corresponding to the measures in m_additionalMeasures
indicating which of the additional measures the current clusterer
can produce */
protected boolean [] m_doesProduce = null;
/** The number of additional measures that need to be filled in
after taking into account column constraints imposed by the final
destination for results */
protected int m_numberAdditionalMeasures = 0;
/** Holds the statistics for the most recent application of the clusterer */
protected String m_result = null;
/** The clusterer options (if any), held as a string; empty by default */
protected String m_clustererOptions = "";
/** The clusterer version, held as a string; empty by default */
protected String m_clustererVersion = "";
/** The length of a key */
private static final int KEY_SIZE = 3;
/** The length of a result */
private static final int RESULT_SIZE = 6;
/**
 * Creates a split evaluator using the field defaults declared in this class
 * and then calls updateOptions().
 *
 * NOTE(review): updateOptions() is defined elsewhere in this class;
 * presumably it refreshes the stored clusterer options/version - confirm.
 */
public DensityBasedClustererSplitEvaluator() {
  this.updateOptions();
}
/**
 * Describes this split evaluator.
 *
 * @return a short description of the split evaluator, suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // The surrounding blanks are part of the returned text.
  final String description =
    " A SplitEvaluator that produces results for a density based clusterer. ";
  return description;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector newVector = new Vector(1);
newVector.addElement(new Option(
"\tThe full class name of the density based clusterer.\n"
+"\teg: weka.clusterers.EM",
"W", 1,
"-W
*
* -W classname
*
* All options after -- will be passed to the clusterer.
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*/
/**
 * Parses a given list of options. Valid options are:
 *
 * -W classname
 * Specify the full class name of the clusterer to evaluate.
 *
 * The options partitioned off by Utils.partitionOptions are handed to the
 * clusterer itself when it is an OptionHandler.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if no clusterer is specified with -W, if the named
 * class is not a density based clusterer, or if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String cName = Utils.getOption('W', options);
  if (cName.length() == 0) {
    throw new Exception("A clusterer must be specified with"
                        + " the -W option.");
  }
  // Do it first without options, so if an exception is thrown during
  // the option setting, listOptions will contain options for the actual
  // clusterer.
  Object candidate = AbstractClusterer.forName(cName, null);
  // Reject non density based clusterers with a clear message instead of
  // letting the cast below fail with a bare ClassCastException.
  if (!(candidate instanceof DensityBasedClusterer)) {
    throw new Exception("The class given with the -W option ("
                        + cName + ") is not a density based clusterer.");
  }
  setClusterer((DensityBasedClusterer) candidate);
  if (getClusterer() instanceof OptionHandler) {
    ((OptionHandler) getClusterer())
      .setOptions(Utils.partitionOptions(options));
    updateOptions();
  }
}
/**
 * Gets the current settings of this split evaluator and its clusterer.
 *
 * The returned array holds "-W" followed by the clusterer class name (when
 * getClusterer() is not null), then "--", then the clusterer's own options
 * (when the clusterer is an OptionHandler). Any slots left over at the end
 * are filled with empty strings.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
  // instanceof is false for null, so no separate null check is required
  String [] nested;
  if (m_clusterer instanceof OptionHandler) {
    nested = ((OptionHandler) m_clusterer).getOptions();
  } else {
    nested = new String [0];
  }

  // three extra slots: "-W", the class name and the "--" separator
  String [] result = new String [nested.length + 3];
  int next = 0;
  if (getClusterer() != null) {
    result[next++] = "-W";
    result[next++] = getClusterer().getClass().getName();
  }
  result[next++] = "--";
  for (int i = 0; i < nested.length; i++) {
    result[next++] = nested[i];
  }
  // pad whatever remains (only happens when no clusterer is set)
  for (; next < result.length; next++) {
    result[next] = "";
  }
  return result;
}
/**
* Set a list of method names for additional measures to look for
* in clusterers. This could contain many measures (of which only a
* subset may be producible by the current clusterer) if an experiment
* is the type that iterates over a set of properties.
* @param additionalMeasures a list of method names
*/
public void setAdditionalMeasures(String [] additionalMeasures) {
// System.err.println("ClassifierSplitEvaluator: setting additional measures");
m_additionalMeasures = additionalMeasures;
// determine which (if any) of the additional measures this clusterer
// can produce
if (m_additionalMeasures != null && m_additionalMeasures.length > 0) {
m_doesProduce = new boolean [m_additionalMeasures.length];
if (m_clusterer instanceof AdditionalMeasureProducer) {
Enumeration en = ((AdditionalMeasureProducer)m_clusterer).
enumerateMeasures();
while (en.hasMoreElements()) {
String mname = (String)en.nextElement();
for (int j=0;j
* Specify the full class name of the clusterer to evaluate.