/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
*/
package weka.clusterers;
import weka.core.CheckGOE;
import weka.core.CheckOptionHandler;
import weka.core.Instances;
import weka.core.OptionHandler;
import weka.test.Regression;
import junit.framework.TestCase;
/**
* Abstract Test class for Clusterers. Internally it uses the class
* <code>CheckClusterer</code> to determine success or failure of the
* tests. It basically follows the <code>runTests</code> method.
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 1.9 $
*
* @see CheckClusterer
* @see CheckClusterer#runTests(boolean, boolean, boolean)
*/
public abstract class AbstractClustererTest
  extends TestCase {

  /** The clusterer to be tested */
  protected Clusterer m_Clusterer;

  /** For testing the clusterer */
  protected CheckClusterer m_Tester;

  /** whether clusterer is updateable */
  protected boolean m_updateableClusterer;

  /** whether clusterer handles weighted instances */
  protected boolean m_weightedInstancesHandler;

  /** whether clusterer handles multi-instance data */
  protected boolean m_multiInstanceHandler;

  /** whether to run CheckClusterer in DEBUG mode */
  protected boolean DEBUG = false;

  /** whether clusterer can handle nominal attributes */
  protected boolean m_NominalPredictors;

  /** whether clusterer can handle numeric attributes */
  protected boolean m_NumericPredictors;

  /** whether clusterer can handle string attributes */
  protected boolean m_StringPredictors;

  /** whether clusterer can handle date attributes */
  protected boolean m_DatePredictors;

  /** whether clusterer can handle relational attributes */
  protected boolean m_RelationalPredictors;

  /** whether clusterer handles missing values (probed with 20% missing) */
  protected boolean m_handleMissingPredictors;

  /** the result of the regression test */
  protected String m_RegressionResults;

  /** the OptionHandler tester */
  protected CheckOptionHandler m_OptionTester;

  /** for testing GOE stuff */
  protected CheckGOE m_GOETester;

  /**
   * Constructs the <code>AbstractClustererTest</code>. Called by subclasses.
   *
   * @param name the name of the test class
   */
  public AbstractClustererTest(String name) {
    super(name);
  }

  /**
   * Called by JUnit before each test method. This implementation creates
   * the default clusterer to test, configures the CheckClusterer, option
   * and GOE testers for it and then probes (without failing) which
   * attribute types and whether missing values can be handled.
   *
   * @throws Exception if an error occurs while setting up the testers
   */
  protected void setUp() throws Exception {
    m_Clusterer = getClusterer();

    m_Tester = new CheckClusterer();
    m_Tester.setSilent(true);
    m_Tester.setClusterer(m_Clusterer);
    m_Tester.setNumInstances(20);
    m_Tester.setDebug(DEBUG);

    m_OptionTester = getOptionTester();
    m_GOETester    = getGOETester();

    m_updateableClusterer      = m_Tester.updateableClusterer()[0];
    m_weightedInstancesHandler = m_Tester.weightedInstancesHandler()[0];
    m_multiInstanceHandler     = m_Tester.multiInstanceHandler()[0];

    m_NominalPredictors       = false;
    m_NumericPredictors       = false;
    m_StringPredictors        = false;
    m_DatePredictors          = false;
    m_RelationalPredictors    = false;
    m_handleMissingPredictors = false;
    m_RegressionResults       = "";

    // initialize attributes: one probe per attribute type, never failing
    checkAttributes(true,  false, false, false, false, false);
    checkAttributes(false, true,  false, false, false, false);
    checkAttributes(false, false, true,  false, false, false);
    checkAttributes(false, false, false, true,  false, false);
    checkAttributes(false, false, false, false, true,  false);

    // 20% missing values
    m_handleMissingPredictors = checkMissingPredictors(20, false);
  }

  /**
   * Called by JUnit after each test method. Releases the testers and
   * resets all capability flags to their initial state.
   */
  protected void tearDown() {
    m_Clusterer    = null;
    m_Tester       = null;
    m_OptionTester = null;
    m_GOETester    = null;

    m_updateableClusterer      = false;
    m_weightedInstancesHandler = false;
    // was missing from the original reset list
    m_multiInstanceHandler     = false;
    m_NominalPredictors        = false;
    m_NumericPredictors        = false;
    m_StringPredictors         = false;
    m_DatePredictors           = false;
    m_RelationalPredictors     = false;
    m_handleMissingPredictors  = false;
    m_RegressionResults        = "";
  }

  /**
   * Configures the CheckOptionHandler used for testing the option handling.
   * Sets the scheme to test; if the clusterer returned by
   * {@link #getClusterer()} is not an OptionHandler, the handler is set
   * to <code>null</code> (the option tests are then skipped).
   *
   * @return the fully configured CheckOptionHandler
   */
  protected CheckOptionHandler getOptionTester() {
    CheckOptionHandler result;
    Clusterer clusterer;

    result = new CheckOptionHandler();

    // obtain the clusterer once, so that the instance that is type-checked
    // is the very same instance that gets handed to the tester
    clusterer = getClusterer();
    if (clusterer instanceof OptionHandler)
      result.setOptionHandler((OptionHandler) clusterer);
    else
      result.setOptionHandler(null);

    result.setUserOptions(new String[0]);
    result.setSilent(true);

    return result;
  }

  /**
   * Configures the CheckGOE used for testing GOE stuff.
   * Sets the Clusterer returned from the getClusterer() method.
   *
   * @return the fully configured CheckGOE
   * @see #getClusterer()
   */
  protected CheckGOE getGOETester() {
    CheckGOE result;

    result = new CheckGOE();
    result.setObject(getClusterer());
    result.setSilent(true);

    return result;
  }

  /**
   * Used to create an instance of a specific clusterer.
   *
   * @return a suitably configured <code>Clusterer</code> value
   */
  public abstract Clusterer getClusterer();

  /**
   * checks whether at least one attribute type can be handled
   *
   * @return true if at least one attribute type can be handled
   */
  protected boolean canPredict() {
    return m_NominalPredictors
        || m_NumericPredictors
        || m_StringPredictors
        || m_DatePredictors
        || m_RelationalPredictors;
  }

  /**
   * tests whether the clusterer can handle certain attributes and if not,
   * if the exception is OK. The first flag that is set (in the order
   * nominal, numeric, string, date, relational) determines which capability
   * field is updated with the outcome.
   *
   * @param nom to check for nominal attributes
   * @param num to check for numeric attributes
   * @param str to check for string attributes
   * @param dat to check for date attributes
   * @param rel to check for relational attributes
   * @param allowFail whether a junit fail can be executed
   * @see CheckClusterer#canPredict(boolean, boolean, boolean, boolean, boolean, boolean)
   * @see CheckClusterer#runTests(boolean, boolean, boolean)
   */
  protected void checkAttributes(boolean nom, boolean num, boolean str,
                                 boolean dat, boolean rel,
                                 boolean allowFail) {
    boolean[] result;
    String att;

    result = m_Tester.canPredict(nom, num, str, dat, rel, m_multiInstanceHandler);

    // store the outcome and determine text for type of attributes
    att = "";
    if (nom) {
      att = "nominal";
      m_NominalPredictors = result[0];
    }
    else if (num) {
      att = "numeric";
      m_NumericPredictors = result[0];
    }
    else if (str) {
      att = "string";
      m_StringPredictors = result[0];
    }
    else if (dat) {
      att = "date";
      m_DatePredictors = result[0];
    }
    else if (rel) {
      att = "relational";
      m_RelationalPredictors = result[0];
    }

    // only a failure if neither element of the tester's result is set
    if (!result[0] && !result[1] && allowFail)
      fail("Error handling " + att + " attributes!");
  }

  /**
   * tests whether the clusterer can handle different types of attributes and
   * if not, if the exception is OK. Relational attributes are skipped for
   * multi-instance handlers.
   *
   * @see #checkAttributes(boolean, boolean, boolean, boolean, boolean, boolean)
   */
  public void testAttributes() {
    // nominal
    checkAttributes(true,  false, false, false, false, true);
    // numeric
    checkAttributes(false, true,  false, false, false, true);
    // string
    checkAttributes(false, false, true,  false, false, true);
    // date
    checkAttributes(false, false, false, true,  false, true);
    // relational
    if (!m_multiInstanceHandler)
      checkAttributes(false, false, false, false, true, true);
  }

  /**
   * tests whether the scheme declares a serialVersionUID.
   */
  public void testSerialVersionUID() {
    boolean[] result;

    result = m_Tester.declaresSerialVersionUID();

    if (!result[0])
      fail("Doesn't declare serialVersionUID!");
  }

  /**
   * tests whether the clusterer handles instance weights correctly. Only
   * run for weighted-instances handlers that can handle at least one
   * attribute type; a negative outcome is only reported on stderr and does
   * not fail the test.
   *
   * @see CheckClusterer#instanceWeights(boolean, boolean, boolean, boolean, boolean, boolean)
   * @see CheckClusterer#runTests(boolean, boolean, boolean)
   */
  public void testInstanceWeights() {
    boolean[] result;

    if (!m_weightedInstancesHandler)
      return;
    if (!canPredict())
      return;

    result = m_Tester.instanceWeights(
        m_NominalPredictors,
        m_NumericPredictors,
        m_StringPredictors,
        m_DatePredictors,
        m_RelationalPredictors,
        m_multiInstanceHandler);

    if (!result[0])
      System.err.println("Error handling instance weights!");
  }

  /**
   * tests whether the clusterer can handle zero training instances
   *
   * @see CheckClusterer#canHandleZeroTraining(boolean, boolean, boolean, boolean, boolean, boolean)
   * @see CheckClusterer#runTests(boolean, boolean, boolean)
   */
  public void testZeroTraining() {
    boolean[] result;

    if (!canPredict())
      return;

    result = m_Tester.canHandleZeroTraining(
        m_NominalPredictors,
        m_NumericPredictors,
        m_StringPredictors,
        m_DatePredictors,
        m_RelationalPredictors,
        m_multiInstanceHandler);

    if (!result[0] && !result[1])
      fail("Error handling zero training instances!");
  }

  /**
   * checks whether the clusterer can handle the given percentage of
   * missing predictors
   *
   * @param percent the percentage of missing predictors
   * @param allowFail if true a fail statement may be executed
   * @return true if the clusterer can handle it
   */
  protected boolean checkMissingPredictors(int percent, boolean allowFail) {
    boolean[] result;

    result = m_Tester.canHandleMissing(
        m_NominalPredictors,
        m_NumericPredictors,
        m_StringPredictors,
        m_DatePredictors,
        m_RelationalPredictors,
        m_multiInstanceHandler,
        true,
        percent);

    if (allowFail && !result[0] && !result[1])
      fail("Error handling " + percent + "% missing predictors!");

    return result[0];
  }

  /**
   * tests whether the clusterer can handle missing predictors (20% and 100%).
   * The 100% case is only checked if the 20% probe in setUp succeeded.
   *
   * @see CheckClusterer#canHandleMissing(boolean, boolean, boolean, boolean, boolean, boolean, boolean, int)
   * @see CheckClusterer#runTests(boolean, boolean, boolean)
   */
  public void testMissingPredictors() {
    if (!canPredict())
      return;

    // 20% missing
    checkMissingPredictors(20, true);

    // 100% missing
    if (m_handleMissingPredictors)
      checkMissingPredictors(100, true);
  }

  /**
   * tests whether the clusterer correctly initializes in the
   * buildClusterer method
   *
   * @see CheckClusterer#correctBuildInitialisation(boolean, boolean, boolean, boolean, boolean, boolean)
   * @see CheckClusterer#runTests(boolean, boolean, boolean)
   */
  public void testBuildInitialization() {
    boolean[] result;

    if (!canPredict())
      return;

    result = m_Tester.correctBuildInitialisation(
        m_NominalPredictors,
        m_NumericPredictors,
        m_StringPredictors,
        m_DatePredictors,
        m_RelationalPredictors,
        m_multiInstanceHandler);

    if (!result[0] && !result[1])
      fail("Incorrect build initialization!");
  }

  /**
   * tests whether the clusterer alters the training set during training.
   *
   * @see CheckClusterer#datasetIntegrity(boolean, boolean, boolean, boolean, boolean, boolean, boolean)
   * @see CheckClusterer#runTests(boolean, boolean, boolean)
   */
  public void testDatasetIntegrity() {
    boolean[] result;

    if (!canPredict())
      return;

    result = m_Tester.datasetIntegrity(
        m_NominalPredictors,
        m_NumericPredictors,
        m_StringPredictors,
        m_DatePredictors,
        m_RelationalPredictors,
        m_multiInstanceHandler,
        m_handleMissingPredictors);

    if (!result[0] && !result[1])
      fail("Training set is altered during training!");
  }

  /**
   * tests whether the clusterer produces the same model when trained
   * incrementally as when batch trained. Only run for updateable
   * clusterers; a negative outcome is only reported on stderr and does not
   * fail the test.
   *
   * @see CheckClusterer#updatingEquality(boolean, boolean, boolean, boolean, boolean, boolean)
   * @see CheckClusterer#runTests(boolean, boolean, boolean)
   */
  public void testUpdatingEquality() {
    boolean[] result;

    if (!m_updateableClusterer)
      return;

    result = m_Tester.updatingEquality(
        m_NominalPredictors,
        m_NumericPredictors,
        m_StringPredictors,
        m_DatePredictors,
        m_RelationalPredictors,
        m_multiInstanceHandler);

    if (!result[0])
      System.err.println(
          "Incremental training does not produce same result as batch training!");
  }

  /**
   * Builds a model using a copy of the current Clusterer using the given
   * data and returns the produced cluster assignments, one line per
   * instance in the form "&lt;1-based index&gt;: &lt;cluster&gt;". If
   * clustering a single instance throws an exception, the exception's
   * string representation is recorded in place of the cluster.
   *
   * @param data the instances to test the Clusterer on
   * @return a String containing the cluster assignments.
   * @throws Exception if building the clusterer fails
   */
  protected String useClusterer(Instances data) throws Exception {
    StringBuilder result;
    Clusterer clusterer;
    int i;
    double cluster;

    try {
      clusterer = AbstractClusterer.makeCopy(m_Clusterer);
    }
    catch (Exception e) {
      clusterer = null;
      e.printStackTrace();
      fail("Problem setting up to use Clusterer: " + e);
    }

    clusterer.buildClusterer(data);

    // generate result
    result = new StringBuilder();
    for (i = 0; i < data.numInstances(); i++) {
      if (i > 0)
        result.append("\n");
      result.append(i + 1).append(": ");
      try {
        cluster = clusterer.clusterInstance(data.instance(i));
        result.append(cluster);
      }
      catch (Exception e) {
        result.append(e.toString());
      }
    }

    return result.toString();
  }

  /**
   * Runs a regression test -- this checks that the output of the tested
   * object matches that in a reference version. When this test is
   * run without any pre-existing reference output, the reference version
   * is created.
   *
   * @throws Exception if generating the test dataset fails
   */
  public void testRegression() throws Exception {
    boolean succeeded;
    Exception cause;
    Regression reg;
    Instances train;

    // don't bother if not working correctly
    if (m_Tester.hasClasspathProblems())
      return;

    reg       = new Regression(this.getClass());
    succeeded = false;
    cause     = null;
    train     = m_Tester.makeTestDataset(
        42, m_Tester.getNumInstances(),
        m_NominalPredictors    ? 2 : 0,
        m_NumericPredictors    ? 1 : 0,
        m_StringPredictors     ? 1 : 0,
        m_DatePredictors       ? 1 : 0,
        m_RelationalPredictors ? 1 : 0,
        m_multiInstanceHandler);

    try {
      m_RegressionResults = useClusterer(train);
      succeeded = true;
      reg.println(m_RegressionResults);
    }
    catch (Exception e) {
      // getMessage() may legitimately be null; guard against it so that the
      // real problem is reported instead of a NullPointerException
      String msg = e.getMessage();
      if ((msg != null) && (msg.toLowerCase().indexOf("not in classpath") > -1))
        return;
      m_RegressionResults = null;
      cause = e;
    }

    if (!succeeded) {
      // include the caught exception so the failure is diagnosable
      fail("Problem during regression testing: no successful output generated\n"
          + cause);
    }

    try {
      String diff = reg.diff();
      if (diff == null) {
        System.err.println("Warning: No reference available, creating.");
      } else if (!diff.equals("")) {
        fail("Regression test failed. Difference:\n" + diff);
      }
    }
    catch (java.io.IOException ex) {
      fail("Problem during regression testing.\n" + ex);
    }
  }

  /**
   * tests the listing of the options
   */
  public void testListOptions() {
    if (m_OptionTester.getOptionHandler() == null)
      return;

    if (!m_OptionTester.checkListOptions())
      fail("Options cannot be listed via listOptions.");
  }

  /**
   * tests the setting of the options
   */
  public void testSetOptions() {
    if (m_OptionTester.getOptionHandler() == null)
      return;

    if (!m_OptionTester.checkSetOptions())
      fail("setOptions method failed.");
  }

  /**
   * tests whether the default settings are processed correctly
   */
  public void testDefaultOptions() {
    if (m_OptionTester.getOptionHandler() == null)
      return;

    if (!m_OptionTester.checkDefaultOptions())
      fail("Default options were not processed correctly.");
  }

  /**
   * tests whether there are any remaining options
   */
  public void testRemainingOptions() {
    if (m_OptionTester.getOptionHandler() == null)
      return;

    if (!m_OptionTester.checkRemainingOptions())
      fail("There were 'left-over' options.");
  }

  /**
   * tests whether the user-supplied options stay the same after setting,
   * getting, and re-setting again.
   *
   * @see #getOptionTester()
   */
  public void testCanonicalUserOptions() {
    if (m_OptionTester.getOptionHandler() == null)
      return;

    if (!m_OptionTester.checkCanonicalUserOptions())
      fail("setOptions method failed");
  }

  /**
   * tests the resetting of the options to the default ones
   */
  public void testResettingOptions() {
    if (m_OptionTester.getOptionHandler() == null)
      return;

    // previously this called checkSetOptions(), merely duplicating
    // testSetOptions() and never exercising the reset check
    if (!m_OptionTester.checkResettingOptions())
      fail("Resetting of options failed");
  }

  /**
   * tests for a globalInfo method
   */
  public void testGlobalInfo() {
    if (!m_GOETester.checkGlobalInfo())
      fail("No globalInfo method");
  }

  /**
   * tests the tool tips
   */
  public void testToolTips() {
    if (!m_GOETester.checkToolTips())
      fail("Tool tips inconsistent");
  }
}