/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Copyright (C) 2006 University of Waikato
*/
package weka.attributeSelection;
import weka.core.Attribute;
import weka.core.CheckGOE;
import weka.core.CheckOptionHandler;
import weka.core.Instances;
import weka.core.CheckScheme.PostProcessor;
import weka.test.Regression;
import junit.framework.TestCase;
/**
* Abstract Test class for attribute selection schemes. Internally it uses the
* class <code>CheckAttributeSelection</code> to determine success or failure
* of the tests. It basically follows the <code>testsPerClassType</code>
* method.
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 1.5 $
*
* @see CheckAttributeSelection
* @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
* @see PostProcessor
*/
public abstract class AbstractAttributeSelectionTest
extends TestCase {
/** The search scheme to be tested, obtained from getSearch() in setUp() */
protected ASSearch m_Search;
/** The evaluator to test, obtained from getEvaluator() in setUp() */
protected ASEvaluation m_Evaluator;
/** For testing the attribute selection scheme */
protected CheckAttributeSelection m_Tester;
/** whether scheme handles weighted instances (as reported by m_Tester) */
protected boolean m_weightedInstancesHandler;
/** whether scheme handles multi-instance data (as reported by m_Tester) */
protected boolean m_multiInstanceHandler;
/** the number of classes to test with testNClasses()
* @see #testNClasses() */
protected int m_NClasses;
/** whether to run CheckAttributeSelection in DEBUG mode */
protected boolean DEBUG = false;
/** the attribute type with the lowest value */
protected final static int FIRST_CLASSTYPE = Attribute.NUMERIC;
/** the attribute type with the highest value */
protected final static int LAST_CLASSTYPE = Attribute.RELATIONAL;
/** whether scheme can handle nominal attributes (array index is attribute type of class) */
protected boolean[] m_NominalPredictors;
/** whether scheme can handle numeric attributes (array index is attribute type of class) */
protected boolean[] m_NumericPredictors;
/** whether scheme can handle string attributes (array index is attribute type of class) */
protected boolean[] m_StringPredictors;
/** whether scheme can handle date attributes (array index is attribute type of class) */
protected boolean[] m_DatePredictors;
/** whether scheme can handle relational attributes (array index is attribute type of class) */
protected boolean[] m_RelationalPredictors;
/** whether scheme handles missing predictor values, determined in setUp()
* with 20% missing values (array index is attribute type of class) */
protected boolean[] m_handleMissingPredictors;
/** whether scheme handles missing class values, determined in setUp()
* with 20% missing values (array index is attribute type of class) */
protected boolean[] m_handleMissingClass;
/** whether scheme handles class as first attribute (array index is
* attribute type of class); filled by testClassAsNthAttribute() */
protected boolean[] m_handleClassAsFirstAttribute;
/** whether scheme handles class as second attribute (array index is
* attribute type of class); filled by testClassAsNthAttribute() */
protected boolean[] m_handleClassAsSecondAttribute;
/** the results of the regression tests (array index is attribute type of class) */
protected String[] m_RegressionResults;
/** the OptionHandler tester */
protected CheckOptionHandler m_OptionTester;
/** for testing GOE stuff */
protected CheckGOE m_GOETester;
/**
 * Constructs the <code>AbstractAttributeSelectionTest</code>. Called by
 * subclasses.
 *
 * @param name the name of the test class, handed on to the JUnit superclass
 */
public AbstractAttributeSelectionTest(String name) {
  super(name);
}
/**
 * Hook for supplying a custom PostProcessor that is installed in the
 * CheckAttributeSelection instance by {@link #getTester()}. This default
 * implementation supplies none.
 *
 * @return always null in this implementation
 * @see PostProcessor
 */
protected PostProcessor getPostProcessor() {
  // no post-processing by default
  return null;
}
/**
 * Creates the CheckAttributeSelection instance used throughout the tests.
 * The tester runs silently, uses the current search scheme and evaluator,
 * works with 20 instances, honours the DEBUG flag and uses the
 * post-processor returned by {@link #getPostProcessor()}.
 *
 * @return the fully configured CheckAttributeSelection instance used for testing
 */
protected CheckAttributeSelection getTester() {
  CheckAttributeSelection tester = new CheckAttributeSelection();

  tester.setSilent(true);
  tester.setSearch(m_Search);
  tester.setEvaluator(m_Evaluator);
  tester.setNumInstances(20);
  tester.setDebug(DEBUG);
  tester.setPostProcessor(getPostProcessor());

  return tester;
}
/**
 * Configures the CheckOptionHandler used for testing the option handling.
 * This implementation sets no option handler (null), no user options and
 * silent mode; the option tests in this class only run their checks when
 * the tester reports a non-null option handler.
 *
 * @return the fully configured CheckOptionHandler
 */
protected CheckOptionHandler getOptionTester() {
  CheckOptionHandler tester = new CheckOptionHandler();

  tester.setOptionHandler(null);
  tester.setUserOptions(new String[0]);
  tester.setSilent(true);

  return tester;
}
/**
 * Configures the CheckGOE used for testing GOE stuff. This implementation
 * sets no object to check (null) and switches the tester to silent mode.
 *
 * @return the fully configured CheckGOE
 */
protected CheckGOE getGOETester() {
  CheckGOE tester = new CheckGOE();

  tester.setObject(null);
  tester.setSilent(true);

  return tester;
}
/**
 * Called by JUnit before each test method. Creates the search scheme, the
 * evaluator and the testers, allocates the per-class-type bookkeeping
 * arrays and determines (without raising junit failures) which attribute
 * types and which kinds of missing values the scheme can handle.
 *
 * @exception Exception if an error occurs during the setup
 */
protected void setUp() throws Exception {
  final int numTypes = LAST_CLASSTYPE + 1;

  // scheme and testers; the tester needs search and evaluator to be set first
  m_Search = getSearch();
  m_Evaluator = getEvaluator();
  m_Tester = getTester();
  m_OptionTester = getOptionTester();
  m_GOETester = getGOETester();

  m_weightedInstancesHandler = m_Tester.weightedInstancesHandler()[0];
  m_multiInstanceHandler = m_Tester.multiInstanceHandler()[0];

  // one slot per possible class attribute type
  m_NominalPredictors = new boolean[numTypes];
  m_NumericPredictors = new boolean[numTypes];
  m_StringPredictors = new boolean[numTypes];
  m_DatePredictors = new boolean[numTypes];
  m_RelationalPredictors = new boolean[numTypes];
  m_handleMissingPredictors = new boolean[numTypes];
  m_handleMissingClass = new boolean[numTypes];
  m_handleClassAsFirstAttribute = new boolean[numTypes];
  m_handleClassAsSecondAttribute = new boolean[numTypes];
  m_RegressionResults = new String[numTypes];
  m_NClasses = 4;

  // initialize attributes: nominal, numeric, string, date, relational
  checkAttributes(true, false, false, false, false, false);
  checkAttributes(false, true, false, false, false, false);
  checkAttributes(false, false, true, false, false, false);
  checkAttributes(false, false, false, true, false, false);
  checkAttributes(false, false, false, false, true, false);

  // initialize missing values handling (20% missing), but only for class
  // types the scheme supports at all
  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    if (canPredict(classType)) {
      m_handleMissingPredictors[classType] = checkMissingPredictors(classType, 20, false);
      m_handleMissingClass[classType] = checkMissingClass(classType, 20, false);
    }
  }
}
/**
 * Called by JUnit after each test method. Releases the scheme and the
 * testers and resets all flags and bookkeeping arrays to their initial
 * state, i.e., both handler flags (weighted instances and multi-instance)
 * are cleared and the per-class-type arrays are replaced by fresh ones.
 */
protected void tearDown() {
  final int numTypes = LAST_CLASSTYPE + 1;

  m_Search = null;
  m_Evaluator = null;
  m_Tester = null;
  m_OptionTester = null;
  m_GOETester = null;

  // reset both handler flags that setUp() determines
  m_weightedInstancesHandler = false;
  m_multiInstanceHandler = false;

  m_NominalPredictors = new boolean[numTypes];
  m_NumericPredictors = new boolean[numTypes];
  m_StringPredictors = new boolean[numTypes];
  m_DatePredictors = new boolean[numTypes];
  m_RelationalPredictors = new boolean[numTypes];
  m_handleMissingPredictors = new boolean[numTypes];
  m_handleMissingClass = new boolean[numTypes];
  m_handleClassAsFirstAttribute = new boolean[numTypes];
  m_handleClassAsSecondAttribute = new boolean[numTypes];
  m_RegressionResults = new String[numTypes];
  m_NClasses = 4;
}
/**
 * Used to create an instance of a specific search scheme. Called from
 * setUp() before each test method.
 *
 * @return a suitably configured <code>ASSearch</code> value
 */
public abstract ASSearch getSearch();
/**
 * Used to create an instance of a specific evaluator. Called from
 * setUp() before each test method.
 *
 * @return a suitably configured <code>ASEvaluation</code> value
 */
public abstract ASEvaluation getEvaluator();
/**
 * Checks whether at least one attribute type (nominal, numeric, string,
 * date or relational) can be handled in combination with the given class
 * type, based on the flags collected by checkAttributes.
 *
 * @param type the class type to check for
 * @return true if at least one attribute type can be predicted with
 *         the given class
 */
protected boolean canPredict(int type) {
  if (m_NominalPredictors[type]) {
    return true;
  }
  if (m_NumericPredictors[type]) {
    return true;
  }
  if (m_StringPredictors[type]) {
    return true;
  }
  if (m_DatePredictors[type]) {
    return true;
  }
  return m_RelationalPredictors[type];
}
/**
 * Returns a string for the class type, as produced by
 * CheckAttributeSelection.attributeTypeToString. Used for the failure
 * messages of the tests.
 *
 * @param type the class type
 * @return the class type as string
 */
protected String getClassTypeString(int type) {
  final String typeName = CheckAttributeSelection.attributeTypeToString(type);
  return typeName;
}
/**
 * Tests, for every class type, whether the scheme can handle a certain
 * type of attributes and, if not, whether the exception is OK. The first
 * flag of the tester's result is recorded in the predictor array that
 * corresponds to the first attribute flag that is set (nominal, numeric,
 * string, date, relational -- in that order).
 *
 * @param nom to check for nominal attributes
 * @param num to check for numeric attributes
 * @param str to check for string attributes
 * @param dat to check for date attributes
 * @param rel to check for relational attributes
 * @param allowFail whether a junit fail can be executed
 * @see CheckAttributeSelection#canPredict(boolean, boolean, boolean, boolean, boolean, boolean, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
protected void checkAttributes(boolean nom, boolean num, boolean str,
    boolean dat, boolean rel,
    boolean allowFail) {
  // text for the type of attributes, only used in the failure message
  final String attName = nom ? "nominal"
      : num ? "numeric"
      : str ? "string"
      : dat ? "date"
      : rel ? "relational"
      : "";

  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    final boolean[] outcome = m_Tester.canPredict(
        nom, num, str, dat, rel, m_multiInstanceHandler, classType);

    // remember the outcome for the attribute type that was checked
    if (nom) {
      m_NominalPredictors[classType] = outcome[0];
    } else if (num) {
      m_NumericPredictors[classType] = outcome[0];
    } else if (str) {
      m_StringPredictors[classType] = outcome[0];
    } else if (dat) {
      m_DatePredictors[classType] = outcome[0];
    } else if (rel) {
      m_RelationalPredictors[classType] = outcome[0];
    }

    // only a junit failure if neither result flag is set and failing is allowed
    if (!(outcome[0] || outcome[1]) && allowFail) {
      fail("Error handling " + attName + " attributes (" + getClassTypeString(classType)
          + " class)!");
    }
  }
}
/**
 * Tests whether the scheme can handle the different types of attributes
 * and, if not, whether the exception is OK. Relational attributes are
 * skipped for multi-instance handlers.
 *
 * @see #checkAttributes(boolean, boolean, boolean, boolean, boolean, boolean)
 */
public void testAttributes() {
  checkAttributes(true, false, false, false, false, true);   // nominal
  checkAttributes(false, true, false, false, false, true);   // numeric
  checkAttributes(false, false, true, false, false, true);   // string
  checkAttributes(false, false, false, true, false, true);   // date

  if (!m_multiInstanceHandler) {
    checkAttributes(false, false, false, false, true, true); // relational
  }
}
/**
 * Tests whether the scheme declares a serialVersionUID; fails if the
 * tester reports that it does not.
 */
public void testSerialVersionUID() {
  final boolean[] outcome = m_Tester.declaresSerialVersionUID();

  if (!outcome[0]) {
    fail("Doesn't declare serialVersionUID!");
  }
}
/**
 * Tests whether the scheme handles instance weights correctly. Only
 * performed for schemes that handle weighted instances. Problems are
 * merely reported on stderr and do not make the test fail.
 *
 * @see CheckAttributeSelection#instanceWeights(boolean, boolean, boolean, boolean, boolean, boolean, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
public void testInstanceWeights() {
  // nothing to check if the scheme doesn't handle weights
  if (!m_weightedInstancesHandler) {
    return;
  }

  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    // only class types the scheme supports at all
    if (canPredict(classType)) {
      final boolean[] outcome = m_Tester.instanceWeights(
          m_NominalPredictors[classType],
          m_NumericPredictors[classType],
          m_StringPredictors[classType],
          m_DatePredictors[classType],
          m_RelationalPredictors[classType],
          m_multiInstanceHandler,
          classType);

      if (!outcome[0]) {
        System.err.println("Error handling instance weights (" + getClassTypeString(classType)
            + " class)!");
      }
    }
  }
}
/**
 * Tests whether the scheme handles N classes (N being m_NClasses). Skipped
 * if the scheme cannot handle a nominal class at all.
 *
 * @see CheckAttributeSelection#canHandleNClasses(boolean, boolean, boolean, boolean, boolean, boolean, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 * @see #m_NClasses
 */
public void testNClasses() {
  final int nominal = Attribute.NOMINAL;

  if (canPredict(nominal)) {
    final boolean[] outcome = m_Tester.canHandleNClasses(
        m_NominalPredictors[nominal],
        m_NumericPredictors[nominal],
        m_StringPredictors[nominal],
        m_DatePredictors[nominal],
        m_RelationalPredictors[nominal],
        m_multiInstanceHandler,
        m_NClasses);

    if (!(outcome[0] || outcome[1])) {
      fail("Error handling " + m_NClasses + " classes!");
    }
  }
}
/**
 * Checks whether the scheme can handle the class attribute at a given
 * position (0-based index, -1 means last). Raises a junit failure if
 * neither flag of the tester's result is set.
 *
 * @param type the class type
 * @param position the position of the class attribute (0-based, -1 means last)
 * @return true if the scheme can handle it
 */
protected boolean checkClassAsNthAttribute(int type, int position) {
  final boolean[] outcome = m_Tester.canHandleClassAsNthAttribute(
      m_NominalPredictors[type],
      m_NumericPredictors[type],
      m_StringPredictors[type],
      m_DatePredictors[type],
      m_RelationalPredictors[type],
      m_multiInstanceHandler,
      type,
      position);

  if (!(outcome[0] || outcome[1])) {
    // human-readable, 1-based position for the message
    final String where = (position == -1) ? "last" : (position + 1) + ".";
    fail("Error handling class as " + where + " attribute ("
        + getClassTypeString(type) + " class)!");
  }

  return outcome[0];
}
/**
 * Tests whether the scheme can handle the class attribute as first and as
 * second attribute and records the outcome per class type. In case of a
 * multi-instance handler it performs no tests, since the multi-instance
 * data has a fixed format (bagID,bag,class).
 *
 * @see CheckAttributeSelection#canHandleClassAsNthAttribute(boolean, boolean, boolean, boolean, boolean, boolean, int, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
public void testClassAsNthAttribute() {
  if (!m_multiInstanceHandler) {
    for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
      // only class types the scheme supports at all
      if (canPredict(classType)) {
        m_handleClassAsFirstAttribute[classType] = checkClassAsNthAttribute(classType, 0);
        m_handleClassAsSecondAttribute[classType] = checkClassAsNthAttribute(classType, 1);
      }
    }
  }
}
/**
 * Tests, for every supported class type, whether the scheme can handle
 * zero training instances.
 *
 * @see CheckAttributeSelection#canHandleZeroTraining(boolean, boolean, boolean, boolean, boolean, boolean, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
public void testZeroTraining() {
  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    // only class types the scheme supports at all
    if (canPredict(classType)) {
      final boolean[] outcome = m_Tester.canHandleZeroTraining(
          m_NominalPredictors[classType],
          m_NumericPredictors[classType],
          m_StringPredictors[classType],
          m_DatePredictors[classType],
          m_RelationalPredictors[classType],
          m_multiInstanceHandler,
          classType);

      if (!(outcome[0] || outcome[1])) {
        fail("Error handling zero training instances (" + getClassTypeString(classType)
            + " class)!");
      }
    }
  }
}
/**
 * Checks whether the scheme can handle the given percentage of
 * missing predictors. A junit failure is only raised if allowed and if
 * neither flag of the tester's result is set.
 *
 * @param type the class type
 * @param percent the percentage of missing predictors
 * @param allowFail if true a fail statement may be executed
 * @return true if the scheme can handle it
 */
protected boolean checkMissingPredictors(int type, int percent, boolean allowFail) {
  final boolean[] outcome = m_Tester.canHandleMissing(
      m_NominalPredictors[type],
      m_NumericPredictors[type],
      m_StringPredictors[type],
      m_DatePredictors[type],
      m_RelationalPredictors[type],
      m_multiInstanceHandler,
      type,
      true,   // presumably: missing values in the predictors
      false,  // presumably: no missing values in the class
      percent);

  if (allowFail && !outcome[0] && !outcome[1]) {
    fail("Error handling " + percent + "% missing predictors ("
        + getClassTypeString(type) + " class)!");
  }

  return outcome[0];
}
/**
 * Tests whether the scheme can handle missing predictors (20% and 100%).
 * The 100% check is only performed for class types for which setUp()
 * found that missing predictors are handled.
 *
 * @see CheckAttributeSelection#canHandleMissing(boolean, boolean, boolean, boolean, boolean, boolean, int, boolean, boolean, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
public void testMissingPredictors() {
  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    // only class types the scheme supports at all
    if (canPredict(classType)) {
      checkMissingPredictors(classType, 20, true);

      if (m_handleMissingPredictors[classType]) {
        checkMissingPredictors(classType, 100, true);
      }
    }
  }
}
/**
 * Checks whether the scheme can handle the given percentage of
 * missing class labels. A junit failure is only raised if allowed and if
 * neither flag of the tester's result is set.
 *
 * @param type the class type
 * @param percent the percentage of missing class labels
 * @param allowFail if true a fail statement may be executed
 * @return true if the scheme can handle it
 */
protected boolean checkMissingClass(int type, int percent, boolean allowFail) {
  final boolean[] outcome = m_Tester.canHandleMissing(
      m_NominalPredictors[type],
      m_NumericPredictors[type],
      m_StringPredictors[type],
      m_DatePredictors[type],
      m_RelationalPredictors[type],
      m_multiInstanceHandler,
      type,
      false,  // presumably: no missing values in the predictors
      true,   // presumably: missing values in the class
      percent);

  if (allowFail && !outcome[0] && !outcome[1]) {
    fail("Error handling " + percent + "% missing class labels ("
        + getClassTypeString(type) + " class)!");
  }

  return outcome[0];
}
/**
 * Tests whether the scheme can handle missing class values (20% and
 * 100%). The 100% check is only performed for class types for which
 * setUp() found that missing class values are handled.
 *
 * @see CheckAttributeSelection#canHandleMissing(boolean, boolean, boolean, boolean, boolean, boolean, int, boolean, boolean, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
public void testMissingClass() {
  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    // only class types the scheme supports at all
    if (canPredict(classType)) {
      checkMissingClass(classType, 20, true);

      if (m_handleMissingClass[classType]) {
        checkMissingClass(classType, 100, true);
      }
    }
  }
}
/**
 * Tests, for every supported class type, whether the scheme correctly
 * initializes in the search method.
 *
 * @see CheckAttributeSelection#correctSearchInitialisation(boolean, boolean, boolean, boolean, boolean, boolean, int)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
public void testBuildInitialization() {
  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    // only class types the scheme supports at all
    if (canPredict(classType)) {
      final boolean[] outcome = m_Tester.correctSearchInitialisation(
          m_NominalPredictors[classType],
          m_NumericPredictors[classType],
          m_StringPredictors[classType],
          m_DatePredictors[classType],
          m_RelationalPredictors[classType],
          m_multiInstanceHandler,
          classType);

      if (!(outcome[0] || outcome[1])) {
        fail("Incorrect build initialization (" + getClassTypeString(classType)
            + " class)!");
      }
    }
  }
}
/**
 * Tests, for every supported class type, whether the scheme alters the
 * training set during training. The missing-value capabilities determined
 * in setUp() are handed to the tester.
 *
 * @see CheckAttributeSelection#datasetIntegrity(boolean, boolean, boolean, boolean, boolean, boolean, int, boolean, boolean)
 * @see CheckAttributeSelection#testsPerClassType(int, boolean, boolean)
 */
public void testDatasetIntegrity() {
  for (int classType = FIRST_CLASSTYPE; classType <= LAST_CLASSTYPE; classType++) {
    // only class types the scheme supports at all
    if (canPredict(classType)) {
      final boolean[] outcome = m_Tester.datasetIntegrity(
          m_NominalPredictors[classType],
          m_NumericPredictors[classType],
          m_StringPredictors[classType],
          m_DatePredictors[classType],
          m_RelationalPredictors[classType],
          m_multiInstanceHandler,
          classType,
          m_handleMissingPredictors[classType],
          m_handleMissingClass[classType]);

      if (!(outcome[0] || outcome[1])) {
        fail("Dataset is altered during training ("
            + getClassTypeString(classType) + " class)!");
      }
    }
  }
}
/**
 * Runs an attribute selection with the current search scheme and
 * evaluator (seed 42) on the given data. Problems while setting up the
 * attribute selection result in a junit failure.
 *
 * @param data the instances to test the selection scheme on
 * @return a string containing the results.
 * @throws Exception if selecting the attributes or generating the results fails
 */
protected String useScheme(Instances data) throws Exception {
  AttributeSelection selection = null;

  try {
    selection = new AttributeSelection();
    selection.setSearch(m_Search);
    selection.setEvaluator(m_Evaluator);
    selection.setSeed(42);
  } catch (Exception ex) {
    ex.printStackTrace();
    fail("Problem setting up attribute selection: " + ex);
  }

  selection.SelectAttributes(data);

  return selection.toResultsString();
}
/**
 * Hook that allows derived classes to further modify the data. This
 * implementation hands the data back unchanged.
 *
 * @param data the data to process
 * @return the processed data, here the very same object that was passed in
 */
protected Instances process(Instances data) {
  final Instances processed = data;
  return processed;
}
/**
 * Runs a regression test -- this checks that the output of the tested
 * object matches that in a reference version. When this test is
 * run without any pre-existing reference output, the reference version
 * is created.
 * <p>
 * For every class type the scheme supports, a test dataset is generated,
 * the scheme is run on it and its output is added to the regression
 * output. The test is skipped if the tester reports classpath problems or
 * if a scheme run fails with a "not in classpath" message; it fails if the
 * scheme could not be run successfully for any class type or if the
 * output differs from the reference.
 *
 * @throws Exception if an error occurs that is not handled by the test itself
 */
public void testRegression() throws Exception {
  // don't bother if not working correctly
  if (m_Tester.hasClasspathProblems()) {
    return;
  }

  Regression reg = new Regression(this.getClass());
  boolean succeeded = false;

  for (int i = FIRST_CLASSTYPE; i <= LAST_CLASSTYPE; i++) {
    // does the scheme support this type of class at all?
    if (!canPredict(i)) {
      continue;
    }

    Instances train = m_Tester.makeTestDataset(
        42, m_Tester.getNumInstances(),
        m_NominalPredictors[i] ? 2 : 0,
        m_NumericPredictors[i] ? 1 : 0,
        m_StringPredictors[i] ? 1 : 0,
        m_DatePredictors[i] ? 1 : 0,
        m_RelationalPredictors[i] ? 1 : 0,
        2,
        i,
        m_multiInstanceHandler);

    try {
      m_RegressionResults[i] = useScheme(train);
      succeeded = true;
      reg.println(m_RegressionResults[i]);
    } catch (Exception e) {
      // an exception may carry no message at all (getMessage() == null),
      // so guard against that before inspecting the text
      String msg = e.getMessage();
      if (msg != null && msg.toLowerCase().indexOf("not in classpath") > -1) {
        return;
      }

      // this class type could not be processed, others may still succeed
      m_RegressionResults[i] = null;
    }
  }

  if (!succeeded) {
    fail("Problem during regression testing: no successful predictions for any class type");
  }

  try {
    String diff = reg.diff();
    if (diff == null) {
      System.err.println("Warning: No reference available, creating.");
    } else if (!diff.equals("")) {
      fail("Regression test failed. Difference:\n" + diff);
    }
  } catch (java.io.IOException ex) {
    fail("Problem during regression testing.\n" + ex);
  }
}
/**
 * Tests the listing of the options. Nothing is checked if the option
 * tester has no option handler set.
 */
public void testListOptions() {
  if (m_OptionTester.getOptionHandler() == null) {
    return;
  }

  if (!m_OptionTester.checkListOptions()) {
    fail("Options cannot be listed via listOptions.");
  }
}
/**
 * Tests the setting of the options. Nothing is checked if the option
 * tester has no option handler set.
 */
public void testSetOptions() {
  if (m_OptionTester.getOptionHandler() == null) {
    return;
  }

  if (!m_OptionTester.checkSetOptions()) {
    fail("setOptions method failed.");
  }
}
/**
 * Tests whether the default settings are processed correctly. Nothing is
 * checked if the option tester has no option handler set.
 */
public void testDefaultOptions() {
  if (m_OptionTester.getOptionHandler() == null) {
    return;
  }

  if (!m_OptionTester.checkDefaultOptions()) {
    fail("Default options were not processed correctly.");
  }
}
/**
 * Tests whether there are any remaining options. Nothing is checked if
 * the option tester has no option handler set.
 */
public void testRemainingOptions() {
  if (m_OptionTester.getOptionHandler() == null) {
    return;
  }

  if (!m_OptionTester.checkRemainingOptions()) {
    fail("There were 'left-over' options.");
  }
}
/**
 * Tests whether the user-supplied options stay the same after setting,
 * getting, and re-setting again. Nothing is checked if the option tester
 * has no option handler set.
 *
 * @see #getOptionTester()
 */
public void testCanonicalUserOptions() {
  if (m_OptionTester.getOptionHandler() != null) {
    // use a message of its own, so that a failure here can be told apart
    // from a failure of testSetOptions()
    if (!m_OptionTester.checkCanonicalUserOptions()) {
      fail("Canonical user options check failed");
    }
  }
}
/**
 * Tests the resetting of the options to the default ones. Nothing is
 * checked if the option tester has no option handler set.
 */
public void testResettingOptions() {
  if (m_OptionTester.getOptionHandler() != null) {
    // use the dedicated resetting check; checkSetOptions() is already
    // covered by testSetOptions()
    if (!m_OptionTester.checkResettingOptions()) {
      fail("Resetting of options failed");
    }
  }
}
/**
 * Tests for a globalInfo method, using the GOE tester.
 */
public void testGlobalInfo() {
  final boolean hasGlobalInfo = m_GOETester.checkGlobalInfo();

  if (!hasGlobalInfo) {
    fail("No globalInfo method");
  }
}
/**
 * Tests the tool tips, using the GOE tester.
 */
public void testToolTips() {
  final boolean toolTipsOk = m_GOETester.checkToolTips();

  if (!toolTipsOk) {
    fail("Tool tips inconsistent");
  }
}
}