/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* MIWrapper.java
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.mi;
import weka.classifiers.SingleClassifierEnhancer;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.MultiInstanceToPropositional;
import java.util.Enumeration;
import java.util.Vector;
/**
* A simple Wrapper method for applying standard propositional learners to multi-instance data.
*
* For more information see:
*
* E. T. Frank, X. Xu (2003). Applying propositional learning algorithms to multi-instance data. Department of Computer Science, University of Waikato, Hamilton, NZ.
*
* <pre>
* &#64;techreport{Frank2003,
*    address = {Department of Computer Science, University of Waikato, Hamilton, NZ},
*    author = {E. T. Frank and X. Xu},
*    institution = {University of Waikato},
*    month = {06},
*    title = {Applying propositional learning algorithms to multi-instance data},
*    year = {2003}
* }
* </pre>
*
* Valid options are:
*
* <pre> -P [1|2|3]
*  The method used in testing:
*  1.arithmetic average
*  2.geometric average
*  3.max probability of positive bag.
*  (default: 1)</pre>
*
* <pre> -A [0|1|2|3]
*  The type of weight setting for each single-instance:
*  0.keep the weight to be the same as the original value;
*  1.weight = 1.0
*  2.weight = 1.0/Total number of single-instance in the
*   corresponding bag
*  3. weight = Total number of single-instance / (Total
*   number of bags * Total number of single-instance
*   in the corresponding bag).
*  (default: 3)</pre>
*
* <pre> -D
*  If set, classifier is run in debug mode and
*  may output additional info to the console</pre>
*
* <pre> -W
*  Full name of base classifier.
*  (default: weka.classifiers.rules.ZeroR)</pre>
*
* Options specific to classifier weka.classifiers.rules.ZeroR:
*
-D * If set, classifier is run in debug mode and * may output additional info to the console* * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Xin Xu (xx5@cs.waikato.ac.nz) * @version $Revision: 1.5 $ */ public class MIWrapper extends SingleClassifierEnhancer implements MultiInstanceCapabilitiesHandler, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -7707766152904315910L; /** The number of the class labels */ protected int m_NumClasses; /** arithmetic average */ public static final int TESTMETHOD_ARITHMETIC = 1; /** geometric average */ public static final int TESTMETHOD_GEOMETRIC = 2; /** max probability of positive bag */ public static final int TESTMETHOD_MAXPROB = 3; /** the test methods */ public static final Tag[] TAGS_TESTMETHOD = { new Tag(TESTMETHOD_ARITHMETIC, "arithmetic average"), new Tag(TESTMETHOD_GEOMETRIC, "geometric average"), new Tag(TESTMETHOD_MAXPROB, "max probability of positive bag") }; /** the test method */ protected int m_Method = TESTMETHOD_GEOMETRIC; /** Filter used to convert MI dataset into single-instance dataset */ protected MultiInstanceToPropositional m_ConvertToProp = new MultiInstanceToPropositional(); /** the single-instance weight setting method */ protected int m_WeightMethod = MultiInstanceToPropositional.WEIGHTMETHOD_INVERSE2; /** * Returns a string describing this filter * * @return a description of the filter suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "A simple Wrapper method for applying standard propositional learners " + "to multi-instance data.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. 
* * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.TECHREPORT); result.setValue(Field.AUTHOR, "E. T. Frank and X. Xu"); result.setValue(Field.TITLE, "Applying propositional learning algorithms to multi-instance data"); result.setValue(Field.YEAR, "2003"); result.setValue(Field.MONTH, "06"); result.setValue(Field.INSTITUTION, "University of Waikato"); result.setValue(Field.ADDRESS, "Department of Computer Science, University of Waikato, Hamilton, NZ"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result = new Vector(); result.addElement(new Option( "\tThe method used in testing:\n" + "\t1.arithmetic average\n" + "\t2.geometric average\n" + "\t3.max probability of positive bag.\n" + "\t(default: 1)", "P", 1, "-P [1|2|3]")); result.addElement(new Option( "\tThe type of weight setting for each single-instance:\n" + "\t0.keep the weight to be the same as the original value;\n" + "\t1.weight = 1.0\n" + "\t2.weight = 1.0/Total number of single-instance in the\n" + "\t\tcorresponding bag\n" + "\t3. weight = Total number of single-instance / (Total\n" + "\t\tnumber of bags * Total number of single-instance \n" + "\t\tin the corresponding bag).\n" + "\t(default: 3)", "A", 1, "-A [0|1|2|3]")); Enumeration enu = super.listOptions(); while (enu.hasMoreElements()) { result.addElement(enu.nextElement()); } return result.elements(); } /** * Parses a given list of options. * * Valid options are: * *
* <pre> -P [1|2|3]
*  The method used in testing:
*  1.arithmetic average
*  2.geometric average
*  3.max probability of positive bag.
*  (default: 1)</pre>
*
* <pre> -A [0|1|2|3]
*  The type of weight setting for each single-instance:
*  0.keep the weight to be the same as the original value;
*  1.weight = 1.0
*  2.weight = 1.0/Total number of single-instance in the
*   corresponding bag
*  3. weight = Total number of single-instance / (Total
*   number of bags * Total number of single-instance
*   in the corresponding bag).
*  (default: 3)</pre>
*
* <pre> -D
*  If set, classifier is run in debug mode and
*  may output additional info to the console</pre>
*
* <pre> -W
*  Full name of base classifier.
*  (default: weka.classifiers.rules.ZeroR)</pre>
*
* Options specific to classifier weka.classifiers.rules.ZeroR:
*
-D * If set, classifier is run in debug mode and * may output additional info to the console* * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setDebug(Utils.getFlag('D', options)); String methodString = Utils.getOption('P', options); if (methodString.length() != 0) { setMethod( new SelectedTag(Integer.parseInt(methodString), TAGS_TESTMETHOD)); } else { setMethod( new SelectedTag(TESTMETHOD_ARITHMETIC, TAGS_TESTMETHOD)); } String weightString = Utils.getOption('A', options); if (weightString.length() != 0) { setWeightMethod( new SelectedTag( Integer.parseInt(weightString), MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); } else { setWeightMethod( new SelectedTag( MultiInstanceToPropositional.WEIGHTMETHOD_INVERSE2, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); } super.setOptions(options); } /** * Gets the current settings of the Classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); result.add("-P"); result.add("" + m_Method); result.add("-A"); result.add("" + m_WeightMethod); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); return (String[]) result.toArray(new String[result.size()]); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String weightMethodTipText() { return "The method used for weighting the instances."; } /** * The new method for weighting the instances. * * @param method the new method */ public void setWeightMethod(SelectedTag method){ if (method.getTags() == MultiInstanceToPropositional.TAGS_WEIGHTMETHOD) m_WeightMethod = method.getSelectedTag().getID(); } /** * Returns the current weighting method for instances. 
* * @return the current weighting method */ public SelectedTag getWeightMethod(){ return new SelectedTag( m_WeightMethod, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String methodTipText() { return "The method used for testing."; } /** * Set the method used in testing. * * @param method the index of method to use. */ public void setMethod(SelectedTag method) { if (method.getTags() == TAGS_TESTMETHOD) m_Method = method.getSelectedTag().getID(); } /** * Get the method used in testing. * * @return the index of method used in testing. */ public SelectedTag getMethod() { return new SelectedTag(m_Method, TAGS_TESTMETHOD); } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // class result.disableAllClasses(); result.disableAllClassDependencies(); if (super.getCapabilities().handles(Capability.NOMINAL_CLASS)) result.enable(Capability.NOMINAL_CLASS); if (super.getCapabilities().handles(Capability.BINARY_CLASS)) result.enable(Capability.BINARY_CLASS); result.enable(Capability.RELATIONAL_ATTRIBUTES); result.enable(Capability.MISSING_CLASS_VALUES); // other result.enable(Capability.ONLY_MULTIINSTANCE); return result; } /** * Returns the capabilities of this multi-instance classifier for the * relational data. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getMultiInstanceCapabilities() { Capabilities result = super.getCapabilities(); // class result.disableAllClasses(); result.enable(Capability.NO_CLASS); return result; } /** * Builds the classifier * * @param data the training data to be used for generating the * boosted classifier. 
* @throws Exception if the classifier could not be built successfully
*/
  public void buildClassifier(Instances data) throws Exception {
    // Fail fast, before the capability test and the copy of the data, so a
    // missing base classifier is always reported with this message.
    if (m_Classifier == null) {
      throw new Exception("A base classifier has not been specified!");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    Instances train = new Instances(data);
    train.deleteWithMissingClass();

    if (getDebug()) {
      System.out.println("Start training ...");
    }
    m_NumClasses = train.numClasses();

    //convert the training dataset into single-instance dataset
    m_ConvertToProp.setWeightMethod(getWeightMethod());
    m_ConvertToProp.setInputFormat(train);
    train = Filter.useFilter(train, m_ConvertToProp);
    train.deleteAttributeAt(0); // remove the bag index attribute

    m_Classifier.buildClassifier(train);
  }

  /**
   * Computes the distribution for a given exemplar
   *
   * @param exmp the exemplar for which distribution is computed
   * @return the distribution
   * @throws Exception if the distribution can't be computed successfully
   */
  public double[] distributionForInstance(Instance exmp) throws Exception {
    Instances testData = new Instances (exmp.dataset(),0);
    testData.add(exmp);
    // convert the training dataset into single-instance dataset
    m_ConvertToProp.setWeightMethod( new SelectedTag( MultiInstanceToPropositional.WEIGHTMETHOD_ORIGINAL, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD));
    testData = Filter.useFilter(testData, m_ConvertToProp);
    testData.deleteAttributeAt(0); //remove the bag index attribute
    // Compute the log-probability of the bag
    double [] distribution = new double[m_NumClasses];
    double nI = (double)testData.numInstances();
    double [] maxPr = new double [m_NumClasses];
    for(int i=0; i