/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * MIWrapper.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */ package weka.classifiers.mi; import weka.classifiers.SingleClassifierEnhancer; import weka.core.Capabilities; import weka.core.Instance; import weka.core.Instances; import weka.core.MultiInstanceCapabilitiesHandler; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RevisionUtils; import weka.core.SelectedTag; import weka.core.Tag; import weka.core.TechnicalInformation; import weka.core.TechnicalInformationHandler; import weka.core.Utils; import weka.core.Capabilities.Capability; import weka.core.TechnicalInformation.Field; import weka.core.TechnicalInformation.Type; import weka.filters.Filter; import weka.filters.unsupervised.attribute.MultiInstanceToPropositional; import java.util.Enumeration; import java.util.Vector; /** * A simple Wrapper method for applying standard propositional learners to multi-instance data.
*
* For more information see:
*
 * E. T. Frank, X. Xu (2003). Applying propositional learning algorithms to multi-instance data. Department of Computer Science, University of Waikato, Hamilton, NZ.
 * <p>
 *
 * BibTeX:
 * <pre>
 * &#64;techreport{Frank2003,
 *    address = {Department of Computer Science, University of Waikato, Hamilton, NZ},
 *    author = {E. T. Frank and X. Xu},
 *    institution = {University of Waikato},
 *    month = {06},
 *    title = {Applying propositional learning algorithms to multi-instance data},
 *    year = {2003}
 * }
 * </pre>
 * <p>
 *
 * Valid options are: <p>
 *
 * <pre> -P [1|2|3]
 *  The method used in testing:
 *  1.arithmetic average
 *  2.geometric average
 *  3.max probability of positive bag.
 *  (default: 1)</pre>
 *
 * <pre> -A [0|1|2|3]
 *  The type of weight setting for each single-instance:
 *  0.keep the weight to be the same as the original value;
 *  1.weight = 1.0
 *  2.weight = 1.0/Total number of single-instance in the
 *   corresponding bag
 *  3. weight = Total number of single-instance / (Total
 *   number of bags * Total number of single-instance
 *   in the corresponding bag).
 *  (default: 3)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 * <pre> -W
 *  Full name of base classifier.
 *  (default: weka.classifiers.rules.ZeroR)</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 * </pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
* * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Xin Xu (xx5@cs.waikato.ac.nz) * @version $Revision: 1.5 $ */ public class MIWrapper extends SingleClassifierEnhancer implements MultiInstanceCapabilitiesHandler, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -7707766152904315910L; /** The number of the class labels */ protected int m_NumClasses; /** arithmetic average */ public static final int TESTMETHOD_ARITHMETIC = 1; /** geometric average */ public static final int TESTMETHOD_GEOMETRIC = 2; /** max probability of positive bag */ public static final int TESTMETHOD_MAXPROB = 3; /** the test methods */ public static final Tag[] TAGS_TESTMETHOD = { new Tag(TESTMETHOD_ARITHMETIC, "arithmetic average"), new Tag(TESTMETHOD_GEOMETRIC, "geometric average"), new Tag(TESTMETHOD_MAXPROB, "max probability of positive bag") }; /** the test method */ protected int m_Method = TESTMETHOD_GEOMETRIC; /** Filter used to convert MI dataset into single-instance dataset */ protected MultiInstanceToPropositional m_ConvertToProp = new MultiInstanceToPropositional(); /** the single-instance weight setting method */ protected int m_WeightMethod = MultiInstanceToPropositional.WEIGHTMETHOD_INVERSE2; /** * Returns a string describing this filter * * @return a description of the filter suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "A simple Wrapper method for applying standard propositional learners " + "to multi-instance data.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. 
* * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.TECHREPORT); result.setValue(Field.AUTHOR, "E. T. Frank and X. Xu"); result.setValue(Field.TITLE, "Applying propositional learning algorithms to multi-instance data"); result.setValue(Field.YEAR, "2003"); result.setValue(Field.MONTH, "06"); result.setValue(Field.INSTITUTION, "University of Waikato"); result.setValue(Field.ADDRESS, "Department of Computer Science, University of Waikato, Hamilton, NZ"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result = new Vector(); result.addElement(new Option( "\tThe method used in testing:\n" + "\t1.arithmetic average\n" + "\t2.geometric average\n" + "\t3.max probability of positive bag.\n" + "\t(default: 1)", "P", 1, "-P [1|2|3]")); result.addElement(new Option( "\tThe type of weight setting for each single-instance:\n" + "\t0.keep the weight to be the same as the original value;\n" + "\t1.weight = 1.0\n" + "\t2.weight = 1.0/Total number of single-instance in the\n" + "\t\tcorresponding bag\n" + "\t3. weight = Total number of single-instance / (Total\n" + "\t\tnumber of bags * Total number of single-instance \n" + "\t\tin the corresponding bag).\n" + "\t(default: 3)", "A", 1, "-A [0|1|2|3]")); Enumeration enu = super.listOptions(); while (enu.hasMoreElements()) { result.addElement(enu.nextElement()); } return result.elements(); } /** * Parses a given list of options.

   * <p>
   *
   * Valid options are: <p>
   *
   * <pre> -P [1|2|3]
   *  The method used in testing:
   *  1.arithmetic average
   *  2.geometric average
   *  3.max probability of positive bag.
   *  (default: 1)</pre>
   *
   * <pre> -A [0|1|2|3]
   *  The type of weight setting for each single-instance:
   *  0.keep the weight to be the same as the original value;
   *  1.weight = 1.0
   *  2.weight = 1.0/Total number of single-instance in the
   *   corresponding bag
   *  3. weight = Total number of single-instance / (Total
   *   number of bags * Total number of single-instance
   *   in the corresponding bag).
   *  (default: 3)</pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <pre> -W
   *  Full name of base classifier.
   *  (default: weka.classifiers.rules.ZeroR)</pre>
   *
   * <pre>
   * Options specific to classifier weka.classifiers.rules.ZeroR:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
* * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setDebug(Utils.getFlag('D', options)); String methodString = Utils.getOption('P', options); if (methodString.length() != 0) { setMethod( new SelectedTag(Integer.parseInt(methodString), TAGS_TESTMETHOD)); } else { setMethod( new SelectedTag(TESTMETHOD_ARITHMETIC, TAGS_TESTMETHOD)); } String weightString = Utils.getOption('A', options); if (weightString.length() != 0) { setWeightMethod( new SelectedTag( Integer.parseInt(weightString), MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); } else { setWeightMethod( new SelectedTag( MultiInstanceToPropositional.WEIGHTMETHOD_INVERSE2, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); } super.setOptions(options); } /** * Gets the current settings of the Classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); result.add("-P"); result.add("" + m_Method); result.add("-A"); result.add("" + m_WeightMethod); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); return (String[]) result.toArray(new String[result.size()]); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String weightMethodTipText() { return "The method used for weighting the instances."; } /** * The new method for weighting the instances. * * @param method the new method */ public void setWeightMethod(SelectedTag method){ if (method.getTags() == MultiInstanceToPropositional.TAGS_WEIGHTMETHOD) m_WeightMethod = method.getSelectedTag().getID(); } /** * Returns the current weighting method for instances. 
* * @return the current weighting method */ public SelectedTag getWeightMethod(){ return new SelectedTag( m_WeightMethod, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String methodTipText() { return "The method used for testing."; } /** * Set the method used in testing. * * @param method the index of method to use. */ public void setMethod(SelectedTag method) { if (method.getTags() == TAGS_TESTMETHOD) m_Method = method.getSelectedTag().getID(); } /** * Get the method used in testing. * * @return the index of method used in testing. */ public SelectedTag getMethod() { return new SelectedTag(m_Method, TAGS_TESTMETHOD); } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // class result.disableAllClasses(); result.disableAllClassDependencies(); if (super.getCapabilities().handles(Capability.NOMINAL_CLASS)) result.enable(Capability.NOMINAL_CLASS); if (super.getCapabilities().handles(Capability.BINARY_CLASS)) result.enable(Capability.BINARY_CLASS); result.enable(Capability.RELATIONAL_ATTRIBUTES); result.enable(Capability.MISSING_CLASS_VALUES); // other result.enable(Capability.ONLY_MULTIINSTANCE); return result; } /** * Returns the capabilities of this multi-instance classifier for the * relational data. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getMultiInstanceCapabilities() { Capabilities result = super.getCapabilities(); // class result.disableAllClasses(); result.enable(Capability.NO_CLASS); return result; } /** * Builds the classifier * * @param data the training data to be used for generating the * boosted classifier. 
* @throws Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    // Check the configuration first, so that a missing base classifier is
    // reported with this message before any work is done on the data.
    if (m_Classifier == null) {
      throw new Exception("A base classifier has not been specified!");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class (on a copy, the caller's data is untouched)
    Instances train = new Instances(data);
    train.deleteWithMissingClass();

    if (getDebug()) {
      System.out.println("Start training ...");
    }
    m_NumClasses = train.numClasses();

    // convert the training dataset into single-instance dataset
    m_ConvertToProp.setWeightMethod(getWeightMethod());
    m_ConvertToProp.setInputFormat(train);
    train = Filter.useFilter(train, m_ConvertToProp);
    train.deleteAttributeAt(0); // remove the bag index attribute

    m_Classifier.buildClassifier(train);
  }

  /** * Computes the distribution for a given exemplar * * @param exmp the exemplar for which distribution is computed * @return the distribution * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance exmp) throws Exception { Instances testData = new Instances (exmp.dataset(),0); testData.add(exmp); // convert the training dataset into single-instance dataset m_ConvertToProp.setWeightMethod( new SelectedTag( MultiInstanceToPropositional.WEIGHTMETHOD_ORIGINAL, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); testData = Filter.useFilter(testData, m_ConvertToProp); testData.deleteAttributeAt(0); //remove the bag index attribute // Compute the log-probability of the bag double [] distribution = new double[m_NumClasses]; double nI = (double)testData.numInstances(); double [] maxPr = new double [m_NumClasses]; for(int i=0; i0.999) dist[j] = 0.999; distribution[j] += Math.log(dist[j])/nI; break; case TESTMETHOD_MAXPROB: if (dist[j]>maxPr[j]) maxPr[j] = dist[j]; break; } } } if(m_Method == TESTMETHOD_GEOMETRIC) for(int j=0; j