/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ExhaustiveSearch.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import java.math.BigInteger;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Vector;
/**
* ExhaustiveSearch :
*
* Performs an exhaustive search through the space of attribute subsets starting from the empty set of attrubutes. Reports the best subset found.
*
-V * Output subsets as the search progresses. * (default = false).* * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.15 $ */ public class ExhaustiveSearch extends ASSearch implements OptionHandler { /** for serialization */ static final long serialVersionUID = 5741842861142379712L; /** the best feature set found during the search */ private BitSet m_bestGroup; /** the merit of the best subset found */ private double m_bestMerit; /** does the data have a class */ private boolean m_hasClass; /** holds the class index */ private int m_classIndex; /** number of attributes in the data */ private int m_numAttribs; /** if true, then ouput new best subsets as the search progresses */ private boolean m_verbose; /** the number of subsets evaluated during the search */ private int m_evaluations; /** * Returns a string describing this search method * @return a description of the search suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "ExhaustiveSearch : \n\nPerforms an exhaustive search through " +"the space of attribute subsets starting from the empty set of " +"attrubutes. Reports the best subset found."; } /** * Constructor */ public ExhaustiveSearch () { resetOptions(); } /** * Returns an enumeration describing the available options. * @return an enumeration of all the available options. **/ public Enumeration listOptions () { Vector newVector = new Vector(2); newVector.addElement(new Option("\tOutput subsets as the search progresses." +"\n\t(default = false)." , "V", 0 , "-V")); return newVector.elements(); } /** * Parses a given list of options. * * Valid options are: * *
-V * Output subsets as the search progresses. * (default = false).* * * @param options the list of options as an array of strings * @throws Exception if an option is not supported * **/ public void setOptions (String[] options) throws Exception { resetOptions(); setVerbose(Utils.getFlag('V',options)); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String verboseTipText() { return "Print progress information. Sends progress info to the terminal " +"as the search progresses."; } /** * set whether or not to output new best subsets as the search proceeds * @param v true if output is to be verbose */ public void setVerbose(boolean v) { m_verbose = v; } /** * get whether or not output is verbose * @return true if output is set to verbose */ public boolean getVerbose() { return m_verbose; } /** * Gets the current settings of RandomSearch. * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions () { String[] options = new String[1]; int current = 0; if (m_verbose) { options[current++] = "-V"; } while (current < options.length) { options[current++] = ""; } return options; } /** * prints a description of the search * @return a description of the search as a string */ public String toString() { StringBuffer text = new StringBuffer(); text.append("\tExhaustive Search.\n\tStart set: "); text.append("no attributes\n"); text.append("\tNumber of evaluations: "+m_evaluations+"\n"); text.append("\tMerit of best subset found: " +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n"); return text.toString(); } /** * Searches the attribute subset space using an exhaustive search. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search (ASEvaluation ASEval, Instances data) throws Exception { double best_merit; double tempMerit; boolean done = false; int sizeOfBest; int tempSize; BigInteger space = BigInteger.ZERO; m_evaluations = 0; m_numAttribs = data.numAttributes(); m_bestGroup = new BitSet(m_numAttribs); if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval; m_numAttribs = data.numAttributes(); best_merit = ASEvaluator.evaluateSubset(m_bestGroup); m_evaluations++; sizeOfBest = countFeatures(m_bestGroup); BitSet tempGroup = new BitSet(m_numAttribs); tempMerit = ASEvaluator.evaluateSubset(tempGroup); if (m_verbose) { System.out.println("Zero feature subset (" +Utils.doubleToString(Math. abs(tempMerit),8,5) +")"); } if (tempMerit >= best_merit) { tempSize = countFeatures(tempGroup); if (tempMerit > best_merit || (tempSize < sizeOfBest)) { best_merit = tempMerit; m_bestGroup = (BitSet)(tempGroup.clone()); sizeOfBest = tempSize; } } int numatts = (m_hasClass) ? m_numAttribs - 1 : m_numAttribs; BigInteger searchSpaceEnd = BigInteger.ONE.add(BigInteger.ONE).pow(numatts).subtract(BigInteger.ONE); while (!done) { // the next subset space = space.add(BigInteger.ONE); if (space.equals(searchSpaceEnd)) { done = true; } tempGroup.clear(); for (int i = 0; i < numatts; i++) { if (space.testBit(i)) { if (!m_hasClass) { tempGroup.set(i); } else { int j = (i >= m_classIndex) ? i + 1 : i; tempGroup.set(j); } } } tempMerit = ASEvaluator.evaluateSubset(tempGroup); m_evaluations++; if (tempMerit >= best_merit) { tempSize = countFeatures(tempGroup); if (tempMerit > best_merit || (tempSize < sizeOfBest)) { best_merit = tempMerit; m_bestGroup = (BitSet)(tempGroup.clone()); sizeOfBest = tempSize; if (m_verbose) { System.out.println("New best subset (" +Utils.doubleToString(Math. abs(best_merit),8,5) +"): "+printSubset(m_bestGroup)); } } } } m_bestMerit = best_merit; return attributeList(m_bestGroup); } /** * counts the number of features in a subset * @param featureSet the feature set for which to count the features * @return the number of features in the subset */ private int countFeatures(BitSet featureSet) { int count = 0; for (int i=0;i