/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * ScatterSearchV1.java * Copyright (C) 2008 Adrian Pino * Copyright (C) 2008 University of Waikato, Hamilton, NZ * */ package weka.attributeSelection; import java.io.Serializable; import java.util.ArrayList; import java.util.BitSet; import java.util.Enumeration; import java.util.List; import java.util.Random; import java.util.Vector; import weka.core.Instances; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RevisionUtils; import weka.core.SelectedTag; import weka.core.Tag; import weka.core.TechnicalInformation; import weka.core.TechnicalInformationHandler; import weka.core.Utils; import weka.core.TechnicalInformation.Field; import weka.core.TechnicalInformation.Type; /** * Class for performing the Sequential Scatter Search.

* * Scatter Search :
*
* Performs an Scatter Search through the space of attribute subsets. Start with a population of many significants and diverses subset stops when the result is higher than a given treshold or there's not more improvement
* For more information see:
*
* Felix Garcia Lopez (2004). Solving feature subset selection problem by a Parallel Scatter Search. Elsevier. *

* * Valid options are:

* *

 -Z <num>
 *  Specify the number of subsets to generate 
 *  in the initial population..
* *
 -T <threshold>
 *  Specify the treshold used for considering when a subset is significant.
* *
 -R <0 = greedy combination | 1 = reduced greedy combination >
 *  Specify the kind of combiantion 
 *  for using it in the combination method.
* *
 -S <seed>
 *  Set the random number seed.
 *  (default = 1)
* *
 -D
 *  Verbose output for monitoring the search.
* * * BibTeX: *
 * @book{Lopez2004,
 *    author = {Felix Garcia Lopez},
 *    month = {October},
 *    publisher = {Elsevier},
 *    title = {Solving feature subset selection problem by a Parallel Scatter Search},
 *    year = {2004},
 *    language = {English}
 * }
 * 
*

* * from the Book: Solving feature subset selection problem by a Parallel Scatter Search, Felix Garcia Lopez. * @author Adrian Pino (apinoa@facinf.uho.edu.cu) * @version $Revision: 4977 $ * */ public class ScatterSearchV1 extends ASSearch implements OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -8512041420388121326L; /** number of attributes in the data */ private int m_numAttribs; /** holds the class index */ private int m_classIndex; /** holds the treshhold that delimits the good attributes */ private double m_treshold; /** the initial threshold */ private double m_initialThreshold; /** the kind of comination betwen parents ((0)greedy combination/(1)reduced greedy combination)*/ int m_typeOfCombination; /** random number generation */ private Random m_random; /** seed for random number generation */ private int m_seed; /** verbose output for monitoring the search and debugging */ private boolean m_debug = false; /** holds a report of the search */ private StringBuffer m_InformationReports; /** total number of subsets evaluated during a search */ private int m_totalEvals; /** holds the merit of the best subset found */ protected double m_bestMerit; /** time for procesing the search method */ private long m_processinTime; /** holds the Initial Population of Subsets*/ private List m_population; /** holds the population size*/ private int m_popSize; /** holds the user selected initial population size */ private int m_initialPopSize; /** if no initial user pop size, then this holds the initial * pop size calculated from the number of attributes in the data * (for use in the toString() method) */ private int m_calculatedInitialPopSize; /** holds the subsets most significants and diverses * of the population (ReferenceSet). * * (transient because the subList() method returns * a non serializable Object). */ private transient List m_ReferenceSet; /** holds the greedy combination(reduced or not) of all the subsets of the ReferenceSet*/ private transient List m_parentsCombination; /**holds the attributes ranked*/ private List m_attributeRanking; /**Evaluator used to know the significance of a subset (for guiding the search)*/ private SubsetEvaluator ASEvaluator =null; /** kind of combination */ protected static final int COMBINATION_NOT_REDUCED = 0; protected static final int COMBINATION_REDUCED = 1; ; public static final Tag [] TAGS_SELECTION = { new Tag(COMBINATION_NOT_REDUCED, "Greedy Combination"), new Tag(COMBINATION_REDUCED, "Reduced Greedy Combination") }; /** * Returns a string describing this search method * @return a description of the search suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Scatter Search :\n\nPerforms an Scatter Search " +"through " +"the space of attribute subsets. Start with a population of many significants and diverses subset " +" stops when the result is higher than a given treshold or there's not more improvement\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.BOOK); result.setValue(Field.AUTHOR, "Felix Garcia Lopez"); result.setValue(Field.MONTH, "October"); result.setValue(Field.YEAR, "2004"); result.setValue(Field.TITLE, "Solving feature subset selection problem by a Parallel Scatter Search"); result.setValue(Field.PUBLISHER, "Elsevier"); result.setValue(Field.LANGUAGE, "English"); return result; } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 1.0$"); } public ScatterSearchV1 () { resetOptions(); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String tresholdTipText() { return "Set the treshold that subsets most overcome to be considered as significants"; } /** * Set the treshold * * @param treshold for identifyng significant subsets */ public void setTreshold (double treshold) { m_initialThreshold = treshold; } /** * Get the treshold * * @return the treshold that subsets most overcome to be considered as significants */ public double getTreshold () { return m_initialThreshold; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String populationSizeTipText() { return "Set the number of subset to generate in the initial Population"; } /** * Set the population size * * @param size the number of subset in the initial population */ public void setPopulationSize (int size) { m_initialPopSize = size; } /** * Get the population size * * @return the number of subsets to generate in the initial population */ public int getPopulationSize () { return m_initialPopSize; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String combinationTipText() { return "Set the kind of combination for using it to combine ReferenceSet subsets."; } /** * Set the kind of combination * * @param c the kind of combination of the search */ public void setCombination (SelectedTag c) { if (c.getTags() == TAGS_SELECTION) { m_typeOfCombination = c.getSelectedTag().getID(); } } /** * Get the combination * * @return the kind of combination used in the Combination method */ public SelectedTag getCombination () { return new SelectedTag(m_typeOfCombination, TAGS_SELECTION); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String seedTipText() { return "Set the random seed."; } /** * set the seed for random number generation * @param s seed value */ public void setSeed(int s) { m_seed = s; } /** * get the value of the random number generator's seed * @return the seed for random number generation */ public int getSeed() { return m_seed; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String debugTipText() { return "Turn on verbose output for monitoring the search's progress."; } /** * Set whether verbose output should be generated. * @param d true if output is to be verbose. */ public void setDebug(boolean d) { m_debug = d; } /** * Get whether output is to be verbose * @return true if output will be verbose */ public boolean getDebug() { return m_debug; } /** * Returns an enumeration describing the available options. * @return an enumeration of all the available options. **/ public Enumeration listOptions () { Vector newVector = new Vector(6); newVector.addElement(new Option("\tSpecify the number of subsets to generate " + "\n\tin the initial population.." ,"Z",1 , "-Z ")); newVector.addElement(new Option("\tSpecify the treshold used for considering when a subset is significant." , "T", 1 , "-T ")); newVector.addElement(new Option("\tSpecify the kind of combiantion " + "\n\tfor using it in the combination method." , "R", 1, "-R <0 = greedy combination | 1 = reduced greedy combination >")); newVector.addElement(new Option("\tSet the random number seed." +"\n\t(default = 1)" , "S", 1, "-S ")); newVector.addElement(new Option("\tVerbose output for monitoring the search.","D",0,"-D")); return newVector.elements(); } /** * Parses a given list of options. * * Valid options are:

* * -Z
* Specify the number of subsets to generate in the initial population.

* * -T
* Specify the treshold used for considering when a subset is significant.

* * -R
* Specify the kind of combiantion.

* * -S
* Set the random number seed. * (default = 1) * * -D
* Verbose output for monitoring the search * (default = false) * * * @param options the list of options as an array of strings * @exception Exception if an option is not supported * **/ public void setOptions (String[] options) throws Exception { String optionString; resetOptions(); optionString = Utils.getOption('Z', options); if (optionString.length() != 0) { setPopulationSize(Integer.parseInt(optionString)); } optionString = Utils.getOption('T', options); if (optionString.length() != 0) { setTreshold(Double.parseDouble(optionString)); } optionString = Utils.getOption('R', options); if (optionString.length() != 0) { setCombination(new SelectedTag(Integer.parseInt(optionString), TAGS_SELECTION)); } else { setCombination(new SelectedTag(COMBINATION_NOT_REDUCED, TAGS_SELECTION)); } optionString = Utils.getOption('S', options); if (optionString.length() != 0) { setSeed(Integer.parseInt(optionString)); } setDebug(Utils.getFlag('D', options)); } /** * Gets the current settings of ScatterSearchV1. * * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions () { String[] options = new String[9]; int current = 0; options[current++] = "-T"; options[current++] = "" + getTreshold (); options[current++] = "-Z"; options[current++] = ""+getPopulationSize (); options[current++] = "-R"; options[current++] = ""+String.valueOf (getCombination ().getSelectedTag ().getID ()); options[current++] = "-S"; options[current++] = "" + getSeed(); if (getDebug()) options[current++] = "-D"; while (current < options.length) options[current++] = ""; return options; } /** * returns a description of the search. * @return a description of the search as a String. */ public String toString() { StringBuffer FString = new StringBuffer(); FString.append("\tScatter Search " + "\n\tInit Population: "+m_calculatedInitialPopSize); FString.append("\n\tKind of Combination: " +getCombination ().getSelectedTag ().getReadable ()); FString.append("\n\tRandom number seed: "+m_seed); FString.append("\n\tDebug: "+m_debug); FString.append("\n\tTreshold: " +Utils.doubleToString(Math.abs(getTreshold ()),8,3)+"\n"); FString.append("\tTotal number of subsets evaluated: " + m_totalEvals + "\n"); FString.append("\tMerit of best subset found: " +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n"); /* FString.append("\tTime procesing the search space: " +(double)m_processinTime/1000+" seconds\n"); */ if(m_debug) return FString.toString()+"\n\n"+m_InformationReports.toString (); return FString.toString(); } /** * Searches the attribute subset space using Scatter Search. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array of selected attribute indexes * @exception Exception if the search can't be completed */ public int[] search(ASEvaluation ASEval, Instances data) throws Exception{ m_totalEvals = 0; m_popSize = m_initialPopSize; m_calculatedInitialPopSize = m_initialPopSize; m_treshold = m_initialThreshold; m_processinTime =System.currentTimeMillis (); m_InformationReports = new StringBuffer(); m_numAttribs =data.numAttributes (); m_classIndex =data.classIndex (); if(m_popSize<=0) { m_popSize =m_numAttribs/2; m_calculatedInitialPopSize = m_popSize; } ASEvaluator =(SubsetEvaluator)ASEval; if(!(m_treshold >= 0)){ m_treshold =calculateTreshhold(); m_totalEvals++; } m_random = new Random(m_seed); m_attributeRanking =RankEachAttribute(); CreatePopulation(m_popSize); int bestSolutions =m_popSize/4; int divSolutions =m_popSize/4; if(m_popSize < 4){ bestSolutions = m_popSize/2; divSolutions = m_popSize/2; if(m_popSize == 1) return attributeList(((Subset)m_population.get (0)).subset); } m_ReferenceSet =new ArrayList(); for (int i = 0; i bestTemp.merit)*/) { //while(){ CombineParents(); ImproveSolutions(); // } bestTemp =m_ReferenceSet.get (0); int numBest =m_ReferenceSet.size ()/2; int numDiverses =m_ReferenceSet.size ()/2; UpdateReferenceSet(numBest, numDiverses); m_ReferenceSet = m_ReferenceSet.subList (0,numBest+numDiverses); } m_InformationReports.append("\nLast Reference Set Updated:\n"); m_InformationReports.append ("merit \tsubset\n"); for (int i = 0; i ReferenceSet, int bestSolutions, int divSolutions){ //Sorting the Initial ReferenceSet ReferenceSet =bubbleSubsetSort (ReferenceSet); // storing all the attributes that are now in the ReferenceSet (just till bestSolutions) BitSet allBits_RefSet =getAllBits (ReferenceSet.subList (0,bestSolutions)); // for stopping when ReferenceSet.size () ==bestSolutions+divSolutions int refSetlength =bestSolutions; int total =bestSolutions+divSolutions; while (refSetlength aux =new ArrayList(); for (int i =refSetlength; i ranking =new ArrayList(); /* for(int j=aux1.nextClearBit (0); j<=m_numAttribs; j=aux1.nextClearBit(j+1)){ if(j ==m_classIndex)continue; BitSet aux2 =new BitSet(m_numAttribs); aux2.set (j); double merit =ASEvaluator.evaluateSubset (aux2); m_totalEvals++; ranking.add (new Subset((BitSet)aux2.clone (), merit)); } ranking =bubbleSubsetSort (ranking); */ for (int k =0; k (); // this two 'for' are for selecting parents in the refSet for (int i= 0; i merit2){ child1 =best1.clone (); continue; } if(merit1 best2.subset.cardinality ()){ child2 =best2.clone (); continue; } if(best1.subset.cardinality () < best2.subset.cardinality ()){ child1 =best1.clone (); continue; } if(best1.subset.cardinality () == best2.subset.cardinality ()){ double random = m_random.nextDouble (); if(random < 0.5)child1 =best1.clone (); else child2 =best2.clone (); continue; } } } }else{ m_parentsCombination.add (child1); m_parentsCombination.add (child2); improvement =false; } } } } m_parentsCombination = filterSubset (m_parentsCombination,m_ReferenceSet.size()); GenerateReferenceSet (m_parentsCombination,m_ReferenceSet.size ()/2, m_ReferenceSet.size ()/2); m_parentsCombination = m_parentsCombination.subList (0, m_ReferenceSet.size ()); } /** * Create the initial Population * * @param popSize the size of the initial population * @exception Exception if there is a trouble evaluating any solution */ public void CreatePopulation(int popSize) throws Exception{ InitPopulation(popSize); /** Delimit the best attributes from the worst*/ int segmentation =m_numAttribs/2; /*TEST*/ /* System.out.println ("AttributeRanking"); for (int i = 0; i attributeRankingCopy = new ArrayList(); for (int j = 0; j last_evaluation){ m_population.set (i,joiners); last_evaluation =current_evaluation; try { attributeRankingCopy.set (random_number, attributeRankingCopy.get (segmentation+1)); attributeRankingCopy.remove (segmentation+1); }catch (IndexOutOfBoundsException ex) { attributeRankingCopy.set (random_number,new Subset(new BitSet(m_numAttribs),0)); continue; } } else{ // there's not more improvement break; } } } //m_population =bubbleSubsetSort (m_population); } /** * Rank all the attributes individually acording to their merits * * @return an ordered List of Subsets with just one attribute * @exception Exception if the evaluation can not be completed */ public List RankEachAttribute() throws Exception{ List result =new ArrayList(); for (int i = 0; i=0; i =gens.nextSetBit(i+1)){ BitSet aux =(BitSet)subset.subset.clone (); if(aux.get (i))continue; aux.set (i); double merit2 =ASEvaluator.evaluateSubset (aux); m_totalEvals++; if(merit2 >merit1){ merit1 =merit2; result =new Subset(aux,merit1); } } return result; } /** * Sort a List of subsets according to their merits * * @param subsetList the subsetList to be ordered * @return a List with ordered subsets */ public List bubbleSubsetSort(List subsetList){ List result =new ArrayList(); for (int i = 0; i merit1){ Subset temp =subset1; subsetList.set (i,subset2); subsetList.set (j,temp); subset1 =subset2; merit1 =subset1.merit; } } } return subsetList; } /** * get the index in a List where this have the biggest number * * @param simDif the Lists of numbers for getting from them the index of the bigger * @return an index that represents where the bigest number is. */ public int getIndexofBiggest(List simDif){ int aux =-99999; int result1 =-1; List equalSimDif =new ArrayList(); if(simDif.size ()==0) return -1; for (int i = 0; iaux){ aux =simDif.get (i); result1 =i; } } for (int i =0; i subsets){ BitSet result =new BitSet(m_numAttribs); for (int i =0; i =0; j=aux.nextSetBit(j+1)) { result.set (j); } } return result; } /** * Creating space for introducing the population * * @param popSize the number of subset in the initial population */ public void InitPopulation(int popSize){ m_population =new ArrayList(); for (int i = 0; i weightVector =new ArrayList(); BitSet res =result.subset; for(int i=res.nextSetBit(0); i>=0; i=res.nextSetBit(i+1)){ double merits =0; int numSolutions =0; Subset solution =null; for (int j = 0; j =avgAcurracy){ newResult.or (aux.subset); } } double merit =ASEvaluator.evaluateSubset (newResult); result =new Subset(newResult, merit); } return result; } public int generateRandomNumber(int limit){ return (int)Math.round (Math.random ()*(limit+0.4)); } /** * Calculate the treshold of a dataSet given an evaluator * * @return the treshhold of the dataSet * @exception Exception if the calculation can not be completed */ public double calculateTreshhold() throws Exception{ BitSet fullSet =new BitSet(m_numAttribs); for (int i= 0; i< m_numAttribs; i++) { if(i ==m_classIndex)continue; fullSet.set (i); } return ASEvaluator.evaluateSubset (fullSet); } /** * Calculate the Simetric Diference of two subsets * * @return the Simetric Diference * @exception Exception if the calculation can not be completed */ public int SimetricDiference(Subset subset, BitSet bitset){ BitSet aux =subset.clone ().subset; aux.xor (bitset); return aux.cardinality (); } /** * Filter a given Lis of Subsets removing the equals subsets * @param subsetList to filter * @param preferredSize the preferred size of the new List (if it is -1, then the filter is make it * for all subsets, else then the filter method stops when the given preferred * size is reached or all the subset have been filtered). * @return a new List filtered * @exception Exception if the calculation can not be completed */ public List filterSubset(List subsetList, int preferredSize){ if(subsetList.size () <=preferredSize && preferredSize !=-1)return subsetList; for (int i =0; i indexes =new ArrayList(); for (int i = 0; i