/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * GeneticSearch.java * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand * */ package weka.attributeSelection; import weka.core.Instances; import weka.core.Option; import weka.core.OptionHandler; import weka.core.Range; import weka.core.RevisionHandler; import weka.core.RevisionUtils; import weka.core.TechnicalInformation; import weka.core.TechnicalInformationHandler; import weka.core.Utils; import weka.core.TechnicalInformation.Field; import weka.core.TechnicalInformation.Type; import java.io.Serializable; import java.util.BitSet; import java.util.Enumeration; import java.util.Hashtable; import java.util.Random; import java.util.Vector; /** * GeneticSearch:
*
* Performs a search using the simple genetic algorithm described in Goldberg (1989).
*
* For more information see:
*
* David E. Goldberg (1989). Genetic algorithms in search, optimization and machine learning. Addison-Wesley. *

* * BibTeX: *

 * @book{Goldberg1989,
 *    author = {David E. Goldberg},
 *    publisher = {Addison-Wesley},
 *    title = {Genetic algorithms in search, optimization and machine learning},
 *    year = {1989},
 *    ISBN = {0201157675}
 * }
 * 
*

* * Valid options are:

* *

 -P <start set>
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.If supplied, the starting set becomes
 *  one member of the initial random
 *  population.
* *
 -Z <population size>
 *  Set the size of the population (even number).
 *  (default = 20).
* *
 -G <number of generations>
 *  Set the number of generations.
 *  (default = 20)
* *
 -C <probability of crossover>
 *  Set the probability of crossover.
 *  (default = 0.6)
* *
 -M <probability of mutation>
 *  Set the probability of mutation.
 *  (default = 0.033)
* *
 -R <report frequency>
 *  Set frequency of generation reports.
 *  e.g, setting the value to 5 will 
 *  report every 5th generation
 *  (default = number of generations)
* *
 -S <seed>
 *  Set the random number seed.
 *  (default = 1)
* * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 5286 $ */ public class GeneticSearch extends ASSearch implements StartSetHandler, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -1618264232838472679L; /** * holds a starting set as an array of attributes. Becomes one member of the * initial random population */ private int[] m_starting; /** holds the start set for the search as a Range */ private Range m_startRange; /** does the data have a class */ private boolean m_hasClass; /** holds the class index */ private int m_classIndex; /** number of attributes in the data */ private int m_numAttribs; /** the current population */ private GABitSet [] m_population; /** the number of individual solutions */ private int m_popSize; /** the best population member found during the search */ private GABitSet m_best; /** the number of features in the best population member */ private int m_bestFeatureCount; /** the number of entries to cache for lookup */ private int m_lookupTableSize; /** the lookup table */ private Hashtable m_lookupTable; /** random number generation */ private Random m_random; /** seed for random number generation */ private int m_seed; /** the probability of crossover occuring */ private double m_pCrossover; /** the probability of mutation occuring */ private double m_pMutation; /** sum of the current population fitness */ private double m_sumFitness; private double m_maxFitness; private double m_minFitness; private double m_avgFitness; /** the maximum number of generations to evaluate */ private int m_maxGenerations; /** how often reports are generated */ private int m_reportFrequency; /** holds the generation reports */ private StringBuffer m_generationReports; // Inner class /** * A bitset for the genetic algorithm */ protected class GABitSet implements Cloneable, Serializable, RevisionHandler { /** for serialization */ static final long serialVersionUID = -2930607837482622224L; /** the bitset */ private BitSet m_chromosome; /** holds raw merit */ private double m_objective = -Double.MAX_VALUE; /** the fitness */ private double m_fitness; /** * Constructor */ public GABitSet () { m_chromosome = new BitSet(); } /** * makes a copy of this GABitSet * @return a copy of the object * @throws CloneNotSupportedException if something goes wrong */ public Object clone() throws CloneNotSupportedException { GABitSet temp = new GABitSet(); temp.setObjective(this.getObjective()); temp.setFitness(this.getFitness()); temp.setChromosome((BitSet)(this.m_chromosome.clone())); return temp; //return super.clone(); } /** * sets the objective merit value * @param objective the objective value of this population member */ public void setObjective(double objective) { m_objective = objective; } /** * gets the objective merit * @return the objective merit of this population member */ public double getObjective() { return m_objective; } /** * sets the scaled fitness * @param fitness the scaled fitness of this population member */ public void setFitness(double fitness) { m_fitness = fitness; } /** * gets the scaled fitness * @return the scaled fitness of this population member */ public double getFitness() { return m_fitness; } /** * get the chromosome * @return the chromosome of this population member */ public BitSet getChromosome() { return m_chromosome; } /** * set the chromosome * @param c the chromosome to be set for this population member */ public void setChromosome(BitSet c) { m_chromosome = c; } /** * unset a bit in the chromosome * @param bit the bit to be cleared */ public void clear(int bit) { m_chromosome.clear(bit); } /** * set a bit in the chromosome * @param bit the bit to be set */ public void set(int bit) { m_chromosome.set(bit); } /** * get the value of a bit in the chromosome * @param bit the bit to query * @return the value of the bit */ public boolean get(int bit) { return m_chromosome.get(bit); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 5286 $"); } } /** * Returns an enumeration describing the available options. * @return an enumeration of all the available options. **/ public Enumeration listOptions () { Vector newVector = new Vector(6); newVector.addElement(new Option("\tSpecify a starting set of attributes." + "\n\tEg. 1,3,5-7." +"If supplied, the starting set becomes" +"\n\tone member of the initial random" +"\n\tpopulation." ,"P",1 , "-P ")); newVector.addElement(new Option("\tSet the size of the population (even number)." +"\n\t(default = 20)." , "Z", 1 , "-Z ")); newVector.addElement(new Option("\tSet the number of generations." +"\n\t(default = 20)" , "G", 1, "-G ")); newVector.addElement(new Option("\tSet the probability of crossover." +"\n\t(default = 0.6)" , "C", 1, "-C ")); newVector.addElement(new Option("\tSet the probability of mutation." +"\n\t(default = 0.033)" , "M", 1, "-M ")); newVector.addElement(new Option("\tSet frequency of generation reports." +"\n\te.g, setting the value to 5 will " +"\n\treport every 5th generation" +"\n\t(default = number of generations)" , "R", 1, "-R ")); newVector.addElement(new Option("\tSet the random number seed." +"\n\t(default = 1)" , "S", 1, "-S ")); return newVector.elements(); } /** * Parses a given list of options.

* * Valid options are:

* *

 -P <start set>
   *  Specify a starting set of attributes.
   *  Eg. 1,3,5-7.If supplied, the starting set becomes
   *  one member of the initial random
   *  population.
* *
 -Z <population size>
   *  Set the size of the population (even number).
   *  (default = 20).
* *
 -G <number of generations>
   *  Set the number of generations.
   *  (default = 20)
* *
 -C <probability of crossover>
   *  Set the probability of crossover.
   *  (default = 0.6)
* *
 -M <probability of mutation>
   *  Set the probability of mutation.
   *  (default = 0.033)
* *
 -R <report frequency>
   *  Set frequency of generation reports.
   *  e.g, setting the value to 5 will 
   *  report every 5th generation
   *  (default = number of generations)
* *
 -S <seed>
   *  Set the random number seed.
   *  (default = 1)
* * * @param options the list of options as an array of strings * @throws Exception if an option is not supported * **/ public void setOptions (String[] options) throws Exception { String optionString; resetOptions(); optionString = Utils.getOption('P', options); if (optionString.length() != 0) { setStartSet(optionString); } optionString = Utils.getOption('Z', options); if (optionString.length() != 0) { setPopulationSize(Integer.parseInt(optionString)); } optionString = Utils.getOption('G', options); if (optionString.length() != 0) { setMaxGenerations(Integer.parseInt(optionString)); setReportFrequency(Integer.parseInt(optionString)); } optionString = Utils.getOption('C', options); if (optionString.length() != 0) { setCrossoverProb((new Double(optionString)).doubleValue()); } optionString = Utils.getOption('M', options); if (optionString.length() != 0) { setMutationProb((new Double(optionString)).doubleValue()); } optionString = Utils.getOption('R', options); if (optionString.length() != 0) { setReportFrequency(Integer.parseInt(optionString)); } optionString = Utils.getOption('S', options); if (optionString.length() != 0) { setSeed(Integer.parseInt(optionString)); } } /** * Gets the current settings of ReliefFAttributeEval. * * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions () { String[] options = new String[14]; int current = 0; if (!(getStartSet().equals(""))) { options[current++] = "-P"; options[current++] = ""+startSetToString(); } options[current++] = "-Z"; options[current++] = "" + getPopulationSize(); options[current++] = "-G"; options[current++] = "" + getMaxGenerations(); options[current++] = "-C"; options[current++] = "" + getCrossoverProb(); options[current++] = "-M"; options[current++] = "" + getMutationProb(); options[current++] = "-R"; options[current++] = "" + getReportFrequency(); options[current++] = "-S"; options[current++] = "" + getSeed(); while (current < options.length) { options[current++] = ""; } return options; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String startSetTipText() { return "Set a start point for the search. This is specified as a comma " +"seperated list off attribute indexes starting at 1. It can include " +"ranges. Eg. 1,2,5-9,17. The start set becomes one of the population " +"members of the initial population."; } /** * Sets a starting set of attributes for the search. It is the * search method's responsibility to report this start set (if any) * in its toString() method. * @param startSet a string containing a list of attributes (and or ranges), * eg. 1,2,6,10-15. * @throws Exception if start set can't be set. */ public void setStartSet (String startSet) throws Exception { m_startRange.setRanges(startSet); } /** * Returns a list of attributes (and or attribute ranges) as a String * @return a list of attributes (and or attribute ranges) */ public String getStartSet () { return m_startRange.getRanges(); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String seedTipText() { return "Set the random seed."; } /** * set the seed for random number generation * @param s seed value */ public void setSeed(int s) { m_seed = s; } /** * get the value of the random number generator's seed * @return the seed for random number generation */ public int getSeed() { return m_seed; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String reportFrequencyTipText() { return "Set how frequently reports are generated. Default is equal to " +"the number of generations meaning that a report will be printed for " +"initial and final generations. Setting the value to 5 will result in " +"a report being printed every 5 generations."; } /** * set how often reports are generated * @param f generate reports every f generations */ public void setReportFrequency(int f) { m_reportFrequency = f; } /** * get how often repports are generated * @return how often reports are generated */ public int getReportFrequency() { return m_reportFrequency; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String mutationProbTipText() { return "Set the probability of mutation occuring."; } /** * set the probability of mutation * @param m the probability for mutation occuring */ public void setMutationProb(double m) { m_pMutation = m; } /** * get the probability of mutation * @return the probability of mutation occuring */ public double getMutationProb() { return m_pMutation; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String crossoverProbTipText() { return "Set the probability of crossover. This is the probability that " +"two population members will exchange genetic material."; } /** * set the probability of crossover * @param c the probability that two population members will exchange * genetic material */ public void setCrossoverProb(double c) { m_pCrossover = c; } /** * get the probability of crossover * @return the probability of crossover */ public double getCrossoverProb() { return m_pCrossover; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String maxGenerationsTipText() { return "Set the number of generations to evaluate."; } /** * set the number of generations to evaluate * @param m the number of generations */ public void setMaxGenerations(int m) { m_maxGenerations = m; } /** * get the number of generations * @return the maximum number of generations */ public int getMaxGenerations() { return m_maxGenerations; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String populationSizeTipText() { return "Set the population size (even number), this is the number of individuals " +"(attribute sets) in the population."; } /** * set the population size * @param p the size of the population */ public void setPopulationSize(int p) { if (p % 2 == 0) m_popSize = p; else System.err.println("Population size needs to be an even number!"); } /** * get the size of the population * @return the population size */ public int getPopulationSize() { return m_popSize; } /** * Returns a string describing this search method * @return a description of the search suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "GeneticSearch:\n\nPerforms a search using the simple genetic " + "algorithm described in Goldberg (1989).\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.BOOK); result.setValue(Field.AUTHOR, "David E. Goldberg"); result.setValue(Field.YEAR, "1989"); result.setValue(Field.TITLE, "Genetic algorithms in search, optimization and machine learning"); result.setValue(Field.ISBN, "0201157675"); result.setValue(Field.PUBLISHER, "Addison-Wesley"); return result; } /** * Constructor. Make a new GeneticSearch object */ public GeneticSearch() { resetOptions(); } /** * converts the array of starting attributes to a string. This is * used by getOptions to return the actual attributes specified * as the starting set. This is better than using m_startRanges.getRanges() * as the same start set can be specified in different ways from the * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that * is stored in a database is comparable. * @return a comma seperated list of individual attribute numbers as a String */ private String startSetToString() { StringBuffer FString = new StringBuffer(); boolean didPrint; if (m_starting == null) { return getStartSet(); } for (int i = 0; i < m_starting.length; i++) { didPrint = false; if ((m_hasClass == false) || (m_hasClass == true && i != m_classIndex)) { FString.append((m_starting[i] + 1)); didPrint = true; } if (i == (m_starting.length - 1)) { FString.append(""); } else { if (didPrint) { FString.append(","); } } } return FString.toString(); } /** * returns a description of the search * @return a description of the search as a String */ public String toString() { StringBuffer GAString = new StringBuffer(); GAString.append("\tGenetic search.\n\tStart set: "); if (m_starting == null) { GAString.append("no attributes\n"); } else { GAString.append(startSetToString()+"\n"); } GAString.append("\tPopulation size: "+m_popSize); GAString.append("\n\tNumber of generations: "+m_maxGenerations); GAString.append("\n\tProbability of crossover: " +Utils.doubleToString(m_pCrossover,6,3)); GAString.append("\n\tProbability of mutation: " +Utils.doubleToString(m_pMutation,6,3)); GAString.append("\n\tReport frequency: "+m_reportFrequency); GAString.append("\n\tRandom number seed: "+m_seed+"\n"); GAString.append(m_generationReports.toString()); return GAString.toString(); } /** * Searches the attribute subset space using a genetic algorithm. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search (ASEvaluation ASEval, Instances data) throws Exception { m_best = null; m_generationReports = new StringBuffer(); if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval; m_numAttribs = data.numAttributes(); m_startRange.setUpper(m_numAttribs-1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // initial random population m_lookupTable = new Hashtable(m_lookupTableSize); m_random = new Random(m_seed); m_population = new GABitSet [m_popSize]; // set up random initial population initPopulation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); checkBest(); m_generationReports.append(populationReport(0)); boolean converged; for (int i=1;i<=m_maxGenerations;i++) { generation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); // find the best pop member and check for convergence converged = checkBest(); if ((i == m_maxGenerations) || ((i % m_reportFrequency) == 0) || (converged == true)) { m_generationReports.append(populationReport(i)); if (converged == true) { break; } } } return attributeList(m_best.getChromosome()); } /** * converts a BitSet into a list of attribute indexes * @param group the BitSet to convert * @return an array of attribute indexes **/ private int[] attributeList (BitSet group) { int count = 0; // count how many were selected for (int i = 0; i < m_numAttribs; i++) { if (group.get(i)) { count++; } } int[] list = new int[count]; count = 0; for (int i = 0; i < m_numAttribs; i++) { if (group.get(i)) { list[count++] = i; } } return list; } /** * checks to see if any population members in the current * population are better than the best found so far. Also checks * to see if the search has converged---that is there is no difference * in fitness between the best and worse population member * @return true is the search has converged * @throws Exception if something goes wrong */ private boolean checkBest() throws Exception { int i,count,lowestCount = m_numAttribs; double b = -Double.MAX_VALUE; GABitSet localbest = null; BitSet temp; boolean converged = false; int oldcount = Integer.MAX_VALUE; if (m_maxFitness - m_minFitness > 0) { // find the best in this population for (i=0;i b) { b = m_population[i].getObjective(); localbest = m_population[i]; oldcount = countFeatures(localbest.getChromosome()); } else if (Utils.eq(m_population[i].getObjective(), b)) { // see if it contains fewer features count = countFeatures(m_population[i].getChromosome()); if (count < oldcount) { b = m_population[i].getObjective(); localbest = m_population[i]; oldcount = count; } } } } else { // look for the smallest subset for (i=0;i m_best.getObjective()) { m_best = (GABitSet)localbest.clone(); m_bestFeatureCount = count; } else if (Utils.eq(m_best.getObjective(), b)) { // see if the localbest has fewer features than the best so far if (count < m_bestFeatureCount) { m_best = (GABitSet)localbest.clone(); m_bestFeatureCount = count; } } return converged; } /** * counts the number of features in a subset * @param featureSet the feature set for which to count the features * @return the number of features in the subset */ private int countFeatures(BitSet featureSet) { int count = 0; for (int i=0;i best_fit) { j = i; best_fit = m_population[i].getFitness(); old_count = countFeatures(m_population[i].getChromosome()); } else if (Utils.eq(m_population[i].getFitness(), best_fit)) { count = countFeatures(m_population[i].getChromosome()); if (count < old_count) { j = i; best_fit = m_population[i].getFitness(); old_count = count; } } } newPop[0] = (GABitSet)(m_population[j].clone()); newPop[1] = newPop[0]; for (j=2;j= 3) { if (r < m_pCrossover) { // cross point int cp = Math.abs(m_random.nextInt()); cp %= (m_numAttribs-2); cp ++; for (i=0;i= r) { break; } } // if none was found, take first if (i == m_popSize) i = 0; return i; } /** * evaluates an entire population. Population members are looked up in * a hash table and if they are not found then they are evaluated using * ASEvaluator. * @param ASEvaluator the subset evaluator to use for evaluating population * members * @throws Exception if something goes wrong during evaluation */ private void evaluatePopulation (SubsetEvaluator ASEvaluator) throws Exception { int i; double merit; for (i=0;i m_numAttribs) { throw new Exception("Problem in population init"); } m_population[i].set(bit); } } } /** * calculates summary statistics for the current population */ private void populationStatistics() { int i; m_sumFitness = m_minFitness = m_maxFitness = m_population[0].getObjective(); for (i=1;i m_maxFitness) { m_maxFitness = m_population[i].getObjective(); } else if (m_population[i].getObjective() < m_minFitness) { m_minFitness = m_population[i].getObjective(); } } m_avgFitness = (m_sumFitness / m_popSize); } /** * scales the raw (objective) merit of the population members */ private void scalePopulation() { int j; double a = 0; double b = 0; double fmultiple = 2.0; double delta; // prescale if (m_minFitness > ((fmultiple * m_avgFitness - m_maxFitness) / (fmultiple - 1.0))) { delta = m_maxFitness - m_avgFitness; a = ((fmultiple - 1.0) * m_avgFitness / delta); b = m_avgFitness * (m_maxFitness - fmultiple * m_avgFitness) / delta; } else { delta = m_avgFitness - m_minFitness; a = m_avgFitness / delta; b = -m_minFitness * m_avgFitness / delta; } // scalepop m_sumFitness = 0; for (j=0;j