/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* GeneticSearch.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.io.Serializable;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;
/**
* GeneticSearch:
*
* Performs a search using the simple genetic algorithm described in Goldberg (1989).
*
* For more information see:
*
* David E. Goldberg (1989). Genetic algorithms in search, optimization and machine learning. Addison-Wesley.
*
* @book{Goldberg1989, * author = {David E. Goldberg}, * publisher = {Addison-Wesley}, * title = {Genetic algorithms in search, optimization and machine learning}, * year = {1989}, * ISBN = {0201157675} * } ** * * Valid options are: * *
-P <start set> * Specify a starting set of attributes. * Eg. 1,3,5-7.If supplied, the starting set becomes * one member of the initial random * population.* *
-Z <population size> * Set the size of the population (even number). * (default = 20).* *
-G <number of generations> * Set the number of generations. * (default = 20)* *
-C <probability of crossover> * Set the probability of crossover. * (default = 0.6)* *
-M <probability of mutation> * Set the probability of mutation. * (default = 0.033)* *
-R <report frequency> * Set frequency of generation reports. * e.g, setting the value to 5 will * report every 5th generation * (default = number of generations)* *
-S <seed> * Set the random number seed. * (default = 1)* * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 5286 $ */ public class GeneticSearch extends ASSearch implements StartSetHandler, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -1618264232838472679L; /** * holds a starting set as an array of attributes. Becomes one member of the * initial random population */ private int[] m_starting; /** holds the start set for the search as a Range */ private Range m_startRange; /** does the data have a class */ private boolean m_hasClass; /** holds the class index */ private int m_classIndex; /** number of attributes in the data */ private int m_numAttribs; /** the current population */ private GABitSet [] m_population; /** the number of individual solutions */ private int m_popSize; /** the best population member found during the search */ private GABitSet m_best; /** the number of features in the best population member */ private int m_bestFeatureCount; /** the number of entries to cache for lookup */ private int m_lookupTableSize; /** the lookup table */ private Hashtable m_lookupTable; /** random number generation */ private Random m_random; /** seed for random number generation */ private int m_seed; /** the probability of crossover occuring */ private double m_pCrossover; /** the probability of mutation occuring */ private double m_pMutation; /** sum of the current population fitness */ private double m_sumFitness; private double m_maxFitness; private double m_minFitness; private double m_avgFitness; /** the maximum number of generations to evaluate */ private int m_maxGenerations; /** how often reports are generated */ private int m_reportFrequency; /** holds the generation reports */ private StringBuffer m_generationReports; // Inner class /** * A bitset for the genetic algorithm */ protected class GABitSet implements Cloneable, Serializable, RevisionHandler { /** for serialization */ 
static final long serialVersionUID = -2930607837482622224L; /** the bitset */ private BitSet m_chromosome; /** holds raw merit */ private double m_objective = -Double.MAX_VALUE; /** the fitness */ private double m_fitness; /** * Constructor */ public GABitSet () { m_chromosome = new BitSet(); } /** * makes a copy of this GABitSet * @return a copy of the object * @throws CloneNotSupportedException if something goes wrong */ public Object clone() throws CloneNotSupportedException { GABitSet temp = new GABitSet(); temp.setObjective(this.getObjective()); temp.setFitness(this.getFitness()); temp.setChromosome((BitSet)(this.m_chromosome.clone())); return temp; //return super.clone(); } /** * sets the objective merit value * @param objective the objective value of this population member */ public void setObjective(double objective) { m_objective = objective; } /** * gets the objective merit * @return the objective merit of this population member */ public double getObjective() { return m_objective; } /** * sets the scaled fitness * @param fitness the scaled fitness of this population member */ public void setFitness(double fitness) { m_fitness = fitness; } /** * gets the scaled fitness * @return the scaled fitness of this population member */ public double getFitness() { return m_fitness; } /** * get the chromosome * @return the chromosome of this population member */ public BitSet getChromosome() { return m_chromosome; } /** * set the chromosome * @param c the chromosome to be set for this population member */ public void setChromosome(BitSet c) { m_chromosome = c; } /** * unset a bit in the chromosome * @param bit the bit to be cleared */ public void clear(int bit) { m_chromosome.clear(bit); } /** * set a bit in the chromosome * @param bit the bit to be set */ public void set(int bit) { m_chromosome.set(bit); } /** * get the value of a bit in the chromosome * @param bit the bit to query * @return the value of the bit */ public boolean get(int bit) { return 
m_chromosome.get(bit); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 5286 $"); } } /** * Returns an enumeration describing the available options. * @return an enumeration of all the available options. **/ public Enumeration listOptions () { Vector newVector = new Vector(6); newVector.addElement(new Option("\tSpecify a starting set of attributes." + "\n\tEg. 1,3,5-7." +"If supplied, the starting set becomes" +"\n\tone member of the initial random" +"\n\tpopulation." ,"P",1 , "-P
-P <start set> * Specify a starting set of attributes. * Eg. 1,3,5-7.If supplied, the starting set becomes * one member of the initial random * population.* *
-Z <population size> * Set the size of the population (even number). * (default = 20).* *
-G <number of generations> * Set the number of generations. * (default = 20)* *
-C <probability of crossover> * Set the probability of crossover. * (default = 0.6)* *
-M <probability of mutation> * Set the probability of mutation. * (default = 0.033)* *
-R <report frequency> * Set frequency of generation reports. * e.g, setting the value to 5 will * report every 5th generation * (default = number of generations)* *
-S <seed> * Set the random number seed. * (default = 1)* * * @param options the list of options as an array of strings * @throws Exception if an option is not supported * **/ public void setOptions (String[] options) throws Exception { String optionString; resetOptions(); optionString = Utils.getOption('P', options); if (optionString.length() != 0) { setStartSet(optionString); } optionString = Utils.getOption('Z', options); if (optionString.length() != 0) { setPopulationSize(Integer.parseInt(optionString)); } optionString = Utils.getOption('G', options); if (optionString.length() != 0) { setMaxGenerations(Integer.parseInt(optionString)); setReportFrequency(Integer.parseInt(optionString)); } optionString = Utils.getOption('C', options); if (optionString.length() != 0) { setCrossoverProb((new Double(optionString)).doubleValue()); } optionString = Utils.getOption('M', options); if (optionString.length() != 0) { setMutationProb((new Double(optionString)).doubleValue()); } optionString = Utils.getOption('R', options); if (optionString.length() != 0) { setReportFrequency(Integer.parseInt(optionString)); } optionString = Utils.getOption('S', options); if (optionString.length() != 0) { setSeed(Integer.parseInt(optionString)); } } /** * Gets the current settings of ReliefFAttributeEval. 
*
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    // Fixed-size result: seven flag/value pairs; slots that stay unused
    // (no start set given) are padded with empty strings at the end.
    String[] result = new String[14];
    int pos = 0;

    boolean hasStartSet = !(getStartSet().equals(""));
    if (hasStartSet) {
      result[pos++] = "-P";
      result[pos++] = "" + startSetToString();
    }

    result[pos++] = "-Z";
    result[pos++] = String.valueOf(getPopulationSize());

    result[pos++] = "-G";
    result[pos++] = String.valueOf(getMaxGenerations());

    result[pos++] = "-C";
    result[pos++] = String.valueOf(getCrossoverProb());

    result[pos++] = "-M";
    result[pos++] = String.valueOf(getMutationProb());

    result[pos++] = "-R";
    result[pos++] = String.valueOf(getReportFrequency());

    result[pos++] = "-S";
    result[pos++] = String.valueOf(getSeed());

    for (; pos < result.length; pos++) {
      result[pos] = "";
    }

    return result;
  }

  /**
   * Returns the tip text for the startSet property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String startSetTipText() {
    String tip =
      "Set a start point for the search. This is specified as a comma "
      + "seperated list off attribute indexes starting at 1. It can include "
      + "ranges. Eg. 1,2,5-9,17. The start set becomes one of the population "
      + "members of the initial population.";
    return tip;
  }

  /**
   * Sets a starting set of attributes for the search. It is the
   * search method's responsibility to report this start set (if any)
   * in its toString() method.
   * @param startSet a string containing a list of attributes (and or ranges),
   * eg. 1,2,6,10-15.
   * @throws Exception if start set can't be set.
*/ public void setStartSet (String startSet) throws Exception { m_startRange.setRanges(startSet); } /** * Returns a list of attributes (and or attribute ranges) as a String * @return a list of attributes (and or attribute ranges) */ public String getStartSet () { return m_startRange.getRanges(); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String seedTipText() { return "Set the random seed."; } /** * set the seed for random number generation * @param s seed value */ public void setSeed(int s) { m_seed = s; } /** * get the value of the random number generator's seed * @return the seed for random number generation */ public int getSeed() { return m_seed; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String reportFrequencyTipText() { return "Set how frequently reports are generated. Default is equal to " +"the number of generations meaning that a report will be printed for " +"initial and final generations. 
Setting the value to 5 will result in " +"a report being printed every 5 generations."; } /** * set how often reports are generated * @param f generate reports every f generations */ public void setReportFrequency(int f) { m_reportFrequency = f; } /** * get how often repports are generated * @return how often reports are generated */ public int getReportFrequency() { return m_reportFrequency; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String mutationProbTipText() { return "Set the probability of mutation occuring."; } /** * set the probability of mutation * @param m the probability for mutation occuring */ public void setMutationProb(double m) { m_pMutation = m; } /** * get the probability of mutation * @return the probability of mutation occuring */ public double getMutationProb() { return m_pMutation; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String crossoverProbTipText() { return "Set the probability of crossover. 
This is the probability that " +"two population members will exchange genetic material."; } /** * set the probability of crossover * @param c the probability that two population members will exchange * genetic material */ public void setCrossoverProb(double c) { m_pCrossover = c; } /** * get the probability of crossover * @return the probability of crossover */ public double getCrossoverProb() { return m_pCrossover; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String maxGenerationsTipText() { return "Set the number of generations to evaluate."; } /** * set the number of generations to evaluate * @param m the number of generations */ public void setMaxGenerations(int m) { m_maxGenerations = m; } /** * get the number of generations * @return the maximum number of generations */ public int getMaxGenerations() { return m_maxGenerations; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String populationSizeTipText() { return "Set the population size (even number), this is the number of individuals " +"(attribute sets) in the population."; } /** * set the population size * @param p the size of the population */ public void setPopulationSize(int p) { if (p % 2 == 0) m_popSize = p; else System.err.println("Population size needs to be an even number!"); } /** * get the size of the population * @return the population size */ public int getPopulationSize() { return m_popSize; } /** * Returns a string describing this search method * @return a description of the search suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "GeneticSearch:\n\nPerforms a search using the simple genetic " + "algorithm described in Goldberg (1989).\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an 
instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.BOOK); result.setValue(Field.AUTHOR, "David E. Goldberg"); result.setValue(Field.YEAR, "1989"); result.setValue(Field.TITLE, "Genetic algorithms in search, optimization and machine learning"); result.setValue(Field.ISBN, "0201157675"); result.setValue(Field.PUBLISHER, "Addison-Wesley"); return result; } /** * Constructor. Make a new GeneticSearch object */ public GeneticSearch() { resetOptions(); } /** * converts the array of starting attributes to a string. This is * used by getOptions to return the actual attributes specified * as the starting set. This is better than using m_startRanges.getRanges() * as the same start set can be specified in different ways from the * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that * is stored in a database is comparable. 
* @return a comma seperated list of individual attribute numbers as a String */ private String startSetToString() { StringBuffer FString = new StringBuffer(); boolean didPrint; if (m_starting == null) { return getStartSet(); } for (int i = 0; i < m_starting.length; i++) { didPrint = false; if ((m_hasClass == false) || (m_hasClass == true && i != m_classIndex)) { FString.append((m_starting[i] + 1)); didPrint = true; } if (i == (m_starting.length - 1)) { FString.append(""); } else { if (didPrint) { FString.append(","); } } } return FString.toString(); } /** * returns a description of the search * @return a description of the search as a String */ public String toString() { StringBuffer GAString = new StringBuffer(); GAString.append("\tGenetic search.\n\tStart set: "); if (m_starting == null) { GAString.append("no attributes\n"); } else { GAString.append(startSetToString()+"\n"); } GAString.append("\tPopulation size: "+m_popSize); GAString.append("\n\tNumber of generations: "+m_maxGenerations); GAString.append("\n\tProbability of crossover: " +Utils.doubleToString(m_pCrossover,6,3)); GAString.append("\n\tProbability of mutation: " +Utils.doubleToString(m_pMutation,6,3)); GAString.append("\n\tReport frequency: "+m_reportFrequency); GAString.append("\n\tRandom number seed: "+m_seed+"\n"); GAString.append(m_generationReports.toString()); return GAString.toString(); } /** * Searches the attribute subset space using a genetic algorithm. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. 
* @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search (ASEvaluation ASEval, Instances data) throws Exception { m_best = null; m_generationReports = new StringBuffer(); if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval; m_numAttribs = data.numAttributes(); m_startRange.setUpper(m_numAttribs-1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // initial random population m_lookupTable = new Hashtable(m_lookupTableSize); m_random = new Random(m_seed); m_population = new GABitSet [m_popSize]; // set up random initial population initPopulation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); checkBest(); m_generationReports.append(populationReport(0)); boolean converged; for (int i=1;i<=m_maxGenerations;i++) { generation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); // find the best pop member and check for convergence converged = checkBest(); if ((i == m_maxGenerations) || ((i % m_reportFrequency) == 0) || (converged == true)) { m_generationReports.append(populationReport(i)); if (converged == true) { break; } } } return attributeList(m_best.getChromosome()); } /** * converts a BitSet into a list of attribute indexes * @param group the BitSet to convert * @return an array of attribute indexes **/ private int[] attributeList (BitSet group) { int count = 0; // count how many were selected for (int i = 0; i < m_numAttribs; i++) { if (group.get(i)) { count++; } } int[] list = new int[count]; count = 0; for (int i = 0; i < m_numAttribs; i++) { if (group.get(i)) { list[count++] = i; } } return list; } /** * checks to 
see if any population members in the current * population are better than the best found so far. Also checks * to see if the search has converged---that is there is no difference * in fitness between the best and worse population member * @return true is the search has converged * @throws Exception if something goes wrong */ private boolean checkBest() throws Exception { int i,count,lowestCount = m_numAttribs; double b = -Double.MAX_VALUE; GABitSet localbest = null; BitSet temp; boolean converged = false; int oldcount = Integer.MAX_VALUE; if (m_maxFitness - m_minFitness > 0) { // find the best in this population for (i=0;i