source: src/main/java/weka/attributeSelection/GeneticSearch.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 36.5 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    GeneticSearch.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package  weka.attributeSelection;
24
25import weka.core.Instances;
26import weka.core.Option;
27import weka.core.OptionHandler;
28import weka.core.Range;
29import weka.core.RevisionHandler;
30import weka.core.RevisionUtils;
31import weka.core.TechnicalInformation;
32import weka.core.TechnicalInformationHandler;
33import weka.core.Utils;
34import weka.core.TechnicalInformation.Field;
35import weka.core.TechnicalInformation.Type;
36
37import java.io.Serializable;
38import java.util.BitSet;
39import java.util.Enumeration;
40import java.util.Hashtable;
41import java.util.Random;
42import java.util.Vector;
43
44/**
45 <!-- globalinfo-start -->
46 * GeneticSearch:<br/>
47 * <br/>
48 * Performs a search using the simple genetic algorithm described in Goldberg (1989).<br/>
49 * <br/>
50 * For more information see:<br/>
51 * <br/>
52 * David E. Goldberg (1989). Genetic algorithms in search, optimization and machine learning. Addison-Wesley.
53 * <p/>
54 <!-- globalinfo-end -->
55 *
56 <!-- technical-bibtex-start -->
57 * BibTeX:
58 * <pre>
59 * &#64;book{Goldberg1989,
60 *    author = {David E. Goldberg},
61 *    publisher = {Addison-Wesley},
62 *    title = {Genetic algorithms in search, optimization and machine learning},
63 *    year = {1989},
64 *    ISBN = {0201157675}
65 * }
66 * </pre>
67 * <p/>
68 <!-- technical-bibtex-end -->
69 *
70 <!-- options-start -->
71 * Valid options are: <p/>
72 *
73 * <pre> -P &lt;start set&gt;
74 *  Specify a starting set of attributes.
75 *  Eg. 1,3,5-7.If supplied, the starting set becomes
76 *  one member of the initial random
77 *  population.</pre>
78 *
79 * <pre> -Z &lt;population size&gt;
80 *  Set the size of the population (even number).
81 *  (default = 20).</pre>
82 *
83 * <pre> -G &lt;number of generations&gt;
84 *  Set the number of generations.
85 *  (default = 20)</pre>
86 *
87 * <pre> -C &lt;probability of crossover&gt;
88 *  Set the probability of crossover.
89 *  (default = 0.6)</pre>
90 *
91 * <pre> -M &lt;probability of mutation&gt;
92 *  Set the probability of mutation.
93 *  (default = 0.033)</pre>
94 *
95 * <pre> -R &lt;report frequency&gt;
96 *  Set frequency of generation reports.
97 *  e.g, setting the value to 5 will
98 *  report every 5th generation
99 *  (default = number of generations)</pre>
100 *
101 * <pre> -S &lt;seed&gt;
102 *  Set the random number seed.
103 *  (default = 1)</pre>
104 *
105 <!-- options-end -->
106 *
107 * @author Mark Hall (mhall@cs.waikato.ac.nz)
108 * @version $Revision: 5286 $
109 */
110public class GeneticSearch 
111  extends ASSearch
112  implements StartSetHandler, OptionHandler, TechnicalInformationHandler {
113
114  /** for serialization */
115  static final long serialVersionUID = -1618264232838472679L;
116 
117  /**
118   * holds a starting set as an array of attributes. Becomes one member of the
119   * initial random population
120   */
121  private int[] m_starting;
122
123  /** holds the start set for the search as a Range */
124  private Range m_startRange;
125 
126 /** does the data have a class */
127  private boolean m_hasClass;
128 
129  /** holds the class index */
130  private int m_classIndex;
131 
132  /** number of attributes in the data */
133  private int m_numAttribs;
134
135  /** the current population */
136  private GABitSet [] m_population;
137
138  /** the number of individual solutions */
139  private int m_popSize;
140
141  /** the best population member found during the search */
142  private GABitSet m_best;
143
144  /** the number of features in the best population member */
145  private int m_bestFeatureCount;
146
147  /** the number of entries to cache for lookup */
148  private int m_lookupTableSize;
149
150  /** the lookup table */
151  private Hashtable m_lookupTable;
152
153  /** random number generation */
154  private Random m_random;
155
156  /** seed for random number generation */
157  private int m_seed;
158
159  /** the probability of crossover occuring */
160  private double m_pCrossover;
161
162  /** the probability of mutation occuring */
163  private double m_pMutation;
164
165  /** sum of the current population fitness */
166  private double m_sumFitness;
167
168  private double m_maxFitness;
169  private double m_minFitness;
170  private double m_avgFitness;
171
172  /** the maximum number of generations to evaluate */
173  private int m_maxGenerations;
174
175  /** how often reports are generated */
176  private int m_reportFrequency;
177
178  /** holds the generation reports */
179  private StringBuffer m_generationReports;
180
181  // Inner class
182  /**
183   * A bitset for the genetic algorithm
184   */
185  protected class GABitSet 
186    implements Cloneable, Serializable, RevisionHandler {
187   
188    /** for serialization */
189    static final long serialVersionUID = -2930607837482622224L;
190   
191    /** the bitset */
192    private BitSet m_chromosome;
193
194    /** holds raw merit */
195    private double m_objective = -Double.MAX_VALUE;
196   
197    /** the fitness */
198    private double m_fitness;
199   
200    /**
201     * Constructor
202     */
203    public GABitSet () {
204      m_chromosome = new BitSet();
205    }
206
207    /**
208     * makes a copy of this GABitSet
209     * @return a copy of the object
210     * @throws CloneNotSupportedException if something goes wrong
211     */
212    public Object clone() throws CloneNotSupportedException {
213      GABitSet temp = new GABitSet();
214     
215      temp.setObjective(this.getObjective());
216      temp.setFitness(this.getFitness());
217      temp.setChromosome((BitSet)(this.m_chromosome.clone()));
218      return temp;
219      //return super.clone();
220    }
221
222    /**
223     * sets the objective merit value
224     * @param objective the objective value of this population member
225     */
226    public void setObjective(double objective) {
227      m_objective = objective;
228    }
229     
230    /**
231     * gets the objective merit
232     * @return the objective merit of this population member
233     */
234    public double getObjective() {
235      return m_objective;
236    }
237
238    /**
239     * sets the scaled fitness
240     * @param fitness the scaled fitness of this population member
241     */
242    public void setFitness(double fitness) {
243      m_fitness = fitness;
244    }
245
246    /**
247     * gets the scaled fitness
248     * @return the scaled fitness of this population member
249     */
250    public double getFitness() {
251      return m_fitness;
252    }
253
254    /**
255     * get the chromosome
256     * @return the chromosome of this population member
257     */
258    public BitSet getChromosome() {
259      return m_chromosome;
260    }
261
262    /**
263     * set the chromosome
264     * @param c the chromosome to be set for this population member
265     */
266    public void setChromosome(BitSet c) {
267      m_chromosome = c;
268    }
269
270    /**
271     * unset a bit in the chromosome
272     * @param bit the bit to be cleared
273     */
274    public void clear(int bit) {
275      m_chromosome.clear(bit);
276    }
277
278    /**
279     * set a bit in the chromosome
280     * @param bit the bit to be set
281     */
282    public void set(int bit) {
283      m_chromosome.set(bit);
284    }
285
286    /**
287     * get the value of a bit in the chromosome
288     * @param bit the bit to query
289     * @return the value of the bit
290     */
291    public boolean get(int bit) {
292      return m_chromosome.get(bit);
293    }
294   
295    /**
296     * Returns the revision string.
297     *
298     * @return          the revision
299     */
300    public String getRevision() {
301      return RevisionUtils.extract("$Revision: 5286 $");
302    }
303  }
304
305  /**
306   * Returns an enumeration describing the available options.
307   * @return an enumeration of all the available options.
308   **/
309  public Enumeration listOptions () {
310    Vector newVector = new Vector(6);
311
312    newVector.addElement(new Option("\tSpecify a starting set of attributes." 
313                                    + "\n\tEg. 1,3,5-7."
314                                    +"If supplied, the starting set becomes"
315                                    +"\n\tone member of the initial random"
316                                    +"\n\tpopulation."
317                                    ,"P",1
318                                    , "-P <start set>"));
319    newVector.addElement(new Option("\tSet the size of the population (even number)."
320                                    +"\n\t(default = 20)."
321                                    , "Z", 1
322                                    , "-Z <population size>"));
323    newVector.addElement(new Option("\tSet the number of generations."
324                                    +"\n\t(default = 20)" 
325                                    , "G", 1, "-G <number of generations>"));
326    newVector.addElement(new Option("\tSet the probability of crossover."
327                                    +"\n\t(default = 0.6)" 
328                                    , "C", 1, "-C <probability of"
329                                    +" crossover>"));   
330    newVector.addElement(new Option("\tSet the probability of mutation."
331                                    +"\n\t(default = 0.033)" 
332                                    , "M", 1, "-M <probability of mutation>"));
333
334    newVector.addElement(new Option("\tSet frequency of generation reports."
335                                    +"\n\te.g, setting the value to 5 will "
336                                    +"\n\treport every 5th generation"
337                                    +"\n\t(default = number of generations)" 
338                                    , "R", 1, "-R <report frequency>"));
339    newVector.addElement(new Option("\tSet the random number seed."
340                                    +"\n\t(default = 1)" 
341                                    , "S", 1, "-S <seed>"));
342    return  newVector.elements();
343  }
344
345  /**
346   * Parses a given list of options. <p/>
347   *
348   <!-- options-start -->
349   * Valid options are: <p/>
350   *
351   * <pre> -P &lt;start set&gt;
352   *  Specify a starting set of attributes.
353   *  Eg. 1,3,5-7.If supplied, the starting set becomes
354   *  one member of the initial random
355   *  population.</pre>
356   *
357   * <pre> -Z &lt;population size&gt;
358   *  Set the size of the population (even number).
359   *  (default = 20).</pre>
360   *
361   * <pre> -G &lt;number of generations&gt;
362   *  Set the number of generations.
363   *  (default = 20)</pre>
364   *
365   * <pre> -C &lt;probability of crossover&gt;
366   *  Set the probability of crossover.
367   *  (default = 0.6)</pre>
368   *
369   * <pre> -M &lt;probability of mutation&gt;
370   *  Set the probability of mutation.
371   *  (default = 0.033)</pre>
372   *
373   * <pre> -R &lt;report frequency&gt;
374   *  Set frequency of generation reports.
375   *  e.g, setting the value to 5 will
376   *  report every 5th generation
377   *  (default = number of generations)</pre>
378   *
379   * <pre> -S &lt;seed&gt;
380   *  Set the random number seed.
381   *  (default = 1)</pre>
382   *
383   <!-- options-end -->
384   *
385   * @param options the list of options as an array of strings
386   * @throws Exception if an option is not supported
387   *
388   **/
389  public void setOptions (String[] options)
390    throws Exception {
391    String optionString;
392    resetOptions();
393
394    optionString = Utils.getOption('P', options);
395    if (optionString.length() != 0) {
396      setStartSet(optionString);
397    }
398
399    optionString = Utils.getOption('Z', options);
400    if (optionString.length() != 0) {
401      setPopulationSize(Integer.parseInt(optionString));
402    }
403
404    optionString = Utils.getOption('G', options);
405    if (optionString.length() != 0) {
406      setMaxGenerations(Integer.parseInt(optionString));
407      setReportFrequency(Integer.parseInt(optionString));
408    }
409
410    optionString = Utils.getOption('C', options);
411    if (optionString.length() != 0) {
412      setCrossoverProb((new Double(optionString)).doubleValue());
413    }
414
415    optionString = Utils.getOption('M', options);
416    if (optionString.length() != 0) {
417      setMutationProb((new Double(optionString)).doubleValue());
418    }
419
420    optionString = Utils.getOption('R', options);
421    if (optionString.length() != 0) {
422      setReportFrequency(Integer.parseInt(optionString));
423    }
424
425    optionString = Utils.getOption('S', options);
426    if (optionString.length() != 0) {
427      setSeed(Integer.parseInt(optionString));
428    }
429  }
430
431  /**
432   * Gets the current settings of ReliefFAttributeEval.
433   *
434   * @return an array of strings suitable for passing to setOptions()
435   */
436  public String[] getOptions () {
437    String[] options = new String[14];
438    int current = 0;
439
440    if (!(getStartSet().equals(""))) {
441      options[current++] = "-P";
442      options[current++] = ""+startSetToString();
443    }
444    options[current++] = "-Z";
445    options[current++] = "" + getPopulationSize();
446    options[current++] = "-G";
447    options[current++] = "" + getMaxGenerations();
448    options[current++] = "-C";
449    options[current++] = "" + getCrossoverProb();
450    options[current++] = "-M";
451    options[current++] = "" + getMutationProb();
452    options[current++] = "-R";
453    options[current++] = "" + getReportFrequency();
454    options[current++] = "-S";
455    options[current++] = "" + getSeed();
456
457    while (current < options.length) {
458      options[current++] = "";
459    }
460    return  options;
461  }
462
463  /**
464   * Returns the tip text for this property
465   * @return tip text for this property suitable for
466   * displaying in the explorer/experimenter gui
467   */
468  public String startSetTipText() {
469    return "Set a start point for the search. This is specified as a comma "
470      +"seperated list off attribute indexes starting at 1. It can include "
471      +"ranges. Eg. 1,2,5-9,17. The start set becomes one of the population "
472      +"members of the initial population.";
473  }
474
475  /**
476   * Sets a starting set of attributes for the search. It is the
477   * search method's responsibility to report this start set (if any)
478   * in its toString() method.
479   * @param startSet a string containing a list of attributes (and or ranges),
480   * eg. 1,2,6,10-15.
481   * @throws Exception if start set can't be set.
482   */
483  public void setStartSet (String startSet) throws Exception {
484    m_startRange.setRanges(startSet);
485  }
486
487  /**
488   * Returns a list of attributes (and or attribute ranges) as a String
489   * @return a list of attributes (and or attribute ranges)
490   */
491  public String getStartSet () {
492    return m_startRange.getRanges();
493  }
494
495  /**
496   * Returns the tip text for this property
497   * @return tip text for this property suitable for
498   * displaying in the explorer/experimenter gui
499   */
500  public String seedTipText() {
501    return "Set the random seed.";
502  }
503
504  /**
505   * set the seed for random number generation
506   * @param s seed value
507   */
508  public void setSeed(int s) {
509    m_seed = s;
510  }
511
512  /**
513   * get the value of the random number generator's seed
514   * @return the seed for random number generation
515   */
516  public int getSeed() {
517    return m_seed;
518  }
519 
520  /**
521   * Returns the tip text for this property
522   * @return tip text for this property suitable for
523   * displaying in the explorer/experimenter gui
524   */
525  public String reportFrequencyTipText() {
526    return "Set how frequently reports are generated. Default is equal to "
527      +"the number of generations meaning that a report will be printed for "
528      +"initial and final generations. Setting the value to 5 will result in "
529      +"a report being printed every 5 generations.";
530  }
531
532  /**
533   * set how often reports are generated
534   * @param f generate reports every f generations
535   */
536  public void setReportFrequency(int f) {
537    m_reportFrequency = f;
538  }
539
540  /**
541   * get how often repports are generated
542   * @return how often reports are generated
543   */
544  public int getReportFrequency() {
545    return m_reportFrequency;
546  }
547
548  /**
549   * Returns the tip text for this property
550   * @return tip text for this property suitable for
551   * displaying in the explorer/experimenter gui
552   */
553  public String mutationProbTipText() {
554    return "Set the probability of mutation occuring.";
555  }
556
557  /**
558   * set the probability of mutation
559   * @param m the probability for mutation occuring
560   */
561  public void setMutationProb(double m) {
562    m_pMutation = m;
563  }
564
565  /**
566   * get the probability of mutation
567   * @return the probability of mutation occuring
568   */
569  public double getMutationProb() {
570    return m_pMutation;
571  }
572
573  /**
574   * Returns the tip text for this property
575   * @return tip text for this property suitable for
576   * displaying in the explorer/experimenter gui
577   */
578  public String crossoverProbTipText() {
579    return "Set the probability of crossover. This is the probability that "
580      +"two population members will exchange genetic material."; 
581  }
582
583  /**
584   * set the probability of crossover
585   * @param c the probability that two population members will exchange
586   * genetic material
587   */
588  public void setCrossoverProb(double c) {
589    m_pCrossover = c;
590  }
591
592  /**
593   * get the probability of crossover
594   * @return the probability of crossover
595   */
596  public double getCrossoverProb() {
597    return m_pCrossover;
598  }
599
600  /**
601   * Returns the tip text for this property
602   * @return tip text for this property suitable for
603   * displaying in the explorer/experimenter gui
604   */
605  public String maxGenerationsTipText() {
606    return "Set the number of generations to evaluate.";
607  }
608
609  /**
610   * set the number of generations to evaluate
611   * @param m the number of generations
612   */
613  public void setMaxGenerations(int m) {
614    m_maxGenerations = m;
615  }
616
617  /**
618   * get the number of generations
619   * @return the maximum number of generations
620   */
621  public int getMaxGenerations() {
622    return m_maxGenerations;
623  }
624
625  /**
626   * Returns the tip text for this property
627   * @return tip text for this property suitable for
628   * displaying in the explorer/experimenter gui
629   */
630  public String populationSizeTipText() {
631    return "Set the population size (even number), this is the number of individuals "
632      +"(attribute sets) in the population.";
633  }
634
635  /**
636   * set the population size
637   * @param p the size of the population
638   */
639  public void setPopulationSize(int p) {
640    if (p % 2 == 0)
641      m_popSize = p;
642    else
643      System.err.println("Population size needs to be an even number!");
644  }
645
646  /**
647   * get the size of the population
648   * @return the population size
649   */
650  public int getPopulationSize() {
651    return m_popSize;
652  }
653
654  /**
655   * Returns a string describing this search method
656   * @return a description of the search suitable for
657   * displaying in the explorer/experimenter gui
658   */
659  public String globalInfo() {
660    return 
661        "GeneticSearch:\n\nPerforms a search using the simple genetic "
662      + "algorithm described in Goldberg (1989).\n\n"
663      + "For more information see:\n\n"
664      + getTechnicalInformation().toString();
665  }
666
667  /**
668   * Returns an instance of a TechnicalInformation object, containing
669   * detailed information about the technical background of this class,
670   * e.g., paper reference or book this class is based on.
671   *
672   * @return the technical information about this class
673   */
674  public TechnicalInformation getTechnicalInformation() {
675    TechnicalInformation        result;
676   
677    result = new TechnicalInformation(Type.BOOK);
678    result.setValue(Field.AUTHOR, "David E. Goldberg");
679    result.setValue(Field.YEAR, "1989");
680    result.setValue(Field.TITLE, "Genetic algorithms in search, optimization and machine learning");
681    result.setValue(Field.ISBN, "0201157675");
682    result.setValue(Field.PUBLISHER, "Addison-Wesley");
683   
684    return result;
685  }
686
687  /**
688   * Constructor. Make a new GeneticSearch object
689   */
690  public GeneticSearch() {
691    resetOptions();
692  }
693
694  /**
695   * converts the array of starting attributes to a string. This is
696   * used by getOptions to return the actual attributes specified
697   * as the starting set. This is better than using m_startRanges.getRanges()
698   * as the same start set can be specified in different ways from the
699   * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
700   * is stored in a database is comparable.
701   * @return a comma seperated list of individual attribute numbers as a String
702   */
703  private String startSetToString() {
704    StringBuffer FString = new StringBuffer();
705    boolean didPrint;
706   
707    if (m_starting == null) {
708      return getStartSet();
709    }
710
711    for (int i = 0; i < m_starting.length; i++) {
712      didPrint = false;
713     
714      if ((m_hasClass == false) || 
715          (m_hasClass == true && i != m_classIndex)) {
716        FString.append((m_starting[i] + 1));
717        didPrint = true;
718      }
719     
720      if (i == (m_starting.length - 1)) {
721        FString.append("");
722      }
723      else {
724        if (didPrint) {
725          FString.append(",");
726          }
727      }
728    }
729
730    return FString.toString();
731  }
732
733  /**
734   * returns a description of the search
735   * @return a description of the search as a String
736   */
737  public String toString() {
738    StringBuffer GAString = new StringBuffer();
739    GAString.append("\tGenetic search.\n\tStart set: ");
740
741    if (m_starting == null) {
742      GAString.append("no attributes\n");
743    }
744    else {
745      GAString.append(startSetToString()+"\n");
746    }
747    GAString.append("\tPopulation size: "+m_popSize);
748    GAString.append("\n\tNumber of generations: "+m_maxGenerations);
749    GAString.append("\n\tProbability of crossover: "
750                +Utils.doubleToString(m_pCrossover,6,3));
751    GAString.append("\n\tProbability of mutation: "
752                +Utils.doubleToString(m_pMutation,6,3));
753    GAString.append("\n\tReport frequency: "+m_reportFrequency);
754    GAString.append("\n\tRandom number seed: "+m_seed+"\n");
755    GAString.append(m_generationReports.toString());
756    return GAString.toString();
757  }
758
759  /**
760   * Searches the attribute subset space using a genetic algorithm.
761   *
762   * @param ASEval the attribute evaluator to guide the search
763   * @param data the training instances.
764   * @return an array (not necessarily ordered) of selected attribute indexes
765   * @throws Exception if the search can't be completed
766   */
767   public int[] search (ASEvaluation ASEval, Instances data)
768    throws Exception {
769
770     m_best = null;
771     m_generationReports = new StringBuffer();
772
773     if (!(ASEval instanceof SubsetEvaluator)) {
774       throw  new Exception(ASEval.getClass().getName() 
775                            + " is not a " 
776                            + "Subset evaluator!");
777     }
778     
779    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
780      m_hasClass = false;
781    }
782    else {
783      m_hasClass = true;
784      m_classIndex = data.classIndex();
785    }
786
787    SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval;
788    m_numAttribs = data.numAttributes();
789
790    m_startRange.setUpper(m_numAttribs-1);
791    if (!(getStartSet().equals(""))) {
792      m_starting = m_startRange.getSelection();
793    }
794
795    // initial random population
796    m_lookupTable = new Hashtable(m_lookupTableSize);
797    m_random = new Random(m_seed);
798    m_population = new GABitSet [m_popSize];
799
800    // set up random initial population
801    initPopulation();
802    evaluatePopulation(ASEvaluator);
803    populationStatistics();
804    scalePopulation();
805    checkBest();
806    m_generationReports.append(populationReport(0));
807
808    boolean converged;
809    for (int i=1;i<=m_maxGenerations;i++) {
810      generation();
811      evaluatePopulation(ASEvaluator);
812      populationStatistics();
813      scalePopulation();
814      // find the best pop member and check for convergence
815      converged = checkBest();
816
817      if ((i == m_maxGenerations) || 
818          ((i % m_reportFrequency) == 0) ||
819          (converged == true)) {
820        m_generationReports.append(populationReport(i));
821        if (converged == true) {
822          break;
823        }
824      }
825    }
826    return attributeList(m_best.getChromosome());
827   }
828
829  /**
830   * converts a BitSet into a list of attribute indexes
831   * @param group the BitSet to convert
832   * @return an array of attribute indexes
833   **/
834  private int[] attributeList (BitSet group) {
835    int count = 0;
836
837    // count how many were selected
838    for (int i = 0; i < m_numAttribs; i++) {
839      if (group.get(i)) {
840        count++;
841      }
842    }
843
844    int[] list = new int[count];
845    count = 0;
846
847    for (int i = 0; i < m_numAttribs; i++) {
848      if (group.get(i)) {
849        list[count++] = i;
850      }
851    }
852
853    return  list;
854  }
855
856  /**
857   * checks to see if any population members in the current
858   * population are better than the best found so far. Also checks
859   * to see if the search has converged---that is there is no difference
860   * in fitness between the best and worse population member
861   * @return true is the search has converged
862   * @throws Exception if something goes wrong
863   */
864  private boolean checkBest() throws Exception {
865    int i,count,lowestCount = m_numAttribs;
866    double b = -Double.MAX_VALUE;
867    GABitSet localbest = null;
868    BitSet temp;
869    boolean converged = false;
870    int oldcount = Integer.MAX_VALUE;
871
872    if (m_maxFitness - m_minFitness > 0) {
873      // find the best in this population
874      for (i=0;i<m_popSize;i++) {
875        if (m_population[i].getObjective() > b) {
876          b = m_population[i].getObjective();
877          localbest = m_population[i];
878          oldcount = countFeatures(localbest.getChromosome());
879        } else if (Utils.eq(m_population[i].getObjective(), b)) {
880          // see if it contains fewer features
881          count = countFeatures(m_population[i].getChromosome());
882          if (count < oldcount) {
883            b = m_population[i].getObjective();
884            localbest = m_population[i];
885            oldcount = count;
886          }
887        }
888      }
889    } else {
890      // look for the smallest subset
891      for (i=0;i<m_popSize;i++) {
892        temp = m_population[i].getChromosome();
893        count = countFeatures(temp);;
894
895        if (count < lowestCount) {
896          lowestCount = count;
897          localbest = m_population[i];
898          b = localbest.getObjective();
899        }
900      }
901      converged = true;
902    }
903
904    // count the number of features in localbest
905    count = 0;
906    temp = localbest.getChromosome();
907    count = countFeatures(temp);
908
909    // compare to the best found so far
910    if (m_best == null) {
911      m_best = (GABitSet)localbest.clone();
912      m_bestFeatureCount = count;
913    } else if (b > m_best.getObjective()) {
914      m_best = (GABitSet)localbest.clone();
915      m_bestFeatureCount = count;
916    } else if (Utils.eq(m_best.getObjective(), b)) {
917      // see if the localbest has fewer features than the best so far
918      if (count < m_bestFeatureCount) {
919        m_best = (GABitSet)localbest.clone();
920        m_bestFeatureCount = count;
921      }
922    }
923    return converged;
924  }
925
926  /**
927   * counts the number of features in a subset
928   * @param featureSet the feature set for which to count the features
929   * @return the number of features in the subset
930   */
931  private int countFeatures(BitSet featureSet) {
932    int count = 0;
933    for (int i=0;i<m_numAttribs;i++) {
934      if (featureSet.get(i)) {
935        count++;
936      }
937    }
938    return count;
939  }
940
941  /**
942   * performs a single generation---selection, crossover, and mutation
943   * @throws Exception if an error occurs
944   */
945  private void generation () throws Exception {
946    int i,j=0;
947    double best_fit = -Double.MAX_VALUE;
948    int old_count = 0;
949    int count;
950    GABitSet [] newPop = new GABitSet [m_popSize];
951    int parent1,parent2;
952
953    /** first ensure that the population best is propogated into the new
954        generation */
955    for (i=0;i<m_popSize;i++) {
956      if (m_population[i].getFitness() > best_fit) {
957        j = i;
958        best_fit = m_population[i].getFitness();
959        old_count = countFeatures(m_population[i].getChromosome());
960      } else if (Utils.eq(m_population[i].getFitness(), best_fit)) {
961        count = countFeatures(m_population[i].getChromosome());
962        if (count < old_count) {
963          j = i;
964          best_fit = m_population[i].getFitness();
965          old_count = count;
966        }
967      }
968    }
969    newPop[0] = (GABitSet)(m_population[j].clone());
970    newPop[1] = newPop[0];
971
972    for (j=2;j<m_popSize;j+=2) {
973      parent1 = select();
974      parent2 = select();
975      newPop[j] = (GABitSet)(m_population[parent1].clone());
976      newPop[j+1] = (GABitSet)(m_population[parent2].clone());
977      // if parents are equal mutate one bit
978      if (parent1 == parent2) {
979        int r;
980        if (m_hasClass) {
981          while ((r = (Math.abs(m_random.nextInt()) % m_numAttribs)) == m_classIndex);
982        }
983        else {
984          r = m_random.nextInt() % m_numAttribs;
985        }
986       
987        if (newPop[j].get(r)) {
988          newPop[j].clear(r);
989        }
990        else {
991          newPop[j].set(r);
992        }
993      } else {
994        // crossover
995        double r = m_random.nextDouble();
996        if (m_numAttribs >= 3) {
997          if (r < m_pCrossover) {
998            // cross point
999            int cp = Math.abs(m_random.nextInt());
1000           
1001            cp %= (m_numAttribs-2);
1002            cp ++;
1003           
1004            for (i=0;i<cp;i++) {
1005              if (m_population[parent1].get(i)) {
1006                newPop[j+1].set(i);
1007              }
1008              else {
1009                newPop[j+1].clear(i);
1010              }
1011              if (m_population[parent2].get(i)) {
1012                newPop[j].set(i);
1013              }
1014              else {
1015                newPop[j].clear(i);
1016              }
1017            }
1018          }
1019        }
1020
1021        // mutate
1022        for (int k=0;k<2;k++) {
1023          for (i=0;i<m_numAttribs;i++) {
1024            r = m_random.nextDouble();
1025            if (r < m_pMutation) {
1026              if (m_hasClass && (i == m_classIndex)) {
1027                // ignore class attribute
1028              }
1029              else {
1030                if (newPop[j+k].get(i)) {
1031                  newPop[j+k].clear(i);
1032                }
1033                else {
1034                  newPop[j+k].set(i);
1035                }
1036              }
1037            }
1038          }
1039        }
1040                 
1041      }
1042    }
1043
1044    m_population = newPop;
1045  }
1046
1047  /**
1048   * selects a population member to be considered for crossover
1049   * @return the index of the selected population member
1050   */
1051  private int select() {
1052    int i;
1053    double r,partsum;
1054
1055    partsum = 0;
1056    r = m_random.nextDouble() * m_sumFitness;
1057    for (i=0;i<m_popSize;i++) {
1058      partsum += m_population[i].getFitness();
1059      if (partsum >= r) {
1060        break;
1061      }
1062    }
1063   
1064    // if none was found, take first
1065    if (i == m_popSize)
1066      i = 0;
1067   
1068    return i;
1069  }
1070
1071  /**
1072   * evaluates an entire population. Population members are looked up in
1073   * a hash table and if they are not found then they are evaluated using
1074   * ASEvaluator.
1075   * @param ASEvaluator the subset evaluator to use for evaluating population
1076   * members
1077   * @throws Exception if something goes wrong during evaluation
1078   */
1079  private void evaluatePopulation (SubsetEvaluator ASEvaluator)
1080    throws Exception {
1081    int i;
1082    double merit;
1083
1084    for (i=0;i<m_popSize;i++) {
1085      // if its not in the lookup table then evaluate and insert
1086      if (m_lookupTable.containsKey(m_population[i]
1087                                    .getChromosome()) == false) {
1088        merit = ASEvaluator.evaluateSubset(m_population[i].getChromosome());
1089        m_population[i].setObjective(merit);
1090        m_lookupTable.put(m_population[i].getChromosome(),m_population[i]);
1091      } else {
1092        GABitSet temp = (GABitSet)m_lookupTable.
1093          get(m_population[i].getChromosome());
1094        m_population[i].setObjective(temp.getObjective());
1095      }
1096    }
1097  }
1098
1099  /**
1100   * creates random population members for the initial population. Also
1101   * sets the first population member to be a start set (if any)
1102   * provided by the user
1103   * @throws Exception if the population can't be created
1104   */
1105  private void initPopulation () throws Exception {
1106    int i,j,bit;
1107    int num_bits;
1108    boolean ok;
1109    int start = 0;
1110
1111    // add the start set as the first population member (if specified)
1112    if (m_starting != null) {
1113      m_population[0] = new GABitSet();
1114      for (i=0;i<m_starting.length;i++) {
1115        if ((m_starting[i]) != m_classIndex) {
1116          m_population[0].set(m_starting[i]);
1117        }
1118      }
1119      start = 1;
1120    }
1121
1122    for (i=start;i<m_popSize;i++) {
1123      m_population[i] = new GABitSet();
1124     
1125      num_bits = m_random.nextInt();
1126      num_bits = num_bits % m_numAttribs-1;
1127      if (num_bits < 0) {
1128        num_bits *= -1;
1129      }
1130      if (num_bits == 0) {
1131        num_bits = 1;
1132      }
1133
1134      for (j=0;j<num_bits;j++) {
1135        ok = false;
1136        do {
1137          bit = m_random.nextInt();
1138          if (bit < 0) {
1139            bit *= -1;
1140          }
1141          bit = bit % m_numAttribs;
1142          if (m_hasClass) {
1143            if (bit != m_classIndex) {
1144              ok = true;
1145            }
1146          }
1147          else {
1148            ok = true;
1149          }
1150        } while (!ok);
1151       
1152        if (bit > m_numAttribs) {
1153          throw new Exception("Problem in population init");
1154        }
1155        m_population[i].set(bit);
1156      }
1157    }
1158  }
1159
1160  /**
1161   * calculates summary statistics for the current population
1162   */
1163  private void populationStatistics() {
1164    int i;
1165   
1166    m_sumFitness = m_minFitness = m_maxFitness = 
1167      m_population[0].getObjective();
1168
1169    for (i=1;i<m_popSize;i++) {
1170      m_sumFitness += m_population[i].getObjective();
1171      if (m_population[i].getObjective() > m_maxFitness) {
1172        m_maxFitness = m_population[i].getObjective();
1173      }
1174      else if (m_population[i].getObjective() < m_minFitness) {
1175        m_minFitness = m_population[i].getObjective();
1176      }
1177    }
1178    m_avgFitness = (m_sumFitness / m_popSize);
1179  }
1180
1181  /**
1182   * scales the raw (objective) merit of the population members
1183   */
1184  private void scalePopulation() {
1185    int j;
1186    double a = 0;
1187    double b = 0;
1188    double fmultiple = 2.0;
1189    double delta;
1190   
1191    // prescale
1192    if (m_minFitness > ((fmultiple * m_avgFitness - m_maxFitness) / 
1193                        (fmultiple - 1.0))) {
1194      delta = m_maxFitness - m_avgFitness;
1195      a = ((fmultiple - 1.0) * m_avgFitness / delta);
1196      b = m_avgFitness * (m_maxFitness - fmultiple * m_avgFitness) / delta;
1197    }
1198    else {
1199      delta = m_avgFitness - m_minFitness;
1200      a = m_avgFitness / delta;
1201      b = -m_minFitness * m_avgFitness / delta;
1202    }
1203     
1204    // scalepop
1205    m_sumFitness = 0;
1206    for (j=0;j<m_popSize;j++) {
1207      if (a == Double.POSITIVE_INFINITY || a == Double.NEGATIVE_INFINITY ||
1208          b == Double.POSITIVE_INFINITY || b == Double.NEGATIVE_INFINITY) {
1209        m_population[j].setFitness(m_population[j].getObjective());
1210      } else {
1211        m_population[j].
1212          setFitness(Math.abs((a * m_population[j].getObjective() + b)));
1213      }
1214      m_sumFitness += m_population[j].getFitness();
1215    }
1216  }
1217 
1218  /**
1219   * generates a report on the current population
1220   * @return a report as a String
1221   */
1222  private String populationReport (int genNum) {
1223    int i;
1224    StringBuffer temp = new StringBuffer();
1225
1226    if (genNum == 0) {
1227      temp.append("\nInitial population\n");
1228    }
1229    else {
1230      temp.append("\nGeneration: "+genNum+"\n");
1231    }
1232    temp.append("merit   \tscaled  \tsubset\n");
1233   
1234    for (i=0;i<m_popSize;i++) {
1235      temp.append(Utils.doubleToString(Math.
1236                                       abs(m_population[i].getObjective()),
1237                                       8,5)
1238                  +"\t"
1239                  +Utils.doubleToString(m_population[i].getFitness(),
1240                                        8,5)
1241                  +"\t");
1242
1243      temp.append(printPopMember(m_population[i].getChromosome())+"\n");
1244    }
1245    return temp.toString();
1246  }
1247
1248  /**
1249   * prints a population member as a series of attribute numbers
1250   * @param temp the chromosome of a population member
1251   * @return a population member as a String of attribute numbers
1252   */
1253  private String printPopMember(BitSet temp) {
1254    StringBuffer text = new StringBuffer();
1255
1256    for (int j=0;j<m_numAttribs;j++) {
1257      if (temp.get(j)) {
1258        text.append((j+1)+" ");
1259      }
1260    }
1261    return text.toString();
1262  }
1263
1264  /**
1265   * prints a population member's chromosome
1266   * @param temp the chromosome of a population member
1267   * @return a population member's chromosome as a String
1268   */
1269  private String printPopChrom(BitSet temp) {
1270    StringBuffer text = new StringBuffer();
1271
1272    for (int j=0;j<m_numAttribs;j++) {
1273      if (temp.get(j)) {
1274        text.append("1");
1275      } else {
1276        text.append("0");
1277      }
1278    }
1279    return text.toString();
1280  }
1281
1282  /**
1283   * reset to default values for options
1284   */
1285  private void resetOptions () {
1286    m_population = null;
1287    m_popSize = 20;
1288    m_lookupTableSize = 1001;
1289    m_pCrossover = 0.6;
1290    m_pMutation = 0.033;
1291    m_maxGenerations = 20;
1292    m_reportFrequency = m_maxGenerations;
1293    m_starting = null;
1294    m_startRange = new Range();
1295    m_seed = 1;
1296  }
1297 
1298  /**
1299   * Returns the revision string.
1300   *
1301   * @return            the revision
1302   */
1303  public String getRevision() {
1304    return RevisionUtils.extract("$Revision: 5286 $");
1305  }
1306}
Note: See TracBrowser for help on using the repository browser.