source: src/main/java/weka/attributeSelection/ScatterSearchV1.java @ 5

Last change on this file since 5 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 36.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    ScatterSearchV1.java
19 *    Copyright (C) 2008 Adrian Pino
20 *    Copyright (C) 2008 University of Waikato, Hamilton, NZ
21 *
22 */
23
24package weka.attributeSelection;
25
26
27import java.io.Serializable;
28import java.util.ArrayList;
29import java.util.BitSet;
30import java.util.Enumeration;
31import java.util.List;
32import java.util.Random;
33import java.util.Vector;
34
35import weka.core.Instances;
36import weka.core.Option;
37import weka.core.OptionHandler;
38import weka.core.RevisionUtils;
39import weka.core.SelectedTag;
40import weka.core.Tag;
41import weka.core.TechnicalInformation;
42import weka.core.TechnicalInformationHandler;
43import weka.core.Utils;
44import weka.core.TechnicalInformation.Field;
45import weka.core.TechnicalInformation.Type;
46
47
48/**
49 * Class for performing the Sequential Scatter Search. <p>
50 *
51 <!-- globalinfo-start -->
52 * Scatter Search :<br/>
53 * <br/>
54 * Performs an Scatter Search  through the space of attribute subsets. Start with a population of many significants and diverses subset  stops when the result is higher than a given treshold or there's not more improvement<br/>
55 * For more information see:<br/>
56 * <br/>
57 * Felix Garcia Lopez (2004). Solving feature subset selection problem by a Parallel Scatter Search. Elsevier.
58 * <p/>
59 <!-- globalinfo-end -->
60 *
61 <!-- options-start -->
62 * Valid options are: <p/>
63 *
64 * <pre> -Z &lt;num&gt;
65 *  Specify the number of subsets to generate
66 *  in the initial population..</pre>
67 *
68 * <pre> -T &lt;threshold&gt;
69 *  Specify the treshold used for considering when a subset is significant.</pre>
70 *
71 * <pre> -R &lt;0 = greedy combination | 1 = reduced greedy combination &gt;
72 *  Specify the kind of combiantion
73 *  for using it in the combination method.</pre>
74 *
75 * <pre> -S &lt;seed&gt;
76 *  Set the random number seed.
77 *  (default = 1)</pre>
78 *
79 * <pre> -D
80 *  Verbose output for monitoring the search.</pre>
81 *
82 <!-- options-end -->
83 *
84 <!-- technical-bibtex-start -->
85 * BibTeX:
86 * <pre>
87 * &#64;book{Lopez2004,
88 *    author = {Felix Garcia Lopez},
89 *    month = {October},
90 *    publisher = {Elsevier},
91 *    title = {Solving feature subset selection problem by a Parallel Scatter Search},
92 *    year = {2004},
93 *    language = {English}
94 * }
95 * </pre>
96 * <p/>
97 <!-- technical-bibtex-end -->
98 *
99 * from the Book: Solving feature subset selection problem by a Parallel Scatter Search, Felix Garcia Lopez.
100 * @author Adrian Pino (apinoa@facinf.uho.edu.cu)
101 * @version $Revision: 4977 $
102 *
103 */
104
105public class ScatterSearchV1 extends ASSearch
106  implements OptionHandler, TechnicalInformationHandler {
107
108  /** for serialization */
109  static final long serialVersionUID = -8512041420388121326L;
110
111  /** number of attributes in the data */
112  private int m_numAttribs;
113
114  /** holds the class index */
115  private int m_classIndex;
116
117  /** holds the treshhold that delimits the good attributes */
118  private double m_treshold;
119
120  /** the initial threshold */
121  private double m_initialThreshold;
122
123  /** the kind of comination betwen parents ((0)greedy combination/(1)reduced greedy combination)*/
124  int m_typeOfCombination;
125
126  /** random number generation */
127  private Random m_random;
128
129  /** seed for random number generation */
130  private int m_seed;
131
132  /** verbose output for monitoring the search and debugging */
133  private boolean m_debug = false;
134
135  /** holds a report of the search */
136  private StringBuffer m_InformationReports;
137
138  /** total number of subsets evaluated during a search */
139  private int m_totalEvals;
140
141  /** holds the merit of the best subset found */
142  protected double m_bestMerit;
143
144  /** time for procesing the search method */
145  private long m_processinTime;
146
147  /** holds the Initial Population of Subsets*/
148  private List<Subset> m_population;
149
150  /** holds the population size*/
151  private int m_popSize;
152
153  /** holds the user selected initial population size */
154  private int m_initialPopSize;
155
156  /** if no initial user pop size, then this holds the initial
157   * pop size calculated from the number of attributes in the data
158   * (for use in the toString() method)
159   */
160  private int m_calculatedInitialPopSize;
161
162  /** holds the subsets most significants and diverses
163   * of the population (ReferenceSet).
164   *
165   * (transient because the subList() method returns
166   * a non serializable Object).
167   */
168  private transient List<Subset> m_ReferenceSet;
169
170  /** holds the greedy combination(reduced or not) of all the subsets of the ReferenceSet*/
171  private transient List<Subset> m_parentsCombination;
172
173  /**holds the attributes ranked*/
174  private List<Subset> m_attributeRanking;
175
176  /**Evaluator used to know the significance of a subset (for guiding the search)*/
177  private SubsetEvaluator ASEvaluator =null;
178
179
180  /** kind of combination */
181  protected static final int COMBINATION_NOT_REDUCED = 0;
182  protected static final int COMBINATION_REDUCED = 1;  ;
183  public static final Tag [] TAGS_SELECTION = {
184    new Tag(COMBINATION_NOT_REDUCED, "Greedy Combination"),
185    new Tag(COMBINATION_REDUCED, "Reduced Greedy Combination")
186  };
187
188  /**
189   * Returns a string describing this search method
190   * @return a description of the search suitable for
191   * displaying in the explorer/experimenter gui
192   */
193  public String globalInfo() {
194    return "Scatter Search :\n\nPerforms an Scatter Search  "
195      +"through "
196      +"the space of attribute subsets. Start with a population of many significants and diverses subset "
197      +" stops when the result is higher than a given treshold or there's not more improvement\n"
198      + "For more information see:\n\n"
199      + getTechnicalInformation().toString();
200  }
201
202  /**
203   * Returns an instance of a TechnicalInformation object, containing
204   * detailed information about the technical background of this class,
205   * e.g., paper reference or book this class is based on.
206   *
207   * @return the technical information about this class
208   */
209  public TechnicalInformation getTechnicalInformation() {
210    TechnicalInformation        result;
211
212    result = new TechnicalInformation(Type.BOOK);
213    result.setValue(Field.AUTHOR, "Felix Garcia Lopez");
214    result.setValue(Field.MONTH, "October");
215    result.setValue(Field.YEAR, "2004");
216    result.setValue(Field.TITLE, "Solving feature subset selection problem by a Parallel Scatter Search");
217    result.setValue(Field.PUBLISHER, "Elsevier");
218    result.setValue(Field.LANGUAGE, "English");
219
220    return result;
221  }
222
223  /**
224   * Returns the revision string.
225   *
226   * @return the revision
227   */
228  public String getRevision() {
229    return RevisionUtils.extract("$Revision: 1.0$");
230  }
231
232  public ScatterSearchV1 () {
233    resetOptions();
234  }
235
236  /**
237   * Returns the tip text for this property
238   * @return tip text for this property suitable for
239   * displaying in the explorer/experimenter gui
240   */
241  public String tresholdTipText() {
242    return "Set the treshold that subsets most overcome to be considered as significants";
243  }
244
245  /**
246   * Set the treshold
247   *
248   * @param treshold for identifyng significant subsets
249   */
250  public void setTreshold (double treshold) {
251    m_initialThreshold = treshold;
252  }
253
254  /**
255   * Get the treshold
256   *
257   * @return the treshold that subsets most overcome to be considered as significants
258   */
259  public double getTreshold () {
260    return m_initialThreshold;
261  }
262
263
264  /**
265   * Returns the tip text for this property
266   * @return tip text for this property suitable for
267   * displaying in the explorer/experimenter gui
268   */
269  public String populationSizeTipText() {
270    return "Set the number of subset to generate in the initial Population";
271  }
272
273  /**
274   * Set the population size
275   *
276   * @param size the number of subset in the initial population
277   */
278  public void setPopulationSize (int size) {
279    m_initialPopSize = size;
280  }
281
282  /**
283   * Get the population size
284   *
285   * @return the number of subsets to generate in the initial population
286   */
287  public int getPopulationSize () {
288    return m_initialPopSize;
289  }
290
291
292  /**
293   * Returns the tip text for this property
294   * @return tip text for this property suitable for
295   * displaying in the explorer/experimenter gui
296   */
297  public String combinationTipText() {
298    return "Set the kind of combination for using it to combine ReferenceSet subsets.";
299  }
300
301  /**
302   * Set the kind of combination
303   *
304   * @param c the kind of combination of the search
305   */
306  public void setCombination (SelectedTag c) {
307    if (c.getTags() == TAGS_SELECTION) {
308      m_typeOfCombination = c.getSelectedTag().getID();
309    }
310  }
311
312  /**
313   * Get the combination
314   *
315   * @return the kind of combination used in the Combination method
316   */
317  public SelectedTag getCombination () {
318    return new SelectedTag(m_typeOfCombination, TAGS_SELECTION);
319  }
320
321  /**
322   * Returns the tip text for this property
323   * @return tip text for this property suitable for
324   * displaying in the explorer/experimenter gui
325   */
326  public String seedTipText() {
327    return "Set the random seed.";
328  }
329
330  /**
331   * set the seed for random number generation
332   * @param s seed value
333   */
334  public void setSeed(int s) {
335    m_seed = s;
336  }
337
338  /**
339   * get the value of the random number generator's seed
340   * @return the seed for random number generation
341   */
342  public int getSeed() {
343    return m_seed;
344  }
345
346  /**
347   * Returns the tip text for this property
348   * @return tip text for this property suitable for
349   * displaying in the explorer/experimenter gui
350   */
351  public String debugTipText() {
352    return "Turn on verbose output for monitoring the search's progress.";
353  }
354
355  /**
356   * Set whether verbose output should be generated.
357   * @param d true if output is to be verbose.
358   */
359  public void setDebug(boolean d) {
360    m_debug = d;
361  }
362
363  /**
364   * Get whether output is to be verbose
365   * @return true if output will be verbose
366   */
367  public boolean getDebug() {
368    return m_debug;
369  }
370
371  /**
372   * Returns an enumeration describing the available options.
373   * @return an enumeration of all the available options.
374   **/
375  public Enumeration listOptions () {
376    Vector newVector = new Vector(6);
377
378    newVector.addElement(new Option("\tSpecify the number of subsets to generate "
379                                    + "\n\tin the initial population.."
380                                    ,"Z",1
381                                    , "-Z <num>"));
382    newVector.addElement(new Option("\tSpecify the treshold used for considering when a subset is significant."
383                                    , "T", 1
384                                    , "-T <threshold>"));
385    newVector.addElement(new Option("\tSpecify the kind of combiantion "
386                                    + "\n\tfor using it in the combination method."
387                                    , "R", 1, "-R <0 = greedy combination | 1 = reduced greedy combination >"));
388          newVector.addElement(new Option("\tSet the random number seed."
389                                    +"\n\t(default = 1)"
390                                    , "S", 1, "-S <seed>"));
391    newVector.addElement(new Option("\tVerbose output for monitoring the search.","D",0,"-D"));
392
393    return  newVector.elements();
394  }
395
396  /**
397   * Parses a given list of options.
398   *
399   <!-- options-start -->
400   * Valid options are: <p>
401   *
402   * -Z <br>
403   * Specify the number of subsets to generate in the initial population.<p>
404   *
405   * -T <start set> <br>
406   * Specify the treshold used for considering when a subset is significant. <p>
407   *
408   * -R <br>
409   * Specify the kind of combiantion. <p>
410   *
411   * -S <br>
412   *  Set the random number seed.
413   *  (default = 1)
414   *
415   * -D <br>
416   *  Verbose output for monitoring the search
417   *  (default = false)
418   * 
419   <!-- options-end -->
420   *
421   * @param options the list of options as an array of strings
422   * @exception Exception if an option is not supported
423   *
424   **/
425  public void setOptions (String[] options)
426    throws Exception {
427    String optionString;
428    resetOptions();
429
430    optionString = Utils.getOption('Z', options);
431    if (optionString.length() != 0) {
432      setPopulationSize(Integer.parseInt(optionString));
433    }
434
435    optionString = Utils.getOption('T', options);
436    if (optionString.length() != 0) {
437      setTreshold(Double.parseDouble(optionString));
438    }
439
440    optionString = Utils.getOption('R', options);
441    if (optionString.length() != 0) {
442      setCombination(new SelectedTag(Integer.parseInt(optionString),
443                                   TAGS_SELECTION));
444    } else {
445      setCombination(new SelectedTag(COMBINATION_NOT_REDUCED, TAGS_SELECTION));
446    }
447
448    optionString = Utils.getOption('S', options);
449    if (optionString.length() != 0) {
450      setSeed(Integer.parseInt(optionString));
451    }
452
453    setDebug(Utils.getFlag('D', options));
454  }
455
456  /**
457   * Gets the current settings of ScatterSearchV1.
458   *
459   * @return an array of strings suitable for passing to setOptions()
460   */
461  public String[] getOptions () {
462    String[] options = new String[9];
463    int current = 0;
464
465    options[current++] = "-T";
466    options[current++] = "" + getTreshold ();
467
468    options[current++] = "-Z";
469    options[current++] = ""+getPopulationSize ();
470
471    options[current++] = "-R";
472    options[current++] = ""+String.valueOf (getCombination ().getSelectedTag ().getID ());
473
474    options[current++] = "-S";
475    options[current++] = "" + getSeed();
476
477    if (getDebug())
478      options[current++] = "-D";
479
480    while (current < options.length)
481      options[current++] = "";
482
483    return  options;
484  }
485
486  /**
487   * returns a description of the search.
488   * @return a description of the search as a String.
489   */
490  public String toString() {
491    StringBuffer FString = new StringBuffer();
492    FString.append("\tScatter Search "
493                   + "\n\tInit Population: "+m_calculatedInitialPopSize);
494
495    FString.append("\n\tKind of Combination: "
496                   +getCombination ().getSelectedTag ().getReadable ());
497
498                FString.append("\n\tRandom number seed: "+m_seed);
499
500                FString.append("\n\tDebug: "+m_debug);
501
502    FString.append("\n\tTreshold: "
503                   +Utils.doubleToString(Math.abs(getTreshold ()),8,3)+"\n");
504
505    FString.append("\tTotal number of subsets evaluated: "
506                    + m_totalEvals + "\n");
507
508    FString.append("\tMerit of best subset found: "
509                    +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n");
510
511    /* FString.append("\tTime procesing the search space: "
512                    +(double)m_processinTime/1000+" seconds\n"); */
513
514    if(m_debug)
515      return FString.toString()+"\n\n"+m_InformationReports.toString ();
516
517    return FString.toString();
518  }
519
520
521  /**
522   * Searches the attribute subset space using Scatter Search.
523   *
524   * @param ASEval the attribute evaluator to guide the search
525   * @param data the training instances.
526   * @return an array of selected attribute indexes
527   * @exception Exception if the search can't be completed
528   */
529  public int[] search(ASEvaluation ASEval, Instances data)
530    throws Exception{
531
532    m_totalEvals = 0;
533    m_popSize = m_initialPopSize;
534    m_calculatedInitialPopSize = m_initialPopSize;
535    m_treshold = m_initialThreshold;
536    m_processinTime =System.currentTimeMillis ();
537    m_InformationReports = new StringBuffer();
538
539    m_numAttribs =data.numAttributes ();
540    m_classIndex =data.classIndex ();
541
542    if(m_popSize<=0) {
543      m_popSize =m_numAttribs/2;
544      m_calculatedInitialPopSize = m_popSize;
545    }
546
547    ASEvaluator =(SubsetEvaluator)ASEval;
548
549    if(!(m_treshold >= 0)){
550      m_treshold =calculateTreshhold();
551      m_totalEvals++;
552    }
553
554    m_random = new Random(m_seed);
555
556    m_attributeRanking =RankEachAttribute();
557
558    CreatePopulation(m_popSize);
559
560    int bestSolutions =m_popSize/4;
561    int divSolutions =m_popSize/4;
562
563    if(m_popSize < 4){
564
565      bestSolutions = m_popSize/2;
566      divSolutions = m_popSize/2;
567
568      if(m_popSize == 1) return attributeList(((Subset)m_population.get (0)).subset);
569    }
570
571
572    m_ReferenceSet =new ArrayList<Subset>();
573
574    for (int i = 0; i<m_population.size (); i++) {
575      m_ReferenceSet.add (m_population.get (i)) ;
576    }
577
578
579    GenerateReferenceSet(m_ReferenceSet, bestSolutions, divSolutions);
580
581
582    m_InformationReports.append ("Population: "+m_population.size ()+"\n");
583    m_InformationReports.append ("merit    \tsubset\n");
584
585    for (int i = 0; i < m_population.size (); i++)
586        m_InformationReports.append (printSubset (m_population.get (i)));
587
588
589    m_ReferenceSet =m_ReferenceSet.subList (0,bestSolutions+divSolutions);
590
591
592    /*TEST*/
593    m_InformationReports.append ("\nReferenceSet:");
594    m_InformationReports.append ("\n----------------Most Significants Solutions--------------\n");
595    for (int i = 0; i<m_ReferenceSet.size (); i++) {
596      if(i ==bestSolutions) m_InformationReports.append ("----------------Most Diverses Solutions--------------\n");
597      m_InformationReports.append(printSubset (m_ReferenceSet.get (i)));
598    }
599
600
601    Subset bestTemp =new Subset(new BitSet(m_numAttribs),0);
602
603    while (!(bestTemp.isEqual (m_ReferenceSet.get (0))) /*|| (m_treshold > bestTemp.merit)*/) {
604          //while(){
605              CombineParents();
606                    ImproveSolutions();
607   // }
608        bestTemp =m_ReferenceSet.get (0);
609
610        int numBest =m_ReferenceSet.size ()/2;
611        int numDiverses =m_ReferenceSet.size ()/2;
612
613              UpdateReferenceSet(numBest, numDiverses);
614              m_ReferenceSet = m_ReferenceSet.subList (0,numBest+numDiverses);
615
616    }
617
618    m_InformationReports.append("\nLast Reference Set Updated:\n");
619    m_InformationReports.append ("merit    \tsubset\n");
620
621    for (int i = 0; i <m_ReferenceSet.size (); i++)
622        m_InformationReports.append (printSubset (m_ReferenceSet.get (i)));
623
624
625          m_bestMerit =bestTemp.merit;
626
627          m_processinTime =System.currentTimeMillis () -m_processinTime;
628
629    return attributeList (bestTemp.subset);
630  }
631
632  /**
633   * Generate the a ReferenceSet containing the n best solutions and the m most diverse solutions of
634   * the initial Population.
635   *
636   * @param ReferenceSet the ReferenceSet for storing these solutions
637   * @param bestSolutions the number of the most pure solutions.
638   * @param divSolutions the number of the most diverses solutions acording to the bestSolutions.
639   */
640  public void GenerateReferenceSet(List<Subset> ReferenceSet, int bestSolutions, int divSolutions){
641
642    //Sorting the Initial ReferenceSet
643    ReferenceSet =bubbleSubsetSort (ReferenceSet);
644
645    // storing all the attributes that are now in the ReferenceSet (just till bestSolutions)
646    BitSet allBits_RefSet =getAllBits (ReferenceSet.subList (0,bestSolutions));
647
648    // for stopping when ReferenceSet.size () ==bestSolutions+divSolutions
649    int refSetlength =bestSolutions;
650    int total =bestSolutions+divSolutions;
651
652    while (refSetlength <total) {
653
654      List<Integer> aux =new ArrayList<Integer>();
655
656      for (int i =refSetlength; i <ReferenceSet.size (); i ++) {
657        aux.add (SimetricDiference (((Subset)ReferenceSet.get (i)).clone (),allBits_RefSet));
658          }
659
660
661          int mostDiv =getIndexofBiggest(aux);
662          ReferenceSet.set(refSetlength, ReferenceSet.get (refSetlength+mostDiv));
663          //ReferenceSet.remove (refSetlength +mostDiv);
664
665          refSetlength++;
666
667          allBits_RefSet =getAllBits (ReferenceSet.subList (0,refSetlength));
668        }
669
670          ReferenceSet =filterSubset (ReferenceSet,refSetlength);
671  }
672
673  /**
674   * Update the ReferenceSet putting the new obtained Solutions there
675   *
676   * @param numBestSolutions the number of the most pure solutions.
677   * @param numDivsSolutions the number of the most diverses solutions acording to the bestSolutions.
678  */
679  public void UpdateReferenceSet(int numBestSolutions, int numDivsSolutions){
680
681    for (int i = 0; i <m_parentsCombination.size (); i++) m_ReferenceSet.add (i, m_parentsCombination.get (i));
682
683      GenerateReferenceSet (m_ReferenceSet,numBestSolutions,numDivsSolutions);
684  }
685
686  /**
687   * Improve the solutions previously combined by adding the attributes that improve that solution
688   * @exception Exception if there is some trouble evaluating the candidate solutions
689   */
690  public void ImproveSolutions()
691    throws Exception{
692
693    for (int i = 0; i<m_parentsCombination.size (); i++) {
694
695      BitSet aux1 =(BitSet)((Subset)m_parentsCombination.get (i)).subset.clone ();
696      List<Subset> ranking =new ArrayList<Subset>();
697
698      /*
699      for(int j=aux1.nextClearBit (0); j<=m_numAttribs; j=aux1.nextClearBit(j+1)){
700        if(j ==m_classIndex)continue;
701
702        BitSet aux2 =new BitSet(m_numAttribs);
703        aux2.set (j);
704
705        double merit =ASEvaluator.evaluateSubset (aux2);
706        m_totalEvals++;
707
708        ranking.add (new Subset((BitSet)aux2.clone (), merit));
709      }
710
711      ranking =bubbleSubsetSort (ranking);
712      */
713
714      for (int k =0; k <m_attributeRanking.size (); k ++) {
715        Subset s1 =((Subset)m_attributeRanking.get (k)).clone ();
716        BitSet b1 =(BitSet)s1.subset.clone ();
717
718        Subset s2 =((Subset)m_parentsCombination.get (i)).clone ();
719        BitSet b2 =(BitSet)s2.subset.clone ();
720
721        if(b2.get (b1.nextSetBit (0))) continue;
722
723        b2.or (b1);
724        double newMerit =ASEvaluator.evaluateSubset (b2);
725        m_totalEvals++;
726
727        if(newMerit <= s2.merit)break;
728
729        m_parentsCombination.set (i,new Subset(b2,newMerit));
730            }
731
732      filterSubset (m_parentsCombination,m_ReferenceSet.size());
733    }
734  }
735
736   /**
737   * Combine all the posible pair solutions existing in the Population
738   *
739   * @exception Exception if there is some trouble evaluating the new childs
740   */
741  public void CombineParents()
742    throws Exception{
743
744    m_parentsCombination =new ArrayList<Subset>();
745
746    // this two 'for' are for selecting parents in the refSet
747        for (int i= 0; i <m_ReferenceSet.size ()-1; i ++) {
748          for (int j= i+1; j <m_ReferenceSet.size (); j ++) {
749
750            // Selecting parents
751            Subset parent1 =m_ReferenceSet.get (i);
752            Subset parent2 =m_ReferenceSet.get (j);
753
754            // Initializing childs Intersecting parents
755            Subset child1 = intersectSubsets (parent1, parent2);
756            Subset child2 =child1.clone ();
757
758            // Initializing childs Intersecting parents
759            Subset simDif =simetricDif (parent1, parent2, getCombination ().getSelectedTag ().getID ());
760
761            BitSet aux =(BitSet)simDif.subset.clone ();
762
763            boolean improvement =true;
764
765            while (improvement) {
766
767              Subset best1 =getBestgen (child1,aux);
768              Subset best2 =getBestgen (child2,aux);
769
770              if(best1 !=null || best2!=null){
771
772                if(best2 ==null){
773                  child1 =best1.clone ();
774                  continue;
775                }
776                if(best1 ==null){
777                  child2 =best2.clone ();
778                  continue;
779                }
780                if(best1 !=null && best2 !=null){
781                  double merit1 =best1.merit;
782                  double merit2 =best2.merit;
783
784                  if(merit1 >merit2){
785                        child1 =best1.clone ();
786                        continue;
787                  }
788                  if(merit1 <merit2){
789                        child2 =best2.clone ();
790                        continue;
791                  }
792                  if(merit1 ==merit2){
793                        if(best1.subset.cardinality () > best2.subset.cardinality ()){
794                          child2 =best2.clone ();
795                          continue;
796                        }
797                        if(best1.subset.cardinality () < best2.subset.cardinality ()){
798                          child1 =best1.clone ();
799                          continue;
800                        }
801                        if(best1.subset.cardinality () == best2.subset.cardinality ()){
802                          double random = m_random.nextDouble ();
803                          if(random < 0.5)child1 =best1.clone ();
804                          else child2 =best2.clone ();
805                          continue;
806                    }
807                  }
808                }
809
810              }else{
811                m_parentsCombination.add (child1);
812                m_parentsCombination.add (child2);
813                improvement =false;
814              }
815            }
816          }
817    }
818    m_parentsCombination = filterSubset (m_parentsCombination,m_ReferenceSet.size());
819
820    GenerateReferenceSet (m_parentsCombination,m_ReferenceSet.size ()/2, m_ReferenceSet.size ()/2);
821    m_parentsCombination = m_parentsCombination.subList (0, m_ReferenceSet.size ());
822
823  }
824  /**
825   * Create the initial Population
826   *
827   * @param popSize the size of the initial population
828   * @exception Exception if there is a trouble evaluating any solution
829   */
830  public void CreatePopulation(int popSize)
831    throws Exception{
832
833    InitPopulation(popSize);
834
835        /** Delimit the best attributes from the worst*/
836        int segmentation =m_numAttribs/2;
837
838        /*TEST*/
839  /*  System.out.println ("AttributeRanking");
840    for (int i = 0; i <attributeRanking.size (); i++){
841      if(i ==segmentation)System.out.println ("-------------------------SEGMENTATION------------------------");
842      printSubset (attributeRanking.get (i));
843    }
844    */
845        for (int i = 0; i<m_popSize; i++) {
846
847          List<Subset> attributeRankingCopy = new ArrayList<Subset>();
848          for (int j = 0; j<m_attributeRanking.size (); j++) attributeRankingCopy.add (m_attributeRanking.get (j));
849
850
851          double last_evaluation =-999;
852          double current_evaluation =0;
853
854          boolean doneAnew =true;
855
856          while (true) {
857
858            // generate a random number in the interval[0..segmentation]
859                int random_number = m_random.nextInt (segmentation+1) /*generateRandomNumber (segmentation)*/;
860
861                if(doneAnew && i <=segmentation)random_number =i;
862                doneAnew =false;
863
864                Subset s1 =((Subset)attributeRankingCopy.get (random_number)).clone ();
865                Subset s2 =((Subset)m_population.get (i)).clone ();
866
867
868                // trying to add a new gen in the chromosome i of the population
869                Subset joiners =joinSubsets (s1, s2 );
870
871                current_evaluation =joiners.merit;
872
873                if(current_evaluation > last_evaluation){
874                  m_population.set (i,joiners);
875                  last_evaluation =current_evaluation;
876
877                  try {
878                attributeRankingCopy.set (random_number, attributeRankingCopy.get (segmentation+1));
879                    attributeRankingCopy.remove (segmentation+1);
880          }catch (IndexOutOfBoundsException ex) {
881            attributeRankingCopy.set (random_number,new Subset(new BitSet(m_numAttribs),0));
882          continue;
883           }
884                }
885                else{
886                // there's not more improvement
887                break;
888                }
889
890
891          }
892        }
893
894        //m_population =bubbleSubsetSort (m_population);
895  }
896
897
898    /**
899   * Rank all the attributes individually acording to their merits
900   *
901   * @return an ordered List  of Subsets with just one attribute
902   * @exception Exception if the evaluation can not be completed
903   */
904  public List<Subset> RankEachAttribute()
905    throws Exception{
906
907    List<Subset> result =new ArrayList<Subset>();
908
909    for (int i = 0; i<m_numAttribs; i++) {
910      if(i==m_classIndex)continue;
911
912          BitSet an_Attribute =new BitSet(m_numAttribs);
913          an_Attribute.set (i);
914
915          double merit =ASEvaluator.evaluateSubset (an_Attribute);
916      m_totalEvals++;
917
918          result.add (new Subset(an_Attribute, merit));
919        }
920
921        return bubbleSubsetSort(result);
922  }
923
924
925    //..........
926
927
928    /**
929   * Evaluate each gen of a BitSet inserted in a Subset and get the most significant for that Subset
930   *
931   * @return a new Subset with the union of subset and the best gen of gens.
932   *  in case that there's not improvement with each gen return null
933   * @exception Exception if the evaluation of can not be completed
934   */
935  public Subset getBestgen(Subset subset, BitSet gens)
936    throws Exception{
937    Subset result =null;
938
939    double merit1 =subset.merit;
940
941    for(int i =gens.nextSetBit(0); i >=0; i =gens.nextSetBit(i+1)){
942      BitSet aux =(BitSet)subset.subset.clone ();
943
944          if(aux.get (i))continue;
945          aux.set (i);
946
947          double merit2 =ASEvaluator.evaluateSubset (aux);
948          m_totalEvals++;
949
950          if(merit2 >merit1){
951            merit1 =merit2;
952            result =new Subset(aux,merit1);
953          }
954        }
955
956        return result;
957  }
958
959  /**
960   * Sort a List of subsets according to their merits
961   *
962   * @param subsetList the subsetList to be ordered
963   * @return a List with ordered subsets
964   */
965  public List<Subset> bubbleSubsetSort(List<Subset> subsetList){
966    List<Subset> result =new ArrayList<Subset>();
967
968    for (int i = 0; i<subsetList.size ()-1; i++) {
969      Subset subset1 =subsetList.get (i);
970      double merit1 =subset1.merit;
971
972      for (int j = i+1; j<subsetList.size (); j++) {
973        Subset subset2 =subsetList.get (j);
974        double merit2 =subset2.merit;
975
976        if(merit2 > merit1){
977          Subset temp =subset1;
978
979          subsetList.set (i,subset2);
980          subsetList.set (j,temp);
981
982          subset1 =subset2;
983          merit1 =subset1.merit;
984        }
985            }
986    }
987          return subsetList;
988  }
989
990
991  /**
992   * get the index in a List where this have the biggest number
993   *
994   * @param simDif the Lists of numbers for getting from them the index of the bigger
995   * @return an index that represents where the bigest number is.
996   */
997  public int getIndexofBiggest(List<Integer> simDif){
998    int aux =-99999;
999    int result1 =-1;
1000    List<Integer> equalSimDif =new ArrayList<Integer>();
1001
1002    if(simDif.size ()==0) return -1;
1003
1004    for (int i = 0; i<simDif.size (); i++) {
1005      if(simDif.get (i) >aux){
1006        aux =simDif.get (i);
1007        result1 =i;
1008      }
1009        }
1010
1011        for (int i =0; i <simDif.size (); i++) {
1012          if(simDif.get (i) ==aux){
1013        equalSimDif.add (i);
1014    }
1015        }
1016
1017        int finalResult =equalSimDif.get (m_random.nextInt (equalSimDif.size ()) /*generateRandomNumber (equalSimDif.size ()-1)*/);
1018
1019        return finalResult;
1020  }
1021
1022   /**
1023   * Save in Bitset all the gens that are in many others subsets.
1024   *
1025   * @param subsets the Lists of subsets for getting from them all their gens
1026   * @return a Bitset with all the gens contained in many others subsets.
1027   */
1028  public BitSet getAllBits(List<Subset> subsets){
1029    BitSet result =new BitSet(m_numAttribs);
1030
1031    for (int i =0; i <subsets.size (); i ++) {
1032      BitSet aux =((Subset)subsets.get (i)).clone ().subset;
1033
1034      for(int j=aux.nextSetBit(0); j>=0; j=aux.nextSetBit(j+1)) {
1035        result.set (j);
1036            }
1037          }
1038
1039        return result;
1040  }
1041
1042   /**
1043   * Creating space for introducing the population
1044   *
1045   * @param popSize the number of subset in the initial population
1046   */
1047  public void InitPopulation(int popSize){
1048    m_population =new ArrayList<Subset>();
1049    for (int i = 0; i<popSize; i++)m_population.add (new Subset(new BitSet(m_numAttribs),0));
1050  }
1051
1052   /**
1053   * Join two subsets
1054   *
1055   * @param subset1 one of the subsets
1056   * @param subset2 the other subset
1057   * @return a new Subset that is te result of the Join
1058   * @exception Exception if the evaluation of the subsets can not be completed
1059   */
1060  public Subset joinSubsets(Subset subset1, Subset subset2)
1061    throws Exception{
1062    BitSet b1 =(BitSet)subset1.subset.clone ();
1063    BitSet b2 =(BitSet)subset2.subset.clone ();
1064
1065    b1.or (b2);
1066
1067    double newMerit =ASEvaluator.evaluateSubset (b1);
1068    m_totalEvals++;
1069
1070    return new Subset((BitSet)b1.clone (), newMerit);
1071    }
1072
1073  /**
1074   * Intersects two subsets
1075   *
1076   * @param subset1 one of the subsets
1077   * @param subset2 the other subset
1078   * @return a new Subset that is te result of the intersection
1079   * @exception Exception if the evaluation of the subsets can not be completed
1080   */
1081  public Subset intersectSubsets(Subset subset1, Subset subset2)
1082    throws Exception{
1083    BitSet b1 =(BitSet)subset1.subset.clone ();
1084    BitSet b2 =(BitSet)subset2.subset.clone ();
1085
1086    b1.and (b2);
1087
1088    double newMerit =ASEvaluator.evaluateSubset (b1);
1089    m_totalEvals++;
1090
1091    return new Subset((BitSet)b1.clone (), newMerit);
1092  }
1093
1094  public Subset simetricDif(Subset subset1, Subset subset2, int mode)
1095    throws Exception{
1096    BitSet b1 =(BitSet)subset1.subset.clone ();
1097    BitSet b2 =(BitSet)subset2.subset.clone ();
1098
1099    b1.xor (b2);
1100
1101    double newMerit =ASEvaluator.evaluateSubset (b1);
1102    m_totalEvals++;
1103
1104    Subset result =new Subset((BitSet)b1.clone (), newMerit);
1105
1106    if(mode == COMBINATION_REDUCED){
1107
1108      double avgAcurracy =0;
1109      int totalSolutions =0;
1110      List<Subset> weightVector =new ArrayList<Subset>();
1111
1112      BitSet res =result.subset;
1113      for(int i=res.nextSetBit(0); i>=0; i=res.nextSetBit(i+1)){
1114
1115        double merits =0;
1116        int numSolutions =0;
1117        Subset solution =null;
1118
1119        for (int j = 0; j <m_ReferenceSet.size (); j ++) {
1120          solution =(Subset)m_ReferenceSet.get (j);
1121          if(solution.subset.get (i)){
1122            merits +=solution.merit;
1123            numSolutions ++;
1124          }
1125            }
1126            BitSet b =new BitSet(m_numAttribs);
1127            b.set (i);
1128            Subset s =new Subset(b, merits/(double)numSolutions);
1129            weightVector.add (s);
1130
1131            avgAcurracy +=merits;
1132        totalSolutions ++;
1133
1134      }
1135      avgAcurracy =avgAcurracy/(double)totalSolutions;
1136
1137      BitSet newResult =new BitSet(m_numAttribs);
1138      for (int i = 0; i<weightVector.size (); i++) {
1139        Subset aux =weightVector.get (i);
1140        if(aux.merit >=avgAcurracy){
1141          newResult.or (aux.subset);
1142        }
1143          }
1144          double merit =ASEvaluator.evaluateSubset (newResult);
1145          result =new Subset(newResult, merit);
1146
1147    }
1148
1149    return result;
1150  }
1151
1152  public int generateRandomNumber(int limit){
1153
1154    return (int)Math.round (Math.random ()*(limit+0.4));
1155  }
1156
1157
1158
1159    /**
1160   * Calculate the treshold of a dataSet given an evaluator
1161   *
1162   * @return the treshhold of the dataSet
1163   * @exception Exception if the calculation can not be completed
1164   */
1165  public double calculateTreshhold()
1166    throws Exception{
1167    BitSet fullSet =new BitSet(m_numAttribs);
1168
1169    for (int i= 0; i< m_numAttribs; i++) {
1170      if(i ==m_classIndex)continue;
1171        fullSet.set (i);
1172        }
1173
1174        return ASEvaluator.evaluateSubset (fullSet);
1175  }
1176
1177    /**
1178   * Calculate the Simetric Diference of two subsets
1179   *
1180   * @return the Simetric Diference
1181   * @exception Exception if the calculation can not be completed
1182   */
1183    public int SimetricDiference(Subset subset, BitSet bitset){
1184      BitSet aux =subset.clone ().subset;
1185      aux.xor (bitset);
1186
1187      return aux.cardinality ();
1188    }
1189
1190    /**
1191   * Filter a given Lis of Subsets removing the equals subsets
1192   * @param subsetList to filter
1193   * @param preferredSize the preferred size of the new List (if it is -1, then the filter is make it
1194   *                      for all subsets, else then the filter method stops when the given preferred
1195   *                      size is reached or all the subset have been filtered).
1196   * @return a new List filtered
1197   * @exception Exception if the calculation can not be completed
1198   */
1199    public List<Subset> filterSubset(List<Subset> subsetList, int preferredSize){
1200      if(subsetList.size () <=preferredSize && preferredSize !=-1)return subsetList;
1201
1202      for (int i =0; i <subsetList.size ()-1; i ++) {
1203        for (int j =i+1; j <subsetList.size (); j ++) {
1204          Subset focus =subsetList.get (i);
1205          if(focus.isEqual (subsetList.get (j))){
1206            subsetList.remove (j);
1207            j--;
1208
1209            if(subsetList.size () <=preferredSize && preferredSize !=-1)return subsetList;
1210          }
1211                }
1212          }
1213          return subsetList;
1214    }
1215    //..........
1216
1217
1218// Test Methods
1219
1220  public String printSubset(Subset subset){
1221    StringBuffer bufferString = new StringBuffer();
1222
1223    if(subset == null){
1224      //System.out.println ("null");
1225      return "";
1226    }
1227
1228    BitSet bits =subset.subset;
1229    double merit =subset.merit;
1230    List<Integer> indexes =new ArrayList<Integer>();
1231
1232    for (int i = 0; i<m_numAttribs; i++) {
1233      if(bits.get (i)){
1234        //System.out.print ("1");
1235        indexes.add (i+1);
1236      }
1237      //else System.out.print ("0");
1238    }
1239    bufferString.append (Utils.doubleToString (merit,8,5)+"\t "+indexes.toString ()+"\n");
1240    //System.out.print (" with a merit of: "+merit);
1241
1242    return bufferString.toString ();
1243  }
1244
1245    //........
1246
1247  protected void resetOptions () {
1248        m_popSize = -1;
1249        m_initialPopSize = -1;
1250        m_calculatedInitialPopSize = -1;
1251        m_treshold = -1;
1252        m_typeOfCombination = COMBINATION_NOT_REDUCED;
1253        m_seed = 1;
1254        m_debug = true;
1255        m_totalEvals = 0;
1256        m_bestMerit = 0;
1257        m_processinTime = 0;
1258  }
1259
1260    /**
1261   * converts a BitSet into a list of attribute indexes
1262   * @param group the BitSet to convert
1263   * @return an array of attribute indexes
1264   **/
1265  public int[] attributeList (BitSet group) {
1266    int count = 0;
1267
1268    // count how many were selected
1269    for (int i = 0; i < m_numAttribs; i++) {
1270      if (group.get(i)) {
1271        count++;
1272      }
1273    }
1274
1275    int[] list = new int[count];
1276    count = 0;
1277
1278    for (int i = 0; i < m_numAttribs; i++) {
1279      if (group.get(i)) {
1280        list[count++] = i;
1281      }
1282    }
1283
1284    return  list;
1285  }
1286
1287
1288    // Auxiliar Class for handling Chromosomes and its respective merit
1289
1290  public class Subset implements Serializable {
1291
1292    double merit;
1293    BitSet subset;
1294
1295    public Subset(BitSet subset, double merit){
1296      this.subset =(BitSet)subset.clone ();
1297      this.merit =merit;
1298    }
1299
1300    public boolean isEqual(Subset othersubset){
1301      if(subset.equals (othersubset.subset))return true;
1302      return false;
1303    }
1304
1305    public Subset clone(){
1306      return new Subset((BitSet)subset.clone (), merit);
1307    }
1308  }
1309  //..........
1310
1311}
1312
Note: See TracBrowser for help on using the repository browser.