source: src/main/java/weka/attributeSelection/ExhaustiveSearch.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 11.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    ExhaustiveSearch.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package  weka.attributeSelection;
24
25import weka.core.Instances;
26import weka.core.Option;
27import weka.core.OptionHandler;
28import weka.core.RevisionUtils;
29import weka.core.Utils;
30
31import java.math.BigInteger;
32import java.util.BitSet;
33import java.util.Enumeration;
34import java.util.Vector;
35
36/**
37 <!-- globalinfo-start -->
38 * ExhaustiveSearch : <br/>
39 * <br/>
40 * Performs an exhaustive search through the space of attribute subsets starting from the empty set of attrubutes. Reports the best subset found.
41 * <p/>
42 <!-- globalinfo-end -->
43 *
44 <!-- options-start -->
45 * Valid options are: <p/>
46 *
47 * <pre> -V
48 *  Output subsets as the search progresses.
49 *  (default = false).</pre>
50 *
51 <!-- options-end -->
52 *
53 * @author Mark Hall (mhall@cs.waikato.ac.nz)
54 * @version $Revision: 1.15 $
55 */
56public class ExhaustiveSearch 
57  extends ASSearch
58  implements OptionHandler {
59
60  /** for serialization */
61  static final long serialVersionUID = 5741842861142379712L;
62 
63  /** the best feature set found during the search */
64  private BitSet m_bestGroup;
65
66  /** the merit of the best subset found */
67  private double m_bestMerit;
68
69 /** does the data have a class */
70  private boolean m_hasClass;
71 
72  /** holds the class index */
73  private int m_classIndex;
74 
75  /** number of attributes in the data */
76  private int m_numAttribs;
77
78  /** if true, then ouput new best subsets as the search progresses */
79  private boolean m_verbose;
80 
81  /** the number of subsets evaluated during the search */
82  private int m_evaluations;
83
84  /**
85   * Returns a string describing this search method
86   * @return a description of the search suitable for
87   * displaying in the explorer/experimenter gui
88   */
89  public String globalInfo() {
90    return "ExhaustiveSearch : \n\nPerforms an exhaustive search through "
91      +"the space of attribute subsets starting from the empty set of "
92      +"attrubutes. Reports the best subset found.";
93  }
94
95  /**
96   * Constructor
97   */
98  public ExhaustiveSearch () {
99    resetOptions();
100  }
101
102  /**
103   * Returns an enumeration describing the available options.
104   * @return an enumeration of all the available options.
105   **/
106  public Enumeration listOptions () {
107    Vector newVector = new Vector(2);
108
109    newVector.addElement(new Option("\tOutput subsets as the search progresses."
110                                    +"\n\t(default = false)."
111                                    , "V", 0
112                                    , "-V"));
113    return  newVector.elements();
114  }
115
116  /**
117   * Parses a given list of options. <p/>
118   *
119   <!-- options-start -->
120   * Valid options are: <p/>
121   *
122   * <pre> -V
123   *  Output subsets as the search progresses.
124   *  (default = false).</pre>
125   *
126   <!-- options-end -->
127   *
128   * @param options the list of options as an array of strings
129   * @throws Exception if an option is not supported
130   *
131   **/
132  public void setOptions (String[] options)
133    throws Exception {
134
135    resetOptions();
136
137    setVerbose(Utils.getFlag('V',options));
138  }
139 
140  /**
141   * Returns the tip text for this property
142   * @return tip text for this property suitable for
143   * displaying in the explorer/experimenter gui
144   */
145  public String verboseTipText() {
146    return "Print progress information. Sends progress info to the terminal "
147      +"as the search progresses.";
148  }
149
150  /**
151   * set whether or not to output new best subsets as the search proceeds
152   * @param v true if output is to be verbose
153   */
154  public void setVerbose(boolean v) {
155    m_verbose = v;
156  }
157
158  /**
159   * get whether or not output is verbose
160   * @return true if output is set to verbose
161   */
162  public boolean getVerbose() {
163    return m_verbose;
164  }
165
166  /**
167   * Gets the current settings of RandomSearch.
168   * @return an array of strings suitable for passing to setOptions()
169   */
170  public String[] getOptions () {
171    String[] options = new String[1];
172    int current = 0;
173       
174    if (m_verbose) {
175      options[current++] = "-V";
176    }
177
178    while (current < options.length) {
179      options[current++] = "";
180    }
181    return  options;
182  }
183
184  /**
185   * prints a description of the search
186   * @return a description of the search as a string
187   */
188  public String toString() {
189    StringBuffer text = new StringBuffer();
190   
191    text.append("\tExhaustive Search.\n\tStart set: ");
192
193    text.append("no attributes\n");
194
195    text.append("\tNumber of evaluations: "+m_evaluations+"\n");
196    text.append("\tMerit of best subset found: "
197                +Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n");
198
199    return text.toString();
200  }
201
202  /**
203   * Searches the attribute subset space using an exhaustive search.
204   *
205   * @param ASEval the attribute evaluator to guide the search
206   * @param data the training instances.
207   * @return an array (not necessarily ordered) of selected attribute indexes
208   * @throws Exception if the search can't be completed
209   */
210   public int[] search (ASEvaluation ASEval, Instances data)
211     throws Exception {
212     double best_merit;
213     double tempMerit;
214     boolean done = false;
215     int sizeOfBest;
216     int tempSize;
217     
218     BigInteger space = BigInteger.ZERO;
219
220     m_evaluations = 0;
221     m_numAttribs = data.numAttributes();
222     m_bestGroup = new BitSet(m_numAttribs);
223     
224     if (!(ASEval instanceof SubsetEvaluator)) {
225       throw  new Exception(ASEval.getClass().getName() 
226                            + " is not a " 
227                            + "Subset evaluator!");
228     }
229     
230     if (ASEval instanceof UnsupervisedSubsetEvaluator) {
231       m_hasClass = false;
232     }
233     else {
234       m_hasClass = true;
235       m_classIndex = data.classIndex();
236     }
237     
238     SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval;
239     m_numAttribs = data.numAttributes();
240
241     best_merit = ASEvaluator.evaluateSubset(m_bestGroup);
242     m_evaluations++;
243     sizeOfBest = countFeatures(m_bestGroup);
244
245     BitSet tempGroup = new BitSet(m_numAttribs);
246     tempMerit = ASEvaluator.evaluateSubset(tempGroup);
247
248     if (m_verbose) {
249       System.out.println("Zero feature subset ("
250                          +Utils.doubleToString(Math.
251                                                abs(tempMerit),8,5)
252                          +")");
253     }
254
255     if (tempMerit >= best_merit) {
256       tempSize = countFeatures(tempGroup);
257       if (tempMerit > best_merit || 
258           (tempSize < sizeOfBest)) {
259         best_merit = tempMerit;
260         m_bestGroup = (BitSet)(tempGroup.clone());
261         sizeOfBest = tempSize;
262       }
263     }
264
265     int numatts = (m_hasClass) 
266       ? m_numAttribs - 1
267       : m_numAttribs;
268     BigInteger searchSpaceEnd = 
269       BigInteger.ONE.add(BigInteger.ONE).pow(numatts).subtract(BigInteger.ONE);
270
271     while (!done) {
272       // the next subset
273       space = space.add(BigInteger.ONE);
274       if (space.equals(searchSpaceEnd)) {
275         done = true;
276       }
277       tempGroup.clear();
278       for (int i = 0; i < numatts; i++) {
279         if (space.testBit(i)) {
280           if (!m_hasClass) {
281             tempGroup.set(i);
282           } else {
283             int j = (i >= m_classIndex)
284               ? i + 1
285               : i;
286             tempGroup.set(j);
287           }
288         }
289       }
290
291       tempMerit = ASEvaluator.evaluateSubset(tempGroup);
292       m_evaluations++;
293       if (tempMerit >= best_merit) {
294         tempSize = countFeatures(tempGroup);
295         if (tempMerit > best_merit || 
296             (tempSize < sizeOfBest)) {
297           best_merit = tempMerit;
298           m_bestGroup = (BitSet)(tempGroup.clone());
299           sizeOfBest = tempSize;
300           if (m_verbose) {
301             System.out.println("New best subset ("
302                                +Utils.doubleToString(Math.
303                                                      abs(best_merit),8,5)
304                                +"): "+printSubset(m_bestGroup));
305           }
306         }
307       }
308     }
309
310     m_bestMerit = best_merit;
311     
312     return attributeList(m_bestGroup);
313   }
314
315  /**
316   * counts the number of features in a subset
317   * @param featureSet the feature set for which to count the features
318   * @return the number of features in the subset
319   */
320  private int countFeatures(BitSet featureSet) {
321    int count = 0;
322    for (int i=0;i<m_numAttribs;i++) {
323      if (featureSet.get(i)) {
324        count++;
325      }
326    }
327    return count;
328  }   
329
330  /**
331   * prints a subset as a series of attribute numbers
332   * @param temp the subset to print
333   * @return a subset as a String of attribute numbers
334   */
335  private String printSubset(BitSet temp) {
336    StringBuffer text = new StringBuffer();
337
338    for (int j=0;j<m_numAttribs;j++) {
339      if (temp.get(j)) {
340        text.append((j+1)+" ");
341      }
342    }
343    return text.toString();
344  }
345
346  /**
347   * converts a BitSet into a list of attribute indexes
348   * @param group the BitSet to convert
349   * @return an array of attribute indexes
350   **/
351  private int[] attributeList (BitSet group) {
352    int count = 0;
353   
354    // count how many were selected
355    for (int i = 0; i < m_numAttribs; i++) {
356      if (group.get(i)) {
357        count++;
358      }
359    }
360   
361    int[] list = new int[count];
362    count = 0;
363   
364    for (int i = 0; i < m_numAttribs; i++) {
365      if (group.get(i)) {
366        list[count++] = i;
367      }
368    }
369   
370    return  list;
371  }
372
373  /**
374   * generates the next subset of size "size" given the subset "temp".
375   * @param size the size of the feature subset (eg. 2 means that the
376   * current subset contains two features and the next generated subset
377   * should also contain 2 features).
378   * @param temp will hold the generated subset as a BitSet
379   */
380  private void generateNextSubset(int size, BitSet temp) {
381    int i,j;
382    int counter = 0;
383    boolean done = false;
384    BitSet temp2 = (BitSet)temp.clone();
385
386    System.err.println("Size: "+size);
387    for (i=0;i<m_numAttribs;i++) {
388      temp2.clear(i);
389    }
390
391    while ((!done) && (counter < size)) {
392      for (i=m_numAttribs-1-counter;i>=0;i--) {
393        if (temp.get(i)) {
394
395          temp.clear(i);
396
397          int newP;
398          if (i != (m_numAttribs-1-counter)) {
399            newP = i+1;
400            if (newP == m_classIndex) {
401              newP++;
402            }
403
404            if (newP < m_numAttribs) {
405              temp.set(newP);
406
407              for (j=0;j<counter;j++) {
408                if (newP+1+j == m_classIndex) {
409                  newP++;
410                }
411
412                if (newP+1+j < m_numAttribs) {
413                  temp.set(newP+1+j);
414                }
415              }
416              done = true;
417            } else {
418              counter++;
419            }
420            break;
421          } else {
422            counter++;
423            break;
424          }
425        }
426      }
427    }
428
429    if (temp.cardinality() < size) {
430      temp.clear();
431    }
432    System.err.println(printSubset(temp).toString());
433  }
434     
435  /**
436   * resets to defaults
437   */
438  private void resetOptions() {
439    m_verbose = false;
440    m_evaluations = 0;
441  }
442 
443  /**
444   * Returns the revision string.
445   *
446   * @return            the revision
447   */
448  public String getRevision() {
449    return RevisionUtils.extract("$Revision: 1.15 $");
450  }
451}
Note: See TracBrowser for help on using the repository browser.