source: src/main/java/weka/attributeSelection/RankSearch.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 16.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    RankSearch.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.attributeSelection;
24
25import weka.core.Instances;
26import weka.core.Option;
27import weka.core.OptionHandler;
28import weka.core.RevisionUtils;
29import weka.core.Utils;
30
31import java.util.BitSet;
32import java.util.Enumeration;
33import java.util.Vector;
34
35/**
36 <!-- globalinfo-start -->
37 * RankSearch : <br/>
38 * <br/>
39 * Uses an attribute/subset evaluator to rank all attributes. If a subset evaluator is specified, then a forward selection search is used to generate a ranked list. From the ranked list of attributes, subsets of increasing size are evaluated, ie. The best attribute, the best attribute plus the next best attribute, etc.... The best attribute set is reported. RankSearch is linear in the number of attributes if a simple attribute evaluator is used such as GainRatioAttributeEval.<br/>
40 * <p/>
41 <!-- globalinfo-end -->
42 *
43 <!-- options-start -->
44 * Valid options are: <p/>
45 *
46 * <pre> -A &lt;attribute evaluator&gt;
47 *  class name of attribute evaluator to use for ranking. Place any
48 *  evaluator options LAST on the command line following a "--".
49 *  eg.:
50 *   -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
51 *  (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
52 *
53 * <pre> -S &lt;step size&gt;
54 *  number of attributes to be added from the
55 *  ranking in each iteration (default = 1).</pre>
56 *
57 * <pre> -R &lt;start point&gt;
58 *  point in the ranking to start evaluating from.
59 *  (default = 0, ie. the head of the ranking).</pre>
60 *
61 * <pre>
62 * Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
63 * </pre>
64 *
65 * <pre> -M
66 *  treat missing values as a seperate value.</pre>
67 *
68 <!-- options-end -->
69 *
70 * @author Mark Hall (mhall@cs.waikato.ac.nz)
71 * @version $Revision: 4614 $
72 */
73public class RankSearch 
74  extends ASSearch
75  implements OptionHandler {
76 
77  /** for serialization */
78  static final long serialVersionUID = -7992268736874353755L;
79
80  /** does the data have a class */
81  private boolean m_hasClass;
82 
83  /** holds the class index */
84  private int m_classIndex;
85 
86  /** number of attributes in the data */
87  private int m_numAttribs;
88
89  /** the best subset found */
90  private BitSet m_best_group;
91
92  /** the attribute evaluator to use for generating the ranking */
93  private ASEvaluation m_ASEval;
94
95  /** the subset evaluator with which to evaluate the ranking */
96  private ASEvaluation m_SubsetEval;
97
98  /** the training instances */
99  private Instances m_Instances;
100
101  /** the merit of the best subset found */
102  private double m_bestMerit;
103
104  /** will hold the attribute ranking */
105  private int [] m_Ranking;
106
107  /** add this many attributes in each iteration from the ranking */
108  protected int m_add = 1;
109
110  /** start from this point in the ranking */
111  protected int m_startPoint = 0;
112
113  /**
114   * Returns a string describing this search method
115   * @return a description of the search method suitable for
116   * displaying in the explorer/experimenter gui
117   */
118  public String globalInfo() {
119    return "RankSearch : \n\n"
120      +"Uses an attribute/subset evaluator to rank all attributes. "
121      +"If a subset evaluator is specified, then a forward selection "
122      +"search is used to generate a ranked list. From the ranked "
123      +"list of attributes, subsets of increasing size are evaluated, ie. "
124      +"The best attribute, the best attribute plus the next best attribute, "
125      +"etc.... The best attribute set is reported. RankSearch is linear in "
126      +"the number of attributes if a simple attribute evaluator is used "
127      +"such as GainRatioAttributeEval.\n";
128  }
129
130  /**
131   * Constructor
132   */
133  public RankSearch () {
134    resetOptions();
135  }
136
137  /**
138   * Returns the tip text for this property
139   * @return tip text for this property suitable for
140   * displaying in the explorer/experimenter gui
141   */
142  public String attributeEvaluatorTipText() {
143    return "Attribute evaluator to use for generating a ranking.";   
144  }
145
146  /**
147   * Set the attribute evaluator to use for generating the ranking.
148   * @param newEvaluator the attribute evaluator to use.
149   */
150  public void setAttributeEvaluator(ASEvaluation newEvaluator) {
151    m_ASEval = newEvaluator;
152  }
153
154  /**
155   * Get the attribute evaluator used to generate the ranking.
156   * @return the evaluator used to generate the ranking.
157   */
158  public ASEvaluation getAttributeEvaluator() {
159    return m_ASEval;
160  }
161
162  /**
163   * Returns the tip text for this property
164   * @return tip text for this property suitable for
165   * displaying in the explorer/experimenter gui
166   */
167  public String stepSizeTipText() {
168    return "Add this many attributes from the ranking in each iteration.";
169  }
170
171  /**
172   * Set the number of attributes to add from the rankining
173   * in each iteration
174   * @param ss the number of attribes to add.
175   */
176  public void setStepSize(int ss) {
177    if (ss > 0) {
178      m_add = ss;
179    }
180  }
181
182  /**
183   * Get the number of attributes to add from the rankining
184   * in each iteration
185   * @return the number of attributes to add.
186   */
187  public int getStepSize() {
188    return m_add;
189  }
190
191  /**
192   * Returns the tip text for this property
193   * @return tip text for this property suitable for
194   * displaying in the explorer/experimenter gui
195   */
196  public String startPointTipText() {
197    return "Start evaluating from this point in the ranking.";
198  }
199
200  /**
201   * Set the point at which to start evaluating the ranking
202   * @param sp the position in the ranking to start at
203   */
204  public void setStartPoint(int sp) {
205    if (sp >= 0) {
206      m_startPoint = sp;
207    }
208  }
209
210  /**
211   * Get the point at which to start evaluating the ranking
212   * @return the position in the ranking to start at
213   */
214  public int getStartPoint() {
215    return m_startPoint;
216  }
217
218  /**
219   * Returns an enumeration describing the available options.
220   * @return an enumeration of all the available options.
221   **/
222  public Enumeration listOptions () {
223    Vector newVector = new Vector(4);
224   
225    newVector.addElement(new Option(
226        "\tclass name of attribute evaluator to use for ranking. Place any\n" 
227        + "\tevaluator options LAST on the command line following a \"--\".\n" 
228        + "\teg.:\n"
229        + "\t\t-A weka.attributeSelection.GainRatioAttributeEval ... -- -M\n"
230        + "\t(default: weka.attributeSelection.GainRatioAttributeEval)", 
231        "A", 1, "-A <attribute evaluator>"));
232   
233    newVector.addElement(new Option(
234        "\tnumber of attributes to be added from the"
235        +"\n\tranking in each iteration (default = 1).", 
236        "S", 1,"-S <step size>"));
237   
238    newVector.addElement(new Option(
239        "\tpoint in the ranking to start evaluating from. "
240        +"\n\t(default = 0, ie. the head of the ranking).", 
241        "R", 1,"-R <start point>"));
242
243    if ((m_ASEval != null) && 
244        (m_ASEval instanceof OptionHandler)) {
245      newVector.addElement(new Option("", "", 0, "\nOptions specific to " 
246                                      + "evaluator " 
247                                      + m_ASEval.getClass().getName() 
248                                      + ":"));
249      Enumeration enu = ((OptionHandler)m_ASEval).listOptions();
250
251      while (enu.hasMoreElements()) {
252        newVector.addElement(enu.nextElement());
253      }
254    }
255
256    return newVector.elements();
257  }
258
259
260  /**
261   * Parses a given list of options. <p/>
262   *
263   <!-- options-start -->
264   * Valid options are: <p/>
265   *
266   * <pre> -A &lt;attribute evaluator&gt;
267   *  class name of attribute evaluator to use for ranking. Place any
268   *  evaluator options LAST on the command line following a "--".
269   *  eg.:
270   *   -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
271   *  (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
272   *
273   * <pre> -S &lt;step size&gt;
274   *  number of attributes to be added from the
275   *  ranking in each iteration (default = 1).</pre>
276   *
277   * <pre> -R &lt;start point&gt;
278   *  point in the ranking to start evaluating from.
279   *  (default = 0, ie. the head of the ranking).</pre>
280   *
281   * <pre>
282   * Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
283   * </pre>
284   *
285   * <pre> -M
286   *  treat missing values as a seperate value.</pre>
287   *
288   <!-- options-end -->
289   *
290   * @param options the list of options as an array of strings
291   * @throws Exception if an option is not supported
292   */
293  public void setOptions (String[] options)
294    throws Exception {
295    String optionString;
296    resetOptions();
297
298    optionString = Utils.getOption('S', options);
299    if (optionString.length() != 0) {
300      setStepSize(Integer.parseInt(optionString));
301    }
302
303    optionString = Utils.getOption('R', options);
304    if (optionString.length() != 0) {
305      setStartPoint(Integer.parseInt(optionString));
306    }
307
308    optionString = Utils.getOption('A', options);
309    if (optionString.length() == 0)
310      optionString = GainRatioAttributeEval.class.getName();
311    setAttributeEvaluator(ASEvaluation.forName(optionString, 
312                                     Utils.partitionOptions(options)));
313  }
314
315  /**
316   * Gets the current settings of WrapperSubsetEval.
317   *
318   * @return an array of strings suitable for passing to setOptions()
319   */
320  public String[] getOptions () {
321    String[] evaluatorOptions = new String[0];
322
323    if ((m_ASEval != null) && 
324        (m_ASEval instanceof OptionHandler)) {
325      evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
326    }
327
328    String[] options = new String[8 + evaluatorOptions.length];
329    int current = 0;
330
331    options[current++] = "-S"; options[current++] = ""+getStepSize();
332
333    options[current++] = "-R"; options[current++] = ""+getStartPoint();
334
335    if (getAttributeEvaluator() != null) {
336      options[current++] = "-A";
337      options[current++] = getAttributeEvaluator().getClass().getName();
338    }
339
340    if (evaluatorOptions.length > 0) {
341      options[current++] = "--";
342      System.arraycopy(evaluatorOptions, 0, options, current, 
343          evaluatorOptions.length);
344      current += evaluatorOptions.length;
345    }
346
347    while (current < options.length) {
348      options[current++] = "";
349    }
350
351    return  options;
352  }
353
354  /**
355   * Reset the search method.
356   */
357  protected void resetOptions () {
358    m_ASEval = new GainRatioAttributeEval();
359    m_Ranking = null;
360  }
361
362  /**
363   * Ranks attributes using the specified attribute evaluator and then
364   * searches the ranking using the supplied subset evaluator.
365   *
366   * @param ASEval the subset evaluator to guide the search
367   * @param data the training instances.
368   * @return an array (not necessarily ordered) of selected attribute indexes
369   * @throws Exception if the search can't be completed
370   */
371  public int[] search (ASEvaluation ASEval, Instances data)
372    throws Exception {
373   
374    double best_merit = -Double.MAX_VALUE;
375    double temp_merit;
376    BitSet temp_group, best_group=null;
377   
378    if (!(ASEval instanceof SubsetEvaluator)) {
379      throw  new Exception(ASEval.getClass().getName() 
380                           + " is not a " 
381                           + "Subset evaluator!");
382    }
383
384    m_SubsetEval = ASEval;
385    m_Instances = data;
386    m_numAttribs = m_Instances.numAttributes();
387
388    /*    if (m_ASEval instanceof AttributeTransformer) {
389      throw new Exception("Can't use an attribute transformer "
390                          +"with RankSearch");
391                          } */
392    if (m_ASEval instanceof UnsupervisedAttributeEvaluator || 
393        m_ASEval instanceof UnsupervisedSubsetEvaluator) {
394      m_hasClass = false;
395      /*      if (!(m_SubsetEval instanceof UnsupervisedSubsetEvaluator)) {
396        throw new Exception("Must use an unsupervised subset evaluator.");
397        } */
398    }
399    else {
400      m_hasClass = true;
401      m_classIndex = m_Instances.classIndex();
402    }
403
404    if (m_ASEval instanceof AttributeEvaluator) {
405      // generate the attribute ranking first
406      Ranker ranker = new Ranker();
407      m_ASEval.buildEvaluator(m_Instances);
408      if (m_ASEval instanceof AttributeTransformer) {
409        // get the transformed data a rebuild the subset evaluator
410        m_Instances = ((AttributeTransformer)m_ASEval).
411          transformedData(m_Instances);
412        ((ASEvaluation)m_SubsetEval).buildEvaluator(m_Instances);
413      }
414      m_Ranking = ranker.search(m_ASEval, m_Instances);
415    } else {
416      GreedyStepwise fs = new GreedyStepwise();
417      double [][]rankres; 
418      fs.setGenerateRanking(true);
419      ((ASEvaluation)m_ASEval).buildEvaluator(m_Instances);
420      fs.search(m_ASEval, m_Instances);
421      rankres = fs.rankedAttributes();
422      m_Ranking = new int[rankres.length];
423      for (int i=0;i<rankres.length;i++) {
424        m_Ranking[i] = (int)rankres[i][0];
425      }
426    }
427
428    // now evaluate the attribute ranking
429    for (int i=m_startPoint;i<m_Ranking.length;i+=m_add) {
430      temp_group = new BitSet(m_numAttribs);
431      for (int j=0;j<=i;j++) {
432        temp_group.set(m_Ranking[j]);
433      }
434      temp_merit = ((SubsetEvaluator)m_SubsetEval).evaluateSubset(temp_group);
435
436      if (temp_merit > best_merit) {
437        best_merit = temp_merit;;
438        best_group = temp_group;
439      }
440    }
441    m_bestMerit = best_merit;
442    return attributeList(best_group);
443  }
444   
445  /**
446   * converts a BitSet into a list of attribute indexes
447   * @param group the BitSet to convert
448   * @return an array of attribute indexes
449   **/
450  private int[] attributeList (BitSet group) {
451    int count = 0;
452   
453    // count how many were selected
454    for (int i = 0; i < m_numAttribs; i++) {
455      if (group.get(i)) {
456        count++;
457      }
458    }
459
460    int[] list = new int[count];
461    count = 0;
462
463    for (int i = 0; i < m_numAttribs; i++) {
464      if (group.get(i)) {
465        list[count++] = i;
466      }
467    }
468
469    return  list;
470  }
471
472   /**
473   * returns a description of the search as a String
474   * @return a description of the search
475   */
476  public String toString () {
477    StringBuffer text = new StringBuffer();
478    text.append("\tRankSearch :\n");
479    text.append("\tAttribute evaluator : "
480                + getAttributeEvaluator().getClass().getName() +" ");
481    if (m_ASEval instanceof OptionHandler) {
482      String[] evaluatorOptions = new String[0];
483      evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
484      for (int i=0;i<evaluatorOptions.length;i++) {
485        text.append(evaluatorOptions[i]+' ');
486      }
487    }
488    text.append("\n");
489    text.append("\tAttribute ranking : \n");
490    int rlength = (int)(Math.log(m_Ranking.length) / Math.log(10) + 1);
491    for (int i=0;i<m_Ranking.length;i++) {
492      text.append("\t "+Utils.doubleToString((double)(m_Ranking[i]+1),
493                                             rlength,0)
494                  +" "+m_Instances.attribute(m_Ranking[i]).name()+'\n');
495    }
496    text.append("\tMerit of best subset found : ");
497    int fieldwidth = 3;
498    double precision = (m_bestMerit - (int)m_bestMerit);
499    if (Math.abs(m_bestMerit) > 0) {
500      fieldwidth = (int)Math.abs((Math.log(Math.abs(m_bestMerit)) / Math.log(10)))+2;
501    }
502    if (Math.abs(precision) > 0) {
503      precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10)))+3;
504    } else {
505      precision = 2;
506    }
507
508    text.append(Utils.doubleToString(Math.abs(m_bestMerit),
509                                     fieldwidth+(int)precision,
510                                     (int)precision)+"\n");
511    return text.toString();
512  }
513 
514  /**
515   * Returns the revision string.
516   *
517   * @return            the revision
518   */
519  public String getRevision() {
520    return RevisionUtils.extract("$Revision: 4614 $");
521  }
522}
Note: See TracBrowser for help on using the repository browser.