source: src/main/java/weka/attributeSelection/Ranker.java @ 10

Last change on this file since 10 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 17.5 KB
RevLine 
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Ranker.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.attributeSelection;
24
25import weka.core.Instances;
26import weka.core.Option;
27import weka.core.OptionHandler;
28import weka.core.Range;
29import weka.core.RevisionUtils;
30import weka.core.Utils;
31
32import java.util.Enumeration;
33import java.util.Vector;
34
35/**
36 <!-- globalinfo-start -->
37 * Ranker : <br/>
38 * <br/>
39 * Ranks attributes by their individual evaluations. Use in conjunction with attribute evaluators (ReliefF, GainRatio, Entropy etc).<br/>
40 * <p/>
41 <!-- globalinfo-end -->
42 *
43 <!-- options-start -->
44 * Valid options are: <p/>
45 *
46 * <pre> -P &lt;start set&gt;
47 *  Specify a starting set of attributes.
48 *  Eg. 1,3,5-7.
49 *  Any starting attributes specified are
50 *  ignored during the ranking.</pre>
51 *
52 * <pre> -T &lt;threshold&gt;
53 *  Specify a theshold by which attributes
54 *  may be discarded from the ranking.</pre>
55 *
56 * <pre> -N &lt;num to select&gt;
57 *  Specify number of attributes to select</pre>
58 *
59 <!-- options-end -->
60 *
61 * @author Mark Hall (mhall@cs.waikato.ac.nz)
62 * @version $Revision: 1.26 $
63 */
64public class Ranker 
65  extends ASSearch
66  implements RankedOutputSearch, StartSetHandler, OptionHandler {
67 
68  /** for serialization */
69  static final long serialVersionUID = -9086714848510751934L;
70
71  /** Holds the starting set as an array of attributes */
72  private int[] m_starting;
73
74  /** Holds the start set for the search as a range */
75  private Range m_startRange;
76
77  /** Holds the ordered list of attributes */
78  private int[] m_attributeList;
79
80  /** Holds the list of attribute merit scores */
81  private double[] m_attributeMerit;
82
83  /** Data has class attribute---if unsupervised evaluator then no class */
84  private boolean m_hasClass;
85
86  /** Class index of the data if supervised evaluator */
87  private int m_classIndex;
88
89  /** The number of attribtes */
90  private int m_numAttribs;
91
92  /**
93   * A threshold by which to discard attributes---used by the
94   * AttributeSelection module
95   */
96  private double m_threshold;
97
98  /** The number of attributes to select. -1 indicates that all attributes
99      are to be retained. Has precedence over m_threshold */
100  private int m_numToSelect = -1;
101
102  /** Used to compute the number to select */
103  private int m_calculatedNumToSelect = -1;
104
105  /**
106   * Returns a string describing this search method
107   * @return a description of the search suitable for
108   * displaying in the explorer/experimenter gui
109   */
110  public String globalInfo() {
111    return "Ranker : \n\nRanks attributes by their individual evaluations. "
112      +"Use in conjunction with attribute evaluators (ReliefF, GainRatio, "
113      +"Entropy etc).\n";
114  }
115
116  /**
117   * Constructor
118   */
119  public Ranker () {
120    resetOptions();
121  }
122
123  /**
124   * Returns the tip text for this property
125   * @return tip text for this property suitable for
126   * displaying in the explorer/experimenter gui
127   */
128  public String numToSelectTipText() {
129    return "Specify the number of attributes to retain. The default value "
130      +"(-1) indicates that all attributes are to be retained. Use either "
131      +"this option or a threshold to reduce the attribute set.";
132  }
133
134  /**
135   * Specify the number of attributes to select from the ranked list. -1
136   * indicates that all attributes are to be retained.
137   * @param n the number of attributes to retain
138   */
139  public void setNumToSelect(int n) {
140    m_numToSelect = n;
141  }
142
143  /**
144   * Gets the number of attributes to be retained.
145   * @return the number of attributes to retain
146   */
147  public int getNumToSelect() {
148    return m_numToSelect;
149  }
150
151  /**
152   * Gets the calculated number to select. This might be computed
153   * from a threshold, or if < 0 is set as the number to select then
154   * it is set to the number of attributes in the (transformed) data.
155   * @return the calculated number of attributes to select
156   */
157  public int getCalculatedNumToSelect() {
158    if (m_numToSelect >= 0) {
159      m_calculatedNumToSelect = m_numToSelect;
160    }
161    return m_calculatedNumToSelect;
162  }
163
164  /**
165   * Returns the tip text for this property
166   * @return tip text for this property suitable for
167   * displaying in the explorer/experimenter gui
168   */
169  public String thresholdTipText() {
170    return "Set threshold by which attributes can be discarded. Default value "
171      + "results in no attributes being discarded. Use either this option or "
172      +"numToSelect to reduce the attribute set.";
173  }
174
175  /**
176   * Set the threshold by which the AttributeSelection module can discard
177   * attributes.
178   * @param threshold the threshold.
179   */
180  public void setThreshold(double threshold) {
181    m_threshold = threshold;
182  }
183
184  /**
185   * Returns the threshold so that the AttributeSelection module can
186   * discard attributes from the ranking.
187   */
188  public double getThreshold() {
189    return m_threshold;
190  }
191 
192  /**
193   * Returns the tip text for this property
194   * @return tip text for this property suitable for
195   * displaying in the explorer/experimenter gui
196   */
197  public String generateRankingTipText() {
198    return "A constant option. Ranker is only capable of generating "
199      +" attribute rankings.";
200  }
201
202  /**
203   * This is a dummy set method---Ranker is ONLY capable of producing
204   * a ranked list of attributes for attribute evaluators.
205   * @param doRank this parameter is N/A and is ignored
206   */
207  public void setGenerateRanking(boolean doRank) {
208   
209  }
210
211  /**
212   * This is a dummy method. Ranker can ONLY be used with attribute
213   * evaluators and as such can only produce a ranked list of attributes
214   * @return true all the time.
215   */
216  public boolean getGenerateRanking() {
217    return true;
218  }
219
220  /**
221   * Returns the tip text for this property
222   * @return tip text for this property suitable for
223   * displaying in the explorer/experimenter gui
224   */
225  public String startSetTipText() {
226    return "Specify a set of attributes to ignore. "
227      +" When generating the ranking, Ranker will not evaluate the attributes "
228      +" in this list. "
229      +"This is specified as a comma " 
230      +"seperated list off attribute indexes starting at 1. It can include "
231      +"ranges. Eg. 1,2,5-9,17.";
232  }
233
234  /**
235   * Sets a starting set of attributes for the search. It is the
236   * search method's responsibility to report this start set (if any)
237   * in its toString() method.
238   * @param startSet a string containing a list of attributes (and or ranges),
239   * eg. 1,2,6,10-15.
240   * @throws Exception if start set can't be set.
241   */
242  public void setStartSet (String startSet) throws Exception {
243    m_startRange.setRanges(startSet);
244  }
245
246  /**
247   * Returns a list of attributes (and or attribute ranges) as a String
248   * @return a list of attributes (and or attribute ranges)
249   */
250  public String getStartSet () {
251    return m_startRange.getRanges();
252  }
253
254  /**
255   * Returns an enumeration describing the available options.
256   * @return an enumeration of all the available options.
257   **/
258  public Enumeration listOptions () {
259    Vector newVector = new Vector(3);
260
261    newVector
262      .addElement(new Option("\tSpecify a starting set of attributes.\n" 
263                             + "\tEg. 1,3,5-7.\n"
264                             +"\tAny starting attributes specified are\n"
265                             +"\tignored during the ranking."
266                             ,"P",1
267                             , "-P <start set>"));
268    newVector
269      .addElement(new Option("\tSpecify a theshold by which attributes\n" 
270                             + "\tmay be discarded from the ranking.","T",1
271                             , "-T <threshold>"));
272
273    newVector
274      .addElement(new Option("\tSpecify number of attributes to select" 
275                             ,"N",1
276                             , "-N <num to select>"));
277
278    return newVector.elements();
279
280  }
281 
282  /**
283   * Parses a given list of options. <p/>
284   *
285   <!-- options-start -->
286   * Valid options are: <p/>
287   *
288   * <pre> -P &lt;start set&gt;
289   *  Specify a starting set of attributes.
290   *  Eg. 1,3,5-7.
291   *  Any starting attributes specified are
292   *  ignored during the ranking.</pre>
293   *
294   * <pre> -T &lt;threshold&gt;
295   *  Specify a theshold by which attributes
296   *  may be discarded from the ranking.</pre>
297   *
298   * <pre> -N &lt;num to select&gt;
299   *  Specify number of attributes to select</pre>
300   *
301   <!-- options-end -->
302   *
303   * @param options the list of options as an array of strings
304   * @throws Exception if an option is not supported
305   */
306  public void setOptions (String[] options)
307    throws Exception {
308    String optionString;
309    resetOptions();
310
311    optionString = Utils.getOption('P', options);
312    if (optionString.length() != 0) {
313      setStartSet(optionString);
314    }
315
316    optionString = Utils.getOption('T', options);
317    if (optionString.length() != 0) {
318      Double temp;
319      temp = Double.valueOf(optionString);
320      setThreshold(temp.doubleValue());
321    }
322
323    optionString = Utils.getOption('N', options);
324    if (optionString.length() != 0) {
325      setNumToSelect(Integer.parseInt(optionString));
326    }
327  }
328
329  /**
330   * Gets the current settings of ReliefFAttributeEval.
331   *
332   * @return an array of strings suitable for passing to setOptions()
333   */
334  public String[] getOptions () {
335    String[] options = new String[6];
336    int current = 0;
337
338    if (!(getStartSet().equals(""))) {
339      options[current++] = "-P";
340      options[current++] = ""+startSetToString();
341    }
342
343    options[current++] = "-T";
344    options[current++] = "" + getThreshold();
345
346    options[current++] = "-N";
347    options[current++] = ""+getNumToSelect();
348
349    while (current < options.length) {
350      options[current++] = "";
351    }
352    return  options;
353  }
354
355  /**
356   * converts the array of starting attributes to a string. This is
357   * used by getOptions to return the actual attributes specified
358   * as the starting set. This is better than using m_startRanges.getRanges()
359   * as the same start set can be specified in different ways from the
360   * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
361   * is stored in a database is comparable.
362   * @return a comma seperated list of individual attribute numbers as a String
363   */
364  private String startSetToString() {
365    StringBuffer FString = new StringBuffer();
366    boolean didPrint;
367   
368    if (m_starting == null) {
369      return getStartSet();
370    }
371
372    for (int i = 0; i < m_starting.length; i++) {
373      didPrint = false;
374     
375      if ((m_hasClass == false) || 
376          (m_hasClass == true && i != m_classIndex)) {
377        FString.append((m_starting[i] + 1));
378        didPrint = true;
379      }
380     
381      if (i == (m_starting.length - 1)) {
382        FString.append("");
383      }
384      else {
385        if (didPrint) {
386          FString.append(",");
387        }
388      }
389    }
390   
391    return FString.toString();
392  }
393
394  /**
395   * Kind of a dummy search algorithm. Calls a Attribute evaluator to
396   * evaluate each attribute not included in the startSet and then sorts
397   * them to produce a ranked list of attributes.
398   *
399   * @param ASEval the attribute evaluator to guide the search
400   * @param data the training instances.
401   * @return an array (not necessarily ordered) of selected attribute indexes
402   * @throws Exception if the search can't be completed
403   */
404  public int[] search (ASEvaluation ASEval, Instances data)
405    throws Exception {
406    int i, j;
407
408    if (!(ASEval instanceof AttributeEvaluator)) {
409      throw  new Exception(ASEval.getClass().getName() 
410                           + " is not a" 
411                           + "Attribute evaluator!");
412    }
413
414    m_numAttribs = data.numAttributes();
415
416    if (ASEval instanceof UnsupervisedAttributeEvaluator) {
417      m_hasClass = false;
418    }
419    else {
420      m_classIndex = data.classIndex();
421      if (m_classIndex >= 0) { 
422        m_hasClass = true;
423      } else {
424        m_hasClass = false;
425      }
426    }
427
428    // get the transformed data and check to see if the transformer
429    // preserves a class index
430    if (ASEval instanceof AttributeTransformer) {
431      data = ((AttributeTransformer)ASEval).transformedHeader();
432      if (m_classIndex >= 0 && data.classIndex() >= 0) {
433        m_classIndex = data.classIndex();
434        m_hasClass = true;
435      }
436    }
437
438
439    m_startRange.setUpper(m_numAttribs - 1);
440    if (!(getStartSet().equals(""))) {
441      m_starting = m_startRange.getSelection();
442    }
443   
444    int sl=0;
445    if (m_starting != null) {
446      sl = m_starting.length;
447    }
448    if ((m_starting != null) && (m_hasClass == true)) {
449      // see if the supplied list contains the class index
450      boolean ok = false;
451      for (i = 0; i < sl; i++) {
452        if (m_starting[i] == m_classIndex) {
453          ok = true;
454          break;
455        }
456      }
457     
458      if (ok == false) {
459        sl++;
460      }
461    }
462    else {
463      if (m_hasClass == true) {
464        sl++;
465      }
466    }
467
468
469    m_attributeList = new int[m_numAttribs - sl];
470    m_attributeMerit = new double[m_numAttribs - sl];
471
472    // add in those attributes not in the starting (omit list)
473    for (i = 0, j = 0; i < m_numAttribs; i++) {
474      if (!inStarting(i)) {
475        m_attributeList[j++] = i;
476      }
477    }
478
479    AttributeEvaluator ASEvaluator = (AttributeEvaluator)ASEval;
480
481    for (i = 0; i < m_attributeList.length; i++) {
482      m_attributeMerit[i] = ASEvaluator.evaluateAttribute(m_attributeList[i]);
483    }
484
485    double[][] tempRanked = rankedAttributes();
486    int[] rankedAttributes = new int[m_attributeList.length];
487
488    for (i = 0; i < m_attributeList.length; i++) {
489      rankedAttributes[i] = (int)tempRanked[i][0];
490    }
491
492    return  rankedAttributes;
493  }
494
495
496  /**
497   * Sorts the evaluated attribute list
498   *
499   * @return an array of sorted (highest eval to lowest) attribute indexes
500   * @throws Exception of sorting can't be done.
501   */
502  public double[][] rankedAttributes ()
503    throws Exception {
504    int i, j;
505
506    if (m_attributeList == null || m_attributeMerit == null) {
507      throw  new Exception("Search must be performed before a ranked " 
508                           + "attribute list can be obtained");
509    }
510
511    int[] ranked = Utils.sort(m_attributeMerit);
512    // reverse the order of the ranked indexes
513    double[][] bestToWorst = new double[ranked.length][2];
514
515    for (i = ranked.length - 1, j = 0; i >= 0; i--) {
516      bestToWorst[j++][0] = ranked[i];
517    }
518
519    // convert the indexes to attribute indexes
520    for (i = 0; i < bestToWorst.length; i++) {
521      int temp = ((int)bestToWorst[i][0]);
522      bestToWorst[i][0] = m_attributeList[temp];
523      bestToWorst[i][1] = m_attributeMerit[temp];
524    }
525   
526    if (m_numToSelect > bestToWorst.length) {
527      throw new Exception("More attributes requested than exist in the data");
528    }
529
530    if (m_numToSelect <= 0) {
531      if (m_threshold == -Double.MAX_VALUE) {
532        m_calculatedNumToSelect = bestToWorst.length;
533      } else {
534        determineNumToSelectFromThreshold(bestToWorst);
535      }
536    }
537    /*    if (m_numToSelect > 0) {
538      determineThreshFromNumToSelect(bestToWorst);
539      } */
540
541    return  bestToWorst;
542  }
543
544  private void determineNumToSelectFromThreshold(double [][] ranking) {
545    int count = 0;
546    for (int i = 0; i < ranking.length; i++) {
547      if (ranking[i][1] > m_threshold) {
548        count++;
549      }
550    }
551    m_calculatedNumToSelect = count;
552  }
553
554  private void determineThreshFromNumToSelect(double [][] ranking) 
555    throws Exception {
556    if (m_numToSelect > ranking.length) {
557      throw new Exception("More attributes requested than exist in the data");
558    }
559
560    if (m_numToSelect == ranking.length) {
561      return;
562    }
563
564    m_threshold = (ranking[m_numToSelect-1][1] + 
565                   ranking[m_numToSelect][1]) / 2.0;
566  }
567
568  /**
569   * returns a description of the search as a String
570   * @return a description of the search
571   */
572  public String toString () {
573    StringBuffer BfString = new StringBuffer();
574    BfString.append("\tAttribute ranking.\n");
575
576    if (m_starting != null) {
577      BfString.append("\tIgnored attributes: ");
578
579      BfString.append(startSetToString());
580      BfString.append("\n");
581    }
582
583    if (m_threshold != -Double.MAX_VALUE) {
584      BfString.append("\tThreshold for discarding attributes: "
585                      + Utils.doubleToString(m_threshold,8,4)+"\n");
586    }
587
588    return BfString.toString();
589  }
590
591
592  /**
593   * Resets stuff to default values
594   */
595  protected void resetOptions () {
596    m_starting = null;
597    m_startRange = new Range();
598    m_attributeList = null;
599    m_attributeMerit = null;
600    m_threshold = -Double.MAX_VALUE;
601  }
602
603
604  private boolean inStarting (int feat) {
605    // omit the class from the evaluation
606    if ((m_hasClass == true) && (feat == m_classIndex)) {
607      return  true;
608    }
609
610    if (m_starting == null) {
611      return  false;
612    }
613
614    for (int i = 0; i < m_starting.length; i++) {
615      if (m_starting[i] == feat) {
616        return  true;
617      }
618    }
619
620    return  false;
621  }
622 
623  /**
624   * Returns the revision string.
625   *
626   * @return            the revision
627   */
628  public String getRevision() {
629    return RevisionUtils.extract("$Revision: 1.26 $");
630  }
631}
Note: See TracBrowser for help on using the repository browser.