source: src/main/java/weka/attributeSelection/FCBFSearch.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 14 years ago

Import of weka.

File size: 25.2 KB
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17 /*
18 *    RELEASE INFORMATION (December 27, 2004)
19 *   
20 *    FCBF algorithm:
21 *      Template obtained from Weka
22 *      Developed for Weka by Zheng Alan Zhao   
23 *      December 27, 2004
24 *
25 *    FCBF algorithm is a feature selection method based on Symmetrical Uncertainty Measurement for
26 *    relevance redundancy analysis. The details of FCBF algorithm are in:
27 *
28 <!-- technical-plaintext-start -->
29 * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003.
30 <!-- technical-plaintext-end -->
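 *
 *    For reference, the symmetrical uncertainty used in this relevance and
 *    redundancy analysis is SU(X,Y) = 2 * [H(X) - H(X|Y)] / (H(X) + H(Y)),
 *    where H(.) denotes entropy; SU ranges from 0 (independent variables) to 1.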
31 *   
32 *   
33 *    CONTACT INFORMATION
34 *   
35 *    For algorithm implementation:
36 *    Zheng Zhao: zhaozheng at asu.edu
37 *     
38 *    For the algorithm:
39 *    Lei Yu: leiyu at asu.edu
40 *    Huan Liu: hliu at asu.edu
41 *     
42 *    Data Mining and Machine Learning Lab
43 *    Computer Science and Engineering Department
44 *    Fulton School of Engineering
45 *    Arizona State University
46 *    Tempe, AZ 85287
47 *
48 *    FCBFSearch.java
49 *
50 *    Copyright (C) 2004 Data Mining and Machine Learning Lab,
51 *                       Computer Science and Engineering Department,
52 *                       Fulton School of Engineering,
53 *                       Arizona State University
54 *
55 */
56
57
58package weka.attributeSelection;
59
60import weka.core.Instances;
61import weka.core.Option;
62import weka.core.OptionHandler;
63import weka.core.Range;
64import weka.core.RevisionUtils;
65import weka.core.TechnicalInformation;
66import weka.core.TechnicalInformation.Type;
67import weka.core.TechnicalInformation.Field;
68import weka.core.TechnicalInformationHandler;
69import weka.core.Utils;
70
71import java.util.Enumeration;
72import java.util.Vector;
73
74/**
75 <!-- globalinfo-start -->
76 * FCBF : <br/>
77 * <br/>
78 * Feature selection method based on correlation measure and relevance &amp; redundancy analysis. Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).<br/>
79 * <br/>
80 * For more information see:<br/>
81 * <br/>
82 * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003.
83 * <p/>
84 <!-- globalinfo-end -->
85 *
86 <!-- technical-bibtex-start -->
87 * BibTeX:
88 * <pre>
89 * &#64;inproceedings{Yu2003,
90 *    author = {Lei Yu and Huan Liu},
91 *    booktitle = {Proceedings of the Twentieth International Conference on Machine Learning},
92 *    pages = {856-863},
93 *    publisher = {AAAI Press},
94 *    title = {Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution},
95 *    year = {2003}
96 * }
97 * </pre>
98 * <p/>
99 <!-- technical-bibtex-end -->
100 *
101 <!-- options-start -->
102 * Valid options are: <p/>
103 *
104 * <pre> -D &lt;create dataset&gt;
105 *  Specify whether the selector generates a new dataset.</pre>
106 *
107 * <pre> -P &lt;start set&gt;
108 *  Specify a starting set of attributes.
109 *   Eg. 1,3,5-7.
110 *  Any starting attributes specified are
111 *  ignored during the ranking.</pre>
112 *
113 * <pre> -T &lt;threshold&gt;
114 *  Specify a threshold by which attributes
115 *  may be discarded from the ranking.</pre>
116 *
117 * <pre> -N &lt;num to select&gt;
118 *  Specify number of attributes to select</pre>
119 *
120 <!-- options-end -->
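 *
 * As a minimal usage sketch (the variable names here are illustrative only), the
 * search is typically driven through weka.attributeSelection.AttributeSelection
 * together with SymmetricalUncertAttributeEval:
 * <pre>
 * AttributeSelection selector = new AttributeSelection();
 * selector.setEvaluator(new SymmetricalUncertAttributeEval());
 * selector.setSearch(new FCBFSearch());
 * selector.SelectAttributes(data);               // data is a weka.core.Instances object
 * int[] chosen = selector.selectedAttributes();  // indexes of the selected attributes
 * </pre>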
121 *
122 * @author Zheng Zhao: zhaozheng at asu.edu
123 * @version $Revision: 1.7 $
124 */
125public class FCBFSearch 
126  extends ASSearch
127  implements RankedOutputSearch, StartSetHandler, OptionHandler,
128             TechnicalInformationHandler {
129
130  /** for serialization */
131  static final long serialVersionUID = 8209699587428369942L;
132 
133  /** Holds the starting set as an array of attributes */
134  private int[] m_starting;
135
136  /** Holds the start set for the search as a range */
137  private Range m_startRange;
138
139  /** Holds the ordered list of attributes */
140  private int[] m_attributeList;
141
142  /** Holds the list of attribute merit scores */
143  private double[] m_attributeMerit;
144
145  /** Data has class attribute---if unsupervised evaluator then no class */
146  private boolean m_hasClass;
147
148  /** Class index of the data if supervised evaluator */
149  private int m_classIndex;
150
151  /** The number of attributes */
152  private int m_numAttribs;
153
154  /**
155   * A threshold by which to discard attributes---used by the
156   * AttributeSelection module
157   */
158  private double m_threshold;
159
160  /** The number of attributes to select. -1 indicates that all attributes
161      are to be retained. Has precedence over m_threshold */
162  private int m_numToSelect = -1;
163
164  /** Used to compute the number to select */
165  private int m_calculatedNumToSelect = -1;
166
167  /*-----------------add begin 2004-11-15 by alan-----------------*/
168  /** Used to determine whether we create a new dataset according to the selected features */
169  private boolean m_generateOutput = false;
170
171  /** Used to store the ref of the Evaluator we use*/
172  private ASEvaluation m_asEval;
173
174  /** Holds, for each attribute, the FCBF bookkeeping: [0] attribute index, [1] SU(attribute, class), [2] index of the attribute that marked it redundant (its own index once selected, -1 while unprocessed), [3] SU with that marking attribute */
175  private double[][] m_rankedFCBF;
176
177  /** Hold the list of selected features*/
178  private double[][] m_selectedFeatures;
179  /*-----------------add end 2004-11-15 by alan-----------------*/
180
181   /**
182   * Returns a string describing this search method
183   * @return a description of the search suitable for
184   * displaying in the explorer/experimenter gui
185   */
186  public String globalInfo() {
187    return 
188        "FCBF : \n\nFeature selection method based on correlation measure"
189      + "and relevance&redundancy analysis. "
190      + "Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).\n\n"
191      + "For more information see:\n\n"
192      + getTechnicalInformation().toString();
193  }
194
195  /**
196   * Returns an instance of a TechnicalInformation object, containing
197   * detailed information about the technical background of this class,
198   * e.g., paper reference or book this class is based on.
199   *
200   * @return the technical information about this class
201   */
202  public TechnicalInformation getTechnicalInformation() {
203    TechnicalInformation        result;
204   
205    result = new TechnicalInformation(Type.INPROCEEDINGS);
206    result.setValue(Field.AUTHOR, "Lei Yu and Huan Liu");
207    result.setValue(Field.TITLE, "Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution");
208    result.setValue(Field.BOOKTITLE, "Proceedings of the Twentieth International Conference on Machine Learning");
209    result.setValue(Field.YEAR, "2003");
210    result.setValue(Field.PAGES, "856-863");
211    result.setValue(Field.PUBLISHER, "AAAI Press");
212   
213    return result;
214  }
215
216  /**
217   * Constructor
218   */
219  public FCBFSearch () {
220    resetOptions();
221  }
222
223  /**
224   * Returns the tip text for this property
225   * @return tip text for this property suitable for
226   * displaying in the explorer/experimenter gui
227   */
228  public String numToSelectTipText() {
229    return "Specify the number of attributes to retain. The default value "
230      +"(-1) indicates that all attributes are to be retained. Use either "
231      +"this option or a threshold to reduce the attribute set.";
232  }
233
234  /**
235   * Specify the number of attributes to select from the ranked list. -1
236   * indicates that all attributes are to be retained.
237   * @param n the number of attributes to retain
238   */
239  public void setNumToSelect(int n) {
240    m_numToSelect = n;
241  }
242
243  /**
244   * Gets the number of attributes to be retained.
245   * @return the number of attributes to retain
246   */
247  public int getNumToSelect() {
248    return m_numToSelect;
249  }
250
251  /**
252   * Gets the calculated number of attributes to select. This may come from
253   * a threshold or, when no positive number to select was given, from the number
254   * of features retained by FCBF; it never exceeds the number of selected features.
255   * @return the calculated number of attributes to select
256   */
257  public int getCalculatedNumToSelect() {
258    if (m_numToSelect >= 0) {
259      m_calculatedNumToSelect = m_numToSelect;
260    }
261    if (m_selectedFeatures.length>0
262        && m_selectedFeatures.length<m_calculatedNumToSelect)
263    {
264      m_calculatedNumToSelect = m_selectedFeatures.length;
265    }
266
267    return m_calculatedNumToSelect;
268  }
269
270  /**
271   * Returns the tip text for this property
272   * @return tip text for this property suitable for
273   * displaying in the explorer/experimenter gui
274   */
275  public String thresholdTipText() {
276    return "Set threshold by which attributes can be discarded. Default value "
277      + "results in no attributes being discarded. Use either this option or "
278      +"numToSelect to reduce the attribute set.";
279  }
280
281  /**
282   * Set the threshold by which the AttributeSelection module can discard
283   * attributes.
284   * @param threshold the threshold.
285   */
286  public void setThreshold(double threshold) {
287    m_threshold = threshold;
288  }
289
290  /**
291   * Returns the threshold so that the AttributeSelection module can
292   * discard attributes from the ranking.
293   * @return the threshold
294   */
295  public double getThreshold() {
296    return m_threshold;
297  }
298
299  /**
300   * Returns the tip text for this property
301   * @return tip text for this property suitable for
302   * displaying in the explorer/experimenter gui
303   */
304  public String generateRankingTipText() {
305    return "A constant option. FCBF is capable of generating"
306      +" attribute rankings.";
307  }
308
309  /**
310   * This is a dummy set method---FCBFSearch is ONLY capable of producing
311   * a ranked list of attributes for attribute evaluators.
312   * @param doRank this parameter is N/A and is ignored
313   */
314  public void setGenerateRanking(boolean doRank) {
315  }
316
317  /**
318   * This is a dummy method. FCBFSearch can ONLY be used with attribute
319   * evaluators and as such can only produce a ranked list of attributes.
320   * @return true all the time.
321   */
322  public boolean getGenerateRanking() {
323    return true;
324  }
325
326  /**
327   * Returns the tip text for this property
328   * @return tip text for this property suitable for
329   * displaying in the explorer/experimenter gui
330   */
331
332  public String generateDataOutputTipText() {
333    return "Generating new dataset according to the selected features."
334      +" ";
335  }
336
337  /**
338   * Sets the flag by which the AttributeSelection module decides
339   * whether to create a new dataset containing only the selected features.
340   * @param doGenerate the flag by which the AttributeSelection module
341   * decides whether to create a new dataset containing only the selected
342   * features
343   */
344  public void setGenerateDataOutput(boolean doGenerate) {
345    this.m_generateOutput = doGenerate;
346
347  }
348
349  /**
350   * Returns the flag by which the AttributeSelection module decides
351   * whether to create a new dataset containing only the selected features.
352   * @return the flag by which the AttributeSelection module decides
353   * whether to create a new dataset containing only the selected features.
354   */
355  public boolean getGenerateDataOutput() {
356    return this.m_generateOutput;
357  }
358
359  /**
360   * Returns the tip text for this property
361   * @return tip text for this property suitable for
362   * displaying in the explorer/experimenter gui
363   */
364  public String startSetTipText() {
365    return "Specify a set of attributes to ignore. "
366      +" When generating the ranking, FCBF will not evaluate the attributes "
367      +" in this list. "
368      +"This is specified as a comma "
369      +"seperated list off attribute indexes starting at 1. It can include "
370      +"ranges. Eg. 1,2,5-9,17.";
371  }
372
373  /**
374   * Sets a starting set of attributes for the search. It is the
375   * search method's responsibility to report this start set (if any)
376   * in its toString() method.
377   * @param startSet a string containing a list of attributes (and or ranges),
378   * eg. 1,2,6,10-15.
379   * @throws Exception if start set can't be set.
380   */
381  public void setStartSet (String startSet) throws Exception {
382    m_startRange.setRanges(startSet);
383  }
384
385  /**
386   * Returns a list of attributes (and or attribute ranges) as a String
387   * @return a list of attributes (and or attribute ranges)
388   */
389  public String getStartSet () {
390    return m_startRange.getRanges();
391  }
392
393  /**
394   * Returns an enumeration describing the available options.
395   * @return an enumeration of all the available options.
396   **/
397  public Enumeration listOptions () {
398    Vector newVector = new Vector(4);
399
400    newVector.addElement(new Option(
401        "\tSpecify Whether the selector generates a new dataset.",
402        "D", 1, "-D <create dataset>"));
403
404    newVector.addElement(new Option(
405        "\tSpecify a starting set of attributes.\n"
406        + "\t\tEg. 1,3,5-7.\n"
407        + "\tAny starting attributes specified are\n"
408        + "\tignored during the ranking.",
409        "P", 1 , "-P <start set>"));
410
411    newVector.addElement(new Option(
412        "\tSpecify a theshold by which attributes\n"
413        + "\tmay be discarded from the ranking.",
414        "T", 1, "-T <threshold>"));
415
416    newVector.addElement(new Option(
417        "\tSpecify number of attributes to select",
418        "N", 1, "-N <num to select>"));
419
420    return newVector.elements();
421
422  }
423
424  /**
425   * Parses a given list of options. <p/>
426   *
427   <!-- options-start -->
428   * Valid options are: <p/>
429   *
430   * <pre> -D &lt;create dataset&gt;
431   *  Specify whether the selector generates a new dataset.</pre>
432   *
433   * <pre> -P &lt;start set&gt;
434   *  Specify a starting set of attributes.
435   *   Eg. 1,3,5-7.
436   *  Any starting attributes specified are
437   *  ignored during the ranking.</pre>
438   *
439   * <pre> -T &lt;threshold&gt;
440   *  Specify a threshold by which attributes
441   *  may be discarded from the ranking.</pre>
442   *
443   * <pre> -N &lt;num to select&gt;
444   *  Specify number of attributes to select</pre>
445   *
446   <!-- options-end -->
447   *
448   * @param options the list of options as an array of strings
449   * @throws Exception if an option is not supported
450   *
451   **/
452  public void setOptions (String[] options)
453    throws Exception {
454    String optionString;
455    resetOptions();
456
457    optionString = Utils.getOption('D', options);
458    if (optionString.length() != 0) {
459      setGenerateDataOutput(Boolean.valueOf(optionString).booleanValue()); // parse the option value (Boolean.getBoolean would read a system property instead)
460    }
461
462    optionString = Utils.getOption('P', options);
463    if (optionString.length() != 0) {
464      setStartSet(optionString);
465    }
466
467    optionString = Utils.getOption('T', options);
468    if (optionString.length() != 0) {
469      Double temp;
470      temp = Double.valueOf(optionString);
471      setThreshold(temp.doubleValue());
472    }
473
474    optionString = Utils.getOption('N', options);
475    if (optionString.length() != 0) {
476      setNumToSelect(Integer.parseInt(optionString));
477    }
478  }
479
480  /**
481   * Gets the current settings of FCBFSearch.
482   *
483   * @return an array of strings suitable for passing to setOptions()
484   */
485  public String[] getOptions () {
486    String[] options = new String[8];
487    int current = 0;
488
489      options[current++] = "-D";
490      options[current++] = ""+getGenerateDataOutput();
491
492    if (!(getStartSet().equals(""))) {
493      options[current++] = "-P";
494      options[current++] = ""+startSetToString();
495    }
496
497    options[current++] = "-T";
498    options[current++] = "" + getThreshold();
499
500    options[current++] = "-N";
501    options[current++] = ""+getNumToSelect();
502
503    while (current < options.length) {
504      options[current++] = "";
505    }
506    return  options;
507  }
508
509  /**
510   * converts the array of starting attributes to a string. This is
511   * used by getOptions to return the actual attributes specified
512   * as the starting set. This is better than using m_startRanges.getRanges()
513   * as the same start set can be specified in different ways from the
514   * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
515   * is stored in a database is comparable.
516   * @return a comma separated list of individual attribute numbers as a String
517   */
518  private String startSetToString() {
519    StringBuffer FString = new StringBuffer();
520    boolean didPrint;
521
522    if (m_starting == null) {
523      return getStartSet();
524    }
525
526    for (int i = 0; i < m_starting.length; i++) {
527      didPrint = false;
528
529      if ((m_hasClass == false) ||
530          (m_hasClass == true && i != m_classIndex)) {
531        FString.append((m_starting[i] + 1));
532        didPrint = true;
533      }
534
535      if (i == (m_starting.length - 1)) {
536        FString.append("");
537      }
538      else {
539        if (didPrint) {
540          FString.append(",");
541        }
542      }
543    }
544
545    return FString.toString();
546  }
547
548  /**
549   * Calls an attribute evaluator to evaluate each attribute not included
550   * in the startSet, sorts the attributes by merit, and then applies FCBF
551   * redundancy elimination to obtain the final set of selected attributes.
552   *
553   * @param ASEval the attribute evaluator to guide the search
554   * @param data the training instances.
555   * @return an array (not necessarily ordered) of selected attribute indexes
556   * @throws Exception if the search can't be completed
557   */
558  public int[] search (ASEvaluation ASEval, Instances data)
559    throws Exception {
560    int i, j;
561
562    if (!(ASEval instanceof AttributeSetEvaluator)) {
563      throw  new Exception(ASEval.getClass().getName()
564                           + " is not an "
565                           + "Attribute Set evaluator!");
566    }
567
568    m_numAttribs = data.numAttributes();
569
570    if (ASEval instanceof UnsupervisedAttributeEvaluator) {
571      m_hasClass = false;
572    }
573    else {
574      m_classIndex = data.classIndex();
575      if (m_classIndex >= 0) {
576        m_hasClass = true;
577      } else {
578        m_hasClass = false;
579      }
580    }
581
582    // get the transformed data and check to see if the transformer
583    // preserves a class index
584    if (ASEval instanceof AttributeTransformer) {
585      data = ((AttributeTransformer)ASEval).transformedHeader();
586      if (m_classIndex >= 0 && data.classIndex() >= 0) {
587        m_classIndex = data.classIndex();
588        m_hasClass = true;
589      }
590    }
591
592
593    m_startRange.setUpper(m_numAttribs - 1);
594    if (!(getStartSet().equals(""))) {
595      m_starting = m_startRange.getSelection();
596    }
597
598    int sl=0;
599    if (m_starting != null) {
600      sl = m_starting.length;
601    }
602    if ((m_starting != null) && (m_hasClass == true)) {
603      // see if the supplied list contains the class index
604      boolean ok = false;
605      for (i = 0; i < sl; i++) {
606        if (m_starting[i] == m_classIndex) {
607          ok = true;
608          break;
609        }
610      }
611
612      if (ok == false) {
613        sl++;
614      }
615    }
616    else {
617      if (m_hasClass == true) {
618        sl++;
619      }
620    }
621
622
623    m_attributeList = new int[m_numAttribs - sl];
624    m_attributeMerit = new double[m_numAttribs - sl];
625
626    // add in those attributes not in the starting (omit list)
627    for (i = 0, j = 0; i < m_numAttribs; i++) {
628      if (!inStarting(i)) {
629        m_attributeList[j++] = i;
630      }
631    }
632
633    this.m_asEval = ASEval;
634    AttributeSetEvaluator ASEvaluator = (AttributeSetEvaluator)ASEval;
635
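    // evaluate each candidate attribute on its own; with SymmetricalUncertAttributeEval
    // this merit is the symmetrical uncertainty between the attribute and the class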
636    for (i = 0; i < m_attributeList.length; i++) {
637      m_attributeMerit[i] = ASEvaluator.evaluateAttribute(m_attributeList[i]);
638    }
639
640    double[][] tempRanked = rankedAttributes();
641    int[] rankedAttributes = new int[m_selectedFeatures.length];
642
643    for (i = 0; i < m_selectedFeatures.length; i++) {
644      rankedAttributes[i] = (int)tempRanked[i][0];
645    }
646    return  rankedAttributes;
647  }
648
649
650
651  /**
652   * Sorts the evaluated attribute list
653   *
654   * @return an array of sorted (highest eval to lowest) attribute indexes
655   * @throws Exception if sorting can't be done.
656   */
657  public double[][] rankedAttributes ()
658    throws Exception {
659    int i, j;
660
661    if (m_attributeList == null || m_attributeMerit == null) {
662      throw  new Exception("Search must be performed before a ranked "
663                           + "attribute list can be obtained");
664    }
665
666    int[] ranked = Utils.sort(m_attributeMerit);
667    // reverse the order of the ranked indexes
668    double[][] bestToWorst = new double[ranked.length][2];
669
670    for (i = ranked.length - 1, j = 0; i >= 0; i--) {
671      bestToWorst[j++][0] = ranked[i];
672    //alan: Utils.sort returns indexes ordered from smallest to largest merit, so reverse them here
673    }
674
675    // convert the indexes to attribute indexes
676    for (i = 0; i < bestToWorst.length; i++) {
677      int temp = ((int)bestToWorst[i][0]);
678      bestToWorst[i][0] = m_attributeList[temp];     // the attribute index
679      bestToWorst[i][1] = m_attributeMerit[temp];    // the merit of that attribute
680    }
681
682    if (m_numToSelect > bestToWorst.length) {
683      throw new Exception("More attributes requested than exist in the data");
684    }
685
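    // run FCBF redundancy elimination on the merit-ranked list; this fills
    // m_rankedFCBF and m_selectedFeatures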
686    this.FCBFElimination(bestToWorst);
687
688    if (m_numToSelect <= 0) {
689      if (m_threshold == -Double.MAX_VALUE) {
690        m_calculatedNumToSelect = m_selectedFeatures.length;
691      } else {
692        determineNumToSelectFromThreshold(m_selectedFeatures);
693      }
694    }
695    /*    if (m_numToSelect > 0) {
696      determineThreshFromNumToSelect(bestToWorst);
697      } */
698
699    return  m_selectedFeatures;
700  }
701
702  private void determineNumToSelectFromThreshold(double [][] ranking) {
703    int count = 0;
704    for (int i = 0; i < ranking.length; i++) {
705      if (ranking[i][1] > m_threshold) {
706        count++;
707      }
708    }
709    m_calculatedNumToSelect = count;
710  }
711
712  private void determineThreshFromNumToSelect(double [][] ranking)
713    throws Exception {
714    if (m_numToSelect > ranking.length) {
715      throw new Exception("More attributes requested than exist in the data");
716    }
717
718    if (m_numToSelect == ranking.length) {
719      return;
720    }
721
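    // place the threshold halfway between the last retained and the first discarded merit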
722    m_threshold = (ranking[m_numToSelect-1][1] +
723                   ranking[m_numToSelect][1]) / 2.0;
724  }
725
726  /**
727   * returns a description of the search as a String
728   * @return a description of the search
729   */
730  public String toString () {
731    StringBuffer BfString = new StringBuffer();
732    BfString.append("\tAttribute ranking.\n");
733
734    if (m_starting != null) {
735      BfString.append("\tIgnored attributes: ");
736
737      BfString.append(startSetToString());
738      BfString.append("\n");
739    }
740
741    if (m_threshold != -Double.MAX_VALUE) {
742      BfString.append("\tThreshold for discarding attributes: "
743                      + Utils.doubleToString(m_threshold,8,4)+"\n");
744    }
745
746    BfString.append("\n\n");
747
748    BfString.append("     J || SU(j,Class) ||    I || SU(i,j). \n");
749
750    for (int i=0; i<m_rankedFCBF.length; i++)
751    {
752      BfString.append(Utils.doubleToString(m_rankedFCBF[i][0]+1,6,0)+" ; "
753                      +Utils.doubleToString(m_rankedFCBF[i][1],12,7)+" ; ");
754      if (m_rankedFCBF[i][2] == m_rankedFCBF[i][0])
755      {
756        BfString.append("    *\n");
757      }
758      else
759      {
760        BfString.append(Utils.doubleToString(m_rankedFCBF[i][2] + 1,5,0) + " ; "
761                     + m_rankedFCBF[i][3] + "\n");
762      }
763    }
764
765    return BfString.toString();
766  }
767
768
769  /**
770   * Resets stuff to default values
771   */
772  protected void resetOptions () {
773    m_starting = null;
774    m_startRange = new Range();
775    m_attributeList = null;
776    m_attributeMerit = null;
777    m_threshold = -Double.MAX_VALUE;
778  }
779
780
781  private boolean inStarting (int feat) {
782    // omit the class from the evaluation
783    if ((m_hasClass == true) && (feat == m_classIndex)) {
784      return  true;
785    }
786
787    if (m_starting == null) {
788      return  false;
789    }
790
791    for (int i = 0; i < m_starting.length; i++) {
792      if (m_starting[i] == feat) {
793        return  true;
794      }
795    }
796
797    return  false;
798  }
799
800  private void FCBFElimination(double[][]rankedFeatures)
801  throws Exception {
802
803    int i,j;
804
805    m_rankedFCBF = new double[m_attributeList.length][4];
806    int[] attributes = new int[1];
807    int[] classAttributes = new int[1];
808
809    int numSelectedAttributes = 0;
810
811    int startPoint = 0;
812    double tempSUIJ = 0;
813
814    AttributeSetEvaluator ASEvaluator = (AttributeSetEvaluator)m_asEval;
815
816    for (i = 0; i < rankedFeatures.length; i++) {
817      m_rankedFCBF[i][0] = rankedFeatures[i][0];
818      m_rankedFCBF[i][1] = rankedFeatures[i][1];
819      m_rankedFCBF[i][2] = -1;
820    }
821
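    // FCBF redundancy elimination over the list ranked by SU(attribute, class):
    // each pass picks the highest-ranked attribute that is still unmarked
    // (column 2 == -1) and marks it as selected (column 2 = its own index); every
    // remaining unmarked attribute whose SU with the selected attribute is at
    // least as large as its SU with the class is marked redundant (column 2
    // records the selecting attribute, column 3 the corresponding SU value).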
822    while (startPoint < rankedFeatures.length)
823    {
824      if (m_rankedFCBF[startPoint][2] != -1)
825      {
826        startPoint++;
827        continue;
828      }
829
830      m_rankedFCBF[startPoint][2] = m_rankedFCBF[startPoint][0];
831      numSelectedAttributes++;
832
833      for (i = startPoint + 1; i < m_attributeList.length; i++)
834      {
835        if (m_rankedFCBF[i][2] != -1)
836        {
837          continue;
838        }
839        attributes[0] = (int) m_rankedFCBF[startPoint][0];
840        classAttributes[0] = (int) m_rankedFCBF[i][0];
841        tempSUIJ = ASEvaluator.evaluateAttribute(attributes, classAttributes);
842        if (m_rankedFCBF[i][1] < tempSUIJ || Math.abs(tempSUIJ-m_rankedFCBF[i][1])<1E-8)
843        {
844          m_rankedFCBF[i][2] = m_rankedFCBF[startPoint][0];
845          m_rankedFCBF[i][3] = tempSUIJ;
846        }
847      }
848      startPoint++;
849    }
850
851    m_selectedFeatures = new double[numSelectedAttributes][2];
852
853    for (i = 0, j = 0; i < m_attributeList.length; i++)
854    {
855      if (m_rankedFCBF[i][2] == m_rankedFCBF[i][0])
856      {
857        m_selectedFeatures[j][0] = m_rankedFCBF[i][0];
858        m_selectedFeatures[j][1] = m_rankedFCBF[i][1];
859        j++;
860      }
861    }
862  }
863 
864  /**
865   * Returns the revision string.
866   *
867   * @return            the revision
868   */
869  public String getRevision() {
870    return RevisionUtils.extract("$Revision: 1.7 $");
871  }
872}