source: branches/MetisMQI/src/main/java/weka/filters/supervised/attribute/AttributeSelection.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 16.9 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    AttributeSelection.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.supervised.attribute;
24
25import weka.attributeSelection.ASEvaluation;
26import weka.attributeSelection.ASSearch;
27import weka.attributeSelection.AttributeEvaluator;
28import weka.attributeSelection.AttributeTransformer;
29import weka.attributeSelection.BestFirst;
30import weka.attributeSelection.CfsSubsetEval;
31import weka.attributeSelection.Ranker;
32import weka.attributeSelection.UnsupervisedAttributeEvaluator;
33import weka.attributeSelection.UnsupervisedSubsetEvaluator;
34import weka.core.Capabilities;
35import weka.core.FastVector;
36import weka.core.Instance;
37import weka.core.DenseInstance;
38import weka.core.Instances;
39import weka.core.Option;
40import weka.core.OptionHandler;
41import weka.core.RevisionUtils;
42import weka.core.SparseInstance;
43import weka.core.Utils;
44import weka.core.Capabilities.Capability;
45import weka.filters.Filter;
46import weka.filters.SupervisedFilter;
47
48import java.util.Enumeration;
49import java.util.Vector;
50
51/**
52 <!-- globalinfo-start -->
53 * A supervised attribute filter that can be used to select attributes. It is very flexible and allows various search and evaluation methods to be combined.
54 * <p/>
55 <!-- globalinfo-end -->
56 *
57 <!-- options-start -->
58 * Valid options are: <p/>
59 *
60 * <pre> -S &lt;"Name of search class [search options]"&gt;
61 *  Sets search method for subset evaluators.
62 *  eg. -S "weka.attributeSelection.BestFirst -S 8"</pre>
63 *
64 * <pre> -E &lt;"Name of attribute/subset evaluation class [evaluator options]"&gt;
65 *  Sets attribute/subset evaluator.
66 *  eg. -E "weka.attributeSelection.CfsSubsetEval -L"</pre>
67 *
68 * <pre>
69 * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
70 * </pre>
71 *
72 * <pre> -M
73 *  Treat missing values as a seperate value.</pre>
74 *
75 * <pre> -L
76 *  Don't include locally predictive attributes.</pre>
77 *
78 * <pre>
79 * Options specific to search weka.attributeSelection.BestFirst:
80 * </pre>
81 *
82 * <pre> -P &lt;start set&gt;
83 *  Specify a starting set of attributes.
84 *  Eg. 1,3,5-7.</pre>
85 *
86 * <pre> -D &lt;0 = backward | 1 = forward | 2 = bi-directional&gt;
87 *  Direction of search. (default = 1).</pre>
88 *
89 * <pre> -N &lt;num&gt;
90 *  Number of non-improving nodes to
91 *  consider before terminating search.</pre>
92 *
93 * <pre> -S &lt;num&gt;
94 *  Size of lookup cache for evaluated subsets.
95 *  Expressed as a multiple of the number of
96 *  attributes in the data set. (default = 1)</pre>
97 *
98 <!-- options-end -->
99 *
100 * @author Mark Hall (mhall@cs.waikato.ac.nz)
101 * @version $Revision: 5987 $
102 */
103public class AttributeSelection 
104  extends Filter
105  implements SupervisedFilter, OptionHandler {
106 
  /** Serialization version identifier for this filter. */
  static final long serialVersionUID = -296211247688169716L;

  /**
   * The attribute selection object that is given the evaluator and search
   * method and run over the buffered input data in batchFinished().
   */
  private weka.attributeSelection.AttributeSelection m_trainSelector;

  /** The attribute/subset evaluator to use (a CfsSubsetEval after resetOptions()). */
  private ASEvaluation m_ASEvaluator;

  /** The search method to use (a BestFirst after resetOptions()). */
  private ASSearch m_ASSearch;

  /**
   * Meant to hold a copy of the full set of valid options passed to the filter.
   * NOTE(review): within this file it is only ever reset to null by
   * resetOptions() and is never read -- confirm whether it is still needed.
   */
  private String [] m_FilterOptions;

  /**
   * Indices of the selected attributes as reported by the attribute selection
   * object; null until batchFinished() has performed a selection.
   */
  private int [] m_SelectedAttributes;
124
125  /**
126   * Returns a string describing this filter
127   *
128   * @return a description of the filter suitable for
129   * displaying in the explorer/experimenter gui
130   */
131  public String globalInfo() {
132
133    return "A supervised attribute filter that can be used to select " 
134      + "attributes. It is very flexible and allows various search " 
135      + "and evaluation methods to be combined.";
136  }
137
138  /**
139   * Constructor
140   */
141  public AttributeSelection () {
142   
143    resetOptions();
144  }
145
146  /**
147   * Returns an enumeration describing the available options.
148   * @return an enumeration of all the available options.
149   */
150  public Enumeration listOptions() {
151   
152    Vector newVector = new Vector(6);
153
154    newVector.addElement(new Option(
155        "\tSets search method for subset evaluators.\n"
156        + "\teg. -S \"weka.attributeSelection.BestFirst -S 8\"", 
157        "S", 1,
158        "-S <\"Name of search class [search options]\">"));
159
160    newVector.addElement(new Option(
161        "\tSets attribute/subset evaluator.\n"
162        + "\teg. -E \"weka.attributeSelection.CfsSubsetEval -L\"",
163        "E", 1,
164        "-E <\"Name of attribute/subset evaluation class [evaluator options]\">"));
165   
166    if ((m_ASEvaluator != null) && (m_ASEvaluator instanceof OptionHandler)) {
167      Enumeration enu = ((OptionHandler)m_ASEvaluator).listOptions();
168     
169      newVector.addElement(new Option("", "", 0, "\nOptions specific to "
170           + "evaluator " + m_ASEvaluator.getClass().getName() + ":"));
171      while (enu.hasMoreElements()) {
172        newVector.addElement((Option)enu.nextElement());
173      }
174    }
175 
176    if ((m_ASSearch != null) && (m_ASSearch instanceof OptionHandler)) {
177      Enumeration enu = ((OptionHandler)m_ASSearch).listOptions();
178   
179      newVector.addElement(new Option("", "", 0, "\nOptions specific to "
180              + "search " + m_ASSearch.getClass().getName() + ":"));
181      while (enu.hasMoreElements()) {
182        newVector.addElement((Option)enu.nextElement());
183      }
184    }
185    return newVector.elements();
186  }
187
188  /**
189   * Parses a given list of options. <p/>
190   *
191   <!-- options-start -->
192   * Valid options are: <p/>
193   *
194   * <pre> -S &lt;"Name of search class [search options]"&gt;
195   *  Sets search method for subset evaluators.
196   *  eg. -S "weka.attributeSelection.BestFirst -S 8"</pre>
197   *
198   * <pre> -E &lt;"Name of attribute/subset evaluation class [evaluator options]"&gt;
199   *  Sets attribute/subset evaluator.
200   *  eg. -E "weka.attributeSelection.CfsSubsetEval -L"</pre>
201   *
202   * <pre>
203   * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
204   * </pre>
205   *
206   * <pre> -M
207   *  Treat missing values as a seperate value.</pre>
208   *
209   * <pre> -L
210   *  Don't include locally predictive attributes.</pre>
211   *
212   * <pre>
213   * Options specific to search weka.attributeSelection.BestFirst:
214   * </pre>
215   *
216   * <pre> -P &lt;start set&gt;
217   *  Specify a starting set of attributes.
218   *  Eg. 1,3,5-7.</pre>
219   *
220   * <pre> -D &lt;0 = backward | 1 = forward | 2 = bi-directional&gt;
221   *  Direction of search. (default = 1).</pre>
222   *
223   * <pre> -N &lt;num&gt;
224   *  Number of non-improving nodes to
225   *  consider before terminating search.</pre>
226   *
227   * <pre> -S &lt;num&gt;
228   *  Size of lookup cache for evaluated subsets.
229   *  Expressed as a multiple of the number of
230   *  attributes in the data set. (default = 1)</pre>
231   *
232   <!-- options-end -->
233   *
234   * @param options the list of options as an array of strings
235   * @throws Exception if an option is not supported
236   */
237  public void setOptions(String[] options) throws Exception {
238   
239    String optionString;
240    resetOptions();
241
242    if (Utils.getFlag('X',options)) {
243        throw new Exception("Cross validation is not a valid option"
244                            + " when using attribute selection as a Filter.");
245    }
246
247    optionString = Utils.getOption('E',options);
248    if (optionString.length() != 0) {
249      optionString = optionString.trim();
250      // split a quoted evaluator name from its options (if any)
251      int breakLoc = optionString.indexOf(' ');
252      String evalClassName = optionString;
253      String evalOptionsString = "";
254      String [] evalOptions=null;
255      if (breakLoc != -1) {
256        evalClassName = optionString.substring(0, breakLoc);
257        evalOptionsString = optionString.substring(breakLoc).trim();
258        evalOptions = Utils.splitOptions(evalOptionsString);
259      }
260      setEvaluator(ASEvaluation.forName(evalClassName, evalOptions));
261    }
262
263    if (m_ASEvaluator instanceof AttributeEvaluator) {
264      setSearch(new Ranker());
265    }
266
267    optionString = Utils.getOption('S',options);
268    if (optionString.length() != 0) {
269      optionString = optionString.trim();
270      int breakLoc = optionString.indexOf(' ');
271      String SearchClassName = optionString;
272      String SearchOptionsString = "";
273      String [] SearchOptions=null;
274      if (breakLoc != -1) {
275        SearchClassName = optionString.substring(0, breakLoc);
276        SearchOptionsString = optionString.substring(breakLoc).trim();
277        SearchOptions = Utils.splitOptions(SearchOptionsString);
278      }
279      setSearch(ASSearch.forName(SearchClassName, SearchOptions));
280    }
281
282    Utils.checkForRemainingOptions(options);
283  }
284
285
286  /**
287   * Gets the current settings for the attribute selection (search, evaluator)
288   * etc.
289   *
290   * @return an array of strings suitable for passing to setOptions()
291   */
292  public String [] getOptions() {
293    String [] EvaluatorOptions = new String[0];
294    String [] SearchOptions = new String[0];
295    int current = 0;
296
297    if (m_ASEvaluator instanceof OptionHandler) {
298      EvaluatorOptions = ((OptionHandler)m_ASEvaluator).getOptions();
299    }
300
301    if (m_ASSearch instanceof OptionHandler) {
302      SearchOptions = ((OptionHandler)m_ASSearch).getOptions();
303    }
304
305    String [] setOptions = new String [10];
306    setOptions[current++]="-E";
307    setOptions[current++]= getEvaluator().getClass().getName()
308      +" "+Utils.joinOptions(EvaluatorOptions);
309
310    setOptions[current++]="-S";
311    setOptions[current++]=getSearch().getClass().getName() 
312      + " "+Utils.joinOptions(SearchOptions);
313
314    while (current < setOptions.length) {
315      setOptions[current++] = "";
316    }
317   
318    return setOptions;
319  }
320 
321  /**
322   * Returns the tip text for this property
323   *
324   * @return tip text for this property suitable for
325   * displaying in the explorer/experimenter gui
326   */
327  public String evaluatorTipText() {
328
329    return "Determines how attributes/attribute subsets are evaluated.";
330  }
331
332  /**
333   * set attribute/subset evaluator
334   *
335   * @param evaluator the evaluator to use
336   */
337  public void setEvaluator(ASEvaluation evaluator) {
338    m_ASEvaluator = evaluator;
339  }
340 
341  /**
342   * Returns the tip text for this property
343   *
344   * @return tip text for this property suitable for
345   * displaying in the explorer/experimenter gui
346   */
347  public String searchTipText() {
348
349    return "Determines the search method.";
350  }
351
352  /**
353   * Set search class
354   *
355   * @param search the search class to use
356   */
357  public void setSearch(ASSearch search) {
358    m_ASSearch = search;
359  }
360
361  /**
362   * Get the name of the attribute/subset evaluator
363   *
364   * @return the name of the attribute/subset evaluator as a string
365   */
366  public ASEvaluation getEvaluator() {
367   
368      return m_ASEvaluator;
369  }
370
371  /**
372   * Get the name of the search method
373   *
374   * @return the name of the search method as a string
375   */
376  public ASSearch getSearch() {
377   
378      return m_ASSearch;
379  }
380
381  /**
382   * Returns the Capabilities of this filter.
383   *
384   * @return            the capabilities of this object
385   * @see               Capabilities
386   */
387  public Capabilities getCapabilities() {
388    Capabilities        result;
389   
390    if (m_ASEvaluator == null) {
391      result = super.getCapabilities();
392      result.disableAll();
393    } else {
394      result = m_ASEvaluator.getCapabilities();
395      // class index will be set if necessary, so we always allow the dataset
396      // to have no class attribute set. see the following method:
397      //   weka.attributeSelection.AttributeSelection.SelectAttributes(Instances)
398      result.enable(Capability.NO_CLASS);
399    }
400   
401    result.setMinimumNumberInstances(0);
402   
403    return result;
404  }
405
406  /**
407   * Input an instance for filtering. Ordinarily the instance is processed
408   * and made available for output immediately. Some filters require all
409   * instances be read before producing output.
410   *
411   * @param instance the input instance
412   * @return true if the filtered instance may now be
413   * collected with output().
414   * @throws IllegalStateException if no input format has been defined.
415   * @throws Exception if the input instance was not of the correct format
416   * or if there was a problem with the filtering.
417   */
418  public boolean input(Instance instance) throws Exception {
419   
420    if (getInputFormat() == null) {
421      throw new IllegalStateException("No input instance format defined");
422    }
423
424    if (m_NewBatch) {
425      resetQueue();
426      m_NewBatch = false;
427    }
428
429    if (isOutputFormatDefined()) {
430      convertInstance(instance);
431      return true;
432    }
433
434    bufferInput(instance);
435    return false;
436  }
437
438  /**
439   * Signify that this batch of input to the filter is finished. If the filter
440   * requires all instances prior to filtering, output() may now be called
441   * to retrieve the filtered instances.
442   *
443   * @return true if there are instances pending output.
444   * @throws IllegalStateException if no input structure has been defined.
445   * @throws Exception if there is a problem during the attribute selection.
446   */
447  public boolean batchFinished() throws Exception {
448   
449    if (getInputFormat() == null) {
450      throw new IllegalStateException("No input instance format defined");
451    }
452
453    if (!isOutputFormatDefined()) {
454      m_trainSelector.setEvaluator(m_ASEvaluator);
455      m_trainSelector.setSearch(m_ASSearch);
456      m_trainSelector.SelectAttributes(getInputFormat());
457      //      System.out.println(m_trainSelector.toResultsString());
458
459      m_SelectedAttributes = m_trainSelector.selectedAttributes();
460      if (m_SelectedAttributes == null) {
461        throw new Exception("No selected attributes\n");
462      }
463     
464      setOutputFormat();
465     
466      // Convert pending input instances
467      for (int i = 0; i < getInputFormat().numInstances(); i++) {
468        convertInstance(getInputFormat().instance(i));
469      }
470      flushInput();
471    }
472   
473    m_NewBatch = true;
474    return (numPendingOutput() != 0);
475  }
476
477  /**
478   * Set the output format. Takes the currently defined attribute set
479   * m_InputFormat and calls setOutputFormat(Instances) appropriately.
480   *
481   * @throws Exception if something goes wrong
482   */
483  protected void setOutputFormat() throws Exception {
484    Instances informat;
485
486    if (m_SelectedAttributes == null) {
487      setOutputFormat(null);
488      return;
489    }
490
491    FastVector attributes = new FastVector(m_SelectedAttributes.length);
492
493    int i;
494    if (m_ASEvaluator instanceof AttributeTransformer) {
495      informat = ((AttributeTransformer)m_ASEvaluator).transformedHeader();
496    } else {
497      informat = getInputFormat();
498    }
499
500    for (i=0;i < m_SelectedAttributes.length;i++) {
501      attributes.
502        addElement(informat.attribute(m_SelectedAttributes[i]).copy());
503    }
504
505    Instances outputFormat = 
506      new Instances(getInputFormat().relationName(), attributes, 0);
507
508
509    if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) &&
510        !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) {
511      outputFormat.setClassIndex(m_SelectedAttributes.length - 1);
512    }
513   
514    setOutputFormat(outputFormat); 
515  }
516
517  /**
518   * Convert a single instance over. Selected attributes only are transfered.
519   * The converted instance is added to the end of
520   * the output queue.
521   *
522   * @param instance the instance to convert
523   * @throws Exception if something goes wrong
524   */
525  protected void convertInstance(Instance instance) throws Exception {
526    double[] newVals = new double[getOutputFormat().numAttributes()];
527
528    if (m_ASEvaluator instanceof AttributeTransformer) {
529      Instance tempInstance = ((AttributeTransformer)m_ASEvaluator).
530        convertInstance(instance);
531      for (int i = 0; i < m_SelectedAttributes.length; i++) {
532        int current = m_SelectedAttributes[i];
533        newVals[i] = tempInstance.value(current);
534      }
535    } else {
536      for (int i = 0; i < m_SelectedAttributes.length; i++) {
537        int current = m_SelectedAttributes[i];
538        newVals[i] = instance.value(current);
539      }
540    }
541    if (instance instanceof SparseInstance) {
542      push(new SparseInstance(instance.weight(), newVals));
543    } else {
544      push(new DenseInstance(instance.weight(), newVals));
545    }
546  }
547
548  /**
549   * set options to their default values
550   */
551  protected void resetOptions() {
552
553    m_trainSelector = new weka.attributeSelection.AttributeSelection();
554    setEvaluator(new CfsSubsetEval());
555    setSearch(new BestFirst());
556    m_SelectedAttributes = null;
557    m_FilterOptions = null;
558  }
559 
560  /**
561   * Returns the revision string.
562   *
563   * @return            the revision
564   */
565  public String getRevision() {
566    return RevisionUtils.extract("$Revision: 5987 $");
567  }
568
569  /**
570   * Main method for testing this class.
571   *
572   * @param argv should contain arguments to the filter: use -h for help
573   */
574  public static void main(String [] argv) {
575    runFilter(new AttributeSelection(), argv);
576  }
577}
578
Note: See TracBrowser for help on using the repository browser.