source: branches/MetisMQI/src/main/java/weka/classifiers/meta/AttributeSelectedClassifier.java @ 30

Last change on this file since 30 was 29, checked in by gnappo, 14 years ago

Taggata versione per la demo e aggiunto branch.

File size: 20.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    AttributeSelectedClassifier.java
19 *    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.meta;
24
25import weka.attributeSelection.ASEvaluation;
26import weka.attributeSelection.ASSearch;
27import weka.attributeSelection.AttributeSelection;
28import weka.classifiers.SingleClassifierEnhancer;
29import weka.core.AdditionalMeasureProducer;
30import weka.core.Capabilities;
31import weka.core.Drawable;
32import weka.core.Instance;
33import weka.core.Instances;
34import weka.core.Option;
35import weka.core.OptionHandler;
36import weka.core.RevisionUtils;
37import weka.core.Utils;
38import weka.core.WeightedInstancesHandler;
39import weka.core.Capabilities.Capability;
40
41import java.util.Enumeration;
42import java.util.Random;
43import java.util.Vector;
44
45/**
46 <!-- globalinfo-start -->
47 * Dimensionality of training and test data is reduced by attribute selection before being passed on to a classifier.
48 * <p/>
49 <!-- globalinfo-end -->
50 *
51 <!-- options-start -->
52 * Valid options are: <p/>
53 *
54 * <pre> -E &lt;attribute evaluator specification&gt;
55 *  Full class name of attribute evaluator, followed
56 *  by its options.
57 *  eg: "weka.attributeSelection.CfsSubsetEval -L"
58 *  (default weka.attributeSelection.CfsSubsetEval)</pre>
59 *
60 * <pre> -S &lt;search method specification&gt;
61 *  Full class name of search method, followed
62 *  by its options.
63 *  eg: "weka.attributeSelection.BestFirst -D 1"
64 *  (default weka.attributeSelection.BestFirst)</pre>
65 *
66 * <pre> -D
67 *  If set, classifier is run in debug mode and
68 *  may output additional info to the console</pre>
69 *
70 * <pre> -W
71 *  Full name of base classifier.
72 *  (default: weka.classifiers.trees.J48)</pre>
73 *
74 * <pre>
75 * Options specific to classifier weka.classifiers.trees.J48:
76 * </pre>
77 *
78 * <pre> -U
79 *  Use unpruned tree.</pre>
80 *
81 * <pre> -C &lt;pruning confidence&gt;
82 *  Set confidence threshold for pruning.
83 *  (default 0.25)</pre>
84 *
85 * <pre> -M &lt;minimum number of instances&gt;
86 *  Set minimum number of instances per leaf.
87 *  (default 2)</pre>
88 *
89 * <pre> -R
90 *  Use reduced error pruning.</pre>
91 *
92 * <pre> -N &lt;number of folds&gt;
93 *  Set number of folds for reduced error
94 *  pruning. One fold is used as pruning set.
95 *  (default 3)</pre>
96 *
97 * <pre> -B
98 *  Use binary splits only.</pre>
99 *
100 * <pre> -S
101 *  Don't perform subtree raising.</pre>
102 *
103 * <pre> -L
104 *  Do not clean up after the tree has been built.</pre>
105 *
106 * <pre> -A
107 *  Laplace smoothing for predicted probabilities.</pre>
108 *
109 * <pre> -Q &lt;seed&gt;
110 *  Seed for random data shuffling (default 1).</pre>
111 *
112 <!-- options-end -->
113 *
114 * @author Mark Hall (mhall@cs.waikato.ac.nz)
115 * @version $Revision: 1.26 $
116 */
117public class AttributeSelectedClassifier 
118  extends SingleClassifierEnhancer
119  implements OptionHandler, Drawable, AdditionalMeasureProducer,
120             WeightedInstancesHandler {
121
122  /** for serialization */
123  static final long serialVersionUID = -5951805453487947577L;
124 
125  /** The attribute selection object */
126  protected AttributeSelection m_AttributeSelection = null;
127
128  /** The attribute evaluator to use */
129  protected ASEvaluation m_Evaluator = 
130    new weka.attributeSelection.CfsSubsetEval();
131
132  /** The search method to use */
133  protected ASSearch m_Search = new weka.attributeSelection.BestFirst();
134
135  /** The header of the dimensionally reduced data */
136  protected Instances m_ReducedHeader;
137
138  /** The number of class vals in the training data (1 if class is numeric) */
139  protected int m_numClasses;
140
141  /** The number of attributes selected by the attribute selection phase */
142  protected double m_numAttributesSelected;
143
144  /** The time taken to select attributes in milliseconds */
145  protected double m_selectionTime;
146
147  /** The time taken to select attributes AND build the classifier */
148  protected double m_totalTime;
149
150 
151  /**
152   * String describing default classifier.
153   *
154   * @return the default classifier classname
155   */
156  protected String defaultClassifierString() {
157   
158    return "weka.classifiers.trees.J48";
159  }
160 
161  /**
162   * Default constructor.
163   */
164  public AttributeSelectedClassifier() {
165    m_Classifier = new weka.classifiers.trees.J48();
166  }
167
168  /**
169   * Returns a string describing this search method
170   * @return a description of the search method suitable for
171   * displaying in the explorer/experimenter gui
172   */
173  public String globalInfo() {
174    return "Dimensionality of training and test data is reduced by "
175      +"attribute selection before being passed on to a classifier.";
176  }
177
178  /**
179   * Returns an enumeration describing the available options.
180   *
181   * @return an enumeration of all the available options.
182   */
183  public Enumeration listOptions() {
184     Vector newVector = new Vector(3);
185   
186    newVector.addElement(new Option(
187              "\tFull class name of attribute evaluator, followed\n"
188              + "\tby its options.\n"
189              + "\teg: \"weka.attributeSelection.CfsSubsetEval -L\"\n"
190              + "\t(default weka.attributeSelection.CfsSubsetEval)",
191              "E", 1, "-E <attribute evaluator specification>"));
192
193    newVector.addElement(new Option(
194              "\tFull class name of search method, followed\n"
195              + "\tby its options.\n"
196              + "\teg: \"weka.attributeSelection.BestFirst -D 1\"\n"
197              + "\t(default weka.attributeSelection.BestFirst)",
198              "S", 1, "-S <search method specification>"));
199   
200    Enumeration enu = super.listOptions();
201    while (enu.hasMoreElements()) {
202      newVector.addElement(enu.nextElement());
203    }
204    return newVector.elements();
205  }
206
207  /**
208   * Parses a given list of options. <p/>
209   *
210   <!-- options-start -->
211   * Valid options are: <p/>
212   *
213   * <pre> -E &lt;attribute evaluator specification&gt;
214   *  Full class name of attribute evaluator, followed
215   *  by its options.
216   *  eg: "weka.attributeSelection.CfsSubsetEval -L"
217   *  (default weka.attributeSelection.CfsSubsetEval)</pre>
218   *
219   * <pre> -S &lt;search method specification&gt;
220   *  Full class name of search method, followed
221   *  by its options.
222   *  eg: "weka.attributeSelection.BestFirst -D 1"
223   *  (default weka.attributeSelection.BestFirst)</pre>
224   *
225   * <pre> -D
226   *  If set, classifier is run in debug mode and
227   *  may output additional info to the console</pre>
228   *
229   * <pre> -W
230   *  Full name of base classifier.
231   *  (default: weka.classifiers.trees.J48)</pre>
232   *
233   * <pre>
234   * Options specific to classifier weka.classifiers.trees.J48:
235   * </pre>
236   *
237   * <pre> -U
238   *  Use unpruned tree.</pre>
239   *
240   * <pre> -C &lt;pruning confidence&gt;
241   *  Set confidence threshold for pruning.
242   *  (default 0.25)</pre>
243   *
244   * <pre> -M &lt;minimum number of instances&gt;
245   *  Set minimum number of instances per leaf.
246   *  (default 2)</pre>
247   *
248   * <pre> -R
249   *  Use reduced error pruning.</pre>
250   *
251   * <pre> -N &lt;number of folds&gt;
252   *  Set number of folds for reduced error
253   *  pruning. One fold is used as pruning set.
254   *  (default 3)</pre>
255   *
256   * <pre> -B
257   *  Use binary splits only.</pre>
258   *
259   * <pre> -S
260   *  Don't perform subtree raising.</pre>
261   *
262   * <pre> -L
263   *  Do not clean up after the tree has been built.</pre>
264   *
265   * <pre> -A
266   *  Laplace smoothing for predicted probabilities.</pre>
267   *
268   * <pre> -Q &lt;seed&gt;
269   *  Seed for random data shuffling (default 1).</pre>
270   *
271   <!-- options-end -->
272   *
273   * @param options the list of options as an array of strings
274   * @throws Exception if an option is not supported
275   */
276  public void setOptions(String[] options) throws Exception {
277
278    // same for attribute evaluator
279    String evaluatorString = Utils.getOption('E', options);
280    if (evaluatorString.length() == 0)
281      evaluatorString = weka.attributeSelection.CfsSubsetEval.class.getName();
282    String [] evaluatorSpec = Utils.splitOptions(evaluatorString);
283    if (evaluatorSpec.length == 0) {
284      throw new Exception("Invalid attribute evaluator specification string");
285    }
286    String evaluatorName = evaluatorSpec[0];
287    evaluatorSpec[0] = "";
288    setEvaluator(ASEvaluation.forName(evaluatorName, evaluatorSpec));
289
290    // same for search method
291    String searchString = Utils.getOption('S', options);
292    if (searchString.length() == 0)
293      searchString = weka.attributeSelection.BestFirst.class.getName();
294    String [] searchSpec = Utils.splitOptions(searchString);
295    if (searchSpec.length == 0) {
296      throw new Exception("Invalid search specification string");
297    }
298    String searchName = searchSpec[0];
299    searchSpec[0] = "";
300    setSearch(ASSearch.forName(searchName, searchSpec));
301
302    super.setOptions(options);
303  }
304
305  /**
306   * Gets the current settings of the Classifier.
307   *
308   * @return an array of strings suitable for passing to setOptions
309   */
310  public String [] getOptions() {
311
312    String [] superOptions = super.getOptions();
313    String [] options = new String [superOptions.length + 4];
314
315    int current = 0;
316
317    // same attribute evaluator
318    options[current++] = "-E";
319    options[current++] = "" +getEvaluatorSpec();
320   
321    // same for search
322    options[current++] = "-S";
323    options[current++] = "" + getSearchSpec();
324
325    System.arraycopy(superOptions, 0, options, current, 
326                     superOptions.length);
327   
328    return options;
329  }
330
331  /**
332   * Returns the tip text for this property
333   * @return tip text for this property suitable for
334   * displaying in the explorer/experimenter gui
335   */
336  public String evaluatorTipText() {
337    return "Set the attribute evaluator to use. This evaluator is used "
338      +"during the attribute selection phase before the classifier is "
339      +"invoked.";
340  }
341
342  /**
343   * Sets the attribute evaluator
344   *
345   * @param evaluator the evaluator with all options set.
346   */
347  public void setEvaluator(ASEvaluation evaluator) {
348    m_Evaluator = evaluator;
349  }
350
351  /**
352   * Gets the attribute evaluator used
353   *
354   * @return the attribute evaluator
355   */
356  public ASEvaluation getEvaluator() {
357    return m_Evaluator;
358  }
359
360  /**
361   * Gets the evaluator specification string, which contains the class name of
362   * the attribute evaluator and any options to it
363   *
364   * @return the evaluator string.
365   */
366  protected String getEvaluatorSpec() {
367   
368    ASEvaluation e = getEvaluator();
369    if (e instanceof OptionHandler) {
370      return e.getClass().getName() + " "
371        + Utils.joinOptions(((OptionHandler)e).getOptions());
372    }
373    return e.getClass().getName();
374  }
375
376  /**
377   * Returns the tip text for this property
378   * @return tip text for this property suitable for
379   * displaying in the explorer/experimenter gui
380   */
381  public String searchTipText() {
382    return "Set the search method. This search method is used "
383      +"during the attribute selection phase before the classifier is "
384      +"invoked.";
385  }
386 
387  /**
388   * Sets the search method
389   *
390   * @param search the search method with all options set.
391   */
392  public void setSearch(ASSearch search) {
393    m_Search = search;
394  }
395
396  /**
397   * Gets the search method used
398   *
399   * @return the search method
400   */
401  public ASSearch getSearch() {
402    return m_Search;
403  }
404
405  /**
406   * Gets the search specification string, which contains the class name of
407   * the search method and any options to it
408   *
409   * @return the search string.
410   */
411  protected String getSearchSpec() {
412   
413    ASSearch s = getSearch();
414    if (s instanceof OptionHandler) {
415      return s.getClass().getName() + " "
416        + Utils.joinOptions(((OptionHandler)s).getOptions());
417    }
418    return s.getClass().getName();
419  }
420
421  /**
422   * Returns default capabilities of the classifier.
423   *
424   * @return      the capabilities of this classifier
425   */
426  public Capabilities getCapabilities() {
427    Capabilities        result;
428   
429    if (getEvaluator() == null)
430      result = super.getCapabilities();
431    else
432      result = getEvaluator().getCapabilities();
433   
434    // set dependencies
435    for (Capability cap: Capability.values())
436      result.enableDependency(cap);
437   
438    return result;
439  }
440
441  /**
442   * Build the classifier on the dimensionally reduced data.
443   *
444   * @param data the training data
445   * @throws Exception if the classifier could not be built successfully
446   */
447  public void buildClassifier(Instances data) throws Exception {
448    if (m_Classifier == null) {
449      throw new Exception("No base classifier has been set!");
450    }
451
452    if (m_Evaluator == null) {
453      throw new Exception("No attribute evaluator has been set!");
454    }
455
456    if (m_Search == null) {
457      throw new Exception("No search method has been set!");
458    }
459   
460    // can classifier handle the data?
461    getCapabilities().testWithFail(data);
462
463    // remove instances with missing class
464    Instances newData = new Instances(data);
465    newData.deleteWithMissingClass();
466   
467    if (newData.numInstances() == 0) {
468      m_Classifier.buildClassifier(newData);
469      return;
470    }
471    if (newData.classAttribute().isNominal()) {
472      m_numClasses = newData.classAttribute().numValues();
473    } else {
474      m_numClasses = 1;
475    }
476
477    Instances resampledData = null;
478    // check to see if training data has all equal weights
479    double weight = newData.instance(0).weight();
480    boolean ok = false;
481    for (int i = 1; i < newData.numInstances(); i++) {
482      if (newData.instance(i).weight() != weight) {
483        ok = true;
484        break;
485      }
486    }
487   
488    if (ok) {
489      if (!(m_Evaluator instanceof WeightedInstancesHandler) || 
490          !(m_Classifier instanceof WeightedInstancesHandler)) {
491        Random r = new Random(1);
492        for (int i = 0; i < 10; i++) {
493          r.nextDouble();
494        }
495        resampledData = newData.resampleWithWeights(r);
496      }
497    } else {
498      // all equal weights in the training data so just use as is
499      resampledData = newData;
500    }
501
502    m_AttributeSelection = new AttributeSelection();
503    m_AttributeSelection.setEvaluator(m_Evaluator);
504    m_AttributeSelection.setSearch(m_Search);
505    long start = System.currentTimeMillis();
506    m_AttributeSelection.
507      SelectAttributes((m_Evaluator instanceof WeightedInstancesHandler) 
508                       ? newData
509                       : resampledData);
510    long end = System.currentTimeMillis();
511    if (m_Classifier instanceof WeightedInstancesHandler) {
512      newData = m_AttributeSelection.reduceDimensionality(newData);
513      m_Classifier.buildClassifier(newData);
514    } else {
515      resampledData = m_AttributeSelection.reduceDimensionality(resampledData);
516      m_Classifier.buildClassifier(resampledData);
517    }
518
519    long end2 = System.currentTimeMillis();
520    m_numAttributesSelected = m_AttributeSelection.numberAttributesSelected();
521    m_ReducedHeader = 
522      new Instances((m_Classifier instanceof WeightedInstancesHandler) ?
523                    newData
524                    : resampledData, 0);
525    m_selectionTime = (double)(end - start);
526    m_totalTime = (double)(end2 - start);
527  }
528
529  /**
530   * Classifies a given instance after attribute selection
531   *
532   * @param instance the instance to be classified
533   * @return the class distribution
534   * @throws Exception if instance could not be classified
535   * successfully
536   */
537  public double [] distributionForInstance(Instance instance)
538    throws Exception {
539
540    Instance newInstance;
541    if (m_AttributeSelection == null) {
542      //      throw new Exception("AttributeSelectedClassifier: No model built yet!");
543      newInstance = instance;
544    } else {
545      newInstance = m_AttributeSelection.reduceDimensionality(instance);
546    }
547
548    return m_Classifier.distributionForInstance(newInstance);
549  }
550
551  /**
552   *  Returns the type of graph this classifier
553   *  represents.
554   * 
555   *  @return the type of graph
556   */   
557  public int graphType() {
558   
559    if (m_Classifier instanceof Drawable)
560      return ((Drawable)m_Classifier).graphType();
561    else 
562      return Drawable.NOT_DRAWABLE;
563  }
564
565  /**
566   * Returns graph describing the classifier (if possible).
567   *
568   * @return the graph of the classifier in dotty format
569   * @throws Exception if the classifier cannot be graphed
570   */
571  public String graph() throws Exception {
572   
573    if (m_Classifier instanceof Drawable)
574      return ((Drawable)m_Classifier).graph();
575    else throw new Exception("Classifier: " + getClassifierSpec()
576                             + " cannot be graphed");
577  }
578
579  /**
580   * Output a representation of this classifier
581   *
582   * @return a representation of this classifier
583   */
584  public String toString() {
585    if (m_AttributeSelection == null) {
586      return "AttributeSelectedClassifier: No attribute selection possible.\n\n"
587        +m_Classifier.toString();
588    }
589
590    StringBuffer result = new StringBuffer();
591    result.append("AttributeSelectedClassifier:\n\n");
592    result.append(m_AttributeSelection.toResultsString());
593    result.append("\n\nHeader of reduced data:\n"+m_ReducedHeader.toString());
594    result.append("\n\nClassifier Model\n"+m_Classifier.toString());
595
596    return result.toString();
597  }
598
599  /**
600   * Additional measure --- number of attributes selected
601   * @return the number of attributes selected
602   */
603  public double measureNumAttributesSelected() {
604    return m_numAttributesSelected;
605  }
606
607  /**
608   * Additional measure --- time taken (milliseconds) to select the attributes
609   * @return the time taken to select attributes
610   */
611  public double measureSelectionTime() {
612    return m_selectionTime;
613  }
614
615  /**
616   * Additional measure --- time taken (milliseconds) to select attributes
617   * and build the classifier
618   * @return the total time (select attributes + build classifier)
619   */
620  public double measureTime() {
621    return m_totalTime;
622  }
623
624  /**
625   * Returns an enumeration of the additional measure names
626   * @return an enumeration of the measure names
627   */
628  public Enumeration enumerateMeasures() {
629    Vector newVector = new Vector(3);
630    newVector.addElement("measureNumAttributesSelected");
631    newVector.addElement("measureSelectionTime");
632    newVector.addElement("measureTime");
633    if (m_Classifier instanceof AdditionalMeasureProducer) {
634      Enumeration en = ((AdditionalMeasureProducer)m_Classifier).
635        enumerateMeasures();
636      while (en.hasMoreElements()) {
637        String mname = (String)en.nextElement();
638        newVector.addElement(mname);
639      }
640    }
641    return newVector.elements();
642  }
643 
644  /**
645   * Returns the value of the named measure
646   * @param additionalMeasureName the name of the measure to query for its value
647   * @return the value of the named measure
648   * @throws IllegalArgumentException if the named measure is not supported
649   */
650  public double getMeasure(String additionalMeasureName) {
651    if (additionalMeasureName.compareToIgnoreCase("measureNumAttributesSelected") == 0) {
652      return measureNumAttributesSelected();
653    } else if (additionalMeasureName.compareToIgnoreCase("measureSelectionTime") == 0) {
654      return measureSelectionTime();
655    } else if (additionalMeasureName.compareToIgnoreCase("measureTime") == 0) {
656      return measureTime();
657    } else if (m_Classifier instanceof AdditionalMeasureProducer) {
658      return ((AdditionalMeasureProducer)m_Classifier).
659        getMeasure(additionalMeasureName);
660    } else {
661      throw new IllegalArgumentException(additionalMeasureName
662                          + " not supported (AttributeSelectedClassifier)");
663    }
664  }
665 
666  /**
667   * Returns the revision string.
668   *
669   * @return            the revision
670   */
671  public String getRevision() {
672    return RevisionUtils.extract("$Revision: 1.26 $");
673  }
674
675  /**
676   * Main method for testing this class.
677   *
678   * @param argv should contain the following arguments:
679   * -t training file [-T test file] [-c class index]
680   */
681  public static void main(String [] argv) {
682    runClassifier(new AttributeSelectedClassifier(), argv);
683  }
684}
Note: See TracBrowser for help on using the repository browser.