source: src/main/java/weka/attributeSelection/WrapperSubsetEval.java @ 17

Last change on this file since 17 was 4, checked in by gnappo, 14 years ago

Import of weka.

File size: 23.8 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    WrapperSubsetEval.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import weka.classifiers.Classifier;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Evaluation;
import weka.classifiers.rules.ZeroR;
import weka.core.Capabilities;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

import java.util.BitSet;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
/**
 <!-- globalinfo-start -->
 * WrapperSubsetEval:<br/>
 * <br/>
 * Evaluates attribute sets by using a learning scheme. Cross validation is used to estimate the accuracy of the learning scheme for a set of attributes.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Ron Kohavi, George H. John (1997). Wrappers for feature subset selection. Artificial Intelligence. 97(1-2):273-324.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;article{Kohavi1997,
 *    author = {Ron Kohavi and George H. John},
 *    journal = {Artificial Intelligence},
 *    note = {Special issue on relevance},
 *    number = {1-2},
 *    pages = {273-324},
 *    title = {Wrappers for feature subset selection},
 *    volume = {97},
 *    year = {1997},
 *    ISSN = {0004-3702}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -B &lt;base learner&gt;
 *  class name of base learner to use for accuracy estimation.
 *  Place any classifier options LAST on the command line
 *  following a "--", e.g.:
 *   -B weka.classifiers.bayes.NaiveBayes ... -- -K
 *  (default: weka.classifiers.rules.ZeroR)</pre>
 *
 * <pre> -F &lt;num&gt;
 *  number of cross validation folds to use for estimating accuracy.
 *  (default=5)</pre>
 *
 * <pre> -R &lt;seed&gt;
 *  Seed for cross validation accuracy estimation.
 *  (default = 1)</pre>
 *
 * <pre> -T &lt;num&gt;
 *  threshold by which to execute another cross validation
 *  (standard deviation---expressed as a percentage of the mean).
 *  (default: 0.01 (1%))</pre>
 *
 * <pre> -E &lt;acc | rmse | mae | f-meas | auc&gt;
 *  Performance evaluation measure to use for selecting attributes.
 *  (Default = accuracy for discrete class and rmse for numeric class)</pre>
 *
 * <pre>
 * Options specific to scheme weka.classifiers.rules.ZeroR:
 * </pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 5928 $
 */
public class WrapperSubsetEval
  extends ASEvaluation
  implements SubsetEvaluator,
             OptionHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -4573057658746728675L;

  /** training instances */
  private Instances m_trainInstances;
  /** class index */
  private int m_classIndex;
  /** number of attributes in the training data */
  private int m_numAttribs;
  /** number of instances in the training data */
  private int m_numInstances;
  /** holds an evaluation object */
  private Evaluation m_Evaluation;
  /** holds the base classifier object */
  private Classifier m_BaseClassifier;
  /** number of folds to use for cross validation */
  private int m_folds;
  /** random number seed */
  private int m_seed;
  /**
   * the threshold by which to do further cross validations when
   * estimating the accuracy of a subset
   */
  private double m_threshold;

  public static final int EVAL_DEFAULT = 1;
  public static final int EVAL_ACCURACY = 2;
  public static final int EVAL_RMSE = 3;
  public static final int EVAL_MAE = 4;
  public static final int EVAL_FMEASURE = 5;
  public static final int EVAL_AUC = 6;

  public static final Tag[] TAGS_EVALUATION = {
    new Tag(EVAL_DEFAULT, "Default: accuracy (discrete class); RMSE (numeric class)"),
    new Tag(EVAL_ACCURACY, "Accuracy (discrete class only)"),
    new Tag(EVAL_RMSE, "RMSE (of the class probabilities for discrete class)"),
    new Tag(EVAL_MAE, "MAE (of the class probabilities for discrete class)"),
    new Tag(EVAL_FMEASURE, "F-measure (discrete class only)"),
    new Tag(EVAL_AUC, "AUC (area under the ROC curve - discrete class only)")
  };

  /** The evaluation measure to use */
  protected int m_evaluationMeasure = EVAL_DEFAULT;

  /**
   * Returns a string describing this attribute evaluator
   * @return a description of the evaluator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "WrapperSubsetEval:\n\n"
      + "Evaluates attribute sets by using a learning scheme. Cross "
      + "validation is used to estimate the accuracy of the learning "
      + "scheme for a set of attributes.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.ARTICLE);
    result.setValue(Field.AUTHOR, "Ron Kohavi and George H. John");
    result.setValue(Field.YEAR, "1997");
    result.setValue(Field.TITLE, "Wrappers for feature subset selection");
    result.setValue(Field.JOURNAL, "Artificial Intelligence");
    result.setValue(Field.VOLUME, "97");
    result.setValue(Field.NUMBER, "1-2");
    result.setValue(Field.PAGES, "273-324");
    result.setValue(Field.NOTE, "Special issue on relevance");
    result.setValue(Field.ISSN, "0004-3702");

    return result;
  }
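
  /*
   * Illustrative usage (a sketch, not part of the original class): driving
   * this evaluator programmatically, assuming the companion classes
   * weka.attributeSelection.AttributeSelection and GreedyStepwise from the
   * same package are available.
   *
   *   Instances data = ...;                        // training data, class index set
   *   WrapperSubsetEval eval = new WrapperSubsetEval();
   *   eval.setClassifier(new weka.classifiers.bayes.NaiveBayes());
   *   eval.setFolds(10);
   *
   *   AttributeSelection attsel = new AttributeSelection();
   *   attsel.setEvaluator(eval);
   *   attsel.setSearch(new GreedyStepwise());
   *   attsel.SelectAttributes(data);
   *   int[] chosen = attsel.selectedAttributes();  // selected indices plus class index
   */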

  /**
   * Constructor. Calls resetOptions to set default options
   */
  public WrapperSubsetEval () {
    resetOptions();
  }


  /**
   * Returns an enumeration describing the available options.
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions () {
    Vector newVector = new Vector(5);
    newVector.addElement(new Option(
        "\tclass name of base learner to use for accuracy estimation.\n"
        + "\tPlace any classifier options LAST on the command line\n"
        + "\tfollowing a \"--\", e.g.:\n"
        + "\t\t-B weka.classifiers.bayes.NaiveBayes ... -- -K\n"
        + "\t(default: weka.classifiers.rules.ZeroR)",
        "B", 1, "-B <base learner>"));

    newVector.addElement(new Option(
        "\tnumber of cross validation folds to use for estimating accuracy.\n"
        + "\t(default=5)",
        "F", 1, "-F <num>"));

    newVector.addElement(new Option(
        "\tSeed for cross validation accuracy estimation.\n"
        + "\t(default = 1)",
        "R", 1, "-R <seed>"));

    newVector.addElement(new Option(
        "\tthreshold by which to execute another cross validation\n"
        + "\t(standard deviation---expressed as a percentage of the mean).\n"
        + "\t(default: 0.01 (1%))",
        "T", 1, "-T <num>"));

    newVector.addElement(new Option(
        "\tPerformance evaluation measure to use for selecting attributes.\n"
        + "\t(Default = accuracy for discrete class and rmse for numeric class)",
        "E", 1, "-E <acc | rmse | mae | f-meas | auc>"));

    if ((m_BaseClassifier != null) &&
        (m_BaseClassifier instanceof OptionHandler)) {
      newVector.addElement(new Option("", "", 0, "\nOptions specific to scheme "
                                      + m_BaseClassifier.getClass().getName()
                                      + ":"));
      Enumeration enu = ((OptionHandler)m_BaseClassifier).listOptions();

      while (enu.hasMoreElements()) {
        newVector.addElement(enu.nextElement());
      }
    }

    return newVector.elements();
  }


  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -B &lt;base learner&gt;
   *  class name of base learner to use for accuracy estimation.
   *  Place any classifier options LAST on the command line
   *  following a "--", e.g.:
   *   -B weka.classifiers.bayes.NaiveBayes ... -- -K
   *  (default: weka.classifiers.rules.ZeroR)</pre>
   *
   * <pre> -F &lt;num&gt;
   *  number of cross validation folds to use for estimating accuracy.
   *  (default=5)</pre>
   *
   * <pre> -R &lt;seed&gt;
   *  Seed for cross validation accuracy estimation.
   *  (default = 1)</pre>
   *
   * <pre> -T &lt;num&gt;
   *  threshold by which to execute another cross validation
   *  (standard deviation---expressed as a percentage of the mean).
   *  (default: 0.01 (1%))</pre>
   *
   * <pre> -E &lt;acc | rmse | mae | f-meas | auc&gt;
   *  Performance evaluation measure to use for selecting attributes.
   *  (Default = accuracy for discrete class and rmse for numeric class)</pre>
   *
   * <pre>
   * Options specific to scheme weka.classifiers.rules.ZeroR:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions (String[] options)
    throws Exception {
    String optionString;
    resetOptions();
    optionString = Utils.getOption('B', options);

    if (optionString.length() == 0)
      optionString = ZeroR.class.getName();
    setClassifier(AbstractClassifier.forName(optionString,
                                     Utils.partitionOptions(options)));
    optionString = Utils.getOption('F', options);

    if (optionString.length() != 0) {
      setFolds(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('R', options);
    if (optionString.length() != 0) {
      setSeed(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('T', options);

    if (optionString.length() != 0) {
      setThreshold(Double.parseDouble(optionString));
    }

    optionString = Utils.getOption('E', options);
    if (optionString.length() != 0) {
      if (optionString.equals("acc")) {
        setEvaluationMeasure(new SelectedTag(EVAL_ACCURACY, TAGS_EVALUATION));
      } else if (optionString.equals("rmse")) {
        setEvaluationMeasure(new SelectedTag(EVAL_RMSE, TAGS_EVALUATION));
      } else if (optionString.equals("mae")) {
        setEvaluationMeasure(new SelectedTag(EVAL_MAE, TAGS_EVALUATION));
      } else if (optionString.equals("f-meas")) {
        setEvaluationMeasure(new SelectedTag(EVAL_FMEASURE, TAGS_EVALUATION));
      } else if (optionString.equals("auc")) {
        setEvaluationMeasure(new SelectedTag(EVAL_AUC, TAGS_EVALUATION));
      } else {
        throw new IllegalArgumentException("Invalid evaluation measure");
      }
    }
  }
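
  /*
   * Example option string (a sketch; the classifier and its options are
   * illustrative choices, not defaults of this class):
   *
   *   -B weka.classifiers.trees.J48 -F 10 -T 0.01 -E auc -- -C 0.25
   *
   * This selects J48 as the base learner, 10 cross validation folds, the
   * default repeat threshold, AUC as the evaluation measure, and passes
   * -C 0.25 through to J48 after the "--" separator.
   */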

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String evaluationMeasureTipText() {
    return "The measure used to evaluate the performance of attribute combinations.";
  }

  /**
   * Gets the currently set performance evaluation measure used for selecting
   * attributes
   *
   * @return the performance evaluation measure
   */
  public SelectedTag getEvaluationMeasure() {
    return new SelectedTag(m_evaluationMeasure, TAGS_EVALUATION);
  }

  /**
   * Sets the performance evaluation measure to use for selecting attributes
   *
   * @param newMethod the new performance evaluation metric to use
   */
  public void setEvaluationMeasure(SelectedTag newMethod) {
    if (newMethod.getTags() == TAGS_EVALUATION) {
      m_evaluationMeasure = newMethod.getSelectedTag().getID();
    }
  }
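
  /*
   * Illustrative call (a sketch): switching the evaluator to AUC.
   *
   *   eval.setEvaluationMeasure(
   *       new SelectedTag(WrapperSubsetEval.EVAL_AUC,
   *                       WrapperSubsetEval.TAGS_EVALUATION));
   *
   * Tags not drawn from TAGS_EVALUATION are silently ignored by the setter.
   */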

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String thresholdTipText() {
    return "Repeat cross validation if the standard deviation exceeds this proportion of the mean.";
  }

  /**
   * Set the value of the threshold for repeating cross validation
   *
   * @param t the value of the threshold
   */
  public void setThreshold (double t) {
    m_threshold = t;
  }


  /**
   * Get the value of the threshold
   *
   * @return the threshold as a double
   */
  public double getThreshold () {
    return m_threshold;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String foldsTipText() {
    return "Number of xval folds to use when estimating subset accuracy.";
  }

  /**
   * Set the number of folds to use for accuracy estimation
   *
   * @param f the number of folds
   */
  public void setFolds (int f) {
    m_folds = f;
  }


  /**
   * Get the number of folds used for accuracy estimation
   *
   * @return the number of folds
   */
  public int getFolds () {
    return m_folds;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "Seed to use for randomly generating xval splits.";
  }

  /**
   * Set the seed to use for cross validation
   *
   * @param s the seed
   */
  public void setSeed (int s) {
    m_seed = s;
  }


  /**
   * Get the random number seed used for cross validation
   *
   * @return the seed
   */
  public int getSeed () {
    return m_seed;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String classifierTipText() {
    return "Classifier to use for estimating the accuracy of subsets.";
  }

  /**
   * Set the classifier to use for accuracy estimation
   *
   * @param newClassifier the Classifier to use.
   */
  public void setClassifier (Classifier newClassifier) {
    m_BaseClassifier = newClassifier;
  }


  /**
   * Get the classifier used as the base learner.
   *
   * @return the classifier used as the base learner
   */
  public Classifier getClassifier () {
    return m_BaseClassifier;
  }


  /**
   * Gets the current settings of WrapperSubsetEval.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    String[] classifierOptions = new String[0];

    if ((m_BaseClassifier != null) &&
        (m_BaseClassifier instanceof OptionHandler)) {
      classifierOptions = ((OptionHandler)m_BaseClassifier).getOptions();
    }

    String[] options = new String[9 + classifierOptions.length];
    int current = 0;

    if (getClassifier() != null) {
      options[current++] = "-B";
      options[current++] = getClassifier().getClass().getName();
    }

    options[current++] = "-F";
    options[current++] = "" + getFolds();
    options[current++] = "-T";
    options[current++] = "" + getThreshold();
    options[current++] = "-R";
    options[current++] = "" + getSeed();
    options[current++] = "--";
    System.arraycopy(classifierOptions, 0, options, current,
                     classifierOptions.length);
    current += classifierOptions.length;

    while (current < options.length) {
      options[current++] = "";
    }

    return options;
  }

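  /**
   * Resets to the default options: ZeroR as the base learner, 5 cross
   * validation folds, random seed 1 and a repeat threshold of 0.01 (1%).
   */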
  protected void resetOptions () {
    m_trainInstances = null;
    m_Evaluation = null;
    m_BaseClassifier = new ZeroR();
    m_folds = 5;
    m_seed = 1;
    m_threshold = 0.01;
  }

  /**
   * Returns the capabilities of this evaluator.
   *
   * @return            the capabilities of this evaluator
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result;

    if (getClassifier() == null) {
      result = super.getCapabilities();
      result.disableAll();
    } else {
      result = getClassifier().getCapabilities();
    }

    // set dependencies
    for (Capability cap: Capability.values())
      result.enableDependency(cap);

    // adjustment for class based on selected evaluation metric
    result.disable(Capability.NUMERIC_CLASS);
    result.disable(Capability.DATE_CLASS);
    if (m_evaluationMeasure != EVAL_ACCURACY && m_evaluationMeasure != EVAL_FMEASURE &&
        m_evaluationMeasure != EVAL_AUC) {
      result.enable(Capability.NUMERIC_CLASS);
      result.enable(Capability.DATE_CLASS);
    }

    result.setMinimumNumberInstances(getFolds());

    return result;
  }

  /**
   * Generates an attribute evaluator. Has to initialize all fields of the
   * evaluator that are not being set via options.
   *
   * @param data set of instances serving as training data
   * @throws Exception if the evaluator has not been
   * generated successfully
   */
  public void buildEvaluator (Instances data)
    throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    m_trainInstances = data;
    m_classIndex = m_trainInstances.classIndex();
    m_numAttribs = m_trainInstances.numAttributes();
    m_numInstances = m_trainInstances.numInstances();
  }


  /**
   * Evaluates a subset of attributes
   *
   * @param subset a bitset representing the attribute subset to be
   * evaluated
   * @return the merit of the subset (error-based measures are negated
   * so that larger values are always better)
   * @throws Exception if the subset could not be evaluated
   */
  public double evaluateSubset (BitSet subset)
    throws Exception {
    double evalMetric = 0;
    double[] repError = new double[5];
    int numAttributes = 0;
    int i, j;
    Random Rnd = new Random(m_seed);
    Remove delTransform = new Remove();
    delTransform.setInvertSelection(true);
    // copy the instances
    Instances trainCopy = new Instances(m_trainInstances);

    // count attributes set in the BitSet
    for (i = 0; i < m_numAttribs; i++) {
      if (subset.get(i)) {
        numAttributes++;
      }
    }

    // set up an array of attribute indexes for the filter (+1 for the class)
    int[] featArray = new int[numAttributes + 1];

    for (i = 0, j = 0; i < m_numAttribs; i++) {
      if (subset.get(i)) {
        featArray[j++] = i;
      }
    }

    featArray[j] = m_classIndex;
    delTransform.setAttributeIndicesArray(featArray);
    delTransform.setInputFormat(trainCopy);
    trainCopy = Filter.useFilter(trainCopy, delTransform);

    // max of 5 repetitions of cross validation
    for (i = 0; i < 5; i++) {
      m_Evaluation = new Evaluation(trainCopy);
      m_Evaluation.crossValidateModel(m_BaseClassifier, trainCopy, m_folds, Rnd);

      switch (m_evaluationMeasure) {
      case EVAL_DEFAULT:
      case EVAL_ACCURACY:
        repError[i] = m_Evaluation.errorRate();
        break;
      case EVAL_RMSE:
        repError[i] = m_Evaluation.rootMeanSquaredError();
        break;
      case EVAL_MAE:
        repError[i] = m_Evaluation.meanAbsoluteError();
        break;
      case EVAL_FMEASURE:
        repError[i] = m_Evaluation.weightedFMeasure();
        break;
      case EVAL_AUC:
        repError[i] = m_Evaluation.weightedAreaUnderROC();
        break;
      }

      // check on the standard deviation
      if (!repeat(repError, i + 1)) {
        i++;
        break;
      }
    }

    for (j = 0; j < i; j++) {
      evalMetric += repError[j];
    }

    evalMetric /= (double)i;
    m_Evaluation = null;

    switch (m_evaluationMeasure) {
    case EVAL_DEFAULT:
    case EVAL_ACCURACY:
    case EVAL_RMSE:
    case EVAL_MAE:
      evalMetric = -evalMetric; // error-based measures: negate so search maximizes merit
      break;
    }

    return evalMetric;
  }
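
  /*
   * Illustrative sketch (not part of the original class): evaluating a
   * hand-built subset directly. Assumes "data" is an Instances object with
   * its class index already set.
   *
   *   WrapperSubsetEval eval = new WrapperSubsetEval();
   *   eval.buildEvaluator(data);
   *   BitSet subset = new BitSet(data.numAttributes());
   *   subset.set(0);                               // include attribute 0
   *   subset.set(2);                               // include attribute 2
   *   double merit = eval.evaluateSubset(subset);  // larger is better
   */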


  /**
   * Returns a string describing the wrapper
   *
   * @return the description as a string
   */
  public String toString () {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("\tWrapper subset evaluator has not been built yet\n");
    }
    else {
      text.append("\tWrapper Subset Evaluator\n");
      text.append("\tLearning scheme: "
                  + getClassifier().getClass().getName() + "\n");
      text.append("\tScheme options: ");
      String[] classifierOptions = new String[0];

      if (m_BaseClassifier instanceof OptionHandler) {
        classifierOptions = ((OptionHandler)m_BaseClassifier).getOptions();

        for (int i = 0; i < classifierOptions.length; i++) {
          text.append(classifierOptions[i] + " ");
        }
      }

      text.append("\n");
      switch (m_evaluationMeasure) {
      case EVAL_DEFAULT:
      case EVAL_ACCURACY:
        if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
          text.append("\tSubset evaluation: RMSE\n");
        } else {
          text.append("\tSubset evaluation: classification error\n");
        }
        break;
      case EVAL_RMSE:
        if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
          text.append("\tSubset evaluation: RMSE\n");
        } else {
          text.append("\tSubset evaluation: RMSE (probability estimates)\n");
        }
        break;
      case EVAL_MAE:
        if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
          text.append("\tSubset evaluation: MAE\n");
        } else {
          text.append("\tSubset evaluation: MAE (probability estimates)\n");
        }
        break;
      case EVAL_FMEASURE:
        text.append("\tSubset evaluation: F-measure\n");
        break;
      case EVAL_AUC:
        text.append("\tSubset evaluation: area under the ROC curve\n");
        break;
      }

      text.append("\tNumber of folds for accuracy estimation: "
          + m_folds
          + "\n");
    }

    return text.toString();
  }


  /**
   * Decides whether to do another repeat of cross validation. If the
   * standard deviation of the cross validation results is greater than
   * threshold% of the mean (default 1%) then another repeat is done.
   *
   * @param repError an array of cross validation results
   * @param entries the number of cross validations done so far
   * @return true if another cv is to be done
   */
  private boolean repeat (double[] repError, int entries) {
    int i;
    double mean = 0;
    double variance = 0;

    if (entries == 1) {
      return true;
    }

    for (i = 0; i < entries; i++) {
      mean += repError[i];
    }

    mean /= (double)entries;

    for (i = 0; i < entries; i++) {
      variance += ((repError[i] - mean)*(repError[i] - mean));
    }

    variance /= (double)entries;

    // variance now holds the standard deviation
    if (variance > 0) {
      variance = Math.sqrt(variance);
    }

    if ((variance/mean) > m_threshold) {
      return true;
    }

    return false;
  }
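
  /*
   * Worked example of the repeat criterion: for repError = {0.20, 0.22}
   * the mean is 0.21 and the standard deviation is 0.01, so the ratio
   * 0.01 / 0.21 = 0.048 exceeds the default threshold of 0.01 and another
   * cross validation is run; for repError = {0.210, 0.211} the ratio is
   * roughly 0.002, so the estimate is accepted as stable.
   */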

  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5928 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main (String[] args) {
    runEvaluator(new WrapperSubsetEval(), args);
  }
}
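
/*
 * Example invocation from the command line (a sketch; -i and -s are assumed
 * to be the generic attribute selection options handled via runEvaluator,
 * and the file name is illustrative):
 *
 *   java weka.attributeSelection.WrapperSubsetEval -i iris.arff \
 *     -s weka.attributeSelection.BestFirst -B weka.classifiers.trees.J48
 */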