source: branches/MetisMQI/src/main/java/weka/experiment/RandomSplitResultProducer.java

Last change on this file was in changeset 29, checked in by gnappo, 14 years ago

Tagged the version for the demo and added a branch.

File size: 27.5 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RandomSplitResultProducer.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */


package weka.experiment;

import weka.core.AdditionalMeasureProducer;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

import java.io.File;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.Random;
import java.util.TimeZone;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Generates a single train/test split and calls the appropriate SplitEvaluator to generate some results.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -P &lt;percent&gt;
 *  The percentage of instances to use for training.
 *  (default 66)</pre>
 *
 * <pre> -D
 * Save raw split evaluator output.</pre>
 *
 * <pre> -O &lt;file/directory name/path&gt;
 *  The filename where raw output will be stored.
 *  If a directory name is specified then the individual
 *  outputs will be gzipped, otherwise all output will be
 *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
 *
 * <pre> -W &lt;class name&gt;
 *  The full class name of a SplitEvaluator.
 *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
 *
 * <pre> -R
 *  Set when data is not to be randomized and the data sets' size
 *  is not to be determined via probabilistic rounding.</pre>
 *
 * <pre>
 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
 * </pre>
 *
 * <pre> -W &lt;class name&gt;
 *  The full class name of the classifier.
 *  eg: weka.classifiers.bayes.NaiveBayes</pre>
 *
 * <pre> -C &lt;index&gt;
 *  The index of the class for which IR statistics
 *  are to be output. (default 1)</pre>
 *
 * <pre> -I &lt;index&gt;
 *  The index of an attribute to output in the
 *  results. This attribute should identify an
 *  instance in order to know which instances are
 *  in the test set of a cross validation. If 0,
 *  no output is produced (default 0).</pre>
 *
 * <pre> -P
 *  Add target and prediction columns to the result
 *  for each fold.</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 * </pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * All options after -- will be passed to the split evaluator.
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.20 $
 */
public class RandomSplitResultProducer 
  implements ResultProducer, OptionHandler, AdditionalMeasureProducer, 
             RevisionHandler {

  /** for serialization */
  static final long serialVersionUID = 1403798165056795073L;

  /** The dataset of interest */
  protected Instances m_Instances;

  /** The ResultListener to send results to */
  protected ResultListener m_ResultListener = new CSVResultListener();

  /** The percentage of instances to use for training */
  protected double m_TrainPercent = 66;

  /** Whether dataset is to be randomized */
  protected boolean m_randomize = true;

  /** The SplitEvaluator used to generate results */
  protected SplitEvaluator m_SplitEvaluator = new ClassifierSplitEvaluator();

  /** The names of any additional measures to look for in SplitEvaluators */
  protected String [] m_AdditionalMeasures = null;

  /** Save raw output of split evaluators --- for debugging purposes */
  protected boolean m_debugOutput = false;

  /** The output zipper to use for saving raw splitEvaluator output */
  protected OutputZipper m_ZipDest = null;

  /** The destination output file/directory for raw output */
  protected File m_OutputFile = new File(
                                new File(System.getProperty("user.dir")), 
                                "splitEvalutorOut.zip");

  /** The name of the key field containing the dataset name */
  public static String DATASET_FIELD_NAME = "Dataset";

  /** The name of the key field containing the run number */
  public static String RUN_FIELD_NAME = "Run";

  /** The name of the result field containing the timestamp */
  public static String TIMESTAMP_FIELD_NAME = "Date_time";

  /**
   * Returns a string describing this result producer
   * @return a description of the result producer suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Generates a single train/test split and calls the appropriate "
      + "SplitEvaluator to generate some results.";
  }

  /**
   * Sets the dataset that results will be obtained for.
   *
   * @param instances a value of type 'Instances'.
   */
  public void setInstances(Instances instances) {

    m_Instances = instances;
  }

  /**
   * Set a list of method names for additional measures to look for
   * in SplitEvaluators. This could contain many measures (of which only a
   * subset may be producible by the current SplitEvaluator) if an experiment
   * is the type that iterates over a set of properties.
   * @param additionalMeasures an array of measure names, null if none
   */
  public void setAdditionalMeasures(String [] additionalMeasures) {
    m_AdditionalMeasures = additionalMeasures;

    if (m_SplitEvaluator != null) {
      System.err.println("RandomSplitResultProducer: setting additional "
                         +"measures for "
                         +"split evaluator");
      m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
    }
  }

  /**
   * Returns an enumeration of any additional measure names that might be
   * in the SplitEvaluator
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    Vector newVector = new Vector();
    if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
      Enumeration en = ((AdditionalMeasureProducer)m_SplitEvaluator).
        enumerateMeasures();
      while (en.hasMoreElements()) {
        String mname = (String)en.nextElement();
        newVector.addElement(mname);
      }
    }
    return newVector.elements();
  }

  /**
   * Returns the value of the named measure
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @throws IllegalArgumentException if the named measure is not supported
   */
  public double getMeasure(String additionalMeasureName) {
    if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
      return ((AdditionalMeasureProducer)m_SplitEvaluator).
        getMeasure(additionalMeasureName);
    } else {
      throw new IllegalArgumentException("RandomSplitResultProducer: "
                          +"Can't return value for : "+additionalMeasureName
                          +". "+m_SplitEvaluator.getClass().getName()+" "
                          +"is not an AdditionalMeasureProducer");
    }
  }

  /**
   * Sets the object to send results of each run to.
   *
   * @param listener a value of type 'ResultListener'
   */
  public void setResultListener(ResultListener listener) {

    m_ResultListener = listener;
  }

  /**
   * Gets a Double representing the current date and time.
   * eg: 1:46pm on 20/5/1999 -> 19990520.1346
   *
   * @return a value of type Double
   */
  public static Double getTimestamp() {

    Calendar now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    double timestamp = now.get(Calendar.YEAR) * 10000
      + (now.get(Calendar.MONTH) + 1) * 100
      + now.get(Calendar.DAY_OF_MONTH)
      + now.get(Calendar.HOUR_OF_DAY) / 100.0
      + now.get(Calendar.MINUTE) / 10000.0;
    return new Double(timestamp);
  }

  /**
   * Prepare to generate results.
   *
   * @throws Exception if an error occurs during preprocessing.
   */
  public void preProcess() throws Exception {

    if (m_SplitEvaluator == null) {
      throw new Exception("No SplitEvaluator set");
    }
    if (m_ResultListener == null) {
      throw new Exception("No ResultListener set");
    }
    m_ResultListener.preProcess(this);
  }

  /**
   * Perform any postprocessing. When this method is called, it indicates
   * that no more requests to generate results for the current experiment
   * will be sent.
   *
   * @throws Exception if an error occurs
   */
  public void postProcess() throws Exception {

    m_ResultListener.postProcess(this);
    if (m_debugOutput) {
      if (m_ZipDest != null) {
        m_ZipDest.finished();
        m_ZipDest = null;
      }
    }
  }

  /**
   * Gets the keys for a specified run number. Different run
   * numbers correspond to different randomizations of the data. Keys
   * produced should be sent to the current ResultListener
   *
   * @param run the run number to get keys for.
   * @throws Exception if a problem occurs while getting the keys
   */
  public void doRunKeys(int run) throws Exception {
    if (m_Instances == null) {
      throw new Exception("No Instances set");
    }
    // Add in some fields to the key like run number, dataset name
    Object [] seKey = m_SplitEvaluator.getKey();
    Object [] key = new Object [seKey.length + 2];
    key[0] = Utils.backQuoteChars(m_Instances.relationName());
    key[1] = "" + run;
    System.arraycopy(seKey, 0, key, 2, seKey.length);
    if (m_ResultListener.isResultRequired(this, key)) {
      try {
        m_ResultListener.acceptResult(this, key, null);
      } catch (Exception ex) {
        // Save the train and test datasets for debugging purposes?
        throw ex;
      }
    }
  }

  /**
   * Gets the results for a specified run number. Different run
   * numbers correspond to different randomizations of the data. Results
   * produced should be sent to the current ResultListener
   *
   * @param run the run number to get results for.
   * @throws Exception if a problem occurs while getting the results
   */
  public void doRun(int run) throws Exception {

    if (getRawOutput()) {
      if (m_ZipDest == null) {
        m_ZipDest = new OutputZipper(m_OutputFile);
      }
    }

    if (m_Instances == null) {
      throw new Exception("No Instances set");
    }
    // Add in some fields to the key like run number, dataset name
    Object [] seKey = m_SplitEvaluator.getKey();
    Object [] key = new Object [seKey.length + 2];
    key[0] = Utils.backQuoteChars(m_Instances.relationName());
    key[1] = "" + run;
    System.arraycopy(seKey, 0, key, 2, seKey.length);
    if (m_ResultListener.isResultRequired(this, key)) {

      // Randomize on a copy of the original dataset
      Instances runInstances = new Instances(m_Instances);

      Instances train;
      Instances test;

      if (!m_randomize) {

        // Don't do any randomization
        int trainSize = Utils.round(runInstances.numInstances() * m_TrainPercent / 100);
        int testSize = runInstances.numInstances() - trainSize;
        train = new Instances(runInstances, 0, trainSize);
        test = new Instances(runInstances, trainSize, testSize);
      } else {
        Random rand = new Random(run);
        runInstances.randomize(rand);

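        // For a nominal class the split below is stratified: instances are
        // grouped by class value (plus one extra bucket for instances with a
        // missing class), each group is split with probabilistic rounding,
        // and the pieces are merged and re-shuffled, so the train/test class
        // proportions roughly match those of the full dataset.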
        // Nominal class
        if (runInstances.classAttribute().isNominal()) {

          // create the subset for each class
          int numClasses = runInstances.numClasses();
          Instances[] subsets = new Instances[numClasses + 1];
          for (int i=0; i < numClasses + 1; i++) {
            subsets[i] = new Instances(runInstances, 10);
          }

          // divide instances into subsets
          Enumeration e = runInstances.enumerateInstances();
          while(e.hasMoreElements()) {
            Instance inst = (Instance) e.nextElement();
            if (inst.classIsMissing()) {
              subsets[numClasses].add(inst);
            } else {
              subsets[(int) inst.classValue()].add(inst);
            }
          }

          // Compactify them
          for (int i=0; i < numClasses + 1; i++) {
            subsets[i].compactify();
          }

          // merge into train and test sets
          train = new Instances(runInstances, runInstances.numInstances());
          test = new Instances(runInstances, runInstances.numInstances());
          for (int i = 0; i < numClasses + 1; i++) {
            int trainSize = 
              Utils.probRound(subsets[i].numInstances() * m_TrainPercent / 100, rand);
            for (int j = 0; j < trainSize; j++) {
              train.add(subsets[i].instance(j));
            }
            for (int j = trainSize; j < subsets[i].numInstances(); j++) {
              test.add(subsets[i].instance(j));
            }
            // free memory
            subsets[i] = null;
          }
          train.compactify();
          test.compactify();

          // randomize the final sets
          train.randomize(rand);
          test.randomize(rand);
        } else {

          // Numeric target
          int trainSize = 
            Utils.probRound(runInstances.numInstances() * m_TrainPercent / 100, rand);
          int testSize = runInstances.numInstances() - trainSize;
          train = new Instances(runInstances, 0, trainSize);
          test = new Instances(runInstances, trainSize, testSize);
        }
      }
      try {
        Object [] seResults = m_SplitEvaluator.getResult(train, test);
        Object [] results = new Object [seResults.length + 1];
        results[0] = getTimestamp();
        System.arraycopy(seResults, 0, results, 1,
                         seResults.length);
        if (m_debugOutput) {
          String resultName = 
            (""+run+"."+
             Utils.backQuoteChars(runInstances.relationName())
             +"."
             +m_SplitEvaluator.toString()).replace(' ','_');
          resultName = Utils.removeSubstring(resultName, 
                                             "weka.classifiers.");
          resultName = Utils.removeSubstring(resultName, 
                                             "weka.filters.");
          resultName = Utils.removeSubstring(resultName, 
                                             "weka.attributeSelection.");
          m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName);
        }
        m_ResultListener.acceptResult(this, key, results);
      } catch (Exception ex) {
        // Save the train and test datasets for debugging purposes?
        throw ex;
      }
    }
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing the name of each column
   */
  public String [] getKeyNames() {

    String [] keyNames = m_SplitEvaluator.getKeyNames();
    // Add in the names of our extra key fields
    String [] newKeyNames = new String [keyNames.length + 2];
    newKeyNames[0] = DATASET_FIELD_NAME;
    newKeyNames[1] = RUN_FIELD_NAME;
    System.arraycopy(keyNames, 0, newKeyNames, 2, keyNames.length);
    return newKeyNames;
  }

  /**
   * Gets the data types of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing objects of the type of each column. The
   * objects should be Strings, or Doubles.
   */
  public Object [] getKeyTypes() {

    Object [] keyTypes = m_SplitEvaluator.getKeyTypes();
    // Add in the types of our extra fields
    Object [] newKeyTypes = new String [keyTypes.length + 2];
    newKeyTypes[0] = new String();
    newKeyTypes[1] = new String();
    System.arraycopy(keyTypes, 0, newKeyTypes, 2, keyTypes.length);
    return newKeyTypes;
  }

  /**
   * Gets the names of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing the name of each column
   */
  public String [] getResultNames() {

    String [] resultNames = m_SplitEvaluator.getResultNames();
    // Add in the names of our extra Result fields
    String [] newResultNames = new String [resultNames.length + 1];
    newResultNames[0] = TIMESTAMP_FIELD_NAME;
    System.arraycopy(resultNames, 0, newResultNames, 1, resultNames.length);
    return newResultNames;
  }

  /**
   * Gets the data types of each of the columns produced for a single run.
   * This method should really be static.
   *
   * @return an array containing objects of the type of each column. The
   * objects should be Strings, or Doubles.
   */
  public Object [] getResultTypes() {

    Object [] resultTypes = m_SplitEvaluator.getResultTypes();
    // Add in the types of our extra Result fields
    Object [] newResultTypes = new Object [resultTypes.length + 1];
    newResultTypes[0] = new Double(0);
    System.arraycopy(resultTypes, 0, newResultTypes, 1, resultTypes.length);
    return newResultTypes;
  }

  /**
   * Gets a description of the internal settings of the result
   * producer, sufficient for distinguishing a ResultProducer
   * instance from another with different settings (ignoring
   * those settings set through this interface). For example,
   * a cross-validation ResultProducer may have a setting for the
   * number of folds. For a given state, the results produced should
   * be compatible. Typically if a ResultProducer is an OptionHandler,
   * this string will represent the command line arguments required
   * to set the ResultProducer to that state.
   *
   * @return the description of the ResultProducer state, or null
   * if no state is defined
   */
  public String getCompatibilityState() {

    String result = "-P " + m_TrainPercent;
    if (!getRandomizeData()) {
      result += " -R";
    }
    if (m_SplitEvaluator == null) {
      result += " <null SplitEvaluator>";
    } else {
      result += " -W " + m_SplitEvaluator.getClass().getName();
    }
    return result + " --";
  }
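
  // With the default field values above, getCompatibilityState() would return
  // a string along the lines of
  //   "-P 66.0 -W weka.experiment.ClassifierSplitEvaluator --"
  // (the exact text depends on the configured train percentage, the
  // randomizeData flag and the chosen split evaluator).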

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String outputFileTipText() {
    return "Set the destination for saving raw output. If the rawOutput "
      +"option is selected, then output from the splitEvaluator for "
      +"individual train-test splits is saved. If the destination is a "
      +"directory, "
      +"then each output is saved to an individual gzip file; if the "
      +"destination is a file, then each output is saved as an entry "
      +"in a zip file.";
  }

  /**
   * Get the value of OutputFile.
   *
   * @return Value of OutputFile.
   */
  public File getOutputFile() {

    return m_OutputFile;
  }

  /**
   * Set the value of OutputFile.
   *
   * @param newOutputFile Value to assign to OutputFile.
   */
  public void setOutputFile(File newOutputFile) {

    m_OutputFile = newOutputFile;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String randomizeDataTipText() {
    return "If true, randomize the dataset and determine the train set size " +
      "via probabilistic rounding; if false, do neither";
  }

  /**
   * Get if dataset is to be randomized
   * @return true if dataset is to be randomized
   */
  public boolean getRandomizeData() {
    return m_randomize;
  }

  /**
   * Set to true if dataset is to be randomized
   * @param d true if dataset is to be randomized
   */
  public void setRandomizeData(boolean d) {
    m_randomize = d;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String rawOutputTipText() {
    return "Save raw output (useful for debugging). If set, then output is "
      +"sent to the destination specified by outputFile";
  }

  /**
   * Get if raw split evaluator output is to be saved
   * @return true if raw split evaluator output is to be saved
   */
  public boolean getRawOutput() {
    return m_debugOutput;
  }

  /**
   * Set to true if raw split evaluator output is to be saved
   * @param d true if output is to be saved
   */
  public void setRawOutput(boolean d) {
    m_debugOutput = d;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String trainPercentTipText() {
    return "Set the percentage of data to use for training.";
  }

  /**
   * Get the value of TrainPercent.
   *
   * @return Value of TrainPercent.
   */
  public double getTrainPercent() {

    return m_TrainPercent;
  }

  /**
   * Set the value of TrainPercent.
   *
   * @param newTrainPercent Value to assign to TrainPercent.
   */
  public void setTrainPercent(double newTrainPercent) {

    m_TrainPercent = newTrainPercent;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String splitEvaluatorTipText() {
    return "The evaluator to apply to the test data. "
      +"This may be a classifier, regression scheme etc.";
  }

  /**
   * Get the SplitEvaluator.
   *
   * @return the SplitEvaluator.
   */
  public SplitEvaluator getSplitEvaluator() {

    return m_SplitEvaluator;
  }

  /**
   * Set the SplitEvaluator.
   *
   * @param newSplitEvaluator new SplitEvaluator to use.
   */
  public void setSplitEvaluator(SplitEvaluator newSplitEvaluator) {

    m_SplitEvaluator = newSplitEvaluator;
    m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector newVector = new Vector(5);

    newVector.addElement(new Option(
             "\tThe percentage of instances to use for training.\n"
              +"\t(default 66)", 
             "P", 1, 
             "-P <percent>"));

    newVector.addElement(new Option(
             "Save raw split evaluator output.",
             "D",0,"-D"));

    newVector.addElement(new Option(
             "\tThe filename where raw output will be stored.\n"
             +"\tIf a directory name is specified then the individual\n"
             +"\toutputs will be gzipped, otherwise all output will be\n"
             +"\tzipped to the named file. Use in conjunction with -D."
             +"\t(default splitEvalutorOut.zip)", 
             "O", 1, 
             "-O <file/directory name/path>"));

    newVector.addElement(new Option(
             "\tThe full class name of a SplitEvaluator.\n"
              +"\teg: weka.experiment.ClassifierSplitEvaluator", 
             "W", 1, 
             "-W <class name>"));

    newVector.addElement(new Option(
             "\tSet when data is not to be randomized and the data sets' size\n"
             + "\tis not to be determined via probabilistic rounding.",
             "R",0,"-R"));


    if ((m_SplitEvaluator != null) &&
        (m_SplitEvaluator instanceof OptionHandler)) {
      newVector.addElement(new Option(
             "",
             "", 0, "\nOptions specific to split evaluator "
             + m_SplitEvaluator.getClass().getName() + ":"));
      Enumeration enu = ((OptionHandler)m_SplitEvaluator).listOptions();
      while (enu.hasMoreElements()) {
        newVector.addElement(enu.nextElement());
      }
    }
    return newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -P &lt;percent&gt;
   *  The percentage of instances to use for training.
   *  (default 66)</pre>
   *
   * <pre> -D
   * Save raw split evaluator output.</pre>
   *
   * <pre> -O &lt;file/directory name/path&gt;
   *  The filename where raw output will be stored.
   *  If a directory name is specified then the individual
   *  outputs will be gzipped, otherwise all output will be
   *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
   *
   * <pre> -W &lt;class name&gt;
   *  The full class name of a SplitEvaluator.
   *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
   *
   * <pre> -R
   *  Set when data is not to be randomized and the data sets' size
   *  is not to be determined via probabilistic rounding.</pre>
   *
   * <pre>
   * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
   * </pre>
   *
   * <pre> -W &lt;class name&gt;
   *  The full class name of the classifier.
   *  eg: weka.classifiers.bayes.NaiveBayes</pre>
   *
   * <pre> -C &lt;index&gt;
   *  The index of the class for which IR statistics
   *  are to be output. (default 1)</pre>
   *
   * <pre> -I &lt;index&gt;
   *  The index of an attribute to output in the
   *  results. This attribute should identify an
   *  instance in order to know which instances are
   *  in the test set of a cross validation. If 0,
   *  no output is produced (default 0).</pre>
   *
   * <pre> -P
   *  Add target and prediction columns to the result
   *  for each fold.</pre>
   *
   * <pre>
   * Options specific to classifier weka.classifiers.rules.ZeroR:
   * </pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   <!-- options-end -->
   *
   * All options after -- will be passed to the split evaluator.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setRawOutput(Utils.getFlag('D', options));
    setRandomizeData(!Utils.getFlag('R', options));

    String fName = Utils.getOption('O', options);
    if (fName.length() != 0) {
      setOutputFile(new File(fName));
    }

    String trainPct = Utils.getOption('P', options);
    if (trainPct.length() != 0) {
      setTrainPercent((new Double(trainPct)).doubleValue());
    } else {
      setTrainPercent(66);
    }

    String seName = Utils.getOption('W', options);
    if (seName.length() == 0) {
      throw new Exception("A SplitEvaluator must be specified with"
                          + " the -W option.");
    }
    // Do it first without options, so if an exception is thrown during
    // the option setting, listOptions will contain options for the actual
    // SE.
    setSplitEvaluator((SplitEvaluator)Utils.forName(
                      SplitEvaluator.class,
                      seName,
                      null));
    if (getSplitEvaluator() instanceof OptionHandler) {
      ((OptionHandler) getSplitEvaluator())
        .setOptions(Utils.partitionOptions(options));
    }
  }

  /**
   * Gets the current settings of the result producer.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] seOptions = new String [0];
    if ((m_SplitEvaluator != null) && 
        (m_SplitEvaluator instanceof OptionHandler)) {
      seOptions = ((OptionHandler)m_SplitEvaluator).getOptions();
    }

    String [] options = new String [seOptions.length + 9];
    int current = 0;

    options[current++] = "-P"; options[current++] = "" + getTrainPercent();

    if (getRawOutput()) {
      options[current++] = "-D";
    }

    if (!getRandomizeData()) {
      options[current++] = "-R";
    }

    options[current++] = "-O"; 
    options[current++] = getOutputFile().getName();

    if (getSplitEvaluator() != null) {
      options[current++] = "-W";
      options[current++] = getSplitEvaluator().getClass().getName();
    }
    options[current++] = "--";

    System.arraycopy(seOptions, 0, options, current, 
                     seOptions.length);
    current += seOptions.length;
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Gets a text description of the result producer.
   *
   * @return a text description of the result producer.
   */
  public String toString() {

    String result = "RandomSplitResultProducer: ";
    result += getCompatibilityState();
    if (m_Instances == null) {
      result += ": <null Instances>";
    } else {
      result += ": " + Utils.backQuoteChars(m_Instances.relationName());
    }
    return result;
  }

  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.20 $");
  }
} // RandomSplitResultProducer
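
Usage note: a minimal sketch of how this producer can be driven programmatically, outside the Experimenter GUI. The file names are placeholders, and it assumes the stock weka.experiment.InstancesResultListener and the default ClassifierSplitEvaluator are available; error handling is omitted.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import weka.core.Instances;
import weka.experiment.InstancesResultListener;
import weka.experiment.RandomSplitResultProducer;

public class RandomSplitDemo {
  public static void main(String[] args) throws Exception {
    // Load a dataset and declare the class attribute (last attribute here).
    Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
    data.setClassIndex(data.numAttributes() - 1);

    // Configure the producer: 66% train split, default ClassifierSplitEvaluator.
    RandomSplitResultProducer rp = new RandomSplitResultProducer();
    rp.setInstances(data);
    rp.setTrainPercent(66);

    // Collect the generated result rows in an ARFF file.
    InstancesResultListener listener = new InstancesResultListener();
    listener.setOutputFile(new File("results.arff"));
    rp.setResultListener(listener);

    // One doRun call per run; the run number seeds the randomization.
    rp.preProcess();
    for (int run = 1; run <= 10; run++) {
      rp.doRun(run);
    }
    rp.postProcess();
  }
}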