source: src/main/java/weka/experiment/ExplicitTestsetResultProducer.java @ 28

Last change on this file since 28 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 34.2 KB
RevLine 
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    ExplicitTestsetResultProducer.java
19 *    Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.experiment;
24
25import weka.core.AdditionalMeasureProducer;
26import weka.core.Instances;
27import weka.core.Option;
28import weka.core.OptionHandler;
29import weka.core.RevisionHandler;
30import weka.core.RevisionUtils;
31import weka.core.Utils;
32import weka.core.WekaException;
33import weka.core.converters.ConverterUtils.DataSource;
34
35import java.io.File;
36import java.util.Calendar;
37import java.util.Enumeration;
38import java.util.Random;
39import java.util.TimeZone;
40import java.util.Vector;
41
42/**
43 <!-- globalinfo-start -->
44 * Loads the external test set and calls the appropriate SplitEvaluator to generate some results.<br/>
45 * The filename of the test set is constructed as follows:<br/>
46 *    &lt;dir&gt; + / + &lt;prefix&gt; + &lt;relation-name&gt; + &lt;suffix&gt;<br/>
47 * The relation-name can be modified by using the regular expression to replace the matching sub-string with a specified replacement string. In order to get rid of the string that the Weka filters add to the end of the relation name, just use '.*-weka' as the regular expression to find.<br/>
48 * The suffix determines the type of file to load, i.e., one is not restricted to ARFF files. As long as Weka recognizes the extension specified in the suffix, the data will be loaded with one of Weka's converters.
49 * <p/>
50 <!-- globalinfo-end -->
51 *
52 <!-- options-start -->
53 * Valid options are: <p/>
54 *
55 * <pre> -D
56 * Save raw split evaluator output.</pre>
57 *
58 * <pre> -O &lt;file/directory name/path&gt;
59 *  The filename where raw output will be stored.
60 *  If a directory name is specified then individual
61 *  outputs will be gzipped, otherwise all output will be
62 *  zipped to the named file. Use in conjunction with -D.
63 *  (default: splitEvalutorOut.zip)</pre>
64 *
65 * <pre> -W &lt;class name&gt;
66 *  The full class name of a SplitEvaluator.
67 *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
68 *
69 * <pre> -R
70 *  Set when data is to be randomized.</pre>
71 *
72 * <pre> -dir &lt;directory&gt;
73 *  The directory containing the test sets.
74 *  (default: current directory)</pre>
75 *
76 * <pre> -prefix &lt;string&gt;
77 *  An optional prefix for the test sets (before the relation name).
78 * (default: empty string)</pre>
79 *
80 * <pre> -suffix &lt;string&gt;
81 *  The suffix to append to the test set.
82 *  (default: _test.arff)</pre>
83 *
84 * <pre> -find &lt;regular expression&gt;
85 *  The regular expression to search the relation name with.
86 *  Not used if an empty string.
87 *  (default: empty string)</pre>
88 *
89 * <pre> -replace &lt;string&gt;
90 *  The replacement string for the all the matches of '-find'.
91 *  (default: empty string)</pre>
92 *
93 * <pre>
94 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
95 * </pre>
96 *
97 * <pre> -W &lt;class name&gt;
98 *  The full class name of the classifier.
99 *  eg: weka.classifiers.bayes.NaiveBayes</pre>
100 *
101 * <pre> -C &lt;index&gt;
102 *  The index of the class for which IR statistics
103 *  are to be output. (default 1)</pre>
104 *
105 * <pre> -I &lt;index&gt;
106 *  The index of an attribute to output in the
107 *  results. This attribute should identify an
108 *  instance in order to know which instances are
109 *  in the test set of a cross validation. if 0
110 *  no output (default 0).</pre>
111 *
112 * <pre> -P
113 *  Add target and prediction columns to the result
114 *  for each fold.</pre>
115 *
116 * <pre>
117 * Options specific to classifier weka.classifiers.rules.ZeroR:
118 * </pre>
119 *
120 * <pre> -D
121 *  If set, classifier is run in debug mode and
122 *  may output additional info to the console</pre>
123 *
124 <!-- options-end -->
125 *
126 * All options after -- will be passed to the split evaluator.
127 *
128 * @author Len Trigg (trigg@cs.waikato.ac.nz)
129 * @author FracPete (fracpete at waikato dot ac dot nz)
130 * @version $Revision: 5353 $
131 */
132public class ExplicitTestsetResultProducer 
133  implements ResultProducer, OptionHandler, AdditionalMeasureProducer, 
134             RevisionHandler {
135 
136  /** for serialization. */
137  private static final long serialVersionUID = 2613585409333652530L;
138
139  /** the default suffix. */
140  public final static String DEFAULT_SUFFIX = "_test.arff";
141 
142  /** The dataset of interest. */
143  protected Instances m_Instances;
144
145  /** The ResultListener to send results to. */
146  protected ResultListener m_ResultListener = new CSVResultListener();
147
148  /** The directory containing all the test sets. */
149  protected File m_TestsetDir = new File(System.getProperty("user.dir"));
150
151  /** The prefix for all the test sets. */
152  protected String m_TestsetPrefix = "";
153
154  /** The suffix for all the test sets. */
155  protected String m_TestsetSuffix = DEFAULT_SUFFIX;
156
157  /** The regular expression to search for in the relation name. */
158  protected String m_RelationFind = "";
159
160  /** The string to use to replace the matches of the regular expression. */
161  protected String m_RelationReplace = "";
162
163  /** Whether dataset is to be randomized. */
164  protected boolean m_randomize = false;
165
166  /** The SplitEvaluator used to generate results. */
167  protected SplitEvaluator m_SplitEvaluator = new ClassifierSplitEvaluator();
168
169  /** The names of any additional measures to look for in SplitEvaluators. */
170  protected String[] m_AdditionalMeasures = null;
171
172  /** Save raw output of split evaluators --- for debugging purposes. */
173  protected boolean m_debugOutput = false;
174
175  /** The output zipper to use for saving raw splitEvaluator output. */
176  protected OutputZipper m_ZipDest = null;
177
178  /** The destination output file/directory for raw output. */
179  protected File m_OutputFile = new File(
180                                new File(System.getProperty("user.dir")), 
181                                "splitEvalutorOut.zip");
182
183  /** The name of the key field containing the dataset name. */
184  public static String DATASET_FIELD_NAME = "Dataset";
185
186  /** The name of the key field containing the run number. */
187  public static String RUN_FIELD_NAME = "Run";
188
189  /** The name of the result field containing the timestamp. */
190  public static String TIMESTAMP_FIELD_NAME = "Date_time";
191
192  /**
193   * Returns a string describing this result producer.
194   *
195   * @return            a description of the result producer suitable for
196   *                    displaying in the explorer/experimenter gui
197   */
198  public String globalInfo() {
199    return
200        "Loads the external test set and calls the appropriate "
201      + "SplitEvaluator to generate some results.\n"
202      + "The filename of the test set is constructed as follows:\n"
203      + "   <dir> + / + <prefix> + <relation-name> + <suffix>\n"
204      + "The relation-name can be modified by using the regular expression "
205      + "to replace the matching sub-string with a specified replacement "
206      + "string. In order to get rid of the string that the Weka filters "
207      + "add to the end of the relation name, just use '.*-weka' as the "
208      + "regular expression to find.\n"
209      + "The suffix determines the type of file to load, i.e., one is "
210      + "not restricted to ARFF files. As long as Weka recognizes the "
211      + "extension specified in the suffix, the data will be loaded with "
212      + "one of Weka's converters.";
213  }
214
215  /**
216   * Returns an enumeration describing the available options..
217   *
218   * @return            an enumeration of all the available options.
219   */
220  public Enumeration listOptions() {
221    Vector result = new Vector();
222
223    result.addElement(new Option(
224        "Save raw split evaluator output.",
225        "D", 0, "-D"));
226
227    result.addElement(new Option(
228        "\tThe filename where raw output will be stored.\n"
229        +"\tIf a directory name is specified then then individual\n"
230        +"\toutputs will be gzipped, otherwise all output will be\n"
231        +"\tzipped to the named file. Use in conjuction with -D.\n"
232        +"\t(default: splitEvalutorOut.zip)", 
233        "O", 1, "-O <file/directory name/path>"));
234
235    result.addElement(new Option(
236        "\tThe full class name of a SplitEvaluator.\n"
237        +"\teg: weka.experiment.ClassifierSplitEvaluator", 
238        "W", 1, "-W <class name>"));
239
240    result.addElement(new Option(
241        "\tSet when data is to be randomized.",
242        "R", 0 ,"-R"));
243
244    result.addElement(new Option(
245        "\tThe directory containing the test sets.\n"
246        + "\t(default: current directory)", 
247        "dir", 1, "-dir <directory>"));
248
249    result.addElement(new Option(
250        "\tAn optional prefix for the test sets (before the relation name).\n"
251        + "(default: empty string)", 
252        "prefix", 1, "-prefix <string>"));
253
254    result.addElement(new Option(
255        "\tThe suffix to append to the test set.\n"
256        + "\t(default: " + DEFAULT_SUFFIX + ")", 
257        "suffix", 1, "-suffix <string>"));
258
259    result.addElement(new Option(
260        "\tThe regular expression to search the relation name with.\n"
261        + "\tNot used if an empty string.\n"
262        + "\t(default: empty string)", 
263        "find", 1, "-find <regular expression>"));
264
265    result.addElement(new Option(
266        "\tThe replacement string for the all the matches of '-find'.\n"
267        + "\t(default: empty string)", 
268        "replace", 1, "-replace <string>"));
269   
270    if ((m_SplitEvaluator != null) && (m_SplitEvaluator instanceof OptionHandler)) {
271      result.addElement(new Option(
272          "",
273          "", 0, "\nOptions specific to split evaluator "
274          + m_SplitEvaluator.getClass().getName() + ":"));
275      Enumeration enu = ((OptionHandler)m_SplitEvaluator).listOptions();
276      while (enu.hasMoreElements())
277        result.addElement(enu.nextElement());
278    }
279   
280    return result.elements();
281  }
282
283  /**
284   * Parses a given list of options. <p/>
285   *
286   <!-- options-start -->
287   * Valid options are: <p/>
288   *
289   * <pre> -D
290   * Save raw split evaluator output.</pre>
291   *
292   * <pre> -O &lt;file/directory name/path&gt;
293   *  The filename where raw output will be stored.
294   *  If a directory name is specified then individual
295   *  outputs will be gzipped, otherwise all output will be
296   *  zipped to the named file. Use in conjunction with -D.
297   *  (default: splitEvalutorOut.zip)</pre>
298   *
299   * <pre> -W &lt;class name&gt;
300   *  The full class name of a SplitEvaluator.
301   *  eg: weka.experiment.ClassifierSplitEvaluator</pre>
302   *
303   * <pre> -R
304   *  Set when data is to be randomized.</pre>
305   *
306   * <pre> -dir &lt;directory&gt;
307   *  The directory containing the test sets.
308   *  (default: current directory)</pre>
309   *
310   * <pre> -prefix &lt;string&gt;
311   *  An optional prefix for the test sets (before the relation name).
312   * (default: empty string)</pre>
313   *
314   * <pre> -suffix &lt;string&gt;
315   *  The suffix to append to the test set.
316   *  (default: _test.arff)</pre>
317   *
318   * <pre> -find &lt;regular expression&gt;
319   *  The regular expression to search the relation name with.
320   *  Not used if an empty string.
321   *  (default: empty string)</pre>
322   *
323   * <pre> -replace &lt;string&gt;
324   *  The replacement string for the all the matches of '-find'.
325   *  (default: empty string)</pre>
326   *
327   * <pre>
328   * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
329   * </pre>
330   *
331   * <pre> -W &lt;class name&gt;
332   *  The full class name of the classifier.
333   *  eg: weka.classifiers.bayes.NaiveBayes</pre>
334   *
335   * <pre> -C &lt;index&gt;
336   *  The index of the class for which IR statistics
337   *  are to be output. (default 1)</pre>
338   *
339   * <pre> -I &lt;index&gt;
340   *  The index of an attribute to output in the
341   *  results. This attribute should identify an
342   *  instance in order to know which instances are
343   *  in the test set of a cross validation. if 0
344   *  no output (default 0).</pre>
345   *
346   * <pre> -P
347   *  Add target and prediction columns to the result
348   *  for each fold.</pre>
349   *
350   * <pre>
351   * Options specific to classifier weka.classifiers.rules.ZeroR:
352   * </pre>
353   *
354   * <pre> -D
355   *  If set, classifier is run in debug mode and
356   *  may output additional info to the console</pre>
357   *
358   <!-- options-end -->
359   *
360   * All options after -- will be passed to the split evaluator.
361   *
362   * @param options     the list of options as an array of strings
363   * @throws Exception  if an option is not supported
364   */
365  public void setOptions(String[] options) throws Exception {
366    String      tmpStr;
367   
368    setRawOutput(Utils.getFlag('D', options));
369    setRandomizeData(!Utils.getFlag('R', options));
370
371    tmpStr = Utils.getOption('O', options);
372    if (tmpStr.length() != 0)
373      setOutputFile(new File(tmpStr));
374
375    tmpStr = Utils.getOption("dir", options);
376    if (tmpStr.length() > 0)
377      setTestsetDir(new File(tmpStr));
378    else
379      setTestsetDir(new File(System.getProperty("user.dir")));
380
381    tmpStr = Utils.getOption("prefix", options);
382    if (tmpStr.length() > 0)
383      setTestsetPrefix(tmpStr);
384    else
385      setTestsetPrefix("");
386
387    tmpStr = Utils.getOption("suffix", options);
388    if (tmpStr.length() > 0)
389      setTestsetSuffix(tmpStr);
390    else
391      setTestsetSuffix(DEFAULT_SUFFIX);
392   
393    tmpStr = Utils.getOption("find", options);
394    if (tmpStr.length() > 0)
395      setRelationFind(tmpStr);
396    else
397      setRelationFind("");
398   
399    tmpStr = Utils.getOption("replace", options);
400    if ((tmpStr.length() > 0) && (getRelationFind().length() > 0))
401      setRelationReplace(tmpStr);
402    else
403      setRelationReplace("");
404   
405    tmpStr = Utils.getOption('W', options);
406    if (tmpStr.length() == 0)
407      throw new Exception("A SplitEvaluator must be specified with the -W option.");
408   
409    // Do it first without options, so if an exception is thrown during
410    // the option setting, listOptions will contain options for the actual
411    // SE.
412    setSplitEvaluator((SplitEvaluator)Utils.forName(SplitEvaluator.class, tmpStr, null));
413    if (getSplitEvaluator() instanceof OptionHandler)
414      ((OptionHandler) getSplitEvaluator()).setOptions(Utils.partitionOptions(options));
415  }
416
417  /**
418   * Gets the current settings of the result producer.
419   *
420   * @return            an array of strings suitable for passing to setOptions
421   */
422  public String[] getOptions() {
423    Vector<String>      result;
424    String[]            seOptions;
425    int                 i;
426   
427    result = new Vector<String>();
428   
429    seOptions = new String [0];
430    if ((m_SplitEvaluator != null) && (m_SplitEvaluator instanceof OptionHandler))
431      seOptions = ((OptionHandler)m_SplitEvaluator).getOptions();
432
433    if (getRawOutput())
434      result.add("-D");
435   
436    if (!getRandomizeData())
437      result.add("-R");
438
439    result.add("-O"); 
440    result.add(getOutputFile().getName());
441
442    result.add("-dir");
443    result.add(getTestsetDir().getPath());
444   
445    if (getTestsetPrefix().length() > 0) {
446      result.add("-prefix");
447      result.add(getTestsetPrefix());
448    }
449
450    result.add("-suffix");
451    result.add(getTestsetSuffix());
452   
453    if (getRelationFind().length() > 0) {
454      result.add("-find");
455      result.add(getRelationFind());
456     
457      if (getRelationReplace().length() > 0) {
458        result.add("-replace");
459        result.add(getRelationReplace());
460      }
461    }
462
463    if (getSplitEvaluator() != null) {
464      result.add("-W");
465      result.add(getSplitEvaluator().getClass().getName());
466    }
467   
468    if (seOptions.length > 0) {
469      result.add("--");
470      for (i = 0; i < seOptions.length; i++)
471        result.add(seOptions[i]);
472    }
473
474    return result.toArray(new String[result.size()]);
475  }
476
477  /**
478   * Sets the dataset that results will be obtained for.
479   *
480   * @param instances a value of type 'Instances'.
481   */
482  public void setInstances(Instances instances) {
483    m_Instances = instances;
484  }
485
486  /**
487   * Set a list of method names for additional measures to look for
488   * in SplitEvaluators. This could contain many measures (of which only a
489   * subset may be produceable by the current SplitEvaluator) if an experiment
490   * is the type that iterates over a set of properties.
491   *
492   * @param additionalMeasures  an array of measure names, null if none
493   */
494  public void setAdditionalMeasures(String[] additionalMeasures) {
495    m_AdditionalMeasures = additionalMeasures;
496
497    if (m_SplitEvaluator != null) {
498      System.err.println(
499          "ExplicitTestsetResultProducer: setting additional "
500          + "measures for split evaluator");
501      m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
502    }
503  }
504 
505  /**
506   * Returns an enumeration of any additional measure names that might be
507   * in the SplitEvaluator.
508   *
509   * @return            an enumeration of the measure names
510   */
511  public Enumeration enumerateMeasures() {
512    Vector result = new Vector();
513    if (m_SplitEvaluator instanceof AdditionalMeasureProducer) {
514      Enumeration en = ((AdditionalMeasureProducer)m_SplitEvaluator).enumerateMeasures();
515      while (en.hasMoreElements()) {
516        String mname = (String) en.nextElement();
517        result.addElement(mname);
518      }
519    }
520    return result.elements();
521  }
522 
523  /**
524   * Returns the value of the named measure.
525   *
526   * @param additionalMeasureName       the name of the measure to query for its value
527   * @return                            the value of the named measure
528   * @throws IllegalArgumentException   if the named measure is not supported
529   */
530  public double getMeasure(String additionalMeasureName) {
531    if (m_SplitEvaluator instanceof AdditionalMeasureProducer)
532      return ((AdditionalMeasureProducer)m_SplitEvaluator).getMeasure(additionalMeasureName);
533    else
534      throw new IllegalArgumentException(
535          "ExplicitTestsetResultProducer: "
536          + "Can't return value for : " + additionalMeasureName
537          + ". " + m_SplitEvaluator.getClass().getName() + " "
538          + "is not an AdditionalMeasureProducer");
539  }
540 
541  /**
542   * Sets the object to send results of each run to.
543   *
544   * @param listener    a value of type 'ResultListener'
545   */
546  public void setResultListener(ResultListener listener) {
547    m_ResultListener = listener;
548  }
549
550  /**
551   * Gets a Double representing the current date and time.
552   * eg: 1:46pm on 20/5/1999 -> 19990520.1346
553   *
554   * @return            a value of type Double
555   */
556  public static Double getTimestamp() {
557    Calendar now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
558    double timestamp = now.get(Calendar.YEAR) * 10000
559      + (now.get(Calendar.MONTH) + 1) * 100
560      + now.get(Calendar.DAY_OF_MONTH)
561      + now.get(Calendar.HOUR_OF_DAY) / 100.0
562      + now.get(Calendar.MINUTE) / 10000.0;
563    return new Double(timestamp);
564  }
565
566  /**
567   * Prepare to generate results.
568   *
569   * @throws Exception  if an error occurs during preprocessing.
570   */
571  public void preProcess() throws Exception {
572    if (m_SplitEvaluator == null)
573      throw new Exception("No SplitEvalutor set");
574
575    if (m_ResultListener == null)
576      throw new Exception("No ResultListener set");
577
578    m_ResultListener.preProcess(this);
579  }
580 
581  /**
582   * Perform any postprocessing. When this method is called, it indicates
583   * that no more requests to generate results for the current experiment
584   * will be sent.
585   *
586   * @throws Exception  if an error occurs
587   */
588  public void postProcess() throws Exception {
589    m_ResultListener.postProcess(this);
590    if (m_debugOutput) {
591      if (m_ZipDest != null) {
592        m_ZipDest.finished();
593        m_ZipDest = null;
594      }
595    }
596  }
597
598  /**
599   * Gets the keys for a specified run number. Different run
600   * numbers correspond to different randomizations of the data. Keys
601   * produced should be sent to the current ResultListener
602   *
603   * @param run         the run number to get keys for.
604   * @throws Exception  if a problem occurs while getting the keys
605   */
606  public void doRunKeys(int run) throws Exception {
607    if (m_Instances == null)
608      throw new Exception("No Instances set");
609
610    // Add in some fields to the key like run number, dataset name
611    Object[] seKey = m_SplitEvaluator.getKey();
612    Object[] key = new Object [seKey.length + 2];
613    key[0] = Utils.backQuoteChars(m_Instances.relationName());
614    key[1] = "" + run;
615    System.arraycopy(seKey, 0, key, 2, seKey.length);
616    if (m_ResultListener.isResultRequired(this, key)) {
617      try {
618        m_ResultListener.acceptResult(this, key, null);
619      }
620      catch (Exception ex) {
621        // Save the train and test datasets for debugging purposes?
622        throw ex;
623      }
624    }
625  }
626
627  /**
628   * Generates a new filename for the given relation based on the current
629   * setup.
630   *
631   * @param inst        the instances to create the filename for
632   * @return            the generated filename
633   */
634  protected String createFilename(Instances inst) {
635    String      result;
636    String      name;
637
638    name = inst.relationName();
639    if (getRelationFind().length() > 0)
640      name = name.replaceAll(getRelationFind(), getRelationReplace());
641   
642    result  = getTestsetDir().getPath() + File.separator;
643    result += getTestsetPrefix() + name + getTestsetSuffix();
644   
645    return result;
646  }
647 
648  /**
649   * Gets the results for a specified run number. Different run
650   * numbers correspond to different randomizations of the data. Results
651   * produced should be sent to the current ResultListener
652   *
653   * @param run         the run number to get results for.
654   * @throws Exception  if a problem occurs while getting the results
655   */
656  public void doRun(int run) throws Exception {
657    if (getRawOutput()) {
658      if (m_ZipDest == null)
659        m_ZipDest = new OutputZipper(m_OutputFile);
660    }
661
662    if (m_Instances == null)
663      throw new Exception("No Instances set");
664   
665    // Add in some fields to the key like run number, dataset name
666    Object[] seKey = m_SplitEvaluator.getKey();
667    Object[] key = new Object [seKey.length + 2];
668    key[0] = Utils.backQuoteChars(m_Instances.relationName());
669    key[1] = "" + run;
670    System.arraycopy(seKey, 0, key, 2, seKey.length);
671    if (m_ResultListener.isResultRequired(this, key)) {
672      // training set
673      Instances train = new Instances(m_Instances);
674      if (m_randomize) {
675        Random rand = new Random(run);
676        train.randomize(rand);
677      }
678
679      // test set
680      String filename = createFilename(train);
681      File file = new File(filename);
682      if (!file.exists())
683        throw new WekaException("Test set '" + filename + "' not found!");
684      Instances test = DataSource.read(filename);
685      // can we set the class attribute safely?
686      if (train.numAttributes() == test.numAttributes())
687        test.setClassIndex(train.classIndex());
688      else
689        throw new WekaException(
690            "Train and test set (= " + filename + ") "
691            + "differ in number of attributes: "
692            + train.numAttributes() + " != " + test.numAttributes());
693      // test headers
694      if (!train.equalHeaders(test))
695        throw new WekaException(
696            "Train and test set (= " + filename + ") "
697            + "are not compatible:\n"
698            + train.equalHeadersMsg(test));
699     
700      try {
701        Object[] seResults = m_SplitEvaluator.getResult(train, test);
702        Object[] results = new Object [seResults.length + 1];
703        results[0] = getTimestamp();
704        System.arraycopy(seResults, 0, results, 1,
705                         seResults.length);
706        if (m_debugOutput) {
707          String resultName = 
708            (""+run+"."+
709             Utils.backQuoteChars(train.relationName())
710             +"."
711             +m_SplitEvaluator.toString()).replace(' ','_');
712          resultName = Utils.removeSubstring(resultName, 
713                                             "weka.classifiers.");
714          resultName = Utils.removeSubstring(resultName, 
715                                             "weka.filters.");
716          resultName = Utils.removeSubstring(resultName, 
717                                             "weka.attributeSelection.");
718          m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName);
719        }
720        m_ResultListener.acceptResult(this, key, results);
721      }
722      catch (Exception e) {
723        // Save the train and test datasets for debugging purposes?
724        throw e;
725      }
726    }
727  }
728
729  /**
730   * Gets the names of each of the columns produced for a single run.
731   * This method should really be static.
732   *
733   * @return            an array containing the name of each column
734   */
735  public String[] getKeyNames() {
736    String[] keyNames = m_SplitEvaluator.getKeyNames();
737    // Add in the names of our extra key fields
738    String[] newKeyNames = new String [keyNames.length + 2];
739    newKeyNames[0] = DATASET_FIELD_NAME;
740    newKeyNames[1] = RUN_FIELD_NAME;
741    System.arraycopy(keyNames, 0, newKeyNames, 2, keyNames.length);
742    return newKeyNames;
743  }
744
745  /**
746   * Gets the data types of each of the columns produced for a single run.
747   * This method should really be static.
748   *
749   * @return            an array containing objects of the type of each column.
750   *                    The objects should be Strings, or Doubles.
751   */
752  public Object[] getKeyTypes() {
753    Object[] keyTypes = m_SplitEvaluator.getKeyTypes();
754    // Add in the types of our extra fields
755    Object[] newKeyTypes = new String [keyTypes.length + 2];
756    newKeyTypes[0] = new String();
757    newKeyTypes[1] = new String();
758    System.arraycopy(keyTypes, 0, newKeyTypes, 2, keyTypes.length);
759    return newKeyTypes;
760  }
761
762  /**
763   * Gets the names of each of the columns produced for a single run.
764   * This method should really be static.
765   *
766   * @return            an array containing the name of each column
767   */
768  public String[] getResultNames() {
769    String[] resultNames = m_SplitEvaluator.getResultNames();
770    // Add in the names of our extra Result fields
771    String[] newResultNames = new String [resultNames.length + 1];
772    newResultNames[0] = TIMESTAMP_FIELD_NAME;
773    System.arraycopy(resultNames, 0, newResultNames, 1, resultNames.length);
774    return newResultNames;
775  }
776
777  /**
778   * Gets the data types of each of the columns produced for a single run.
779   * This method should really be static.
780   *
781   * @return            an array containing objects of the type of each column.
782   *                    The objects should be Strings, or Doubles.
783   */
784  public Object[] getResultTypes() {
785    Object[] resultTypes = m_SplitEvaluator.getResultTypes();
786    // Add in the types of our extra Result fields
787    Object[] newResultTypes = new Object [resultTypes.length + 1];
788    newResultTypes[0] = new Double(0);
789    System.arraycopy(resultTypes, 0, newResultTypes, 1, resultTypes.length);
790    return newResultTypes;
791  }
792
793  /**
794   * Gets a description of the internal settings of the result
795   * producer, sufficient for distinguishing a ResultProducer
796   * instance from another with different settings (ignoring
797   * those settings set through this interface). For example,
798   * a cross-validation ResultProducer may have a setting for the
799   * number of folds. For a given state, the results produced should
800   * be compatible. Typically if a ResultProducer is an OptionHandler,
801   * this string will represent the command line arguments required
802   * to set the ResultProducer to that state.
803   *
804   * @return            the description of the ResultProducer state, or null
805   *                    if no state is defined
806   */
807  public String getCompatibilityState() {
808    String      result;
809   
810    result = "";
811    if (getRandomizeData())
812      result += " -R";
813
814    result += " -dir " + getTestsetDir();
815   
816    if (getTestsetPrefix().length() > 0)
817      result += " -prefix " + getTestsetPrefix();
818   
819    result += " -suffix " + getTestsetSuffix();
820   
821    if (getRelationFind().length() > 0) {
822      result += " -find " + getRelationFind();
823     
824      if (getRelationReplace().length() > 0)
825        result += " -replace " + getRelationReplace();
826    }
827   
828    if (m_SplitEvaluator == null)
829      result += " <null SplitEvaluator>";
830    else
831      result += " -W " + m_SplitEvaluator.getClass().getName();
832
833    return result + " --";
834  }
835
836  /**
837   * Returns the tip text for this property.
838   *
839   * @return            tip text for this property suitable for
840   *                    displaying in the explorer/experimenter gui
841   */
842  public String outputFileTipText() {
843    return "Set the destination for saving raw output. If the rawOutput "
844      +"option is selected, then output from the splitEvaluator for "
845      +"individual train-test splits is saved. If the destination is a "
846      +"directory, "
847      +"then each output is saved to an individual gzip file; if the "
848      +"destination is a file, then each output is saved as an entry "
849      +"in a zip file.";
850  }
851
852  /**
853   * Get the value of OutputFile.
854   *
855   * @return            Value of OutputFile.
856   */
857  public File getOutputFile() {
858    return m_OutputFile;
859  }
860 
861  /**
862   * Set the value of OutputFile.
863   *
864   * @param value       Value to assign to OutputFile.
865   */
866  public void setOutputFile(File value) {
867    m_OutputFile = value;
868  } 
869
870  /**
871   * Returns the tip text for this property.
872   *
873   * @return            tip text for this property suitable for
874   *                    displaying in the explorer/experimenter gui
875   */
876  public String randomizeDataTipText() {
877    return "Do not randomize dataset and do not perform probabilistic rounding " +
878      "if true";
879  }
880
881  /**
882   * Get if dataset is to be randomized.
883   *
884   * @return            true if dataset is to be randomized
885   */
886  public boolean getRandomizeData() {
887    return m_randomize;
888  }
889 
890  /**
891   * Set to true if dataset is to be randomized.
892   *
893   * @param value               true if dataset is to be randomized
894   */
895  public void setRandomizeData(boolean value) {
896    m_randomize = value;
897  }
898
899  /**
900   * Returns the tip text for this property.
901   *
902   * @return            tip text for this property suitable for
903   *                    displaying in the explorer/experimenter gui
904   */
905  public String rawOutputTipText() {
906    return "Save raw output (useful for debugging). If set, then output is "
907      +"sent to the destination specified by outputFile";
908  }
909
910  /**
911   * Get if raw split evaluator output is to be saved.
912   *
913   * @return            true if raw split evaluator output is to be saved
914   */
915  public boolean getRawOutput() {
916    return m_debugOutput;
917  }
918 
919  /**
920   * Set to true if raw split evaluator output is to be saved.
921   *
922   * @param value               true if output is to be saved
923   */
924  public void setRawOutput(boolean value) {
925    m_debugOutput = value;
926  }
927
928  /**
929   * Returns the tip text for this property.
930   *
931   * @return            tip text for this property suitable for
932   *                    displaying in the explorer/experimenter gui
933   */
934  public String splitEvaluatorTipText() {
935    return "The evaluator to apply to the test data. "
936      +"This may be a classifier, regression scheme etc.";
937  }
938
939  /**
940   * Get the SplitEvaluator.
941   *
942   * @return            the SplitEvaluator.
943   */
944  public SplitEvaluator getSplitEvaluator() {
945    return m_SplitEvaluator;
946  }
947 
948  /**
949   * Set the SplitEvaluator.
950   *
951   * @param value       new SplitEvaluator to use.
952   */
953  public void setSplitEvaluator(SplitEvaluator value) {
954    m_SplitEvaluator = value;
955    m_SplitEvaluator.setAdditionalMeasures(m_AdditionalMeasures);
956  }
957
958  /**
959   * Returns the tip text for this property.
960   *
961   * @return            tip text for this property suitable for
962   *                    displaying in the explorer/experimenter gui
963   */
964  public String testsetDirTipText() {
965    return "The directory containing the test sets.";
966  }
967
968  /**
969   * Returns the currently set directory for the test sets.
970   *
971   * @return            the directory
972   */
973  public File getTestsetDir() {
974    return m_TestsetDir;
975  }
976 
977  /**
978   * Sets the directory to use for the test sets.
979   *
980   * @param value       the directory to use
981   */
982  public void setTestsetDir(File value) {
983    m_TestsetDir = value;
984  } 
985
986  /**
987   * Returns the tip text for this property.
988   *
989   * @return            tip text for this property suitable for
990   *                    displaying in the explorer/experimenter gui
991   */
992  public String testsetPrefixTipText() {
993    return "The prefix to use for the filename of the test sets.";
994  }
995
996  /**
997   * Returns the currently set prefix.
998   *
999   * @return            the prefix
1000   */
1001  public String getTestsetPrefix() {
1002    return m_TestsetPrefix;
1003  }
1004 
1005  /**
1006   * Sets the prefix to use for the test sets.
1007   *
1008   * @param value       the prefix
1009   */
1010  public void setTestsetPrefix(String value) {
1011    m_TestsetPrefix = value;
1012  } 
1013
1014  /**
1015   * Returns the tip text for this property.
1016   *
1017   * @return            tip text for this property suitable for
1018   *                    displaying in the explorer/experimenter gui
1019   */
1020  public String testsetSuffixTipText() {
1021    return 
1022        "The suffix to use for the filename of the test sets - must contain "
1023      + "the file extension.";
1024  }
1025
1026  /**
1027   * Returns the currently set suffix.
1028   *
1029   * @return            the suffix
1030   */
1031  public String getTestsetSuffix() {
1032    return m_TestsetSuffix;
1033  }
1034 
1035  /**
1036   * Sets the suffix to use for the test sets.
1037   *
1038   * @param value       the suffix
1039   */
1040  public void setTestsetSuffix(String value) {
1041    if ((value == null) || (value.length() == 0))
1042      value = DEFAULT_SUFFIX;
1043    m_TestsetSuffix = value;
1044  } 
1045
1046  /**
1047   * Returns the tip text for this property.
1048   *
1049   * @return            tip text for this property suitable for
1050   *                    displaying in the explorer/experimenter gui
1051   */
1052  public String relationFindTipText() {
1053    return 
1054        "The regular expression to use for removing parts of the relation "
1055      + "name, ignored if empty.";
1056  }
1057
1058  /**
1059   * Returns the currently set regular expression to use on the relation name.
1060   *
1061   * @return            the regular expression
1062   */
1063  public String getRelationFind() {
1064    return m_RelationFind;
1065  }
1066 
1067  /**
1068   * Sets the regular expression to use on the relation name.
1069   *
1070   * @param value       the regular expression
1071   */
1072  public void setRelationFind(String value) {
1073    m_RelationFind = value;
1074  } 
1075
1076  /**
1077   * Returns the tip text for this property.
1078   *
1079   * @return            tip text for this property suitable for
1080   *                    displaying in the explorer/experimenter gui
1081   */
1082  public String relationReplaceTipText() {
1083    return "The string to replace all matches of the regular expression with.";
1084  }
1085
1086  /**
1087   * Returns the currently set replacement string to use on the relation name.
1088   *
1089   * @return            the replacement string
1090   */
1091  public String getRelationReplace() {
1092    return m_RelationReplace;
1093  }
1094 
1095  /**
1096   * Sets the replacement string to use on the relation name.
1097   *
1098   * @param value       the replacement string
1099   */
1100  public void setRelationReplace(String value) {
1101    m_RelationReplace = value;
1102  } 
1103
1104  /**
1105   * Gets a text description of the result producer.
1106   *
1107   * @return            a text description of the result producer.
1108   */
1109  public String toString() {
1110    String result = "ExplicitTestsetResultProducer: ";
1111    result += getCompatibilityState();
1112    if (m_Instances == null)
1113      result += ": <null Instances>";
1114    else
1115      result += ": " + Utils.backQuoteChars(m_Instances.relationName());
1116    return result;
1117  }
1118
1119  /**
1120   * Returns the revision string.
1121   *
1122   * @return            the revision
1123   */
1124  public String getRevision() {
1125    return RevisionUtils.extract("$Revision: 5353 $");
1126  }
1127}
Note: See TracBrowser for help on using the repository browser.